#!/usr/bin/env python3
"""Fetch a web page in headless Chrome, let its JavaScript run, and dump the DOM.

The rendered HTML is written to stdout; progress messages go to stderr.
"""
# requires: selenium, chromium-driver, retry

import argparse
import logging
import sys
import time
import urllib.parse

import selenium.common.exceptions as sel_ex
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
# from retry import retry

# Log to stderr so that stdout carries nothing but the dumped HTML.
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
logger = logging.getLogger()
# retry_logger = None


def selenium_get(wd, url, sleep=0, out=None):
    """Load *url* in *wd* and print the resulting DOM as HTML.

    Args:
        wd: Selenium WebDriver used to load the page and run the script.
        url: Address passed to ``wd.get``.
        sleep: Seconds to wait after the load returns before reading the
            DOM; a falsy value skips the wait.
        out: File-like object handed to ``print``; ``None`` selects
            ``print``'s default stream.

    Returns:
        None. The HTML is only printed.
    """
    logger.info("before load")
    wd.get(url)
    logger.info("load done")

    if sleep:
        # Optional pause before the DOM is serialized.
        logger.info("will sleep for %ds", sleep)
        time.sleep(sleep)
        logger.info("sleep done")

    # Reads innerHTML of the first <html> element, so the <html> tag itself
    # is not part of the output.
    html = wd.execute_script(
        "return document.getElementsByTagName('html')[0].innerHTML"
    )
    print(html, file=out)


def main():
    """Parse the command line, start headless Chrome, and dump the page."""
    parser = argparse.ArgumentParser(
        description='Fetch a web page, run the JavaScript, and write the output to stdout.'
    )
    parser.add_argument('url', type=str, help='URL of page to load')
    parser.add_argument(
        '--sleep',
        type=int,
        default=0,
        help='seconds to sleep before dumping DOM as HTML',
    )
    args = parser.parse_args()

    opts = Options()
    opts.add_argument("--headless")
    # opts.add_argument("--blink-settings=imagesEnabled=false")

    # Selenium 4 takes the driver binary through a Service object; the old
    # positional executable-path argument is no longer accepted by recent
    # releases.
    service = Service(ChromeDriverManager().install())

    # The context manager quits the browser even if the fetch raises.
    with webdriver.Chrome(service=service, options=opts) as wd:
        selenium_get(wd, args.url, sleep=args.sleep, out=sys.stdout)


if __name__ == "__main__":
    main()