"""Download the datasheets referenced by a part database through Chrome.

Every link carrying the "datasheet" relation is fetched inside a
Selenium-driven Chrome session and saved into the destination directory
with FileSaver.js.
"""
import argparse
import time
from pathlib import Path

import requests
from selenium import webdriver
from selenium.common.exceptions import JavascriptException, TimeoutException
from selenium.webdriver.support.wait import WebDriverWait

from ee.part import Part, load_db
from ee.xml import uris

# FileSaver.js provides the global ``saveAs`` used by the injected script.
FILE_SAVER_MIN_JS_URL = "https://raw.githubusercontent.com/eligrey/FileSaver.js/v2.0.2/dist/FileSaver.js"

# Script run in the page for each job: arguments[0] is the URL to fetch and
# arguments[1] the file name handed to saveAs(). ``window.done`` flips to
# true once saveAs() has been called, which is what the Python side polls.
# NOTE(review): with "mode": "no-cors" a cross-origin response is opaque and
# its blob may be empty -- TODO confirm that the saved files have content.
_FETCH_AND_SAVE_JS = '''
window.done = false;
var args = arguments;
function work() {
    console.log("WORK");
    return fetch(args[0], {
        "credentials": "same-origin",
        "referrerPolicy": "no-referrer-when-downgrade",
        "body": null,
        "method": "HEAD",
        "mode": "no-cors"
    }).then(resp => {
        window.resp = resp;
        return resp;
    });
}
setTimeout(work, 2000);
return fetch(arguments[0], {
    "credentials": "same-origin",
    "referrerPolicy": "no-referrer-when-downgrade",
    "body": null,
    "method": "GET",
    "mode": "no-cors"
}).then(resp => {
    return resp.blob();
}).then(blob => {
    saveAs(blob, arguments[1]);
    window.done = true;
});
'''


class Job(object):
    """One datasheet to download.

    Attributes are plain values: ``url`` and ``title`` are stored as given,
    ``path`` is the last component of ``url`` as computed by
    ``Path(url).name`` (a query string, if any, stays part of it).
    """

    def __init__(self, url, title):
        self.url = url
        self.title = title
        self.path = Path(url).name


def _collect_jobs(in_path: Path) -> dict:
    """Return a ``{url: Job}`` dict for every datasheet link in the database."""
    ds_rel = uris.make_link_rel("datasheet")
    jobs = {}
    for xml in load_db(in_path).iterparts():
        for link in Part(xml).get_links():
            # relationProp is a space-separated list of relations.
            if ds_rel not in link.relationProp.split(" "):
                continue
            # Keyed by URL, so a URL seen on several parts is fetched once.
            jobs[link.urlProp] = Job(link.urlProp, link.titleProp)
    return jobs


def _wait_for_file(path: Path, timeout: float, poll: float = 0.25) -> bool:
    """Poll until ``path`` exists; return False if ``timeout`` seconds pass."""
    deadline = time.monotonic() + timeout
    while not path.exists():
        if time.monotonic() >= deadline:
            return False
        time.sleep(poll)
    return True


def work(in_path: Path, out_dir: Path, index_path: Path, timeout: float = 60.0):
    """Download every datasheet linked from the part database.

    Args:
        in_path: Part database passed to ``load_db``.
        out_dir: Directory configured as Chrome's download directory; it is
            created if missing.
        index_path: File that is (re)written once the downloads are done.
            Only a single empty line is written to it.
        timeout: Seconds to wait for each download. ``WebDriverWait`` counts
            in seconds, not milliseconds.

    Raises:
        requests.HTTPError: If FileSaver.js cannot be fetched.
    """
    jobs = _collect_jobs(in_path)

    print("out_dir={}".format(out_dir))
    # Use an absolute, existing directory so the download location does not
    # depend on the working directory of the Chrome process.
    download_dir = out_dir.resolve()
    download_dir.mkdir(parents=True, exist_ok=True)

    # Fetch the helper script before starting Chrome, and fail loudly rather
    # than injecting an HTTP error page into the browser as JavaScript.
    response = requests.get(FILE_SAVER_MIN_JS_URL, timeout=30)
    response.raise_for_status()
    file_saver_min_js = response.text

    options = webdriver.ChromeOptions()
    options.add_experimental_option("prefs", {
        "download.default_directory": str(download_dir),
    })

    driver = webdriver.Chrome(options=options)
    try:
        driver.execute_script(file_saver_min_js)

        for j in jobs.values():
            print("Fetching {}".format(j.url))
            try:
                driver.execute_script(_FETCH_AND_SAVE_JS, j.url, j.path)
                done = WebDriverWait(driver, timeout).until(
                    lambda d: d.execute_script('return window.done'))
            except (TimeoutException, JavascriptException) as e:
                # One unreachable datasheet must not abort the whole run.
                print("Failed to fetch {}: {}".format(j.url, e))
                continue
            print("done={}".format(done))

            # saveAs() only starts the download; give Chrome a bounded
            # amount of time to write the file before moving on or quitting.
            if not _wait_for_file(download_dir / j.path, timeout):
                print("Download of {} did not show up in {}".format(j.path, download_dir))
    finally:
        # Always release the browser, even when a job raises.
        driver.quit()

    # The index is currently just a file containing one empty line.
    with index_path.open("w") as f:
        print("", file=f)


def main():
    """Parse the command line and run the download."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--in", dest="in_path", required=True, metavar="PART DB")
    parser.add_argument("--destination", required=True, metavar="DIRECTORY")
    parser.add_argument("--index", required=True, metavar="INDEX")
    args = parser.parse_args()

    work(Path(args.in_path), Path(args.destination), Path(args.index))


if __name__ == "__main__":
    main()