# Recovered from a cgit patch view:
#   commit:  bd33b778f17e5751a14160baeae6cdcd41ce1ca7
#   author:  Trygve Laugstøl
#   date:    Thu, 15 Aug 2019 12:01:17 +0200
#   subject: datasheet: wip
#   file:    src/ee/tools/part_download_datasheets.py (new file, 0000000..1689502)
"""Download the datasheets linked from a part database through Chrome.

Every part link whose relation list contains the "datasheet" relation is
fetched inside a Selenium-driven Chrome instance and saved into the
destination directory with FileSaver.js.
"""
import argparse
import time
from pathlib import Path, PurePosixPath
from urllib.parse import urlsplit

import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support.wait import WebDriverWait

from ee.part import Part, load_db
from ee.xml import uris

# FileSaver.js provides the saveAs() function used by _DOWNLOAD_SCRIPT.
FILE_SAVER_JS_URL = "https://raw.githubusercontent.com/eligrey/FileSaver.js/v2.0.2/dist/FileSaver.js"

# Seconds allowed for fetching the FileSaver.js source itself.
_FILE_SAVER_FETCH_TIMEOUT = 30

# Runs in the browser: arguments[0] is the URL to fetch, arguments[1] the file
# name handed to saveAs(). window.done flips to true once saveAs() was called.
_DOWNLOAD_SCRIPT = '''
window.done = false;
var args = arguments;
function work() {
    console.log("WORK");
    return fetch(args[0], {
        "credentials": "same-origin",
        "referrerPolicy": "no-referrer-when-downgrade",
        "body": null,
        "method": "HEAD",
        "mode": "no-cors"
    }).then(resp => {
        window.resp = resp;
        return resp;
    });
}
setTimeout(work, 2000);

return fetch(arguments[0], {
    "credentials": "same-origin",
    "referrerPolicy": "no-referrer-when-downgrade",
    "body": null,
    "method": "GET",
    "mode": "no-cors"
}).then(resp => {
    return resp.blob();
}).then(blob => {
    saveAs(blob, arguments[1]);
    window.done = true;
});
'''


class Job(object):
    """One datasheet to download.

    Attributes:
        url: the datasheet URL.
        title: the title of the link the URL came from.
        path: file name (a ``str``) the download is saved under.
    """

    def __init__(self, url, title):
        self.url = url
        self.title = title

        # Take the name from the URL's path component only, so a query string
        # or fragment ("sheet.pdf?rev=2") does not end up in the file name.
        # Fall back to the whole URL's last segment when the path is empty.
        self.path = PurePosixPath(urlsplit(url).path).name or Path(url).name


def _collect_jobs(in_path: Path) -> dict:
    """Return a dict of URL -> Job for every datasheet link in the part db."""
    ds_rel = uris.make_link_rel("datasheet")

    jobs = {}
    for xml in load_db(in_path).iterparts():
        for link in Part(xml).get_links():
            if ds_rel in link.relationProp.split(" "):
                # Keyed by URL so a datasheet shared by several parts is
                # only downloaded once.
                jobs[link.urlProp] = Job(link.urlProp, link.titleProp)
    return jobs


def _wait_for_file(path: Path, timeout: float, poll: float = 0.25) -> bool:
    """Poll until *path* exists; return False if *timeout* seconds pass first."""
    deadline = time.monotonic() + timeout
    while not path.exists():
        if time.monotonic() >= deadline:
            return False
        time.sleep(poll)
    return True


def work(in_path: Path, out_dir: Path, index_path: Path,
         download_timeout: float = 60.0):
    """Download all datasheets referenced by the part database.

    Args:
        in_path: part database to read.
        out_dir: directory configured as Chrome's download directory; it is
            created if missing.
        index_path: file that is (re)written after the downloads.
        download_timeout: seconds to wait for each download, both for the
            in-browser fetch to finish and for the file to show up on disk.

    Raises:
        requests.RequestException: if FileSaver.js cannot be fetched.
    """
    jobs = _collect_jobs(in_path)

    print("out_dir={}".format(out_dir))
    out_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): Chrome is believed to need an absolute download
    # directory - confirm; resolving is harmless either way.
    out_dir = out_dir.resolve()

    # Fetch the helper before starting the browser, and refuse to execute an
    # HTTP error page as JavaScript.
    response = requests.get(FILE_SAVER_JS_URL, timeout=_FILE_SAVER_FETCH_TIMEOUT)
    response.raise_for_status()
    file_saver_js = response.text

    options = webdriver.ChromeOptions()
    options.add_experimental_option("prefs", {
        "download.default_directory": str(out_dir),
    })
    driver = webdriver.Chrome(options=options)
    try:
        driver.execute_script(file_saver_js)

        for job in jobs.values():
            print("Fetching {}".format(job.url))

            try:
                driver.execute_script(_DOWNLOAD_SCRIPT, job.url, job.path)
                # WebDriverWait's timeout is expressed in seconds.
                done = WebDriverWait(driver, download_timeout).until(
                    lambda d: d.execute_script('return window.done'))
            except WebDriverException as e:
                # Best effort: one unreachable datasheet (timeout, failed
                # fetch) must not abort the remaining downloads.
                print("Could not fetch {}: {}".format(job.url, e))
                continue
            print("done={}".format(done))

            # saveAs() only starts the download; give Chrome a bounded amount
            # of time to write the file before moving on or quitting.
            # Assumes Chrome stores the file under exactly job.path - TODO confirm.
            if not _wait_for_file(out_dir / job.path, download_timeout):
                print("Timed out waiting for {}".format(out_dir / job.path))
    finally:
        # Always shut the browser down so no Chrome process is left behind.
        driver.quit()

    # Placeholder: the index currently consists of a single empty line.
    with index_path.open("w") as f:
        print("", file=f)


# NOTE(review): argument parsing deliberately stays at module level, as in the
# original; presumably the tool is started by running/importing this module -
# confirm before wrapping it in an ``if __name__ == "__main__"`` guard.
parser = argparse.ArgumentParser()

parser.add_argument("--in",
                    dest="in_path",
                    required=True,
                    metavar="PART DB")

parser.add_argument("--destination",
                    required=True,
                    metavar="DIRECTORY")

parser.add_argument("--index",
                    required=True,
                    metavar="INDEX")

args = parser.parse_args()

work(Path(args.in_path), Path(args.destination), Path(args.index))