diff options
author | Trygve Laugstøl <trygvis@inamo.no> | 2019-05-23 13:20:02 +0200 |
---|---|---|
committer | Trygve Laugstøl <trygvis@inamo.no> | 2019-08-01 14:54:07 +0200 |
commit | 16ccbfdc70f9407b0bd600fe600e98ecfae7f198 (patch) | |
tree | 7c7437fa39eb1e1903dd5bb9eb6bf6f88e5fb4bb /src/ee/digikey | |
parent | 40da2eb8bc5cad1170d689c135e53c6c180ed965 (diff) | |
download | ee-python-16ccbfdc70f9407b0bd600fe600e98ecfae7f198.tar.gz ee-python-16ccbfdc70f9407b0bd600fe600e98ecfae7f198.tar.bz2 ee-python-16ccbfdc70f9407b0bd600fe600e98ecfae7f198.tar.xz ee-python-16ccbfdc70f9407b0bd600fe600e98ecfae7f198.zip |
digikey: Better discovery of form for searching.
Diffstat (limited to 'src/ee/digikey')
-rw-r--r-- | src/ee/digikey/__init__.py | 60 |
-rw-r--r-- | src/ee/digikey/search_parts.py | 34 |
2 files changed, 74 insertions, 20 deletions
diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py index 87860b9..3eb723e 100644 --- a/src/ee/digikey/__init__.py +++ b/src/ee/digikey/__init__.py @@ -14,6 +14,7 @@ from lxml import html from selenium import webdriver import ee._utils +from ee import EeException from ee.money import Money, get_default_context from ee.tools import mk_parents @@ -63,19 +64,21 @@ def _first(collection, default=None): class DigikeyStore(object): BASEURL = "http://purl.org/ee/supplier/digikey" - def __init__(self, url, store, products_url): + def __init__(self, url, store, frontpage_url, products_url): self.url = url self.store = store + self.frontpage_url = frontpage_url self.products_url = products_url @staticmethod def from_store_code(store_code): url = "{}?store={}".format(DigikeyStore.BASEURL, store_code) - products_url = "https://www.digikey.com/products/en" if store_code == "us" else \ - "https://www.digikey.{}/products/en".format(store_code) + frontpage_url = "https://www.digikey.com" if store_code == "us" else \ + "https://www.digikey.{}".format(store_code) + products_url = "{}/products/en".format(frontpage_url) - return DigikeyStore(url, store_code, products_url) + return DigikeyStore(url, store_code, frontpage_url, products_url) @staticmethod def from_url(store_url) -> Optional["DigikeyStore"]: @@ -256,6 +259,7 @@ class SearchResponseTypes(enum.Enum): SINGLE = 2 # A product page was returned TOO_MANY = 3 # A listing of categories was given, the user is expected to narrow down the search NO_MATCHES = 4 + EXCEPTION = 5 class DigikeySearchResponse(object): @@ -277,15 +281,42 @@ class DigikeyClient(object): self.baseurl = baseurl self.on_download = on_download or self.__nop self.cache = ee._utils.maybe_cache(cache_dir) - self.driver: Optional[webdriver.Chrome] = None + self._driver: Optional[webdriver.Chrome] = None def search(self, query: str, page_size=10) -> str: return self.product_search(query, page_size) + def _find_first_visible(self, xpath): + return 
next((e for e in self.driver.find_elements_by_xpath(xpath) if e.is_displayed()), None) + def product_search(self, query: str, page_size=10) -> str: params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)} cache_key = urllib.parse.quote(query) - page = self._req(self.baseurl, cache_key=cache_key, params=params) + + cached = self.cache.lookup(cache_key) + if cached: + self.on_download("Using cached {}".format(cache_key)) + return cached + + def find_form(): + return self._find_first_visible(".//input[@name='keywords']"), \ + self._find_first_visible("//*[@id='header-search-button']") + + form_input, form_button = find_form() + if not form_input or not form_button: + self.driver.get(self.baseurl) + + form_input, form_button = find_form() + + if not form_input or not form_button: + raise EeException("Could not find form") + + form_input.send_keys(query) + form_button.click() + + page = self.driver.page_source + + self.cache.save(cache_key, page) return page @@ -299,10 +330,6 @@ class DigikeyClient(object): self.on_download("Downloading {}".format(url)) - if self.driver is None: - options = webdriver.ChromeOptions() - self.driver = webdriver.Chrome(chrome_options=options) - self.driver.get(url) src = self.driver.page_source @@ -316,6 +343,14 @@ class DigikeyClient(object): def get(self, url, cache_key, params=None): return self._req(url, cache_key, params) + @property + def driver(self) -> webdriver.Chrome: + if self._driver is None: + options = webdriver.ChromeOptions() + self._driver = webdriver.Chrome(chrome_options=options) + + return self._driver + class DigikeyParser(object): def __init__(self, digikey: Digikey): @@ -468,6 +503,11 @@ class DigikeyParser(object): def parse_string(self, origin_url, page_content: str): tree = html.fromstring(page_content) + fail = _first(tree.xpath("//h1[text()='403']")) + + if fail is not None: + return DigikeySearchResponse(0, SearchResponseTypes.EXCEPTION) + count = _first([_parse_int(e.text) for e in 
tree.xpath("//span[@id='matching-records-count']") if e.text]) if count: diff --git a/src/ee/digikey/search_parts.py b/src/ee/digikey/search_parts.py index 59a1fe3..e79959a 100644 --- a/src/ee/digikey/search_parts.py +++ b/src/ee/digikey/search_parts.py @@ -5,6 +5,7 @@ from typing import List, MutableSet, Mapping from ee.db import ObjDb from ee.digikey import Digikey, DigikeyParser, DigikeyClient, SearchResponseTypes, DigikeyProduct, DigikeyStore from ee.part import PartDb, load_db, save_db, Part +from ee.tools import mk_parents from ee.xml import types from ee.xml.uris import make_digikey_fact_key @@ -61,7 +62,7 @@ class QueryEngine(object): self.log = log self.store = DigikeyStore.from_store_code(store_code) self.parser = DigikeyParser(Digikey()) - self.client = DigikeyClient(self.store.products_url, cache_dir) + self.client = DigikeyClient(self.store.frontpage_url, cache_dir) out_parts: ObjDb[Part] = ObjDb[Part]() self.uri_idx = out_parts.add_unique_index("uri", lambda p: p.uri) @@ -71,10 +72,10 @@ class QueryEngine(object): self.out_parts = out_parts def pn_search(self, pn): - self.pn_spn_search(pn, False) + return self.pn_spn_search(pn, False) def spn_search(self, pn): - self.pn_spn_search(pn, True) + return self.pn_spn_search(pn, True) def pn_spn_search(self, pn, is_spn): s = "Searching for '{}'".format(pn) @@ -87,7 +88,9 @@ class QueryEngine(object): text = self.client.search(pn) response = self.parser.parse_string(self.client.baseurl, text) - if response.response_type == SearchResponseTypes.SINGLE: + if response.response_type == SearchResponseTypes.EXCEPTION: + result = "exception" + elif response.response_type == SearchResponseTypes.SINGLE: out_part = resolved(self.store.url, response.products[0]) result = "found" elif response.response_type == SearchResponseTypes.MANY: @@ -136,6 +139,8 @@ class QueryEngine(object): print("\nResult: {}".format(result), file=self.log) print("", file=self.log) + return response.response_type + def resolved(supplier, p: 
DigikeyProduct) -> Part: # TODO: fix uri @@ -177,6 +182,7 @@ def resolved(supplier, p: DigikeyProduct) -> Part: def search_parts(in_path: Path, out_path: Path, log_path: Path, cache_dir: Path, store_code): + mk_parents(log_path) with log_path.open("w") as log: run_search_parts(in_path, out_path, log, cache_dir, store_code) @@ -210,13 +216,21 @@ def run_search_parts(in_path: Path, out_path: Path, log, cache_dir: Path, store_ print("Executing {} manufacturer product number searches\n\n".format(len(pn_queries)), file=log) + exception = False for q in pn_queries: - engine.pn_search(q.pn) - - print("Executing {} supplier product number searches\n\n".format(len(spn_queries)), file=log) - - for q in spn_queries: - engine.spn_search(q.spn) + res = engine.pn_search(q.pn) + if res == SearchResponseTypes.EXCEPTION: + exception = True + break + + if not exception: + print("Executing {} supplier product number searches\n\n".format(len(spn_queries)), file=log) + + for q in spn_queries: + res = engine.spn_search(q.spn) + if res == SearchResponseTypes.EXCEPTION: + exception = True + break part_db = PartDb() for part in engine.out_parts: |