about summary refs log tree commit diff
diff options
context:
space:
mode:
author Trygve Laugstøl <trygvis@inamo.no> 2019-05-23 13:20:02 +0200
committer Trygve Laugstøl <trygvis@inamo.no> 2019-08-01 14:54:07 +0200
commit 16ccbfdc70f9407b0bd600fe600e98ecfae7f198 (patch)
tree 7c7437fa39eb1e1903dd5bb9eb6bf6f88e5fb4bb
parent 40da2eb8bc5cad1170d689c135e53c6c180ed965 (diff)
download ee-python-16ccbfdc70f9407b0bd600fe600e98ecfae7f198.tar.gz
ee-python-16ccbfdc70f9407b0bd600fe600e98ecfae7f198.tar.bz2
ee-python-16ccbfdc70f9407b0bd600fe600e98ecfae7f198.tar.xz
ee-python-16ccbfdc70f9407b0bd600fe600e98ecfae7f198.zip
digikey: Better discovery of form for searching.
-rw-r--r-- src/ee/digikey/__init__.py 60
-rw-r--r-- src/ee/digikey/search_parts.py 34
-rw-r--r-- src/ee/kicad/functions.py 2
3 files changed, 75 insertions, 21 deletions
diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py
index 87860b9..3eb723e 100644
--- a/src/ee/digikey/__init__.py
+++ b/src/ee/digikey/__init__.py
@@ -14,6 +14,7 @@ from lxml import html
from selenium import webdriver
import ee._utils
+from ee import EeException
from ee.money import Money, get_default_context
from ee.tools import mk_parents
@@ -63,19 +64,21 @@ def _first(collection, default=None):
class DigikeyStore(object):
BASEURL = "http://purl.org/ee/supplier/digikey"
- def __init__(self, url, store, products_url):
+ def __init__(self, url, store, frontpage_url, products_url):
self.url = url
self.store = store
+ self.frontpage_url = frontpage_url
self.products_url = products_url
@staticmethod
def from_store_code(store_code):
url = "{}?store={}".format(DigikeyStore.BASEURL, store_code)
- products_url = "https://www.digikey.com/products/en" if store_code == "us" else \
- "https://www.digikey.{}/products/en".format(store_code)
+ frontpage_url = "https://www.digikey.com" if store_code == "us" else \
+ "https://www.digikey.{}".format(store_code)
+ products_url = "{}/products/en".format(frontpage_url)
- return DigikeyStore(url, store_code, products_url)
+ return DigikeyStore(url, store_code, frontpage_url, products_url)
@staticmethod
def from_url(store_url) -> Optional["DigikeyStore"]:
@@ -256,6 +259,7 @@ class SearchResponseTypes(enum.Enum):
SINGLE = 2 # A product page was returned
TOO_MANY = 3 # A listing of categories was given, the user is expected to narrow down the search
NO_MATCHES = 4
+ EXCEPTION = 5
class DigikeySearchResponse(object):
@@ -277,15 +281,42 @@ class DigikeyClient(object):
self.baseurl = baseurl
self.on_download = on_download or self.__nop
self.cache = ee._utils.maybe_cache(cache_dir)
- self.driver: Optional[webdriver.Chrome] = None
+ self._driver: Optional[webdriver.Chrome] = None
def search(self, query: str, page_size=10) -> str:
return self.product_search(query, page_size)
+ def _find_first_visible(self, xpath):
+ return next((e for e in self.driver.find_elements_by_xpath(xpath) if e.is_displayed()), None)
+
def product_search(self, query: str, page_size=10) -> str:
params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)}
cache_key = urllib.parse.quote(query)
- page = self._req(self.baseurl, cache_key=cache_key, params=params)
+
+ cached = self.cache.lookup(cache_key)
+ if cached:
+ self.on_download("Using cached {}".format(cache_key))
+ return cached
+
+ def find_form():
+ return self._find_first_visible(".//input[@name='keywords']"), \
+ self._find_first_visible("//*[@id='header-search-button']")
+
+ form_input, form_button = find_form()
+ if not form_input or not form_button:
+ self.driver.get(self.baseurl)
+
+ form_input, form_button = find_form()
+
+ if not form_input or not form_button:
+ raise EeException("Could not find form")
+
+ form_input.send_keys(query)
+ form_button.click()
+
+ page = self.driver.page_source
+
+ self.cache.save(cache_key, page)
return page
@@ -299,10 +330,6 @@ class DigikeyClient(object):
self.on_download("Downloading {}".format(url))
- if self.driver is None:
- options = webdriver.ChromeOptions()
- self.driver = webdriver.Chrome(chrome_options=options)
-
self.driver.get(url)
src = self.driver.page_source
@@ -316,6 +343,14 @@ class DigikeyClient(object):
def get(self, url, cache_key, params=None):
return self._req(url, cache_key, params)
+ @property
+ def driver(self) -> webdriver.Chrome:
+ if self._driver is None:
+ options = webdriver.ChromeOptions()
+ self._driver = webdriver.Chrome(chrome_options=options)
+
+ return self._driver
+
class DigikeyParser(object):
def __init__(self, digikey: Digikey):
@@ -468,6 +503,11 @@ class DigikeyParser(object):
def parse_string(self, origin_url, page_content: str):
tree = html.fromstring(page_content)
+ fail = _first(tree.xpath("//h1[text()='403']"))
+
+ if fail is not None:
+ return DigikeySearchResponse(0, SearchResponseTypes.EXCEPTION)
+
count = _first([_parse_int(e.text) for e in tree.xpath("//span[@id='matching-records-count']") if e.text])
if count:
diff --git a/src/ee/digikey/search_parts.py b/src/ee/digikey/search_parts.py
index 59a1fe3..e79959a 100644
--- a/src/ee/digikey/search_parts.py
+++ b/src/ee/digikey/search_parts.py
@@ -5,6 +5,7 @@ from typing import List, MutableSet, Mapping
from ee.db import ObjDb
from ee.digikey import Digikey, DigikeyParser, DigikeyClient, SearchResponseTypes, DigikeyProduct, DigikeyStore
from ee.part import PartDb, load_db, save_db, Part
+from ee.tools import mk_parents
from ee.xml import types
from ee.xml.uris import make_digikey_fact_key
@@ -61,7 +62,7 @@ class QueryEngine(object):
self.log = log
self.store = DigikeyStore.from_store_code(store_code)
self.parser = DigikeyParser(Digikey())
- self.client = DigikeyClient(self.store.products_url, cache_dir)
+ self.client = DigikeyClient(self.store.frontpage_url, cache_dir)
out_parts: ObjDb[Part] = ObjDb[Part]()
self.uri_idx = out_parts.add_unique_index("uri", lambda p: p.uri)
@@ -71,10 +72,10 @@ class QueryEngine(object):
self.out_parts = out_parts
def pn_search(self, pn):
- self.pn_spn_search(pn, False)
+ return self.pn_spn_search(pn, False)
def spn_search(self, pn):
- self.pn_spn_search(pn, True)
+ return self.pn_spn_search(pn, True)
def pn_spn_search(self, pn, is_spn):
s = "Searching for '{}'".format(pn)
@@ -87,7 +88,9 @@ class QueryEngine(object):
text = self.client.search(pn)
response = self.parser.parse_string(self.client.baseurl, text)
- if response.response_type == SearchResponseTypes.SINGLE:
+ if response.response_type == SearchResponseTypes.EXCEPTION:
+ result = "exception"
+ elif response.response_type == SearchResponseTypes.SINGLE:
out_part = resolved(self.store.url, response.products[0])
result = "found"
elif response.response_type == SearchResponseTypes.MANY:
@@ -136,6 +139,8 @@ class QueryEngine(object):
print("\nResult: {}".format(result), file=self.log)
print("", file=self.log)
+ return response.response_type
+
def resolved(supplier, p: DigikeyProduct) -> Part:
# TODO: fix uri
@@ -177,6 +182,7 @@ def resolved(supplier, p: DigikeyProduct) -> Part:
def search_parts(in_path: Path, out_path: Path, log_path: Path, cache_dir: Path, store_code):
+ mk_parents(log_path)
with log_path.open("w") as log:
run_search_parts(in_path, out_path, log, cache_dir, store_code)
@@ -210,13 +216,21 @@ def run_search_parts(in_path: Path, out_path: Path, log, cache_dir: Path, store_
print("Executing {} manufacturer product number searches\n\n".format(len(pn_queries)), file=log)
+ exception = False
for q in pn_queries:
- engine.pn_search(q.pn)
-
- print("Executing {} supplier product number searches\n\n".format(len(spn_queries)), file=log)
-
- for q in spn_queries:
- engine.spn_search(q.spn)
+ res = engine.pn_search(q.pn)
+ if res == SearchResponseTypes.EXCEPTION:
+ exception = True
+ break
+
+ if not exception:
+ print("Executing {} supplier product number searches\n\n".format(len(spn_queries)), file=log)
+
+ for q in spn_queries:
+ res = engine.spn_search(q.spn)
+ if res == SearchResponseTypes.EXCEPTION:
+ exception = True
+ break
part_db = PartDb()
for part in engine.out_parts:
diff --git a/src/ee/kicad/functions.py b/src/ee/kicad/functions.py
index b8ad7f5..76637c3 100644
--- a/src/ee/kicad/functions.py
+++ b/src/ee/kicad/functions.py
@@ -112,7 +112,7 @@ def mpn_strategy(**kwargs):
for field in part.facts.all(sch_fact_types.field):
k, v = re.split(":", field.value, 1)
- if k == "mpn":
+ if k.lower() == "mpn":
part.add_mpn(v)
return part