aboutsummaryrefslogtreecommitdiff
path: root/src/ee/digikey
diff options
context:
space:
mode:
author Trygve Laugstøl <trygvis@inamo.no> 2018-08-02 23:34:45 +0200
committer Trygve Laugstøl <trygvis@inamo.no> 2018-08-02 23:34:45 +0200
commit 238627537deafd51f41c929747c041d193e66ab9 (patch)
tree 538ad32c7d98cc93518f8c2e6b5fa9dcb09ab2a4 /src/ee/digikey
parent a02955a2e10d4790828bf351f6b3e68ca10a50ff (diff)
downloadee-python-238627537deafd51f41c929747c041d193e66ab9.tar.gz
ee-python-238627537deafd51f41c929747c041d193e66ab9.tar.bz2
ee-python-238627537deafd51f41c929747c041d193e66ab9.tar.xz
ee-python-238627537deafd51f41c929747c041d193e66ab9.zip
o Handling digikey pages with a special "exact match" header + list of
  other categories to continue to search in. The header is just enough data for us.
o Better digikey tests, saving local HTML instead of doing online requests.
Diffstat (limited to 'src/ee/digikey')
-rw-r--r-- src/ee/digikey/__init__.py 46
1 files changed, 30 insertions, 16 deletions
diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py
index 6fa3161..f230f00 100644
--- a/src/ee/digikey/__init__.py
+++ b/src/ee/digikey/__init__.py
@@ -213,16 +213,11 @@ class DigikeyClient(object):
def __nop(self, message):
pass
- def __init__(self, digikey: Digikey, on_download=None):
+ def __init__(self, digikey: Digikey, cache_dir=None, on_download=None):
self.digikey = digikey
self.on_download = on_download or self.__nop
- cache = FileCache('digikey_cache', forever=True)
- self.sess = CacheControl(requests.Session(), cache=cache, heuristic=ExpiresAfter(days=1))
-
- # adapter = CacheControlAdapter(cache=cache, heuristic=ExpiresAfter(days=1))
- # self.sess = requests.Session()
- # self.sess.mount('http://', adapter)
- # self.sess.mount('https://', adapter)
+ cache = FileCache(cache_dir or 'http_cache', forever=True)
+ self.sess = CacheControl(requests.Session(), cache=cache, heuristic=ExpiresAfter(days=10*365))
def _req(self, url, params=None):
if not url.startswith("http"):
@@ -274,8 +269,8 @@ class DigikeyClient(object):
return None
- # noinspection PyMethodMayBeStatic
- def _search_process_multiple_results(self, tree: html, res: DigikeySearchResponse):
+ @staticmethod
+ def _handle_product_table(tree: html, res: DigikeySearchResponse):
products = tree.xpath("//*[@itemtype='http://schema.org/Product']")
for product in products:
@@ -290,24 +285,43 @@ class DigikeyClient(object):
return len(products)
+ @staticmethod
+ def _handle_exact_part_list(tree: html, res: DigikeySearchResponse):
+ products = tree.xpath(".//tr[@class='exactPart']")
+
+ for product in products:
+ a = _first((a for a in product.xpath(".//td/span/a[@href]")))
+ part_number = _first(product.xpath(".//span[last()]"))
+
+ if a is not None and part_number is not None:
+ url = a.get("href")
+ mpn = a.text
+ res.append(DigikeyProduct(part_number.text, mpn, url))
+
+ return len(products)
+
def search(self, query: str, page_size=10) -> DigikeySearchResponse:
- # http://www.digikey.com/products/en?x=0&y=0&lang=en&site=us&keywords=553-2320-1-ND
- # params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size), 'x': 0, 'y': 0}
params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)}
page = self._req("https://www.digikey.com/products/en", params=params)
- # print("page: ")
- # print(page.content)
- tree = html.fromstring(page.content)
+ return self.parse_string(page.content)
+
+ def parse_string(self, page_content: str):
+ tree = html.fromstring(page_content)
count = _first([_parse_int(e.text) for e in tree.xpath("//span[@id='matching-records-count']") if e.text])
if count:
product_table = _first(tree.xpath("//table[@id='productTable']"))
+ exact_part_list = _first(tree.xpath("//table[@id='exactPartList']"))
if product_table is not None:
res = DigikeySearchResponse(count, SearchResponseTypes.MANY)
- self._search_process_multiple_results(product_table, res)
+ self._handle_product_table(product_table, res)
+ return res
+ elif exact_part_list is not None:
+ res = DigikeySearchResponse(count, SearchResponseTypes.MANY)
+ self._handle_exact_part_list(exact_part_list, res)
return res
else:
# If the search matches multiple product categories the user has to select the appropriate category