aboutsummaryrefslogtreecommitdiff
path: root/src/ee/digikey
diff options
context:
space:
mode:
authorTrygve Laugstøl <trygvis@inamo.no>2019-05-28 09:46:41 +0200
committerTrygve Laugstøl <trygvis@inamo.no>2019-05-28 09:46:41 +0200
commit2e2956823c9cd02c766b296cbcbea9130bd07b36 (patch)
tree29be58764f33374cd768a5151182d1a2aa75b041 /src/ee/digikey
parentd108963a31f726ec8e1f471695addbbabb0db312 (diff)
downloadee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.tar.gz
ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.tar.bz2
ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.tar.xz
ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.zip
digikey: Better search when getting multiple results back. Instead of
doing a new search with the selected digikey part number, do a direct lookup with the product's URL. This ensures that we always get a match and don't get confused when multiple part numbers are returned.
Diffstat (limited to 'src/ee/digikey')
-rw-r--r--src/ee/digikey/__init__.py33
-rw-r--r--src/ee/digikey/search_parts.py29
2 files changed, 42 insertions, 20 deletions
diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py
index 31d5c41..031faff 100644
--- a/src/ee/digikey/__init__.py
+++ b/src/ee/digikey/__init__.py
@@ -252,9 +252,9 @@ class DigikeyProductCategory(object):
class SearchResponseTypes(enum.Enum):
- MANY = 1
- SINGLE = 2
- TOO_MANY = 3
+ MANY = 1 # A product table was returned.
+ SINGLE = 2 # A product page was returned
+ TOO_MANY = 3 # A listing of categories was given, the user is expected to narrow down the search
NO_MATCHES = 4
@@ -310,16 +310,23 @@ class DigikeyClient(object):
return src
+ def get_for_product_url(self, url, product_number):
+ return self._req(url, "product-{}".format(product_number))
+
+ def get(self, url, cache_key, params=None):
+ return self._req(url, cache_key, params)
+
class DigikeyParser(object):
def __init__(self, digikey: Digikey):
self.digikey = digikey or Digikey()
- def _search_process_single_result(self, tree: html) -> Optional[DigikeyProduct]:
+ def _search_process_single_result(self, origin_url, tree: html) -> Optional[DigikeyProduct]:
attributes = []
categories = []
url = _first((link.get("href") for link in tree.xpath("/html/head/link[@rel='canonical' and @href]")))
+ url = self.ensure_absolute_url(origin_url, url)
part_number = mpn = None
for n in tree.xpath("//*[@itemprop='productID' and @content]"):
@@ -402,7 +409,6 @@ class DigikeyParser(object):
docs = []
for row in tree.xpath("//*[@class='product-details-documents-media product-details-section']//tr"):
- # print("row={}".format(row))
kind: str = _first(row.xpath(".//th/text()"))
if not kind:
@@ -411,7 +417,6 @@ class DigikeyParser(object):
kind = kind.strip()
for a in row.xpath(".//td//a[not(contains(@class, '-expander-toggle'))]"):
- # print("a={}".format(a))
title = a.text
if not title:
continue
@@ -429,12 +434,12 @@ class DigikeyParser(object):
return docs
- @staticmethod
- def _handle_product_table(tree: html, res: DigikeySearchResponse):
+ def _handle_product_table(self, origin_url, tree: html, res: DigikeySearchResponse):
products = tree.xpath("//*[@itemtype='http://schema.org/Product']")
for product in products:
url = _first((a.get("href") for a in product.xpath(".//*[@class='tr-image']//a[@href]")))
+ url = self.ensure_absolute_url(origin_url, url)
part_number = _first(product.xpath(".//*[@itemprop='productid' and @content]"))
mpn = _first(product.xpath(".//*[@itemprop='name']"))
@@ -445,8 +450,7 @@ class DigikeyParser(object):
return len(products)
- @staticmethod
- def _handle_exact_part_list(tree: html, res: DigikeySearchResponse):
+ def _handle_exact_part_list(self, origin_url, tree: html, res: DigikeySearchResponse):
products = tree.xpath(".//tr[@class='exactPart']")
for product in products:
@@ -455,12 +459,13 @@ class DigikeyParser(object):
if a is not None and part_number is not None:
url = a.get("href")
+ url = self.ensure_absolute_url(origin_url, url)
mpn = a.text
res.append(DigikeyProduct(part_number.text, mpn, url))
return len(products)
- def parse_string(self, page_content: str):
+ def parse_string(self, origin_url, page_content: str):
tree = html.fromstring(page_content)
count = _first([_parse_int(e.text) for e in tree.xpath("//span[@id='matching-records-count']") if e.text])
@@ -471,11 +476,11 @@ class DigikeyParser(object):
if product_table is not None:
res = DigikeySearchResponse(count, SearchResponseTypes.MANY)
- self._handle_product_table(product_table, res)
+ self._handle_product_table(origin_url, product_table, res)
return res
elif exact_part_list is not None:
res = DigikeySearchResponse(count, SearchResponseTypes.MANY)
- self._handle_exact_part_list(exact_part_list, res)
+ self._handle_exact_part_list(origin_url, exact_part_list, res)
return res
else:
# If the search matches multiple product categories the user has to select the appropriate category
@@ -483,7 +488,7 @@ class DigikeyParser(object):
return DigikeySearchResponse(count, SearchResponseTypes.TOO_MANY)
else:
- p = self._search_process_single_result(tree)
+ p = self._search_process_single_result(origin_url, tree)
if p:
res = DigikeySearchResponse(1, SearchResponseTypes.SINGLE)
res.append(p)
diff --git a/src/ee/digikey/search_parts.py b/src/ee/digikey/search_parts.py
index 5319ba9..59a1fe3 100644
--- a/src/ee/digikey/search_parts.py
+++ b/src/ee/digikey/search_parts.py
@@ -77,34 +77,51 @@ class QueryEngine(object):
self.pn_spn_search(pn, True)
def pn_spn_search(self, pn, is_spn):
+ s = "Searching for '{}'".format(pn)
+ print(s, file=self.log)
+ print("=" * len(s) + "\n", file=self.log)
+
out_part = None
result = None
text = self.client.search(pn)
- response = self.parser.parse_string(text)
+ response = self.parser.parse_string(self.client.baseurl, text)
if response.response_type == SearchResponseTypes.SINGLE:
out_part = resolved(self.store.url, response.products[0])
result = "found"
elif response.response_type == SearchResponseTypes.MANY:
- # find those with an exact match. Digikey uses a prefix search so a query for "FOO" will return "FOO"
- # and "FOOT".
+ print("Got many responses:\n", file=self.log)
+
+ from ee._utils import gen_rst_table
+ data = [[p.part_number, p.mpn] for p in response.products]
+ print(gen_rst_table(["DK", "MPN"], data), file=self.log)
+
+ # find those with an exact match. Digikey uses a "contains" search so a query for "FOO" will return "FOO",
+ # "FOOT" and "AFOO".
def get_field(p):
return p.part_number if is_spn else p.mpn
filtered_products = [p for p in response.products if get_field(p) == pn]
if len(filtered_products) == 0:
+ print("No items matched the query.", file=self.log)
result = "not-found"
else:
- dpn = sorted(filtered_products, key=lambda p: p.part_number)[0].part_number
+ part = sorted(filtered_products, key=lambda p: p.part_number)[0]
+
+ print("Found {} matching products, but their facts are the same so picked ``{}`` for more info.".format(
+ len(filtered_products), part.part_number), file=self.log)
- response = self.parser.parse_string(self.client.search(dpn))
+ page = self.client.get_for_product_url(part.url, part.part_number)
+ response = self.parser.parse_string(self.client.baseurl, page)
if response.response_type == SearchResponseTypes.SINGLE:
out_part = resolved(self.store.url, response.products[0])
result = "found"
else:
+ print("Unable to narrow down the part, got {} new products. Giving up.".format(
+ len(response.products)), file=self.log)
result = "many"
elif response.response_type == SearchResponseTypes.TOO_MANY:
@@ -116,7 +133,7 @@ class QueryEngine(object):
if out_part.uri not in self.uri_idx:
self.out_parts.add(out_part)
- print("Searching for '{}': {}".format(pn, result), file=self.log)
+ print("\nResult: {}".format(result), file=self.log)
print("", file=self.log)