From 2e2956823c9cd02c766b296cbcbea9130bd07b36 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Tue, 28 May 2019 09:46:41 +0200 Subject: digikey: Better search when getting multiple results back. Instead of doing a new search with the selected digikey part number, do a direct lookup with the product's URL instead. This ensures that we always get a match and don't get confused when multiple part numbers are returned. --- src/ee/digikey/__init__.py | 33 +++++++++++++++++++-------------- src/ee/digikey/search_parts.py | 29 +++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 20 deletions(-) (limited to 'src/ee/digikey') diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py index 31d5c41..031faff 100644 --- a/src/ee/digikey/__init__.py +++ b/src/ee/digikey/__init__.py @@ -252,9 +252,9 @@ class DigikeyProductCategory(object): class SearchResponseTypes(enum.Enum): - MANY = 1 - SINGLE = 2 - TOO_MANY = 3 + MANY = 1 # A product table was returned. + SINGLE = 2 # A product page was returned + TOO_MANY = 3 # A listing of categories was given, the user is expected to narrow down the search NO_MATCHES = 4 @@ -310,16 +310,23 @@ class DigikeyClient(object): return src + def get_for_product_url(self, url, product_number): + return self._req(url, "product-{}".format(product_number)) + + def get(self, url, cache_key, params=None): + return self._req(url, cache_key, params) + class DigikeyParser(object): def __init__(self, digikey: Digikey): self.digikey = digikey or Digikey() - def _search_process_single_result(self, tree: html) -> Optional[DigikeyProduct]: + def _search_process_single_result(self, origin_url, tree: html) -> Optional[DigikeyProduct]: attributes = [] categories = [] url = _first((link.get("href") for link in tree.xpath("/html/head/link[@rel='canonical' and @href]"))) + url = self.ensure_absolute_url(origin_url, url) part_number = mpn = None for n in tree.xpath("//*[@itemprop='productID' and @content]"): @@ -402,7 +409,6 @@ class DigikeyParser(object): docs = [] for row in tree.xpath("//*[@class='product-details-documents-media product-details-section']//tr"): - # print("row={}".format(row)) kind: str = _first(row.xpath(".//th/text()")) if not kind: @@ -411,7 +417,6 @@ class DigikeyParser(object): kind = kind.strip() for a in row.xpath(".//td//a[not(contains(@class, '-expander-toggle'))]"): - # print("a={}".format(a)) title = a.text if not title: continue @@ -429,12 +434,12 @@ class DigikeyParser(object): return docs - @staticmethod - def _handle_product_table(tree: html, res: DigikeySearchResponse): + def _handle_product_table(self, origin_url, tree: html, res: DigikeySearchResponse): products = tree.xpath("//*[@itemtype='http://schema.org/Product']") for product in products: url = _first((a.get("href") for a in product.xpath(".//*[@class='tr-image']//a[@href]"))) + url = self.ensure_absolute_url(origin_url, url) part_number = _first(product.xpath(".//*[@itemprop='productid' and @content]")) mpn = _first(product.xpath(".//*[@itemprop='name']")) @@ -445,8 +450,7 @@ class DigikeyParser(object): return len(products) - @staticmethod - def _handle_exact_part_list(tree: html, res: DigikeySearchResponse): + def _handle_exact_part_list(self, origin_url, tree: html, res: DigikeySearchResponse): products = tree.xpath(".//tr[@class='exactPart']") for product in products: @@ -455,12 +459,13 @@ class DigikeyParser(object): if a is not None and part_number is not None: url = a.get("href") + url = self.ensure_absolute_url(origin_url, url) mpn = a.text res.append(DigikeyProduct(part_number.text, mpn, url)) return len(products) - def parse_string(self, page_content: str): + def parse_string(self, origin_url, page_content: str): tree = html.fromstring(page_content) count = _first([_parse_int(e.text) for e in tree.xpath("//span[@id='matching-records-count']") if e.text]) @@ -471,11 +476,11 @@ class DigikeyParser(object): if product_table is not None: res = DigikeySearchResponse(count, SearchResponseTypes.MANY) - self._handle_product_table(product_table, res) + self._handle_product_table(origin_url, product_table, res) return res elif exact_part_list is not None: res = DigikeySearchResponse(count, SearchResponseTypes.MANY) - self._handle_exact_part_list(exact_part_list, res) + self._handle_exact_part_list(origin_url, exact_part_list, res) return res else: # If the search matches multiple product categories the user has to select the appropriate category @@ -483,7 +488,7 @@ class DigikeyParser(object): return DigikeySearchResponse(count, SearchResponseTypes.TOO_MANY) else: - p = self._search_process_single_result(tree) + p = self._search_process_single_result(origin_url, tree) if p: res = DigikeySearchResponse(1, SearchResponseTypes.SINGLE) res.append(p) diff --git a/src/ee/digikey/search_parts.py b/src/ee/digikey/search_parts.py index 5319ba9..59a1fe3 100644 --- a/src/ee/digikey/search_parts.py +++ b/src/ee/digikey/search_parts.py @@ -77,34 +77,51 @@ class QueryEngine(object): self.pn_spn_search(pn, True) def pn_spn_search(self, pn, is_spn): + s = "Searching for '{}'".format(pn) + print(s, file=self.log) + print("=" * len(s) + "\n", file=self.log) + out_part = None result = None text = self.client.search(pn) - response = self.parser.parse_string(text) + response = self.parser.parse_string(self.client.baseurl, text) if response.response_type == SearchResponseTypes.SINGLE: out_part = resolved(self.store.url, response.products[0]) result = "found" elif response.response_type == SearchResponseTypes.MANY: - # find those with an exact match. Digikey uses a prefix search so a query for "FOO" will return "FOO" - # and "FOOT". + print("Got many responses:\n", file=self.log) + + from ee._utils import gen_rst_table + data = [[p.part_number, p.mpn] for p in response.products] + print(gen_rst_table(["DK", "MPN"], data), file=self.log) + + # find those with an exact match. Digikey uses a "contains" search so a query for "FOO" will return "FOO", + # "FOOT" and "AFOO". def get_field(p): return p.part_number if is_spn else p.mpn filtered_products = [p for p in response.products if get_field(p) == pn] if len(filtered_products) == 0: + print("No items matched the query.", file=self.log) result = "not-found" else: - dpn = sorted(filtered_products, key=lambda p: p.part_number)[0].part_number + part = sorted(filtered_products, key=lambda p: p.part_number)[0] + + print("Found {} matching products, but their facts are the same so picked ``{}`` for more info.".format( + len(filtered_products), part.part_number), file=self.log) - response = self.parser.parse_string(self.client.search(dpn)) + page = self.client.get_for_product_url(part.url, part.part_number) + response = self.parser.parse_string(self.client.baseurl, page) if response.response_type == SearchResponseTypes.SINGLE: out_part = resolved(self.store.url, response.products[0]) result = "found" else: + print("Unable to narrow down the part, got {} new products. Giving up.".format( + len(response.products)), file=self.log) result = "many" elif response.response_type == SearchResponseTypes.TOO_MANY: @@ -116,7 +133,7 @@ class QueryEngine(object): if out_part.uri not in self.uri_idx: self.out_parts.add(out_part) - print("Searching for '{}': {}".format(pn, result), file=self.log) + print("\nResult: {}".format(result), file=self.log) print("", file=self.log) -- cgit v1.2.3