diff options
author | Trygve Laugstøl <trygvis@inamo.no> | 2019-05-28 09:46:41 +0200 |
---|---|---|
committer | Trygve Laugstøl <trygvis@inamo.no> | 2019-05-28 09:46:41 +0200 |
commit | 2e2956823c9cd02c766b296cbcbea9130bd07b36 (patch) | |
tree | 29be58764f33374cd768a5151182d1a2aa75b041 /src/ee | |
parent | d108963a31f726ec8e1f471695addbbabb0db312 (diff) | |
download | ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.tar.gz ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.tar.bz2 ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.tar.xz ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.zip |
digikey: Better search when getting multiple results back. Instead of
doing a new search with the selected digikey part number, do a direct
lookup with the product's URL. This ensures that we always get a
match and don't get confused when multiple part numbers are returned.
Diffstat (limited to 'src/ee')
-rw-r--r-- | src/ee/_utils.py | 21 | ||||
-rw-r--r-- | src/ee/digikey/__init__.py | 33 | ||||
-rw-r--r-- | src/ee/digikey/search_parts.py | 29 | ||||
-rw-r--r-- | src/ee/tools/digikey_download_facts.py | 10 |
4 files changed, 68 insertions, 25 deletions
diff --git a/src/ee/_utils.py b/src/ee/_utils.py index 08e75fa..5960161 100644 --- a/src/ee/_utils.py +++ b/src/ee/_utils.py @@ -77,3 +77,24 @@ class EmptyHttpCache(object): def maybe_cache(path: Optional[Path], **kwargs) -> HttpCache: return HttpCache(path, **kwargs) if path is not None else EmptyHttpCache() + + +def gen_rst_table(header: List[str], data: List[List[str]]): + column_widths = [] + for i in range(len(header)): + w = len(header[i]) + for row in data: + w = max(w, len(row[i])) + column_widths.append(w) + + import io + buf = io.StringIO() + sep = "+-" + "-+-".join(["-" * w for i, w in enumerate(column_widths)]) + "-+" + print(sep, file=buf) + print("| " + " | ".join([header[i].ljust(w) for i, w in enumerate(column_widths)]) + " |", file=buf) + print(sep.replace("-", "="), file=buf) + for row in data: + print("| " + " | ".join([row[i].ljust(w) for i, w in enumerate(column_widths)]) + " |", file=buf) + print(sep, file=buf) + + return buf.getvalue() diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py index 31d5c41..031faff 100644 --- a/src/ee/digikey/__init__.py +++ b/src/ee/digikey/__init__.py @@ -252,9 +252,9 @@ class DigikeyProductCategory(object): class SearchResponseTypes(enum.Enum): - MANY = 1 - SINGLE = 2 - TOO_MANY = 3 + MANY = 1 # A product table was returned. 
+ SINGLE = 2 # A product page was returned + TOO_MANY = 3 # A listing of categories was given, the user is expected to narrow down the search NO_MATCHES = 4 @@ -310,16 +310,23 @@ class DigikeyClient(object): return src + def get_for_product_url(self, url, product_number): + return self._req(url, "product-{}".format(product_number)) + + def get(self, url, cache_key, params=None): + return self._req(url, cache_key, params) + class DigikeyParser(object): def __init__(self, digikey: Digikey): self.digikey = digikey or Digikey() - def _search_process_single_result(self, tree: html) -> Optional[DigikeyProduct]: + def _search_process_single_result(self, origin_url, tree: html) -> Optional[DigikeyProduct]: attributes = [] categories = [] url = _first((link.get("href") for link in tree.xpath("/html/head/link[@rel='canonical' and @href]"))) + url = self.ensure_absolute_url(origin_url, url) part_number = mpn = None for n in tree.xpath("//*[@itemprop='productID' and @content]"): @@ -402,7 +409,6 @@ class DigikeyParser(object): docs = [] for row in tree.xpath("//*[@class='product-details-documents-media product-details-section']//tr"): - # print("row={}".format(row)) kind: str = _first(row.xpath(".//th/text()")) if not kind: @@ -411,7 +417,6 @@ class DigikeyParser(object): kind = kind.strip() for a in row.xpath(".//td//a[not(contains(@class, '-expander-toggle'))]"): - # print("a={}".format(a)) title = a.text if not title: continue @@ -429,12 +434,12 @@ class DigikeyParser(object): return docs - @staticmethod - def _handle_product_table(tree: html, res: DigikeySearchResponse): + def _handle_product_table(self, origin_url, tree: html, res: DigikeySearchResponse): products = tree.xpath("//*[@itemtype='http://schema.org/Product']") for product in products: url = _first((a.get("href") for a in product.xpath(".//*[@class='tr-image']//a[@href]"))) + url = self.ensure_absolute_url(origin_url, url) part_number = _first(product.xpath(".//*[@itemprop='productid' and @content]")) mpn = 
_first(product.xpath(".//*[@itemprop='name']")) @@ -445,8 +450,7 @@ class DigikeyParser(object): return len(products) - @staticmethod - def _handle_exact_part_list(tree: html, res: DigikeySearchResponse): + def _handle_exact_part_list(self, origin_url, tree: html, res: DigikeySearchResponse): products = tree.xpath(".//tr[@class='exactPart']") for product in products: @@ -455,12 +459,13 @@ class DigikeyParser(object): if a is not None and part_number is not None: url = a.get("href") + url = self.ensure_absolute_url(origin_url, url) mpn = a.text res.append(DigikeyProduct(part_number.text, mpn, url)) return len(products) - def parse_string(self, page_content: str): + def parse_string(self, origin_url, page_content: str): tree = html.fromstring(page_content) count = _first([_parse_int(e.text) for e in tree.xpath("//span[@id='matching-records-count']") if e.text]) @@ -471,11 +476,11 @@ class DigikeyParser(object): if product_table is not None: res = DigikeySearchResponse(count, SearchResponseTypes.MANY) - self._handle_product_table(product_table, res) + self._handle_product_table(origin_url, product_table, res) return res elif exact_part_list is not None: res = DigikeySearchResponse(count, SearchResponseTypes.MANY) - self._handle_exact_part_list(exact_part_list, res) + self._handle_exact_part_list(origin_url, exact_part_list, res) return res else: # If the search matches multiple product categories the user has to select the appropriate category @@ -483,7 +488,7 @@ class DigikeyParser(object): return DigikeySearchResponse(count, SearchResponseTypes.TOO_MANY) else: - p = self._search_process_single_result(tree) + p = self._search_process_single_result(origin_url, tree) if p: res = DigikeySearchResponse(1, SearchResponseTypes.SINGLE) res.append(p) diff --git a/src/ee/digikey/search_parts.py b/src/ee/digikey/search_parts.py index 5319ba9..59a1fe3 100644 --- a/src/ee/digikey/search_parts.py +++ b/src/ee/digikey/search_parts.py @@ -77,34 +77,51 @@ class QueryEngine(object): 
self.pn_spn_search(pn, True) def pn_spn_search(self, pn, is_spn): + s = "Searching for '{}'".format(pn) + print(s, file=self.log) + print("=" * len(s) + "\n", file=self.log) + out_part = None result = None text = self.client.search(pn) - response = self.parser.parse_string(text) + response = self.parser.parse_string(self.client.baseurl, text) if response.response_type == SearchResponseTypes.SINGLE: out_part = resolved(self.store.url, response.products[0]) result = "found" elif response.response_type == SearchResponseTypes.MANY: - # find those with an exact match. Digikey uses a prefix search so a query for "FOO" will return "FOO" - # and "FOOT". + print("Got many responses:\n", file=self.log) + + from ee._utils import gen_rst_table + data = [[p.part_number, p.mpn] for p in response.products] + print(gen_rst_table(["DK", "MPN"], data), file=self.log) + + # find those with an exact match. Digikey uses a "contains" search so a query for "FOO" will return "FOO", + # "FOOT" and "AFOO". def get_field(p): return p.part_number if is_spn else p.mpn filtered_products = [p for p in response.products if get_field(p) == pn] if len(filtered_products) == 0: + print("No items matched the query.", file=self.log) result = "not-found" else: - dpn = sorted(filtered_products, key=lambda p: p.part_number)[0].part_number + part = sorted(filtered_products, key=lambda p: p.part_number)[0] + + print("Found {} matching products, but their facts are the same so picked ``{}`` for more info.".format( + len(filtered_products), part.part_number), file=self.log) - response = self.parser.parse_string(self.client.search(dpn)) + page = self.client.get_for_product_url(part.url, part.part_number) + response = self.parser.parse_string(self.client.baseurl, page) if response.response_type == SearchResponseTypes.SINGLE: out_part = resolved(self.store.url, response.products[0]) result = "found" else: + print("Unable to narrow down the part, got {} new products. 
Giving up.".format( + len(response.products)), file=self.log) result = "many" elif response.response_type == SearchResponseTypes.TOO_MANY: @@ -116,7 +133,7 @@ class QueryEngine(object): if out_part.uri not in self.uri_idx: self.out_parts.add(out_part) - print("Searching for '{}': {}".format(pn, result), file=self.log) + print("\nResult: {}".format(result), file=self.log) print("", file=self.log) diff --git a/src/ee/tools/digikey_download_facts.py b/src/ee/tools/digikey_download_facts.py index 5cfdba7..d7ecce5 100644 --- a/src/ee/tools/digikey_download_facts.py +++ b/src/ee/tools/digikey_download_facts.py @@ -1,3 +1,6 @@ +if True: + raise Exception("This module is deprecated.") + import argparse from functools import total_ordering @@ -5,9 +8,6 @@ import ee.digikey as dk from ee.digikey import SearchResponseTypes, DigikeyProduct from ee.tools import log -if True: - raise Exception("This module is deprecated.") - @total_ordering class Query(object): @@ -83,7 +83,7 @@ for q in queries: continue log.info("Searching for {}".format(p)) - response = parser.parse_string(client.search(p)) + response = parser.parse_string(client.baseurl, client.search(p)) todos = [] @@ -113,7 +113,7 @@ for q in queries: log.warn("Part not found") for part_number in todos: - response = parser.parse_string(client.search(part_number)) + response = parser.parse_string(client.baseurl, client.search(part_number)) if response.response_type == SearchResponseTypes.SINGLE: p = sorted(response.products, key=lambda p: p.part_number)[0] |