aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTrygve Laugstøl <trygvis@inamo.no>2019-05-28 09:46:41 +0200
committerTrygve Laugstøl <trygvis@inamo.no>2019-05-28 09:46:41 +0200
commit2e2956823c9cd02c766b296cbcbea9130bd07b36 (patch)
tree29be58764f33374cd768a5151182d1a2aa75b041
parentd108963a31f726ec8e1f471695addbbabb0db312 (diff)
downloadee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.tar.gz
ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.tar.bz2
ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.tar.xz
ee-python-2e2956823c9cd02c766b296cbcbea9130bd07b36.zip
digikey: Better search when getting multiple results back. Instead of
doing a new search with the selected digikey part number, do a direct lookup with the product's URL. This ensures that we always get a match and don't get confused when multiple part numbers are returned.
-rw-r--r--src/ee/_utils.py21
-rw-r--r--src/ee/digikey/__init__.py33
-rw-r--r--src/ee/digikey/search_parts.py29
-rw-r--r--src/ee/tools/digikey_download_facts.py10
-rw-r--r--test/test_digikey.py8
5 files changed, 72 insertions, 29 deletions
diff --git a/src/ee/_utils.py b/src/ee/_utils.py
index 08e75fa..5960161 100644
--- a/src/ee/_utils.py
+++ b/src/ee/_utils.py
@@ -77,3 +77,24 @@ class EmptyHttpCache(object):
def maybe_cache(path: Optional[Path], **kwargs) -> HttpCache:
return HttpCache(path, **kwargs) if path is not None else EmptyHttpCache()
+
+
+def gen_rst_table(header: List[str], data: List[List[str]]):
+ column_widths = []
+ for i in range(len(header)):
+ w = len(header[i])
+ for row in data:
+ w = max(w, len(row[i]))
+ column_widths.append(w)
+
+ import io
+ buf = io.StringIO()
+ sep = "+-" + "-+-".join(["-" * w for i, w in enumerate(column_widths)]) + "-+"
+ print(sep, file=buf)
+ print("| " + " | ".join([header[i].ljust(w) for i, w in enumerate(column_widths)]) + " |", file=buf)
+ print(sep.replace("-", "="), file=buf)
+ for row in data:
+ print("| " + " | ".join([row[i].ljust(w) for i, w in enumerate(column_widths)]) + " |", file=buf)
+ print(sep, file=buf)
+
+ return buf.getvalue()
diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py
index 31d5c41..031faff 100644
--- a/src/ee/digikey/__init__.py
+++ b/src/ee/digikey/__init__.py
@@ -252,9 +252,9 @@ class DigikeyProductCategory(object):
class SearchResponseTypes(enum.Enum):
- MANY = 1
- SINGLE = 2
- TOO_MANY = 3
+ MANY = 1 # A product table was returned.
+ SINGLE = 2 # A product page was returned
+ TOO_MANY = 3 # A listing of categories was given, the user is expected to narrow down the search
NO_MATCHES = 4
@@ -310,16 +310,23 @@ class DigikeyClient(object):
return src
+ def get_for_product_url(self, url, product_number):
+ return self._req(url, "product-{}".format(product_number))
+
+ def get(self, url, cache_key, params=None):
+ return self._req(url, cache_key, params)
+
class DigikeyParser(object):
def __init__(self, digikey: Digikey):
self.digikey = digikey or Digikey()
- def _search_process_single_result(self, tree: html) -> Optional[DigikeyProduct]:
+ def _search_process_single_result(self, origin_url, tree: html) -> Optional[DigikeyProduct]:
attributes = []
categories = []
url = _first((link.get("href") for link in tree.xpath("/html/head/link[@rel='canonical' and @href]")))
+ url = self.ensure_absolute_url(origin_url, url)
part_number = mpn = None
for n in tree.xpath("//*[@itemprop='productID' and @content]"):
@@ -402,7 +409,6 @@ class DigikeyParser(object):
docs = []
for row in tree.xpath("//*[@class='product-details-documents-media product-details-section']//tr"):
- # print("row={}".format(row))
kind: str = _first(row.xpath(".//th/text()"))
if not kind:
@@ -411,7 +417,6 @@ class DigikeyParser(object):
kind = kind.strip()
for a in row.xpath(".//td//a[not(contains(@class, '-expander-toggle'))]"):
- # print("a={}".format(a))
title = a.text
if not title:
continue
@@ -429,12 +434,12 @@ class DigikeyParser(object):
return docs
- @staticmethod
- def _handle_product_table(tree: html, res: DigikeySearchResponse):
+ def _handle_product_table(self, origin_url, tree: html, res: DigikeySearchResponse):
products = tree.xpath("//*[@itemtype='http://schema.org/Product']")
for product in products:
url = _first((a.get("href") for a in product.xpath(".//*[@class='tr-image']//a[@href]")))
+ url = self.ensure_absolute_url(origin_url, url)
part_number = _first(product.xpath(".//*[@itemprop='productid' and @content]"))
mpn = _first(product.xpath(".//*[@itemprop='name']"))
@@ -445,8 +450,7 @@ class DigikeyParser(object):
return len(products)
- @staticmethod
- def _handle_exact_part_list(tree: html, res: DigikeySearchResponse):
+ def _handle_exact_part_list(self, origin_url, tree: html, res: DigikeySearchResponse):
products = tree.xpath(".//tr[@class='exactPart']")
for product in products:
@@ -455,12 +459,13 @@ class DigikeyParser(object):
if a is not None and part_number is not None:
url = a.get("href")
+ url = self.ensure_absolute_url(origin_url, url)
mpn = a.text
res.append(DigikeyProduct(part_number.text, mpn, url))
return len(products)
- def parse_string(self, page_content: str):
+ def parse_string(self, origin_url, page_content: str):
tree = html.fromstring(page_content)
count = _first([_parse_int(e.text) for e in tree.xpath("//span[@id='matching-records-count']") if e.text])
@@ -471,11 +476,11 @@ class DigikeyParser(object):
if product_table is not None:
res = DigikeySearchResponse(count, SearchResponseTypes.MANY)
- self._handle_product_table(product_table, res)
+ self._handle_product_table(origin_url, product_table, res)
return res
elif exact_part_list is not None:
res = DigikeySearchResponse(count, SearchResponseTypes.MANY)
- self._handle_exact_part_list(exact_part_list, res)
+ self._handle_exact_part_list(origin_url, exact_part_list, res)
return res
else:
# If the search matches multiple product categories the user has to select the appropriate category
@@ -483,7 +488,7 @@ class DigikeyParser(object):
return DigikeySearchResponse(count, SearchResponseTypes.TOO_MANY)
else:
- p = self._search_process_single_result(tree)
+ p = self._search_process_single_result(origin_url, tree)
if p:
res = DigikeySearchResponse(1, SearchResponseTypes.SINGLE)
res.append(p)
diff --git a/src/ee/digikey/search_parts.py b/src/ee/digikey/search_parts.py
index 5319ba9..59a1fe3 100644
--- a/src/ee/digikey/search_parts.py
+++ b/src/ee/digikey/search_parts.py
@@ -77,34 +77,51 @@ class QueryEngine(object):
self.pn_spn_search(pn, True)
def pn_spn_search(self, pn, is_spn):
+ s = "Searching for '{}'".format(pn)
+ print(s, file=self.log)
+ print("=" * len(s) + "\n", file=self.log)
+
out_part = None
result = None
text = self.client.search(pn)
- response = self.parser.parse_string(text)
+ response = self.parser.parse_string(self.client.baseurl, text)
if response.response_type == SearchResponseTypes.SINGLE:
out_part = resolved(self.store.url, response.products[0])
result = "found"
elif response.response_type == SearchResponseTypes.MANY:
- # find those with an exact match. Digikey uses a prefix search so a query for "FOO" will return "FOO"
- # and "FOOT".
+ print("Got many responses:\n", file=self.log)
+
+ from ee._utils import gen_rst_table
+ data = [[p.part_number, p.mpn] for p in response.products]
+ print(gen_rst_table(["DK", "MPN"], data), file=self.log)
+
+ # find those with an exact match. Digikey uses a "contains" search so a query for "FOO" will return "FOO",
+ # "FOOT" and "AFOO".
def get_field(p):
return p.part_number if is_spn else p.mpn
filtered_products = [p for p in response.products if get_field(p) == pn]
if len(filtered_products) == 0:
+ print("No items matched the query.", file=self.log)
result = "not-found"
else:
- dpn = sorted(filtered_products, key=lambda p: p.part_number)[0].part_number
+ part = sorted(filtered_products, key=lambda p: p.part_number)[0]
+
+ print("Found {} matching products, but their facts are the same so picked ``{}`` for more info.".format(
+ len(filtered_products), part.part_number), file=self.log)
- response = self.parser.parse_string(self.client.search(dpn))
+ page = self.client.get_for_product_url(part.url, part.part_number)
+ response = self.parser.parse_string(self.client.baseurl, page)
if response.response_type == SearchResponseTypes.SINGLE:
out_part = resolved(self.store.url, response.products[0])
result = "found"
else:
+ print("Unable to narrow down the part, got {} new products. Giving up.".format(
+ len(response.products)), file=self.log)
result = "many"
elif response.response_type == SearchResponseTypes.TOO_MANY:
@@ -116,7 +133,7 @@ class QueryEngine(object):
if out_part.uri not in self.uri_idx:
self.out_parts.add(out_part)
- print("Searching for '{}': {}".format(pn, result), file=self.log)
+ print("\nResult: {}".format(result), file=self.log)
print("", file=self.log)
diff --git a/src/ee/tools/digikey_download_facts.py b/src/ee/tools/digikey_download_facts.py
index 5cfdba7..d7ecce5 100644
--- a/src/ee/tools/digikey_download_facts.py
+++ b/src/ee/tools/digikey_download_facts.py
@@ -1,3 +1,6 @@
+if True:
+ raise Exception("This module is deprecated.")
+
import argparse
from functools import total_ordering
@@ -5,9 +8,6 @@ import ee.digikey as dk
from ee.digikey import SearchResponseTypes, DigikeyProduct
from ee.tools import log
-if True:
- raise Exception("This module is deprecated.")
-
@total_ordering
class Query(object):
@@ -83,7 +83,7 @@ for q in queries:
continue
log.info("Searching for {}".format(p))
- response = parser.parse_string(client.search(p))
+ response = parser.parse_string(client.baseurl, client.search(p))
todos = []
@@ -113,7 +113,7 @@ for q in queries:
log.warn("Part not found")
for part_number in todos:
- response = parser.parse_string(client.search(part_number))
+ response = parser.parse_string(client.baseurl, client.search(part_number))
if response.response_type == SearchResponseTypes.SINGLE:
p = sorted(response.products, key=lambda p: p.part_number)[0]
diff --git a/test/test_digikey.py b/test/test_digikey.py
index 528ed8e..f1b010b 100644
--- a/test/test_digikey.py
+++ b/test/test_digikey.py
@@ -21,7 +21,7 @@ force_refresh = False # Set to True to always fetch the updated html files
def test_digikey_1(tmpdir):
content = client.search("TCR2LF18LM(CTTR-ND")
- res = parser.parse_string(content)
+ res = parser.parse_string(client.baseurl, content)
assert res.response_type == dk.SearchResponseTypes.SINGLE
p = res.products[0]
assert p.part_number == "TCR2LF18LM(CTTR-ND"
@@ -50,7 +50,7 @@ def test_digikey_1(tmpdir):
def test_digikey_2():
content = client.product_search("TCR2LF", page_size=500)
- res = parser.parse_string(content)
+ res = parser.parse_string(client.baseurl, content)
assert res.response_type == dk.SearchResponseTypes.MANY
[print("dpn={}, mpn={}".format(p.part_number, p.mpn)) for p in res.products]
assert len(res.products) > 10
@@ -64,7 +64,7 @@ def test_digikey_2():
content = client.product_search(dpn)
- res = parser.parse_string(content)
+ res = parser.parse_string(client.baseurl, content)
assert res.response_type == dk.SearchResponseTypes.SINGLE
p = res.products[0]
@@ -76,7 +76,7 @@ def test_digikey_2():
def test_digikey_3():
content = client.product_search("RS1MTR")
- res = parser.parse_string(content)
+ res = parser.parse_string(client.baseurl, content)
assert res.response_type == dk.SearchResponseTypes.MANY
[print("dpn={}, mpn={}".format(p.part_number, p.mpn)) for p in res.products]
assert len(res.products) > 0