From 090a2703703877bd150aae637031c5d7dcba2df4 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Sun, 10 Sep 2017 12:32:49 +0200 Subject: setup.py: Adding install_requires. digikey: Updating tests. Making sure the directory exist before writing facts. Stop recursing into new searches when a search returns multiple hits. Let the frontends do that. --- src/ee/digikey/__init__.py | 36 +++++++++++++++++----------------- src/ee/tools/digikey_download_facts.py | 30 ++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py index bd9a86b..64d8943 100644 --- a/src/ee/digikey/__init__.py +++ b/src/ee/digikey/__init__.py @@ -66,11 +66,11 @@ class Digikey(object): @total_ordering class DigikeyProduct(object): - def __init__(self, part_number, mpn, attributes, categories): + def __init__(self, part_number, mpn, attributes=None, categories=None): self.part_number = _clean(part_number) self.mpn = _clean(mpn) - self.attributes = attributes - self.categories = categories + self.attributes = attributes or [] + self.categories = categories or [] self.quantity_available = None self.description = None @@ -152,10 +152,10 @@ class DigikeySearchResponse(object): self.count = count self.response_type = response_type - self.products = set() + self.products = list() def append(self, product): - self.products.add(product) + self.products.append(product) class DigikeyClient(object): @@ -173,7 +173,7 @@ class DigikeyClient(object): # self.sess.mount('http://', adapter) # self.sess.mount('https://', adapter) - def req(self, url, params=None): + def _req(self, url, params=None): if not url.startswith("http"): url = "https://www.digikey.com" + url s = "" if not params else "?" + urllib.parse.urlencode(params) @@ -221,24 +221,24 @@ class DigikeyClient(object): return None def _search_process_multiple_results(self, tree: html, res: DigikeySearchResponse): - product_ids = [e.get("content").strip().replace('sku:', '') for e in - tree.xpath("//*[@itemprop='productid' and @content]")] + products = tree.xpath("//*[@itemtype='http://schema.org/Product']") - for product_id in product_ids: - tmp = self.search(product_id) - if isinstance(tmp, DigikeyProduct): - res.append(tmp) - else: - [res.append(p) for p in tmp.products] + for product in products: + part_number = _first(product.xpath("//*[@itemprop='productid' and @content]")) + mpn = _first(product.xpath("//*[@itemprop='name']")) - return len(product_ids) + if part_number is not None and mpn is not None: + res.append(DigikeyProduct( + part_number.get("content").strip().replace('sku:', ''), + mpn.text)) - def search(self, query: str) -> DigikeySearchResponse: - page_size = 10 + return len(products) + def search(self, query: str, page_size=10) -> DigikeySearchResponse: # http://www.digikey.com/products/en?x=0&y=0&lang=en&site=us&keywords=553-2320-1-ND + params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size), 'x': 0, 'y': 0} params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)} - page = self.req("https://www.digikey.com/products/en", params=params) + page = self._req("https://www.digikey.com/products/en", params=params) # print("page: ") # print(page.content) diff --git a/src/ee/tools/digikey_download_facts.py b/src/ee/tools/digikey_download_facts.py index a1e242b..08a8029 100644 --- a/src/ee/tools/digikey_download_facts.py +++ b/src/ee/tools/digikey_download_facts.py @@ -37,9 +37,16 @@ def mpn_to_path(mpn): return "{}/{}.yaml".format(args.out, mpn) -def on_product(p: DigikeyProduct): - y = p.to_yaml() - with open(mpn_to_path(p.mpn), "w") as f: +def on_product(product: DigikeyProduct): + y = product.to_yaml() + + filename = mpn_to_path(product.mpn) + dirname = os.path.dirname(filename) + + if not os.path.isdir(dirname): + os.mkdir(dirname) + + with open(filename, "w") as f: yaml.dump(y, f, encoding="utf-8", allow_unicode=True) @@ -54,11 +61,22 @@ for p in args.parts: if response.response_type == SearchResponseTypes.SINGLE: p = response.products[0] - print(color("Found {}".format(p.mpn), "white")) + print(color("Direct match {}".format(p.mpn), "white")) on_product(p) elif response.response_type == SearchResponseTypes.MANY: - for k, g in groupby(sorted(response.products), lambda p: p.mpn): - print(color("Found {}".format(k), "white")) + hits = list(groupby(sorted(response.products), lambda p: p.mpn)) + + if len(hits) == 1: + (mpn, products) = hits[0] + products = list(products) + + if len(products) == 1: + print(color("Got many results, but they all point to the same part: {}".format(mpn), "white")) + on_product(products[0]) + continue + + for k, g in hits: + print(color("Got many results with many parts: {}: {}".format(k, list(g)), "white")) on_product(list(g)[0]) elif response.response_type == SearchResponseTypes.TOO_MANY: print(color("Too many results ({}), select a category first".format(response.count), 'red')) -- cgit v1.2.3