From 090a2703703877bd150aae637031c5d7dcba2df4 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Sun, 10 Sep 2017 12:32:49 +0200 Subject: setup.py: Adding install_requires. digikey: Updating tests. Making sure the directory exist before writing facts. Stop recursing into new searches when a search returns multiple hits. Let the frontends do that. --- .gitignore | 4 +++- setup.py | 37 +++++++++++++++++++++++----------- src/ee/digikey/__init__.py | 36 ++++++++++++++++----------------- src/ee/tools/digikey_download_facts.py | 30 +++++++++++++++++++++------ test/test_digikey.py | 20 +++++++++--------- 5 files changed, 80 insertions(+), 47 deletions(-) diff --git a/.gitignore b/.gitignore index 332b420..7ba7dd5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +*.tmp +digikey_cache + *.pyc env env-* @@ -8,7 +11,6 @@ ee.egg-info .tox .ipynb*/ .ipynb_checkpoints -test/digikey_cache *.png diff --git a/setup.py b/setup.py index 1580612..0470b81 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,29 @@ from distutils.core import setup -from glob import glob -from os.path import basename -from os.path import dirname -from os.path import join -from os.path import splitext + from setuptools import find_packages setup(name='ee', - version='1.0', - packages=find_packages('src'), - package_dir={'': 'src'}, - entry_points = { - 'console_scripts': ['ee=ee.__main__:main'], - }, - ) + version='1.0', + packages=find_packages('src'), + package_dir={'': 'src'}, + entry_points={ + 'console_scripts': ['ee=ee.__main__:main'], + }, + install_requires=[ + 'ansicolors', + 'cachecontrol', + 'lockfile', + 'lxml', + 'matplotlib', + 'mpmath', + 'mypy', + 'numpy', + 'pandas', + 'parsec', + 'Pillow', + 'pytest', + 'pyyaml', + 'requests', + 'sympy', + ], + ) diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py index bd9a86b..64d8943 100644 --- a/src/ee/digikey/__init__.py +++ b/src/ee/digikey/__init__.py @@ -66,11 +66,11 @@ class Digikey(object): @total_ordering class DigikeyProduct(object): - def __init__(self, part_number, mpn, attributes, categories): + def __init__(self, part_number, mpn, attributes=None, categories=None): self.part_number = _clean(part_number) self.mpn = _clean(mpn) - self.attributes = attributes - self.categories = categories + self.attributes = attributes or [] + self.categories = categories or [] self.quantity_available = None self.description = None @@ -152,10 +152,10 @@ class DigikeySearchResponse(object): self.count = count self.response_type = response_type - self.products = set() + self.products = list() def append(self, product): - self.products.add(product) + self.products.append(product) class DigikeyClient(object): @@ -173,7 +173,7 @@ class DigikeyClient(object): # self.sess.mount('http://', adapter) # self.sess.mount('https://', adapter) - def req(self, url, params=None): + def _req(self, url, params=None): if not url.startswith("http"): url = "https://www.digikey.com" + url s = "" if not params else "?" + urllib.parse.urlencode(params) @@ -221,24 +221,24 @@ class DigikeyClient(object): return None def _search_process_multiple_results(self, tree: html, res: DigikeySearchResponse): - product_ids = [e.get("content").strip().replace('sku:', '') for e in - tree.xpath("//*[@itemprop='productid' and @content]")] + products = tree.xpath("//*[@itemtype='http://schema.org/Product']") - for product_id in product_ids: - tmp = self.search(product_id) - if isinstance(tmp, DigikeyProduct): - res.append(tmp) - else: - [res.append(p) for p in tmp.products] + for product in products: + part_number = _first(product.xpath("//*[@itemprop='productid' and @content]")) + mpn = _first(product.xpath("//*[@itemprop='name']")) - return len(product_ids) + if part_number is not None and mpn is not None: + res.append(DigikeyProduct( + part_number.get("content").strip().replace('sku:', ''), + mpn.text)) - def search(self, query: str) -> DigikeySearchResponse: - page_size = 10 + return len(products) + def search(self, query: str, page_size=10) -> DigikeySearchResponse: # http://www.digikey.com/products/en?x=0&y=0&lang=en&site=us&keywords=553-2320-1-ND + params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size), 'x': 0, 'y': 0} params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)} - page = self.req("https://www.digikey.com/products/en", params=params) + page = self._req("https://www.digikey.com/products/en", params=params) # print("page: ") # print(page.content) diff --git a/src/ee/tools/digikey_download_facts.py b/src/ee/tools/digikey_download_facts.py index a1e242b..08a8029 100644 --- a/src/ee/tools/digikey_download_facts.py +++ b/src/ee/tools/digikey_download_facts.py @@ -37,9 +37,16 @@ def mpn_to_path(mpn): return "{}/{}.yaml".format(args.out, mpn) -def on_product(p: DigikeyProduct): - y = p.to_yaml() - with open(mpn_to_path(p.mpn), "w") as f: +def on_product(product: DigikeyProduct): + y = product.to_yaml() + + filename = mpn_to_path(product.mpn) + dirname = os.path.dirname(filename) + + if not os.path.isdir(dirname): + os.mkdir(dirname) + + with open(filename, "w") as f: yaml.dump(y, f, encoding="utf-8", allow_unicode=True) @@ -54,11 +61,22 @@ for p in args.parts: if response.response_type == SearchResponseTypes.SINGLE: p = response.products[0] - print(color("Found {}".format(p.mpn), "white")) + print(color("Direct match {}".format(p.mpn), "white")) on_product(p) elif response.response_type == SearchResponseTypes.MANY: - for k, g in groupby(sorted(response.products), lambda p: p.mpn): - print(color("Found {}".format(k), "white")) + hits = list(groupby(sorted(response.products), lambda p: p.mpn)) + + if len(hits) == 1: + (mpn, products) = hits[0] + products = list(products) + + if len(products) == 1: + print(color("Got many results, but they all point to the same part: {}".format(mpn), "white")) + on_product(products[0]) + continue + + for k, g in hits: + print(color("Got many results with many parts: {}: {}".format(k, list(g)), "white")) on_product(list(g)[0]) elif response.response_type == SearchResponseTypes.TOO_MANY: print(color("Too many results ({}), select a category first".format(response.count), 'red')) diff --git a/test/test_digikey.py b/test/test_digikey.py index 6073e25..7509e2b 100644 --- a/test/test_digikey.py +++ b/test/test_digikey.py @@ -6,23 +6,23 @@ import sys basedir = os.path.dirname(os.path.abspath(__file__)) digikey = dk.Digikey() -client = dk.DigikeyClient(digikey) +client = dk.DigikeyClient(digikey, on_download=print) def test_digikey_1(): - p = client.search("TCR2LF18LM(CTTR-ND") - assert isinstance(p, dk.DigikeyProduct) + res = client.search("TCR2LF18LM(CTTR-ND") + assert res.response_type == dk.SearchResponseTypes.SINGLE + p = res.products[0] assert p.part_number == "TCR2LF18LM(CTTR-ND" + assert p.mpn == "TCR2LF18,LM(CT" assert len(p.attributes) > 5 x = p.to_yaml() - print(type(x)) - print("{}".format(x)) + print(str(x)) yaml.dump(x, sys.stdout) def test_digikey_2(): - response = client.search("TCR2LF") - [print(p.part_id) for p in response.products] - assert len(response.products) == 28 - # p = products[0] - # assert p.part_number == "TCR2LF18LM(CTTR-ND" + res = client.search("TCR2LF", page_size=500) + assert res.response_type == dk.SearchResponseTypes.MANY + [print(p.part_number) for p in res.products] + assert len(res.products) == 28 -- cgit v1.2.3