From 37e4be24129b6980e3e9fced7345d4a596af3d58 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Sun, 14 Apr 2019 19:41:30 +0200 Subject: digikey: o More flexibility, making room for the multiple digikey stores. o Removing URLs from core code. new tools: split_parts_by_supplier. --- src/ee/db.py | 4 +- src/ee/digikey/__init__.py | 50 ++++++++++++++++++++++-- src/ee/digikey/search_parts.py | 23 +++++------ src/ee/project/__init__.py | 15 +++++-- src/ee/tools/digikey_download_facts.py | 8 +++- src/ee/tools/digikey_search_parts.py | 8 +++- src/ee/tools/split_parts_by_supplier.py | 69 +++++++++++++++++++++++++++++++++ src/ee/tools/templates/build.ninja.j2 | 8 ++-- src/ee/xml/uris.py | 4 +- test/test_digikey.py | 26 ++++++++++++- 10 files changed, 184 insertions(+), 31 deletions(-) create mode 100644 src/ee/tools/split_parts_by_supplier.py diff --git a/src/ee/db.py b/src/ee/db.py index 57533ef..79d48ee 100644 --- a/src/ee/db.py +++ b/src/ee/db.py @@ -1,4 +1,4 @@ -from typing import TypeVar, Generic, Callable, MutableMapping, List, Iterable, Union, Any, Mapping +from typing import TypeVar, Generic, Callable, MutableMapping, List, Iterable, Union, Any, Mapping, Tuple, AbstractSet K = TypeVar('K') V = TypeVar('V') @@ -69,7 +69,7 @@ class ListIndex(Index[K, V]): def __iter__(self): return self.idx.__iter__() - def items(self): + def items(self) -> AbstractSet[Tuple[K, V]]: return self.idx.items() def values(self): diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py index 9815a8f..31d5c41 100644 --- a/src/ee/digikey/__init__.py +++ b/src/ee/digikey/__init__.py @@ -8,6 +8,7 @@ import urllib.parse from functools import total_ordering from pathlib import Path from typing import List, Optional +from urllib.parse import urlparse, parse_qs from lxml import html from selenium import webdriver @@ -59,6 +60,48 @@ def _first(collection, default=None): return next(iter(collection), default) +class DigikeyStore(object): + BASEURL = "http://purl.org/ee/supplier/digikey" + + def __init__(self, url, store, products_url): + self.url = url + self.store = store + self.products_url = products_url + + @staticmethod + def from_store_code(store_code): + url = "{}?store={}".format(DigikeyStore.BASEURL, store_code) + + products_url = "https://www.digikey.com/products/en" if store_code == "us" else \ + "https://www.digikey.{}/products/en".format(store_code) + + return DigikeyStore(url, store_code, products_url) + + @staticmethod + def from_url(store_url) -> Optional["DigikeyStore"]: + base = urlparse(DigikeyStore.BASEURL) + url = urlparse(store_url) + + if base.scheme != url.scheme or \ + base.netloc != url.netloc or \ + base.path != url.path: + return None + + q = parse_qs((url.query or "").strip()) + + store = q.get("store") + + if not store: + return None + + del q["store"] + + if len(q): + return None + + return DigikeyStore.from_store_code(store[0]) + + class Digikey(object): def __init__(self): self.attribute_types = {} @@ -230,7 +273,8 @@ class DigikeyClient(object): def __nop(self, message): pass - def __init__(self, cache_dir: Path = None, on_download=None): + def __init__(self, baseurl, cache_dir: Path = None, on_download=None): + self.baseurl = baseurl self.on_download = on_download or self.__nop self.cache = ee._utils.maybe_cache(cache_dir) self.driver: Optional[webdriver.Chrome] = None @@ -241,13 +285,11 @@ class DigikeyClient(object): def product_search(self, query: str, page_size=10) -> str: params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)} cache_key = urllib.parse.quote(query) - page = self._req("https://www.digikey.com/products/en", cache_key=cache_key, params=params) + page = self._req(self.baseurl, cache_key=cache_key, params=params) return page def _req(self, url, cache_key, params=None): - if not url.startswith("http"): - url = "https://www.digikey.com" + url url = url + ("" if not params else "?" + urllib.parse.urlencode(params)) cached = self.cache.lookup(cache_key) diff --git a/src/ee/digikey/search_parts.py b/src/ee/digikey/search_parts.py index fec5615..c7f981e 100644 --- a/src/ee/digikey/search_parts.py +++ b/src/ee/digikey/search_parts.py @@ -1,19 +1,18 @@ from pathlib import Path from typing import List -from ee.digikey import Digikey, DigikeyParser, DigikeyClient, SearchResponseTypes, DigikeyProduct +from ee.digikey import Digikey, DigikeyParser, DigikeyClient, SearchResponseTypes, DigikeyProduct, DigikeyStore from ee.part import PartDb, load_db, save_db, Part -from ee.project import Project from ee.xml import types -from ee.xml.uris import DIGIKEY_URI, make_digikey_fact_key +from ee.xml.uris import make_digikey_fact_key __all__ = ["search_parts"] -def resolved(p: DigikeyProduct) -> Part: +def resolved(supplier, p: DigikeyProduct) -> Part: # TODO: fix uri xml = types.Part(uri="https://digikey.com/pn#{}".format(p.part_number), - supplier=DIGIKEY_URI, + supplier=supplier, description=p.description, distributor_info=types.DistributorInfo(state="resolved"), links=types.LinkList(), @@ -50,15 +49,17 @@ def resolved(p: DigikeyProduct) -> Part: return part -def search_parts(in_path: Path, out_path: Path, cache_dir: Path): +def search_parts(in_path: Path, out_path: Path, cache_dir: Path, store_code): in_db = load_db(in_path) out_parts = PartDb() + store = DigikeyStore.from_store_code(store_code) + parser = DigikeyParser(Digikey()) - client = DigikeyClient(cache_dir) + client = DigikeyClient(store.products_url, cache_dir) for xml in in_db.iterparts(): - if xml.supplierProp is not None and xml.supplierProp != DIGIKEY_URI: + if xml.supplierProp is not None and xml.supplierProp != store.url: continue part = Part(xml) @@ -83,7 +84,7 @@ def search_parts(in_path: Path, out_path: Path, cache_dir: Path): out_id = query out_part = types.Part(uri=out_id, distributor_info=types.DistributorInfo(), - supplier=DIGIKEY_URI, + supplier=store.url, references=xml.referencesProp) di = out_part.distributor_infoProp @@ -91,7 +92,7 @@ def search_parts(in_path: Path, out_path: Path, cache_dir: Path): response = parser.parse_string(text) if response.response_type == SearchResponseTypes.SINGLE: - out_part = resolved(response.products[0]) + out_part = resolved(store.url, response.products[0]) elif response.response_type == SearchResponseTypes.MANY: # find those with an exact match. Digikey uses a prefix search so a query for "FOO" will return "FOO" @@ -108,7 +109,7 @@ def search_parts(in_path: Path, out_path: Path, cache_dir: Path): response = parser.parse_string(client.search(dpn)) if response.response_type == SearchResponseTypes.SINGLE: - out_part = resolved(response.products[0]) + out_part = resolved(store.url, response.products[0]) else: di.stateProp = "many" diff --git a/src/ee/project/__init__.py b/src/ee/project/__init__.py index 24635ee..0857e7c 100644 --- a/src/ee/project/__init__.py +++ b/src/ee/project/__init__.py @@ -2,8 +2,8 @@ import configparser from pathlib import Path from ee import EeException +from ee.digikey import DigikeyStore from ee.tools import mk_parents -from ee.xml.uris import DIGIKEY_URI def load_config(project_dir: Path) -> configparser.ConfigParser: @@ -20,9 +20,10 @@ def load_config(project_dir: Path) -> configparser.ConfigParser: class SupplierDescriptor(object): - def __init__(self, key: str, uri: str): + def __init__(self, key: str, uri: str, name: str): self.key = key self.uri = uri + self.name = name class Project(object): @@ -34,7 +35,9 @@ class Project(object): self._cfg = cfg # TODO: read from config - self._suppliers = [SupplierDescriptor("digikey", DIGIKEY_URI)] + self._suppliers = [] + digikey_store = DigikeyStore.from_store_code("us") + self._suppliers.append(SupplierDescriptor("digikey", digikey_store.url, "Digikey")) def get_supplier_by_key(self, key) -> SupplierDescriptor: sd = next((s for s in self._suppliers if s.key == key), None) @@ -42,6 +45,12 @@ class Project(object): return sd raise EeException("No such supplier configured: {}".format(key)) + def get_supplier_by_uri(self, uri) -> SupplierDescriptor: + sd = next((s for s in self._suppliers if s.uri == uri), None) + if sd: + return sd + raise EeException("No such supplier configured: {}".format(uri)) + @property def cfg(self): return self._cfg diff --git a/src/ee/tools/digikey_download_facts.py b/src/ee/tools/digikey_download_facts.py index 3ab8551..5cfdba7 100644 --- a/src/ee/tools/digikey_download_facts.py +++ b/src/ee/tools/digikey_download_facts.py @@ -5,6 +5,9 @@ import ee.digikey as dk from ee.digikey import SearchResponseTypes, DigikeyProduct from ee.tools import log +if True: + raise Exception("This module is deprecated.") + @total_ordering class Query(object): @@ -45,7 +48,7 @@ parser.add_argument("--force", args = parser.parse_args() digikey = dk.Digikey() -client = dk.DigikeyClient(on_download=log.debug) +client = dk.DigikeyClient("https://www.digikey.com/products/en", on_download=log.debug) parser = dk.DigikeyParser(digikey) repo = dk.DigikeyRepository(digikey, args.out) @@ -99,7 +102,8 @@ for q in queries: # Pick the first one, should be as good as any part_number = sorted(viable_products, key=lambda p: p.part_number)[0].part_number - log.info("Got many hits for term '{}', will use {} for downloading attributes.".format(q.query, part_number)) + log.info( + "Got many hits for term '{}', will use {} for downloading attributes.".format(q.query, part_number)) todos.append(part_number) else: log.warn("Got many results: {}".format(", ".join([p.part_number for p in response.products]))) diff --git a/src/ee/tools/digikey_search_parts.py b/src/ee/tools/digikey_search_parts.py index 6cf104d..28ccbb9 100644 --- a/src/ee/tools/digikey_search_parts.py +++ b/src/ee/tools/digikey_search_parts.py @@ -15,9 +15,15 @@ parser.add_argument("--out", required=True, metavar="PART DB") +parser.add_argument("--store", + default="us", + metavar="STORE CODE") + args = parser.parse_args() project = Project.load() cache_dir = project.cache_dir / "digikey" -search_parts(Path(args.in_path), Path(args.out), cache_dir) +store_code = args.store + +search_parts(Path(args.in_path), Path(args.out), cache_dir, store_code) diff --git a/src/ee/tools/split_parts_by_supplier.py b/src/ee/tools/split_parts_by_supplier.py new file mode 100644 index 0000000..1ebf094 --- /dev/null +++ b/src/ee/tools/split_parts_by_supplier.py @@ -0,0 +1,69 @@ +import argparse +from pathlib import Path +from typing import List + +from ee.db import ObjDb +from ee.part import Part, load_db, save_db, PartDb +from ee.project import Project + + +class OrderPart(object): + def __init__(self, order_part: Part, part: Part): + self.order_part = order_part + self.part = part + + +def uri_fn(part: Part): + return part.uri + + +def split_parts_by_supplier(project: Project, order_file: Path, part_dbs: List[Path], out_dir: Path): + parts: ObjDb[Part] = ObjDb[Part]() + part_by_uri = parts.add_unique_index("uri", uri_fn) + + for part_db in part_dbs: + for xml in load_db(part_db).iterparts(): + parts.add(Part(xml)) + + order_parts: ObjDb[OrderPart] = ObjDb() + supplier_idx = order_parts.add_index("supplier", lambda op: op.part.supplier) + for xml in load_db(order_file).iterparts(): + order_part = Part(xml) + part = part_by_uri.get_single(order_part.get_exactly_one_part_reference().part_uriProp) + order_parts.add(OrderPart(order_part, part)) + + for supplier, parts_for_supplier in supplier_idx.items(): + desc = project.get_supplier_by_uri(supplier) + + print("{}: {}".format(desc.name, len(parts))) + # supplier_db: ObjDb[Part] = ObjDb[Part]() + # supplier_db.add_unique_index("uri", uri_fn) + + supplier_descriptor = project.get_supplier_by_uri(supplier) + + db = PartDb() + for part_for_supplier in parts_for_supplier: + db.add_entry(part_for_supplier.part.underlying, False) + + save_db(out_dir / "{}.xml".format(supplier_descriptor.key), db, sort=True) + + +parser = argparse.ArgumentParser() + +parser.add_argument("--parts", + required=True, + metavar="PART DB") + +parser.add_argument("--part-db", + nargs="*", + required=True, + metavar="PART DB") + +parser.add_argument("--out-dir", + metavar="DIR FOR PART DBS") + +args = parser.parse_args() + +part_db_dirs = [Path(part_db) for part_db in args.part_db] + +split_parts_by_supplier(Project.load(), Path(args.parts), part_db_dirs, Path(args.out_dir)) diff --git a/src/ee/tools/templates/build.ninja.j2 b/src/ee/tools/templates/build.ninja.j2 index ea979da..d62f350 100644 --- a/src/ee/tools/templates/build.ninja.j2 +++ b/src/ee/tools/templates/build.ninja.j2 @@ -45,9 +45,9 @@ rule create-bom description = create-bom command = $ee create-bom --schematic $schematic --part-db $part_dbs --out $out $strategy -rule export-order - description = export-order - command = $ee export-order --order $order $part_dbs --out-dir $out_dir +rule split-parts-by-supplier + description = split-parts-by-supplier + command = $ee split-parts-by-supplier --parts $order $part_dbs --out-dir $out_dir rule import-parts-yaml description = import-parts-yaml $in @@ -90,7 +90,7 @@ build ee/bom.xml | $report_dir/bom.rst: create-bom ee/sch.xml {%- for p in part_ strategy = --strategy {{ project.cfg["create-bom"]["strategy"] }} {%- endif %} -build ee/orders/index.xml: export-order ee/bom.xml {%- for p in part_dbs %} {{ p }}.xml{% endfor %} +build ee/orders/index.xml: split-parts-by-supplier ee/bom.xml {%- for p in part_dbs %} {{ p }}.xml{% endfor %} order = ee/bom.xml part_dbs ={%- for p in part_dbs %} --part-db {{ p }}.xml{% endfor %} out_dir = ee/orders diff --git a/src/ee/xml/uris.py b/src/ee/xml/uris.py index f0a1022..c716103 100644 --- a/src/ee/xml/uris.py +++ b/src/ee/xml/uris.py @@ -6,9 +6,7 @@ DIODE = "http://purl.org/ee/part-type#diode" INDUCTOR = "http://purl.org/ee/part-type#inductor" CRYSTAL = "http://purl.org/ee/part-type#inductor" -DIGIKEY_URI = "https://digikey.com" - -_DIGIKEY_FACT_KEY_PREFIX = "http://purl.org/trygvis/ee/digikey-fact-key#" +_DIGIKEY_FACT_KEY_PREFIX = "http://purl.org/ee/digikey-fact-key#" def make_digikey_fact_key(key: int) -> str: diff --git a/test/test_digikey.py b/test/test_digikey.py index 2e9b1f1..568360e 100644 --- a/test/test_digikey.py +++ b/test/test_digikey.py @@ -12,7 +12,8 @@ static_copies = basedir / "digikey" / "static-copies" # type: Path digikey = dk.Digikey() parser = dk.DigikeyParser(digikey) -client = dk.DigikeyClient(cache_dir=basedir / "digikey" / "static-copies", on_download=print) +client = dk.DigikeyClient("https://www.digikey.com/products/en", cache_dir=basedir / "digikey" / "static-copies", + on_download=print) force_refresh = False # Set to True to always fetch the updated html files @@ -83,3 +84,26 @@ def test_digikey_3(): p = next((p for p in res.products if p.part_number == "1655-1501-1-ND"), None) assert p.mpn == "RS1MTR" assert p.url == "/product-detail/en/smc-diode-solutions/RS1MTR/1655-1501-1-ND/6022946" + + +@pytest.mark.digikey +def test_store(): + store = dk.DigikeyStore.from_store_code("us") + assert store.products_url == "https://www.digikey.com/products/en" + assert store.url == "http://purl.org/ee/supplier/digikey?store=us" + assert store.store == "us" + + store = dk.DigikeyStore.from_url(store.url) + assert store.products_url == "https://www.digikey.com/products/en" + assert store.url == "http://purl.org/ee/supplier/digikey?store=us" + assert store.store == "us" + + store = dk.DigikeyStore.from_store_code("no") + assert store.products_url == "https://www.digikey.no/products/en" + assert store.url == "http://purl.org/ee/supplier/digikey?store=no" + assert store.store == "no" + + store = dk.DigikeyStore.from_url(store.url) + assert store.products_url == "https://www.digikey.no/products/en" + assert store.url == "http://purl.org/ee/supplier/digikey?store=no" + assert store.store == "no" -- cgit v1.2.3