From 37e4be24129b6980e3e9fced7345d4a596af3d58 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Sun, 14 Apr 2019 19:41:30 +0200 Subject: digikey: o More flexibility, making room for the multiple digikey stores. o Removing URLs from core code. new tools: split_parts_by_supplier. --- src/ee/digikey/__init__.py | 50 ++++++++++++++++++++++++++++++++++++++---- src/ee/digikey/search_parts.py | 23 +++++++++---------- 2 files changed, 58 insertions(+), 15 deletions(-) (limited to 'src/ee/digikey') diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py index 9815a8f..31d5c41 100644 --- a/src/ee/digikey/__init__.py +++ b/src/ee/digikey/__init__.py @@ -8,6 +8,7 @@ import urllib.parse from functools import total_ordering from pathlib import Path from typing import List, Optional +from urllib.parse import urlparse, parse_qs from lxml import html from selenium import webdriver @@ -59,6 +60,48 @@ def _first(collection, default=None): return next(iter(collection), default) +class DigikeyStore(object): + BASEURL = "http://purl.org/ee/supplier/digikey" + + def __init__(self, url, store, products_url): + self.url = url + self.store = store + self.products_url = products_url + + @staticmethod + def from_store_code(store_code): + url = "{}?store={}".format(DigikeyStore.BASEURL, store_code) + + products_url = "https://www.digikey.com/products/en" if store_code == "us" else \ + "https://www.digikey.{}/products/en".format(store_code) + + return DigikeyStore(url, store_code, products_url) + + @staticmethod + def from_url(store_url) -> Optional["DigikeyStore"]: + base = urlparse(DigikeyStore.BASEURL) + url = urlparse(store_url) + + if base.scheme != url.scheme or \ + base.netloc != url.netloc or \ + base.path != url.path: + return None + + q = parse_qs((url.query or "").strip()) + + store = q.get("store") + + if not store: + return None + + del q["store"] + + if len(q): + return None + + return DigikeyStore.from_store_code(store[0]) + + class Digikey(object): def __init__(self): self.attribute_types = {} @@ -230,7 +273,8 @@ class DigikeyClient(object): def __nop(self, message): pass - def __init__(self, cache_dir: Path = None, on_download=None): + def __init__(self, baseurl, cache_dir: Path = None, on_download=None): + self.baseurl = baseurl self.on_download = on_download or self.__nop self.cache = ee._utils.maybe_cache(cache_dir) self.driver: Optional[webdriver.Chrome] = None @@ -241,13 +285,11 @@ class DigikeyClient(object): def product_search(self, query: str, page_size=10) -> str: params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)} cache_key = urllib.parse.quote(query) - page = self._req("https://www.digikey.com/products/en", cache_key=cache_key, params=params) + page = self._req(self.baseurl, cache_key=cache_key, params=params) return page def _req(self, url, cache_key, params=None): - if not url.startswith("http"): - url = "https://www.digikey.com" + url url = url + ("" if not params else "?" + urllib.parse.urlencode(params)) cached = self.cache.lookup(cache_key) diff --git a/src/ee/digikey/search_parts.py b/src/ee/digikey/search_parts.py index fec5615..c7f981e 100644 --- a/src/ee/digikey/search_parts.py +++ b/src/ee/digikey/search_parts.py @@ -1,19 +1,18 @@ from pathlib import Path from typing import List -from ee.digikey import Digikey, DigikeyParser, DigikeyClient, SearchResponseTypes, DigikeyProduct +from ee.digikey import Digikey, DigikeyParser, DigikeyClient, SearchResponseTypes, DigikeyProduct, DigikeyStore from ee.part import PartDb, load_db, save_db, Part -from ee.project import Project from ee.xml import types -from ee.xml.uris import DIGIKEY_URI, make_digikey_fact_key +from ee.xml.uris import make_digikey_fact_key __all__ = ["search_parts"] -def resolved(p: DigikeyProduct) -> Part: +def resolved(supplier, p: DigikeyProduct) -> Part: # TODO: fix uri xml = types.Part(uri="https://digikey.com/pn#{}".format(p.part_number), - supplier=DIGIKEY_URI, + supplier=supplier, description=p.description, distributor_info=types.DistributorInfo(state="resolved"), links=types.LinkList(), @@ -50,15 +49,17 @@ def resolved(p: DigikeyProduct) -> Part: return part -def search_parts(in_path: Path, out_path: Path, cache_dir: Path): +def search_parts(in_path: Path, out_path: Path, cache_dir: Path, store_code): in_db = load_db(in_path) out_parts = PartDb() + store = DigikeyStore.from_store_code(store_code) + parser = DigikeyParser(Digikey()) - client = DigikeyClient(cache_dir) + client = DigikeyClient(store.products_url, cache_dir) for xml in in_db.iterparts(): - if xml.supplierProp is not None and xml.supplierProp != DIGIKEY_URI: + if xml.supplierProp is not None and xml.supplierProp != store.url: continue part = Part(xml) @@ -83,7 +84,7 @@ def search_parts(in_path: Path, out_path: Path, cache_dir: Path): out_id = query out_part = types.Part(uri=out_id, distributor_info=types.DistributorInfo(), - supplier=DIGIKEY_URI, + supplier=store.url, references=xml.referencesProp) di = out_part.distributor_infoProp @@ -91,7 +92,7 @@ def search_parts(in_path: Path, out_path: Path, cache_dir: Path): response = parser.parse_string(text) if response.response_type == SearchResponseTypes.SINGLE: - out_part = resolved(response.products[0]) + out_part = resolved(store.url, response.products[0]) elif response.response_type == SearchResponseTypes.MANY: # find those with an exact match. Digikey uses a prefix search so a query for "FOO" will return "FOO" @@ -108,7 +109,7 @@ def search_parts(in_path: Path, out_path: Path, cache_dir: Path): response = parser.parse_string(client.search(dpn)) if response.response_type == SearchResponseTypes.SINGLE: - out_part = resolved(response.products[0]) + out_part = resolved(store.url, response.products[0]) else: di.stateProp = "many" -- cgit v1.2.3