From 8e642991d557bd902b749ddd84e41d65c48f79cf Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Wed, 20 Mar 2019 15:30:05 +0100 Subject: o Adding OPL downloader from Seeed studio. o Adding description to part. --- requirements.txt | 1 + src/ee/_utils.py | 9 ++- src/ee/supplier/seeed.py | 151 +++++++++++++++++++++++++++++++++++++ src/ee/tools/seeed_download_opl.py | 16 ++++ src/ee/xml/types.py | 16 +++- xsd/ee.xsd | 1 + 6 files changed, 189 insertions(+), 5 deletions(-) create mode 100644 src/ee/supplier/seeed.py create mode 100644 src/ee/tools/seeed_download_opl.py diff --git a/requirements.txt b/requirements.txt index ec2c2f6..a389f3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,6 +17,7 @@ tox==3.1.2 selenium==3.141.0 generateds==2.30.11 +requests==2.21.0 # for development jupyter==1.0.0 diff --git a/src/ee/_utils.py b/src/ee/_utils.py index f917847..08e75fa 100644 --- a/src/ee/_utils.py +++ b/src/ee/_utils.py @@ -44,8 +44,9 @@ def all(filters): class HttpCache(object): - def __init__(self, path: Path): + def __init__(self, path: Path, ext="html"): self.path = path + self.ext = ext def lookup(self, key): cache_path = self._make_path(key) @@ -62,7 +63,7 @@ class HttpCache(object): f.write(value) def _make_path(self, key) -> Path: - return self.path / "{}.html".format(key) + return self.path / "{}.{}".format(key, self.ext) class EmptyHttpCache(object): @@ -74,5 +75,5 @@ class EmptyHttpCache(object): pass -def maybe_cache(path: Optional[Path]) -> HttpCache: - return HttpCache(path) if path is not None else EmptyHttpCache() +def maybe_cache(path: Optional[Path], **kwargs) -> HttpCache: + return HttpCache(path, **kwargs) if path is not None else EmptyHttpCache() diff --git a/src/ee/supplier/seeed.py b/src/ee/supplier/seeed.py new file mode 100644 index 0000000..2df2068 --- /dev/null +++ b/src/ee/supplier/seeed.py @@ -0,0 +1,151 @@ +import binascii +import json +import re +from pathlib import Path +from typing import Optional +from urllib.parse import urlencode + 
+import requests +from selenium import webdriver + +import ee._utils +from ee.part import PartDb, save_db +from ee.xml import types + +_title_re = re.compile(r"^([^(]*)\( *[0-9]* *\) *") + +__all__ = [ + "SeeedClient", + "download_opls", +] + + +class SeeedClient(object): + def __init__(self, cache_dir: Path = None): + self.cache = ee._utils.maybe_cache(cache_dir) + self.driver: Optional[webdriver.Chrome] = None + + def get(self, url: str): + cache_key = abs(binascii.crc32(url.encode("utf-8"))) + cached = self.cache.lookup(cache_key) + if cached: + return cached + + if self.driver is None: + options = webdriver.ChromeOptions() + self.driver = webdriver.Chrome(options=options) + + self.driver.get(url) + + src = self.driver.page_source + self.cache.save(cache_key, src) + + return src + + +def _checksum_opl(tree): + for table in tree.xpath("//*[contains(@class, 'fusion-opl-list')]"): + rows = table.xpath(".//tr[contains(@class, 'f12')]") + for r in rows: + print(r) + break + + +def _get(obj, key): + value = obj.get(key, None) + if value is None: + return "" + return value.strip() + + +def download_opls(out_dir: Path, cache_dir: Path): + opls_types = ["SEEED", "HQCHIP"] + + s = requests.Session() + + from ee.money import get_default_context + money = get_default_context() + + for opls_type in opls_types: + page_offset = 1 + page_length = 30 + count = 0 + + cd = cache_dir / opls_type if cache_dir else cache_dir + cache = ee._utils.maybe_cache(cd, ext="json") + + db = PartDb() + + while True: + cache_key = "{}-{}".format(opls_type, page_offset) + content = cache.lookup(cache_key) + if content is None: + form = { + "page_offset": page_offset, + "page_length": page_length, + "keyword": "", + "category": "", + "type": opls_type, + } + query = { + "guid": "A8D502008EB2E4FD3FE9CE5E0855E8E4", + "appid": "en.pc.bazaar", + } + r = s.get("https://sapi.seeedstudio.com/fusion/opl/list", params=query, data=form) + content = r.text + cache.save(cache_key, content) + + obj = 
json.loads(content) + supplier_uri = "http://purl.org/ee/supplier/seeed?{}".format(urlencode({"opl": opls_type})) + + parts = obj["data"]["list"] + + for p in parts: + # print(json.dumps(p, indent=4)) + mpn = _get(p, "mpn") + sku = _get(p, "sku") + datasheet = _get(p, "datasheet") + desc = _get(p, "desc") + package = _get(p, "package") + + if not mpn and not sku: + continue + + ladder_price = p["ladder_price"] + + part = types.Part(references=types.ReferencesList(), price_breaks=types.PriceBreakList()) + part_numbers = part.referencesProp.part_numberProp + supplier_part_numbers = part.referencesProp.supplier_part_numberProp + pbs = part.price_breaksProp.price_breakProp + + if desc: + part.descriptionProp = desc + + uri_params = { + "opl": opls_type, + } + if sku: + uri_params["sku"] = sku + if mpn: + uri_params["mpn"] = mpn + part.uriProp = "http://purl.org/ee/supplier/seeed?{}".format(urlencode(uri_params)) + + if mpn: + part_numbers.append(types.PartNumber(mpn)) + if sku: + supplier_part_numbers.append(types.SupplierPartNumber(value=sku, supplier=supplier_uri)) + + for item in ladder_price: + price = money.parse(item["price"], currency="USD") + amount = types.Amount(value=price.amount, currency=price.currency) + pbs.append(types.PriceBreak(quantity=item["qty"], amount=amount)) + + db.add_entry(part, True) + + page_offset += 1 + count += len(parts) + if len(parts) != page_length: + break + + print("Imported {} parts from Seeed's {} library".format(count, opls_type)) + save_db(out_dir / "{}.xml".format(opls_type), db, sort=True) diff --git a/src/ee/tools/seeed_download_opl.py b/src/ee/tools/seeed_download_opl.py new file mode 100644 index 0000000..e53361f --- /dev/null +++ b/src/ee/tools/seeed_download_opl.py @@ -0,0 +1,16 @@ +import argparse +from pathlib import Path + +from ee.supplier import seeed + +parser = argparse.ArgumentParser() + +parser.add_argument("--out-dir", + required=True, + metavar="DIR FOR PART DBS") + +args = parser.parse_args() + 
+cache_dir = ".ee/cache/seeed" + +seeed.download_opls(Path(args.out_dir), Path(cache_dir)) diff --git a/src/ee/xml/types.py b/src/ee/xml/types.py index 35cd664..a1ab7ec 100644 --- a/src/ee/xml/types.py +++ b/src/ee/xml/types.py @@ -807,11 +807,12 @@ class PartDb(GeneratedsSuper): class Part(GeneratedsSuper): subclass = None superclass = None - def __init__(self, uri=None, part_type=None, references=None, distributor_info=None, facts=None, price_breaks=None, **kwargs_): + def __init__(self, uri=None, part_type=None, description=None, references=None, distributor_info=None, facts=None, price_breaks=None, **kwargs_): self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') self.uri = _cast(None, uri) self.part_type = part_type + self.description = description self.references = references self.distributor_info = distributor_info self.facts = facts @@ -832,6 +833,11 @@ class Part(GeneratedsSuper): def set_part_type(self, part_type): self.part_type = part_type part_typeProp = property(get_part_type, set_part_type) + def get_description(self): + return self.description + def set_description(self, description): + self.description = description + descriptionProp = property(get_description, set_description) def get_references(self): return self.references def set_references(self, references): @@ -860,6 +866,7 @@ class Part(GeneratedsSuper): def hasContent_(self): if ( self.part_type is not None or + self.description is not None or self.references is not None or self.distributor_info is not None or self.facts is not None or @@ -901,6 +908,9 @@ class Part(GeneratedsSuper): if self.part_type is not None: showIndent(outfile, level, pretty_print) outfile.write('<%spart-type>%s</%spart-type>%s' % (namespaceprefix_ , self.gds_encode(self.gds_format_string(quote_xml(self.part_type), input_name='part-type')), namespaceprefix_ , eol_)) + if self.description is not None: + showIndent(outfile, level, pretty_print) + outfile.write('<%sdescription>%s</%sdescription>%s' % (namespaceprefix_ , 
self.gds_encode(self.gds_format_string(quote_xml(self.description), input_name='description')), namespaceprefix_ , eol_)) if self.references is not None: self.references.export(outfile, level, namespaceprefix_, namespacedef_='', name_='references', pretty_print=pretty_print) if self.distributor_info is not None: @@ -926,6 +936,10 @@ class Part(GeneratedsSuper): part_type_ = child_.text part_type_ = self.gds_validate_string(part_type_, node, 'part_type') self.part_type = part_type_ + elif nodeName_ == 'description': + description_ = child_.text + description_ = self.gds_validate_string(description_, node, 'description') + self.description = description_ elif nodeName_ == 'references': obj_ = ReferencesList.factory(parent_object_=self) obj_.build(child_) diff --git a/xsd/ee.xsd b/xsd/ee.xsd index 64d0f11..18d8665 100644 --- a/xsd/ee.xsd +++ b/xsd/ee.xsd @@ -46,6 +46,7 @@ TODO: rename 'id' to 'url'. + -- cgit v1.2.3