diff options
Diffstat (limited to 'trygvis/eda/digikey')
-rw-r--r-- | trygvis/eda/digikey/__init__.py | 300 | ||||
-rw-r--r-- | trygvis/eda/digikey/__main__.py | 42 | ||||
-rw-r--r-- | trygvis/eda/digikey/rdf.py | 7 |
3 files changed, 349 insertions, 0 deletions
# =============================================================================
# File: trygvis/eda/digikey/__init__.py  (new file)
# =============================================================================
"""Scrape product data from digikey.com and express it as RDF triples."""
import re

import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import ExpiresAfter
from lxml import html
from rdflib import Literal
from rdflib.namespace import RDF, RDFS

# Bind ``rdf`` explicitly instead of relying on the import system attaching the
# submodule to this package's namespace as a side effect.
from . import rdf

# Prefix used to turn site-relative links into absolute URLs.
_DIGIKEY_BASE_URL = "http://www.digikey.com"


def normalize_filename(part):
    """Return *part* with every '/' and ' ' replaced by '_'."""
    return part.replace('/', '_').replace(' ', '_')


def _clean(s):
    """Strip *s*; return None for None, empty or whitespace-only input."""
    if s is None:
        return None
    s = s.strip()
    return s or None


class DigikeyDatabase(object):
    """In-memory store of product categories and attribute types."""

    def __init__(self):
        self.productCategories = []
        self.attributeTypes = {}  # attribute type id -> DigikeyAttributeType

    def add_product_category(self, pc):
        """Append the top-level category *pc*."""
        self.productCategories.append(pc)

    def find_category(self, label):
        """Return the first top-level category labelled *label*, or None."""
        return next((c for c in self.productCategories if c.label == label), None)

    def find_sub_category_by_url(self, url):
        """Return the sub-category whose RDF node (``url()``) equals *url*, or None."""
        for category in self.productCategories:
            for sub_category in category.subCategories:
                if sub_category.url() == url:
                    return sub_category
        return None

    def merge_attribute_types(self, attributeTypes):
        """Register each attribute type whose id is not already known."""
        for attribute_type in attributeTypes:
            # TODO: implement merging; for now the first definition wins.
            self.attributeTypes.setdefault(attribute_type.id, attribute_type)

    def find_type(self, id):
        """Return the attribute type registered under *id*, or None."""
        return self.attributeTypes.get(id)


class DigikeyProductCategory(object):
    """A product category, optionally nested under a parent category.

    Raises AssertionError if *id* or *label* is None or blank.
    """

    def __init__(self, id, label, digikey_url=None, parent=None):
        self.id = _clean(id)
        self.label = _clean(label)
        # Site-relative links are made absolute; absolute ones are kept as-is.
        if digikey_url is not None and not digikey_url.startswith("http"):
            digikey_url = _DIGIKEY_BASE_URL + digikey_url
        self.digikey_url = digikey_url
        self.parent = parent
        self.subCategories = []

        assert self.id is not None
        assert self.label is not None

    def add_sub_category(self, id, label, digikey_url):
        """Create a child category of this one and append it to subCategories."""
        self.subCategories.append(
            DigikeyProductCategory(id, label, digikey_url=digikey_url, parent=self))

    def find_sub_category_by_label(self, label):
        """Return the first sub-category labelled *label*, or None."""
        return next((sc for sc in self.subCategories if sc.label == label), None)

    def url(self):
        """Return the RDF node identifying this category."""
        return rdf.DIGIKEY_PRODUCT_CATEGORY[self.id]

    def to_nodes(self):
        """Return the (subject, predicate, object) triples describing this category."""
        node = self.url()
        nodes = [
            (node, RDF.type, rdf.DIGIKEY.productCategory),
            (node, RDFS.label, Literal(self.label)),
        ]
        if self.parent is not None:
            nodes.append((node, rdf.DIGIKEY.parent, self.parent.url()))
        if self.digikey_url is not None:
            nodes.append((node, rdf.DIGIKEY.url, Literal(self.digikey_url)))
        return nodes


class DigikeyAttributeType(object):
    """A filterable attribute of a category together with its selectable options.

    Raises AssertionError if *category*, *id*, *label* or *options* is missing.
    """

    def __init__(self, category, id, label, options):
        self.category = category
        self.id = _clean(id)
        self.label = _clean(label)
        self.options = options

        assert self.category is not None
        assert self.id is not None
        assert self.label is not None
        assert self.options is not None

    def to_nodes(self):
        """Return triples for the attribute type and each of its options."""
        node = rdf.DIGIKEY_ATTRIBUTE_TYPE[self.id]
        nodes = [
            (node, RDF.type, rdf.DIGIKEY.attributeType),
            (node, RDFS.label, Literal(self.label)),
        ]
        for option in self.options:
            # Option nodes are keyed by "<type id>-<option id>".
            option_node = rdf.DIGIKEY_ATTRIBUTE_VALUE[self.id + '-' + option.id]
            nodes.extend([
                (option_node, rdf.DIGIKEY.id, Literal(option.id)),
                (option_node, RDFS.label, Literal(option.label)),
                (node, rdf.DIGIKEY.value, option_node),
            ])
        return nodes


class DigikeyAttributeValue(object):
    """One value of an attribute.

    The owning type is given either as a ``DigikeyAttributeType`` (*type*) or,
    when no such object is available, as a bare *type_id* / *type_label* pair.
    Raises AssertionError if *id* or *label* is None or blank.
    """

    def __init__(self, id, label, type=None, type_id=None, type_label=None):
        self.id = _clean(id)
        self.label = _clean(label)
        self.type = type
        self.type_id = type_id
        self.type_label = type_label

        assert self.id is not None
        assert self.label is not None


class DigikeyProduct(object):
    """A single product with its attribute values and categories.

    Raises AssertionError if *part_id* or *part_number* is None or blank.
    """

    def __init__(self, part_id, part_number, values, categories):
        self.part_id = _clean(part_id)
        self.part_number = _clean(part_number)
        self.values = values
        self.categories = categories
        self.quantity_available = None
        self.description = None

        assert self.part_id is not None
        assert self.part_number is not None

    def to_nodes(self):
        """Return the triples describing this product."""
        node = rdf.DIGIKEY_PART[self.part_id]
        nodes = [
            (node, RDF.type, rdf.DIGIKEY.part),
            (node, rdf.DIGIKEY.partNumber, Literal(self.part_number)),
        ]
        # Do not emit a label built from a missing description.
        if self.description is not None:
            nodes.append((node, RDFS.label, Literal(self.description)))

        for value in self.values:
            # The fallback attribute is ``type_id``; the previous spelling
            # (``typeId``) does not exist on DigikeyAttributeValue.
            type_id = value.type.id if value.type is not None else value.type_id
            if type_id is None:
                continue  # cannot build a node key without a type id
            nodes.append((node, rdf.DIGIKEY['attribute-value'],
                          rdf.DIGIKEY_ATTRIBUTE_VALUE[type_id + '-' + value.id]))

        nodes.extend((node, rdf.DIGIKEY.category, c.url()) for c in self.categories)
        return nodes


class DigikeyClient(object):
    """HTTP client whose responses are cached on disk in ``digikey_cache``."""

    def __init__(self):
        cache = FileCache('digikey_cache', forever=True)
        self.sess = CacheControl(requests.Session(), cache=cache,
                                 heuristic=ExpiresAfter(days=1))

    def req(self, url, params=None):
        """GET *url*; anything that is not an absolute http(s) URL is site-relative."""
        if not url.startswith(("http://", "https://")):
            url = _DIGIKEY_BASE_URL + url
        return self.sess.get(url, params=params)


def _to_string(e):
    """Return all text inside element *e*, concatenated and stripped."""
    return "".join(e.itertext()).strip()


def _id_from_url(url):
    """Return the digits following the last '/<digits>' in *url*, or None."""
    if url is None:
        return None
    m = re.search(r".*/([0-9]+)", url)
    return m.group(1) if m else None


def download_category_tree(database, client, baseurl="http://www.digikey.com/products/en"):
    """Fetch the category index page and add every category found to *database*.

    Headings without a child link carrying a numeric id are skipped, as are
    sub-category links without one.
    """
    page = client.req(baseurl)
    dom = html.fromstring(page.content)

    for h2 in dom.xpath("//h2[contains(@class, 'catfiltertopitem')]"):
        # Take the first child link that carries an id, so that a later child
        # without one cannot overwrite it.
        category_id = None
        for child in h2:
            category_id = _id_from_url(child.get('href'))
            if category_id is not None:
                break
        if category_id is None:
            continue

        pc = DigikeyProductCategory(category_id, _to_string(h2))

        # The sub-category list is the next sibling, optionally preceded by a
        # <span>; getnext() returns None when there is no further sibling.
        sibling = h2.getnext()
        if sibling is not None and sibling.tag == 'span':
            sibling = sibling.getnext()
        if sibling is not None and sibling.tag == 'ul':
            for a in sibling.xpath('./li/a'):
                href = a.get('href')
                sub_id = _id_from_url(href)
                if sub_id is None:
                    continue
                pc.add_sub_category(sub_id, _to_string(a), href)

        database.add_product_category(pc)


def download_attribute_types_from_category(category, client):
    """Fetch *category*'s page and return the attribute types of its filter form.

    Each filter <select> becomes a ``DigikeyAttributeType`` whose options are
    the <option> children; the label is the header cell at the same column
    index. Selects without a matching header or without a name are skipped.
    """
    page = client.req(category.digikey_url)
    tree = html.fromstring(page.content)

    attributes = []
    for form in tree.xpath("//form[contains(@class, 'search-form')]"):
        print('form: ' + str(form))
        headers = form.xpath(".//tr[@id='appliedFilterHeaderRow']/th/text()")
        print("headers: " + str(headers))
        for select in form.xpath(".//td/select[contains(@class, 'filter-selectors')]"):
            td = select.getparent()
            index = td.getparent().index(td)
            try:
                attribute_label = headers[index]
            except IndexError:
                continue  # no header cell for this column
            attribute_id = select.get('name')
            if attribute_id is None:
                continue
            print("label: " + attribute_label + ", id: " + attribute_id)

            # The options list is shared with the type so that each value can
            # point back at its fully populated type.
            options = []
            attribute_type = DigikeyAttributeType(category, attribute_id, attribute_label, options)
            for option in select.xpath("./option"):
                options.append(DigikeyAttributeValue(
                    option.get('value'), _to_string(option), type=attribute_type))
            attributes.append(attribute_type)

    return attributes


def download_product(client, db, query):
    """Search digikey.com for *query* and return the resulting ``DigikeyProduct``.

    *db* is accepted for interface compatibility and is currently unused.
    Raises AssertionError (from ``DigikeyProduct``) when the page carries no
    part id or part number.
    """
    # http://www.digikey.com/products/en?x=0&y=0&lang=en&site=us&keywords=553-2320-1-ND
    page = client.req(_DIGIKEY_BASE_URL + "/products/en",
                      params={'lang': 'en', 'site': 'us', 'keywords': query})
    tree = html.fromstring(page.content)

    values = []
    categories = []
    for table in tree.xpath("//table[contains(@class, 'attributes-table-main')]"):
        for tr in table.xpath(".//tr"):
            if tr.get("id") is not None:
                continue
            cells = tr.xpath("./th | ./td")
            if len(cells) != 3:
                continue
            type_label = _to_string(cells[0])
            label = _to_string(cells[1])

            # Reset per row; previously a for/else cleared the type id after
            # every row, so no value was ever collected.
            type_id = value_id = None
            for field in cells[2].xpath("./input[@name]"):
                type_id = field.get("name")
                value_id = field.get("value")

            # DigikeyAttributeValue requires a non-empty id and label.
            if not value_id or not type_id or not label:
                continue
            if type_id == "t":  # categories are handled later
                continue
            values.append(DigikeyAttributeValue(
                value_id, label, type_id=type_id, type_label=type_label))

        for td in table.xpath(".//td[@class='attributes-td-categories-link']"):
            category_id = href = category_label = None
            for a in td.xpath(".//a[@href]"):
                href = a.get("href")
                category_id = _id_from_url(href)
                # NOTE(review): the link text is assumed to be the category
                # name; the old code reused the last attribute value's label.
                category_label = _to_string(a)

            if category_id is None or not category_label:
                continue
            categories.append(
                DigikeyProductCategory(category_id, category_label, digikey_url=href))

    part_id = part_number = None
    for n in tree.xpath("//input[@name='partid' and @value]"):
        part_id = n.get("value")
    for n in tree.xpath("//*[@itemprop='productID' and @content]"):
        part_number = n.get("content").replace('sku:', '')

    product = DigikeyProduct(part_id, part_number, values, categories)
    for n in tree.xpath("//*[@itemprop='description']"):
        product.description = _to_string(n)
    return product


# =============================================================================
# File: trygvis/eda/digikey/__main__.py  (new file)
# =============================================================================
# Command line entry point: python -m trygvis.eda.digikey <command> [options]
import argparse

from .. import write_graph
from ..cli import *
from ..digikey import *


def _build_parser():
    """Return the argument parser with one sub-command per operation."""
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='cmd')  # help='sub-command help'

    dct_parser = subparsers.add_parser("download-category-tree")
    dct_parser.add_argument("-o", "--output", required=False)

    dp_parser = subparsers.add_parser("download-product")
    dp_parser.add_argument("-p", "--product")
    dp_parser.add_argument("-o", "--output", required=False)
    return parser


def main():
    """Run the sub-command selected on the command line."""
    args = _build_parser().parse_args()

    client = DigikeyClient()
    db = DigikeyDatabase()

    if args.cmd == "download-category-tree":
        download_category_tree(db, client)
        if args.output is not None:
            def make_graph():
                g = create_graph(digikey=True)
                for pc in db.productCategories:
                    for node in pc.to_nodes():
                        g.add(node)
                    for sc in pc.subCategories:
                        for node in sc.to_nodes():
                            g.add(node)
                # NOTE(review): write_graph is not visible here; it is assumed
                # to call this factory and serialise what it returns - confirm.
                return g
            write_graph(make_graph, args.output)

    elif args.cmd == "download-product":
        download_category_tree(db, client)
        product = download_product(client, db, args.product)

        if args.output is not None:
            def make_graph():
                g = create_graph(digikey=True)
                for node in product.to_nodes():
                    g.add(node)
                # NOTE(review): see the note on the factory above - confirm
                # that write_graph uses the returned graph.
                return g
            write_graph(make_graph, args.output)


if __name__ == "__main__":
    main()


# =============================================================================
# File: trygvis/eda/digikey/rdf.py  (new file)
# =============================================================================
# RDF namespaces used for every node and predicate emitted by this package.
from rdflib import Namespace

DIGIKEY = Namespace("https://trygvis.io/purl/digikey#")
DIGIKEY_ATTRIBUTE_TYPE = Namespace("https://trygvis.io/purl/digikey-attribute-type#")
DIGIKEY_ATTRIBUTE_VALUE = Namespace("https://trygvis.io/purl/digikey-attribute-value#")
DIGIKEY_PART = Namespace("https://trygvis.io/purl/digikey-part#")
DIGIKEY_PRODUCT_CATEGORY = Namespace("https://trygvis.io/purl/digikey-product-category#")