aboutsummaryrefslogtreecommitdiff
path: root/trygvis/eda/digikey/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'trygvis/eda/digikey/__init__.py')
-rw-r--r--trygvis/eda/digikey/__init__.py300
1 files changed, 300 insertions, 0 deletions
diff --git a/trygvis/eda/digikey/__init__.py b/trygvis/eda/digikey/__init__.py
new file mode 100644
index 0000000..5f4ad8a
--- /dev/null
+++ b/trygvis/eda/digikey/__init__.py
@@ -0,0 +1,300 @@
+import re
+
+import requests
+from cachecontrol import CacheControl
+from cachecontrol.caches.file_cache import FileCache
+from cachecontrol.heuristics import ExpiresAfter
+from lxml import html
+from rdflib import Literal
+from rdflib.namespace import RDF, RDFS
+
+import trygvis.eda.digikey.rdf
+
def normalize_filename(part):
    """Return ``part`` with every '/' and ' ' replaced by '_'."""
    sanitized = part
    for unsafe in ('/', ' '):
        sanitized = sanitized.replace(unsafe, '_')
    return sanitized
+
+def _clean(s):
+ if s is None:
+ return None
+ s = s.strip()
+ return None if len(s) == 0 else s
+
+
class DigikeyDatabase(object):
    """In-memory registry of product categories and attribute types."""

    def __init__(self):
        # Top-level categories, kept in insertion order.
        self.productCategories = []
        # Attribute types keyed by their id.
        self.attributeTypes = {}

    def add_product_category(self, pc):
        """Append the category ``pc`` to the registry."""
        self.productCategories.append(pc)

    def find_category(self, label):
        """Return the first registered category whose label equals ``label``, else None."""
        for category in self.productCategories:
            if category.label == label:
                return category
        return None

    def find_sub_category_by_url(self, url):
        """Return the first sub-category whose ``url()`` equals ``url``, else None."""
        candidates = (
            sub
            for parent in self.productCategories
            for sub in parent.subCategories
        )
        return next((sub for sub in candidates if sub.url() == url), None)

    def merge_attribute_types(self, attributeTypes):
        """Register every attribute type whose id is not already known.

        Types whose id is already present are left untouched; the first one
        registered wins.
        """
        for attribute_type in attributeTypes:
            # TODO: implement merging
            self.attributeTypes.setdefault(attribute_type.id, attribute_type)

    def find_type(self, id):
        """Return the attribute type registered under ``id``, else None."""
        return self.attributeTypes.get(id)
+
+
class DigikeyProductCategory(object):
    """A product category, optionally nested under a parent category."""

    def __init__(self, id, label, digikey_url=None, parent=None):
        """Store the cleaned id and label and normalise the category URL.

        ``id`` and ``label`` are passed through ``_clean``; an
        AssertionError is raised if either ends up as None.
        """
        self.id = _clean(id)
        self.label = _clean(label)
        # None and URLs already starting with "http" are stored as given;
        # anything else is treated as a path on www.digikey.com.
        if digikey_url is not None and not digikey_url.startswith("http"):
            digikey_url = "http://www.digikey.com" + digikey_url
        self.digikey_url = digikey_url
        self.parent = parent
        self.subCategories = []

        assert self.id is not None
        assert self.label is not None

    def add_sub_category(self, id, label, digikey_url):
        """Create a child category with this category as parent and store it."""
        child = DigikeyProductCategory(id, label, digikey_url=digikey_url, parent=self)
        self.subCategories.append(child)

    def find_sub_category_by_label(self, label):
        """Return the first direct sub-category whose label equals ``label``, else None."""
        for child in self.subCategories:
            if child.label == label:
                return child
        return None

    def url(self):
        """Return the RDF identifier of this category."""
        return rdf.DIGIKEY_PRODUCT_CATEGORY[self.id]

    def to_nodes(self):
        """Return the list of (subject, predicate, object) triples for this category."""
        subject = self.url()
        triples = [
            (subject, RDF.type, rdf.DIGIKEY.productCategory),
            (subject, RDFS.label, Literal(self.label)),
        ]
        if self.parent is not None:
            parent_ref = rdf.DIGIKEY_PRODUCT_CATEGORY[self.parent.id]
            triples.append((subject, rdf.DIGIKEY.parent, parent_ref))
        if self.digikey_url is not None:
            triples.append((subject, rdf.DIGIKEY.url, Literal(self.digikey_url)))
        return triples
+
+
class DigikeyAttributeType(object):
    """A filterable attribute of a category together with its selectable options."""

    def __init__(self, category, id, label, options):
        """Store the attribute type; ``id`` and ``label`` are passed through ``_clean``.

        Raises AssertionError if any of the four stored fields ends up as None.
        """
        self.category = category
        self.options = options
        self.id = _clean(id)
        self.label = _clean(label)

        assert self.category is not None
        assert self.id is not None
        assert self.label is not None
        assert self.options is not None

    def to_nodes(self):
        """Return the triples describing this type and each of its options."""
        subject = rdf.DIGIKEY_ATTRIBUTE_TYPE[self.id]
        triples = [
            (subject, RDF.type, rdf.DIGIKEY.attributeType),
            (subject, RDFS.label, Literal(self.label)),
        ]

        for option in self.options:
            # Option identifiers are namespaced by the owning type id.
            option_ref = rdf.DIGIKEY_ATTRIBUTE_VALUE[self.id + '-' + option.id]
            triples.append((option_ref, rdf.DIGIKEY.id, Literal(option.id)))
            triples.append((option_ref, RDFS.label, Literal(option.label)))
            triples.append((subject, rdf.DIGIKEY.value, option_ref))
        return triples
+
+
class DigikeyAttributeValue(object):
    """One value of an attribute type.

    The owning type is referenced either through ``type`` (an object) or
    through the plain ``type_id`` / ``type_label`` pair.
    """

    def __init__(self, id, label, type=None, type_id=None, type_label=None):
        """Store the value; ``id`` and ``label`` are passed through ``_clean``.

        Raises AssertionError if the cleaned id or label is None.
        """
        self.type = type
        self.type_id = type_id
        self.type_label = type_label
        self.id = _clean(id)
        self.label = _clean(label)

        assert self.id is not None
        assert self.label is not None
+
+
class DigikeyProduct(object):
    """A single product with its attribute values and categories."""

    def __init__(self, part_id, part_number, values, categories):
        """Store the product; ``part_id`` and ``part_number`` are passed through ``_clean``.

        ``values`` is an iterable of attribute values and ``categories`` an
        iterable of objects exposing ``url()``. ``quantity_available`` and
        ``description`` start out as None and are filled in by the caller.

        Raises AssertionError if the cleaned part id or part number is None.
        """
        self.part_id = _clean(part_id)
        self.part_number = _clean(part_number)
        self.values = values
        self.categories = categories
        self.quantity_available = None
        self.description = None

        assert self.part_id is not None
        assert self.part_number is not None

    def to_nodes(self):
        """Return the list of (subject, predicate, object) triples for this product.

        The label triple is only emitted when a description has been set.
        """
        node = rdf.DIGIKEY_PART[self.part_id]
        nodes = [
            (node, RDF.type, rdf.DIGIKEY.part),
            (node, rdf.DIGIKEY.partNumber, Literal(self.part_number)),
        ]
        if self.description is not None:
            nodes.append((node, RDFS.label, Literal(self.description)))

        for v in self.values:
            # A value carries either a resolved type object or just the raw
            # type id; the attribute on the value is named ``type_id``.
            type_id = v.type.id if v.type is not None else v.type_id
            nodes.append((
                node,
                rdf.DIGIKEY['attribute-value'],
                rdf.DIGIKEY_ATTRIBUTE_VALUE[type_id + '-' + v.id],
            ))

        for c in self.categories:
            nodes.append((node, rdf.DIGIKEY.category, c.url()))
        return nodes
+
+
class DigikeyClient(object):
    """HTTP client for www.digikey.com backed by an on-disk response cache."""

    def __init__(self):
        """Create a requests session wrapped in CacheControl with a file cache."""
        cache = FileCache('digikey_cache', forever=True)
        self.sess = CacheControl(requests.Session(), cache=cache, heuristic=ExpiresAfter(days=1))

    def req(self, url, params=None):
        """GET ``url`` through the cached session and return the response.

        ``url`` may be absolute (``http://`` or ``https://``) or a path on
        www.digikey.com, in which case the site prefix is prepended.
        ``params`` is forwarded as the query parameters.
        """
        # Accept both schemes so an absolute https URL is not mistaken for a
        # site-relative path and prefixed a second time.
        if not url.startswith(("http://", "https://")):
            url = "http://www.digikey.com" + url
        return self.sess.get(url, params=params)
+
+
+def _to_string(e):
+ s = ""
+ for t in e.itertext():
+ s += t
+ return s.strip()
+
+
+def _id_from_url(url):
+ if url is None:
+ return None
+ m = re.search(r".*/([0-9]+)", url)
+ return m.group(1) if m else None
+
+
def download_category_tree(database, client, baseurl="http://www.digikey.com/products/en"):
    """Fetch the category index page and add its categories to ``database``.

    Every ``<h2>`` whose class contains ``catfiltertopitem`` becomes a
    top-level category. Its id is taken from the ``href`` of its child
    elements; headings without a usable id are skipped. If the element that
    follows the heading (after an optional ``<span>``) is a ``<ul>``, each
    ``./li/a`` link with a numeric id becomes a sub-category.

    :param database: object providing ``add_product_category``
    :param client: object providing ``req(url)`` returning a response with ``content``
    :param baseurl: URL of the category index page
    """
    page = client.req(baseurl)
    dom = html.fromstring(page.content)

    for h2 in dom.xpath("//h2[contains(@class, 'catfiltertopitem')]"):
        label = _to_string(h2)

        # Keep the last id that could be parsed; a later child without an id
        # must not erase one that was already found.
        category_id = None
        for child in h2:
            child_id = _id_from_url(child.get('href'))
            if child_id is not None:
                category_id = child_id
        if category_id is None:
            continue

        pc = DigikeyProductCategory(category_id, label)

        # getnext() returns None when the heading has no following sibling.
        sibling = h2.getnext()
        if sibling is not None and sibling.tag == 'span':
            sibling = sibling.getnext()
        if sibling is not None and sibling.tag == 'ul':
            for a in sibling.xpath('./li/a'):
                url = a.get('href')
                sub_id = _id_from_url(url)
                if sub_id is None:
                    continue
                pc.add_sub_category(sub_id, _to_string(a), url)

        database.add_product_category(pc)
+
+
def download_attribute_types_from_category(category, client):
    """Fetch a category page and return the attribute types found in its filter form.

    For every ``<select>`` with class ``filter-selectors`` inside a
    ``search-form`` form, the header text at the same column position is
    used as the attribute label and the select's ``name`` as its id. Each
    ``<option>`` becomes a ``DigikeyAttributeValue`` linked to the type.
    Selects with no header at their column position are skipped.

    :param category: category whose ``digikey_url`` is fetched
    :param client: object providing ``req(url)`` returning a response with ``content``
    :return: list of ``DigikeyAttributeType``
    """
    page = client.req(category.digikey_url)
    tree = html.fromstring(page.content)

    attributes = []
    for form in tree.xpath("//form[contains(@class, 'search-form')]"):
        print('form: ' + str(form))
        headers = form.xpath(".//tr[@id='appliedFilterHeaderRow']/th/text()")
        print("headers: " + str(headers))
        for select in form.xpath(".//td/select[contains(@class, 'filter-selectors')]"):
            td = select.getparent()
            # Position of the cell within its row, used to look up the header.
            index = td.getparent().index(td)
            try:
                attribute_label = headers[index]
            except IndexError:
                # No header for this column.
                continue
            attribute_id = select.get('name')
            print("label: " + attribute_label + ", id: " + attribute_id)

            # The type keeps a reference to this list, so options appended
            # below become visible through it.
            options = []
            attribute_type = DigikeyAttributeType(category, attribute_id, attribute_label, options)
            for option in select.xpath("./option"):
                option_id = option.get('value')
                option_label = _to_string(option)
                options.append(DigikeyAttributeValue(option_id, option_label, type=attribute_type))
            attributes.append(attribute_type)

    return attributes
+
+
def download_product(client, db, query):
    """Search for ``query`` and build a ``DigikeyProduct`` from the result page.

    Attribute values come from three-cell rows of the
    ``attributes-table-main`` tables: first cell is the type label, second
    the value label, and the ``<input name=... value=...>`` in the third
    supplies the type id and value id. Rows carrying an ``id`` attribute,
    rows without such an input, rows with a blank id or label, and rows
    whose input is named ``t`` (the category selector) are skipped.
    Categories come from the links inside
    ``td.attributes-td-categories-link`` cells, labelled with the link text.

    :param client: object providing ``req(url, params=...)``
    :param db: unused by this function
    :param query: search keywords, e.g. a part number
    :return: the populated ``DigikeyProduct``
    :raises AssertionError: from ``DigikeyProduct`` when the page carries no
        part id or no product id
    """
    # http://www.digikey.com/products/en?x=0&y=0&lang=en&site=us&keywords=553-2320-1-ND
    page = client.req("http://www.digikey.com/products/en", params={'lang': 'en', 'site': 'us', 'keywords': query})
    tree = html.fromstring(page.content)

    values = []
    categories = []
    for table in tree.xpath("//table[contains(@class, 'attributes-table-main')]"):
        for tr in table.xpath(".//tr"):
            if tr.get("id") is not None:
                continue
            cells = tr.xpath("./th | ./td")
            if len(cells) != 3:
                continue
            type_label = _to_string(cells[0])
            label = _to_string(cells[1])

            # Reset per row so nothing leaks in from a previous row. A
            # for/else must not be used here: without a break its else
            # branch always runs and would wipe the id that was just read.
            type_id = value_id = None
            for field in cells[2].xpath("./input[@name]"):
                type_id = field.get("name")
                value_id = field.get("value")

            if type_id is None or _clean(value_id) is None or not label:
                continue
            if type_id == "t":  # categories are handled later
                continue
            values.append(DigikeyAttributeValue(value_id, label, type_id=type_id, type_label=type_label))

        for td in table.xpath(".//td[@class='attributes-td-categories-link']"):
            category_id = category_label = url = None
            # With several links in the cell the last one wins.
            for a in td.xpath(".//a[@href]"):
                url = a.get("href")
                category_id = _id_from_url(url)
                category_label = _to_string(a)

            if category_id is None or not category_label:
                continue
            categories.append(DigikeyProductCategory(category_id, category_label, digikey_url=url))

    part_id = part_number = None
    for n in tree.xpath("//input[@name='partid' and @value]"):
        part_id = n.get("value")
    for n in tree.xpath("//*[@itemprop='productID' and @content]"):
        # Strip the prefix here so a page without a product id does not
        # trigger a .replace() call on None.
        part_number = n.get("content").replace('sku:', '')

    p = DigikeyProduct(part_id, part_number, values, categories)
    for n in tree.xpath("//*[@itemprop='description']"):
        p.description = _to_string(n)
    return p