aboutsummaryrefslogtreecommitdiff
path: root/src/ee/digikey
diff options
context:
space:
mode:
author Trygve Laugstøl <trygvis@inamo.no> 2019-10-16 06:12:09 +0200
committer Trygve Laugstøl <trygvis@inamo.no> 2019-10-16 08:49:48 +0200
commit 038152393727bfafc26f25e3e5c14e6f1219e07a (patch)
tree 131c6ade35a4278eb40d787cb421bf74e4532026 /src/ee/digikey
parent 73f151ec1da4ca78d4b9326f7efc81e0fad05159 (diff)
downloadee-python-038152393727bfafc26f25e3e5c14e6f1219e07a.tar.gz
ee-python-038152393727bfafc26f25e3e5c14e6f1219e07a.tar.bz2
ee-python-038152393727bfafc26f25e3e5c14e6f1219e07a.tar.xz
ee-python-038152393727bfafc26f25e3e5c14e6f1219e07a.zip
part: Adding category concept.
Useful for later classifying components.
Diffstat (limited to 'src/ee/digikey')
-rw-r--r--src/ee/digikey/__init__.py57
-rw-r--r--src/ee/digikey/search_parts.py23
2 files changed, 45 insertions, 35 deletions
diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py
index a318024..b993c31 100644
--- a/src/ee/digikey/__init__.py
+++ b/src/ee/digikey/__init__.py
@@ -16,6 +16,7 @@ from selenium import webdriver
import ee._utils
from ee import EeException
from ee.money import Money, get_default_context
+from ee.part import Category
from ee.tools import mk_parents
money = get_default_context()
@@ -135,12 +136,13 @@ class Document(object):
@total_ordering
class DigikeyProduct(object):
- def __init__(self, part_number, mpn, url, attributes: List["DigikeyAttributeValue"] = None, categories=None):
+ def __init__(self, part_number, mpn, url, attributes: List["DigikeyAttributeValue"] = None,
+ categories: List[Category] = None):
self.part_number = _clean(part_number)
self.mpn = _clean(mpn)
self.url = url
self.attributes = attributes or [] # type: List["DigikeyAttributeValue"]
- self.categories = categories or []
+ self.categories = categories or [] # type: List[Category]
self.quantity_available = None
self.description = None
self.price_breaks: List[PriceBreak] = []
@@ -158,6 +160,10 @@ class DigikeyProduct(object):
def __hash__(self):
return self.part_number.__hash__()
+ @property
+ def uri(self):
+ return "https://digikey.com/pn#{}".format(self.part_number)
+
def attribute_by_id(self, _id):
return next((a for a in self.attributes if a.attribute_type.id == _id), None)
@@ -228,33 +234,6 @@ class DigikeyAttributeValue(object):
assert self.attribute_type
-@total_ordering
-class DigikeyProductCategory(object):
- def __init__(self, _id, label, digikey_url=None, parent=None):
- self.id = _clean(_id)
- self.label = _clean(label)
- self.digikey_url = digikey_url if digikey_url is None or digikey_url.startswith("http") else \
- "https://www.digikey.com" + digikey_url
- self.parent: DigikeyProductCategory = parent
- self.subCategories: List[DigikeyProductCategory] = []
-
- assert self.id
- assert self.label
-
- def __eq__(self, other: "DigikeyProductCategory"):
- return self.id == other.id
-
- def __lt__(self, other: "DigikeyProductCategory") -> bool:
- return self.label < other.label
-
- def add_sub_category(self, _id, label, digikey_url):
- sc = DigikeyProductCategory(_id, label, digikey_url=digikey_url, parent=self)
- self.subCategories.append(sc)
-
- def find_sub_category_by_label(self, label):
- return next((sc for sc in self.subCategories if sc.label == label), None)
-
-
class SearchResponseTypes(enum.Enum):
MANY = 1 # A product table was returned.
SINGLE = 2 # A product page was returned
@@ -413,6 +392,24 @@ class DigikeyParser(object):
a_type = self.digikey.get_attribute_type(attribute_type_id, label)
attributes.append(DigikeyAttributeValue(value, a_type))
+ # links = tree.xpath("//table[@id='product-attribute-table']//*[class='attributes-td-categories-link']")
+ links = tree.xpath("//*[@class='attributes-td-categories-link']/a")
+ parent = None
+ for a in links:
+ name = _clean(a.text)
+ href = _clean(a.get("href"))
+ if not href:
+ continue
+ m = re.fullmatch(".*/([0-9]+)", href)
+ href = self.ensure_absolute_url(origin_url, href)
+
+ if m and name and href:
+ category_id = m.group(1)
+ uri = "https://digikey.com/category#{}".format(category_id)
+ c = Category(uri, name, href, parent)
+ parent = c
+ categories.append(c)
+
if part_number and mpn:
p = DigikeyProduct(part_number, mpn, url, attributes, categories)
p.price_breaks = self._parse_price_breaks(tree)
@@ -565,7 +562,7 @@ class DigikeyParser(object):
return DigikeySearchResponse(1, SearchResponseTypes.NO_MATCHES)
@staticmethod
- def ensure_absolute_url(origin_url, url):
+ def ensure_absolute_url(origin_url, url: Optional[str]):
if url is None:
return
diff --git a/src/ee/digikey/search_parts.py b/src/ee/digikey/search_parts.py
index eace4e3..1f1184a 100644
--- a/src/ee/digikey/search_parts.py
+++ b/src/ee/digikey/search_parts.py
@@ -1,11 +1,11 @@
import functools
from pathlib import Path
-from typing import List, MutableSet, Mapping
+from typing import List, MutableSet, Mapping, Set
from ee import EeException
from ee.db import ObjDb
from ee.digikey import Digikey, DigikeyParser, DigikeyClient, SearchResponseTypes, DigikeyProduct, DigikeyStore
-from ee.part import PartDb, load_db, save_db, Part
+from ee.part import PartDb, load_db, save_db, Part, Category
from ee.tools import mk_parents
from ee.xml import types, uris
from ee.xml.uris import make_digikey_fact_key
@@ -71,6 +71,7 @@ class QueryEngine(object):
out_parts.add_index("spn", lambda p: [pn.value for pn in p.get_spns()], multiple=True)
self.out_parts = out_parts
+ self.categories: Set[Category] = set()
def pn_search(self, pn):
return self.pn_spn_search(pn, False)
@@ -78,6 +79,9 @@ class QueryEngine(object):
def spn_search(self, pn):
return self.pn_spn_search(pn, True)
+ def _collect_categories(self, product: DigikeyProduct):
+ [self.categories.add(c) for c in product.categories]
+
def pn_spn_search(self, pn, is_spn):
s = "Searching for '{}'".format(pn)
print(s, file=self.log)
@@ -92,7 +96,9 @@ class QueryEngine(object):
if response.response_type == SearchResponseTypes.EXCEPTION:
result = "exception"
elif response.response_type == SearchResponseTypes.SINGLE:
- out_part = resolved(self.store.url, response.products[0])
+ product = response.products[0]
+ out_part = resolved(self.store.url, product)
+ self._collect_categories(product)
out_pn = out_part.get_exactly_one_spn() if is_spn else out_part.get_exactly_one_mpn()
out_pn = out_pn.valueProp
@@ -129,7 +135,9 @@ class QueryEngine(object):
page = self.client.get_for_product_url(part.url, part.part_number)
response = self.parser.parse_string(self.client.baseurl, page)
if response.response_type == SearchResponseTypes.SINGLE:
- out_part = resolved(self.store.url, response.products[0])
+ product = response.products[0]
+ out_part = resolved(self.store.url, product)
+ self._collect_categories(product)
result = "found"
else:
print("Unable to narrow down the part, got {} new products. Giving up.".format(
@@ -153,9 +161,10 @@ class QueryEngine(object):
def resolved(supplier, p: DigikeyProduct) -> Part:
# TODO: fix uri
- xml = types.Part(uri="https://digikey.com/pn#{}".format(p.part_number),
+ xml = types.Part(uri=p.uri,
supplier=supplier,
description=p.description,
+ category=types.CategoryList(),
links=types.LinkList(),
facts=types.FactList(),
references=types.ReferenceList())
@@ -164,6 +173,9 @@ def resolved(supplier, p: DigikeyProduct) -> Part:
if p.url:
part.get_links().append(types.Link(url=p.url, relation="canonical", media_type="text/html"))
+ if len(p.categories):
+ xml.set_category(p.categories[-1].uri)
+
for d in p.documents:
title = "{}: {}".format(d.section, d.title)
relations = ["http://purl.org/ee/link-relation#documentation"]
@@ -277,4 +289,5 @@ def run_search_parts(in_path: Path, out_path: Path, log, cache_dir: Path, store_
part_db = PartDb()
for part in engine.out_parts:
part_db.add_entry(part, True)
+ part_db.categories = engine.categories
save_db(out_path, part_db, sort=True)