diff options
Diffstat (limited to 'src/ee/digikey/__init__.py')
-rw-r--r-- | src/ee/digikey/__init__.py | 79 |
1 files changed, 55 insertions, 24 deletions
diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py index 615d458..32308e5 100644 --- a/src/ee/digikey/__init__.py +++ b/src/ee/digikey/__init__.py @@ -6,13 +6,11 @@ import os.path import re import urllib.parse from functools import total_ordering +from pathlib import Path from typing import List, Optional -import requests -from cachecontrol import CacheControl -from cachecontrol.caches.file_cache import FileCache -from cachecontrol.heuristics import ExpiresAfter from lxml import html +from selenium import webdriver import ee._utils from ee.tools import mk_parents @@ -73,11 +71,11 @@ class Digikey(object): @total_ordering class DigikeyProduct(object): - def __init__(self, part_number, mpn, url, attributes=None, categories=None): + def __init__(self, part_number, mpn, url, attributes: List["DigikeyAttributeValue"] = None, categories=None): self.part_number = _clean(part_number) self.mpn = _clean(mpn) self.url = url - self.attributes = attributes or [] + self.attributes = attributes or [] # type: List["DigikeyAttributeValue"] self.categories = categories or [] self.quantity_available = None self.description = None @@ -156,7 +154,7 @@ class DigikeyAttributeType(object): class DigikeyAttributeValue(object): - def __init__(self, value, attribute_type): + def __init__(self, value, attribute_type: DigikeyAttributeType): self.value = value self.attribute_type = attribute_type @@ -171,8 +169,8 @@ class DigikeyProductCategory(object): self.label = _clean(label) self.digikey_url = digikey_url if digikey_url is None or digikey_url.startswith("http") else \ "https://www.digikey.com" + digikey_url - self.parent = parent # type: DigikeyProductCategory - self.subCategories = [] # type: List[DigikeyProductCategory] + self.parent: DigikeyProductCategory = parent + self.subCategories: List[DigikeyProductCategory] = [] assert self.id assert self.label @@ -203,7 +201,7 @@ class DigikeySearchResponse(object): self.count = count self.response_type = response_type - self.products = list() # type: List[DigikeyProduct] + self.products: List[DigikeyProduct] = list() def append(self, product: DigikeyProduct): self.products.append(product) @@ -213,18 +211,57 @@ class DigikeyClient(object): def __nop(self, message): pass - def __init__(self, digikey: Digikey, cache_dir=None, on_download=None): - self.digikey = digikey + def __init__(self, cache_dir: Path = None, on_download=None): self.on_download = on_download or self.__nop - cache = FileCache(cache_dir or 'digikey_cache', forever=True) - self.sess = CacheControl(requests.Session(), cache=cache, heuristic=ExpiresAfter(days=10*365)) + self.cache_dir = cache_dir or Path() + self.driver: webdriver.Chrome = None - def _req(self, url, params=None): + def search(self, query: str, page_size=10) -> str: + return self.product_search(query, page_size) + + def product_search(self, query: str, page_size=10) -> str: + params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)} + cache_key = urllib.parse.quote(query) + page = self._req("https://www.digikey.com/products/en", cache_key=cache_key, params=params) + + return page + + def _req(self, url, cache_key, params=None): if not url.startswith("http"): url = "https://www.digikey.com" + url - s = "" if not params else "?" + urllib.parse.urlencode(params) - self.on_download("Downloading {}".format(url + s)) - return self.sess.get(url, params=params) + url = url + ("" if not params else "?" + urllib.parse.urlencode(params)) + + cache_path: Optional[Path] = None + if self.cache_dir: + cache_path = self.cache_dir / "{}.html".format(cache_key) + + if cache_path.exists(): + self.on_download("Using cached {}".format(url)) + with open(str(cache_path), "r") as f: + return f.read() + + self.on_download("Downloading {}".format(url)) + + if self.driver is None: + options = webdriver.ChromeOptions() + self.driver = webdriver.Chrome(chrome_options=options) + + self.driver.get(url) + + src = self.driver.page_source + if cache_path: + cache_path.parent.mkdir(parents=True, exist_ok=True) + + with open(str(cache_path), "w") as f: + f.write(src) + assert self.cache_dir.stat().st_size > 0 + + return src + + +class DigikeyParser(object): + def __init__(self, digikey: Digikey): + self.digikey = digikey or Digikey() def _search_process_single_result(self, tree: html) -> Optional[DigikeyProduct]: attributes = [] @@ -300,12 +337,6 @@ class DigikeyClient(object): return len(products) - def search(self, query: str, page_size=10) -> DigikeySearchResponse: - params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)} - page = self._req("https://www.digikey.com/products/en", params=params) - - return self.parse_string(page.content) - def parse_string(self, page_content: str): tree = html.fromstring(page_content) |