aboutsummaryrefslogtreecommitdiff
path: root/src/ee/digikey/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/ee/digikey/__init__.py')
-rw-r--r--src/ee/digikey/__init__.py79
1 files changed, 55 insertions, 24 deletions
diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py
index 615d458..32308e5 100644
--- a/src/ee/digikey/__init__.py
+++ b/src/ee/digikey/__init__.py
@@ -6,13 +6,11 @@ import os.path
import re
import urllib.parse
from functools import total_ordering
+from pathlib import Path
from typing import List, Optional
-import requests
-from cachecontrol import CacheControl
-from cachecontrol.caches.file_cache import FileCache
-from cachecontrol.heuristics import ExpiresAfter
from lxml import html
+from selenium import webdriver
import ee._utils
from ee.tools import mk_parents
@@ -73,11 +71,11 @@ class Digikey(object):
@total_ordering
class DigikeyProduct(object):
- def __init__(self, part_number, mpn, url, attributes=None, categories=None):
+ def __init__(self, part_number, mpn, url, attributes: List["DigikeyAttributeValue"] = None, categories=None):
self.part_number = _clean(part_number)
self.mpn = _clean(mpn)
self.url = url
- self.attributes = attributes or []
+ self.attributes = attributes or [] # type: List["DigikeyAttributeValue"]
self.categories = categories or []
self.quantity_available = None
self.description = None
@@ -156,7 +154,7 @@ class DigikeyAttributeType(object):
class DigikeyAttributeValue(object):
- def __init__(self, value, attribute_type):
+ def __init__(self, value, attribute_type: DigikeyAttributeType):
self.value = value
self.attribute_type = attribute_type
@@ -171,8 +169,8 @@ class DigikeyProductCategory(object):
self.label = _clean(label)
self.digikey_url = digikey_url if digikey_url is None or digikey_url.startswith("http") else \
"https://www.digikey.com" + digikey_url
- self.parent = parent # type: DigikeyProductCategory
- self.subCategories = [] # type: List[DigikeyProductCategory]
+ self.parent: DigikeyProductCategory = parent
+ self.subCategories: List[DigikeyProductCategory] = []
assert self.id
assert self.label
@@ -203,7 +201,7 @@ class DigikeySearchResponse(object):
self.count = count
self.response_type = response_type
- self.products = list() # type: List[DigikeyProduct]
+ self.products: List[DigikeyProduct] = list()
def append(self, product: DigikeyProduct):
self.products.append(product)
@@ -213,18 +211,57 @@ class DigikeyClient(object):
def __nop(self, message):
pass
- def __init__(self, digikey: Digikey, cache_dir=None, on_download=None):
- self.digikey = digikey
+ def __init__(self, cache_dir: Path = None, on_download=None):
self.on_download = on_download or self.__nop
- cache = FileCache(cache_dir or 'digikey_cache', forever=True)
- self.sess = CacheControl(requests.Session(), cache=cache, heuristic=ExpiresAfter(days=10*365))
+ self.cache_dir = cache_dir or Path()
+ self.driver: webdriver.Chrome = None
- def _req(self, url, params=None):
+ def search(self, query: str, page_size=10) -> str:
+ return self.product_search(query, page_size)
+
+ def product_search(self, query: str, page_size=10) -> str:
+ params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)}
+ cache_key = urllib.parse.quote(query)
+ page = self._req("https://www.digikey.com/products/en", cache_key=cache_key, params=params)
+
+ return page
+
+ def _req(self, url, cache_key, params=None):
if not url.startswith("http"):
url = "https://www.digikey.com" + url
- s = "" if not params else "?" + urllib.parse.urlencode(params)
- self.on_download("Downloading {}".format(url + s))
- return self.sess.get(url, params=params)
+ url = url + ("" if not params else "?" + urllib.parse.urlencode(params))
+
+ cache_path: Optional[Path] = None
+ if self.cache_dir:
+ cache_path = self.cache_dir / "{}.html".format(cache_key)
+
+ if cache_path.exists():
+ self.on_download("Using cached {}".format(url))
+ with open(str(cache_path), "r") as f:
+ return f.read()
+
+ self.on_download("Downloading {}".format(url))
+
+ if self.driver is None:
+ options = webdriver.ChromeOptions()
+ self.driver = webdriver.Chrome(chrome_options=options)
+
+ self.driver.get(url)
+
+ src = self.driver.page_source
+ if cache_path:
+ cache_path.parent.mkdir(parents=True, exist_ok=True)
+
+ with open(str(cache_path), "w") as f:
+ f.write(src)
+ assert self.cache_dir.stat().st_size > 0
+
+ return src
+
+
+class DigikeyParser(object):
+ def __init__(self, digikey: Digikey):
+ self.digikey = digikey or Digikey()
def _search_process_single_result(self, tree: html) -> Optional[DigikeyProduct]:
attributes = []
@@ -300,12 +337,6 @@ class DigikeyClient(object):
return len(products)
- def search(self, query: str, page_size=10) -> DigikeySearchResponse:
- params = {'lang': 'en', 'site': 'us', 'keywords': query, 'pageSize': str(page_size)}
- page = self._req("https://www.digikey.com/products/en", params=params)
-
- return self.parse_string(page.content)
-
def parse_string(self, page_content: str):
tree = html.fromstring(page_content)