From 3a90ab0dbf5826bc7476971cd163c9a080d2fb2f Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Fri, 8 Mar 2019 22:30:36 +0100 Subject: Digikey: extracting http caching into its own class. --- src/ee/_utils.py | 38 +++++++++++++++++++++++++++++++++++- src/ee/digikey/__init__.py | 19 +++++------------- src/ee/tools/digikey_search_parts.py | 2 +- 3 files changed, 43 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/ee/_utils.py b/src/ee/_utils.py index 29b039a..f917847 100644 --- a/src/ee/_utils.py +++ b/src/ee/_utils.py @@ -1,4 +1,5 @@ -from typing import List +from pathlib import Path +from typing import List, Optional def ensure_has_columns(df: "pandas.DataFrame", columns: List[str]): @@ -40,3 +41,38 @@ def all(filters): return True return f + + +class HttpCache(object): + def __init__(self, path: Path): + self.path = path + + def lookup(self, key): + cache_path = self._make_path(key) + + if cache_path.exists(): + with open(str(cache_path), "r") as f: + return f.read() + + def save(self, key, value): + cache_path = self._make_path(key) + cache_path.parent.mkdir(parents=True, exist_ok=True) + + with cache_path.open("w") as f: + f.write(value) + + def _make_path(self, key) -> Path: + return self.path / "{}.html".format(key) + + +class EmptyHttpCache(object): + @staticmethod + def lookup(key): + return None + + def save(self, key, value): + pass + + +def maybe_cache(path: Optional[Path]) -> HttpCache: + return HttpCache(path) if path is not None else EmptyHttpCache() diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py index 32308e5..6baae84 100644 --- a/src/ee/digikey/__init__.py +++ b/src/ee/digikey/__init__.py @@ -213,7 +213,7 @@ class DigikeyClient(object): def __init__(self, cache_dir: Path = None, on_download=None): self.on_download = on_download or self.__nop - self.cache_dir = cache_dir or Path() + self.cache = ee._utils.maybe_cache(cache_dir) self.driver: webdriver.Chrome = None def search(self, query: str, page_size=10) -> str: @@ -231,14 +231,10 @@ class DigikeyClient(object): url = "https://www.digikey.com" + url url = url + ("" if not params else "?" + urllib.parse.urlencode(params)) - cache_path: Optional[Path] = None - if self.cache_dir: - cache_path = self.cache_dir / "{}.html".format(cache_key) - - if cache_path.exists(): + cached = self.cache.lookup(cache_key) + if cached: self.on_download("Using cached {}".format(url)) - with open(str(cache_path), "r") as f: - return f.read() + return cached self.on_download("Downloading {}".format(url)) @@ -249,12 +245,7 @@ class DigikeyClient(object): self.driver.get(url) src = self.driver.page_source - if cache_path: - cache_path.parent.mkdir(parents=True, exist_ok=True) - - with open(str(cache_path), "w") as f: - f.write(src) - assert self.cache_dir.stat().st_size > 0 + self.cache.save(cache_key, src) return src diff --git a/src/ee/tools/digikey_search_parts.py b/src/ee/tools/digikey_search_parts.py index 80393eb..07c3017 100644 --- a/src/ee/tools/digikey_search_parts.py +++ b/src/ee/tools/digikey_search_parts.py @@ -16,7 +16,7 @@ parser.add_argument("--out", args = parser.parse_args() -cache_dir = ".ee/cache" +cache_dir = ".ee/cache/digikey" force = True search_parts(Path(args.in_), Path(args.out), Path(cache_dir), force) -- cgit v1.2.3