From 0fe32987bedeafee23c6051cb9d9bb6024a559a8 Mon Sep 17 00:00:00 2001
From: Trygve Laugstøl
Date: Tue, 15 Oct 2019 12:46:48 +0200
Subject: digikey: Handling updated web site.

---
 src/ee/digikey/__init__.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/src/ee/digikey/__init__.py b/src/ee/digikey/__init__.py
index e624db9..a318024 100644
--- a/src/ee/digikey/__init__.py
+++ b/src/ee/digikey/__init__.py
@@ -298,10 +298,18 @@ class DigikeyClient(object):
             self.on_download("Using cached {}".format(cache_key))
             return cached
 
+        def find_country_select():
+            # return self._find_first_visible(".//*[@id='evergage-tooltip']")
+            return self._find_first_visible(".//*[contains(@id, 'evergage-tooltip-')]")
+
         def find_form():
             return self._find_first_visible(".//input[@name='keywords']"), \
                    self._find_first_visible("//*[@id='header-search-button']")
 
+        country_select = find_country_select()
+        if country_select is not None:
+            country_select.click()
+
         form_input, form_button = find_form()
         if not form_input or not form_button:
             self.driver.get(self.baseurl)
@@ -484,19 +492,24 @@ class DigikeyParser(object):
 
         return docs
 
-    def _handle_product_table(self, origin_url, tree: html, res: DigikeySearchResponse):
-        products = tree.xpath("//*[@itemtype='http://schema.org/Product']")
+    def _handle_product_table(self, origin_url, product_table: html, res: DigikeySearchResponse):
+        products = product_table.xpath("./tbody/tr")
 
         for product in products:
-            url = _first((a.get("href") for a in product.xpath(".//*[@class='tr-image']//a[@href]")))
+            url = _first((a.get("href") for a in product.xpath(".//*[contains(@class, 'tr-image')]//a[@href]")))
             url = self.ensure_absolute_url(origin_url, url)
-            part_number = _first(product.xpath(".//*[@itemprop='productid' and @content]"))
-            mpn = _first(product.xpath(".//*[@itemprop='name']"))
+            part_number = _first(product.xpath(".//*[contains(@class, 'tr-dkPartNumber')]/*/text()"))
+            mpn = _first(product.xpath(".//*[contains(@class, 'tr-mfgPartNumber')]"))
+
+            mpn = _to_string(mpn)
+
+            part_number = _clean(part_number).replace('sku:', '')
+            mpn = _clean(mpn)
 
             if part_number is not None and mpn is not None:
                 res.append(DigikeyProduct(
-                    part_number.get("content").strip().replace('sku:', ''),
-                    mpn.text, url))
+                    part_number,
+                    mpn, url))
 
         return len(products)
 
-- 
cgit v1.2.3