diff --git a/donnee.csv b/donnee.csv new file mode 100644 index 0000000..e69de29 diff --git a/main.py b/main.py index a08da22..93b2da6 100644 --- a/main.py +++ b/main.py @@ -113,7 +113,7 @@ class _ScraperData: return None val = cast(str, app_dict.get("value")).rstrip("+").split("-") - if len(val) > 1: + if len(val) > 1 and val[1] != "": val[0] = str((int(val[0]) + int(val[1])) / 2) return val[0] @@ -301,23 +301,23 @@ class Scraper: def getvins(self, subdir: str): cache: set[str] = set[str]() + page = 0 - for page in range(1, 64): + while True: + page += 1 products_list = self._geturlproductslist(f"{subdir}?page={page}") + print(f"---- {page} ----") if not products_list: break - + for product in products_list: if not isinstance(product, dict): continue link = product.get("seoKeyword") - if not link: - continue - - if link not in cache: + if link and link not in cache: try: infos = self.getjsondata(link).informations() print(infos) @@ -326,4 +326,4 @@ class Scraper: print(f"Erreur sur le produit {link}: {e}") -print(Scraper().getvins("bordeaux.html")) +# Scraper().getvins("bordeaux.html") diff --git a/test.json b/test.json deleted file mode 100644 index df6f179..0000000 --- a/test.json +++ /dev/null @@ -1,27 +0,0 @@ -Haut-Médoc,None,None,90,16.1 -Haut-Médoc,90.0,16,None,23.2 -Haut-Médoc,None,None,None,14.2 -Saint-Estèphe,93,17,96,59.0 -Pessac-Léognan,90,16.5,94,36.0 -Pessac-Léognan,89,16.5,95,39.2 -Haut-Médoc,89.0,15,92,26.8 -Haut-Médoc,92,16.5,93,65.4 -Margaux,92,16,93,64.3 -Moulis,92,15.5,93,41.4 -Haut-Médoc,None,None,None,15.2 -Pauillac,97,18,98,298.0 -Saint-Emilion,95,15,96,106.4 -Haut-Médoc,92,16,95,32.7 -Pomerol,88,16,96,92.7 -Pessac-Léognan,90,16.5,93,33.1 -Haut-Médoc,89,14.5,None,18.8 -Haut-Médoc,93.0,16,94,56.3 -Pessac-Léognan,86,None,92,31.6 -Haut-Médoc,89,16,92,25.1 -Haut-Médoc,92,16,94.5,68.7 -Saint-Estèphe,91.0,None,91.5,27.3 -Pomerol,94,16.5,95,97.5 -Margaux,93,16,95,54.5 -Pessac-Léognan,93,16.5,93,46.9 -Saint-Estèphe,92,16,96,50.1 -Pessac-Léognan,93,16.5,94,60.9