From bfc39db652b40f9428431c1bbeb329ba21d2359c Mon Sep 17 00:00:00 2001 From: Chahrazad650 Date: Tue, 10 Feb 2026 01:25:00 +0100 Subject: [PATCH 1/8] ajout de la fonction informations --- main.py | 15 +++++++++++++++ test_main.py | 4 ++++ 2 files changed, 19 insertions(+) diff --git a/main.py b/main.py index 5de32eb..a0a06c0 100644 --- a/main.py +++ b/main.py @@ -171,6 +171,21 @@ class _ScraperData: def getdata(self) -> dict[str, object]: return self._data + def informations(self) -> str: + """ + Retourne toutes les informations sous la forme : + "Appelation,Parker,J.Robinson,J.Suckling,Prix" + """ + + appellation = self.appellation() + parker = self.parker() + robinson = self.robinson() + suckling = self.suckling() + prix = self.prix() + + return f"{appellation},{parker},{robinson},{suckling},{prix}" + + class Scraper: """ diff --git a/test_main.py b/test_main.py index 162db92..be5f703 100644 --- a/test_main.py +++ b/test_main.py @@ -240,3 +240,7 @@ def test_prix(scraper: Scraper): assert contenu.prix() == 65.0 +def test_informations(scraper: Scraper): + contenu = scraper.getjsondata("nino-negri-5-stelle-sfursat-2022.html") + assert contenu.informations() == "Sforzato di Valtellina,91,17,93.5,65.0" + From 9f1ff1ef7b7c8063d2aceac4c9c17c41f4361423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?= Date: Tue, 10 Feb 2026 19:01:10 +0100 Subject: [PATCH 2/8] ajout(main.py): initialise la fonction getvin --- main.py | 72 ++++++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/main.py b/main.py index a0a06c0..7c4a9d3 100644 --- a/main.py +++ b/main.py @@ -37,62 +37,44 @@ class _ScraperData: def prix(self) -> float: """ - Retourne le prix unitaire d'une bouteille (75cl). + Retourne le prix unitaire d'une bouteille (75cl). - Le JSON contient plusieurs formats de vente dans content["items"] : - - bouteille seule : nbunit = 1 et equivbtl = 1 -> prix direct - - caisse de plusieurs bouteilles : nbunit > 1 -> on divise le prix total - - formats spéciaux (magnum etc.) : equivbtl > 1 -> même calcul + Le JSON contient plusieurs formats de vente dans content["items"] : + - bouteille seule : nbunit = 1 et equivbtl = 1 -> prix direct + - caisse de plusieurs bouteilles : nbunit > 1 -> on divise le prix total + - formats spéciaux (magnum etc.) : equivbtl > 1 -> même calcul - Formule générale : - prix_unitaire = offerPrice / (nbunit * equivbtl) + Formule générale : + prix_unitaire = offerPrice / (nbunit * equivbtl) + """ - """ + content = self._getcontent() - content = self._getcontent() - - # si content n'existe pas -> erreur if content is None: raise ValueError("Contenu introuvable") - # On récupère la liste des formats disponibles (bouteille, carton...) items = content.get("items") - # Vérification que items est bien une liste non vide if not isinstance(items, list) or len(items) == 0: raise ValueError("Aucun prix disponible (items vide)") - # -------------------------- - # CAS 1 : bouteille unitaire - # -------------------------- - # On cherche un format où nbunit=1 et equivbtl=1 ->bouteille standard 75cl for item in items: - + if not isinstance(item, dict): continue - # On récupère les attributs du format attrs = item.get("attributes", {}) - # On récupère nbunit et equivbtl nbunit = attrs.get("nbunit", {}).get("value") equivbtl = attrs.get("equivbtl", {}).get("value") - # Si c'est une bouteille unitaire if nbunit == "1" and equivbtl == "1": - p = item.get("offerPrice") - # Vérification que c'est bien un nombre if isinstance(p, (int, float)): return float(p) - # -------------------------- - # CAS 2 : caisse ou autre format - # -------------------------- - # On calcule le prix unitaire à partir du prix total for item in items: - if not isinstance(item, dict): continue @@ -102,22 +84,12 @@ class _ScraperData: nbunit = attrs.get("nbunit", {}).get("value") equivbtl = attrs.get("equivbtl", {}).get("value") - # Vérification que toutes les valeurs existent if isinstance(p, (int, float)) and nbunit and equivbtl: - - # Calcul du nombre total de bouteilles équivalentes denom = float(nbunit) * float(equivbtl) - # Évite division par zéro if denom > 0: - - # Calcul du prix unitaire prix_unitaire = float(p) / denom - - # Arrondi à 2 décimales return round(prix_unitaire, 2) - - # Si aucun prix trouvé raise ValueError("Impossible de trouver le prix unitaire.") def appellation(self) -> str | None: @@ -186,7 +158,6 @@ class _ScraperData: return f"{appellation},{parker},{robinson},{suckling},{prix}" - class Scraper: """ Scraper est une classe qui permet de gerer @@ -322,3 +293,26 @@ class Scraper: return _ScraperData(cast(dict[str, object], current_data)) + +def getvins(subdir: str, n: int) -> None: + """_summary_ + + Args: + subdir (str): _description_ + n (int): nombre de page recherché + """ + scraper: Scraper = Scraper() + for i in range(1, n+1): + j = 0 + while True: + try: + var = scraper.getjsondata(subdir=f"{subdir}?page={i}").getdata()["initialReduxState"]["categ"]["content"]["products"][j]["seoKeyword"] + print(scraper.getjsondata(var).informations()) + j+=1 + except: + break + + print(f"--- fin {i}e page ---") +# https://www.millesima.fr/bordeaux.html?page=1 + +getvins("bordeaux.html", 1) From 2aa99453a04fbdf23189048850b978d3d9c33d2c Mon Sep 17 00:00:00 2001 From: Chahrazad650 Date: Tue, 10 Feb 2026 19:39:47 +0100 Subject: [PATCH 3/8] modefication fonction prix() return None+tests --- main.py | 111 ++++++++++++++++++++------------------------------- test_main.py | 20 +++++----- 2 files changed, 53 insertions(+), 78 deletions(-) diff --git a/main.py b/main.py index a0a06c0..b0ad7b4 100644 --- a/main.py +++ b/main.py @@ -35,90 +35,65 @@ class _ScraperData: return None return cast(dict[str, object], current_data.get("attributes")) - def prix(self) -> float: + def prix(self) -> float | None: """ - Retourne le prix unitaire d'une bouteille (75cl). + Retourne le prix unitaire d'une bouteille (75cl). - Le JSON contient plusieurs formats de vente dans content["items"] : - - bouteille seule : nbunit = 1 et equivbtl = 1 -> prix direct - - caisse de plusieurs bouteilles : nbunit > 1 -> on divise le prix total - - formats spéciaux (magnum etc.) : equivbtl > 1 -> même calcul + Si aucun prix n'est disponible, retourne None. + """ - Formule générale : - prix_unitaire = offerPrice / (nbunit * equivbtl) - - """ - - content = self._getcontent() - - # si content n'existe pas -> erreur + content = self._getcontent() + if content is None: - raise ValueError("Contenu introuvable") + return None - # On récupère la liste des formats disponibles (bouteille, carton...) items = content.get("items") - # Vérification que items est bien une liste non vide - if not isinstance(items, list) or len(items) == 0: - raise ValueError("Aucun prix disponible (items vide)") - - # -------------------------- - # CAS 1 : bouteille unitaire - # -------------------------- - # On cherche un format où nbunit=1 et equivbtl=1 ->bouteille standard 75cl - for item in items: + # Si aucun format disponible -> pas de prix + if isinstance(items, list): - if not isinstance(item, dict): - continue + if len(items) == 0: + return None - # On récupère les attributs du format - attrs = item.get("attributes", {}) + for item in items: - # On récupère nbunit et equivbtl - nbunit = attrs.get("nbunit", {}).get("value") - equivbtl = attrs.get("equivbtl", {}).get("value") + if not isinstance(item, dict): + continue - # Si c'est une bouteille unitaire - if nbunit == "1" and equivbtl == "1": + attrs = item.get("attributes", {}) + + nbunit = attrs.get("nbunit", {}).get("value") + equivbtl = attrs.get("equivbtl", {}).get("value") + + if nbunit == "1" and equivbtl == "1": + + p = item.get("offerPrice") + + if isinstance(p, (int, float)): + return float(p) + + for item in items: + + if not isinstance(item, dict): + continue p = item.get("offerPrice") + attrs = item.get("attributes", {}) - # Vérification que c'est bien un nombre - if isinstance(p, (int, float)): - return float(p) + nbunit = attrs.get("nbunit", {}).get("value") + equivbtl = attrs.get("equivbtl", {}).get("value") - # -------------------------- - # CAS 2 : caisse ou autre format - # -------------------------- - # On calcule le prix unitaire à partir du prix total - for item in items: + if isinstance(p, (int, float)) and nbunit and equivbtl: - if not isinstance(item, dict): - continue + denom = float(nbunit) * float(equivbtl) - p = item.get("offerPrice") - attrs = item.get("attributes", {}) + if denom > 0: - nbunit = attrs.get("nbunit", {}).get("value") - equivbtl = attrs.get("equivbtl", {}).get("value") + prix_unitaire = float(p) / denom - # Vérification que toutes les valeurs existent - if isinstance(p, (int, float)) and nbunit and equivbtl: + return round(prix_unitaire, 2) - # Calcul du nombre total de bouteilles équivalentes - denom = float(nbunit) * float(equivbtl) - - # Évite division par zéro - if denom > 0: - - # Calcul du prix unitaire - prix_unitaire = float(p) / denom - - # Arrondi à 2 décimales - return round(prix_unitaire, 2) - - # Si aucun prix trouvé - raise ValueError("Impossible de trouver le prix unitaire.") + return None def appellation(self) -> str | None: """_summary_ @@ -181,12 +156,14 @@ class _ScraperData: parker = self.parker() robinson = self.robinson() suckling = self.suckling() - prix = self.prix() - + try: + prix = self.prix() + except ValueError: + prix = None + return f"{appellation},{parker},{robinson},{suckling},{prix}" - class Scraper: """ Scraper est une classe qui permet de gerer diff --git a/test_main.py b/test_main.py index be5f703..2297262 100644 --- a/test_main.py +++ b/test_main.py @@ -225,22 +225,20 @@ def test_critiques(scraper: Scraper): assert contenu.suckling() == "93.5" assert contenu._getcritiques("test_ts") is None + def test_prix(scraper: Scraper): vide = scraper.getjsondata("") poubelle = scraper.getjsondata("poubelle") contenu = scraper.getjsondata("nino-negri-5-stelle-sfursat-2022.html") - - # Cas vide : items == [] -> on ne peut pas calculer -> ValueError - with pytest.raises(ValueError): - _ = vide.prix() - - # Cas poubelle : JSON incomplet -> _getcontent() None -> ValueError - with pytest.raises(ValueError): - _ = poubelle.prix() - + assert vide.prix() is None + assert poubelle.prix() is None assert contenu.prix() == 65.0 - + + def test_informations(scraper: Scraper): contenu = scraper.getjsondata("nino-negri-5-stelle-sfursat-2022.html") assert contenu.informations() == "Sforzato di Valtellina,91,17,93.5,65.0" - + vide = scraper.getjsondata("") + poubelle = scraper.getjsondata("poubelle") + assert vide.informations() == "None,None,None,None,None" + assert poubelle.informations() == "None,None,None,None,None" From cd1e266f25f7644bfb09c7a374b04136fae29981 Mon Sep 17 00:00:00 2001 From: Chahrazad650 Date: Tue, 10 Feb 2026 19:57:20 +0100 Subject: [PATCH 4/8] optimisation fonction prix() --- main.py | 76 +++++++++++++++++++++++---------------------------------- 1 file changed, 31 insertions(+), 45 deletions(-) diff --git a/main.py b/main.py index b0ad7b4..c820cd3 100644 --- a/main.py +++ b/main.py @@ -43,58 +43,44 @@ class _ScraperData: """ content = self._getcontent() - if content is None: return None items = content.get("items") - # Si aucun format disponible -> pas de prix - if isinstance(items, list): - - if len(items) == 0: - return None - - for item in items: - - if not isinstance(item, dict): - continue - - attrs = item.get("attributes", {}) - - nbunit = attrs.get("nbunit", {}).get("value") - equivbtl = attrs.get("equivbtl", {}).get("value") - - if nbunit == "1" and equivbtl == "1": - - p = item.get("offerPrice") - - if isinstance(p, (int, float)): - return float(p) - - for item in items: - - if not isinstance(item, dict): - continue - - p = item.get("offerPrice") - attrs = item.get("attributes", {}) - - nbunit = attrs.get("nbunit", {}).get("value") - equivbtl = attrs.get("equivbtl", {}).get("value") - - if isinstance(p, (int, float)) and nbunit and equivbtl: - - denom = float(nbunit) * float(equivbtl) - - if denom > 0: - - prix_unitaire = float(p) / denom - - return round(prix_unitaire, 2) - + # Vérifie que items existe et n'est pas vide + if not isinstance(items, list) or len(items) == 0: return None + prix_calcule: float | None = None + + for item in items: + + if not isinstance(item, dict): + continue + + p = item.get("offerPrice") + attrs = item.get("attributes", {}) + + nbunit = attrs.get("nbunit", {}).get("value") + equivbtl = attrs.get("equivbtl", {}).get("value") + + if not isinstance(p, (int, float)) or not nbunit or not equivbtl: + continue + + nb = float(nbunit) + eq = float(equivbtl) + + if nb <= 0 or eq <= 0: + continue + + if nb == 1 and eq == 1: + return float(p) + + prix_calcule = round(float(p) / (nb * eq), 2) + + return prix_calcule + def appellation(self) -> str | None: """_summary_ From 96dbaaaaf62dad4c09a3536da1beee72252af849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?= Date: Wed, 11 Feb 2026 23:35:56 +0100 Subject: [PATCH 5/8] ajout: fonctions de recherche --- main.py | 67 ++++++++++++++++++++++++++++++------------------------- test.json | 27 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 31 deletions(-) create mode 100644 test.json diff --git a/main.py b/main.py index 62d3d22..a08da22 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,8 @@ from typing import cast -from requests import Response, Session +from requests import HTTPError, Response, Session from bs4 import BeautifulSoup, Tag from collections import OrderedDict -from json import loads +from json import JSONDecodeError, loads class _ScraperData: @@ -284,41 +284,46 @@ class Scraper: return _ScraperData(cast(dict[str, object], current_data)) - # def _geturlsearch(self, subdir: str, index: int) -> str | None: - # data: dict[str, object] = self.getjsondata(subdir).getdata() + def _geturlproductslist(self, subdir: str): + try: + data: dict[str, object] = self.getjsondata(subdir).getdata() - # for element in ["initialReduxState", "categ", "content"]: - # data = cast(dict[str, object], data.get(element)) - # if data is None or not isinstance(data, dict): - # return None + for element in ["initialReduxState", "categ", "content"]: + data = cast(dict[str, object], data.get(element)) + if data is None or not isinstance(data, dict): + return None - # products = data.get("products") - # if not isinstance(products, list) or index >= len(products): - # return None + products = data.get("products") + if isinstance(products, list): + return products + except JSONDecodeError | HTTPError: + return None - # product = products[index] - # if isinstance(product, dict): - # return str(product.get("seoKeyword")) + def getvins(self, subdir: str): + cache: set[str] = set[str]() - # return None + for page in range(1, 64): + products_list = self._geturlproductslist(f"{subdir}?page={page}") - # def getvins(self, subdir: str) -> None: - # cache: set[str] = set[str]() + if not products_list: + break + + for product in products_list: + if not isinstance(product, dict): + continue - # for page in range(1, 2): - # index_link = 1 - # while True: - # link: str | None = self._geturlsearch( - # subdir=f"{subdir}?page={page}", index=index_link - # ) + link = product.get("seoKeyword") - # index_link+=1 - # if link is None: - # break - - # if link not in cache: - # print(self.getjsondata(link).informations()) - # cache.add(link) + if not link: + continue + + if link not in cache: + try: + infos = self.getjsondata(link).informations() + print(infos) + cache.add(link) + except JSONDecodeError | HTTPError as e: + print(f"Erreur sur le produit {link}: {e}") -# Scraper().getvins("bordeaux.html") +print(Scraper().getvins("bordeaux.html")) diff --git a/test.json b/test.json new file mode 100644 index 0000000..df6f179 --- /dev/null +++ b/test.json @@ -0,0 +1,27 @@ +Haut-Médoc,None,None,90,16.1 +Haut-Médoc,90.0,16,None,23.2 +Haut-Médoc,None,None,None,14.2 +Saint-Estèphe,93,17,96,59.0 +Pessac-Léognan,90,16.5,94,36.0 +Pessac-Léognan,89,16.5,95,39.2 +Haut-Médoc,89.0,15,92,26.8 +Haut-Médoc,92,16.5,93,65.4 +Margaux,92,16,93,64.3 +Moulis,92,15.5,93,41.4 +Haut-Médoc,None,None,None,15.2 +Pauillac,97,18,98,298.0 +Saint-Emilion,95,15,96,106.4 +Haut-Médoc,92,16,95,32.7 +Pomerol,88,16,96,92.7 +Pessac-Léognan,90,16.5,93,33.1 +Haut-Médoc,89,14.5,None,18.8 +Haut-Médoc,93.0,16,94,56.3 +Pessac-Léognan,86,None,92,31.6 +Haut-Médoc,89,16,92,25.1 +Haut-Médoc,92,16,94.5,68.7 +Saint-Estèphe,91.0,None,91.5,27.3 +Pomerol,94,16.5,95,97.5 +Margaux,93,16,95,54.5 +Pessac-Léognan,93,16.5,93,46.9 +Saint-Estèphe,92,16,96,50.1 +Pessac-Léognan,93,16.5,94,60.9 From 011bb6a6893ea39d793f778da64b85d93fa6b7f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?= Date: Wed, 11 Feb 2026 23:46:22 +0100 Subject: [PATCH 6/8] ajout(main.py): optimisation des fonctions --- donnee.csv | 0 main.py | 16 ++++++++-------- test.json | 27 --------------------------- 3 files changed, 8 insertions(+), 35 deletions(-) create mode 100644 donnee.csv delete mode 100644 test.json diff --git a/donnee.csv b/donnee.csv new file mode 100644 index 0000000..e69de29 diff --git a/main.py b/main.py index a08da22..93b2da6 100644 --- a/main.py +++ b/main.py @@ -113,7 +113,7 @@ class _ScraperData: return None val = cast(str, app_dict.get("value")).rstrip("+").split("-") - if len(val) > 1: + if len(val) > 1 and val[1] != "": val[0] = str((int(val[0]) + int(val[1])) / 2) return val[0] @@ -301,23 +301,23 @@ class Scraper: def getvins(self, subdir: str): cache: set[str] = set[str]() + page = 0 - for page in range(1, 64): + while True: + page += 1 products_list = self._geturlproductslist(f"{subdir}?page={page}") + print(f"---- {page} ----") if not products_list: break - + for product in products_list: if not isinstance(product, dict): continue link = product.get("seoKeyword") - if not link: - continue - - if link not in cache: + if link and link not in cache: try: infos = self.getjsondata(link).informations() print(infos) @@ -326,4 +326,4 @@ class Scraper: print(f"Erreur sur le produit {link}: {e}") -print(Scraper().getvins("bordeaux.html")) +# Scraper().getvins("bordeaux.html") diff --git a/test.json b/test.json deleted file mode 100644 index df6f179..0000000 --- a/test.json +++ /dev/null @@ -1,27 +0,0 @@ -Haut-Médoc,None,None,90,16.1 -Haut-Médoc,90.0,16,None,23.2 -Haut-Médoc,None,None,None,14.2 -Saint-Estèphe,93,17,96,59.0 -Pessac-Léognan,90,16.5,94,36.0 -Pessac-Léognan,89,16.5,95,39.2 -Haut-Médoc,89.0,15,92,26.8 -Haut-Médoc,92,16.5,93,65.4 -Margaux,92,16,93,64.3 -Moulis,92,15.5,93,41.4 -Haut-Médoc,None,None,None,15.2 -Pauillac,97,18,98,298.0 -Saint-Emilion,95,15,96,106.4 -Haut-Médoc,92,16,95,32.7 -Pomerol,88,16,96,92.7 -Pessac-Léognan,90,16.5,93,33.1 -Haut-Médoc,89,14.5,None,18.8 -Haut-Médoc,93.0,16,94,56.3 -Pessac-Léognan,86,None,92,31.6 -Haut-Médoc,89,16,92,25.1 -Haut-Médoc,92,16,94.5,68.7 -Saint-Estèphe,91.0,None,91.5,27.3 -Pomerol,94,16.5,95,97.5 -Margaux,93,16,95,54.5 -Pessac-Léognan,93,16.5,93,46.9 -Saint-Estèphe,92,16,96,50.1 -Pessac-Léognan,93,16.5,94,60.9 From dd430b9861dfd1a443717854ecb29acaec96de98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?= Date: Wed, 11 Feb 2026 23:58:51 +0100 Subject: [PATCH 7/8] ajout(main.py): ajout dans csv --- .gitignore | 2 ++ donnee.csv | 0 main.py | 41 ++++++++++++++++++++++------------------- 3 files changed, 24 insertions(+), 19 deletions(-) delete mode 100644 donnee.csv diff --git a/.gitignore b/.gitignore index b7faf40..e67486b 100644 --- a/.gitignore +++ b/.gitignore @@ -205,3 +205,5 @@ cython_debug/ marimo/_static/ marimo/_lsp/ __marimo__/ + +*.csv \ No newline at end of file diff --git a/donnee.csv b/donnee.csv deleted file mode 100644 index e69de29..0000000 diff --git a/main.py b/main.py index 93b2da6..0e641c4 100644 --- a/main.py +++ b/main.py @@ -299,31 +299,34 @@ class Scraper: except JSONDecodeError | HTTPError: return None - def getvins(self, subdir: str): + def getvins(self, subdir: str, filename: str): cache: set[str] = set[str]() page = 0 - while True: - page += 1 - products_list = self._geturlproductslist(f"{subdir}?page={page}") + with open(filename, 'a') as f: + while True: + page += 1 + products_list = self._geturlproductslist(f"{subdir}?page={page}") - print(f"---- {page} ----") - if not products_list: - break + print(f"---- {page} ----") + if not products_list: + break - for product in products_list: - if not isinstance(product, dict): - continue + for product in products_list: + if not isinstance(product, dict): + continue - link = product.get("seoKeyword") + link = product.get("seoKeyword") - if link and link not in cache: - try: - infos = self.getjsondata(link).informations() - print(infos) - cache.add(link) - except JSONDecodeError | HTTPError as e: - print(f"Erreur sur le produit {link}: {e}") + if link and link not in cache: + try: + infos = self.getjsondata(link).informations() + _ = f.write(infos + "\n") + print(infos) + cache.add(link) + except JSONDecodeError | HTTPError as e: + print(f"Erreur sur le produit {link}: {e}") + f.flush() -# Scraper().getvins("bordeaux.html") +Scraper().getvins("bordeaux.html", "donnee.csv") From a33b484deaffb9358b1c382a1e66a82707b6c97d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?= Date: Fri, 13 Feb 2026 17:52:11 +0100 Subject: [PATCH 8/8] ajout: test fonction 7 --- main.py | 49 ++++++++++++++++------ test_main.py | 116 +++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 136 insertions(+), 29 deletions(-) diff --git a/main.py b/main.py index 0e641c4..c548eb2 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,14 @@ from json import JSONDecodeError, loads class _ScraperData: + """_summary_ + """ def __init__(self, data: dict[str, object]) -> None: + """_summary_ + + Args: + data (dict[str, object]): _description_ + """ self._data: dict[str, object] = data def _getcontent(self) -> dict[str, object] | None: @@ -285,34 +292,49 @@ class Scraper: return _ScraperData(cast(dict[str, object], current_data)) def _geturlproductslist(self, subdir: str): + """_summary_ + + Args: + subdir (str): _description_ + + Returns: + _type_: _description_ + """ try: data: dict[str, object] = self.getjsondata(subdir).getdata() for element in ["initialReduxState", "categ", "content"]: - data = cast(dict[str, object], data.get(element)) - if data is None or not isinstance(data, dict): + data: dict[str, object] = cast(dict[str, object], data.get(element)) + if not isinstance(data, dict): return None - products = data.get("products") + products: list[str] = cast(list[str], data.get("products")) if isinstance(products, list): return products - except JSONDecodeError | HTTPError: + + except (JSONDecodeError, HTTPError): return None def getvins(self, subdir: str, filename: str): - cache: set[str] = set[str]() - page = 0 + """_summary_ + + Args: + subdir (str): _description_ + filename (str): _description_ + """ + with open(filename, "a") as f: + cache: set[str] = set[str]() + page = 0 - with open(filename, 'a') as f: while True: page += 1 products_list = self._geturlproductslist(f"{subdir}?page={page}") - print(f"---- {page} ----") if not products_list: break - for product in products_list: + products_list_length = len(products_list) + for i, product in enumerate(products_list): if not isinstance(product, dict): continue @@ -322,11 +344,14 @@ class Scraper: try: infos = self.getjsondata(link).informations() _ = f.write(infos + "\n") - print(infos) + print( + f"page: {page} | {i + 1}/{products_list_length} {link}" + ) cache.add(link) - except JSONDecodeError | HTTPError as e: + except (JSONDecodeError, HTTPError) as e: print(f"Erreur sur le produit {link}: {e}") f.flush() -Scraper().getvins("bordeaux.html", "donnee.csv") +if __name__ == "__main__": + Scraper().getvins("bordeaux.html", "donnee.csv") diff --git a/test_main.py b/test_main.py index 2297262..39bac9b 100644 --- a/test_main.py +++ b/test_main.py @@ -1,5 +1,5 @@ from json import dumps -from bs4 import Tag +from unittest.mock import patch, mock_open import pytest from requests_mock import Mocker from main import Scraper @@ -71,10 +71,10 @@ def mock_site(): "_id": "J4131/22/C/CC/6-11652", "partnumber": "J4131/22/C/CC/6", "taxRate": "H", - "listPrice": 390, - "offerPrice": 390, - "seoKeyword": "nino-negri-5-stelle-sfursat-2022-c-cc-6.html", - "shortdesc": "Un carton de 6 Bouteilles (75cl)", + "listPrice": 842, + "offerPrice": 842, + "seoKeyword": "vin-de-charazade1867.html", + "shortdesc": "Une bouteille du meilleur vin du monde?", "attributes": { "promotion_o_n": { "valueId": "0", @@ -101,9 +101,9 @@ def mock_site(): "isSpirit": False, }, "nbunit": { - "valueId": "6", + "valueId": "1", "name": "nbunit", - "value": "6", + "value": "1", "isSpirit": False, }, }, @@ -120,14 +120,14 @@ def mock_site(): "appellation": { "valueId": "433", "name": "Appellation", - "value": "Sforzato di Valtellina", - "url": "sforzato-di-valtellina.html", + "value": "Madame-Loïk", + "url": "Madame-loik.html", "isSpirit": False, "groupIdentifier": "appellation_433", }, "note_rp": { "valueId": "91", - "name": "Parker", + "name": "Peter Parker", "value": "91", "isSpirit": False, }, @@ -139,7 +139,7 @@ def mock_site(): }, "note_js": { "valueId": "93-94", - "name": "J. Suckling", + "name": "J. cherazade", "value": "93-94", "isSpirit": False, }, @@ -166,6 +166,79 @@ def mock_site(): text=html_product, ) + html_product = f""" + + +

MILLESIMA

+ + + + """ + + list_pleine = f""" + + +

LE WINE

+ + + + """ + + list_vide = f""" + + +

LE WINE

+ + + + """ + + m.get( + "https://www.millesima.fr/wine.html", + complete_qs=False, + response_list=[ + {"text": list_pleine}, + {"text": list_vide}, + ], + ) + # on return m sans fermer le server qui simule la page yield m @@ -190,7 +263,7 @@ def test_appellation(scraper: Scraper): contenu = scraper.getjsondata("nino-negri-5-stelle-sfursat-2022.html") assert vide.appellation() is None assert poubelle.appellation() is None - assert contenu.appellation() == "Sforzato di Valtellina" + assert contenu.appellation() == "Madame-Loïk" def test_fonctionprivee(scraper: Scraper): @@ -207,7 +280,6 @@ def test_fonctionprivee(scraper: Scraper): assert contenu._getattributes() is not None - def test_critiques(scraper: Scraper): vide = scraper.getjsondata("") poubelle = scraper.getjsondata("poubelle") @@ -232,13 +304,23 @@ def test_prix(scraper: Scraper): contenu = scraper.getjsondata("nino-negri-5-stelle-sfursat-2022.html") assert vide.prix() is None assert poubelle.prix() is None - assert contenu.prix() == 65.0 - - + assert contenu.prix() == 842.0 + + def test_informations(scraper: Scraper): contenu = scraper.getjsondata("nino-negri-5-stelle-sfursat-2022.html") - assert contenu.informations() == "Sforzato di Valtellina,91,17,93.5,65.0" + assert contenu.informations() == "Madame-Loïk,91,17,93.5,842.0" vide = scraper.getjsondata("") poubelle = scraper.getjsondata("poubelle") assert vide.informations() == "None,None,None,None,None" assert poubelle.informations() == "None,None,None,None,None" + + +def test_search(scraper: Scraper): + m = mock_open() + with patch("builtins.open", m): + scraper.getvins("wine.html", "fake_file.csv") + + assert m().write.called + all_writes = "".join(call.args[0] for call in m().write.call_args_list) + assert "Madame-Loïk,91,17,93.5,842.0" in all_writes