fix(main.py): changement donnée data

2026-03-28 18:03:47 +00:00 · 2026-02-08 22:47:10 +01:00
parent 5c22777c2d
commit 9da0159869
2 changed files with 66 additions and 60 deletions
--- a/main.py
+++ b/main.py
@@ -1,47 +1,68 @@
-from sys import stderr
 from typing import cast
 from requests import Response, Session
 from bs4 import BeautifulSoup, Tag
 from collections import OrderedDict
-from json import JSONDecodeError, loads
+from json import loads


-class ScraperData:
+class _ScraperData:
    def __init__(self, data: dict[str, object]) -> None:
-        if not data:
-            raise ValueError("Données insuffisantes pour créer un ScraperData.")
        self._data: dict[str, object] = data

-    def _getattributes(self) -> dict[str, object] | None:
-        current_data: object = self._data.get("attributes")
-        if isinstance(current_data, dict):
-            return cast(dict[str, object], current_data)
-        return None
+    def _getcontent(self) -> dict[str, object]:
+        """Return self._data["initialReduxState"]["product"]["content"].

-    def appellation(self) -> str | None:
-        current_value: dict[str, object] | None = self._getattributes()
-        if current_value is not None:
-            app_dict: dict[str, object] = cast(
-                dict[str, object], current_value.get("appellation")
-            )
-            if app_dict:
-                return cast(str, app_dict.get("value"))
-        return None
+        Returns:
+            dict[str, object]: The nested "content" value, cast (not runtime-checked) to a dict.
+        """
+        current_data: dict[str, object] = self._data
+        for key in ["initialReduxState", "product", "content"]:
+            current_data = cast(dict[str, object], current_data[key])
+        return current_data
+
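+    def _getattributes(self) -> dict[str, object]:
+        """Return the "attributes" entry of the dict given by _getcontent().
+
+        Returns:
+            dict[str, object]: The "attributes" value, cast (not runtime-checked) to a dict.
+        """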
+        current_data: object = self._getcontent()["attributes"]
+        return cast(dict[str, object], current_data)
+
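+    def appellation(self) -> str:
+        """Return the "value" field of the "appellation" attribute.
+
+        Returns:
+            str: attributes["appellation"]["value"], cast (not runtime-checked) to str.
+        """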
+        current_value: dict[str, object] = self._getattributes()
+        app_dict: dict[str, object] = cast(
+            dict[str, object], current_value["appellation"]
+        )
+        return cast(str, app_dict["value"])

    def _getvin(self, name: str) -> str | None:
-        current_value: dict[str, object] | None = self._getattributes()
-        if current_value is not None:
-            app_dict: dict[str, object] = cast(
-                dict[str, object], current_value.get(name)
-            )
-            if app_dict:
-                val: list[str] = (
-                    cast(str, app_dict.get("valueId")).rstrip("+").split("-")
-                )
-                if len(val) > 1:
-                    val[0] = str((int(val[0]) + int(val[1])) / 2)
-                return val[0]
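-        return None
+        """Return the value read for attribute `name`, averaging "a-b" ranges.
+
+        Args:
+            name (str): Key looked up in the attributes dict, e.g. "note_rp".
+
+        Returns:
+            str | None: None when `name` is absent, else the value (mean for a range).
+        """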
+        current_value: dict[str, object] = self._getattributes()
+        app_dict: dict[str, object] | None = cast(
+            dict[str, object] | None, current_value.get(name)
+        )
+
+        if app_dict is None:
+            return None
+
+        val: list[str] = cast(str, app_dict.get("attributes")).rstrip("+").split("-")
+        # range case: "93-94" -> ["93", "94"] -> "93.5". NOTE(review): old code read "valueId" above - confirm key.
+        if len(val) > 1:
+            val[0] = str((int(val[0]) + int(val[1])) / 2)
+        return val[0]

    def parker(self) -> str | None:
        return self._getvin("note_rp")
@@ -127,7 +148,7 @@ class Scraper:

        return request

-    def getsoup(self, subdir: str = "", use_cache: bool = True) -> BeautifulSoup:
+    def getsoup(self, subdir: str, use_cache: bool = True) -> BeautifulSoup:
        """
        Récupère le contenu HTML d'une page et le transforme en objet BeautifulSoup.

@@ -155,14 +176,14 @@ class Scraper:

        return soup

-    def getjsondata(self, subdir: str = "", id: str = "__NEXT_DATA__") -> ScraperData:
+    def getjsondata(self, subdir: str, id: str = "__NEXT_DATA__") -> _ScraperData:
        """
        Extrait les données JSON contenues dans la balise __NEXT_DATA__ du site.
        Beaucoup de sites modernes (Next.js) stockent leur état initial dans
        une balise <script> pour l'hydratation côté client.

        Args:
-            subdir (str, optional): Le chemin de la page.
+            subdir (str): Le chemin de la page.
            id (str, optional): L'identifiant de la balise script (par défaut __NEXT_DATA__).

        Raises:
@@ -178,32 +199,17 @@ class Scraper:
        soup: BeautifulSoup = self.getsoup(subdir)
        script: Tag | None = soup.find("script", id=id)

-        if isinstance(script, Tag) and script.string:
-            try:
-                current_data: object = loads(script.string)
-                # tout le chemin à parcourir pour arriver au données
-                # (plein d'information inutile)
-                keys: list[str] = [
-                    "props",
-                    "pageProps",
-                    "initialReduxState",
-                    "product",
-                    "content",
-                ]
-                for key in keys:
-                    # si current_data est bien un dictionnaire et que la clé
-                    # est bien dedans
-                    if isinstance(current_data, dict) and key in current_data:
-                        current_data: object = current_data[key]
-                    else:
-                        raise ValueError(f"Clé manquante dans le JSON : {key}")
+        if script is None or not script.string:
+            raise ValueError(f"le script id={id} est introuvable")

-                if isinstance(current_data, dict):
-                    return ScraperData(data=cast(dict[str, object], current_data))
+        current_data: object = cast(object, loads(script.string))

-            except (JSONDecodeError, ValueError) as e:
-                print(f"Erreur lors de l'extraction JSON : {e}", file=stderr)
-        return ScraperData({})
+        for key in ["props", "pageProps"]:
+            if isinstance(current_data, dict) and key in current_data:
+                current_data = cast(object, current_data[key])
+                continue
+            raise ValueError(f"Clé manquante dans le JSON : {key}")

+        return _ScraperData(cast(dict[str, object], current_data))

 # print(Scraper().getjsondata("bordeaux.html?page=1").getdata())
--- a/test_main.py
+++ b/test_main.py
@@ -122,7 +122,7 @@ def scraper() -> Scraper:


 def test_soup(scraper: Scraper):
-    h1: Tag | None = scraper.getsoup().find("h1")
+    h1: Tag | None = scraper.getsoup("").find("h1")

    assert isinstance(h1, Tag)
    assert h1.text == "MILLESIMA"