From 99dd71989da538dd834331e32f68f8b3696ba605 Mon Sep 17 00:00:00 2001 From: Chahrazad650 Date: Wed, 25 Feb 2026 00:10:00 +0100 Subject: [PATCH] debuger _geturlproductslist et request -erreur 403 --- main.py | 5 +++++ scraper.py | 21 +++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index 59d1b2c..9bebe0e 100755 --- a/main.py +++ b/main.py @@ -12,6 +12,11 @@ def main() -> None: path: str = normpath(join(getcwd(), argv[1])) db: DataFrame = read_csv(path) print(db.all()) + print(db.head()) + print(db.info()) + print("\nnombre de valeurs manquantes pour chaque colonne :") + print(db.isna().sum()) + if __name__ == "__main__": try: diff --git a/scraper.py b/scraper.py index 736da5f..e951af1 100755 --- a/scraper.py +++ b/scraper.py @@ -151,10 +151,7 @@ class _ScraperData: parker = self.parker() robinson = self.robinson() suckling = self.suckling() - try: - prix = self.prix() - except ValueError: - prix = None + prix = self.prix() return f"{appellation},{parker},{robinson},{suckling},{prix}" @@ -194,7 +191,10 @@ class Scraper: HTTPError: Si le serveur renvoie un code d'erreur (4xx, 5xx). """ target_url: str = self._url + subdir.lstrip("/") + print(f"[DEBUG] GET {target_url}") response: Response = self._session.get(url=target_url, timeout=10) + print(f"[DEBUG] status={response.status_code} len={len(response.text)}") + print(f"[DEBUG] head={response.text[:120].replace('\\n',' ')}") response.raise_for_status() return response @@ -307,15 +307,20 @@ class Scraper: data: dict[str, object] = self.getjsondata(subdir).getdata() for element in ["initialReduxState", "categ", "content"]: - data: dict[str, object] = cast(dict[str, object], data.get(element)) - if not isinstance(data, dict): + nxt = data.get(element) + print("DEBUG key", element, "->", type(nxt)) + if not isinstance(nxt, dict): + print("DEBUG structure manquante, stop sur", element) return None + data = nxt - products: list[str] = cast(list[str], data.get("products")) + products = data.get("products") + print("DEBUG products type:", type(products), "len:", 0 if not isinstance(products, list) else len(products)) if isinstance(products, list): return products - except (JSONDecodeError, HTTPError): + except (JSONDecodeError, HTTPError) as e: + print(f"DEBUG HTTP/JSON error sur {subdir}: {type(e).__name__} {e}") return None def getvins(self, subdir: str, filename: str):