mirror of
https://github.com/guezoloic/millesima-ai-engine.git
synced 2026-03-28 18:03:47 +00:00
Déboguer _geturlproductslist et request - erreur 403
This commit is contained in:
5
main.py
5
main.py
@@ -12,6 +12,11 @@ def main() -> None:
|
|||||||
path: str = normpath(join(getcwd(), argv[1]))
|
path: str = normpath(join(getcwd(), argv[1]))
|
||||||
db: DataFrame = read_csv(path)
|
db: DataFrame = read_csv(path)
|
||||||
print(db.all())
|
print(db.all())
|
||||||
|
print(db.head())
|
||||||
|
print(db.info())
|
||||||
|
print("\nnombre de valeurs manquantes pour chaque colonne :")
|
||||||
|
print(db.isna().sum())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
try:
|
try:
|
||||||
|
|||||||
21
scraper.py
21
scraper.py
@@ -151,10 +151,7 @@ class _ScraperData:
|
|||||||
parker = self.parker()
|
parker = self.parker()
|
||||||
robinson = self.robinson()
|
robinson = self.robinson()
|
||||||
suckling = self.suckling()
|
suckling = self.suckling()
|
||||||
try:
|
prix = self.prix()
|
||||||
prix = self.prix()
|
|
||||||
except ValueError:
|
|
||||||
prix = None
|
|
||||||
|
|
||||||
return f"{appellation},{parker},{robinson},{suckling},{prix}"
|
return f"{appellation},{parker},{robinson},{suckling},{prix}"
|
||||||
|
|
||||||
@@ -194,7 +191,10 @@ class Scraper:
|
|||||||
HTTPError: Si le serveur renvoie un code d'erreur (4xx, 5xx).
|
HTTPError: Si le serveur renvoie un code d'erreur (4xx, 5xx).
|
||||||
"""
|
"""
|
||||||
target_url: str = self._url + subdir.lstrip("/")
|
target_url: str = self._url + subdir.lstrip("/")
|
||||||
|
print(f"[DEBUG] GET {target_url}")
|
||||||
response: Response = self._session.get(url=target_url, timeout=10)
|
response: Response = self._session.get(url=target_url, timeout=10)
|
||||||
|
print(f"[DEBUG] status={response.status_code} len={len(response.text)}")
|
||||||
|
print(f"[DEBUG] head={response.text[:120].replace('\\n',' ')}")
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response
|
return response
|
||||||
|
|
||||||
@@ -307,15 +307,20 @@ class Scraper:
|
|||||||
data: dict[str, object] = self.getjsondata(subdir).getdata()
|
data: dict[str, object] = self.getjsondata(subdir).getdata()
|
||||||
|
|
||||||
for element in ["initialReduxState", "categ", "content"]:
|
for element in ["initialReduxState", "categ", "content"]:
|
||||||
data: dict[str, object] = cast(dict[str, object], data.get(element))
|
nxt = data.get(element)
|
||||||
if not isinstance(data, dict):
|
print("DEBUG key", element, "->", type(nxt))
|
||||||
|
if not isinstance(nxt, dict):
|
||||||
|
print("DEBUG structure manquante, stop sur", element)
|
||||||
return None
|
return None
|
||||||
|
data = nxt
|
||||||
|
|
||||||
products: list[str] = cast(list[str], data.get("products"))
|
products = data.get("products")
|
||||||
|
print("DEBUG products type:", type(products), "len:", 0 if not isinstance(products, list) else len(products))
|
||||||
if isinstance(products, list):
|
if isinstance(products, list):
|
||||||
return products
|
return products
|
||||||
|
|
||||||
except (JSONDecodeError, HTTPError):
|
except (JSONDecodeError, HTTPError) as e:
|
||||||
|
print(f"DEBUG HTTP/JSON error sur {subdir}: {type(e).__name__} {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def getvins(self, subdir: str, filename: str):
|
def getvins(self, subdir: str, filename: str):
|
||||||
|
|||||||
Reference in New Issue
Block a user