mirror of
https://github.com/guezoloic/millesima-ai-engine.git
synced 2026-03-28 18:03:47 +00:00
ajout: fonctions de recherche
This commit is contained in:
67
main.py
67
main.py
@@ -1,8 +1,8 @@
|
|||||||
from typing import cast
|
from typing import cast
|
||||||
from requests import Response, Session
|
from requests import HTTPError, Response, Session
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from json import loads
|
from json import JSONDecodeError, loads
|
||||||
|
|
||||||
|
|
||||||
class _ScraperData:
|
class _ScraperData:
|
||||||
@@ -284,41 +284,46 @@ class Scraper:
|
|||||||
|
|
||||||
return _ScraperData(cast(dict[str, object], current_data))
|
return _ScraperData(cast(dict[str, object], current_data))
|
||||||
|
|
||||||
# def _geturlsearch(self, subdir: str, index: int) -> str | None:
|
def _geturlproductslist(self, subdir: str):
|
||||||
# data: dict[str, object] = self.getjsondata(subdir).getdata()
|
try:
|
||||||
|
data: dict[str, object] = self.getjsondata(subdir).getdata()
|
||||||
|
|
||||||
# for element in ["initialReduxState", "categ", "content"]:
|
for element in ["initialReduxState", "categ", "content"]:
|
||||||
# data = cast(dict[str, object], data.get(element))
|
data = cast(dict[str, object], data.get(element))
|
||||||
# if data is None or not isinstance(data, dict):
|
if data is None or not isinstance(data, dict):
|
||||||
# return None
|
return None
|
||||||
|
|
||||||
# products = data.get("products")
|
products = data.get("products")
|
||||||
# if not isinstance(products, list) or index >= len(products):
|
if isinstance(products, list):
|
||||||
# return None
|
return products
|
||||||
|
except JSONDecodeError | HTTPError:
|
||||||
|
return None
|
||||||
|
|
||||||
# product = products[index]
|
def getvins(self, subdir: str) -> None:
    """Print the information of every product listed under ``subdir``.

    Listing pages ``{subdir}?page=1`` to ``{subdir}?page=63`` are requested
    in order through ``self._geturlproductslist``; the walk stops at the
    first page that yields no products. For each product dict, its
    ``seoKeyword`` value is passed to ``self.getjsondata`` and the result
    of ``.informations()`` is printed.

    Args:
        subdir: Path of the listing page without the ``?page=`` suffix
            (e.g. ``"bordeaux.html"``).

    Returns:
        None. Results and per-product errors are written to stdout.
    """
    # Links already printed, so a product repeated across pages is
    # fetched only once.
    cache: set[str] = set()

    for page in range(1, 64):
        products_list = self._geturlproductslist(f"{subdir}?page={page}")

        # None (fetch/shape failure) or an empty list both end the walk.
        if not products_list:
            break

        for product in products_list:
            if not isinstance(product, dict):
                continue

            link = product.get("seoKeyword")
            if not link or link in cache:
                continue

            # The exceptions must be given as a tuple: ``except A | B``
            # builds a ``types.UnionType`` and Python raises TypeError when
            # matching against it, which would abort the whole scrape on
            # the first failing product instead of logging it.
            try:
                infos = self.getjsondata(link).informations()
            except (JSONDecodeError, HTTPError) as e:
                # A failed link is not cached, so it is retried if it
                # shows up again on a later page.
                print(f"Erreur sur le produit {link}: {e}")
            else:
                print(infos)
                cache.add(link)
|
||||||
|
|
||||||
|
|
||||||
# Scraper().getvins("bordeaux.html")
|
# Module-level entry: scrape the "bordeaux.html" listing with a default Scraper.
# NOTE(review): getvins as shown has no return statement, so this print() emits
# "None" once the scrape ends — confirm whether the wrapper is intended.
print(Scraper().getvins("bordeaux.html"))
|
||||||
|
|||||||
27
test.json
Normal file
27
test.json
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
Haut-Médoc,None,None,90,16.1
|
||||||
|
Haut-Médoc,90.0,16,None,23.2
|
||||||
|
Haut-Médoc,None,None,None,14.2
|
||||||
|
Saint-Estèphe,93,17,96,59.0
|
||||||
|
Pessac-Léognan,90,16.5,94,36.0
|
||||||
|
Pessac-Léognan,89,16.5,95,39.2
|
||||||
|
Haut-Médoc,89.0,15,92,26.8
|
||||||
|
Haut-Médoc,92,16.5,93,65.4
|
||||||
|
Margaux,92,16,93,64.3
|
||||||
|
Moulis,92,15.5,93,41.4
|
||||||
|
Haut-Médoc,None,None,None,15.2
|
||||||
|
Pauillac,97,18,98,298.0
|
||||||
|
Saint-Emilion,95,15,96,106.4
|
||||||
|
Haut-Médoc,92,16,95,32.7
|
||||||
|
Pomerol,88,16,96,92.7
|
||||||
|
Pessac-Léognan,90,16.5,93,33.1
|
||||||
|
Haut-Médoc,89,14.5,None,18.8
|
||||||
|
Haut-Médoc,93.0,16,94,56.3
|
||||||
|
Pessac-Léognan,86,None,92,31.6
|
||||||
|
Haut-Médoc,89,16,92,25.1
|
||||||
|
Haut-Médoc,92,16,94.5,68.7
|
||||||
|
Saint-Estèphe,91.0,None,91.5,27.3
|
||||||
|
Pomerol,94,16.5,95,97.5
|
||||||
|
Margaux,93,16,95,54.5
|
||||||
|
Pessac-Léognan,93,16.5,93,46.9
|
||||||
|
Saint-Estèphe,92,16,96,50.1
|
||||||
|
Pessac-Léognan,93,16.5,94,60.9
|
||||||
Reference in New Issue
Block a user