mirror of
https://github.com/guezoloic/millesima_projetS6.git
synced 2026-03-28 11:03:41 +00:00
ajout: fonctions de recherche
This commit is contained in:
67
main.py
67
main.py
@@ -1,8 +1,8 @@
|
||||
from typing import cast
|
||||
from requests import Response, Session
|
||||
from requests import HTTPError, Response, Session
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from collections import OrderedDict
|
||||
from json import loads
|
||||
from json import JSONDecodeError, loads
|
||||
|
||||
|
||||
class _ScraperData:
|
||||
@@ -284,41 +284,46 @@ class Scraper:
|
||||
|
||||
return _ScraperData(cast(dict[str, object], current_data))
|
||||
|
||||
# def _geturlsearch(self, subdir: str, index: int) -> str | None:
|
||||
# data: dict[str, object] = self.getjsondata(subdir).getdata()
|
||||
def _geturlproductslist(self, subdir: str):
|
||||
try:
|
||||
data: dict[str, object] = self.getjsondata(subdir).getdata()
|
||||
|
||||
# for element in ["initialReduxState", "categ", "content"]:
|
||||
# data = cast(dict[str, object], data.get(element))
|
||||
# if data is None or not isinstance(data, dict):
|
||||
# return None
|
||||
for element in ["initialReduxState", "categ", "content"]:
|
||||
data = cast(dict[str, object], data.get(element))
|
||||
if data is None or not isinstance(data, dict):
|
||||
return None
|
||||
|
||||
# products = data.get("products")
|
||||
# if not isinstance(products, list) or index >= len(products):
|
||||
# return None
|
||||
products = data.get("products")
|
||||
if isinstance(products, list):
|
||||
return products
|
||||
except JSONDecodeError | HTTPError:
|
||||
return None
|
||||
|
||||
# product = products[index]
|
||||
# if isinstance(product, dict):
|
||||
# return str(product.get("seoKeyword"))
|
||||
def getvins(self, subdir: str, *, max_pages: int = 63) -> None:
    """Print the information of every distinct product listed under *subdir*.

    Listing pages ``{subdir}?page=1`` up to ``{subdir}?page={max_pages}`` are
    requested in order through ``_geturlproductslist``; the crawl stops at the
    first page that yields no products. For each product dict, the value of
    its ``seoKeyword`` entry is used as the product page path, fetched with
    ``getjsondata`` and the result of ``informations()`` is printed.

    Args:
        subdir: Path of the listing page, without the ``page`` query string.
        max_pages: Highest page number to request (keyword-only). The default
            of 63 matches the previously hard-coded ``range(1, 64)``.

    Returns:
        None. Results and per-product errors are written to standard output.
    """
    # Links that were already fetched successfully; a product that appears on
    # several pages is only printed once. Failed links are not recorded, so
    # they are retried if they show up again.
    cache: set[str] = set()

    for page in range(1, max_pages + 1):
        products_list = self._geturlproductslist(f"{subdir}?page={page}")
        if not products_list:
            break

        for product in products_list:
            if not isinstance(product, dict):
                continue

            link = product.get("seoKeyword")
            if not isinstance(link, str) or not link or link in cache:
                continue

            # NOTE: the exception types must be a tuple. Writing
            # ``JSONDecodeError | HTTPError`` creates a typing union, which
            # makes Python raise TypeError instead of handling the error, so
            # a single bad product would abort the whole crawl.
            try:
                infos = self.getjsondata(link).informations()
            except (JSONDecodeError, HTTPError) as e:
                print(f"Erreur sur le produit {link}: {e}")
            else:
                print(infos)
                cache.add(link)
|
||||
|
||||
|
||||
# Scraper().getvins("bordeaux.html")
|
||||
# Script entry: crawl the "bordeaux.html" listing. getvins() prints each
# product itself and, as written in this file, has no return statement, so the
# outer print() additionally emits "None" once the crawl is finished.
print(Scraper().getvins("bordeaux.html"))
|
||||
|
||||
27
test.json
Normal file
27
test.json
Normal file
@@ -0,0 +1,27 @@
|
||||
Haut-Médoc,None,None,90,16.1
|
||||
Haut-Médoc,90.0,16,None,23.2
|
||||
Haut-Médoc,None,None,None,14.2
|
||||
Saint-Estèphe,93,17,96,59.0
|
||||
Pessac-Léognan,90,16.5,94,36.0
|
||||
Pessac-Léognan,89,16.5,95,39.2
|
||||
Haut-Médoc,89.0,15,92,26.8
|
||||
Haut-Médoc,92,16.5,93,65.4
|
||||
Margaux,92,16,93,64.3
|
||||
Moulis,92,15.5,93,41.4
|
||||
Haut-Médoc,None,None,None,15.2
|
||||
Pauillac,97,18,98,298.0
|
||||
Saint-Emilion,95,15,96,106.4
|
||||
Haut-Médoc,92,16,95,32.7
|
||||
Pomerol,88,16,96,92.7
|
||||
Pessac-Léognan,90,16.5,93,33.1
|
||||
Haut-Médoc,89,14.5,None,18.8
|
||||
Haut-Médoc,93.0,16,94,56.3
|
||||
Pessac-Léognan,86,None,92,31.6
|
||||
Haut-Médoc,89,16,92,25.1
|
||||
Haut-Médoc,92,16,94.5,68.7
|
||||
Saint-Estèphe,91.0,None,91.5,27.3
|
||||
Pomerol,94,16.5,95,97.5
|
||||
Margaux,93,16,95,54.5
|
||||
Pessac-Léognan,93,16.5,93,46.9
|
||||
Saint-Estèphe,92,16,96,50.1
|
||||
Pessac-Léognan,93,16.5,94,60.9
|
||||
Reference in New Issue
Block a user