ajout: restructuration du code

This commit is contained in:
2026-03-05 22:06:00 +01:00
parent 8047b06253
commit 69b8b4ce1f
5 changed files with 13 additions and 17 deletions

View File

@@ -1,7 +1,12 @@
[project] [project]
name = "projet-millesima-s6" name = "projet-millesima-s6"
version = "0.1.0" version = "0.1.0"
dependencies = ["requests==2.32.5", "beautifulsoup4==4.14.3", "pandas==2.3.3", "tqdm==4.67.3"] dependencies = [
"requests==2.32.5",
"beautifulsoup4==4.14.3",
"pandas==2.3.3",
"tqdm==4.67.3",
]
[project.optional-dependencies] [project.optional-dependencies]
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"] test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from pandas import DataFrame, to_numeric from pandas import DataFrame, to_numeric, get_dummies
import pandas as pd
SCORE_COLS = ["Robert", "Robinson", "Suckling"] SCORE_COLS = ["Robert", "Robinson", "Suckling"]
@@ -37,7 +36,7 @@ def mean_score(df: DataFrame, col: str) -> DataFrame:
Calcule la moyenne d'une colonne de score par appellation. Calcule la moyenne d'une colonne de score par appellation.
- Convertit les valeurs en numériques, en remplaçant les non-convertibles par NaN - Convertit les valeurs en numériques, en remplaçant les non-convertibles par NaN
- Calcule la moyenne par appellation - Calcule la moyenne par appellation
- Remplace les NaN résultants par 0 - Remplace les NaN résultants par 0
""" """
tmp = df[["Appellation", col]].copy() tmp = df[["Appellation", col]].copy()
@@ -46,12 +45,10 @@ def mean_score(df: DataFrame, col: str) -> DataFrame:
# moyenne par appellation # moyenne par appellation
means = tmp.groupby("Appellation", as_index=False)[col].mean() means = tmp.groupby("Appellation", as_index=False)[col].mean()
means[col] = means[col].fillna(0) means[col] = means[col].fillna(0)
means = means.rename(columns={col: f"mean_{col}"}) means = means.rename(columns={col: f"mean_{col}"})
return means
def mean_robert(df: DataFrame) -> DataFrame: def mean_robert(df: DataFrame) -> DataFrame:
@@ -96,10 +93,10 @@ def encode_appellation(df: DataFrame, column: str = "Appellation") -> DataFrame:
Remplace la colonne 'Appellation' par des colonnes indicatrices Remplace la colonne 'Appellation' par des colonnes indicatrices
""" """
df_copy = df.copy() df_copy = df.copy()
appellations = df_copy[column].astype(str).str.strip() appellations = df_copy[column].astype(str).str.strip()
appellation_dummies = pd.get_dummies(appellations) appellation_dummies = get_dummies(appellations)
df_copy = df_copy.drop(columns=[column]) df_copy = df_copy.drop(columns=[column])

View File

@@ -5,13 +5,7 @@ from os.path import normpath, join
from sys import argv from sys import argv
from pandas import read_csv, DataFrame from pandas import read_csv, DataFrame
from cleaning import (display_info, from cleaning import *
drop_empty_appellation,
mean_robert,
mean_robinson,
mean_suckling,
fill_missing_scores,
encode_appellation)
def load_csv(filename: str) -> DataFrame: def load_csv(filename: str) -> DataFrame:

0
tests/test_cleaning.py Normal file → Executable file
View File

0
tests/test_scraper.py Normal file → Executable file
View File