mirror of
https://github.com/guezoloic/millesima-ai-engine.git
synced 2026-03-28 18:03:47 +00:00
ajout: restructuration du code
This commit is contained in:
@@ -1,7 +1,12 @@
|
||||
[project]
|
||||
name = "projet-millesima-s6"
|
||||
version = "0.1.0"
|
||||
dependencies = ["requests==2.32.5", "beautifulsoup4==4.14.3", "pandas==2.3.3", "tqdm==4.67.3"]
|
||||
dependencies = [
|
||||
"requests==2.32.5",
|
||||
"beautifulsoup4==4.14.3",
|
||||
"pandas==2.3.3",
|
||||
"tqdm==4.67.3",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from pandas import DataFrame, to_numeric
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, to_numeric, get_dummies
|
||||
|
||||
SCORE_COLS = ["Robert", "Robinson", "Suckling"]
|
||||
|
||||
@@ -37,7 +36,7 @@ def mean_score(df: DataFrame, col: str) -> DataFrame:
|
||||
Calcule la moyenne d'une colonne de score par appellation.
|
||||
- Convertit les valeurs en numériques, en remplaçant les non-convertibles par NaN
|
||||
- Calcule la moyenne par appellation
|
||||
- Remplace les NaN résultants par 0
|
||||
- Remplace les NaN résultants par 0
|
||||
|
||||
"""
|
||||
tmp = df[["Appellation", col]].copy()
|
||||
@@ -46,12 +45,10 @@ def mean_score(df: DataFrame, col: str) -> DataFrame:
|
||||
|
||||
# moyenne par appellation
|
||||
means = tmp.groupby("Appellation", as_index=False)[col].mean()
|
||||
|
||||
|
||||
means[col] = means[col].fillna(0)
|
||||
|
||||
|
||||
means = means.rename(columns={col: f"mean_{col}"})
|
||||
|
||||
return means
|
||||
|
||||
|
||||
def mean_robert(df: DataFrame) -> DataFrame:
|
||||
@@ -96,10 +93,10 @@ def encode_appellation(df: DataFrame, column: str = "Appellation") -> DataFrame:
|
||||
Remplace la colonne 'Appellation' par des colonnes indicatrices
|
||||
"""
|
||||
df_copy = df.copy()
|
||||
|
||||
|
||||
appellations = df_copy[column].astype(str).str.strip()
|
||||
|
||||
appellation_dummies = pd.get_dummies(appellations)
|
||||
appellation_dummies = get_dummies(appellations)
|
||||
|
||||
df_copy = df_copy.drop(columns=[column])
|
||||
|
||||
|
||||
@@ -5,13 +5,7 @@ from os.path import normpath, join
|
||||
from sys import argv
|
||||
from pandas import read_csv, DataFrame
|
||||
|
||||
from cleaning import (display_info,
|
||||
drop_empty_appellation,
|
||||
mean_robert,
|
||||
mean_robinson,
|
||||
mean_suckling,
|
||||
fill_missing_scores,
|
||||
encode_appellation)
|
||||
from cleaning import *
|
||||
|
||||
|
||||
def load_csv(filename: str) -> DataFrame:
|
||||
|
||||
0
tests/test_cleaning.py
Normal file → Executable file
0
tests/test_cleaning.py
Normal file → Executable file
0
tests/test_scraper.py
Normal file → Executable file
0
tests/test_scraper.py
Normal file → Executable file
Reference in New Issue
Block a user