mirror of
https://github.com/guezoloic/millesima-ai-engine.git
synced 2026-03-28 18:03:47 +00:00
ajout: restructuration du code
This commit is contained in:
@@ -1,7 +1,12 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "projet-millesima-s6"
|
name = "projet-millesima-s6"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = ["requests==2.32.5", "beautifulsoup4==4.14.3", "pandas==2.3.3", "tqdm==4.67.3"]
|
dependencies = [
|
||||||
|
"requests==2.32.5",
|
||||||
|
"beautifulsoup4==4.14.3",
|
||||||
|
"pandas==2.3.3",
|
||||||
|
"tqdm==4.67.3",
|
||||||
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
|
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from pandas import DataFrame, to_numeric
|
from pandas import DataFrame, to_numeric, get_dummies
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
SCORE_COLS = ["Robert", "Robinson", "Suckling"]
|
SCORE_COLS = ["Robert", "Robinson", "Suckling"]
|
||||||
|
|
||||||
@@ -37,7 +36,7 @@ def mean_score(df: DataFrame, col: str) -> DataFrame:
|
|||||||
Calcule la moyenne d'une colonne de score par appellation.
|
Calcule la moyenne d'une colonne de score par appellation.
|
||||||
- Convertit les valeurs en numériques, en remplaçant les non-convertibles par NaN
|
- Convertit les valeurs en numériques, en remplaçant les non-convertibles par NaN
|
||||||
- Calcule la moyenne par appellation
|
- Calcule la moyenne par appellation
|
||||||
- Remplace les NaN résultants par 0
|
- Remplace les NaN résultants par 0
|
||||||
|
|
||||||
"""
|
"""
|
||||||
tmp = df[["Appellation", col]].copy()
|
tmp = df[["Appellation", col]].copy()
|
||||||
@@ -46,12 +45,10 @@ def mean_score(df: DataFrame, col: str) -> DataFrame:
|
|||||||
|
|
||||||
# moyenne par appellation
|
# moyenne par appellation
|
||||||
means = tmp.groupby("Appellation", as_index=False)[col].mean()
|
means = tmp.groupby("Appellation", as_index=False)[col].mean()
|
||||||
|
|
||||||
means[col] = means[col].fillna(0)
|
means[col] = means[col].fillna(0)
|
||||||
|
|
||||||
means = means.rename(columns={col: f"mean_{col}"})
|
means = means.rename(columns={col: f"mean_{col}"})
|
||||||
|
|
||||||
return means
|
|
||||||
|
|
||||||
|
|
||||||
def mean_robert(df: DataFrame) -> DataFrame:
|
def mean_robert(df: DataFrame) -> DataFrame:
|
||||||
@@ -96,10 +93,10 @@ def encode_appellation(df: DataFrame, column: str = "Appellation") -> DataFrame:
|
|||||||
Remplace la colonne 'Appellation' par des colonnes indicatrices
|
Remplace la colonne 'Appellation' par des colonnes indicatrices
|
||||||
"""
|
"""
|
||||||
df_copy = df.copy()
|
df_copy = df.copy()
|
||||||
|
|
||||||
appellations = df_copy[column].astype(str).str.strip()
|
appellations = df_copy[column].astype(str).str.strip()
|
||||||
|
|
||||||
appellation_dummies = pd.get_dummies(appellations)
|
appellation_dummies = get_dummies(appellations)
|
||||||
|
|
||||||
df_copy = df_copy.drop(columns=[column])
|
df_copy = df_copy.drop(columns=[column])
|
||||||
|
|
||||||
|
|||||||
@@ -5,13 +5,7 @@ from os.path import normpath, join
|
|||||||
from sys import argv
|
from sys import argv
|
||||||
from pandas import read_csv, DataFrame
|
from pandas import read_csv, DataFrame
|
||||||
|
|
||||||
from cleaning import (display_info,
|
from cleaning import *
|
||||||
drop_empty_appellation,
|
|
||||||
mean_robert,
|
|
||||||
mean_robinson,
|
|
||||||
mean_suckling,
|
|
||||||
fill_missing_scores,
|
|
||||||
encode_appellation)
|
|
||||||
|
|
||||||
|
|
||||||
def load_csv(filename: str) -> DataFrame:
|
def load_csv(filename: str) -> DataFrame:
|
||||||
|
|||||||
0
tests/test_cleaning.py
Normal file → Executable file
0
tests/test_cleaning.py
Normal file → Executable file
0
tests/test_scraper.py
Normal file → Executable file
0
tests/test_scraper.py
Normal file → Executable file
Reference in New Issue
Block a user