This commit is contained in:
2026-03-06 21:36:48 +01:00
2 changed files with 6 additions and 6 deletions

View File

@@ -14,11 +14,11 @@ def path_filename(filename: str) -> str:
class Cleaning: class Cleaning:
def __init__(self, filename) -> None: def __init__(self, filename) -> None:
self._vins: DataFrame = read_csv(filename) self._vins: DataFrame = read_csv(filename)
# # créer la liste de tous les scores
self.SCORE_COLS: list[str] = [ self.SCORE_COLS: list[str] = [
c for c in self._vins.columns if c not in ["Appellation", "Prix"] c for c in self._vins.columns if c not in ["Appellation", "Prix"]
] ]
# # transforme toutes les colonnes score en numérique
for col in self.SCORE_COLS: for col in self.SCORE_COLS:
self._vins[col] = to_numeric(self._vins[col], errors="coerce") self._vins[col] = to_numeric(self._vins[col], errors="coerce")
@@ -87,7 +87,7 @@ class Cleaning:
Remplace la colonne 'Appellation' par des colonnes indicatrices Remplace la colonne 'Appellation' par des colonnes indicatrices
""" """
appellations = self._vins[column].astype(str).str.strip() appellations = self._vins[column].astype(str).str.strip()
appellation_dummies = get_dummies(appellations) appellation_dummies = get_dummies(appellations, prefix="App")
self._vins = self._vins.drop(columns=[column]) self._vins = self._vins.drop(columns=[column])
self._vins = self._vins.join(appellation_dummies) self._vins = self._vins.join(appellation_dummies)
return self return self

View File

@@ -62,6 +62,6 @@ def test_encode_appellation(cleaning_raw: Cleaning):
.encode_appellation() .encode_appellation()
.getVins() .getVins()
) )
assert "Appellation" not in out.columns assert "App_Appellation" not in out.columns
assert "Pauillac" in out.columns assert "App_Pauillac" in out.columns
assert int(out.loc[0, "Pauillac"]) == 1 assert int(out.loc[0, "App_Pauillac"]) == 1