def encode_appellation(
    df: DataFrame,
    column: str = "Appellation",
    prefix: str = "App",
) -> DataFrame:
    """Replace a categorical column with one indicator column per category.

    Values are converted to text and stripped of surrounding whitespace
    before encoding, so ``" Bordeaux"`` and ``"Bordeaux "`` share a single
    indicator column. Each indicator column is named ``f"{prefix}_{value}"``
    so that callers can select them all with ``df.filter(like="App_")``.

    Args:
        df: Input frame. It is not modified.
        column: Name of the categorical column to encode.
        prefix: Text placed before each category in the new column names.

    Returns:
        A new frame without ``column``, followed by the indicator columns.
        Rows where ``column`` is missing get a false/zero value in every
        indicator column.

    Raises:
        KeyError: If ``column`` is not present in ``df``.
    """
    raw = df[column]

    # Casting to text would turn missing values into the literal strings
    # "nan"/"None" and create a bogus category, so mask them back to missing
    # after normalising; get_dummies ignores missing values by default.
    cleaned = raw.astype(str).str.strip().where(raw.notna())

    indicators = pd.get_dummies(cleaned, prefix=prefix, prefix_sep="_")

    # Both frames carry the same index in the same order, so a positional
    # column-wise concat is enough. A label-based join would multiply rows
    # whenever the index contains repeated labels.
    return pd.concat([df.drop(columns=[column]), indicators], axis=1)