"""Tests for the ``Cleaning`` pipeline, driven by a small in-memory CSV."""

from unittest.mock import patch, mock_open

import pytest
from pandas import DataFrame

from cleaning import Cleaning


@pytest.fixture
def cleaning_raw() -> Cleaning:
    """Return a ``Cleaning`` built from ``"donnee.csv"`` with ``open`` mocked.

    ``builtins.open`` is patched for the duration of the constructor call so
    that the CSV text below is served instead of a real file (presumably
    ``Cleaning`` reads the path through ``open``; verify against ``cleaning``).

    The six data rows deliberately cover the awkward cases:

    ============  ======  ========  ========  ====
    Appellation   Robert  Robinson  Suckling  Prix
    ============  ======  ========  ========  ====
    "Pauillac"    95                96        10.0
    "Pauillac "           93                  11.0
    "Margaux"     bad     18                  20.0
    (empty)       90                          30.0
    "Pomerol"                       91        40.0
    "Pomerol"                                 50.0
    ============  ======  ========  ========  ====

    Note the trailing space in the second ``"Pauillac "``, the row with no
    appellation, the non-numeric ``bad`` score, and the fact that no Pomerol
    row carries a Robert or Robinson score.
    """
    # One entry per CSV line; joined below so every line (including the last)
    # is newline-terminated.
    csv_lines = (
        "Appellation,Robert,Robinson,Suckling,Prix",
        "Pauillac,95,,96,10.0",
        "Pauillac ,,93,,11.0",
        "Margaux,bad,18,,20.0",
        ",90,,,30.0",
        "Pomerol,,,91,40.0",
        "Pomerol,,,,50.0",
    )
    csv_content = "\n".join(csv_lines) + "\n"

    fake_open = mock_open(read_data=csv_content)
    with patch("builtins.open", fake_open):
        return Cleaning("donnee.csv")


def _strip_appellations(cleaning: Cleaning) -> None:
    """Strip surrounding whitespace from the ``Appellation`` column in place.

    The fixture holds both ``"Pauillac"`` and ``"Pauillac "``; tests that
    compare against the exact label ``"Pauillac"`` normalise the column first.
    """
    cleaning._vins["Appellation"] = cleaning._vins["Appellation"].str.strip()


def test_drop_empty_appellation(cleaning_raw: Cleaning) -> None:
    """The single row without an appellation is removed (6 rows -> 5)."""
    out = cleaning_raw.drop_empty_appellation().getVins()

    assert out["Appellation"].isna().sum() == 0
    assert len(out) == 5


def test_mean_score_zero_when_no_scores(cleaning_raw: Cleaning) -> None:
    """``_mean_score`` yields 0 for an appellation that has no score at all."""
    cleaned = cleaning_raw.drop_empty_appellation()
    means = cleaned._mean_score("Robert")

    assert list(means.columns) == ["Appellation", "mean_Robert"]

    # No Pomerol row has a Robert score in the fixture data.
    is_pomerol = means["Appellation"].str.strip() == "Pomerol"
    pomerol_mean = means.loc[is_pomerol, "mean_Robert"].iloc[0]
    assert pomerol_mean == 0


def test_fill_missing_scores(cleaning_raw: Cleaning) -> None:
    """After filling, no score column has gaps and Pauillac's Robert is 95."""
    _strip_appellations(cleaning_raw)
    # NOTE(review): the return value is discarded, so this relies on
    # drop_empty_appellation() mutating the instance - confirm in `cleaning`.
    cleaning_raw.drop_empty_appellation()
    filled = cleaning_raw.fill_missing_scores().getVins()

    for col in cleaning_raw.SCORE_COLS:
        assert filled[col].isna().sum() == 0

    pauillac_robert = filled[filled["Appellation"] == "Pauillac"]["Robert"]
    # Series.all() is True for an empty Series; make sure the check below is
    # not vacuously satisfied because the selection matched nothing.
    assert not pauillac_robert.empty, "no Pauillac rows left to check"
    assert (pauillac_robert == 95.0).all()


def test_encode_appellation(cleaning_raw: Cleaning) -> None:
    """Encoding replaces ``Appellation`` with one column per appellation."""
    _strip_appellations(cleaning_raw)
    out = (
        cleaning_raw.drop_empty_appellation()
        .fill_missing_scores()
        .encode_appellation()
        .getVins()
    )

    assert "Appellation" not in out.columns
    assert "Pauillac" in out.columns
    # Index label 0 is the first fixture row, whose appellation is "Pauillac".
    assert int(out.loc[0, "Pauillac"]) == 1