mirror of
https://github.com/guezoloic/millesima_projetS6.git
synced 2026-03-28 19:13:42 +00:00
ajout: restructuration de la cleaning
This commit is contained in:
@@ -1,64 +1,68 @@
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from pandas import DataFrame
|
||||
|
||||
from cleaning import (
|
||||
SCORE_COLS,
|
||||
drop_empty_appellation,
|
||||
mean_score,
|
||||
fill_missing_scores,
|
||||
encode_appellation,
|
||||
)
|
||||
from unittest.mock import patch, mock_open
|
||||
from cleaning import Cleaning
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_raw() -> DataFrame:
|
||||
return pd.DataFrame({
|
||||
"Appellation": ["Pauillac", "Pauillac ", "Margaux", None, "Pomerol", "Pomerol"],
|
||||
"Robert": ["95", None, "bad", 90, None, None],
|
||||
"Robinson": [None, "93", 18, None, None, None],
|
||||
"Suckling": [96, None, None, None, 91, None],
|
||||
"Prix": ["10.0", "11.0", "20.0", "30.0", "40.0", "50.0"],
|
||||
})
|
||||
def cleaning_raw() -> Cleaning:
|
||||
"""
|
||||
"Appellation": ["Pauillac", "Pauillac ", "Margaux", None , "Pomerol", "Pomerol"],
|
||||
"Robert": ["95" , None , "bad" , 90 , None , None ],
|
||||
"Robinson": [None , "93" , 18 , None , None , None ],
|
||||
"Suckling": [96 , None , None , None , 91 , None ],
|
||||
"Prix": ["10.0" , "11.0" , "20.0" , "30.0", "40.0" , "50.0" ],
|
||||
"""
|
||||
csv_content = """Appellation,Robert,Robinson,Suckling,Prix
|
||||
Pauillac,95,,96,10.0
|
||||
Pauillac ,,93,,11.0
|
||||
Margaux,bad,18,,20.0
|
||||
,90,,,30.0
|
||||
Pomerol,,,91,40.0
|
||||
Pomerol,,,,50.0
|
||||
"""
|
||||
m = mock_open(read_data=csv_content)
|
||||
with patch("builtins.open", m):
|
||||
return Cleaning("donnee.csv")
|
||||
|
||||
|
||||
def test_drop_empty_appellation(df_raw: DataFrame):
|
||||
out = drop_empty_appellation(df_raw)
|
||||
def test_drop_empty_appellation(cleaning_raw: Cleaning) -> None:
|
||||
out = cleaning_raw.drop_empty_appellation().getVins()
|
||||
assert out["Appellation"].isna().sum() == 0
|
||||
assert len(out) == 5
|
||||
assert len(out) == 5
|
||||
|
||||
|
||||
def test_mean_score_zero_when_no_scores(df_raw: DataFrame):
|
||||
out = drop_empty_appellation(df_raw)
|
||||
m = mean_score(out, "Robert")
|
||||
def test_mean_score_zero_when_no_scores(cleaning_raw: Cleaning) -> None:
|
||||
out = cleaning_raw.drop_empty_appellation()
|
||||
m = out._mean_score("Robert")
|
||||
assert list(m.columns) == ["Appellation", "mean_Robert"]
|
||||
|
||||
# Pomerol n'a aucune note Robert => moyenne doit être 0
|
||||
pomerol_mean = m.loc[m["Appellation"].str.strip() == "Pomerol", "mean_Robert"].iloc[0]
|
||||
pomerol_mean = m.loc[m["Appellation"].str.strip() == "Pomerol", "mean_Robert"].iloc[
|
||||
0
|
||||
]
|
||||
assert pomerol_mean == 0
|
||||
|
||||
|
||||
def test_fill_missing_scores(df_raw: DataFrame):
|
||||
out = drop_empty_appellation(df_raw)
|
||||
filled = fill_missing_scores(out)
|
||||
def test_fill_missing_scores(cleaning_raw: Cleaning):
|
||||
cleaning_raw._vins["Appellation"] = cleaning_raw._vins["Appellation"].str.strip()
|
||||
|
||||
# plus de NaN dans les colonnes de scores
|
||||
for col in SCORE_COLS:
|
||||
cleaning_raw.drop_empty_appellation()
|
||||
filled = cleaning_raw.fill_missing_scores().getVins()
|
||||
for col in cleaning_raw.SCORE_COLS:
|
||||
assert filled[col].isna().sum() == 0
|
||||
|
||||
assert filled.loc[1, "Robert"] == 95.0
|
||||
|
||||
# pas de colonnes temporaires mean_*
|
||||
for col in SCORE_COLS:
|
||||
assert f"mean_{col}" not in filled.columns
|
||||
|
||||
pauillac_robert = filled[filled["Appellation"] == "Pauillac"]["Robert"]
|
||||
assert (pauillac_robert == 95.0).all()
|
||||
|
||||
|
||||
def test_encode_appellation(df_raw: DataFrame):
|
||||
out = drop_empty_appellation(df_raw)
|
||||
filled = fill_missing_scores(out)
|
||||
encoded = encode_appellation(filled)
|
||||
def test_encode_appellation(cleaning_raw: Cleaning):
|
||||
cleaning_raw._vins["Appellation"] = cleaning_raw._vins["Appellation"].str.strip()
|
||||
|
||||
# la colonne texte disparaît
|
||||
assert "Appellation" not in encoded.columns
|
||||
assert "Pauillac" in encoded.columns
|
||||
assert encoded.loc[0, "Pauillac"] == 1
|
||||
out = (
|
||||
cleaning_raw.drop_empty_appellation()
|
||||
.fill_missing_scores()
|
||||
.encode_appellation()
|
||||
.getVins()
|
||||
)
|
||||
assert "Appellation" not in out.columns
|
||||
assert "Pauillac" in out.columns
|
||||
assert int(out.loc[0, "Pauillac"]) == 1
|
||||
|
||||
Reference in New Issue
Block a user