feat: ajout premier modele (1ere partie)

This commit is contained in:
2026-03-28 19:58:09 +01:00
parent 106877a073
commit c7d2077b23
3 changed files with 393 additions and 2 deletions

328
learning.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@@ -6,7 +6,8 @@ dependencies = [
"beautifulsoup4==4.14.3",
"pandas==2.3.3",
"tqdm==4.67.3",
"scikit-learn==1.7.2"
"scikit-learn==1.7.2",
"matplotlib==3.10.8"
]
[tool.pytest.ini_options]

View File

@@ -1,10 +1,14 @@
#!/usr/bin/env python3
from typing import Any, Callable
from pandas import DataFrame
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
from cleaning import Cleaning
class Learning:
@@ -29,3 +33,61 @@ class Learning:
prediction = pipeline.predict(self.X_test)
return score, prediction
def draw(self, predictions, y_actual, title: str = "titre") -> None:
    """Show a scatter plot of predicted versus actual values.

    Each point is one sample, with its prediction on the x-axis and its
    actual value on the y-axis. A dashed red diagonal spanning the common
    value range marks where prediction equals actual.

    Args:
        predictions: Predicted values; must expose ``.min()`` and
            ``.max()`` (e.g. a NumPy array or a pandas Series).
        y_actual: Actual values matching ``predictions``; must also
            expose ``.min()`` and ``.max()``.
        title: Title of the figure. Defaults to the historical
            placeholder ``"titre"`` so existing callers get the same
            figure as before.

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    # Use one shared range for both axes so the reference diagonal covers
    # every plotted point, whichever series has the wider spread.
    lower = min(predictions.min(), y_actual.min())
    upper = max(predictions.max(), y_actual.max())

    plt.figure(figsize=(8, 6))
    plt.scatter(
        predictions,
        y_actual,
        alpha=0.5,
        c="royalblue",
        edgecolors="k",
        label="Vins",
    )
    # Diagonal y = x: points lying on it are exact predictions.
    plt.plot(
        [lower, upper],
        [lower, upper],
        color="red",
        linestyle="--",
        lw=2,
        label="Prédiction Parfaite",
    )
    plt.xlabel("Prix estimés (estim_LR)")
    plt.ylabel("Prix réels (y_test)")
    plt.title(title)
    plt.legend()
    plt.grid(True, linestyle=":", alpha=0.6)
    plt.show()
# Driver script: build the dataset, then fit and plot two linear-regression
# variants on it.
# NOTE(review): no `if __name__ == "__main__":` guard is visible in this view,
# so these statements (file read, training, plot windows) appear to run
# whenever the module is imported -- confirm this is intended.

# Load "data.csv" and run it through the Cleaning chain. What each step does
# is defined in the `cleaning` module; the method names suggest rows without
# appellation/price are dropped, missing scores are filled and the appellation
# is encoded -- TODO confirm against cleaning.py.
df_vins = (
Cleaning("data.csv")
.drop_empty_appellation()
.fill_missing_scores()
.encode_appellation()
.drop_empty_price()
.getVins()
)
# "Prix" is passed as the target; the Learning constructor is not visible
# here, so how it uses the target and builds the split is unconfirmed.
etude = Learning(df_vins, target="Prix")
print("--- Question 16 & 17 ---")
# evaluate() is unpacked into (score, predictions); the score is reported as
# R² by the label printed below.
score_simple, estim_simple = etude.evaluate(LinearRegression())
print(f"Score R² (LR Simple) : {score_simple:.4f}")
# Plot the predictions against the held-out targets exposed as etude.y_test.
etude.draw(estim_simple, etude.y_test)
print("\n--- Question 18 ---")
# Same estimator, this time with a StandardScaler passed as pretreatment.
score_std, estim_std = etude.evaluate(
estimator=LinearRegression(), pretreatment=StandardScaler()
)
print(f"Score R² (Standardisation + LR) : {score_std:.4f}")
etude.draw(estim_std, etude.y_test)