feat: ajout du premier modèle (1re partie)

2026-03-30 18:51:38 +00:00 · 2026-03-28 19:58:09 +01:00
parent 106877a073
commit c7d2077b23
3 changed files with 393 additions and 2 deletions
--- a/learning.ipynb
+++ b/learning.ipynb
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,8 @@ dependencies = [
    "beautifulsoup4==4.14.3",
    "pandas==2.3.3",
    "tqdm==4.67.3",
-    "scikit-learn==1.7.2"
+    "scikit-learn==1.7.2",
    "matplotlib==3.10.8"
 ]
 [tool.pytest.ini_options]
--- a/src/learning.py
+++ b/src/learning.py
@@ -1,10 +1,14 @@
-#!/usr/bin/env python3
+
 from typing import Any, Callable
 from pandas import DataFrame
 from sklearn.linear_model import LinearRegression
 from sklearn.preprocessing import StandardScaler
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 import matplotlib.pyplot as plt
 from cleaning import Cleaning
 class Learning:
@@ -29,3 +33,61 @@ class Learning:
        prediction = pipeline.predict(self.X_test)
        return score, prediction
    def draw(self, predictions, y_actual, title: str = "titre") -> None:
        """Show a scatter plot of predicted values against actual values.

        Each point has its prediction on the x-axis and its actual value on
        the y-axis. A dashed red diagonal is drawn across the combined range
        of both inputs: a point lying on it is a prediction equal to the
        actual value. The figure is displayed with ``plt.show()``.

        Args:
            predictions: Estimated values (x-axis). Must provide ``min()``
                and ``max()`` methods.
            y_actual: Observed values (y-axis). Must provide ``min()`` and
                ``max()`` methods.
            title: Title of the figure. Defaults to the placeholder that was
                previously hard-coded, so existing callers are unaffected.
        """
        plt.figure(figsize=(8, 6))
        plt.scatter(
            predictions,
            y_actual,
            alpha=0.5,
            c="royalblue",
            edgecolors="k",
            label="Vins",
        )

        # The reference diagonal must cover both series, hence the bounds are
        # taken over predictions and actual values together.
        lower = min(predictions.min(), y_actual.min())
        upper = max(predictions.max(), y_actual.max())
        plt.plot(
            [lower, upper],
            [lower, upper],
            color="red",
            linestyle="--",
            lw=2,
            label="Prédiction Parfaite",
        )

        plt.xlabel("Prix estimés (estim_LR)")
        plt.ylabel("Prix réels (y_test)")
        plt.title(title)
        plt.legend()
        plt.grid(True, linestyle=":", alpha=0.6)
        plt.show()
 # Run the study only when this file is executed as a script: without the
 # guard, merely importing the module would read data.csv, fit the models and
 # block on plt.show().
 if __name__ == "__main__":
     # Build the dataset by chaining the Cleaning steps below on data.csv and
     # retrieving the result with getVins().
     df_vins = (
         Cleaning("data.csv")
         .drop_empty_appellation()
         .fill_missing_scores()
         .encode_appellation()
         .drop_empty_price()
         .getVins()
     )

     # "Prix" is the column the models are asked to predict.
     etude = Learning(df_vins, target="Prix")

     # Plain linear regression: evaluate() returns a (score, prediction) pair.
     print("--- Question 16 & 17 ---")
     score_simple, estim_simple = etude.evaluate(LinearRegression())
     print(f"Score R² (LR Simple) : {score_simple:.4f}")
     etude.draw(estim_simple, etude.y_test)

     # Same estimator, with a StandardScaler passed as pretreatment.
     print("\n--- Question 18 ---")
     score_std, estim_std = etude.evaluate(
         estimator=LinearRegression(), pretreatment=StandardScaler()
     )
     print(f"Score R² (Standardisation + LR) : {score_std:.4f}")
     etude.draw(estim_std, etude.y_test)