feat(learning.ipynb): ajout meilleur algo

feat: ajout premier modele (1ere partie)
feat: init Learning class and add drop_empty_price function
2026-03-28 19:13:42 +00:00 · 2026-03-28 20:04:14 +01:00 · 2026-03-28 19:58:09 +01:00 · 2026-03-28 15:51:46 +01:00
6 changed files with 499 additions and 12 deletions
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -19,15 +19,15 @@ jobs:
    steps:
      - uses: actions/checkout@v4

-      - name: Set up Python 3.10
+      - name: Set up Python 3.x
        uses: actions/setup-python@v4
        with:
-          python-version: "3.10"
+          python-version: "3.x"

      - name: install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install ".[test,doc]"
+          pip install ".[test]"

      - name: Lint with flake8
        run: |
--- a/.github/workflows/static.yml
+++ b/.github/workflows/static.yml
@@ -32,15 +32,14 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v4

-      - name: Set up Python 3.10
+      - name: Set up Python 3.x
        uses: actions/setup-python@v5
        with:
-          python-version: '3.10'
+          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
-          # Installe le projet en mode éditable avec les extras de doc
          pip install -e ".[doc]"

      - name: Setup Pages
--- a/learning.ipynb
+++ b/learning.ipynb
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,8 @@ dependencies = [
    "beautifulsoup4==4.14.3",
    "pandas==2.3.3",
    "tqdm==4.67.3",
+    "scikit-learn==1.7.2",
+    "matplotlib==3.10.8"
 ]

 [tool.pytest.ini_options]
--- a/src/cleaning.py
+++ b/src/cleaning.py
@@ -92,18 +92,24 @@ class Cleaning:
        self._vins = self._vins.join(appellation_dummies)
        return self

+    def drop_empty_price(self) -> "Cleaning":
+        """Discard every row whose ``Prix`` value is missing.
+
+        Returns:
+            The same instance, so the call can be chained with other steps.
+        """
+        has_price = self._vins["Prix"].notna()
+        self._vins = self._vins.loc[has_price]
+        return self
+

 def main() -> None:
+    """Run the cleaning chain on the CSV named in ``argv[1]``; write ``clean.csv``.
+
+    Raises:
+        ValueError: If ``argv`` does not hold exactly two entries (the program
+            name followed by one filename).
+    """
    if len(argv) != 2:
        raise ValueError(f"Usage: {argv[0]} <filename.csv>")

    filename = argv[1]
-    cleaning: Cleaning = Cleaning(filename)
-    cleaning.drop_empty_appellation()   \
-        .fill_missing_scores()          \
-        .encode_appellation()           \
-        .getVins()                      \
-        .to_csv("clean.csv", index=False)
+    cleaning: Cleaning = (
+        Cleaning(filename)
+        .drop_empty_appellation()
+        .fill_missing_scores()
+        .encode_appellation()
+        .drop_empty_price()
+    )
+    cleaning.getVins().to_csv("clean.csv", index=False)


 if __name__ == "__main__":
--- a/src/learning.py
+++ b/src/learning.py
@@ -0,0 +1,93 @@
+
+
+from typing import Any, Callable
+from pandas import DataFrame
+from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import make_pipeline
+import matplotlib.pyplot as plt
+
+from cleaning import Cleaning
+
+
+class Learning:
+    """Hold-out evaluation helper for models predicting one column of a table.
+
+    The constructor separates the target column from the features and splits
+    both into a training part and a test part; ``evaluate`` and ``draw`` then
+    work with that fixed split.
+
+    Attributes set by ``__init__``: ``X`` / ``y`` (all features / target) and
+    ``X_train``, ``X_test``, ``y_train``, ``y_test`` (the split).
+    """
+
+    def __init__(
+        self,
+        vins: DataFrame,
+        target: str,
+        test_size: float = 0.25,
+        random_state: int = 49,
+    ) -> None:
+        """Split ``vins`` into features/target and into train/test subsets.
+
+        Args:
+            vins: Table containing the feature columns and the target column.
+            target: Name of the column to predict; it is removed from ``X``.
+            test_size: Forwarded to ``train_test_split`` (default 0.25).
+            random_state: Seed forwarded to ``train_test_split`` so the split
+                is reproducible (default 49).
+        """
+        self.X = vins.drop(target, axis=1)
+        self.y = vins[target]
+
+        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
+            self.X, self.y, test_size=test_size, random_state=random_state
+        )
+
+    def evaluate(
+        self,
+        estimator: Any,
+        pretreatment: Any = None,
+        fn_score: Callable[[Any, Any, Any], Any] = lambda m, xt, yt: m.score(xt, yt),
+    ) -> tuple[Any, Any]:
+        """Fit a model on the training split and score it on the test split.
+
+        Args:
+            estimator: Object exposing ``fit`` and ``predict`` (and ``score``
+                when the default ``fn_score`` is used). Without
+                ``pretreatment`` it is fitted in place.
+            pretreatment: Optional preprocessing step chained in front of
+                ``estimator`` with ``make_pipeline``; ``None`` means no step.
+            fn_score: Callable ``(model, X_test, y_test) -> score``. Defaults
+                to the model's own ``score`` method.
+
+        Returns:
+            ``(score, prediction)`` where ``prediction`` is the output of
+            ``predict`` on the test features.
+        """
+        # Compare against None explicitly: truth-testing an object calls its
+        # ``__len__`` when it defines one, which can give the wrong answer or
+        # raise on an unfitted estimator.
+        if pretreatment is not None:
+            model = make_pipeline(pretreatment, estimator)
+        else:
+            model = estimator
+        model.fit(self.X_train, self.y_train)
+        score = fn_score(model, self.X_test, self.y_test)
+        prediction = model.predict(self.X_test)
+
+        return score, prediction
+
+    def draw(self, predictions: Any, y_actual: Any, title: str = "titre") -> None:
+        """Show a scatter plot of predicted values against actual values.
+
+        A dashed red diagonal marks the points where both values are equal.
+
+        Args:
+            predictions: Predicted values (x axis); must provide ``min``/``max``.
+            y_actual: Observed values (y axis); must provide ``min``/``max``.
+            title: Figure title; defaults to the former hard-coded "titre".
+        """
+        plt.figure(figsize=(8, 6))
+
+        plt.scatter(
+            predictions,
+            y_actual,
+            alpha=0.5,
+            c="royalblue",
+            edgecolors="k",
+            label="Vins",
+        )
+
+        # The diagonal spans the combined range of both series so that it
+        # crosses the whole point cloud.
+        mn = min(predictions.min(), y_actual.min())
+        mx = max(predictions.max(), y_actual.max())
+        plt.plot(
+            [mn, mx],
+            [mn, mx],
+            color="red",
+            linestyle="--",
+            lw=2,
+            label="Prédiction Parfaite",
+        )
+
+        plt.xlabel("Prix estimés (estim_LR)")
+        plt.ylabel("Prix réels (y_test)")
+        plt.title(title)
+        plt.legend()
+        plt.grid(True, linestyle=":", alpha=0.6)
+
+        plt.show()
+
+
+# Script section: everything below executes as soon as this module is loaded,
+# including reading "data.csv" and opening the plot windows.
+# NOTE(review): there is no ``if __name__ == "__main__":`` guard here, unlike
+# cleaning.py which wraps its entry point in ``main()`` — confirm that running
+# on import is intended.
+
+# Build the modelling table by chaining the Cleaning steps; the last step
+# before ``getVins()`` removes rows without a "Prix" value, which is the
+# target column used below.
+df_vins = (
+    Cleaning("data.csv")
+    .drop_empty_appellation()
+    .fill_missing_scores()
+    .encode_appellation()
+    .drop_empty_price()
+    .getVins()
+)
+
+# A single Learning instance is reused so both models see the same split.
+etude = Learning(df_vins, target="Prix")
+
+# Plain linear regression, no preprocessing step.
+print("--- Question 16 & 17 ---")
+score_simple, estim_simple = etude.evaluate(LinearRegression())
+print(f"Score R² (LR Simple) : {score_simple:.4f}")
+
+etude.draw(estim_simple, etude.y_test)
+
+
+# Same estimator type, preceded by a StandardScaler preprocessing step.
+print("\n--- Question 18 ---")
+score_std, estim_std = etude.evaluate(
+    estimator=LinearRegression(), pretreatment=StandardScaler()
+)
+print(f"Score R² (Standardisation + LR) : {score_std:.4f}")
+
+etude.draw(estim_std, etude.y_test)
Author	SHA1	Message	Date
Loïc GUEZO	68dffa6486	feat(learning.ipynb): ajout meilleur algo	2026-03-28 20:04:14 +01:00
Loïc GUEZO	c7d2077b23	feat: ajout premier modele (1ere partie)	2026-03-28 19:58:09 +01:00
Loïc GUEZO	106877a073	feat: init Learning class and add drop_empty_price function	2026-03-28 15:51:46 +01:00