feat(learning.ipynb): ajout meilleur algo

feat: ajout premier modele (1ere partie)
feat: init Learning class and add drop_empty_price function
2026-03-31 04:11:34 +00:00 · 2026-03-28 20:04:14 +01:00 · 2026-03-28 19:58:09 +01:00 · 2026-03-28 15:51:46 +01:00
6 changed files with 499 additions and 12 deletions
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -19,15 +19,15 @@ jobs:
    steps:
      - uses: actions/checkout@v4
-      - name: Set up Python 3.10
+      - name: Set up Python 3.x
        uses: actions/setup-python@v4
        with:
-          python-version: "3.10"
+          python-version: "3.x"
      - name: install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install ".[test,doc]"
+          pip install ".[test]"
      - name: Lint with flake8
        run: |
--- a/.github/workflows/static.yml
+++ b/.github/workflows/static.yml
@@ -32,15 +32,14 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v4
-      - name: Set up Python 3.10
+      - name: Set up Python 3.x
        uses: actions/setup-python@v5
        with:
-          python-version: '3.10'
+          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Installe le projet en mode éditable avec les extras de doc
          pip install -e ".[doc]"
      - name: Setup Pages
--- a/learning.ipynb
+++ b/learning.ipynb
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,8 @@ dependencies = [
    "beautifulsoup4==4.14.3",
    "pandas==2.3.3",
    "tqdm==4.67.3",
    "scikit-learn==1.7.2",
    "matplotlib==3.10.8"
 ]
 [tool.pytest.ini_options]
--- a/src/cleaning.py
+++ b/src/cleaning.py
@@ -92,18 +92,24 @@ class Cleaning:
        self._vins = self._vins.join(appellation_dummies)
        return self
    def drop_empty_price(self) -> "Cleaning":
        """Discard every row whose "Prix" value is missing.

        Returns:
            This instance, so that calls can be chained.
        """
        priced_rows = self._vins.dropna(axis="index", subset=["Prix"])
        self._vins = priced_rows
        return self
 def main() -> None:
    if len(argv) != 2:
        raise ValueError(f"Usage: {argv[0]} <filename.csv>")
    filename = argv[1]
-    cleaning: Cleaning = Cleaning(filename)
+    cleaning: Cleaning = (
-    cleaning.drop_empty_appellation()   \
+        Cleaning(filename)
-        .fill_missing_scores()          \
+        .drop_empty_appellation()
-        .encode_appellation()           \
+        .fill_missing_scores()
-        .getVins()                      \
+        .encode_appellation()
-        .to_csv("clean.csv", index=False)
+        .drop_empty_price()
    )
    cleaning.getVins().to_csv("clean.csv", index=False)
 if __name__ == "__main__":
--- a/src/learning.py
+++ b/src/learning.py
@@ -0,0 +1,93 @@
 from typing import Any, Callable
 from pandas import DataFrame
 from sklearn.linear_model import LinearRegression
 from sklearn.preprocessing import StandardScaler
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 import matplotlib.pyplot as plt
 from cleaning import Cleaning
 class Learning:
     """Hold one train/test split of a dataset and evaluate estimators on it.

     The split is computed once in the constructor, so every call to
     ``evaluate`` trains and scores on the same rows.
     """

     def __init__(self, vins: DataFrame, target: str) -> None:
         """Separate the features from the target and split them.

         Args:
             vins: Dataset holding both the feature columns and the target.
             target: Name of the column to predict; it is removed from the
                 feature matrix.
         """
         self.X = vins.drop(target, axis=1)
         self.y = vins[target]
         # 25 % of the rows are held out; the fixed seed keeps the split
         # identical from one run to the next.
         self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
             self.X, self.y, test_size=0.25, random_state=49
         )

     def evaluate(
         self,
         estimator: Any,
         pretreatment: Any = None,
         fn_score: Callable[[Any, Any, Any], Any] = (
             lambda m, xt, yt: m.score(xt, yt)
         ),
     ) -> tuple[Any, Any]:
         """Fit a model on the training rows and assess it on the test rows.

         Args:
             estimator: Object exposing ``fit`` and ``predict`` (and ``score``
                 when the default ``fn_score`` is used).
             pretreatment: Optional transformer placed in front of the
                 estimator through ``make_pipeline``. ``None`` means the
                 estimator is used on its own.
             fn_score: Callable ``(model, X_test, y_test) -> score``. By
                 default it delegates to ``model.score``.

         Returns:
             A ``(score, prediction)`` pair, where ``prediction`` is the output
             of ``predict`` on the test features.
         """
         # Compare with None explicitly: the truth value of an object may be
         # driven by __bool__/__len__, so a perfectly valid transformer could
         # otherwise be skipped silently (or raise) in a plain truthiness test.
         if pretreatment is not None:
             model = make_pipeline(pretreatment, estimator)
         else:
             model = estimator
         model.fit(self.X_train, self.y_train)
         score = fn_score(model, self.X_test, self.y_test)
         prediction = model.predict(self.X_test)
         return score, prediction

     def draw(
         self,
         predictions: Any,
         y_actual: Any,
         *,
         title: str = "titre",
         xlabel: str = "Prix estimés (estim_LR)",
         ylabel: str = "Prix réels (y_test)",
     ) -> None:
         """Scatter predicted values against actual values and show the figure.

         A dashed diagonal is drawn across the common value range; points on
         that line are predictions equal to the actual value.

         Args:
             predictions: Predicted values (x axis); must expose ``min``/``max``.
             y_actual: Actual values (y axis); must expose ``min``/``max``.
             title: Figure title. The default keeps the historical text.
             xlabel: Label of the x axis. The default keeps the historical text.
             ylabel: Label of the y axis. The default keeps the historical text.
         """
         plt.figure(figsize=(8, 6))
         plt.scatter(
             predictions,
             y_actual,
             alpha=0.5,
             c="royalblue",
             edgecolors="k",
             label="Vins",
         )
         # Span of the reference diagonal: covers both series so the line
         # crosses the whole cloud of points.
         lower = min(predictions.min(), y_actual.min())
         upper = max(predictions.max(), y_actual.max())
         plt.plot(
             [lower, upper],
             [lower, upper],
             color="red",
             linestyle="--",
             lw=2,
             label="Prédiction Parfaite",
         )
         plt.xlabel(xlabel)
         plt.ylabel(ylabel)
         plt.title(title)
         plt.legend()
         plt.grid(True, linestyle=":", alpha=0.6)
         plt.show()
 # Load "data.csv" and run the cleaning steps in order; getVins() hands back
 # the resulting table.
 df_vins = (
     Cleaning("data.csv")
     .drop_empty_appellation()
     .fill_missing_scores()
     .encode_appellation()
     .drop_empty_price()
     .getVins()
 )

 # "Prix" is the column to predict; every other column is used as a feature.
 etude = Learning(vins=df_vins, target="Prix")

 # Questions 16 & 17: linear regression without any preprocessing,
 # scored with the estimator's own score() method.
 print("--- Question 16 & 17 ---")
 score_simple, estim_simple = etude.evaluate(estimator=LinearRegression())
 print(f"Score R² (LR Simple) : {score_simple:.4f}")
 etude.draw(predictions=estim_simple, y_actual=etude.y_test)

 # Question 18: same regression, with a StandardScaler applied beforehand.
 print("\n--- Question 18 ---")
 score_std, estim_std = etude.evaluate(LinearRegression(), StandardScaler())
 print(f"Score R² (Standardisation + LR) : {score_std:.4f}")
 etude.draw(predictions=estim_std, y_actual=etude.y_test)
Author	SHA1	Message	Date
Loïc GUEZO	68dffa6486	feat(learning.ipynb): ajout meilleur algo	2026-03-28 20:04:14 +01:00
Loïc GUEZO	c7d2077b23	feat: ajout premier modele (1ere partie)	2026-03-28 19:58:09 +01:00
Loïc GUEZO	106877a073	feat: init Learning class and add drop_empty_price function	2026-03-28 15:51:46 +01:00