feat(learning.ipynb): ajout meilleur algo

feat: ajout premier modele (1ere partie)
feat: init Learning class and add drop_empty_price function
2026-03-28 19:13:42 +00:00 · 2026-03-28 20:04:14 +01:00 · 2026-03-28 19:58:09 +01:00 · 2026-03-28 15:51:46 +01:00 · 2026-03-27 22:11:53 +01:00 · 2026-03-27 22:06:36 +01:00
9 changed files with 528 additions and 34 deletions
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -0,0 +1,18 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+version: 2
+updates:
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "saturday"
+    open-pull-requests-limit: 5
+    groups:
+      python-dependencies:
+        patterns:
+          - "*"
+      
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -19,15 +19,15 @@ jobs:
    steps:
      - uses: actions/checkout@v4

-      - name: Set up Python 3.10
+      - name: Set up Python 3.x
        uses: actions/setup-python@v4
        with:
-          python-version: "3.10"
+          python-version: "3.x"

      - name: install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install ".[test,doc]"
+          pip install ".[test]"

      - name: Lint with flake8
        run: |
--- a/.github/workflows/static.yml
+++ b/.github/workflows/static.yml
@@ -32,15 +32,14 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v4

-      - name: Set up Python 3.10
+      - name: Set up Python 3.x
        uses: actions/setup-python@v5
        with:
-          python-version: '3.10'
+          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
-          # Installe le projet en mode éditable avec les extras de doc
          pip install -e ".[doc]"

      - name: Setup Pages
--- a/learning.ipynb
+++ b/learning.ipynb
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,8 +6,14 @@ dependencies = [
    "beautifulsoup4==4.14.3",
    "pandas==2.3.3",
    "tqdm==4.67.3",
+    "scikit-learn==1.7.2",
+    "matplotlib==3.10.8"
 ]

+[tool.pytest.ini_options]
+pythonpath = "src"
+testpaths = ["tests"]
+
 [project.optional-dependencies]
 test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
 doc = ["mkdocs<2.0.0", "mkdocs-material==9.6.23", "mkdocstrings[python]"]
--- a/src/cleaning.py
+++ b/src/cleaning.py
@@ -92,18 +92,24 @@ class Cleaning:
        self._vins = self._vins.join(appellation_dummies)
        return self

+    def drop_empty_price(self) -> "Cleaning":
+        self._vins = self._vins.dropna(subset=["Prix"])
+        return self
+

 def main() -> None:
    if len(argv) != 2:
        raise ValueError(f"Usage: {argv[0]} <filename.csv>")

    filename = argv[1]
-    cleaning: Cleaning = Cleaning(filename)
-    cleaning.drop_empty_appellation()   \
-        .fill_missing_scores()          \
-        .encode_appellation()           \
-        .getVins()                      \
-        .to_csv("clean.csv", index=False)
+    cleaning: Cleaning = (
+        Cleaning(filename)
+        .drop_empty_appellation()
+        .fill_missing_scores()
+        .encode_appellation()
+        .drop_empty_price()
+    )
+    cleaning.getVins().to_csv("clean.csv", index=False)


 if __name__ == "__main__":
--- a/src/learning.py
+++ b/src/learning.py
@@ -0,0 +1,93 @@
+
+
+from typing import Any, Callable
+from pandas import DataFrame
+from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import make_pipeline
+import matplotlib.pyplot as plt
+
+from cleaning import Cleaning
+
+
+class Learning:
+    def __init__(self, vins: DataFrame, target: str) -> None:
+        self.X = vins.drop(target, axis=1)
+        self.y = vins[target]
+
+        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
+            self.X, self.y, test_size=0.25, random_state=49
+        )
+
+    def evaluate(
+        self,
+        estimator,
+        pretreatment=None,
+        fn_score=lambda m, xt, yt: m.score(xt, yt),
+    ):
+
+        pipeline = make_pipeline(pretreatment, estimator) if pretreatment else estimator
+        pipeline.fit(self.X_train, self.y_train)
+        score = fn_score(pipeline, self.X_test, self.y_test)
+        prediction = pipeline.predict(self.X_test)
+
+        return score, prediction
+
+    def draw(self, predictions, y_actual):
+        plt.figure(figsize=(8, 6))
+
+        plt.scatter(
+            predictions,
+            y_actual,
+            alpha=0.5,
+            c="royalblue",
+            edgecolors="k",
+            label="Vins",
+        )
+
+        mn = min(predictions.min(), y_actual.min())
+        mx = max(predictions.max(), y_actual.max())
+        plt.plot(
+            [mn, mx],
+            [mn, mx],
+            color="red",
+            linestyle="--",
+            lw=2,
+            label="Prédiction Parfaite",
+        )
+
+        plt.xlabel("Prix estimés (estim_LR)")
+        plt.ylabel("Prix réels (y_test)")
+        plt.title("titre")
+        plt.legend()
+        plt.grid(True, linestyle=":", alpha=0.6)
+
+        plt.show()
+
+
+df_vins = (
+    Cleaning("data.csv")
+    .drop_empty_appellation()
+    .fill_missing_scores()
+    .encode_appellation()
+    .drop_empty_price()
+    .getVins()
+)
+
+etude = Learning(df_vins, target="Prix")
+
+print("--- Question 16 & 17 ---")
+score_simple, estim_simple = etude.evaluate(LinearRegression())
+print(f"Score R² (LR Simple) : {score_simple:.4f}")
+
+etude.draw(estim_simple, etude.y_test)
+
+
+print("\n--- Question 18 ---")
+score_std, estim_std = etude.evaluate(
+    estimator=LinearRegression(), pretreatment=StandardScaler()
+)
+print(f"Score R² (Standardisation + LR) : {score_std:.4f}")
+
+etude.draw(estim_std, etude.y_test)
--- a/src/scraper.py
+++ b/src/scraper.py
@@ -377,9 +377,6 @@ class Scraper:
        try:
            data: dict[str, object] = self.getjsondata(subdir).getdata()

-            for element in ["initialReduxState", "categ", "content"]:
-                data = cast(dict[str, object], data.get(element))
-
            products: list[dict[str, Any]] = cast(
                list[dict[str, Any]], data.get("products")
            )
--- a/tests/test_scraper.py
+++ b/tests/test_scraper.py
@@ -185,17 +185,11 @@ def mock_site():
                        {dumps({
                            "props": {
                                "pageProps": {
-                                    "initialReduxState": {
-                                        "categ": {
-                                            "content": {
-                                                "products": [
-                                                    {"seoKeyword": "/nino-negri-5-stelle-sfursat-2022.html",},
-                                                    {"seoKeyword": "/poubelle",},
-                                                    {"seoKeyword": "/",}
-                                                ]
-                                            }
-                                        }
-                                    }
+                                    "products": [
+                                        {"seoKeyword": "/nino-negri-5-stelle-sfursat-2022.html",},
+                                        {"seoKeyword": "/poubelle",},
+                                        {"seoKeyword": "/",}
+                                    ]
                                }
                            }
                        }
@@ -213,14 +207,8 @@ def mock_site():
                        {dumps({
                            "props": {
                                "pageProps": {
-                                    "initialReduxState": {
-                                        "categ": {
-                                            "content": {
-                                                "products": [
-                                                ]
-                                            }
-                                        }
-                                    }
+                                    "products": [
+                                    ]
                                }
                            }
                        }
Author	SHA1	Message	Date
Loïc GUEZO	68dffa6486	feat(learning.ipynb): ajout meilleur algo	2026-03-28 20:04:14 +01:00
Loïc GUEZO	c7d2077b23	feat: ajout premier modele (1ere partie)	2026-03-28 19:58:09 +01:00
Loïc GUEZO	106877a073	feat: init Learning class and add drop_empty_price function	2026-03-28 15:51:46 +01:00
Loïc GUEZO	416cfcbf8b	Add Python package ecosystem to Dependabot config Configure Dependabot for Python package updates.	2026-03-27 22:11:53 +01:00
Loïc GUEZO	32c5310e37	fix: mettre à jour les tests pytest	2026-03-27 22:06:36 +01:00
Loïc GUEZO	9dfc7457a0	fix(scraper.py): retirer commentaire code et print	2026-03-27 22:06:06 +01:00
Loïc GUEZO	f5d5703e49	fix(scraper): recherche _getproduitslist actualisée Suite à une refonte de l'UI et du backend, la structure de données JSON envoyée par la page web a été simplifiée. Ancienne structure: - `"props"->"pageProps"->"initialReduxState"->"categ"->"content"->"products"` Nouvelle structure: - `"props"->"pageProps"->"products"`	2026-03-27 21:47:06 +01:00