Merge pull request #13 from guezoloic/jalon3

Jalon3
2026-03-30 10:46:26 +00:00 · 2026-03-30 09:45:16 +02:00
parent f223acdfe6 83c53dd6b0
commit 615418f347
8 changed files with 1389 additions and 22 deletions
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -19,15 +19,15 @@ jobs:
    steps:
      - uses: actions/checkout@v4
-      - name: Set up Python 3.10
+      - name: Set up Python 3.x
        uses: actions/setup-python@v4
        with:
-          python-version: "3.10"
+          python-version: "3.x"
      - name: install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install ".[test,doc]"
+          pip install ".[test]"
      - name: Lint with flake8
        run: |
--- a/.github/workflows/static.yml
+++ b/.github/workflows/static.yml
@@ -32,15 +32,14 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v4
-      - name: Set up Python 3.10
+      - name: Set up Python 3.x
        uses: actions/setup-python@v5
        with:
-          python-version: '3.10'
+          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Installe le projet en mode éditable avec les extras de doc
          pip install -e ".[doc]"
      - name: Setup Pages
--- a/docs/learning.ipynb
+++ b/docs/learning.ipynb
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -7,6 +7,7 @@ theme:
 plugins:
  - search
  - mkdocstrings
+  - mkdocs-jupyter
 extra:
  generator: false
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,9 @@ dependencies = [
    "beautifulsoup4==4.14.3",
    "pandas==2.3.3",
    "tqdm==4.67.3",
+    "scikit-learn==1.7.2",
+    "matplotlib==3.10.8",
+    "seaborn==0.13.2"
 ]
 [tool.pytest.ini_options]
@@ -14,7 +17,12 @@ testpaths = ["tests"]
 [project.optional-dependencies]
 test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
-doc = ["mkdocs<2.0.0", "mkdocs-material==9.6.23", "mkdocstrings[python]"]
+doc = [
+    "mkdocs<2.0.0",
+    "mkdocs-material==9.6.23",
+    "mkdocstrings[python]",
+    "mkdocs-jupyter==0.26.1",
+]
 [build-system]
 requires = ["setuptools", "wheel"]
--- a/src/cleaning.py
+++ b/src/cleaning.py
@@ -92,18 +92,25 @@ class Cleaning:
        self._vins = self._vins.join(appellation_dummies)
        return self
    def drop_empty_price(self) -> "Cleaning":
        self._vins = self._vins.dropna(subset=["Prix"])
        return self
 def main() -> None:
    if len(argv) != 2:
        raise ValueError(f"Usage: {argv[0]} <filename.csv>")
-    filename = argv[1]
+def main(filename: str | None = None) -> None:
-    cleaning: Cleaning = Cleaning(filename)
+    if not filename:
-    cleaning.drop_empty_appellation()   \
+        if len(argv) != 2:
-        .fill_missing_scores()          \
+            raise ValueError(f"Usage: {argv[0]} <filename.csv>")
-        .encode_appellation()           \
+        filename = argv[1]
-        .getVins()                      \
+
-        .to_csv("clean.csv", index=False)
+    cleaning: Cleaning = (
+        Cleaning(filename)
+        .drop_empty_appellation()
+        .fill_missing_scores()
+        .encode_appellation()
+        .drop_empty_price()
+    )
+    cleaning.getVins().to_csv("clean.csv", index=False)
 if __name__ == "__main__":
--- a/src/learning.py
+++ b/src/learning.py
@@ -0,0 +1,64 @@
+# from typing import Any, Callable
+# from pandas import DataFrame
+# from sklearn.linear_model import LinearRegression
+# from sklearn.preprocessing import StandardScaler
+# from sklearn.model_selection import train_test_split
+# from sklearn.pipeline import make_pipeline
+# import matplotlib.pyplot as plt
+# from cleaning import Cleaning
+# class Learning:
+#     def __init__(self, vins: DataFrame, target: str) -> None:
+#         self.X = vins.drop(target, axis=1)
+#         self.y = vins[target]
+#         self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
+#             self.X, self.y, test_size=0.25, random_state=49
+#         )
+#     def evaluate(
+#         self,
+#         estimator,
+#         pretreatment=None,
+#         fn_score=lambda m, xt, yt: m.score(xt, yt),
+#     ):
+#         pipeline = make_pipeline(pretreatment, estimator) if pretreatment else estimator
+#         pipeline.fit(self.X_train, self.y_train)
+#         score = fn_score(pipeline, self.X_test, self.y_test)
+#         prediction = pipeline.predict(self.X_test)
+#         return score, prediction
+#     def draw(self, predictions, y_actual):
+#         plt.figure(figsize=(8, 6))
+#         plt.scatter(
+#             predictions,
+#             y_actual,
+#             alpha=0.5,
+#             c="royalblue",
+#             edgecolors="k",
+#             label="Vins",
+#         )
+#         mn = min(predictions.min(), y_actual.min())
+#         mx = max(predictions.max(), y_actual.max())
+#         plt.plot(
+#             [mn, mx],
+#             [mn, mx],
+#             color="red",
+#             linestyle="--",
+#             lw=2,
+#             label="Prédiction Parfaite",
+#         )
+#         plt.xlabel("Prix estimés (estim_LR)")
+#         plt.ylabel("Prix réels (y_test)")
+#         plt.title("titre")
+#         plt.legend()
+#         plt.grid(True, linestyle=":", alpha=0.6)
+#         plt.show()
--- a/src/scraper.py
+++ b/src/scraper.py
@@ -490,11 +490,12 @@ class Scraper:
                savestate((page, cache))
-def main() -> None:
+def main(filename: str | None = None, suburl: str | None = None) -> None:
-    if len(argv) != 3:
+    if filename is None or suburl is None:
-        raise ValueError(f"{argv[0]} <filename> <sous-url>")
+        if len(argv) != 3:
-    filename = argv[1]
+            raise ValueError(f"Usage: python {argv[0]} <filename> <sous-url>")
-    suburl = argv[2]
+        filename = argv[1]
+        suburl = argv[2]
    scraper: Scraper = Scraper()
    scraper.getvins(suburl, filename)