diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 20b970f..d8cdac1 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -19,15 +19,15 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.10 + - name: Set up Python 3.x uses: actions/setup-python@v4 with: - python-version: "3.10" + python-version: "3.x" - name: install dependencies run: | python -m pip install --upgrade pip - pip install ".[test,doc]" + pip install ".[test]" - name: Lint with flake8 run: | diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml index 16872fe..8da1024 100644 --- a/.github/workflows/static.yml +++ b/.github/workflows/static.yml @@ -32,15 +32,14 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Set up Python 3.10 + - name: Set up Python 3.x uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip - # Installe le projet en mode éditable avec les extras de doc pip install -e ".[doc]" - name: Setup Pages diff --git a/pyproject.toml b/pyproject.toml index e6e8fe4..56617e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ dependencies = [ "beautifulsoup4==4.14.3", "pandas==2.3.3", "tqdm==4.67.3", + "scikit-learn==1.7.2" ] [tool.pytest.ini_options] diff --git a/src/cleaning.py b/src/cleaning.py index 6c5a806..1b3b3f5 100755 --- a/src/cleaning.py +++ b/src/cleaning.py @@ -92,18 +92,24 @@ class Cleaning: self._vins = self._vins.join(appellation_dummies) return self + def drop_empty_price(self) -> "Cleaning": + self._vins = self._vins.dropna(subset=["Prix"]) + return self + def main() -> None: if len(argv) != 2: raise ValueError(f"Usage: {argv[0]} ") filename = argv[1] - cleaning: Cleaning = Cleaning(filename) - cleaning.drop_empty_appellation() \ - .fill_missing_scores() \ - .encode_appellation() \ - .getVins() \ - .to_csv("clean.csv", index=False) + cleaning: Cleaning = ( + Cleaning(filename) + .drop_empty_appellation() + .fill_missing_scores() + .encode_appellation() + .drop_empty_price() + ) + cleaning.getVins().to_csv("clean.csv", index=False) if __name__ == "__main__": diff --git a/src/learning.py b/src/learning.py new file mode 100755 index 0000000..8f47f0f --- /dev/null +++ b/src/learning.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +from typing import Any, Callable +from pandas import DataFrame +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline + + +class Learning: + def __init__(self, vins: DataFrame, target: str) -> None: + self.X = vins.drop(target, axis=1) + self.y = vins[target] + + self.X_train, self.X_test, self.y_train, self.y_test = train_test_split( + self.X, self.y, test_size=0.25, random_state=49 + ) + + def evaluate( + self, + estimator, + pretreatment=None, + fn_score=lambda m, xt, yt: m.score(xt, yt), + ): + + pipeline = make_pipeline(pretreatment, estimator) if pretreatment else estimator + pipeline.fit(self.X_train, self.y_train) + score = fn_score(pipeline, self.X_test, self.y_test) + prediction = pipeline.predict(self.X_test) + + return score, prediction