1 Commits

Author SHA1 Message Date
106877a073 feat: init Learning class and add drop_empty_price function 2026-03-28 15:51:46 +01:00
5 changed files with 53 additions and 16 deletions

View File

@@ -19,15 +19,15 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.10
- name: Set up Python 3.x
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.x"
- name: install dependencies
run: |
python -m pip install --upgrade pip
pip install ".[test,doc]"
pip install ".[test]"
- name: Lint with flake8
run: |

View File

@@ -32,15 +32,14 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python 3.10
- name: Set up Python 3.x
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
# Installe le projet en mode éditable avec les extras de doc
pip install -e ".[doc]"
- name: Setup Pages

View File

@@ -2,10 +2,11 @@
name = "projet-millesima-s6"
version = "0.1.0"
dependencies = [
"requests==2.33.0",
"requests==2.32.5",
"beautifulsoup4==4.14.3",
"pandas==3.0.1",
"pandas==2.3.3",
"tqdm==4.67.3",
"scikit-learn==1.7.2"
]
[tool.pytest.ini_options]
@@ -13,8 +14,8 @@ pythonpath = "src"
testpaths = ["tests"]
[project.optional-dependencies]
test = ["pytest==9.0.2", "requests-mock==1.12.1", "flake8==7.3.0"]
doc = ["mkdocs<2.0.0", "mkdocs-material==9.7.6", "mkdocstrings[python]"]
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
doc = ["mkdocs<2.0.0", "mkdocs-material==9.6.23", "mkdocstrings[python]"]
[build-system]
requires = ["setuptools", "wheel"]

View File

@@ -92,18 +92,24 @@ class Cleaning:
self._vins = self._vins.join(appellation_dummies)
return self
def drop_empty_price(self) -> "Cleaning":
    """Discard every row of the stored table whose "Prix" value is missing.

    Returns:
        This instance, so that further cleaning steps can be chained.
    """
    priced_rows = self._vins.dropna(subset=["Prix"])
    self._vins = priced_rows
    return self
def main() -> None:
    """Clean the CSV file named on the command line and write "clean.csv".

    Raises:
        ValueError: If the script does not receive exactly one argument.
    """
    if len(argv) != 2:
        raise ValueError(f"Usage: {argv[0]} <filename.csv>")

    filename = argv[1]
    # Run the cleaning steps exactly once; rows without a price are dropped
    # last, so the exported file only contains priced entries.
    cleaning: Cleaning = (
        Cleaning(filename)
        .drop_empty_appellation()
        .fill_missing_scores()
        .encode_appellation()
        .drop_empty_price()
    )
    cleaning.getVins().to_csv("clean.csv", index=False)
if __name__ == "__main__":

31
src/learning.py Executable file
View File

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
from typing import Any, Callable
from pandas import DataFrame
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
class Learning:
    """Hold a train/test split of a table and evaluate estimators on it."""

    def __init__(self, vins: DataFrame, target: str) -> None:
        """Separate features from the target column and split the data.

        Args:
            vins: Table holding both the feature columns and the target.
            target: Name of the column to predict; every other column is
                used as a feature.
        """
        self.X = vins.drop(target, axis=1)
        self.y = vins[target]
        # Fixed random_state keeps the 75/25 split reproducible across runs.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.25, random_state=49
        )

    def evaluate(
        self,
        estimator: Any,
        pretreatment: Any = None,
        fn_score: Callable[[Any, Any, Any], Any] = lambda m, xt, yt: m.score(xt, yt),
    ) -> tuple[Any, Any]:
        """Fit a model on the training split and assess it on the test split.

        Args:
            estimator: Object exposing ``fit`` and ``predict`` (and ``score``
                when the default ``fn_score`` is used).
            pretreatment: Optional preprocessing step placed in front of the
                estimator in a pipeline. ``None`` means the estimator is used
                on its own.
            fn_score: Callable receiving the fitted model, the test features
                and the test target, and returning the score.

        Returns:
            A ``(score, prediction)`` pair, where ``prediction`` is the
            model's output on the test features.
        """
        # Compare against None explicitly: a truthiness test would wrongly
        # skip (or raise on) a preprocessing object that defines
        # __len__/__bool__.
        if pretreatment is not None:
            model = make_pipeline(pretreatment, estimator)
        else:
            model = estimator

        model.fit(self.X_train, self.y_train)
        score = fn_score(model, self.X_test, self.y_test)
        prediction = model.predict(self.X_test)
        return score, prediction