ajout: learning.ipynb dans mkdocs et finition du modèle 1

This commit is contained in:
2026-03-29 18:34:18 +02:00
parent 7a4e49684f
commit f4dd93e4b0
7 changed files with 560 additions and 481 deletions

486
docs/learning.ipynb Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -7,6 +7,7 @@ theme:
plugins:
- search
- mkdocstrings
- mkdocs-jupyter
extra:
generator: false

View File

@@ -7,7 +7,7 @@ dependencies = [
"pandas==2.3.3",
"tqdm==4.67.3",
"scikit-learn==1.7.2",
"matplotlib==3.10.8"
"matplotlib==3.10.8",
]
[tool.pytest.ini_options]
@@ -16,7 +16,12 @@ testpaths = ["tests"]
[project.optional-dependencies]
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
doc = ["mkdocs<2.0.0", "mkdocs-material==9.6.23", "mkdocstrings[python]"]
doc = [
"mkdocs<2.0.0",
"mkdocs-material==9.6.23",
"mkdocstrings[python]",
"mkdocs-jupyter==0.26.1",
]
[build-system]
requires = ["setuptools", "wheel"]

View File

@@ -97,11 +97,12 @@ class Cleaning:
return self
def main() -> None:
if len(argv) != 2:
raise ValueError(f"Usage: {argv[0]} <filename.csv>")
def main(filename: str | None = None) -> None:
if not filename:
if len(argv) != 2:
raise ValueError(f"Usage: {argv[0]} <filename.csv>")
filename = argv[1]
filename = argv[1]
cleaning: Cleaning = (
Cleaning(filename)
.drop_empty_appellation()

View File

@@ -1,93 +1,65 @@
# from typing import Any, Callable
# from pandas import DataFrame
# from sklearn.linear_model import LinearRegression
# from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import train_test_split
# from sklearn.pipeline import make_pipeline
# import matplotlib.pyplot as plt
# from cleaning import Cleaning
from typing import Any, Callable
from pandas import DataFrame
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
# class Learning:
# def __init__(self, vins: DataFrame, target: str) -> None:
# self.X = vins.drop(target, axis=1)
# self.y = vins[target]
from cleaning import Cleaning
# self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
# self.X, self.y, test_size=0.25, random_state=49
# )
# def evaluate(
# self,
# estimator,
# pretreatment=None,
# fn_score=lambda m, xt, yt: m.score(xt, yt),
# ):
class Learning:
def __init__(self, vins: DataFrame, target: str) -> None:
self.X = vins.drop(target, axis=1)
self.y = vins[target]
# pipeline = make_pipeline(pretreatment, estimator) if pretreatment else estimator
# pipeline.fit(self.X_train, self.y_train)
# score = fn_score(pipeline, self.X_test, self.y_test)
# prediction = pipeline.predict(self.X_test)
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
self.X, self.y, test_size=0.25, random_state=49
)
# return score, prediction
def evaluate(
self,
estimator,
pretreatment=None,
fn_score=lambda m, xt, yt: m.score(xt, yt),
):
# def draw(self, predictions, y_actual):
# plt.figure(figsize=(8, 6))
pipeline = make_pipeline(pretreatment, estimator) if pretreatment else estimator
pipeline.fit(self.X_train, self.y_train)
score = fn_score(pipeline, self.X_test, self.y_test)
prediction = pipeline.predict(self.X_test)
# plt.scatter(
# predictions,
# y_actual,
# alpha=0.5,
# c="royalblue",
# edgecolors="k",
# label="Vins",
# )
return score, prediction
# mn = min(predictions.min(), y_actual.min())
# mx = max(predictions.max(), y_actual.max())
# plt.plot(
# [mn, mx],
# [mn, mx],
# color="red",
# linestyle="--",
# lw=2,
# label="Prédiction Parfaite",
# )
def draw(self, predictions, y_actual):
plt.figure(figsize=(8, 6))
# plt.xlabel("Prix estimés (estim_LR)")
# plt.ylabel("Prix réels (y_test)")
# plt.title("titre")
# plt.legend()
# plt.grid(True, linestyle=":", alpha=0.6)
plt.scatter(
predictions,
y_actual,
alpha=0.5,
c="royalblue",
edgecolors="k",
label="Vins",
)
# plt.show()
mn = min(predictions.min(), y_actual.min())
mx = max(predictions.max(), y_actual.max())
plt.plot(
[mn, mx],
[mn, mx],
color="red",
linestyle="--",
lw=2,
label="Prédiction Parfaite",
)
plt.xlabel("Prix estimés (estim_LR)")
plt.ylabel("Prix réels (y_test)")
plt.title("titre")
plt.legend()
plt.grid(True, linestyle=":", alpha=0.6)
plt.show()
df_vins = (
Cleaning("data.csv")
.drop_empty_appellation()
.fill_missing_scores()
.encode_appellation()
.drop_empty_price()
.getVins()
)
etude = Learning(df_vins, target="Prix")
print("--- Question 16 & 17 ---")
score_simple, estim_simple = etude.evaluate(LinearRegression())
print(f"Score R² (LR Simple) : {score_simple:.4f}")
etude.draw(estim_simple, etude.y_test)
print("\n--- Question 18 ---")
score_std, estim_std = etude.evaluate(
estimator=LinearRegression(), pretreatment=StandardScaler()
)
print(f"Score R² (Standardisation + LR) : {score_std:.4f}")
etude.draw(estim_std, etude.y_test)

View File

@@ -490,11 +490,12 @@ class Scraper:
savestate((page, cache))
def main() -> None:
if len(argv) != 3:
raise ValueError(f"{argv[0]} <filename> <sous-url>")
filename = argv[1]
suburl = argv[2]
def main(filename: str | None = None, suburl: str | None = None) -> None:
if filename is None or suburl is None:
if len(argv) != 3:
raise ValueError(f"Usage: python {argv[0]} <filename> <sous-url>")
filename = argv[1]
suburl = argv[2]
scraper: Scraper = Scraper()
scraper.getvins(suburl, filename)