mirror of
https://github.com/guezoloic/millesima-ai-engine.git
synced 2026-03-30 10:46:26 +00:00
ajout: learning.ipynb dans mkdocs et finition du modèle 1
This commit is contained in:
486
docs/learning.ipynb
Normal file
486
docs/learning.ipynb
Normal file
File diff suppressed because one or more lines are too long
387
learning.ipynb
387
learning.ipynb
File diff suppressed because one or more lines are too long
@@ -7,6 +7,7 @@ theme:
|
||||
plugins:
|
||||
- search
|
||||
- mkdocstrings
|
||||
- mkdocs-jupyter
|
||||
|
||||
extra:
|
||||
generator: false
|
||||
|
||||
@@ -7,7 +7,7 @@ dependencies = [
|
||||
"pandas==2.3.3",
|
||||
"tqdm==4.67.3",
|
||||
"scikit-learn==1.7.2",
|
||||
"matplotlib==3.10.8"
|
||||
"matplotlib==3.10.8",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
@@ -16,7 +16,12 @@ testpaths = ["tests"]
|
||||
|
||||
[project.optional-dependencies]
|
||||
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
|
||||
doc = ["mkdocs<2.0.0", "mkdocs-material==9.6.23", "mkdocstrings[python]"]
|
||||
doc = [
|
||||
"mkdocs<2.0.0",
|
||||
"mkdocs-material==9.6.23",
|
||||
"mkdocstrings[python]",
|
||||
"mkdocs-jupyter==0.26.1",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools", "wheel"]
|
||||
|
||||
@@ -97,11 +97,12 @@ class Cleaning:
|
||||
return self
|
||||
|
||||
|
||||
def main() -> None:
|
||||
def main(filename: str | None = None) -> None:
|
||||
if not filename:
|
||||
if len(argv) != 2:
|
||||
raise ValueError(f"Usage: {argv[0]} <filename.csv>")
|
||||
|
||||
filename = argv[1]
|
||||
|
||||
cleaning: Cleaning = (
|
||||
Cleaning(filename)
|
||||
.drop_empty_appellation()
|
||||
|
||||
134
src/learning.py
134
src/learning.py
@@ -1,93 +1,65 @@
|
||||
# from typing import Any, Callable
|
||||
# from pandas import DataFrame
|
||||
# from sklearn.linear_model import LinearRegression
|
||||
# from sklearn.preprocessing import StandardScaler
|
||||
# from sklearn.model_selection import train_test_split
|
||||
# from sklearn.pipeline import make_pipeline
|
||||
# import matplotlib.pyplot as plt
|
||||
|
||||
# from cleaning import Cleaning
|
||||
|
||||
|
||||
from typing import Any, Callable
|
||||
from pandas import DataFrame
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.pipeline import make_pipeline
|
||||
import matplotlib.pyplot as plt
|
||||
# class Learning:
|
||||
# def __init__(self, vins: DataFrame, target: str) -> None:
|
||||
# self.X = vins.drop(target, axis=1)
|
||||
# self.y = vins[target]
|
||||
|
||||
from cleaning import Cleaning
|
||||
# self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
|
||||
# self.X, self.y, test_size=0.25, random_state=49
|
||||
# )
|
||||
|
||||
# def evaluate(
|
||||
# self,
|
||||
# estimator,
|
||||
# pretreatment=None,
|
||||
# fn_score=lambda m, xt, yt: m.score(xt, yt),
|
||||
# ):
|
||||
|
||||
class Learning:
|
||||
def __init__(self, vins: DataFrame, target: str) -> None:
|
||||
self.X = vins.drop(target, axis=1)
|
||||
self.y = vins[target]
|
||||
# pipeline = make_pipeline(pretreatment, estimator) if pretreatment else estimator
|
||||
# pipeline.fit(self.X_train, self.y_train)
|
||||
# score = fn_score(pipeline, self.X_test, self.y_test)
|
||||
# prediction = pipeline.predict(self.X_test)
|
||||
|
||||
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
|
||||
self.X, self.y, test_size=0.25, random_state=49
|
||||
)
|
||||
# return score, prediction
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
estimator,
|
||||
pretreatment=None,
|
||||
fn_score=lambda m, xt, yt: m.score(xt, yt),
|
||||
):
|
||||
# def draw(self, predictions, y_actual):
|
||||
# plt.figure(figsize=(8, 6))
|
||||
|
||||
pipeline = make_pipeline(pretreatment, estimator) if pretreatment else estimator
|
||||
pipeline.fit(self.X_train, self.y_train)
|
||||
score = fn_score(pipeline, self.X_test, self.y_test)
|
||||
prediction = pipeline.predict(self.X_test)
|
||||
# plt.scatter(
|
||||
# predictions,
|
||||
# y_actual,
|
||||
# alpha=0.5,
|
||||
# c="royalblue",
|
||||
# edgecolors="k",
|
||||
# label="Vins",
|
||||
# )
|
||||
|
||||
return score, prediction
|
||||
# mn = min(predictions.min(), y_actual.min())
|
||||
# mx = max(predictions.max(), y_actual.max())
|
||||
# plt.plot(
|
||||
# [mn, mx],
|
||||
# [mn, mx],
|
||||
# color="red",
|
||||
# linestyle="--",
|
||||
# lw=2,
|
||||
# label="Prédiction Parfaite",
|
||||
# )
|
||||
|
||||
def draw(self, predictions, y_actual):
|
||||
plt.figure(figsize=(8, 6))
|
||||
# plt.xlabel("Prix estimés (estim_LR)")
|
||||
# plt.ylabel("Prix réels (y_test)")
|
||||
# plt.title("titre")
|
||||
# plt.legend()
|
||||
# plt.grid(True, linestyle=":", alpha=0.6)
|
||||
|
||||
plt.scatter(
|
||||
predictions,
|
||||
y_actual,
|
||||
alpha=0.5,
|
||||
c="royalblue",
|
||||
edgecolors="k",
|
||||
label="Vins",
|
||||
)
|
||||
# plt.show()
|
||||
|
||||
mn = min(predictions.min(), y_actual.min())
|
||||
mx = max(predictions.max(), y_actual.max())
|
||||
plt.plot(
|
||||
[mn, mx],
|
||||
[mn, mx],
|
||||
color="red",
|
||||
linestyle="--",
|
||||
lw=2,
|
||||
label="Prédiction Parfaite",
|
||||
)
|
||||
|
||||
plt.xlabel("Prix estimés (estim_LR)")
|
||||
plt.ylabel("Prix réels (y_test)")
|
||||
plt.title("titre")
|
||||
plt.legend()
|
||||
plt.grid(True, linestyle=":", alpha=0.6)
|
||||
|
||||
plt.show()
|
||||
|
||||
|
||||
df_vins = (
|
||||
Cleaning("data.csv")
|
||||
.drop_empty_appellation()
|
||||
.fill_missing_scores()
|
||||
.encode_appellation()
|
||||
.drop_empty_price()
|
||||
.getVins()
|
||||
)
|
||||
|
||||
etude = Learning(df_vins, target="Prix")
|
||||
|
||||
print("--- Question 16 & 17 ---")
|
||||
score_simple, estim_simple = etude.evaluate(LinearRegression())
|
||||
print(f"Score R² (LR Simple) : {score_simple:.4f}")
|
||||
|
||||
etude.draw(estim_simple, etude.y_test)
|
||||
|
||||
|
||||
print("\n--- Question 18 ---")
|
||||
score_std, estim_std = etude.evaluate(
|
||||
estimator=LinearRegression(), pretreatment=StandardScaler()
|
||||
)
|
||||
print(f"Score R² (Standardisation + LR) : {score_std:.4f}")
|
||||
|
||||
etude.draw(estim_std, etude.y_test)
|
||||
|
||||
@@ -490,9 +490,10 @@ class Scraper:
|
||||
savestate((page, cache))
|
||||
|
||||
|
||||
def main() -> None:
|
||||
def main(filename: str | None = None, suburl: str | None = None) -> None:
|
||||
if filename is None or suburl is None:
|
||||
if len(argv) != 3:
|
||||
raise ValueError(f"{argv[0]} <filename> <sous-url>")
|
||||
raise ValueError(f"Usage: python {argv[0]} <filename> <sous-url>")
|
||||
filename = argv[1]
|
||||
suburl = argv[2]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user