mirror of
https://github.com/guezoloic/millesima_projetS6.git
synced 2026-03-31 04:11:34 +00:00
Compare commits
3 Commits
416cfcbf8b
...
68dffa6486
| Author | SHA1 | Date | |
|---|---|---|---|
| 68dffa6486 | |||
| c7d2077b23 | |||
| 106877a073 |
6
.github/workflows/python-app.yml
vendored
6
.github/workflows/python-app.yml
vendored
@@ -19,15 +19,15 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Set up Python 3.10
|
- name: Set up Python 3.x
|
||||||
uses: actions/setup-python@v4
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: "3.10"
|
python-version: "3.x"
|
||||||
|
|
||||||
- name: install dependencies
|
- name: install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install ".[test,doc]"
|
pip install ".[test]"
|
||||||
|
|
||||||
- name: Lint with flake8
|
- name: Lint with flake8
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
5
.github/workflows/static.yml
vendored
5
.github/workflows/static.yml
vendored
@@ -32,15 +32,14 @@ jobs:
|
|||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Set up Python 3.10
|
- name: Set up Python 3.x
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: '3.10'
|
python-version: '3.x'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
# Installe le projet en mode éditable avec les extras de doc
|
|
||||||
pip install -e ".[doc]"
|
pip install -e ".[doc]"
|
||||||
|
|
||||||
- name: Setup Pages
|
- name: Setup Pages
|
||||||
|
|||||||
387
learning.ipynb
Normal file
387
learning.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -6,6 +6,8 @@ dependencies = [
|
|||||||
"beautifulsoup4==4.14.3",
|
"beautifulsoup4==4.14.3",
|
||||||
"pandas==2.3.3",
|
"pandas==2.3.3",
|
||||||
"tqdm==4.67.3",
|
"tqdm==4.67.3",
|
||||||
|
"scikit-learn==1.7.2",
|
||||||
|
"matplotlib==3.10.8"
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
|
|||||||
@@ -92,18 +92,24 @@ class Cleaning:
|
|||||||
self._vins = self._vins.join(appellation_dummies)
|
self._vins = self._vins.join(appellation_dummies)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def drop_empty_price(self) -> "Cleaning":
    """Discard every row whose ``Prix`` value is missing.

    Returns:
        The instance itself, so the call can be chained with other steps.
    """
    priced_rows = self._vins.dropna(subset=["Prix"])
    self._vins = priced_rows
    return self
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
if len(argv) != 2:
|
if len(argv) != 2:
|
||||||
raise ValueError(f"Usage: {argv[0]} <filename.csv>")
|
raise ValueError(f"Usage: {argv[0]} <filename.csv>")
|
||||||
|
|
||||||
filename = argv[1]
|
filename = argv[1]
|
||||||
cleaning: Cleaning = Cleaning(filename)
|
cleaning: Cleaning = (
|
||||||
cleaning.drop_empty_appellation() \
|
Cleaning(filename)
|
||||||
.fill_missing_scores() \
|
.drop_empty_appellation()
|
||||||
.encode_appellation() \
|
.fill_missing_scores()
|
||||||
.getVins() \
|
.encode_appellation()
|
||||||
.to_csv("clean.csv", index=False)
|
.drop_empty_price()
|
||||||
|
)
|
||||||
|
cleaning.getVins().to_csv("clean.csv", index=False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
93
src/learning.py
Executable file
93
src/learning.py
Executable file
@@ -0,0 +1,93 @@
|
|||||||
|
|
||||||
|
|
||||||
|
from typing import Any, Callable
|
||||||
|
from pandas import DataFrame
|
||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.pipeline import make_pipeline
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
from cleaning import Cleaning
|
||||||
|
|
||||||
|
|
||||||
|
class Learning:
    """Hold one train/test split of a table and evaluate estimators on it.

    The constructor separates the target column from the remaining columns
    and splits both once (25 % test, fixed seed), so every estimator passed
    to :meth:`evaluate` is fitted and scored on the same rows.
    """

    def __init__(self, vins: DataFrame, target: str) -> None:
        """Split *vins* into features/target and into train/test subsets.

        Args:
            vins: Table containing the feature columns and the target column.
            target: Name of the column to predict.
        """
        self.X = vins.drop(target, axis=1)
        self.y = vins[target]

        # Fixed random_state: repeated runs use the same held-out rows.
        split = train_test_split(
            self.X, self.y, test_size=0.25, random_state=49
        )
        self.X_train, self.X_test, self.y_train, self.y_test = split

    def evaluate(
        self,
        estimator: Any,
        pretreatment: Any = None,
        fn_score: Callable[[Any, Any, Any], Any] = (
            lambda m, xt, yt: m.score(xt, yt)
        ),
    ) -> tuple[Any, Any]:
        """Fit a model on the training split and assess it on the test split.

        Args:
            estimator: Object exposing ``fit`` and ``predict`` (and ``score``
                when the default *fn_score* is used). It is fitted in place.
            pretreatment: Optional preprocessing step. When given, it is
                chained in front of *estimator* with ``make_pipeline``.
            fn_score: Callable ``(fitted_model, X_test, y_test) -> score``.
                Defaults to the fitted model's own ``score`` method.

        Returns:
            A ``(score, prediction)`` pair, where *prediction* is the output
            of ``predict`` on the test features.
        """
        # Compare with None explicitly: a truthiness test would consult
        # __len__/__bool__ and could silently drop a supplied transformer.
        if pretreatment is None:
            model = estimator
        else:
            model = make_pipeline(pretreatment, estimator)

        model.fit(self.X_train, self.y_train)
        score = fn_score(model, self.X_test, self.y_test)
        prediction = model.predict(self.X_test)

        return score, prediction

    def draw(
        self,
        predictions: Any,
        y_actual: Any,
        *,
        title: str = "titre",
        xlabel: str = "Prix estimés (estim_LR)",
        ylabel: str = "Prix réels (y_test)",
    ) -> None:
        """Scatter predicted against actual values and show the figure.

        A dashed red diagonal spanning the common value range marks where a
        perfect prediction would fall.

        Args:
            predictions: Predicted values; must provide ``min()``/``max()``.
            y_actual: Observed values; must provide ``min()``/``max()``.
            title: Figure title. The default is the historical placeholder,
                kept so existing calls render exactly as before.
            xlabel: Label of the x axis (predicted values).
            ylabel: Label of the y axis (observed values).
        """
        plt.figure(figsize=(8, 6))

        plt.scatter(
            predictions,
            y_actual,
            alpha=0.5,
            c="royalblue",
            edgecolors="k",
            label="Vins",
        )

        # Same bounds on both axes so the reference line is the y = x diagonal.
        mn = min(predictions.min(), y_actual.min())
        mx = max(predictions.max(), y_actual.max())
        plt.plot(
            [mn, mx],
            [mn, mx],
            color="red",
            linestyle="--",
            lw=2,
            label="Prédiction Parfaite",
        )

        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend()
        plt.grid(True, linestyle=":", alpha=0.6)

        plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE(review): this section runs at import time (there is no ``__main__``
# guard), reads the hard-coded "data.csv", and each draw() call ends in
# plt.show().

# Cleaning steps, one per line; each step's return value is the object the
# next step is called on, exactly as in a single chained expression.
_cleaning = Cleaning("data.csv")
_cleaning = _cleaning.drop_empty_appellation()
_cleaning = _cleaning.fill_missing_scores()
_cleaning = _cleaning.encode_appellation()
_cleaning = _cleaning.drop_empty_price()
df_vins = _cleaning.getVins()

etude = Learning(df_vins, "Prix")

# Linear regression alone, scored with evaluate()'s default scoring callable.
print("--- Question 16 & 17 ---")
score_simple, estim_simple = etude.evaluate(estimator=LinearRegression())
print(f"Score R² (LR Simple) : {score_simple:.4f}")
etude.draw(predictions=estim_simple, y_actual=etude.y_test)

# Same regression with a StandardScaler step placed in front of it.
print("\n--- Question 18 ---")
score_std, estim_std = etude.evaluate(LinearRegression(), StandardScaler())
print(f"Score R² (Standardisation + LR) : {score_std:.4f}")
etude.draw(predictions=estim_std, y_actual=etude.y_test)
|
||||||
Reference in New Issue
Block a user