mirror of
https://github.com/guezoloic/millesima_projetS6.git
synced 2026-03-28 19:13:42 +00:00
Compare commits
7 Commits
888defb6b6
...
jalon3
| Author | SHA1 | Date | |
|---|---|---|---|
| 68dffa6486 | |||
| c7d2077b23 | |||
| 106877a073 | |||
|
|
416cfcbf8b | ||
| 32c5310e37 | |||
| 9dfc7457a0 | |||
| f5d5703e49 |
18
.github/dependabot.yml
vendored
Normal file
18
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "saturday"
|
||||
open-pull-requests-limit: 5
|
||||
groups:
|
||||
python-dependencies:
|
||||
patterns:
|
||||
- "*"
|
||||
|
||||
6
.github/workflows/python-app.yml
vendored
6
.github/workflows/python-app.yml
vendored
@@ -19,15 +19,15 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python 3.10
|
||||
- name: Set up Python 3.x
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: "3.x"
|
||||
|
||||
- name: install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install ".[test,doc]"
|
||||
pip install ".[test]"
|
||||
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
|
||||
5
.github/workflows/static.yml
vendored
5
.github/workflows/static.yml
vendored
@@ -32,15 +32,14 @@ jobs:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python 3.10
|
||||
- name: Set up Python 3.x
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
python-version: '3.x'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
# Installe le projet en mode éditable avec les extras de doc
|
||||
pip install -e ".[doc]"
|
||||
|
||||
- name: Setup Pages
|
||||
|
||||
387
learning.ipynb
Normal file
387
learning.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -6,8 +6,14 @@ dependencies = [
|
||||
"beautifulsoup4==4.14.3",
|
||||
"pandas==2.3.3",
|
||||
"tqdm==4.67.3",
|
||||
"scikit-learn==1.7.2",
|
||||
"matplotlib==3.10.8"
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
pythonpath = "src"
|
||||
testpaths = ["tests"]
|
||||
|
||||
[project.optional-dependencies]
|
||||
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
|
||||
doc = ["mkdocs<2.0.0", "mkdocs-material==9.6.23", "mkdocstrings[python]"]
|
||||
|
||||
@@ -92,18 +92,24 @@ class Cleaning:
|
||||
self._vins = self._vins.join(appellation_dummies)
|
||||
return self
|
||||
|
||||
def drop_empty_price(self) -> "Cleaning":
    """Discard every row whose ``Prix`` value is missing.

    Returns:
        This instance, so that further calls can be chained.
    """
    priced_rows = self._vins.dropna(subset=["Prix"])
    self._vins = priced_rows
    return self
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if len(argv) != 2:
|
||||
raise ValueError(f"Usage: {argv[0]} <filename.csv>")
|
||||
|
||||
filename = argv[1]
|
||||
cleaning: Cleaning = Cleaning(filename)
|
||||
cleaning.drop_empty_appellation() \
|
||||
.fill_missing_scores() \
|
||||
.encode_appellation() \
|
||||
.getVins() \
|
||||
.to_csv("clean.csv", index=False)
|
||||
cleaning: Cleaning = (
|
||||
Cleaning(filename)
|
||||
.drop_empty_appellation()
|
||||
.fill_missing_scores()
|
||||
.encode_appellation()
|
||||
.drop_empty_price()
|
||||
)
|
||||
cleaning.getVins().to_csv("clean.csv", index=False)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
93
src/learning.py
Executable file
93
src/learning.py
Executable file
@@ -0,0 +1,93 @@
|
||||
|
||||
|
||||
from typing import Any, Callable
|
||||
from pandas import DataFrame
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.pipeline import make_pipeline
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from cleaning import Cleaning
|
||||
|
||||
|
||||
class Learning:
    """Train/test harness for models predicting one column of a frame.

    The input frame is separated into features (every column except
    ``target``) and the target column, then split once into a training
    and a test partition that each call to ``evaluate`` reuses.
    """

    def __init__(
        self,
        vins: DataFrame,
        target: str,
        *,
        test_size: float = 0.25,
        random_state: int = 49,
    ) -> None:
        """Split ``vins`` into features/target and train/test partitions.

        Args:
            vins: Data set holding both the features and the target.
            target: Name of the column to predict; it is removed from
                the feature set.
            test_size: Forwarded to ``train_test_split``. Defaults to
                0.25, the value that used to be hard-coded.
            random_state: Seed forwarded to ``train_test_split``.
                Defaults to 49, the value that used to be hard-coded.
        """
        self.X = vins.drop(target, axis=1)
        self.y = vins[target]

        (
            self.X_train,
            self.X_test,
            self.y_train,
            self.y_test,
        ) = train_test_split(
            self.X,
            self.y,
            test_size=test_size,
            random_state=random_state,
        )

    def evaluate(
        self,
        estimator: Any,
        pretreatment: Any = None,
        fn_score: Callable[[Any, Any, Any], Any] = (
            lambda m, xt, yt: m.score(xt, yt)
        ),
    ) -> tuple[Any, Any]:
        """Fit on the training split, then score and predict on the test one.

        Args:
            estimator: Object exposing ``fit`` and ``predict`` (and
                ``score`` when the default ``fn_score`` is kept). When
                no ``pretreatment`` is given it is fitted in place.
            pretreatment: Optional preprocessing step chained in front
                of ``estimator`` with ``make_pipeline``. ``None`` means
                the estimator is used on its own.
            fn_score: Callable receiving ``(fitted_model, X_test,
                y_test)`` and returning the score. Defaults to the
                model's own ``score`` method.

        Returns:
            A ``(score, prediction)`` pair, where ``prediction`` is the
            output of ``predict`` on the test features.
        """
        # Test against None explicitly: relying on truthiness would
        # silently drop a step object that evaluates as false.
        if pretreatment is None:
            model = estimator
        else:
            model = make_pipeline(pretreatment, estimator)

        model.fit(self.X_train, self.y_train)
        score = fn_score(model, self.X_test, self.y_test)
        prediction = model.predict(self.X_test)

        return score, prediction

    def draw(
        self,
        predictions: Any,
        y_actual: Any,
        *,
        title: str = "titre",
        xlabel: str = "Prix estimés (estim_LR)",
        ylabel: str = "Prix réels (y_test)",
    ) -> None:
        """Show a scatter plot of predicted against actual values.

        Predictions are placed on the x axis and actual values on the
        y axis. A dashed diagonal spanning the overall min/max marks
        where points would fall if every prediction were exact.

        Args:
            predictions: Predicted values; must expose ``min()`` and
                ``max()``.
            y_actual: Observed values; must expose ``min()`` and
                ``max()``.
            title: Figure title. Defaults to the original text.
            xlabel: Label of the x axis. Defaults to the original text.
            ylabel: Label of the y axis. Defaults to the original text.
        """
        plt.figure(figsize=(8, 6))

        plt.scatter(
            predictions,
            y_actual,
            alpha=0.5,
            c="royalblue",
            edgecolors="k",
            label="Vins",
        )

        # Common bounds so the reference diagonal covers both series.
        lowest = min(predictions.min(), y_actual.min())
        highest = max(predictions.max(), y_actual.max())
        plt.plot(
            [lowest, highest],
            [lowest, highest],
            color="red",
            linestyle="--",
            lw=2,
            label="Prédiction Parfaite",
        )

        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend()
        plt.grid(True, linestyle=":", alpha=0.6)

        plt.show()
|
||||
|
||||
|
||||
# Run the Cleaning steps one after the other on "data.csv" and keep the
# resulting frame as the modelling data set.
_cleaner = Cleaning("data.csv")
_cleaner = _cleaner.drop_empty_appellation().fill_missing_scores()
_cleaner = _cleaner.encode_appellation().drop_empty_price()
df_vins = _cleaner.getVins()

# "Prix" is the column to predict; every other column is a feature.
etude = Learning(df_vins, "Prix")

# Plain linear regression, without any preprocessing step.
print("--- Question 16 & 17 ---")
score_simple, estim_simple = etude.evaluate(estimator=LinearRegression())
print(f"Score R² (LR Simple) : {score_simple:.4f}")

etude.draw(predictions=estim_simple, y_actual=etude.y_test)

# Same regression, with a StandardScaler applied beforehand.
print("\n--- Question 18 ---")
score_std, estim_std = etude.evaluate(LinearRegression(), StandardScaler())
print(f"Score R² (Standardisation + LR) : {score_std:.4f}")

etude.draw(predictions=estim_std, y_actual=etude.y_test)
|
||||
@@ -377,9 +377,6 @@ class Scraper:
|
||||
try:
|
||||
data: dict[str, object] = self.getjsondata(subdir).getdata()
|
||||
|
||||
for element in ["initialReduxState", "categ", "content"]:
|
||||
data = cast(dict[str, object], data.get(element))
|
||||
|
||||
products: list[dict[str, Any]] = cast(
|
||||
list[dict[str, Any]], data.get("products")
|
||||
)
|
||||
|
||||
@@ -185,17 +185,11 @@ def mock_site():
|
||||
{dumps({
|
||||
"props": {
|
||||
"pageProps": {
|
||||
"initialReduxState": {
|
||||
"categ": {
|
||||
"content": {
|
||||
"products": [
|
||||
{"seoKeyword": "/nino-negri-5-stelle-sfursat-2022.html",},
|
||||
{"seoKeyword": "/poubelle",},
|
||||
{"seoKeyword": "/",}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
"products": [
|
||||
{"seoKeyword": "/nino-negri-5-stelle-sfursat-2022.html",},
|
||||
{"seoKeyword": "/poubelle",},
|
||||
{"seoKeyword": "/",}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -213,14 +207,8 @@ def mock_site():
|
||||
{dumps({
|
||||
"props": {
|
||||
"pageProps": {
|
||||
"initialReduxState": {
|
||||
"categ": {
|
||||
"content": {
|
||||
"products": [
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
"products": [
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user