7 Commits

Author SHA1 Message Date
68dffa6486 feat(learning.ipynb): ajout meilleur algo 2026-03-28 20:04:14 +01:00
c7d2077b23 feat: ajout premier modele (1ere partie) 2026-03-28 19:58:09 +01:00
106877a073 feat: init Learning class and add drop_empty_price function 2026-03-28 15:51:46 +01:00
Loïc GUEZO
416cfcbf8b Add Python package ecosystem to Dependabot config
Configure Dependabot for Python package updates.
2026-03-27 22:11:53 +01:00
32c5310e37 fix: mettre à jour les tests pytest 2026-03-27 22:06:36 +01:00
9dfc7457a0 fix(scraper.py): retirer commentaire code et print 2026-03-27 22:06:06 +01:00
f5d5703e49 fix(scraper): recherche _getproduitslist actualisé
Suite à une refonte de l'UI et du backend, la structure de données JSON envoyée par la page web a été simplifiée.

Ancienne structure:

- `"props"->"pageProps"->"initialReduxState"->"categ"->"content"->"produits"`

Nouvelle structure:

- `"props"->"pageProps"->"produits"`
2026-03-27 21:47:06 +01:00
9 changed files with 528 additions and 34 deletions

18
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,18 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"
day: "saturday"
open-pull-requests-limit: 5
groups:
python-dependencies:
patterns:
- "*"

View File

@@ -19,15 +19,15 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.10
- name: Set up Python 3.x
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.x"
- name: install dependencies
run: |
python -m pip install --upgrade pip
pip install ".[test,doc]"
pip install ".[test]"
- name: Lint with flake8
run: |

View File

@@ -32,15 +32,14 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python 3.10
- name: Set up Python 3.x
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
# Installe le projet en mode éditable avec les extras de doc
pip install -e ".[doc]"
- name: Setup Pages

387
learning.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@@ -6,8 +6,14 @@ dependencies = [
"beautifulsoup4==4.14.3",
"pandas==2.3.3",
"tqdm==4.67.3",
"scikit-learn==1.7.2",
"matplotlib==3.10.8"
]
[tool.pytest.ini_options]
pythonpath = "src"
testpaths = ["tests"]
[project.optional-dependencies]
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
doc = ["mkdocs<2.0.0", "mkdocs-material==9.6.23", "mkdocstrings[python]"]

View File

@@ -92,18 +92,24 @@ class Cleaning:
self._vins = self._vins.join(appellation_dummies)
return self
def drop_empty_price(self) -> "Cleaning":
    """Discard every row whose ``Prix`` value is missing.

    Returns:
        This instance, so that cleaning steps can be chained.
    """
    priced_rows = self._vins.dropna(subset=["Prix"])
    self._vins = priced_rows
    return self
def main() -> None:
if len(argv) != 2:
raise ValueError(f"Usage: {argv[0]} <filename.csv>")
filename = argv[1]
cleaning: Cleaning = Cleaning(filename)
cleaning.drop_empty_appellation() \
.fill_missing_scores() \
.encode_appellation() \
.getVins() \
.to_csv("clean.csv", index=False)
cleaning: Cleaning = (
Cleaning(filename)
.drop_empty_appellation()
.fill_missing_scores()
.encode_appellation()
.drop_empty_price()
)
cleaning.getVins().to_csv("clean.csv", index=False)
if __name__ == "__main__":

93
src/learning.py Executable file
View File

@@ -0,0 +1,93 @@
from typing import Any, Callable
from pandas import DataFrame
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
from cleaning import Cleaning
class Learning:
    """Hold a train/test split of a dataset and evaluate estimators on it.

    Attributes set by ``__init__``:
        X, y: feature columns and target column of the full dataset.
        X_train, X_test, y_train, y_test: 75 % / 25 % split of ``X`` and ``y``.
    """

    def __init__(self, vins: DataFrame, target: str) -> None:
        """Separate features from the target and split into train/test sets.

        Args:
            vins: Full dataset; must contain the ``target`` column.
            target: Name of the column to predict.
        """
        self.X = vins.drop(target, axis=1)
        self.y = vins[target]
        # Fixed random_state so repeated runs use the same split.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.25, random_state=49
        )

    def evaluate(
        self,
        estimator: Any,
        pretreatment: Any = None,
        fn_score: Callable[[Any, Any, Any], Any] = (
            lambda m, xt, yt: m.score(xt, yt)
        ),
    ) -> tuple[Any, Any]:
        """Fit a model on the training split and score it on the test split.

        Args:
            estimator: Object exposing ``fit`` and ``predict`` (and ``score``
                when the default ``fn_score`` is used).
            pretreatment: Optional preprocessing step placed in front of
                ``estimator`` with ``make_pipeline``. ``None`` means the
                estimator is used on its own.
            fn_score: Callable ``(model, X_test, y_test) -> score``. Defaults
                to the model's own ``score`` method.

        Returns:
            A ``(score, prediction)`` pair, both computed on the test split.
        """
        # Compare against None explicitly: a valid step may be falsy (for
        # example an object defining ``__len__``) and must not be dropped.
        if pretreatment is not None:
            model = make_pipeline(pretreatment, estimator)
        else:
            model = estimator
        model.fit(self.X_train, self.y_train)
        score = fn_score(model, self.X_test, self.y_test)
        prediction = model.predict(self.X_test)
        return score, prediction

    def draw(self, predictions: Any, y_actual: Any, title: str = "titre") -> None:
        """Show a scatter plot of predicted versus actual values.

        A dashed red diagonal marks where predicted and actual values are
        equal. The figure is displayed with ``plt.show()``.

        Args:
            predictions: Predicted values; must support ``.min()``/``.max()``.
            y_actual: Observed values; must support ``.min()``/``.max()``.
            title: Figure title. Defaults to the original placeholder text.
        """
        plt.figure(figsize=(8, 6))
        plt.scatter(
            predictions,
            y_actual,
            alpha=0.5,
            c="royalblue",
            edgecolors="k",
            label="Vins",
        )

        # Span the diagonal over the combined range of both series.
        lower = min(predictions.min(), y_actual.min())
        upper = max(predictions.max(), y_actual.max())
        plt.plot(
            [lower, upper],
            [lower, upper],
            color="red",
            linestyle="--",
            lw=2,
            label="Prédiction Parfaite",
        )

        plt.xlabel("Prix estimés (estim_LR)")
        plt.ylabel("Prix réels (y_test)")
        plt.title(title)
        plt.legend()
        plt.grid(True, linestyle=":", alpha=0.6)
        plt.show()
# Script section: build the cleaned dataset, then fit, score and plot two
# linear regressions on it.
# NOTE(review): these statements are at module level with no ``__main__``
# guard, so they also run when this module is imported — confirm that is
# intended.
df_vins = (
Cleaning("data.csv")
.drop_empty_appellation()
.fill_missing_scores()
.encode_appellation()
.drop_empty_price()
.getVins()
)
# "Prix" is the column to predict; every other column is used as a feature.
etude = Learning(df_vins, target="Prix")
# Plain linear regression, no preprocessing.
print("--- Question 16 & 17 ---")
score_simple, estim_simple = etude.evaluate(LinearRegression())
print(f"Score R² (LR Simple) : {score_simple:.4f}")
etude.draw(estim_simple, etude.y_test)
# Same estimator with a StandardScaler step placed in front of it.
print("\n--- Question 18 ---")
score_std, estim_std = etude.evaluate(
estimator=LinearRegression(), pretreatment=StandardScaler()
)
print(f"Score R² (Standardisation + LR) : {score_std:.4f}")
etude.draw(estim_std, etude.y_test)

View File

@@ -377,9 +377,6 @@ class Scraper:
try:
data: dict[str, object] = self.getjsondata(subdir).getdata()
for element in ["initialReduxState", "categ", "content"]:
data = cast(dict[str, object], data.get(element))
products: list[dict[str, Any]] = cast(
list[dict[str, Any]], data.get("products")
)

View File

@@ -185,17 +185,11 @@ def mock_site():
{dumps({
"props": {
"pageProps": {
"initialReduxState": {
"categ": {
"content": {
"products": [
{"seoKeyword": "/nino-negri-5-stelle-sfursat-2022.html",},
{"seoKeyword": "/poubelle",},
{"seoKeyword": "/",}
]
}
}
}
"products": [
{"seoKeyword": "/nino-negri-5-stelle-sfursat-2022.html",},
{"seoKeyword": "/poubelle",},
{"seoKeyword": "/",}
]
}
}
}
@@ -213,14 +207,8 @@ def mock_site():
{dumps({
"props": {
"pageProps": {
"initialReduxState": {
"categ": {
"content": {
"products": [
]
}
}
}
"products": [
]
}
}
}