mirror of
https://github.com/guezoloic/millesima-ai-engine.git
synced 2026-03-31 03:01:36 +00:00
Compare commits
20 Commits
f5d5703e49
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d74d15d7b1 | ||
|
|
f29130f4bb | ||
|
|
615418f347 | ||
| 83c53dd6b0 | |||
| fc3f516cdd | |||
| 235275cfab | |||
| faca333cbf | |||
| f4dd93e4b0 | |||
| 7a4e49684f | |||
| f223acdfe6 | |||
| 7cd24bf6cb | |||
|
|
a75769eb3b | ||
| de513fca15 | |||
|
|
f87ea357f4 | ||
| 68dffa6486 | |||
| c7d2077b23 | |||
| 106877a073 | |||
|
|
416cfcbf8b | ||
| 32c5310e37 | |||
| 9dfc7457a0 |
18
.github/dependabot.yml
vendored
Normal file
18
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# To get started with Dependabot version updates, you'll need to specify which
|
||||||
|
# package ecosystems to update and where the package manifests are located.
|
||||||
|
# Please see the documentation for all configuration options:
|
||||||
|
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||||
|
|
||||||
|
version: 2
|
||||||
|
updates:
|
||||||
|
- package-ecosystem: "pip"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "weekly"
|
||||||
|
day: "saturday"
|
||||||
|
open-pull-requests-limit: 5
|
||||||
|
groups:
|
||||||
|
python-dependencies:
|
||||||
|
patterns:
|
||||||
|
- "*"
|
||||||
|
|
||||||
6
.github/workflows/python-app.yml
vendored
6
.github/workflows/python-app.yml
vendored
@@ -19,15 +19,15 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Set up Python 3.10
|
- name: Set up Python 3.x
|
||||||
uses: actions/setup-python@v4
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: "3.10"
|
python-version: "3.x"
|
||||||
|
|
||||||
- name: install dependencies
|
- name: install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install ".[test,doc]"
|
pip install ".[test]"
|
||||||
|
|
||||||
- name: Lint with flake8
|
- name: Lint with flake8
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
5
.github/workflows/static.yml
vendored
5
.github/workflows/static.yml
vendored
@@ -32,15 +32,14 @@ jobs:
|
|||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Set up Python 3.10
|
- name: Set up Python 3.x
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: '3.10'
|
python-version: '3.x'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
# Installe le projet en mode éditable avec les extras de doc
|
|
||||||
pip install -e ".[doc]"
|
pip install -e ".[doc]"
|
||||||
|
|
||||||
- name: Setup Pages
|
- name: Setup Pages
|
||||||
|
|||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 Loïc GUEZO and chahrazad DAHMANI
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
> A **University of Paris-Est Créteil (UPEC)** Semester 6 project.
|
> A **University of Paris-Est Créteil (UPEC)** Semester 6 project.
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
- 🇫🇷 [Version Française](https://guezoloic.github.io/millesima-ai-engine)
|
- 🇫🇷 [Version Française](https://millesima-ai.github.guezoloic.com)
|
||||||
> Note: only the French version is available for now.
|
> Note: only the French version is available for now.
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -12,7 +12,7 @@
|
|||||||
|
|
||||||
1. **Clone the repository:**
|
1. **Clone the repository:**
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/votre-pseudo/millesima-ai-engine.git
|
git clone https://github.com/guezoloic/millesima-ai-engine.git
|
||||||
cd millesima-ai-engine
|
cd millesima-ai-engine
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -5,12 +5,12 @@ L’objectif de ce projet est d’étudier, en utilisant des méthodes d’appre
|
|||||||
## projet
|
## projet
|
||||||
<div style="text-align: center;">
|
<div style="text-align: center;">
|
||||||
<object
|
<object
|
||||||
data="/millesima-ai-engine/projet.pdf"
|
data="/projet.pdf"
|
||||||
type="application/pdf"
|
type="application/pdf"
|
||||||
width="100%"
|
width="100%"
|
||||||
height="1000px"
|
height="1000px"
|
||||||
>
|
>
|
||||||
<p>Votre navigateur ne peut pas afficher ce PDF.
|
<p>Votre navigateur ne peut pas afficher ce PDF.
|
||||||
<a href="/millesima-ai-engine/projet.pdf">Cliquez ici pour le télécharger.</a></p>
|
<a href="/projet.pdf">Cliquez ici pour le télécharger.</a></p>
|
||||||
</object>
|
</object>
|
||||||
</div>
|
</div>
|
||||||
1287
docs/learning.ipynb
Normal file
1287
docs/learning.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
|
|||||||
site_name: "Projet Millesima S6"
|
site_name: "Projet Millesima S6"
|
||||||
site_url: "https://github.guezoloic.com/millesima-ai-engine/"
|
site_url: "https://millesima-ai.github.guezoloic.com"
|
||||||
|
|
||||||
theme:
|
theme:
|
||||||
name: "material"
|
name: "material"
|
||||||
@@ -7,6 +7,7 @@ theme:
|
|||||||
plugins:
|
plugins:
|
||||||
- search
|
- search
|
||||||
- mkdocstrings
|
- mkdocstrings
|
||||||
|
- mkdocs-jupyter
|
||||||
|
|
||||||
extra:
|
extra:
|
||||||
generator: false
|
generator: false
|
||||||
|
|||||||
@@ -1,16 +1,28 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "projet-millesima-s6"
|
name = "millesima-project-s6"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"requests==2.32.5",
|
"requests==2.33.0",
|
||||||
"beautifulsoup4==4.14.3",
|
"beautifulsoup4==4.14.3",
|
||||||
"pandas==2.3.3",
|
"pandas==3.0.1",
|
||||||
"tqdm==4.67.3",
|
"tqdm==4.67.3",
|
||||||
|
"scikit-learn==1.8.0",
|
||||||
|
"matplotlib==3.10.8",
|
||||||
|
"seaborn==0.13.2"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
pythonpath = "src"
|
||||||
|
testpaths = ["tests"]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
test = ["pytest==8.4.2", "requests-mock==1.12.1", "flake8==7.3.0"]
|
test = ["pytest==9.0.2", "requests-mock==1.12.1", "flake8==7.3.0"]
|
||||||
doc = ["mkdocs<2.0.0", "mkdocs-material==9.6.23", "mkdocstrings[python]"]
|
doc = [
|
||||||
|
"mkdocs<2.0.0",
|
||||||
|
"mkdocs-material==9.7.6",
|
||||||
|
"mkdocstrings[python]",
|
||||||
|
"mkdocs-jupyter==0.26.1",
|
||||||
|
]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["setuptools", "wheel"]
|
requires = ["setuptools", "wheel"]
|
||||||
|
|||||||
@@ -92,18 +92,25 @@ class Cleaning:
|
|||||||
self._vins = self._vins.join(appellation_dummies)
|
self._vins = self._vins.join(appellation_dummies)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def drop_empty_price(self) -> "Cleaning":
|
||||||
|
self._vins = self._vins.dropna(subset=["Prix"])
|
||||||
|
return self
|
||||||
|
|
||||||
def main() -> None:
|
|
||||||
|
def main(filename: str | None = None) -> None:
|
||||||
|
if not filename:
|
||||||
if len(argv) != 2:
|
if len(argv) != 2:
|
||||||
raise ValueError(f"Usage: {argv[0]} <filename.csv>")
|
raise ValueError(f"Usage: {argv[0]} <filename.csv>")
|
||||||
|
|
||||||
filename = argv[1]
|
filename = argv[1]
|
||||||
cleaning: Cleaning = Cleaning(filename)
|
|
||||||
cleaning.drop_empty_appellation() \
|
cleaning: Cleaning = (
|
||||||
.fill_missing_scores() \
|
Cleaning(filename)
|
||||||
.encode_appellation() \
|
.drop_empty_appellation()
|
||||||
.getVins() \
|
.fill_missing_scores()
|
||||||
.to_csv("clean.csv", index=False)
|
.encode_appellation()
|
||||||
|
.drop_empty_price()
|
||||||
|
)
|
||||||
|
cleaning.getVins().to_csv("clean.csv", index=False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
64
src/learning.py
Executable file
64
src/learning.py
Executable file
@@ -0,0 +1,64 @@
|
|||||||
|
# from typing import Any, Callable
|
||||||
|
# from pandas import DataFrame
|
||||||
|
# from sklearn.linear_model import LinearRegression
|
||||||
|
# from sklearn.preprocessing import StandardScaler
|
||||||
|
# from sklearn.model_selection import train_test_split
|
||||||
|
# from sklearn.pipeline import make_pipeline
|
||||||
|
# import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# from cleaning import Cleaning
|
||||||
|
|
||||||
|
|
||||||
|
# class Learning:
|
||||||
|
# def __init__(self, vins: DataFrame, target: str) -> None:
|
||||||
|
# self.X = vins.drop(target, axis=1)
|
||||||
|
# self.y = vins[target]
|
||||||
|
|
||||||
|
# self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
|
||||||
|
# self.X, self.y, test_size=0.25, random_state=49
|
||||||
|
# )
|
||||||
|
|
||||||
|
# def evaluate(
|
||||||
|
# self,
|
||||||
|
# estimator,
|
||||||
|
# pretreatment=None,
|
||||||
|
# fn_score=lambda m, xt, yt: m.score(xt, yt),
|
||||||
|
# ):
|
||||||
|
|
||||||
|
# pipeline = make_pipeline(pretreatment, estimator) if pretreatment else estimator
|
||||||
|
# pipeline.fit(self.X_train, self.y_train)
|
||||||
|
# score = fn_score(pipeline, self.X_test, self.y_test)
|
||||||
|
# prediction = pipeline.predict(self.X_test)
|
||||||
|
|
||||||
|
# return score, prediction
|
||||||
|
|
||||||
|
# def draw(self, predictions, y_actual):
|
||||||
|
# plt.figure(figsize=(8, 6))
|
||||||
|
|
||||||
|
# plt.scatter(
|
||||||
|
# predictions,
|
||||||
|
# y_actual,
|
||||||
|
# alpha=0.5,
|
||||||
|
# c="royalblue",
|
||||||
|
# edgecolors="k",
|
||||||
|
# label="Vins",
|
||||||
|
# )
|
||||||
|
|
||||||
|
# mn = min(predictions.min(), y_actual.min())
|
||||||
|
# mx = max(predictions.max(), y_actual.max())
|
||||||
|
# plt.plot(
|
||||||
|
# [mn, mx],
|
||||||
|
# [mn, mx],
|
||||||
|
# color="red",
|
||||||
|
# linestyle="--",
|
||||||
|
# lw=2,
|
||||||
|
# label="Prédiction Parfaite",
|
||||||
|
# )
|
||||||
|
|
||||||
|
# plt.xlabel("Prix estimés (estim_LR)")
|
||||||
|
# plt.ylabel("Prix réels (y_test)")
|
||||||
|
# plt.title("titre")
|
||||||
|
# plt.legend()
|
||||||
|
# plt.grid(True, linestyle=":", alpha=0.6)
|
||||||
|
|
||||||
|
# plt.show()
|
||||||
@@ -377,18 +377,10 @@ class Scraper:
|
|||||||
try:
|
try:
|
||||||
data: dict[str, object] = self.getjsondata(subdir).getdata()
|
data: dict[str, object] = self.getjsondata(subdir).getdata()
|
||||||
|
|
||||||
# Changement dans la maniere du site stocke ses données.
|
|
||||||
#
|
|
||||||
# for element in ["initialReduxState", "categ", "content"]:
|
|
||||||
# data = cast(dict[str, object], data.get(element))
|
|
||||||
# print(data)
|
|
||||||
|
|
||||||
products: list[dict[str, Any]] = cast(
|
products: list[dict[str, Any]] = cast(
|
||||||
list[dict[str, Any]], data.get("products")
|
list[dict[str, Any]], data.get("products")
|
||||||
)
|
)
|
||||||
|
|
||||||
print(products)
|
|
||||||
|
|
||||||
return products
|
return products
|
||||||
|
|
||||||
except (JSONDecodeError, HTTPError):
|
except (JSONDecodeError, HTTPError):
|
||||||
@@ -498,9 +490,10 @@ class Scraper:
|
|||||||
savestate((page, cache))
|
savestate((page, cache))
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main(filename: str | None = None, suburl: str | None = None) -> None:
|
||||||
|
if filename is None or suburl is None:
|
||||||
if len(argv) != 3:
|
if len(argv) != 3:
|
||||||
raise ValueError(f"{argv[0]} <filename> <sous-url>")
|
raise ValueError(f"Usage: python {argv[0]} <filename> <sous-url>")
|
||||||
filename = argv[1]
|
filename = argv[1]
|
||||||
suburl = argv[2]
|
suburl = argv[2]
|
||||||
|
|
||||||
|
|||||||
@@ -185,9 +185,6 @@ def mock_site():
|
|||||||
{dumps({
|
{dumps({
|
||||||
"props": {
|
"props": {
|
||||||
"pageProps": {
|
"pageProps": {
|
||||||
"initialReduxState": {
|
|
||||||
"categ": {
|
|
||||||
"content": {
|
|
||||||
"products": [
|
"products": [
|
||||||
{"seoKeyword": "/nino-negri-5-stelle-sfursat-2022.html",},
|
{"seoKeyword": "/nino-negri-5-stelle-sfursat-2022.html",},
|
||||||
{"seoKeyword": "/poubelle",},
|
{"seoKeyword": "/poubelle",},
|
||||||
@@ -196,9 +193,6 @@ def mock_site():
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)}
|
)}
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
@@ -213,17 +207,11 @@ def mock_site():
|
|||||||
{dumps({
|
{dumps({
|
||||||
"props": {
|
"props": {
|
||||||
"pageProps": {
|
"pageProps": {
|
||||||
"initialReduxState": {
|
|
||||||
"categ": {
|
|
||||||
"content": {
|
|
||||||
"products": [
|
"products": [
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)}
|
)}
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
Reference in New Issue
Block a user