-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
26 changed files
with
664 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
/config.local | ||
/tmp | ||
/cache |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Template of the environment variables the project expects; empty values must be filled in. | ||
# S3 object storage connection settings (DVC_REMOTE_NAME below suggests MinIO — TODO confirm). | ||
S3_API_HOST= | ||
S3_BUCKET_NAME= | ||
S3_ACCESS_KEY= | ||
S3_SECRET_KEY= | ||
|
||
# DVC remote settings. | ||
DVC_USE_SSL=True | ||
DVC_REMOTE_URL= | ||
DVC_REMOTE_NAME=minio | ||
|
||
|
||
# PostgreSQL settings for MLflow; the Compose file maps these onto POSTGRES_* variables. | ||
MLFLOW_POSTGRES_DB=postgres_mlflow | ||
MLFLOW_POSTGRES_USER=mlflow | ||
MLFLOW_POSTGRES_PASSWORD=mlflow_password | ||
MLFLOW_POSTGRES_PORT=5432 | ||
MLFLOW_POSTGRES_HOST=mlflow-postgres | ||
|
||
# MLflow server settings. | ||
# NOTE(review): the URI below targets localhost rather than MLFLOW_POSTGRES_HOST — confirm this is intended. | ||
# NOTE(review): the Dockerfile CMD reads MLFLOW_BACKEND_STORE_URI and MLFLOW_DEFAULT_ARTIFACT_ROOT, | ||
# which are not defined in this template — confirm where they are set. | ||
BACKEND_STORE_URI=postgresql://${MLFLOW_POSTGRES_USER}:${MLFLOW_POSTGRES_PASSWORD}@localhost:${MLFLOW_POSTGRES_PORT}/${MLFLOW_POSTGRES_DB} | ||
MLFLOW_TRACKING_URI=http://localhost:5000 | ||
MLFLOW_S3_ENDPOINT_URL=https:// | ||
MLFLOW_S3_REMOTE_URL= | ||
MLFLOW_PORT=5000 | ||
|
||
# AWS-style credential variables (presumably the same key pair as S3_ACCESS_KEY / S3_SECRET_KEY — verify). | ||
AWS_ACCESS_KEY_ID= | ||
AWS_SECRET_ACCESS_KEY= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# poetry | ||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||
# This is especially recommended for binary packages to ensure reproducibility, and is more | ||
# commonly ignored for libraries. | ||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||
#poetry.lock | ||
|
||
# pdm | ||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | ||
#pdm.lock | ||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | ||
# in version control. | ||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control | ||
.pdm.toml | ||
.pdm-python | ||
.pdm-build/ | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# PyCharm | ||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can | ||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | ||
# and can be added to the global gitignore or merged into this file. For a more nuclear | ||
# option (not recommended) you can uncomment the following to ignore the entire idea folder. | ||
#.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# pre-commit configuration: generic file hygiene hooks plus local Python linters. | ||
# default_stages: [commit, push] | ||
# Paths that pre-commit hooks must skip. The dot in ".dvc/" is escaped so that it | ||
# matches a literal dot instead of any character. | ||
exclude: '^(models/|data/|notebooks/|config/|mlflow/|\.dvc/)' | ||
|
||
repos: | ||
# Generic hygiene checks maintained by the pre-commit project. | ||
- repo: https://github.com/pre-commit/pre-commit-hooks | ||
rev: v4.6.0 | ||
hooks: | ||
- id: check-yaml | ||
- id: end-of-file-fixer | ||
- id: trailing-whitespace | ||
- id: check-merge-conflict | ||
# Reject newly added files larger than 10000 KB. | ||
- id: check-added-large-files | ||
args: ['--maxkb=10000'] | ||
# Linters executed from the local environment (language: system). | ||
- repo: local | ||
hooks: | ||
# FLAKE 8 | ||
# pre-commit appends the matching file names to the entry, so no explicit "." | ||
# target is given: a "." would lint the whole tree on every run and bypass the | ||
# top-level exclude. Run through poetry like the other local hooks. | ||
- id: flake8 | ||
name: flake8 | ||
entry: poetry run pflake8 --config pyproject.toml | ||
language: system | ||
types: [ python ] | ||
# RUFF | ||
- id: ruff | ||
name: ruff | ||
entry: poetry run ruff check | ||
language: system | ||
types: [ python ] | ||
# MYPY (currently disabled) | ||
# - id: mypy | ||
# name: mypy | ||
# entry: poetry run mypy | ||
# require_serial: true | ||
# language: system | ||
# # args: [--strict] | ||
|
||
# PYLINT | ||
- id: pylint | ||
name: pylint | ||
entry: poetry run pylint | ||
language: system | ||
types: [python] | ||
args: | ||
[ | ||
"-rn", # Only display messages | ||
"-sn", # Don't display the score | ||
"--rcfile=pyproject.toml" | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,102 @@ | ||
# ML-DL-Repository-Template | ||
Шаблон репозитория для классического ML/DL проекта | ||
|
||
**Задача:** Тут можно кратко описать задачу... | ||
|
||
## Чтобы запустить проект локально: | ||
1. Установите python 3.11 и выше | ||
2. Склонировать репозиторий: ``git clone ...`` | ||
3. Создать виртуальное окружение: ``python -m venv venv`` | ||
4. Активировать виртуальное окружение: ``venv\Scripts\activate`` (для windows) | ||
5. Заполните ``.env`` файл: | ||
6. Соберите проект: ``python setup.py`` | ||
|
||
``` | ||
S3_API_HOST= | ||
S3_BUCKET_NAME= | ||
S3_ACCESS_KEY= | ||
S3_SECRET_KEY= | ||
DVC_USE_SSL=True | ||
DVC_REMOTE_URL=s3://ml-team-spb/businessguarantees | ||
DVC_REMOTE_NAME=minio | ||
MLFLOW_POSTGRES_DB=postgres_mlflow | ||
MLFLOW_POSTGRES_USER=mlflow | ||
MLFLOW_POSTGRES_PASSWORD=mlflow_password | ||
MLFLOW_POSTGRES_PORT=5432 | ||
MLFLOW_POSTGRES_HOST=mlflow-postgres | ||
# MLFLOW S3 | ||
BACKEND_STORE_URI=postgresql://${MLFLOW_POSTGRES_USER}:${MLFLOW_POSTGRES_PASSWORD}@localhost:${MLFLOW_POSTGRES_PORT}/${MLFLOW_POSTGRES_DB} | ||
MLFLOW_TRACKING_URI=http://localhost:5000 | ||
MLFLOW_S3_ENDPOINT_URL=S3_API_HOST | ||
MLFLOW_S3_REMOTE_URL= | ||
AWS_ACCESS_KEY_ID= | ||
AWS_SECRET_ACCESS_KEY= | ||
MLFLOW_PORT=5000 | ||
``` | ||
|
||
## Чтобы запустить mlflow: | ||
1. Получить доступ к докеру и настроить его | ||
2. Заполнить env файл | ||
3. Запустить mlflow (желательно в отдельной консоли): ``python mlflow/mlflow_start.py`` | ||
|
||
## Принцип ведения git: | ||
|
||
Во время разработки следует создавать новые ветки из ``main``. | ||
|
||
1. Чтобы добавить новый функционал, создаем ветку ``название_нового_функционала`` от ``main``. | ||
|
||
(Хочу добавить поддержку линейной регрессии -> название ветки: ``add: linear_reg_support``) | ||
|
||
2. Чтобы пофиксить баг создаем ветку ``fix: описание_бага`` от ``main`` | ||
|
||
(Хочу пофиксить баг подключения к БД -> название ветки: ``fix: bd_connection``) | ||
|
||
3. Чтобы что-то инициализировать ``init: описание`` | ||
|
||
## Полезные git команды: | ||
1. ``git checkout -b branch_name`` - создать ветку и переключиться на нее | ||
2. ``git branch`` - отобразить все существующие ветки | ||
3. ``git push -u origin new_branch`` - отправить новую ветку в удаленный репозиторий | ||
4. ``git reset --soft HEAD~1`` - удалить последний 1 коммит, но сохранить изменения | ||
5. ``git branch -d <branch-name>`` - удалить ветку из локального репозитория | ||
6. ``git checkout current_branch`` ``->`` ``git merge target_branch`` – подтянуть изменения из ``target_branch`` ветки в ``current_branch`` | ||
7. ``git checkout --track -b local_branch_name origin/remote_branch_name`` – Чтобы склонировать конкретную существующую в удаленном репозитории ветку, нужно ввести команду | ||
8. ``git rm --cached filename`` (если директория, то ``-r filename``) – удалить что-то из всевидящего GIT - ока: | ||
|
||
## Полезные команды Poetry: | ||
1. ``poetry update`` - обновить зависимость по pyproject.toml файлу | ||
2. ``poetry install`` - установить все зависимости по pyproject.toml и poetry.lock файлам | ||
3. ``poetry add --group group_name lib_name`` - добавить библиотеку в зависимости (``--group group_name`` - не обязательно) | ||
4. ``poetry remove lib_name`` - удалить библиотеку из зависимостей | ||
5. ``poetry run pre-commit run --all-files`` - запустить pre-commit хуки | ||
|
||
## Полезные команды DVC: | ||
1. ``dvc init`` - инициализировать все dvc файлы | ||
2. ``dvc add path/to/...`` - добавить в dvc папку/файл | ||
3. ``dvc commit`` - Фиксирует изменения | ||
4. ``dvc push `` - загрузить измененные данные в s3 | ||
5. ``dvc pull`` - выгрузить данные из s3 | ||
6. ``dvc remote list`` - показать список доступных хранилищ | ||
7. ``dvc diff`` - показать разницу между предыдущими версиями | ||
8. ``dvc checkout`` - восстановить данные из предыдущего коммита | ||
|
||
## Полезные команды Docker: | ||
1. ``docker-compose down -v`` - (удалить контейнеры, включая volumes) | ||
2. ``docker-compose down`` - (удалить контейнеры) | ||
3. ``docker-compose build --no-cache`` - пересобрать, не используя кэш | ||
4. ``docker-compose build`` - пересобрать | ||
5. ``docker-compose up -d`` - запустить в detached режиме (логи не отображаются в консоли) | ||
6. ``docker-compose up -d --build`` - запустить в detached режиме и пересобрать | ||
7. ``docker ps`` - вывести список запущенных контейнеров | ||
|
||
## Полезные команды: | ||
1. ``set PYTHONPATH=%PYTHONPATH%;C:\Users\<username>\PycharmProjects\`` - перед запуском jupyter notebook. Чтобы корректно работали во вложенных папках, а не только в корне (для PyCharm) | ||
2. Установить прокси совкомбанка: | ||
- ``set http_proxy=http://proxy-server`` | ||
- ``set https_proxy=http://proxy-server`` | ||
- ``set no_proxy=localhost,127.0.0.1,192.168.*,10.60.*`` |
Empty file.
Empty file.
Empty file.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Compose file that defines a single PostgreSQL service, mlflow-postgres, attached to ml_network. | ||
version: '3' | ||
|
||
services: | ||
mlflow-postgres: | ||
image: postgres:13.3 | ||
# Publish container port 5432 on the host port given by MLFLOW_POSTGRES_PORT. | ||
ports: | ||
- ${MLFLOW_POSTGRES_PORT}:5432 | ||
# Load the variables from .env into the container environment. | ||
env_file: | ||
- .env | ||
# Re-export the MLFLOW_POSTGRES_* values under POSTGRES_* names. | ||
# NOTE(review): POSTGRES_HOST does not look like a variable the postgres image reads — confirm it is needed. | ||
environment: | ||
- POSTGRES_DB=${MLFLOW_POSTGRES_DB} | ||
- POSTGRES_USER=${MLFLOW_POSTGRES_USER} | ||
- POSTGRES_PASSWORD=${MLFLOW_POSTGRES_PASSWORD} | ||
- POSTGRES_HOST=${MLFLOW_POSTGRES_HOST} | ||
networks: | ||
- ml_network | ||
# "$$" escapes Compose interpolation, so the variables are expanded by the shell inside | ||
# the container, where env_file has made them available. | ||
healthcheck: | ||
test: ["CMD-SHELL", "pg_isready -U $${MLFLOW_POSTGRES_USER} -d $${MLFLOW_POSTGRES_DB}"] | ||
interval: 60s | ||
timeout: 10s | ||
retries: 3 | ||
restart: on-failure | ||
# Network declared with default settings. | ||
networks: | ||
ml_network: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Image that runs the MLflow tracking server on port 5000. | ||
# MLFLOW_BACKEND_STORE_URI and MLFLOW_DEFAULT_ARTIFACT_ROOT must be provided in the | ||
# container environment at start-up; they are expanded by the shell in CMD. | ||
FROM python:3.11 | ||
|
||
WORKDIR /mlflow/ | ||
|
||
# Install the Python dependencies without keeping pip's download cache, then drop | ||
# the manifest from the working directory. | ||
COPY requirements.txt . | ||
RUN pip install --no-cache-dir -r requirements.txt && \ | ||
rm requirements.txt | ||
|
||
# Run the server as an unprivileged user that owns the working directory, instead of root. | ||
RUN groupadd --system mlflow && \ | ||
useradd --system --gid mlflow --uid 10001 --create-home mlflow && \ | ||
chown mlflow:mlflow /mlflow | ||
USER mlflow | ||
|
||
# Documents the port the server listens on; it does not publish the port by itself. | ||
EXPOSE 5000 | ||
|
||
# Exec-form CMD: the shell is needed only to expand the environment variables, and | ||
# "exec" replaces it with mlflow so the server receives SIGTERM from "docker stop". | ||
# The variables are quoted so that a URI is always passed as a single argument. | ||
CMD ["sh", "-c", "exec mlflow server --host 0.0.0.0 --port 5000 --backend-store-uri \"${MLFLOW_BACKEND_STORE_URI}\" --default-artifact-root \"${MLFLOW_DEFAULT_ARTIFACT_ROOT}\""] |
Oops, something went wrong.