Refactor project docs

brnaguiar · Sep 20, 2023 · 5add70b · 5add70b
1 parent ac88eea
commit 5add70b
Show file tree

Hide file tree

Showing 6 changed files with 91 additions and 334 deletions.
diff --git a/.env b/.env
@@ -0,0 +1,48 @@
+MLFLOW_IP="mlflow"
+MINIO_IP="minio"
+POSTGRES_IP="postgres"
+FASTAPI_IP="fastapi"
+STREAMLIT_IP="streamlit"
+
+MINIO_PORT_SERVER=9000
+MINIO_PORT_CONSOLE=9001
+POSTGRES_PORT=5432
+PGPORT=5432
+MLFLOW_PORT=5000
+PUSHGATEWAY_PORT=9091
+FASTAPI_PORT=8000
+PROMETHEUS_PORT=9090
+GRAFANA_PORT=3000
+STREAMLIT_PORT=8501
+
+
+MINIO_ACCESS_KEY="teste"
+MINIO_SECRET_ACCESS_KEY="teste123"
+AWS_ACCESS_KEY_ID=${MINIO_ACCESS_KEY}
+AWS_SECRET_ACCESS_KEY=${MINIO_SECRET_ACCESS_KEY}
+FSSPEC_S3_ENDPOINT_URL="http://${MINIO_IP}:${MINIO_PORT_SERVER}"
+FSSPEC_S3_KEY=${MINIO_ACCESS_KEY}
+FSSPEC_S3_SECRET=${MINIO_SECRET_ACCESS_KEY}
+
+POSTGRES_USER="admin"
+POSTGRES_PASSWORD="admin"
+POSTGRES_MLFLOW_DATABASE="mlflow_db"
+POSTGRES_AIRFLOW_DATABASE="airflow"
+POSTGRES_APP_DATABASE="app"
+POSTGRES_RECOMMENDATIONS_TABLE="recommendations"
+POSTGRES_USERS_TABLE="users"
+PGUSER="admin"
+PGPASSWORD="admin"
+
+MLFLOW_BUCKET_NAME="mlflow"
+DATA_BUCKET_NAME="data" 
+
+MLFLOW_TRACKING_URI="http://${MLFLOW_IP}:${MLFLOW_PORT}"
+MLFLOW_S3_ENDPOINT_URL="http://${MINIO_IP}:${MINIO_PORT_SERVER}"
+MLFLOW_S3_IGNORE_TLS="true"
+MLFLOW_S3_BUCKET=${MLFLOW_BUCKET_NAME}
+
+AIRFLOW_PROJ_DIR="./airflow"
+AIRFLOW_WWW_USER_USERNAME="airflow"
+AIRFLOW_WWW_USER_PASSWORD="airflow"
+AIRFLOW_UID=1000
diff --git a/.gitignore b/.gitignore
@@ -50,7 +50,7 @@ coverage.xml
 target/
 
 # DotEnv configuration
-.env
+# .env
 
 # Database
 *.db

diff --git a/README.md b/README.md
@@ -75,6 +75,7 @@ make users
 ## Architecture
 <img src="./images/project_diagram.jpg"/>
 
+
 ## Service Endpoints Showcase
 
 ### Streamlit Frontend App
@@ -106,58 +107,69 @@ make run
 -->
 
 <!-- PROJECT LOGO -->
-<!--
 Project Organization
 ------------
 
     ├── LICENSE
-    ├── Makefile           <- Makefile with commands like `make data` or `make train`
-    ├── README.md          <- The top-level README for developers using this project.
+    │
+    ├── Makefile             <- Makefile with commands like `make env` or `make run`
+    │
+    ├── README.md            <- The top-level README for developers using this project
+    │
     ├── data
-    │   ├── external       <- Data from third party sources.
-    │   ├── interim        <- Intermediate data that has been transformed.
-    │   ├── processed      <- The final, canonical data sets for modeling.
-    │   └── raw            <- The original, immutable data dump.
+    │   ├── 01-external      <- Data from third party sources
+    │   ├── 01-raw           <- Data in a raw format
+    │   ├── 02-processed     <- The pre-processed data for modeling
+    │   └── 03-raw           <- Split pre-processed data for model training
+    ├── airflow
+    │   ├── dags             <- Airflow DAGs
+    │   ├── logs             <- Airflow logging
+    │   ├── plugins          <- Airflow's default directory for plugins such as custom Operators, Sensors, etc. (this project uses the `include` dir in dags for this purpose instead)
+    │   └── config           <- Airflow Configurations and Settings
+    │
+    ├── assets               <- Project assets like jar files used in Spark Sessions
     │
-    ├── docs               <- A default Sphinx project; see sphinx-doc.org for details
+    ├── models               <- Trained and serialized models, model predictions, or model summaries
     │
-    ├── models             <- Trained and serialized models, model predictions, or model summaries
+    ├── notebooks            <- Jupyter notebooks used in experimentation 
     │
-    ├── notebooks          <- Jupyter notebooks. Naming convention is a number (for ordering),
-    │                         the creator's initials, and a short `-` delimited description, e.g.
-    │                         `1.0-jqp-initial-data-exploration`.
+    ├── docker               <- Docker data and configurations
     │
-    ├── references         <- Data dictionaries, manuals, and all other explanatory materials.
+    ├── images               <- Project images
     │
-    ├── reports            <- Generated analysis as HTML, PDF, LaTeX, etc.
-    │   └── figures        <- Generated graphics and figures to be used in reporting
+    ├── requirements.local   <- Required Site-Packages 
+    │                         
+    ├── requirements.minimal <- Required Dist-Packages 
+    │                         
+    ├── Makefile             <- File containing rules and dependencies to automate building processes
     │
-    ├── requirements.txt   <- The requirements file for reproducing the analysis environment, e.g.
-    │                         generated with `pip freeze > requirements.txt`
+    ├── setup.py             <- makes project pip installable (pip install -e .) so src can be imported 
     │
-    ├── setup.py           <- makes project pip installable (pip install -e .) so src can be imported
-    ├── src                <- Source code for use in this project.
-    │   ├── __init__.py    <- Makes src a Python module
+    ├── src                  <- Source code for use in this project.
     │   │
-    │   ├── data           <- Scripts to download or generate data
-    │   │   └── make_dataset.py
+    │   ├── collaborative    <- Source code for the collaborative recommendation strategy
+    │   │   ├── models       <- Collaborative models
+    │   │   ├── nodes        <- Data processing, validation, training, etc. functions (or nodes) that represent units of work.
+    │   │   └── pipelines    <- Collection of orchestrated data processing, validation, training, etc. nodes, arranged in a sequence or a directed acyclic graph (DAG)
     │   │
-    │   ├── features       <- Scripts to turn raw data into features for modeling
-    │   │   └── build_features.py
+    │   ├── conf           <- Configuration files and parameters for the project
     │   │
-    │   ├── models         <- Scripts to train models and then use trained models to make
-    │   │   │                 predictions
+    │   ├── main.py        <- Main script, mostly to run pipelines
+    │   │
+    │   ├── scripts        <- Scripts, for instance, to create credentials files and populate databases
     │   │   ├── predict_model.py
     │   │   └── train_model.py
     │   │
-    │   └── visualization  <- Scripts to create exploratory and results oriented visualizations
-    │       └── visualize.py
+    │   ├── frontend       <- Source code for the Application Interface
+    │   │
+    │   └── utils          <- Project utils like Handlers and Controllers
     │
-    └── tox.ini            <- tox file with settings for running tox; see tox.readthedocs.io
+    ├── tox.ini            <- Settings for flake8
+    │
+    └── pyproject.toml     <- Settings for the project, and tools like isort, black, pytest, etc.
 
 
 --------
--->
 
 
 <p><small>Project based on the <a target="_blank" href="https://drivendata.github.io/cookiecutter-data-science/">cookiecutter data science project template</a>.
-Original file line number
+Diff line change
@@ Expand Up / @@ -50,7 +50,7 @@ coverage.xml @@
     target/
     # DotEnv configuration
-    .env
+    # .env
     # Database
     *.db
@@ Expand Down @@