From 35686b1b96218bf5c5be760d9557cdb375a58c23 Mon Sep 17 00:00:00 2001 From: thomasthaddeus Date: Mon, 6 May 2024 09:10:35 -0700 Subject: [PATCH] update layout of project --- .vscode/settings.json | 7 +++ pyproject.toml | 58 ++++++++++++++++++- requirements2.txt | 54 +++++++++++++++++ .../.conf/plot_config.json | 0 src/{ => dataanalysistoolkit}/__init__.py | 0 .../data_analysis_toolkit.py | 4 +- .../data_sources/__init__.py | 0 .../data_sources/api_connector.py | 0 .../data_sources/excel_connector.py | 0 .../data_sources/sql_connector.py | 0 .../formatters/__init__.py | 0 .../formatters/data_formatter.py | 0 .../generators/__init__.py | 0 .../generators/csv_data_generator.py | 0 .../generators/generate_data.py | 0 .../generators/report_generator.py | 0 .../integrators/__init__.py | 0 .../integrators/data_integrator.py | 0 .../model/__init__.py | 0 .../model/feature_engineer.py | 0 .../model/model_evaluator.py | 0 .../preprocessor/__init__.py | 0 .../preprocessor/data_prep.py | 0 .../utils/__init__.py | 0 .../utils/data_imputer.py | 0 .../visualizer/__init__.py | 0 .../visualizer/data_visualizer.py | 0 tests/__init__.py | 7 +++ {data => tests/data}/gen_test.csv | 0 {data => tests/data}/test_random.csv | 0 tests/data_sources/test_api_connector.py | 2 +- tests/data_sources/test_excel_connector.py | 2 +- tests/test_data_analysis_toolkit.py | 4 +- 33 files changed, 132 insertions(+), 6 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 requirements2.txt rename src/{ => dataanalysistoolkit}/.conf/plot_config.json (100%) rename src/{ => dataanalysistoolkit}/__init__.py (100%) rename src/{ => dataanalysistoolkit}/data_analysis_toolkit.py (97%) rename src/{ => dataanalysistoolkit}/data_sources/__init__.py (100%) rename src/{ => dataanalysistoolkit}/data_sources/api_connector.py (100%) rename src/{ => dataanalysistoolkit}/data_sources/excel_connector.py (100%) rename src/{ => dataanalysistoolkit}/data_sources/sql_connector.py (100%) rename src/{ => dataanalysistoolkit}/formatters/__init__.py (100%) rename src/{ => dataanalysistoolkit}/formatters/data_formatter.py (100%) rename src/{ => dataanalysistoolkit}/generators/__init__.py (100%) rename src/{ => dataanalysistoolkit}/generators/csv_data_generator.py (100%) rename src/{ => dataanalysistoolkit}/generators/generate_data.py (100%) rename src/{ => dataanalysistoolkit}/generators/report_generator.py (100%) rename src/{ => dataanalysistoolkit}/integrators/__init__.py (100%) rename src/{ => dataanalysistoolkit}/integrators/data_integrator.py (100%) rename src/{ => dataanalysistoolkit}/model/__init__.py (100%) rename src/{ => dataanalysistoolkit}/model/feature_engineer.py (100%) rename src/{ => dataanalysistoolkit}/model/model_evaluator.py (100%) rename src/{ => dataanalysistoolkit}/preprocessor/__init__.py (100%) rename src/{ => dataanalysistoolkit}/preprocessor/data_prep.py (100%) rename src/{ => dataanalysistoolkit}/utils/__init__.py (100%) rename src/{ => dataanalysistoolkit}/utils/data_imputer.py (100%) rename src/{ => dataanalysistoolkit}/visualizer/__init__.py (100%) rename src/{ => dataanalysistoolkit}/visualizer/data_visualizer.py (100%) rename {data => tests/data}/gen_test.csv (100%) rename {data => tests/data}/test_random.csv (100%) diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..9b38853 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 0ed200d..ff3a8d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,10 +32,66 @@ requires-python = ">=3.8" keywords = ["data analysis", "CSV", "statistics", "data cleaning", "data visualization"] # include = ["src/**/*.py", "docs/**/*.md"] # exclude = ["tests/", "examples/"] +dependencies = [ + "backports.tarfile==1.1.1", + "build==1.2.1", + "certifi==2024.2.2", + "charset-normalizer==3.3.2", + "click==8.1.7", + "colorama==0.4.6", + "contourpy==1.2.1", + "cycler==0.12.1", + "docutils==0.21.2", + "fonttools==4.51.0", + "greenlet==3.0.3", + "idna==3.7", + "importlib_metadata==7.1.0", + "jaraco.classes==3.4.0", + "jaraco.context==5.3.0", + "jaraco.functools==4.0.1", + "joblib==1.4.2", + "keyring==25.2.0", + "kiwisolver==1.4.5", + "markdown-it-py==3.0.0", + "matplotlib==3.8.4", + "mdurl==0.1.2", + "more-itertools==10.2.0", + "nh3==0.2.17", + "nltk==3.8.1", + "numpy==1.26.4", + "packaging==24.0", + "pandas==2.2.2", + "pillow==10.3.0", + "pkginfo==1.10.0", + "Pygments==2.18.0", + "pyparsing==3.1.2", + "pyproject_hooks==1.1.0", + "python-dateutil==2.9.0.post0", + "pytz==2024.1", + "pywin32-ctypes==0.2.2", + "readme_renderer==43.0", + "regex==2024.4.28", + "requests==2.31.0", + "requests-toolbelt==1.0.0", + "rfc3986==2.0.0", + "rich==13.7.1", + "scikit-learn==1.4.2", + "scipy==1.13.0", + "seaborn==0.13.2", + "six==1.16.0", + "SQLAlchemy==2.0.30", + "threadpoolctl==3.5.0", + "tqdm==4.66.4", + "twine==5.0.0", + "typing_extensions==4.11.0", + "tzdata==2024.1", + "urllib3==2.2.1", + "zipp==3.18.1" +] [project.optional-dependencies] dev = ["pytest", "pytest-mock", "check-manifest"] -docs = ["sphinx>=2.2"] +docs = ["myst-parser", "sphinx", "nbsphinx", "sphinx-rtd-theme", "sphinxcontrib-websupport"] [tool.setuptools.dynamic] version = {attr = "dataanalysistoolkit.__version__"} diff --git a/requirements2.txt b/requirements2.txt new file mode 100644 index 0000000..6a97a44 --- /dev/null +++ b/requirements2.txt @@ -0,0 +1,54 @@ +backports.tarfile==1.1.1 +build==1.2.1 +certifi==2024.2.2 +charset-normalizer==3.3.2 +click==8.1.7 +colorama==0.4.6 +contourpy==1.2.1 +cycler==0.12.1 +docutils==0.21.2 +fonttools==4.51.0 +greenlet==3.0.3 +idna==3.7 +importlib_metadata==7.1.0 +jaraco.classes==3.4.0 +jaraco.context==5.3.0 +jaraco.functools==4.0.1 +joblib==1.4.2 +keyring==25.2.0 +kiwisolver==1.4.5 +markdown-it-py==3.0.0 +matplotlib==3.8.4 +mdurl==0.1.2 +more-itertools==10.2.0 +nh3==0.2.17 +nltk==3.8.1 +numpy==1.26.4 +packaging==24.0 +pandas==2.2.2 +pillow==10.3.0 +pkginfo==1.10.0 +Pygments==2.18.0 +pyparsing==3.1.2 +pyproject_hooks==1.1.0 +python-dateutil==2.9.0.post0 +pytz==2024.1 +pywin32-ctypes==0.2.2 +readme_renderer==43.0 +regex==2024.4.28 +requests==2.31.0 +requests-toolbelt==1.0.0 +rfc3986==2.0.0 +rich==13.7.1 +scikit-learn==1.4.2 +scipy==1.13.0 +seaborn==0.13.2 +six==1.16.0 +SQLAlchemy==2.0.30 +threadpoolctl==3.5.0 +tqdm==4.66.4 +twine==5.0.0 +typing_extensions==4.11.0 +tzdata==2024.1 +urllib3==2.2.1 +zipp==3.18.1 diff --git a/src/.conf/plot_config.json b/src/dataanalysistoolkit/.conf/plot_config.json similarity index 100% rename from src/.conf/plot_config.json rename to src/dataanalysistoolkit/.conf/plot_config.json diff --git a/src/__init__.py b/src/dataanalysistoolkit/__init__.py similarity index 100% rename from src/__init__.py rename to src/dataanalysistoolkit/__init__.py diff --git a/src/data_analysis_toolkit.py b/src/dataanalysistoolkit/data_analysis_toolkit.py similarity index 97% rename from src/data_analysis_toolkit.py rename to src/dataanalysistoolkit/data_analysis_toolkit.py index 655e54d..db99a7a 100644 --- a/src/data_analysis_toolkit.py +++ b/src/dataanalysistoolkit/data_analysis_toolkit.py @@ -83,10 +83,12 @@ def __init__(self, filename): filename (_type_): _description_ """ self.data = self.load_data(filename) - self.visualizer = DataVisualizer(self.data) + self.visualizer = DataVisualizer(self.data, config_path='.conf/plot_config.json') self.imputer = DataImputer(self.data) self.preprocessor = DataPreprocessor(self.data) self.feature_engineer = FeatureEngineer(self.data) + # No value for argument 'X_test' in constructor callPylintE1120:no-value-for-parameter + # No value for argument 'y_test' in constructor callPylintE1120:no-value-for-parameter self.evaluator = ModelEvaluator(self.data) self.report_generator = ReportGenerator(self.data) diff --git a/src/data_sources/__init__.py b/src/dataanalysistoolkit/data_sources/__init__.py similarity index 100% rename from src/data_sources/__init__.py rename to src/dataanalysistoolkit/data_sources/__init__.py diff --git a/src/data_sources/api_connector.py b/src/dataanalysistoolkit/data_sources/api_connector.py similarity index 100% rename from src/data_sources/api_connector.py rename to src/dataanalysistoolkit/data_sources/api_connector.py diff --git a/src/data_sources/excel_connector.py b/src/dataanalysistoolkit/data_sources/excel_connector.py similarity index 100% rename from src/data_sources/excel_connector.py rename to src/dataanalysistoolkit/data_sources/excel_connector.py diff --git a/src/data_sources/sql_connector.py b/src/dataanalysistoolkit/data_sources/sql_connector.py similarity index 100% rename from src/data_sources/sql_connector.py rename to src/dataanalysistoolkit/data_sources/sql_connector.py diff --git a/src/formatters/__init__.py b/src/dataanalysistoolkit/formatters/__init__.py similarity index 100% rename from src/formatters/__init__.py rename to src/dataanalysistoolkit/formatters/__init__.py diff --git a/src/formatters/data_formatter.py b/src/dataanalysistoolkit/formatters/data_formatter.py similarity index 100% rename from src/formatters/data_formatter.py rename to src/dataanalysistoolkit/formatters/data_formatter.py diff --git a/src/generators/__init__.py b/src/dataanalysistoolkit/generators/__init__.py similarity index 100% rename from src/generators/__init__.py rename to src/dataanalysistoolkit/generators/__init__.py diff --git a/src/generators/csv_data_generator.py b/src/dataanalysistoolkit/generators/csv_data_generator.py similarity index 100% rename from src/generators/csv_data_generator.py rename to src/dataanalysistoolkit/generators/csv_data_generator.py diff --git a/src/generators/generate_data.py b/src/dataanalysistoolkit/generators/generate_data.py similarity index 100% rename from src/generators/generate_data.py rename to src/dataanalysistoolkit/generators/generate_data.py diff --git a/src/generators/report_generator.py b/src/dataanalysistoolkit/generators/report_generator.py similarity index 100% rename from src/generators/report_generator.py rename to src/dataanalysistoolkit/generators/report_generator.py diff --git a/src/integrators/__init__.py b/src/dataanalysistoolkit/integrators/__init__.py similarity index 100% rename from src/integrators/__init__.py rename to src/dataanalysistoolkit/integrators/__init__.py diff --git a/src/integrators/data_integrator.py b/src/dataanalysistoolkit/integrators/data_integrator.py similarity index 100% rename from src/integrators/data_integrator.py rename to src/dataanalysistoolkit/integrators/data_integrator.py diff --git a/src/model/__init__.py b/src/dataanalysistoolkit/model/__init__.py similarity index 100% rename from src/model/__init__.py rename to src/dataanalysistoolkit/model/__init__.py diff --git a/src/model/feature_engineer.py b/src/dataanalysistoolkit/model/feature_engineer.py similarity index 100% rename from src/model/feature_engineer.py rename to src/dataanalysistoolkit/model/feature_engineer.py diff --git a/src/model/model_evaluator.py b/src/dataanalysistoolkit/model/model_evaluator.py similarity index 100% rename from src/model/model_evaluator.py rename to src/dataanalysistoolkit/model/model_evaluator.py diff --git a/src/preprocessor/__init__.py b/src/dataanalysistoolkit/preprocessor/__init__.py similarity index 100% rename from src/preprocessor/__init__.py rename to src/dataanalysistoolkit/preprocessor/__init__.py diff --git a/src/preprocessor/data_prep.py b/src/dataanalysistoolkit/preprocessor/data_prep.py similarity index 100% rename from src/preprocessor/data_prep.py rename to src/dataanalysistoolkit/preprocessor/data_prep.py diff --git a/src/utils/__init__.py b/src/dataanalysistoolkit/utils/__init__.py similarity index 100% rename from src/utils/__init__.py rename to src/dataanalysistoolkit/utils/__init__.py diff --git a/src/utils/data_imputer.py b/src/dataanalysistoolkit/utils/data_imputer.py similarity index 100% rename from src/utils/data_imputer.py rename to src/dataanalysistoolkit/utils/data_imputer.py diff --git a/src/visualizer/__init__.py b/src/dataanalysistoolkit/visualizer/__init__.py similarity index 100% rename from src/visualizer/__init__.py rename to src/dataanalysistoolkit/visualizer/__init__.py diff --git a/src/visualizer/data_visualizer.py b/src/dataanalysistoolkit/visualizer/data_visualizer.py similarity index 100% rename from src/visualizer/data_visualizer.py rename to src/dataanalysistoolkit/visualizer/data_visualizer.py diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..dd6f088 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,7 @@ +from .data_sources import test_api_connector, test_excel_connector, test_sql_connector +from .generators import test_csv_data_generator, test_generate_data, test_report_generator +from .integrators import test_data_integrator +from .model import test_feature_engineer, test_model_evaluator +from .preprocessor import test_data_prep +from .utils import test_data_imputer +from .visualizer import test_data_visualizer diff --git a/data/gen_test.csv b/tests/data/gen_test.csv similarity index 100% rename from data/gen_test.csv rename to tests/data/gen_test.csv diff --git a/data/test_random.csv b/tests/data/test_random.csv similarity index 100% rename from data/test_random.csv rename to tests/data/test_random.csv diff --git a/tests/data_sources/test_api_connector.py b/tests/data_sources/test_api_connector.py index 6ac7638..6331527 100644 --- a/tests/data_sources/test_api_connector.py +++ b/tests/data_sources/test_api_connector.py @@ -1,7 +1,7 @@ import pytest import requests import requests_mock -from src.data_sources.api_connector import APIConnector +from dataanalysistoolkit.data_sources.api_connector import APIConnector @pytest.fixture def api_connector(): diff --git a/tests/data_sources/test_excel_connector.py b/tests/data_sources/test_excel_connector.py index 3a67664..2a04fbc 100644 --- a/tests/data_sources/test_excel_connector.py +++ b/tests/data_sources/test_excel_connector.py @@ -1,6 +1,6 @@ import pytest import pandas as pd -from src.data_sources.excel_connector import ExcelConnector +from dataanalysistoolkit.data_sources.excel_connector import ExcelConnector # Create a sample Excel file for testing @pytest.fixture(scope="module") diff --git a/tests/test_data_analysis_toolkit.py b/tests/test_data_analysis_toolkit.py index 3fc34e6..8900d64 100644 --- a/tests/test_data_analysis_toolkit.py +++ b/tests/test_data_analysis_toolkit.py @@ -23,12 +23,12 @@ import pytest import pandas as pd -from src.data_analysis_toolkit import DataAnalysisToolkit +from dataanalysistoolkit.data_analysis_toolkit import DataAnalysisToolkit @pytest.fixture def analyzer(): - return DataAnalysisToolkit("../data/test.csv") + return DataAnalysisToolkit("./data/test_random.csv") def test_calculate_budget_statistics(analyzer):