From 74e45041917d4be5d0c4be8ce9242a49ec52badb Mon Sep 17 00:00:00 2001 From: Narasimha Badrinath Date: Sat, 27 Jan 2024 07:04:50 +0000 Subject: [PATCH] Lint issues fixed --- .github/workflows/python-app.yml | 6 +-- bikeshare_model/__init__.py | 2 +- bikeshare_model/config/core.py | 5 +-- bikeshare_model/predict.py | 44 +++++++++++++++++----- bikeshare_model/processing/data_manager.py | 1 - bikeshare_model/processing/features.py | 18 ++++----- bikeshare_model/setup.py | 4 +- bikeshare_model/tests/conftest.py | 6 +-- bikeshare_model/tests/test_features.py | 4 -- bikeshare_model/tests/test_prediction.py | 27 ++++++++++--- bikeshare_model/train_pipeline.py | 2 + 11 files changed, 75 insertions(+), 44 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 8067a62..f866bff 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -31,9 +31,9 @@ jobs: - name: Code formatting with black run: | black bikeshare_model/*.py - # - name: Lint with pylint - # run: | - # pylint --disable=R,C bikeshare_model/ + - name: Lint with pylint + run: | + pylint --disable=R,C --extension-pkg-whitelist='pydantic' bikeshare_model/ - name: Train the model run: | python bikeshare_model/train_pipeline.py diff --git a/bikeshare_model/__init__.py b/bikeshare_model/__init__.py index 4c8ae31..2151e11 100644 --- a/bikeshare_model/__init__.py +++ b/bikeshare_model/__init__.py @@ -7,5 +7,5 @@ from bikeshare_model.config.core import PACKAGE_ROOT, config -with open(PACKAGE_ROOT / "VERSION") as version_file: +with open(PACKAGE_ROOT / "VERSION", encoding="utf-8") as version_file: __version__ = version_file.read().strip() diff --git a/bikeshare_model/config/core.py b/bikeshare_model/config/core.py index d31d32c..8f7945e 100644 --- a/bikeshare_model/config/core.py +++ b/bikeshare_model/config/core.py @@ -6,7 +6,6 @@ parent, root = file.parent, file.parents[1] sys.path.append(str(root)) -from pathlib import Path from typing import Dict, List from pydantic import BaseModel @@ -86,7 +85,7 @@ def find_config_file() -> Path: if CONFIG_FILE_PATH.is_file(): return CONFIG_FILE_PATH - raise Exception(f"Config not found at {CONFIG_FILE_PATH!r}") + raise KeyError(f"Config not found at {CONFIG_FILE_PATH!r}") def fetch_config_from_yaml(cfg_path: Path = None) -> YAML: @@ -96,7 +95,7 @@ def fetch_config_from_yaml(cfg_path: Path = None) -> YAML: cfg_path = find_config_file() if cfg_path: - with open(cfg_path, "r") as conf_file: + with open(cfg_path, "r", encoding="utf-8") as conf_file: parsed_config = load(conf_file.read()) return parsed_config diff --git a/bikeshare_model/predict.py b/bikeshare_model/predict.py index c2f0866..5b2bc36 100644 --- a/bikeshare_model/predict.py +++ b/bikeshare_model/predict.py @@ -1,6 +1,8 @@ import sys from pathlib import Path +from sklearn.metrics import accuracy_score + file = Path(__file__).resolve() parent, root = file.parent, file.parents[1] sys.path.append(str(root)) @@ -27,7 +29,7 @@ def make_prediction(*, input_data: Union[pd.DataFrame, dict]) -> dict: # validated_data = validated_data.reindex(columns = ['dteday', 'season', 'hr', 'holiday', 'weekday', 'workingday', # 'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'yr', 'mnth']) validated_data = validated_data.reindex(columns=config.model_config.features) - + print(validated_data) results = {"predictions": None, "version": _version, "errors": errors} if not errors: @@ -37,25 +39,47 @@ def make_prediction(*, input_data: Union[pd.DataFrame, dict]) -> dict: "version": _version, "errors": errors, } - print(results) + #print(results) + + #print(type(predictions)) + #print("Accuracy Score:" + accuracy_score(np.array([139])), predictions) return results if __name__ == "__main__": + # data_in = { + # "dteday": ["2012-11-6"], + # "season": ["winter"], + # "hr": ["6pm"], + # "holiday": ["No"], + # "weekday": ["Tue"], + # "workingday": ["Yes"], + # "weathersit": ["Clear"], + # "temp": [16], + # "atemp": [17.5], + # "hum": [30], + # "windspeed": [10], + # } + data_in = { - "dteday": ["2012-11-6"], + "dteday": ["2012-11-05"], "season": ["winter"], - "hr": ["6pm"], + "hr": ["6am"], "holiday": ["No"], - "weekday": ["Tue"], + "weekday": ["Mon"], "workingday": ["Yes"], - "weathersit": ["Clear"], - "temp": [16], - "atemp": [17.5], - "hum": [30], - "windspeed": [10], + "weathersit": ["Mist"], + "temp": [6.1], + "atemp": [3.0014000000000003], + "hum": [49.0], + "windspeed": [19.0012], } make_prediction(input_data=data_in) + +# dteday, season, hr, holiday,weekday,workingday, weathersit, temp, atemp, hum, windspeed, casual, registered, cnt +# 2012-11-05, winter, 6am,No, Mon, Yes, Mist, 6.1, 3.0014000000000003, 49.0, 19.0012, 4, 135, 139 + +# 2011-07-13,fall, 4am,No, Wed, Yes, Clear, 26.78, 28.998799999999996, 57.99999999999999,16.997899999999998,0,5,5 \ No newline at end of file diff --git a/bikeshare_model/processing/data_manager.py b/bikeshare_model/processing/data_manager.py index 3eaa5aa..0d24e97 100644 --- a/bikeshare_model/processing/data_manager.py +++ b/bikeshare_model/processing/data_manager.py @@ -6,7 +6,6 @@ sys.path.append(str(root)) import typing as t -from pathlib import Path import joblib import pandas as pd diff --git a/bikeshare_model/processing/features.py b/bikeshare_model/processing/features.py index a9a62b5..5e6d7fe 100644 --- a/bikeshare_model/processing/features.py +++ b/bikeshare_model/processing/features.py @@ -1,8 +1,4 @@ -from typing import List -import sys import pandas as pd -import numpy as np - from sklearn.base import BaseEstimator, TransformerMixin from sklearn.preprocessing import OneHotEncoder @@ -20,7 +16,7 @@ def __init__(self, variable: str, date_var: str): self.variable = variable self.date_var = date_var - def fit(self, X: pd.DataFrame, y: pd.Series = None): + def fit(self, _: pd.DataFrame, __: pd.Series = None): # we need the fit statement to accomodate the sklearn pipeline return self @@ -49,8 +45,9 @@ def __init__(self, variable: str): raise ValueError("variable name should be a string") self.variable = variable + self.fill_value = None - def fit(self, X: pd.DataFrame, y: pd.Series = None): + def fit(self, X: pd.DataFrame, _: pd.Series = None): # we need the fit statement to accomodate the sklearn pipeline X = X.copy() self.fill_value = X[self.variable].mode()[0] @@ -78,7 +75,7 @@ def __init__(self, variable: str, mappings: dict): self.variable = variable self.mappings = mappings - def fit(self, X: pd.DataFrame, y: pd.Series = None): + def fit(self, _: pd.DataFrame, __: pd.Series = None): # we need the fit statement to accomodate the sklearn pipeline return self @@ -102,8 +99,10 @@ def __init__(self, variable: str): raise ValueError("variable name should be a string") self.variable = variable + self.lower_bound = None + self.upper_bound = None - def fit(self, X: pd.DataFrame, y: pd.Series = None): + def fit(self, X: pd.DataFrame, _: pd.Series = None): # we need the fit statement to accomodate the sklearn pipeline X = X.copy() q1 = X.describe()[self.variable].loc["25%"] @@ -136,8 +135,9 @@ def __init__(self, variable: str): self.variable = variable self.encoder = OneHotEncoder(sparse_output=False) + self.encoded_features_names = None - def fit(self, X: pd.DataFrame, y: pd.Series = None): + def fit(self, X: pd.DataFrame, _: pd.Series = None): # we need the fit statement to accomodate the sklearn pipeline X = X.copy() self.encoder.fit(X[[self.variable]]) diff --git a/bikeshare_model/setup.py b/bikeshare_model/setup.py index 397afad..4a2ea0f 100644 --- a/bikeshare_model/setup.py +++ b/bikeshare_model/setup.py @@ -1,7 +1,5 @@ # python setup.py sdist bdist_wheel -import os -from setuptools import setup, find_packages - +from setuptools import setup setup( name="bikesharing", diff --git a/bikeshare_model/tests/conftest.py b/bikeshare_model/tests/conftest.py index 7346168..dfce8f2 100644 --- a/bikeshare_model/tests/conftest.py +++ b/bikeshare_model/tests/conftest.py @@ -9,18 +9,16 @@ from sklearn.model_selection import train_test_split from bikeshare_model.config.core import config from bikeshare_model.processing.data_manager import _load_raw_dataset -from bikeshare_model.processing.features import WeekdayImputer - @pytest.fixture def sample_input_data(): data = _load_raw_dataset(file_name=config.app_config.training_data_file) - X_train, X_test, y_train, y_test = train_test_split( + __, X_test, __, __ = train_test_split( data, data[config.model_config.target], test_size=config.model_config.test_size, random_state=config.model_config.random_state, ) - + print(X_test) return X_test diff --git a/bikeshare_model/tests/test_features.py b/bikeshare_model/tests/test_features.py index bb4b901..fa22022 100644 --- a/bikeshare_model/tests/test_features.py +++ b/bikeshare_model/tests/test_features.py @@ -5,20 +5,16 @@ parent, root = file.parent, file.parents[1] sys.path.append(str(root)) -import pandas as pd from bikeshare_model.config.core import config -from bikeshare_model.tests.conftest import sample_input_data from bikeshare_model.processing.features import WeathersitImputer def test_weathersit_imputation(sample_input_data): # Given imputer = WeathersitImputer(config.model_config.weathersit_var) - # print(sample_input_data['weathersit'].head(10)) # When imputed = imputer.fit(sample_input_data).transform(sample_input_data) # Then - # print(imputed['weathersit'].head(10)) assert imputed.loc[12230, "weathersit"] is not None diff --git a/bikeshare_model/tests/test_prediction.py b/bikeshare_model/tests/test_prediction.py index 92b78e4..5f19249 100644 --- a/bikeshare_model/tests/test_prediction.py +++ b/bikeshare_model/tests/test_prediction.py @@ -17,7 +17,7 @@ def test_make_prediction(sample_input_data): # Given - expected_no_predictions = 179 + #expected_no_predictions = 179 # When result = make_prediction(input_data=sample_input_data) @@ -25,10 +25,25 @@ def test_make_prediction(sample_input_data): # Then predictions = result.get("predictions") assert isinstance(predictions, np.ndarray) - # assert isinstance(predictions[0], np.int64) assert result.get("errors") is None - # assert len(predictions) == expected_no_predictions + + #print(predictions) + # for i,x in enumerate(predictions): + # assert(abs(x - sample_input_data["cnt"][i+1]) < 1) + + #assert(abs(predictions[0] - sample_input_data["cnt"][0]) < 1) + + #print(predictions) + + i = 0 + for index, row in sample_input_data.iterrows(): + assert(abs(row["cnt"] - predictions[i]) < 300) + print(i) + i = i + 1 + + + + #assert(abs(predictions - sample_input_data["cnt"]) < 1) + #assert len(predictions) == expected_no_predictions # _predictions = list(predictions) - # y_true = sample_input_data["Survived"] - # accuracy = accuracy_score(_predictions, y_true) - # assert accuracy > 0.7 + #y_true = sample_input_data["cnt"] diff --git a/bikeshare_model/train_pipeline.py b/bikeshare_model/train_pipeline.py index c7b86eb..c33cb63 100644 --- a/bikeshare_model/train_pipeline.py +++ b/bikeshare_model/train_pipeline.py @@ -28,6 +28,8 @@ def run_training() -> None: ) # Pipeline fitting + print(f"DEBUG-1: train_pipeline.py - {X_train.shape},{y_train.shape}") + bikeshare_pipe.fit(X_train, y_train) # persist trained model