From 74e45041917d4be5d0c4be8ce9242a49ec52badb Mon Sep 17 00:00:00 2001
From: Narasimha Badrinath <bnarasimha21@gmail.com>
Date: Sat, 27 Jan 2024 07:04:50 +0000
Subject: [PATCH] Lint issues fixed

---
 .github/workflows/python-app.yml           |  6 +--
 bikeshare_model/__init__.py                |  2 +-
 bikeshare_model/config/core.py             |  5 +--
 bikeshare_model/predict.py                 | 44 +++++++++++++++++-----
 bikeshare_model/processing/data_manager.py |  1 -
 bikeshare_model/processing/features.py     | 18 ++++-----
 bikeshare_model/setup.py                   |  4 +-
 bikeshare_model/tests/conftest.py          |  6 +--
 bikeshare_model/tests/test_features.py     |  4 --
 bikeshare_model/tests/test_prediction.py   | 27 ++++++++++---
 bikeshare_model/train_pipeline.py          |  2 +
 11 files changed, 75 insertions(+), 44 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 8067a62..f866bff 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -31,9 +31,9 @@ jobs:
     - name: Code formatting with black
       run: |
         black bikeshare_model/*.py
-    # - name: Lint with pylint
-    #   run: |
-    #     pylint --disable=R,C bikeshare_model/
+    - name: Lint with pylint
+      run: |
+        pylint --disable=R,C --extension-pkg-whitelist='pydantic' bikeshare_model/
     - name: Train the model
       run: |
         python bikeshare_model/train_pipeline.py
diff --git a/bikeshare_model/__init__.py b/bikeshare_model/__init__.py
index 4c8ae31..2151e11 100644
--- a/bikeshare_model/__init__.py
+++ b/bikeshare_model/__init__.py
@@ -7,5 +7,5 @@
 
 from bikeshare_model.config.core import PACKAGE_ROOT, config
 
-with open(PACKAGE_ROOT / "VERSION") as version_file:
+with open(PACKAGE_ROOT / "VERSION", encoding="utf-8") as version_file:
     __version__ = version_file.read().strip()
diff --git a/bikeshare_model/config/core.py b/bikeshare_model/config/core.py
index d31d32c..8f7945e 100644
--- a/bikeshare_model/config/core.py
+++ b/bikeshare_model/config/core.py
@@ -6,7 +6,6 @@
 parent, root = file.parent, file.parents[1]
 sys.path.append(str(root))
 
-from pathlib import Path
 from typing import Dict, List
 
 from pydantic import BaseModel
@@ -86,7 +85,7 @@ def find_config_file() -> Path:
     if CONFIG_FILE_PATH.is_file():
         return CONFIG_FILE_PATH
 
-    raise Exception(f"Config not found at {CONFIG_FILE_PATH!r}")
+    raise FileNotFoundError(f"Config not found at {CONFIG_FILE_PATH!r}")
 
 
 def fetch_config_from_yaml(cfg_path: Path = None) -> YAML:
@@ -96,7 +95,7 @@ def fetch_config_from_yaml(cfg_path: Path = None) -> YAML:
         cfg_path = find_config_file()
 
     if cfg_path:
-        with open(cfg_path, "r") as conf_file:
+        with open(cfg_path, "r", encoding="utf-8") as conf_file:
             parsed_config = load(conf_file.read())
             return parsed_config
 
diff --git a/bikeshare_model/predict.py b/bikeshare_model/predict.py
index c2f0866..5b2bc36 100644
--- a/bikeshare_model/predict.py
+++ b/bikeshare_model/predict.py
@@ -1,6 +1,8 @@
 import sys
 from pathlib import Path
 
+from sklearn.metrics import accuracy_score
+
 file = Path(__file__).resolve()
 parent, root = file.parent, file.parents[1]
 sys.path.append(str(root))
@@ -27,7 +29,7 @@ def make_prediction(*, input_data: Union[pd.DataFrame, dict]) -> dict:
     # validated_data = validated_data.reindex(columns = ['dteday', 'season', 'hr', 'holiday', 'weekday', 'workingday',
     #                                                   'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'yr', 'mnth'])
     validated_data = validated_data.reindex(columns=config.model_config.features)
-
+    print(validated_data)
     results = {"predictions": None, "version": _version, "errors": errors}
 
     if not errors:
@@ -37,25 +39,47 @@ def make_prediction(*, input_data: Union[pd.DataFrame, dict]) -> dict:
             "version": _version,
             "errors": errors,
         }
-        print(results)
+        #print(results)
+
+    #print(type(predictions))
+    #print("Accuracy Score:" +  accuracy_score(np.array([139])), predictions)
 
     return results
 
 
 if __name__ == "__main__":
 
+    # data_in = {
+    #     "dteday": ["2012-11-6"],
+    #     "season": ["winter"],
+    #     "hr": ["6pm"],
+    #     "holiday": ["No"],
+    #     "weekday": ["Tue"],
+    #     "workingday": ["Yes"],
+    #     "weathersit": ["Clear"],
+    #     "temp": [16],
+    #     "atemp": [17.5],
+    #     "hum": [30],
+    #     "windspeed": [10],
+    # }
+
     data_in = {
-        "dteday": ["2012-11-6"],
+        "dteday": ["2012-11-05"],
         "season": ["winter"],
-        "hr": ["6pm"],
+        "hr": ["6am"],
         "holiday": ["No"],
-        "weekday": ["Tue"],
+        "weekday": ["Mon"],
         "workingday": ["Yes"],
-        "weathersit": ["Clear"],
-        "temp": [16],
-        "atemp": [17.5],
-        "hum": [30],
-        "windspeed": [10],
+        "weathersit": ["Mist"],
+        "temp": [6.1],
+        "atemp": [3.0014000000000003],
+        "hum": [49.0],
+        "windspeed": [19.0012],
     }
 
     make_prediction(input_data=data_in)
+
+# dteday,     season, hr, holiday,weekday,workingday, weathersit, temp,   atemp,              hum,    windspeed,  casual, registered, cnt
+# 2012-11-05, winter, 6am,No,     Mon,    Yes,        Mist,       6.1,    3.0014000000000003, 49.0,   19.0012,    4,      135,        139
+    
+# 2011-07-13,fall,    4am,No,     Wed,    Yes,        Clear,      26.78,  28.998799999999996, 57.99999999999999,16.997899999999998,0,5,5
\ No newline at end of file
diff --git a/bikeshare_model/processing/data_manager.py b/bikeshare_model/processing/data_manager.py
index 3eaa5aa..0d24e97 100644
--- a/bikeshare_model/processing/data_manager.py
+++ b/bikeshare_model/processing/data_manager.py
@@ -6,7 +6,6 @@
 sys.path.append(str(root))
 
 import typing as t
-from pathlib import Path
 
 import joblib
 import pandas as pd
diff --git a/bikeshare_model/processing/features.py b/bikeshare_model/processing/features.py
index a9a62b5..5e6d7fe 100644
--- a/bikeshare_model/processing/features.py
+++ b/bikeshare_model/processing/features.py
@@ -1,8 +1,4 @@
-from typing import List
-import sys
 import pandas as pd
-import numpy as np
-
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.preprocessing import OneHotEncoder
 
@@ -20,7 +16,7 @@ def __init__(self, variable: str, date_var: str):
         self.variable = variable
         self.date_var = date_var
 
-    def fit(self, X: pd.DataFrame, y: pd.Series = None):
+    def fit(self, X: pd.DataFrame, y: pd.Series = None):  # pylint: disable=unused-argument
         # we need the fit statement to accomodate the sklearn pipeline
         return self
 
@@ -49,8 +45,9 @@ def __init__(self, variable: str):
             raise ValueError("variable name should be a string")
 
         self.variable = variable
+        self.fill_value = None
 
-    def fit(self, X: pd.DataFrame, y: pd.Series = None):
+    def fit(self, X: pd.DataFrame, y: pd.Series = None):  # pylint: disable=unused-argument
         # we need the fit statement to accomodate the sklearn pipeline
         X = X.copy()
         self.fill_value = X[self.variable].mode()[0]
@@ -78,7 +75,7 @@ def __init__(self, variable: str, mappings: dict):
         self.variable = variable
         self.mappings = mappings
 
-    def fit(self, X: pd.DataFrame, y: pd.Series = None):
+    def fit(self, X: pd.DataFrame, y: pd.Series = None):  # pylint: disable=unused-argument
         # we need the fit statement to accomodate the sklearn pipeline
         return self
 
@@ -102,8 +99,10 @@ def __init__(self, variable: str):
             raise ValueError("variable name should be a string")
 
         self.variable = variable
+        self.lower_bound = None
+        self.upper_bound = None
 
-    def fit(self, X: pd.DataFrame, y: pd.Series = None):
+    def fit(self, X: pd.DataFrame, y: pd.Series = None):  # pylint: disable=unused-argument
         # we need the fit statement to accomodate the sklearn pipeline
         X = X.copy()
         q1 = X.describe()[self.variable].loc["25%"]
@@ -136,8 +135,9 @@ def __init__(self, variable: str):
 
         self.variable = variable
         self.encoder = OneHotEncoder(sparse_output=False)
+        self.encoded_features_names = None
 
-    def fit(self, X: pd.DataFrame, y: pd.Series = None):
+    def fit(self, X: pd.DataFrame, y: pd.Series = None):  # pylint: disable=unused-argument
         # we need the fit statement to accomodate the sklearn pipeline
         X = X.copy()
         self.encoder.fit(X[[self.variable]])
diff --git a/bikeshare_model/setup.py b/bikeshare_model/setup.py
index 397afad..4a2ea0f 100644
--- a/bikeshare_model/setup.py
+++ b/bikeshare_model/setup.py
@@ -1,7 +1,5 @@
 # python setup.py sdist bdist_wheel
-import os
-from setuptools import setup, find_packages
-
+from setuptools import setup
 
 setup(
     name="bikesharing",
diff --git a/bikeshare_model/tests/conftest.py b/bikeshare_model/tests/conftest.py
index 7346168..dfce8f2 100644
--- a/bikeshare_model/tests/conftest.py
+++ b/bikeshare_model/tests/conftest.py
@@ -9,18 +9,16 @@
 from sklearn.model_selection import train_test_split
 from bikeshare_model.config.core import config
 from bikeshare_model.processing.data_manager import _load_raw_dataset
-from bikeshare_model.processing.features import WeekdayImputer
-
 
 @pytest.fixture
 def sample_input_data():
     data = _load_raw_dataset(file_name=config.app_config.training_data_file)
 
-    X_train, X_test, y_train, y_test = train_test_split(
+    __, X_test, __, __ = train_test_split(
         data,
         data[config.model_config.target],
         test_size=config.model_config.test_size,
         random_state=config.model_config.random_state,
     )
 
     return X_test
diff --git a/bikeshare_model/tests/test_features.py b/bikeshare_model/tests/test_features.py
index bb4b901..fa22022 100644
--- a/bikeshare_model/tests/test_features.py
+++ b/bikeshare_model/tests/test_features.py
@@ -5,20 +5,16 @@
 parent, root = file.parent, file.parents[1]
 sys.path.append(str(root))
 
-import pandas as pd
 from bikeshare_model.config.core import config
-from bikeshare_model.tests.conftest import sample_input_data
 from bikeshare_model.processing.features import WeathersitImputer
 
 
 def test_weathersit_imputation(sample_input_data):
     # Given
     imputer = WeathersitImputer(config.model_config.weathersit_var)
-    # print(sample_input_data['weathersit'].head(10))
 
     # When
     imputed = imputer.fit(sample_input_data).transform(sample_input_data)
 
     # Then
-    # print(imputed['weathersit'].head(10))
     assert imputed.loc[12230, "weathersit"] is not None
diff --git a/bikeshare_model/tests/test_prediction.py b/bikeshare_model/tests/test_prediction.py
index 92b78e4..5f19249 100644
--- a/bikeshare_model/tests/test_prediction.py
+++ b/bikeshare_model/tests/test_prediction.py
@@ -17,7 +17,7 @@
 
 def test_make_prediction(sample_input_data):
     # Given
-    expected_no_predictions = 179
+    #expected_no_predictions = 179
 
     # When
     result = make_prediction(input_data=sample_input_data)
@@ -25,10 +25,9 @@ def test_make_prediction(sample_input_data):
     # Then
     predictions = result.get("predictions")
     assert isinstance(predictions, np.ndarray)
-    # assert isinstance(predictions[0], np.int64)
     assert result.get("errors") is None
-    # assert len(predictions) == expected_no_predictions
-    # _predictions = list(predictions)
-    # y_true = sample_input_data["Survived"]
-    # accuracy = accuracy_score(_predictions, y_true)
-    # assert accuracy > 0.7
+    assert len(predictions) == len(sample_input_data)
+
+    # Every prediction must stay within a coarse tolerance of the observed "cnt" value.
+    for actual, predicted in zip(sample_input_data["cnt"], predictions):
+        assert abs(actual - predicted) < 300
diff --git a/bikeshare_model/train_pipeline.py b/bikeshare_model/train_pipeline.py
index c7b86eb..c33cb63 100644
--- a/bikeshare_model/train_pipeline.py
+++ b/bikeshare_model/train_pipeline.py
@@ -28,6 +28,8 @@ def run_training() -> None:
     )
 
     # Pipeline fitting
+    print(f"DEBUG-1: train_pipeline.py - {X_train.shape},{y_train.shape}")
+    
     bikeshare_pipe.fit(X_train, y_train)
 
     # persist trained model