Lint issues fixed

bnarasimha · Jan 27, 2024 · 74e4504 · 74e4504
1 parent f88ec31
commit 74e4504
Show file tree

Hide file tree

Showing 11 changed files with 75 additions and 44 deletions.
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -31,9 +31,9 @@ jobs:
  - name: Code formatting with black
  run: |
  black bikeshare_model/*.py
- # - name: Lint with pylint
- #  run: |
- #  pylint --disable=R,C bikeshare_model/
+ - name: Lint with pylint
+ run: |
+ pylint --disable=R,C --extension-pkg-whitelist='pydantic' bikeshare_model/
  - name: Train the model
  run: |
  python bikeshare_model/train_pipeline.py

diff --git a/bikeshare_model/__init__.py b/bikeshare_model/__init__.py
@@ -7,5 +7,5 @@
 
 from bikeshare_model.config.core import PACKAGE_ROOT, config
 
-with open(PACKAGE_ROOT / "VERSION") as version_file:
+with open(PACKAGE_ROOT / "VERSION", encoding="utf-8") as version_file:
  __version__ = version_file.read().strip()
diff --git a/bikeshare_model/config/core.py b/bikeshare_model/config/core.py
@@ -6,7 +6,6 @@
 parent, root = file.parent, file.parents[1]
 sys.path.append(str(root))
 
-from pathlib import Path
 from typing import Dict, List
 
 from pydantic import BaseModel
@@ -86,7 +85,7 @@ def find_config_file() -> Path:
  if CONFIG_FILE_PATH.is_file():
  return CONFIG_FILE_PATH
 
- raise Exception(f"Config not found at {CONFIG_FILE_PATH!r}")
+ raise KeyError(f"Config not found at {CONFIG_FILE_PATH!r}")
 
 
 def fetch_config_from_yaml(cfg_path: Path = None) -> YAML:
@@ -96,7 +95,7 @@ def fetch_config_from_yaml(cfg_path: Path = None) -> YAML:
  cfg_path = find_config_file()
 
  if cfg_path:
- with open(cfg_path, "r") as conf_file:
+ with open(cfg_path, "r", encoding="utf-8") as conf_file:
  parsed_config = load(conf_file.read())
  return parsed_config
 

diff --git a/bikeshare_model/predict.py b/bikeshare_model/predict.py
@@ -1,6 +1,8 @@
 import sys
 from pathlib import Path
 
+from sklearn.metrics import accuracy_score
+
 file = Path(__file__).resolve()
 parent, root = file.parent, file.parents[1]
 sys.path.append(str(root))
@@ -27,7 +29,7 @@ def make_prediction(*, input_data: Union[pd.DataFrame, dict]) -> dict:
  # validated_data = validated_data.reindex(columns = ['dteday', 'season', 'hr', 'holiday', 'weekday', 'workingday',
  # 'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'yr', 'mnth'])
  validated_data = validated_data.reindex(columns=config.model_config.features)
-
+ print(validated_data)
  results = {"predictions": None, "version": _version, "errors": errors}
 
  if not errors:
@@ -37,25 +39,47 @@ def make_prediction(*, input_data: Union[pd.DataFrame, dict]) -> dict:
  "version": _version,
  "errors": errors,
  }
- print(results)
+ #print(results)
+
+ #print(type(predictions))
+ #print("Accuracy Score:" + accuracy_score(np.array([139])), predictions)
 
  return results
 
 
 if __name__ == "__main__":
 
+ # data_in = {
+ # "dteday": ["2012-11-6"],
+ # "season": ["winter"],
+ # "hr": ["6pm"],
+ # "holiday": ["No"],
+ # "weekday": ["Tue"],
+ # "workingday": ["Yes"],
+ # "weathersit": ["Clear"],
+ # "temp": [16],
+ # "atemp": [17.5],
+ # "hum": [30],
+ # "windspeed": [10],
+ # }
+
  data_in = {
- "dteday": ["2012-11-6"],
+ "dteday": ["2012-11-05"],
  "season": ["winter"],
- "hr": ["6pm"],
+ "hr": ["6am"],
  "holiday": ["No"],
- "weekday": ["Tue"],
+ "weekday": ["Mon"],
  "workingday": ["Yes"],
- "weathersit": ["Clear"],
- "temp": [16],
- "atemp": [17.5],
- "hum": [30],
- "windspeed": [10],
+ "weathersit": ["Mist"],
+ "temp": [6.1],
+ "atemp": [3.0014000000000003],
+ "hum": [49.0],
+ "windspeed": [19.0012],
  }
 
  make_prediction(input_data=data_in)
+
+# dteday, season, hr, holiday,weekday,workingday, weathersit, temp, atemp, hum, windspeed, casual, registered, cnt
+# 2012-11-05, winter, 6am,No, Mon, Yes, Mist, 6.1, 3.0014000000000003, 49.0, 19.0012, 4, 135, 139
+
+# 2011-07-13,fall, 4am,No, Wed, Yes, Clear, 26.78, 28.998799999999996, 57.99999999999999,16.997899999999998,0,5,5
diff --git a/bikeshare_model/processing/data_manager.py b/bikeshare_model/processing/data_manager.py
@@ -6,7 +6,6 @@
 sys.path.append(str(root))
 
 import typing as t
-from pathlib import Path
 
 import joblib
 import pandas as pd

diff --git a/bikeshare_model/processing/features.py b/bikeshare_model/processing/features.py
@@ -1,8 +1,4 @@
-from typing import List
-import sys
 import pandas as pd
-import numpy as np
-
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.preprocessing import OneHotEncoder
 
@@ -20,7 +16,7 @@ def __init__(self, variable: str, date_var: str):
  self.variable = variable
  self.date_var = date_var
 
- def fit(self, X: pd.DataFrame, y: pd.Series = None):
+ def fit(self, _: pd.DataFrame, __: pd.Series = None):
  # we need the fit statement to accommodate the sklearn pipeline
  return self
 
@@ -49,8 +45,9 @@ def __init__(self, variable: str):
  raise ValueError("variable name should be a string")
 
  self.variable = variable
+ self.fill_value = None
 
- def fit(self, X: pd.DataFrame, y: pd.Series = None):
+ def fit(self, X: pd.DataFrame, _: pd.Series = None):
  # we need the fit statement to accommodate the sklearn pipeline
  X = X.copy()
  self.fill_value = X[self.variable].mode()[0]
@@ -78,7 +75,7 @@ def __init__(self, variable: str, mappings: dict):
  self.variable = variable
  self.mappings = mappings
 
- def fit(self, X: pd.DataFrame, y: pd.Series = None):
+ def fit(self, _: pd.DataFrame, __: pd.Series = None):
  # we need the fit statement to accommodate the sklearn pipeline
  return self
 
@@ -102,8 +99,10 @@ def __init__(self, variable: str):
  raise ValueError("variable name should be a string")
 
  self.variable = variable
+ self.lower_bound = None
+ self.upper_bound = None
 
- def fit(self, X: pd.DataFrame, y: pd.Series = None):
+ def fit(self, X: pd.DataFrame, _: pd.Series = None):
  # we need the fit statement to accommodate the sklearn pipeline
  X = X.copy()
  q1 = X.describe()[self.variable].loc["25%"]
@@ -136,8 +135,9 @@ def __init__(self, variable: str):
 
  self.variable = variable
  self.encoder = OneHotEncoder(sparse_output=False)
+ self.encoded_features_names = None
 
- def fit(self, X: pd.DataFrame, y: pd.Series = None):
+ def fit(self, X: pd.DataFrame, _: pd.Series = None):
  # we need the fit statement to accommodate the sklearn pipeline
  X = X.copy()
  self.encoder.fit(X[[self.variable]])

diff --git a/bikeshare_model/setup.py b/bikeshare_model/setup.py
@@ -1,7 +1,5 @@
 # python setup.py sdist bdist_wheel
-import os
-from setuptools import setup, find_packages
-
+from setuptools import setup
 
 setup(
  name="bikesharing",

diff --git a/bikeshare_model/tests/conftest.py b/bikeshare_model/tests/conftest.py
@@ -9,18 +9,16 @@
 from sklearn.model_selection import train_test_split
 from bikeshare_model.config.core import config
 from bikeshare_model.processing.data_manager import _load_raw_dataset
-from bikeshare_model.processing.features import WeekdayImputer
-
 
 @pytest.fixture
 def sample_input_data():
  data = _load_raw_dataset(file_name=config.app_config.training_data_file)
 
- X_train, X_test, y_train, y_test = train_test_split(
+ __, X_test, __, __ = train_test_split(
  data,
  data[config.model_config.target],
  test_size=config.model_config.test_size,
  random_state=config.model_config.random_state,
  )
-
+ print(X_test)
  return X_test
diff --git a/bikeshare_model/tests/test_features.py b/bikeshare_model/tests/test_features.py
@@ -5,20 +5,16 @@
 parent, root = file.parent, file.parents[1]
 sys.path.append(str(root))
 
-import pandas as pd
 from bikeshare_model.config.core import config
-from bikeshare_model.tests.conftest import sample_input_data
 from bikeshare_model.processing.features import WeathersitImputer
 
 
 def test_weathersit_imputation(sample_input_data):
  # Given
  imputer = WeathersitImputer(config.model_config.weathersit_var)
- # print(sample_input_data['weathersit'].head(10))
 
  # When
  imputed = imputer.fit(sample_input_data).transform(sample_input_data)
 
  # Then
- # print(imputed['weathersit'].head(10))
  assert imputed.loc[12230, "weathersit"] is not None
diff --git a/bikeshare_model/tests/test_prediction.py b/bikeshare_model/tests/test_prediction.py
@@ -17,18 +17,33 @@
 
 def test_make_prediction(sample_input_data):
  # Given
- expected_no_predictions = 179
+ #expected_no_predictions = 179
 
  # When
  result = make_prediction(input_data=sample_input_data)
 
  # Then
  predictions = result.get("predictions")
  assert isinstance(predictions, np.ndarray)
- # assert isinstance(predictions[0], np.int64)
  assert result.get("errors") is None
- # assert len(predictions) == expected_no_predictions
+
+ #print(predictions)
+ # for i,x in enumerate(predictions):
+ # assert(abs(x - sample_input_data["cnt"][i+1]) < 1)
+
+ #assert(abs(predictions[0] - sample_input_data["cnt"][0]) < 1)
+
+ #print(predictions)
+
+ i = 0
+ for index, row in sample_input_data.iterrows():
+ assert(abs(row["cnt"] - predictions[i]) < 300)
+ print(i)
+ i = i + 1 
+
+
+
+ #assert(abs(predictions - sample_input_data["cnt"]) < 1)
+ #assert len(predictions) == expected_no_predictions
  # _predictions = list(predictions)
- # y_true = sample_input_data["Survived"]
- # accuracy = accuracy_score(_predictions, y_true)
- # assert accuracy > 0.7
+ #y_true = sample_input_data["cnt"]
diff --git a/bikeshare_model/train_pipeline.py b/bikeshare_model/train_pipeline.py
@@ -28,6 +28,8 @@ def run_training() -> None:
  )
 
  # Pipeline fitting
+ print(f"DEBUG-1: train_pipeline.py - {X_train.shape},{y_train.shape}")
+
  bikeshare_pipe.fit(X_train, y_train)
 
  # persist trained model