Skip to content

Commit

Permalink
Lint issues fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
bnarasimha committed Jan 27, 2024
1 parent f88ec31 commit 74e4504
Show file tree
Hide file tree
Showing 11 changed files with 75 additions and 44 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ jobs:
- name: Code formatting with black
run: |
black bikeshare_model/*.py
# - name: Lint with pylint
# run: |
# pylint --disable=R,C bikeshare_model/
- name: Lint with pylint
run: |
pylint --disable=R,C --extension-pkg-whitelist='pydantic' bikeshare_model/
- name: Train the model
run: |
python bikeshare_model/train_pipeline.py
Expand Down
2 changes: 1 addition & 1 deletion bikeshare_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@

from bikeshare_model.config.core import PACKAGE_ROOT, config

with open(PACKAGE_ROOT / "VERSION") as version_file:
with open(PACKAGE_ROOT / "VERSION", encoding="utf-8") as version_file:
__version__ = version_file.read().strip()
5 changes: 2 additions & 3 deletions bikeshare_model/config/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
parent, root = file.parent, file.parents[1]
sys.path.append(str(root))

from pathlib import Path
from typing import Dict, List

from pydantic import BaseModel
Expand Down Expand Up @@ -86,7 +85,7 @@ def find_config_file() -> Path:
if CONFIG_FILE_PATH.is_file():
return CONFIG_FILE_PATH

raise Exception(f"Config not found at {CONFIG_FILE_PATH!r}")
raise KeyError(f"Config not found at {CONFIG_FILE_PATH!r}")


def fetch_config_from_yaml(cfg_path: Path = None) -> YAML:
Expand All @@ -96,7 +95,7 @@ def fetch_config_from_yaml(cfg_path: Path = None) -> YAML:
cfg_path = find_config_file()

if cfg_path:
with open(cfg_path, "r") as conf_file:
with open(cfg_path, "r", encoding="utf-8") as conf_file:
parsed_config = load(conf_file.read())
return parsed_config

Expand Down
44 changes: 34 additions & 10 deletions bikeshare_model/predict.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import sys
from pathlib import Path

from sklearn.metrics import accuracy_score

file = Path(__file__).resolve()
parent, root = file.parent, file.parents[1]
sys.path.append(str(root))
Expand All @@ -27,7 +29,7 @@ def make_prediction(*, input_data: Union[pd.DataFrame, dict]) -> dict:
# validated_data = validated_data.reindex(columns = ['dteday', 'season', 'hr', 'holiday', 'weekday', 'workingday',
# 'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'yr', 'mnth'])
validated_data = validated_data.reindex(columns=config.model_config.features)

print(validated_data)
results = {"predictions": None, "version": _version, "errors": errors}

if not errors:
Expand All @@ -37,25 +39,47 @@ def make_prediction(*, input_data: Union[pd.DataFrame, dict]) -> dict:
"version": _version,
"errors": errors,
}
print(results)
#print(results)

#print(type(predictions))
#print("Accuracy Score:" + accuracy_score(np.array([139])), predictions)

return results


if __name__ == "__main__":

# data_in = {
# "dteday": ["2012-11-6"],
# "season": ["winter"],
# "hr": ["6pm"],
# "holiday": ["No"],
# "weekday": ["Tue"],
# "workingday": ["Yes"],
# "weathersit": ["Clear"],
# "temp": [16],
# "atemp": [17.5],
# "hum": [30],
# "windspeed": [10],
# }

data_in = {
"dteday": ["2012-11-6"],
"dteday": ["2012-11-05"],
"season": ["winter"],
"hr": ["6pm"],
"hr": ["6am"],
"holiday": ["No"],
"weekday": ["Tue"],
"weekday": ["Mon"],
"workingday": ["Yes"],
"weathersit": ["Clear"],
"temp": [16],
"atemp": [17.5],
"hum": [30],
"windspeed": [10],
"weathersit": ["Mist"],
"temp": [6.1],
"atemp": [3.0014000000000003],
"hum": [49.0],
"windspeed": [19.0012],
}

make_prediction(input_data=data_in)

# dteday, season, hr, holiday,weekday,workingday, weathersit, temp, atemp, hum, windspeed, casual, registered, cnt
# 2012-11-05, winter, 6am,No, Mon, Yes, Mist, 6.1, 3.0014000000000003, 49.0, 19.0012, 4, 135, 139

# 2011-07-13,fall, 4am,No, Wed, Yes, Clear, 26.78, 28.998799999999996, 57.99999999999999,16.997899999999998,0,5,5
1 change: 0 additions & 1 deletion bikeshare_model/processing/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
sys.path.append(str(root))

import typing as t
from pathlib import Path

import joblib
import pandas as pd
Expand Down
18 changes: 9 additions & 9 deletions bikeshare_model/processing/features.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
from typing import List
import sys
import pandas as pd
import numpy as np

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder

Expand All @@ -20,7 +16,7 @@ def __init__(self, variable: str, date_var: str):
self.variable = variable
self.date_var = date_var

def fit(self, X: pd.DataFrame, y: pd.Series = None):
def fit(self, _: pd.DataFrame, __: pd.Series = None):
# we need the fit statement to accommodate the sklearn pipeline
return self

Expand Down Expand Up @@ -49,8 +45,9 @@ def __init__(self, variable: str):
raise ValueError("variable name should be a string")

self.variable = variable
self.fill_value = None

def fit(self, X: pd.DataFrame, y: pd.Series = None):
def fit(self, X: pd.DataFrame, _: pd.Series = None):
# we need the fit statement to accommodate the sklearn pipeline
X = X.copy()
self.fill_value = X[self.variable].mode()[0]
Expand Down Expand Up @@ -78,7 +75,7 @@ def __init__(self, variable: str, mappings: dict):
self.variable = variable
self.mappings = mappings

def fit(self, X: pd.DataFrame, y: pd.Series = None):
def fit(self, _: pd.DataFrame, __: pd.Series = None):
# we need the fit statement to accommodate the sklearn pipeline
return self

Expand All @@ -102,8 +99,10 @@ def __init__(self, variable: str):
raise ValueError("variable name should be a string")

self.variable = variable
self.lower_bound = None
self.upper_bound = None

def fit(self, X: pd.DataFrame, y: pd.Series = None):
def fit(self, X: pd.DataFrame, _: pd.Series = None):
# we need the fit statement to accommodate the sklearn pipeline
X = X.copy()
q1 = X.describe()[self.variable].loc["25%"]
Expand Down Expand Up @@ -136,8 +135,9 @@ def __init__(self, variable: str):

self.variable = variable
self.encoder = OneHotEncoder(sparse_output=False)
self.encoded_features_names = None

def fit(self, X: pd.DataFrame, y: pd.Series = None):
def fit(self, X: pd.DataFrame, _: pd.Series = None):
# we need the fit statement to accommodate the sklearn pipeline
X = X.copy()
self.encoder.fit(X[[self.variable]])
Expand Down
4 changes: 1 addition & 3 deletions bikeshare_model/setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# python setup.py sdist bdist_wheel
import os
from setuptools import setup, find_packages

from setuptools import setup

setup(
name="bikesharing",
Expand Down
6 changes: 2 additions & 4 deletions bikeshare_model/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,16 @@
from sklearn.model_selection import train_test_split
from bikeshare_model.config.core import config
from bikeshare_model.processing.data_manager import _load_raw_dataset
from bikeshare_model.processing.features import WeekdayImputer


@pytest.fixture
def sample_input_data():
data = _load_raw_dataset(file_name=config.app_config.training_data_file)

X_train, X_test, y_train, y_test = train_test_split(
__, X_test, __, __ = train_test_split(
data,
data[config.model_config.target],
test_size=config.model_config.test_size,
random_state=config.model_config.random_state,
)

print(X_test)
return X_test
4 changes: 0 additions & 4 deletions bikeshare_model/tests/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,16 @@
parent, root = file.parent, file.parents[1]
sys.path.append(str(root))

import pandas as pd
from bikeshare_model.config.core import config
from bikeshare_model.tests.conftest import sample_input_data
from bikeshare_model.processing.features import WeathersitImputer


def test_weathersit_imputation(sample_input_data):
# Given
imputer = WeathersitImputer(config.model_config.weathersit_var)
# print(sample_input_data['weathersit'].head(10))

# When
imputed = imputer.fit(sample_input_data).transform(sample_input_data)

# Then
# print(imputed['weathersit'].head(10))
assert imputed.loc[12230, "weathersit"] is not None
27 changes: 21 additions & 6 deletions bikeshare_model/tests/test_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,33 @@

def test_make_prediction(sample_input_data):
# Given
expected_no_predictions = 179
#expected_no_predictions = 179

# When
result = make_prediction(input_data=sample_input_data)

# Then
predictions = result.get("predictions")
assert isinstance(predictions, np.ndarray)
# assert isinstance(predictions[0], np.int64)
assert result.get("errors") is None
# assert len(predictions) == expected_no_predictions

#print(predictions)
# for i,x in enumerate(predictions):
# assert(abs(x - sample_input_data["cnt"][i+1]) < 1)

#assert(abs(predictions[0] - sample_input_data["cnt"][0]) < 1)

#print(predictions)

i = 0
for index, row in sample_input_data.iterrows():
assert(abs(row["cnt"] - predictions[i]) < 300)
print(i)
i = i + 1



#assert(abs(predictions - sample_input_data["cnt"]) < 1)
#assert len(predictions) == expected_no_predictions
# _predictions = list(predictions)
# y_true = sample_input_data["Survived"]
# accuracy = accuracy_score(_predictions, y_true)
# assert accuracy > 0.7
#y_true = sample_input_data["cnt"]
2 changes: 2 additions & 0 deletions bikeshare_model/train_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def run_training() -> None:
)

# Pipeline fitting
print(f"DEBUG-1: train_pipeline.py - {X_train.shape},{y_train.shape}")

bikeshare_pipe.fit(X_train, y_train)

# persist trained model
Expand Down

0 comments on commit 74e4504

Please sign in to comment.