Skip to content

Commit

Permalink
Added black and pylint for code formatting and linting; tested.
Browse files Browse the repository at this point in the history
  • Loading branch information
bnarasimha committed Jan 27, 2024
1 parent 0323873 commit ac6fabf
Show file tree
Hide file tree
Showing 14 changed files with 244 additions and 158 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements/requirements.txt ]; then pip install -r requirements/requirements.txt; fi
- name: Code formatting with black
  run: |
    # A bare `black` has no source path to work on. --check fails the build
    # on unformatted files without rewriting them on the runner.
    # NOTE(review): assumes black is installed via requirements/requirements.txt - confirm.
    black --check .
- name: Lint with pylint
  run: |
    # pylint needs an explicit module or package to analyse.
    # NOTE(review): assumes pylint is installed via requirements/requirements.txt - confirm.
    pylint bikeshare_model
- name: Train the model
run: |
python bikeshare_model/train_pipeline.py
Expand Down
4 changes: 3 additions & 1 deletion bikeshare_model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import sys
from pathlib import Path

file = Path(__file__).resolve()
parent, root = file.parent, file.parents[1]
sys.path.append(str(root))

from bikeshare_model.config.core import PACKAGE_ROOT, config

# Read the package version from the VERSION file located under PACKAGE_ROOT.
# The handle is read exactly once: a second read() on the same handle would
# return an empty string and overwrite the version with "".
with open(PACKAGE_ROOT / "VERSION", encoding="utf-8") as version_file:
    __version__ = version_file.read().strip()
23 changes: 12 additions & 11 deletions bikeshare_model/config/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Path setup, and access the config.yml file, datasets folder & trained models
import sys
from pathlib import Path

file = Path(__file__).resolve()
parent, root = file.parent, file.parents[1]
sys.path.append(str(root))
Expand All @@ -17,7 +18,7 @@
PACKAGE_ROOT = Path(bikeshare_model.__file__).resolve().parent
ROOT = PACKAGE_ROOT.parent
CONFIG_FILE_PATH = PACKAGE_ROOT / "config.yml"
#print(CONFIG_FILE_PATH)
# print(CONFIG_FILE_PATH)

DATASET_DIR = PACKAGE_ROOT / "datasets"
TRAINED_MODEL_DIR = PACKAGE_ROOT / "trained_models"
Expand All @@ -43,7 +44,7 @@ class ModelConfig(BaseModel):
target: str
features: List[str]
unused_fields: List[str]

date_var: str
yr_var: str
mnth_var: str
Expand All @@ -57,16 +58,16 @@ class ModelConfig(BaseModel):
atemp_var: str
hum_var: str
windspeed_var: str

yr_mappings: Dict[int, int]
mnth_mappings: Dict[str, int]
season_mappings: Dict[str, int]
weathersit_mappings: Dict[str, int]
holiday_mappings: Dict[str, int]
workingday_mappings: Dict[str, int]
hr_mappings: Dict[str, int]
test_size:float

test_size: float
random_state: int
n_estimators: int
max_depth: int
Expand All @@ -81,10 +82,10 @@ class Config(BaseModel):

def find_config_file() -> Path:
    """Return the path of the package configuration file.

    Raises:
        Exception: if ``CONFIG_FILE_PATH`` is not an existing file.
    """
    # Guard clause: fail fast when the expected file is absent.
    if not CONFIG_FILE_PATH.is_file():
        raise Exception(f"Config not found at {CONFIG_FILE_PATH!r}")

    return CONFIG_FILE_PATH


Expand All @@ -98,7 +99,7 @@ def fetch_config_from_yaml(cfg_path: Path = None) -> YAML:
with open(cfg_path, "r") as conf_file:
parsed_config = load(conf_file.read())
return parsed_config

raise OSError(f"Did not find config file at path: {cfg_path}")


Expand All @@ -109,11 +110,11 @@ def create_and_validate_config(parsed_config: YAML = None) -> Config:

# specify the data attribute from the strictyaml YAML type.
_config = Config(
app_config = AppConfig(**parsed_config.data),
model_config = ModelConfig(**parsed_config.data),
app_config=AppConfig(**parsed_config.data),
model_config=ModelConfig(**parsed_config.data),
)

return _config


config = create_and_validate_config()
config = create_and_validate_config()
134 changes: 94 additions & 40 deletions bikeshare_model/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
from pathlib import Path

file = Path(__file__).resolve()
parent, root = file.parent, file.parents[1]
sys.path.append(str(root))
Expand All @@ -13,43 +14,96 @@
from bikeshare_model.processing.features import Mapper
from bikeshare_model.processing.features import OutlierHandler, WeekdayOneHotEncoder

# Shorthand for the model settings that every step below reads from.
_model_cfg = config.model_config

# Preprocessing + regression pipeline, defined once. Steps are listed in the
# order they are applied: imputation, categorical mapping, outlier handling,
# weekday one-hot encoding, scaling, and finally the random-forest regressor.
bikeshare_pipe = Pipeline(
    [
        ######### Imputation ###########
        (
            "weekday_imputation",
            WeekdayImputer(
                variable=_model_cfg.weekday_var,
                date_var=_model_cfg.date_var,
            ),
        ),
        (
            "weathersit_imputation",
            WeathersitImputer(variable=_model_cfg.weathersit_var),
        ),
        ######### Mapper ###########
        (
            "map_yr",
            Mapper(
                variable=_model_cfg.yr_var,
                mappings=_model_cfg.yr_mappings,
            ),
        ),
        (
            "map_mnth",
            Mapper(
                variable=_model_cfg.mnth_var,
                mappings=_model_cfg.mnth_mappings,
            ),
        ),
        (
            "map_season",
            Mapper(
                variable=_model_cfg.season_var,
                mappings=_model_cfg.season_mappings,
            ),
        ),
        (
            "map_weathersit",
            Mapper(
                variable=_model_cfg.weathersit_var,
                mappings=_model_cfg.weathersit_mappings,
            ),
        ),
        (
            "map_holiday",
            Mapper(
                variable=_model_cfg.holiday_var,
                mappings=_model_cfg.holiday_mappings,
            ),
        ),
        (
            "map_workingday",
            Mapper(
                variable=_model_cfg.workingday_var,
                mappings=_model_cfg.workingday_mappings,
            ),
        ),
        (
            "map_hr",
            Mapper(
                variable=_model_cfg.hr_var,
                mappings=_model_cfg.hr_mappings,
            ),
        ),
        ######## Handle outliers ########
        ("handle_outliers_temp", OutlierHandler(variable=_model_cfg.temp_var)),
        ("handle_outliers_atemp", OutlierHandler(variable=_model_cfg.atemp_var)),
        ("handle_outliers_hum", OutlierHandler(variable=_model_cfg.hum_var)),
        (
            "handle_outliers_windspeed",
            OutlierHandler(variable=_model_cfg.windspeed_var),
        ),
        ######## One-hot encoding ########
        (
            "encode_weekday",
            WeekdayOneHotEncoder(variable=_model_cfg.weekday_var),
        ),
        # Scale features
        ("scaler", StandardScaler()),
        # Regressor
        (
            "model_rf",
            RandomForestRegressor(
                n_estimators=_model_cfg.n_estimators,
                max_depth=_model_cfg.max_depth,
                random_state=_model_cfg.random_state,
            ),
        ),
    ]
)
46 changes: 30 additions & 16 deletions bikeshare_model/predict.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
from pathlib import Path

file = Path(__file__).resolve()
parent, root = file.parent, file.parents[1]
sys.path.append(str(root))
Expand All @@ -11,37 +12,50 @@
from bikeshare_model import __version__ as _version
from bikeshare_model.config.core import config
from bikeshare_model.processing.data_manager import load_pipeline
from bikeshare_model.processing.data_manager import pre_pipeline_preparation
from bikeshare_model.processing.validation import validate_inputs


pipeline_file_name = f"{config.app_config.pipeline_save_file}{_version}.pkl"
bikeshare_pipe = load_pipeline(file_name = pipeline_file_name)
bikeshare_pipe = load_pipeline(file_name=pipeline_file_name)


def make_prediction(*, input_data: Union[pd.DataFrame, dict]) -> dict:
    """Make a prediction using the saved model pipeline.

    Args:
        input_data: Raw input rows, either a DataFrame or a dict that
            ``pd.DataFrame`` can be built from.

    Returns:
        A dict with three keys: ``"predictions"`` (the pipeline output rounded
        down with ``np.floor``, or ``None`` when validation reported errors),
        ``"version"`` (the package version) and ``"errors"`` (whatever
        ``validate_inputs`` returned as its second value).
    """
    validated_data, errors = validate_inputs(input_df=pd.DataFrame(input_data))

    # Conform the frame to the configured feature list: columns are put in the
    # configured order, and any column absent from the input is added as NaN.
    validated_data = validated_data.reindex(columns=config.model_config.features)

    results = {"predictions": None, "version": _version, "errors": errors}

    # Only call the model when validation came back clean.
    if not errors:
        predictions = bikeshare_pipe.predict(validated_data)
        results["predictions"] = np.floor(predictions)
        print(results)

    return results



if __name__ == "__main__":
    # One hand-written sample row, used as a manual smoke test when this
    # module is run as a script. The prediction is requested exactly once.
    data_in = {
        "dteday": ["2012-11-6"],
        "season": ["winter"],
        "hr": ["6pm"],
        "holiday": ["No"],
        "weekday": ["Tue"],
        "workingday": ["Yes"],
        "weathersit": ["Clear"],
        "temp": [16],
        "atemp": [17.5],
        "hum": [30],
        "windspeed": [10],
    }

    make_prediction(input_data=data_in)
Loading

0 comments on commit ac6fabf

Please sign in to comment.