Skip to content

Commit

Permalink
#68 documenting model classes and functions
Browse files Browse the repository at this point in the history
  • Loading branch information
alvesisaque committed Feb 7, 2023
1 parent bd6419b commit 42b153f
Showing 1 changed file with 34 additions and 2 deletions.
36 changes: 34 additions & 2 deletions hygia/data_pipeline/model/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,25 @@
from sklearn.metrics import precision_score

class RandomForestModel:
"""
This class represents the Random Forest model and allows training it and predicting with it.
Examples
--------
Use this class like this:
.. code-block:: python
new_rf_model = hg.RandomForestModel()
clf, scores = new_rf_model.train_and_get_scores(df, concatened_column_name, all_features_columns)
scores
"""
def __init__(self, model_file=None, n_estimators=100, max_depth=None, random_state=0):
"""
Initialize the RandomForestModel class.
:param model_file: Path to the model file
:type model_file: path
"""
self.model = None
if model_file is not None:
with open(model_file, 'rb') as f:
Expand All @@ -21,7 +39,19 @@ def __init__(self, model_file=None, n_estimators=100, max_depth=None, random_sta
self.random_state = random_state
self.model = RandomForestClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, random_state=self.random_state)

def train_and_get_scores(self, df, concatened_column_name, all_features_columns, test_size=0.3):
def train_and_get_scores(self, df, concatened_column_name, all_features_columns, test_size=0.3):
"""
Train and get scores for the model execution.
:param df: Dataframe with the data.
:type df: pandas.DataFrame
:param concatened_column_name: Column name
:type concatened_column_name: str
:param all_features_columns: List of all feature column names
:type all_features_columns: List
"""
print(f'{Fore.YELLOW}tranning model...{Fore.WHITE}')

df_not_duplicates = df.drop_duplicates(subset=[concatened_column_name])
Expand Down Expand Up @@ -64,8 +94,10 @@ def train_and_get_scores(self, df, concatened_column_name, all_features_columns,
}

return clf, scores

def predict(self, X):
    """
    Run the stored model on ``X`` and return its predictions.

    Prints a status message wrapped in ``Fore.YELLOW`` / ``Fore.WHITE``
    and then delegates to ``self.model.predict``.

    :param X: Input forwarded unchanged to ``self.model.predict``
        (presumably the feature data -- confirm against callers).
    :return: Whatever ``self.model.predict(X)`` returns.
    """
    status_message = f'{Fore.YELLOW}running model...{Fore.WHITE}'
    print(status_message)
    predictions = self.model.predict(X)
    return predictions

0 comments on commit 42b153f

Please sign in to comment.