Skip to content

Commit

Permalink
(#105) Complementing tests to random forest
Browse files Browse the repository at this point in the history
  • Loading branch information
AnHoff committed Jul 14, 2023
1 parent 66a363a commit 6c45ff9
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
6 changes: 4 additions & 2 deletions tests/data_pipeline/augment_data/test_augment_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
('00000', False),
('00001', False)
])

class TestAugmentData:
def setup_method(self):
# Create the AugmentData instance (country "MEXICO") that the test methods
# access through self.augment_data.
self.augment_data = AugmentData(country="MEXICO")
Expand All @@ -22,10 +23,11 @@ def test_validate_zipcode(self, zipcode, expected):
def test_validate_zipcodes(self, zipcode, expected):
    """Check the validity flag produced by ``validate_zipcodes`` for one zipcode.

    Builds a one-row frame holding ``zipcode`` in a 'zipcode' column, runs
    ``self.augment_data.validate_zipcodes`` on that column, and requires the
    result to equal a single-column 'zipcode_is_valid' frame holding
    ``expected``.
    """
    df = pd.DataFrame({'zipcode': [zipcode]})
    result = self.augment_data.validate_zipcodes(df, 'zipcode')
    expected_result = pd.DataFrame({'zipcode_is_valid': [expected]})
    # A single comparison against the named expected frame is sufficient.
    assert result.equals(expected_result)

def test_augment_data(self, zipcode, expected):
    """Check that ``augment_data`` returns the zipcode with its validity flag.

    Builds a one-row frame holding ``zipcode`` in a 'zipcode' column, runs
    ``self.augment_data.augment_data`` on that column, and requires the
    result to equal a frame with the original 'zipcode' column plus a
    'zipcode_is_valid' column holding ``expected``.
    """
    df = pd.DataFrame({'zipcode': [zipcode]})
    result = self.augment_data.augment_data(df, 'zipcode')
    expected_result = pd.DataFrame({'zipcode': [zipcode], 'zipcode_is_valid': [expected]})
    # Assert once; repeating the identical comparison adds no coverage.
    assert result.equals(expected_result)
18 changes: 18 additions & 0 deletions tests/data_pipeline/model/test_random_forest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.preprocessing import LabelEncoder
from hygia import RandomForestModel

class TestRandomForestModel:
Expand All @@ -18,3 +19,20 @@ def test_random_forest_model(self):
assert scores['precision'] >= 0.0 and scores['precision'] <= 1
assert scores['recall'] >= 0.0 and scores['recall'] <= 1
assert scores['f1'] >= 0.0 and scores['f1'] <= 1

def test_predict(self):
    """Train on a synthetic frame and check ``predict`` yields one output per row."""
    # Only the generated feature matrix is used; labels are assigned by hand.
    features, _ = make_classification(n_samples=100, n_features=20, random_state=42)
    feature_names = [f'feature_{idx}' for idx in range(features.shape[1])]
    frame = pd.DataFrame(features, columns=feature_names)

    # Every row is labelled 'valid' except the first, which is 'key_smash'.
    frame['target'] = ['key_smash'] + ['valid'] * (len(frame) - 1)

    # Encode the string labels into the column the model is trained on.
    frame['target_encoded'] = LabelEncoder().fit_transform(frame['target'])

    model = RandomForestModel(normalize=False)
    model.train_and_get_scores(frame, 'target_encoded', feature_names)

    predictions = model.predict(frame[feature_names], 'target_encoded')

    assert len(predictions) == len(frame)

0 comments on commit 6c45ff9

Please sign in to comment.