(#105) Complementing tests to random forest

hygia-org · Jul 14, 2023 · 6c45ff9
1 parent 66a363a
commit 6c45ff9
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 2 deletions.
diff --git a/tests/data_pipeline/augment_data/test_augment_data.py b/tests/data_pipeline/augment_data/test_augment_data.py
@@ -12,6 +12,7 @@
  ('00000', False),
  ('00001', False)
 ])
+
 class TestAugmentData:
  def setup_method(self):
  self.augment_data = AugmentData(country="MEXICO")
@@ -22,10 +23,11 @@ def test_validate_zipcode(self, zipcode, expected):
  def test_validate_zipcodes(self, zipcode, expected):
  df = pd.DataFrame({'zipcode': [zipcode]})
  result = self.augment_data.validate_zipcodes(df, 'zipcode')
- assert result.equals(pd.DataFrame({'zipcode_is_valid': [expected]}))
+ expected_result = pd.DataFrame({'zipcode_is_valid': [expected]})
+ assert result.equals(expected_result)
 
  def test_augment_data(self, zipcode, expected):
  df = pd.DataFrame({'zipcode': [zipcode]})
  result = self.augment_data.augment_data(df, 'zipcode')
  expected_result = pd.DataFrame({'zipcode': [zipcode], 'zipcode_is_valid': [expected]})
- assert result.equals(expected_result)
+ assert result.equals(expected_result)
diff --git a/tests/data_pipeline/model/test_random_forest.py b/tests/data_pipeline/model/test_random_forest.py
@@ -1,5 +1,6 @@
 import pandas as pd
 from sklearn.datasets import make_classification
+from sklearn.preprocessing import LabelEncoder
 from hygia import RandomForestModel
 
 class TestRandomForestModel:
@@ -18,3 +19,20 @@ def test_random_forest_model(self):
  assert scores['precision'] >= 0.0 and scores['precision'] <= 1
  assert scores['recall'] >= 0.0 and scores['recall'] <= 1
  assert scores['f1'] >= 0.0 and scores['f1'] <= 1
+
+ def test_predict(self):
+ X, _ = make_classification(n_samples=100, n_features=20, random_state=42)
+ columns = ['feature_'+str(i) for i in range(X.shape[1])]
+ df = pd.DataFrame(X, columns=columns)
+ df['target'] = ['valid'] * len(df)
+ df.loc[0, 'target'] = 'key_smash'
+
+ model = RandomForestModel(normalize=False)
+ label_encoder = LabelEncoder()
+ df['target_encoded'] = label_encoder.fit_transform(df['target'])
+
+ model.train_and_get_scores(df, 'target_encoded', columns)
+
+ result = model.predict(df[columns], 'target_encoded')
+
+ assert len(result) == len(df)