From eae351e1e065d4478b23a907dd0dc8fb7fc71e90 Mon Sep 17 00:00:00 2001
From: TuanNguyen27
Date: Mon, 11 Jul 2016 12:07:42 -0400
Subject: [PATCH] Cleaned older tests and added testing for custom metrics

Fixed older tests by adding hard-coded result arrays for testing fit and
fit_transform. Added custom metrics by importing accuracy_score,
zero_one_loss from sklearn
---
 mdr/mdr.py |  18 +++++----
 tests.py   | 109 ++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 93 insertions(+), 34 deletions(-)

diff --git a/mdr/mdr.py b/mdr/mdr.py
index 50454e3..863736e 100644
--- a/mdr/mdr.py
+++ b/mdr/mdr.py
@@ -130,7 +130,7 @@ def fit_transform(self, features, classes):
         self.fit(features, classes)
         return self.transform(features)
 
-    def score(self, features, classes, add_score = False):
+    def score(self, features, classes, add_score = None):
         """Estimates the accuracy of the predictions from the constructed feature
         #pass in another param to customize scoring metrics
         Parameters
@@ -146,16 +146,18 @@ def score(self, features, classes, add_score = None):
             The estimated accuracy based on the constructed feature
 
         """
-        if add_score:
-            #import some kind of scoring metric from sklearn?
-            return
+
         if len(self.feature_map) == 0:
             raise ValueError('fit not called properly')
+
         new_feature = self.transform(features)
-        results = (new_feature == classes)
-        score = np.sum(results)
-        accuracy_score = float(score)/classes.size
-        return accuracy_score
+
+        if add_score == None:
+            results = (new_feature == classes)
+            score = np.sum(results)
+            return float(score)/classes.size
+        else:
+            return add_score(classes, new_feature) #might have to specify additional params, depending on the metrics in use
 
 def main():
     """Main function that is called when MDR is run on the command line"""
diff --git a/tests.py b/tests.py
index b8ff713..436cbb9 100644
--- a/tests.py
+++ b/tests.py
@@ -9,12 +9,12 @@
 import random
 import warnings
 import inspect
-from sklearn.cross_validation import train_test_split
+from sklearn.metrics import accuracy_score, zero_one_loss
 
 def test_init():
     """Ensure that the MDR instantiator stores the MDR variables properly"""
-    mdr_obj = MDR() #change this or create a second test
+    mdr_obj = MDR()
 
     assert mdr_obj.tie_break == 0
     assert mdr_obj.default_label == 0
 
@@ -25,8 +25,8 @@ def test_init():
     assert mdr_obj2.tie_break == 1
     assert mdr_obj2.default_label == 2
 
-
 def test_fit():
+    """Ensure that the MDR 'fit' method constructs the right matrix to count each class, as well as the right map from feature instances to labels"""
     features = np.array([   [2, 0],
                             [0, 0],
                             [0, 1],
@@ -69,7 +69,13 @@ def test_fit():
     assert mdr.feature_map[(1,1)] == 0
     assert mdr.feature_map[(0,1)] == 1
 
+# 2 0 count: 1 label 1; maps to 1
+# 0 0 count: 3 label 0; 6 label 1; maps to 0 *tie_break*
+# 1 1 count: 2 label 0; maps to 0
+# 0 1 count: 3 label 1; maps to 1
+
 def test_transform():
+    """Ensure that the MDR 'transform' method maps a new set of feature instances to the desired labels"""
     features = np.array([   [2, 0],
                             [0, 0],
                             [0, 1],
@@ -107,33 +113,84 @@ def test_transform():
                             [0, 0]])
 
     new_features = mdr.transform(test_features)
-    for row_i in range(test_features.shape[0]):
-        assert new_features[row_i] == mdr.feature_map[tuple(test_features[row_i])]
-    assert new_features[0] == mdr.default_label
-    assert new_features[13] == mdr.default_label
+    assert np.array_equal(new_features, [0,0,0,0,0,0,0,0,0,0,1,0,0,0,0])
+
+# 2 0 count: 1 label 1; maps to 1
+# 0 0 count: 3 label 0; 6 label 1; maps to 0 *tie_break*
+# 1 1 count: 2 label 0; maps to 0
+# 0 1 count: 3 label 1; maps to 1
 
 def test_fit_transform():
-    features = np.array([   [2, 0],
-                            [0, 0],
-                            [0, 1],
-                            [0, 0],
-                            [0, 0],
-                            [0, 0],
-                            [0, 1],
-                            [0, 0],
-                            [0, 0],
-                            [0, 1],
-                            [0, 0],
-                            [0, 0],
-                            [0, 0],
-                            [1, 1],
-                            [1, 1]])
+    """Ensure that the MDR 'fit_transform' method combines both fit and transform, and produces the right predicted labels"""
+    features = np.array([[2,0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [1, 1],
+                         [1, 1]])
 
     classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
 
     mdr = MDR()
     new_features = mdr.fit_transform(features, classes)
-    for row_i in range(new_features.shape[0]):
-        assert new_features[row_i] == mdr.feature_map[tuple(features[row_i])]
-    assert new_features[0] == 1
-    assert new_features[13] == 0
+    assert np.array_equal(new_features, [1,0,1,0,0,0,1,0,0,1,0,0,0,0,0])
+
+def test_score():
+    """Ensure that the MDR 'score' method outputs the right default score, as well as the right custom metric if specified"""
+    features = np.array([[2,0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [1, 1],
+                         [1, 1]])
+
+    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
+
+    mdr = MDR()
+    mdr.fit(features, classes)
+    assert mdr.score(features, classes) == 9./15
+
+def test_custom_score():
+    """Ensure that the MDR 'score' method outputs the right custom score passed in from the user"""
+    features = np.array([[2,0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [1, 1],
+                         [1, 1]])
+
+    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
+
+    mdr = MDR()
+    mdr.fit(features, classes)
+    assert mdr.score(features = features, classes = classes, add_score = accuracy_score) == 9./15
+    assert mdr.score(features = features, classes = classes, add_score = zero_one_loss) == 1 - 9./15
+    #Note: have not handled the case where there are extra params to specify for custom scores.
+
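
Reviewer note: below is a minimal usage sketch (not part of the patch) showing how the new
add_score hook behaves, based only on the score() change and the new tests above. It assumes
the MDR class is importable as "from mdr import MDR" (the import path is not visible in these
hunks) and that any callable with the sklearn metric signature metric(y_true, y_pred) works.

# Usage sketch for the add_score hook added in this patch.
# Assumption: MDR is importable as `from mdr import MDR`; that import
# is not shown in the hunks above.
import numpy as np
from sklearn.metrics import accuracy_score, zero_one_loss
from mdr import MDR

features = np.array([[2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
                     [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
                     [0, 0], [0, 0], [0, 0], [1, 1], [1, 1]])
classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

mdr = MDR()
mdr.fit(features, classes)

# Default scoring: fraction of constructed-feature predictions equal to classes.
print(mdr.score(features, classes))                            # 9/15 = 0.6
# Custom metric: score() calls add_score(classes, constructed_feature).
print(mdr.score(features, classes, add_score=accuracy_score))  # 0.6
print(mdr.score(features, classes, add_score=zero_one_loss))   # 0.4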