From eae351e1e065d4478b23a907dd0dc8fb7fc71e90 Mon Sep 17 00:00:00 2001
From: TuanNguyen27
Date: Mon, 11 Jul 2016 12:07:42 -0400
Subject: [PATCH] Cleaned older tests and added testing for custom metrics

Fixed older tests by adding hard-coded result arrays for testing fit and
fit_transform. Added custom metrics by importing accuracy_score,
zero_one_loss from sklearn
---
 mdr/mdr.py |  18 +++++----
 tests.py   | 109 ++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 93 insertions(+), 34 deletions(-)

diff --git a/mdr/mdr.py b/mdr/mdr.py
index 50454e3..863736e 100644
--- a/mdr/mdr.py
+++ b/mdr/mdr.py
@@ -130,7 +130,7 @@ def fit_transform(self, features, classes):
         self.fit(features, classes)
         return self.transform(features)
 
-    def score(self, features, classes, add_score = False):
+    def score(self, features, classes, add_score = None):
         """Estimates the accuracy of the predictions from the constructed feature
         #pass in another param to customize scoring metrics
         Parameters
@@ -146,16 +146,18 @@ def score(self, features, classes, add_score = None):
             The estimated accuracy based on the constructed feature
 
         """
-        if add_score:
-            #import some kind of scoring metric from sklearn?
-            return
+
         if len(self.feature_map) == 0:
             raise ValueError('fit not called properly')
+
         new_feature = self.transform(features)
-        results = (new_feature == classes)
-        score = np.sum(results)
-        accuracy_score = float(score)/classes.size
-        return accuracy_score
+
+        if add_score == None:
+            results = (new_feature == classes)
+            score = np.sum(results)
+            return float(score)/classes.size
+        else:
+            return add_score(classes, new_feature) #might have to specify additional params, depending on the metrics in use
 
 def main():
     """Main function that is called when MDR is run on the command line"""
diff --git a/tests.py b/tests.py
index b8ff713..436cbb9 100644
--- a/tests.py
+++ b/tests.py
@@ -9,12 +9,12 @@
 import random
 import warnings
 import inspect
-from sklearn.cross_validation import train_test_split
+from sklearn.metrics import accuracy_score, zero_one_loss
 
 def test_init():
     """Ensure that the MDR instantiator stores the MDR variables properly"""
-    mdr_obj = MDR() #change this or create a second test
+    mdr_obj = MDR()
 
     assert mdr_obj.tie_break == 0
     assert mdr_obj.default_label == 0
 
@@ -25,8 +25,8 @@ def test_init():
     assert mdr_obj2.tie_break == 1
     assert mdr_obj2.default_label == 2
 
-
 def test_fit():
+    """Ensure that the MDR 'fit' method constructs the right matrix to count each class, as well as the right map from feature instances to labels"""
     features = np.array([   [2, 0],
                             [0, 0],
                             [0, 1],
@@ -69,7 +69,13 @@ def test_fit():
     assert mdr.feature_map[(1,1)] == 0
     assert mdr.feature_map[(0,1)] == 1
 
+# 2 0 count: 1 label 1; maps to 1
+# 0 0 count: 3 label 0; 6 label 1; maps to 0 *tie_break*
+# 1 1 count: 2 label 0; maps to 0
+# 0 1 count: 3 label 1; maps to 1
+
 def test_transform():
+    """Ensure that the MDR 'transform' method maps a new set of feature instances to the desired labels"""
     features = np.array([   [2, 0],
                             [0, 0],
                             [0, 1],
@@ -107,33 +113,84 @@ def test_transform():
                             [0, 0]])
 
     new_features = mdr.transform(test_features)
-    for row_i in range(test_features.shape[0]):
-        assert new_features[row_i] == mdr.feature_map[tuple(test_features[row_i])]
-    assert new_features[0] == mdr.default_label
-    assert new_features[13] == mdr.default_label
+    assert np.array_equal(new_features, [0,0,0,0,0,0,0,0,0,0,1,0,0,0,0])
+
+# 2 0 count: 1 label 1; maps to 1
+# 0 0 count: 3 label 0; 6 label 1; maps to 0 *tie_break*
+# 1 1 count: 2 label 0; maps to 0
+# 0 1 count: 3 label 1; maps to 1
 
 def test_fit_transform():
-    features = np.array([   [2, 0],
-                            [0, 0],
-                            [0, 1],
-                            [0, 0],
-                            [0, 0],
-                            [0, 0],
-                            [0, 1],
-                            [0, 0],
-                            [0, 0],
-                            [0, 1],
-                            [0, 0],
-                            [0, 0],
-                            [0, 0],
-                            [1, 1],
-                            [1, 1]])
+    """Ensure that the MDR 'fit_transform' method combines both fit and transform, and produces the right predicted labels"""
+    features = np.array([[2,0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [1, 1],
+                         [1, 1]])
 
     classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
 
     mdr = MDR()
     new_features = mdr.fit_transform(features, classes)
-    for row_i in range(new_features.shape[0]):
-        assert new_features[row_i] == mdr.feature_map[tuple(features[row_i])]
-    assert new_features[0] == 1
-    assert new_features[13] == 0
+    assert np.array_equal(new_features, [1,0,1,0,0,0,1,0,0,1,0,0,0,0,0])
+
+def test_score():
+    """Ensure that the MDR 'score' method outputs the right default score, as well as the right custom metric if specified"""
+    features = np.array([[2,0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [1, 1],
+                         [1, 1]])
+
+    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
+
+    mdr = MDR()
+    mdr.fit(features, classes)
+    assert mdr.score(features, classes) == 9./15
+
+def test_custom_score():
+    """Ensure that the MDR 'score' method outputs the right custom score passed in from the user"""
+    features = np.array([[2,0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 1],
+                         [0, 0],
+                         [0, 0],
+                         [0, 0],
+                         [1, 1],
+                         [1, 1]])
+
+    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
+
+    mdr = MDR()
+    mdr.fit(features, classes)
+    assert mdr.score(features = features, classes = classes, add_score = accuracy_score) == 9./15
+    assert mdr.score(features = features, classes = classes, add_score = zero_one_loss) == 1 - 9./15
+    #Note: have not handled the case where there are extra params to specify for custom scores.
+
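
Reviewer note: below is a minimal usage sketch (not part of the patch) showing how the new
add_score hook behaves, based only on the score() change and the new tests above. It assumes
the MDR class is importable as "from mdr import MDR" (the import path is not visible in these
hunks) and that any callable with the sklearn metric signature metric(y_true, y_pred) works.

# Usage sketch for the add_score hook added in this patch.
# Assumption: MDR is importable as `from mdr import MDR`; that import
# is not shown in the hunks above.
import numpy as np
from sklearn.metrics import accuracy_score, zero_one_loss
from mdr import MDR

features = np.array([[2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
                     [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
                     [0, 0], [0, 0], [0, 0], [1, 1], [1, 1]])
classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

mdr = MDR()
mdr.fit(features, classes)

# Default scoring: fraction of constructed-feature predictions equal to classes.
print(mdr.score(features, classes))                            # 9/15 = 0.6
# Custom metric: score() calls add_score(classes, constructed_feature).
print(mdr.score(features, classes, add_score=accuracy_score))  # 0.6
print(mdr.score(features, classes, add_score=zero_one_loss))   # 0.4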