Merge pull request #56 from heartexlabs/DEV-1914

fix: DEV-1914: Compositional labeling config takes account of metric selected
HumanSignal · Mar 30, 2022 · f3b7797 · f3b7797
2 parents 3ea4b21 + 2039b52
commit f3b7797
Show file tree

Hide file tree

Showing 5 changed files with 24 additions and 10 deletions.
diff --git a/evalme/metrics.py b/evalme/metrics.py
@@ -26,7 +26,6 @@ class MetricWrapper(object):
     tags = attr.ib(default=[])
 
 
-
 class Metrics(object):
 
     _metrics = {}
@@ -55,9 +54,17 @@ def get_default_metric_for_tag(cls, tag):
 
     @classmethod
     def get_default_metric_for_name_tag(cls, tag, name):
+        """
+        Get default metric by tag and name
+        :param tag: Tag name
+        :param name: Metric name
+        :return: Metric object, or None if the named metric is registered for a different tag
+        """
         metric = cls._metrics.get(name)
         if (metric is not None) and ((metric.tag == tag) or (metric.tag == 'all')):
             return metric
+        elif (metric is not None) and (metric.tag != tag):
+            return None
         else:
             return cls.get_default_metric_for_tag(tag)
 
@@ -172,7 +179,7 @@ def symmetrize(a, b):
                 if not matching_func:
                     logger.error(f'No matching function found for control type {control_type} in {project}.'
                                  f'Using naive calculation.')
-                    matching_func = cls._metrics.get('naive')
+                    continue
                 # identify if label config need
                 func_args = inspect.getfullargspec(matching_func.func)
                 if 'label_config' in func_args[0]:
@@ -271,10 +278,11 @@ def group(cls, project, results, threshold=0.0, use_single_linkage_by_default=Fa
 
 Metrics.register(
     name='naive',
-    form=None,
+    form='empty_form',
     tag='all',
     func=naive,
-    desc='Naive comparison of result dict'
+    desc='Naive comparison of result dict',
+    tags=['all']
 )
 
 

diff --git a/evalme/tests/test_classification.py b/evalme/tests/test_classification.py
@@ -2,6 +2,8 @@
 
 from evalme.classification import ClassificationEvalItem, ChoicesEvalItem, naive
 
+from evalme.metrics import Metrics
+
 
 @pytest.mark.ClassificationEvalItem
 def test_not_matching():
@@ -206,6 +208,7 @@ def test_naive_matching():
             }
         ]]
     assert naive(test_data[0], test_data[1]) == 1
+    assert Metrics.apply({}, test_data[0], test_data[1], metric_name='naive') == 1
 
 
 def test_naive_matching_per_label():
@@ -230,6 +233,7 @@ def test_naive_matching_per_label():
             }
         ]]
     assert naive(test_data[0], test_data[1], per_label=True) == {"Accessories\\1\\2": 1}
+    assert Metrics.apply({}, test_data[0], test_data[1], metric_name='naive', per_label=True) == {"Accessories\\1\\2": 1}
 
 
 def test_naive_not_matching():
@@ -254,6 +258,7 @@ def test_naive_not_matching():
             }
         ]]
     assert naive(test_data[0], test_data[1]) == 0
+    assert Metrics.apply({}, test_data[0], test_data[1], metric_name='naive') == 0
 
 
 def test_naive_not_matching_per_label():
@@ -278,3 +283,4 @@ def test_naive_not_matching_per_label():
             }
         ]]
     assert naive(test_data[0], test_data[1], per_label=True) == {"Accessories1": 0}
+    assert Metrics.apply({}, test_data[0], test_data[1], metric_name='naive', per_label=True) == {"Accessories1": 0, "Accessories2": 0}
diff --git a/evalme/tests/test_matcher.py b/evalme/tests/test_matcher.py
@@ -56,6 +56,6 @@ def test_get_agreement_per_label():
                  "origin": "manual", "to_name": "text", "from_name": "ner"}]
 
     t1 = get_agreement(item_old, item_new, per_label=True)
-    assert t1[1] == {'No-label': 0.0, 'Title': 0.0}
+    assert t1[1] == {}
     t2 = get_agreement(item_new, item_old, per_label=True)
-    assert t2[1] == {'No-label': 0.0, 'Title': 0.0}
+    assert t2[1] == {}
diff --git a/evalme/tests/test_metrics.py b/evalme/tests/test_metrics.py
@@ -75,13 +75,13 @@ def test_config_with_2_control_types():
     """
     Test Metrics apply with different control types
     """
-    result_of_type1_1 = {"from_name": "image",
+    result_of_type1_1 = {"from_name": "image1",
               "type": "polygonlabels",
               "value": {
                   "points": [[1, 1], [1, 20], [20, 20], [20, 1]],
                   "polygonlabels": ["Engine"]
               }}
-    result_of_type1_2 = {"from_name": "image",
+    result_of_type1_2 = {"from_name": "image1",
               "type": "polygonlabels",
               "value": {
                   "points": [[1, 1], [1, 20], [20, 20], [20, 1]],
@@ -104,4 +104,4 @@ def test_config_with_2_control_types():
     combined_result = Metrics.apply({}, combined_1, combined_2)
     assert r1 == 1
     assert r2 == 0.0
-    assert combined_result == 0.5
+    assert combined_result == 1
diff --git a/evalme/tests/test_old_format.py b/evalme/tests/test_old_format.py
@@ -7,7 +7,7 @@ def test_old_format_agreement_matrix():
 
     matrix = m.get_annotations_agreement()
     assert matrix is not None
-    assert matrix > 0
+    assert matrix == 0.0
 
 
 def test_old_format_load():