#29 adds files for end-to-end test run
namiyousef committed Mar 31, 2022
1 parent 3b4319e commit af70c60
Showing 3 changed files with 407 additions and 25 deletions.
22 changes: 20 additions & 2 deletions argminer/data.py
@@ -64,6 +64,8 @@ def __init__(self, df_label_map, df_text, tokenizer, max_length, strategy, is_train
id_: label for label, id_ in self.label_to_id.items()
}

self.reduce_map = self.get_reduce_map()

self.targets = df_text.labels.apply(
lambda x: [self.label_to_id[label] for label in x]
).values
@@ -107,8 +109,9 @@ def __getitem__(self, index):
targets = labeller(targets, word_id_mask, word_ids_filtered)
targets = torch.as_tensor(targets, dtype=torch.long)

inputs['word_ids'] = word_ids_replaced
inputs['index'] = torch.as_tensor(index)
if not self.is_train:
inputs['word_ids'] = word_ids_replaced
inputs['index'] = torch.as_tensor(index)

# for training, there is no need to return word_ids or word_id_mask;
# for validation and testing, they must be returned!
@@ -162,6 +165,19 @@ def _label_standard(self, targets, word_id_mask, word_ids):
expanded_targets[word_id_mask] = expanded_targets_with_mask
return expanded_targets

def get_reduce_map(self):
reduce_map = {}
label_to_id = {}
for id_, label in self.id_to_label.items():
if label not in ['X', 'O']:
label = label.split('-')[1] # strip the B-/I- prefix
reduce_map[id_] = label
label_to_id[label] = label # dict used as an ordered set to preserve label order
label_to_id = {label: i for i, label in enumerate(label_to_id)}
reduce_map = {i: label_to_id[label] for i, label in reduce_map.items()}
return reduce_map
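
A minimal standalone sketch of what this mapping produces, assuming a hypothetical BIO label set (the real labels come from df_label_map):

# hypothetical BIO label set, for illustration only
id_to_label = {0: 'O', 1: 'B-Claim', 2: 'I-Claim', 3: 'B-Evidence', 4: 'I-Evidence'}

reduce_map, ordered_labels = {}, {}
for id_, label in id_to_label.items():
    if label not in ['X', 'O']:
        label = label.split('-')[1]  # strip the B-/I- prefix
    reduce_map[id_] = label
    ordered_labels[label] = None  # dict as ordered set
label_to_id = {label: i for i, label in enumerate(ordered_labels)}
reduce_map = {i: label_to_id[label] for i, label in reduce_map.items()}

print(reduce_map)  # {0: 0, 1: 1, 2: 1, 3: 2, 4: 2} -- B-/I- variants share one class id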



class KaggleDataset(Dataset):
"""
@@ -320,6 +336,8 @@ def postprocess(self):
raise Exception('Cannot run postprocess before running process')
elif self.status != 'processed':
raise Exception('Postprocess method has already been called.')
# TODO this is a BUG FIX
return self._postprocess

def get_train_data(self):
if self.status == 'postprocessed':
107 changes: 84 additions & 23 deletions argminer/evaluation.py
@@ -1,24 +1,10 @@
# -- public imports
import torch
import pandas as pd

def test(model, testloader, collect_predictions=False):

model.eval()
with torch.no_grad():
for i, (inputs, targets) in enumerate(testloader):
# TODO need to extract relevant items from inputs
# TODO move inputs and model to relevant devices
loss, outputs = model(
labels=targets,
input_ids=inputs['input_ids'],
attention_mask=inputs['attention_mask'],
return_dict=False
)
if collect_predictions:
pass

return predictions

# -- private imports
from colabtools.config import DEVICE
from colabtools.utils import move_to_device

def get_word_labels(inputs, outputs, agg_strategy, has_x):
"""
@@ -37,7 +23,8 @@ def get_word_labels(inputs, outputs, agg_strategy, has_x):

unique_word_ids, word_id_counts = torch.unique_consecutive(word_ids, return_counts=True)
agg_predictions = torch.zeros(
(unique_word_ids.shape[0], predictions.shape[-1]),
# TODO the len() is a hotfix!
(unique_word_ids.shape[0], predictions.shape[-1] if len(predictions.shape) > 1 else 1), # for targets, you don't need the second dim?
dtype=predictions.dtype
)
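
The aggregation above hinges on torch.unique_consecutive grouping consecutive sub-word positions back into words; a minimal sketch of that mechanism with toy tensors (not the function's real inputs):

import torch

# toy word_ids: five sub-word tokens spanning three words
word_ids = torch.tensor([0, 0, 1, 2, 2])
unique_word_ids, word_id_counts = torch.unique_consecutive(word_ids, return_counts=True)
print(unique_word_ids)  # tensor([0, 1, 2])
print(word_id_counts)   # tensor([2, 1, 2])
# with agg_strategy='first', each word keeps the prediction of its first sub-token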

@@ -152,8 +139,6 @@ def evaluate(df_outputs, df_targets):

matched_gt_id = df_targets[df_targets['tp'] == 1][gt_id].unique()

print(df_targets[df_targets[gt_id].isin(matched_gt_id)].sort_values('class'))

df_false_negative = df_targets[
# (df_targets['tp'] == 0) &
(~df_targets.set_index(gt_id).index.isin(matched_gt_id))
@@ -169,6 +154,82 @@ def evaluate(df_outputs, df_targets):
FN.merge(FP, how='outer'), how='outer'
).fillna(0)
scores = scores.assign(f1=lambda x: x['tp'] / (x['tp'] + 1 / 2 * (x['fp'] + x['fn'])))
print(scores)

return scores['f1'].mean()
return scores
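
For reference, the f1 column assigned above is the count-based F1, tp / (tp + (fp + fn) / 2); a quick sketch with made-up per-class counts:

import pandas as pd

# toy tp/fp/fn counts, for illustration only
scores = pd.DataFrame({'class': ['Claim', 'Evidence'], 'tp': [8, 5], 'fp': [2, 1], 'fn': [2, 4]})
scores = scores.assign(f1=lambda x: x['tp'] / (x['tp'] + 1 / 2 * (x['fp'] + x['fn'])))
print(scores['f1'].tolist())  # [0.8, 0.6666666666666666]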

def inference(model, testloader, metrics=[]):
# TODO add options for agg method
"""
Takes a trained model and evaluates its performance.
Assumes that the model and the data are on the same device!
"""
model.eval()
model.to(DEVICE)

reduce_map = testloader.dataset.reduce_map
reduce_map_values = torch.as_tensor(list(reduce_map.values()))
reduce_map_values = move_to_device(reduce_map_values, DEVICE)
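# note: reduce_map_values acts as a lookup table; indexing it with a tensor of
# token-level label ids (as done below) maps each id to its reduced class id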

total_metrics = []
total_scores = []

with torch.no_grad():
for i, (inputs, targets) in enumerate(testloader):


inputs = move_to_device(inputs, DEVICE)
targets = move_to_device(targets, DEVICE)
loss, outputs = model(
labels=targets,
input_ids=inputs['input_ids'],
attention_mask=inputs['attention_mask'],
return_dict=False
)
df_metrics_no_agg = pd.DataFrame.from_records({
f'{metric.__class__.__name__}_no_agg': metric(outputs, targets) for metric in metrics
})

word_ids = inputs['word_ids']
doc_ids = inputs['index']

word_label_probas = get_word_labels(word_ids, outputs, agg_strategy='first', has_x=False)
word_label_ids = [tensor.argmax(dim=1) for tensor in word_label_probas]

target_label_probas = get_word_labels(word_ids, targets, agg_strategy='first', has_x=False)
# TODO this is a hotfix. Need an automatic dimensioning tool!
target_label_ids = [tensor.flatten() for tensor in target_label_probas]

df_metrics_agg = pd.DataFrame.from_records({
f'{metric.__class__.__name__}_agg': [
metric(output, target).item() for output, target in zip(word_label_ids, target_label_ids)
] for metric in metrics
})

word_labels = [reduce_map_values[label_ids] for label_ids in word_label_ids]
target_labels = [reduce_map_values[label_ids] for label_ids in target_label_ids]

df_metrics_agg_reduced = pd.DataFrame.from_records({
f'{metric.__class__.__name__}_agg_reduced': [
metric(output, target).item() for output, target in zip(word_labels, target_labels)
] for metric in metrics
})


df_targets_predString = get_predictionString(target_labels, doc_ids)
df_outputs_predString = get_predictionString(word_labels, doc_ids)

# TODO evaluate might fail on string inputs, make sure it doesn't
df_metrics = pd.concat([df_metrics_no_agg, df_metrics_agg, df_metrics_agg_reduced], axis=1)
df_scores = evaluate(df_outputs_predString, df_targets_predString)

total_metrics.append(df_metrics)
total_scores.append(df_scores)

df_metrics_total = pd.concat(total_metrics)
df_scores_total = pd.concat(total_scores)

return df_metrics_total, df_scores_total

# maybe need to concat df_scores as well?
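
A hedged sketch of how this might be wired up end to end (the dataset, loader, and model objects below are illustrative assumptions, not part of this commit):

from torch.utils.data import DataLoader

# hypothetical setup: any dataset that exposes .reduce_map and yields (inputs, targets)
test_loader = DataLoader(test_dataset, batch_size=8)
df_metrics, df_scores = inference(model, test_loader, metrics=[])
print(df_scores.head())  # per-class tp/fp/fn counts plus the derived f1 column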

