From 05e2bc9d26e5109e6d731ded74481924fc3cd60e Mon Sep 17 00:00:00 2001 From: Junjun2016 Date: Tue, 3 Nov 2020 22:10:42 +0800 Subject: [PATCH 1/9] add dice evaluation metric --- mmseg/core/evaluation/__init__.py | 5 +- mmseg/core/evaluation/mean_iou.py | 74 --------- mmseg/core/evaluation/metrics.py | 164 ++++++++++++++++++++ mmseg/datasets/custom.py | 48 +++--- tests/test_data/test_dataset.py | 16 +- tests/{test_mean_iou.py => test_metrics.py} | 51 +++++- 6 files changed, 251 insertions(+), 107 deletions(-) delete mode 100644 mmseg/core/evaluation/mean_iou.py create mode 100644 mmseg/core/evaluation/metrics.py rename tests/{test_mean_iou.py => test_metrics.py} (55%) diff --git a/mmseg/core/evaluation/__init__.py b/mmseg/core/evaluation/__init__.py index f169d1bf1b..7a5f1e0154 100644 --- a/mmseg/core/evaluation/__init__.py +++ b/mmseg/core/evaluation/__init__.py @@ -1,7 +1,8 @@ from .class_names import get_classes, get_palette from .eval_hooks import DistEvalHook, EvalHook -from .mean_iou import mean_iou +from .metrics import mean_dice, mean_iou, metrics __all__ = [ - 'EvalHook', 'DistEvalHook', 'mean_iou', 'get_classes', 'get_palette' + 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'metrics', + 'get_classes', 'get_palette' ] diff --git a/mmseg/core/evaluation/mean_iou.py b/mmseg/core/evaluation/mean_iou.py deleted file mode 100644 index 301cfd04fb..0000000000 --- a/mmseg/core/evaluation/mean_iou.py +++ /dev/null @@ -1,74 +0,0 @@ -import numpy as np - - -def intersect_and_union(pred_label, label, num_classes, ignore_index): - """Calculate intersection and Union. - - Args: - pred_label (ndarray): Prediction segmentation map - label (ndarray): Ground truth segmentation map - num_classes (int): Number of categories - ignore_index (int): Index that will be ignored in evaluation. - - Returns: - ndarray: The intersection of prediction and ground truth histogram - on all classes - ndarray: The union of prediction and ground truth histogram on all - classes - ndarray: The prediction histogram on all classes. - ndarray: The ground truth histogram on all classes. - """ - - mask = (label != ignore_index) - pred_label = pred_label[mask] - label = label[mask] - - intersect = pred_label[pred_label == label] - area_intersect, _ = np.histogram( - intersect, bins=np.arange(num_classes + 1)) - area_pred_label, _ = np.histogram( - pred_label, bins=np.arange(num_classes + 1)) - area_label, _ = np.histogram(label, bins=np.arange(num_classes + 1)) - area_union = area_pred_label + area_label - area_intersect - - return area_intersect, area_union, area_pred_label, area_label - - -def mean_iou(results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None): - """Calculate Intersection and Union (IoU) - - Args: - results (list[ndarray]): List of prediction segmentation maps - gt_seg_maps (list[ndarray]): list of ground truth segmentation maps - num_classes (int): Number of categories - ignore_index (int): Index that will be ignored in evaluation. - nan_to_num (int, optional): If specified, NaN values will be replaced - by the numbers defined by the user. Default: None. - - Returns: - float: Overall accuracy on all images. 
- ndarray: Per category accuracy, shape (num_classes, ) - ndarray: Per category IoU, shape (num_classes, ) - """ - - num_imgs = len(results) - assert len(gt_seg_maps) == num_imgs - total_area_intersect = np.zeros((num_classes, ), dtype=np.float) - total_area_union = np.zeros((num_classes, ), dtype=np.float) - total_area_pred_label = np.zeros((num_classes, ), dtype=np.float) - total_area_label = np.zeros((num_classes, ), dtype=np.float) - for i in range(num_imgs): - area_intersect, area_union, area_pred_label, area_label = \ - intersect_and_union(results[i], gt_seg_maps[i], num_classes, - ignore_index=ignore_index) - total_area_intersect += area_intersect - total_area_union += area_union - total_area_pred_label += area_pred_label - total_area_label += area_label - all_acc = total_area_intersect.sum() / total_area_label.sum() - acc = total_area_intersect / total_area_label - iou = total_area_intersect / total_area_union - if nan_to_num is not None: - return all_acc, np.nan_to_num(acc, nan=nan_to_num), \ - np.nan_to_num(iou, nan=nan_to_num) - return all_acc, acc, iou diff --git a/mmseg/core/evaluation/metrics.py b/mmseg/core/evaluation/metrics.py new file mode 100644 index 0000000000..849eee0a22 --- /dev/null +++ b/mmseg/core/evaluation/metrics.py @@ -0,0 +1,164 @@ +import numpy as np + + +def intersect_and_union(pred_label, label, num_classes, ignore_index): + """Calculate intersection and Union. + + Args: + pred_label (ndarray): Prediction segmentation map + label (ndarray): Ground truth segmentation map + num_classes (int): Number of categories + ignore_index (int): Index that will be ignored in evaluation. + + Returns: + ndarray: The intersection of prediction and ground truth histogram + on all classes + ndarray: The union of prediction and ground truth histogram on all + classes + ndarray: The prediction histogram on all classes. + ndarray: The ground truth histogram on all classes. + """ + + mask = (label != ignore_index) + pred_label = pred_label[mask] + label = label[mask] + + intersect = pred_label[pred_label == label] + area_intersect, _ = np.histogram( + intersect, bins=np.arange(num_classes + 1)) + area_pred_label, _ = np.histogram( + pred_label, bins=np.arange(num_classes + 1)) + area_label, _ = np.histogram(label, bins=np.arange(num_classes + 1)) + area_union = area_pred_label + area_label - area_intersect + + return area_intersect, area_union, area_pred_label, area_label + + +def total_intersect_and_union(results, gt_seg_maps, num_classes, ignore_index): + """Calculate Total Intersection and Union. + + Args: + results (list[ndarray]): List of prediction segmentation maps + gt_seg_maps (list[ndarray]): list of ground truth segmentation maps + num_classes (int): Number of categories + ignore_index (int): Index that will be ignored in evaluation. + + Returns: + ndarray: The intersection of prediction and ground truth histogram + on all classes + ndarray: The union of prediction and ground truth histogram on all + classes + ndarray: The prediction histogram on all classes. + ndarray: The ground truth histogram on all classes. 
+ """ + + num_imgs = len(results) + assert len(gt_seg_maps) == num_imgs + total_area_intersect = np.zeros((num_classes, ), dtype=np.float) + total_area_union = np.zeros((num_classes, ), dtype=np.float) + total_area_pred_label = np.zeros((num_classes, ), dtype=np.float) + total_area_label = np.zeros((num_classes, ), dtype=np.float) + for i in range(num_imgs): + area_intersect, area_union, area_pred_label, area_label = \ + intersect_and_union(results[i], gt_seg_maps[i], num_classes, + ignore_index=ignore_index) + total_area_intersect += area_intersect + total_area_union += area_union + total_area_pred_label += area_pred_label + total_area_label += area_label + return total_area_intersect, total_area_union, \ + total_area_pred_label, total_area_label + + +def mean_iou(results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None): + """Calculate Mean Intersection and Union (mIoU) + + Args: + results (list[ndarray]): List of prediction segmentation maps + gt_seg_maps (list[ndarray]): list of ground truth segmentation maps + num_classes (int): Number of categories + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ) + ndarray: Per category IoU, shape (num_classes, ) + """ + + total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label = total_intersect_and_union(results, gt_seg_maps, + num_classes, + ignore_index=ignore_index) + all_acc = total_area_intersect.sum() / total_area_label.sum() + acc = total_area_intersect / total_area_label + iou = total_area_intersect / total_area_union + if nan_to_num is not None: + return all_acc, np.nan_to_num(acc, nan=nan_to_num), \ + np.nan_to_num(iou, nan=nan_to_num) + return all_acc, acc, iou + + +def mean_dice(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None): + """Calculate Mean Dice (mDice) + + Args: + results (list[ndarray]): List of prediction segmentation maps + gt_seg_maps (list[ndarray]): list of ground truth segmentation maps + num_classes (int): Number of categories + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ) + ndarray: Per category dice, shape (num_classes, ) + """ + + total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label = total_intersect_and_union(results, gt_seg_maps, + num_classes, + ignore_index=ignore_index) + all_acc = total_area_intersect.sum() / total_area_label.sum() + acc = total_area_intersect / total_area_label + dice = 2 * total_area_intersect / ( + total_area_pred_label + total_area_label) + if nan_to_num is not None: + return all_acc, np.nan_to_num(acc, nan=nan_to_num), \ + np.nan_to_num(dice, nan=nan_to_num) + return all_acc, acc, dice + + +def metrics(results, + gt_seg_maps, + num_classes, + ignore_index, + metric='mIoU', + nan_to_num=None): + """Calculate evaluation metrics + Args: + results (list[ndarray]): List of prediction segmentation maps + gt_seg_maps (list[ndarray]): list of ground truth segmentation maps + num_classes (int): Number of categories + ignore_index (int): Index that will be ignored in evaluation. 
+ metric (str): Metrics to be evaluated, 'mIoU' or 'mDice'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ) + ndarray: Per category evalution metrics, shape (num_classes, ) + """ + + allowed_metrics = {'mIoU': mean_iou, 'mDice': mean_dice} + if (not isinstance(metric, str)) or (metric not in allowed_metrics): + raise KeyError('metric {} is not supported'.format(metric)) + all_acc, acc, eval_metric = allowed_metrics[metric]( + results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None) + return all_acc, acc, eval_metric diff --git a/mmseg/datasets/custom.py b/mmseg/datasets/custom.py index 7e42d6622c..5f318b1042 100644 --- a/mmseg/datasets/custom.py +++ b/mmseg/datasets/custom.py @@ -6,7 +6,7 @@ from mmcv.utils import print_log from torch.utils.data import Dataset -from mmseg.core import mean_iou +from mmseg.core import metrics from mmseg.utils import get_root_logger from .builder import DATASETS from .pipelines import Compose @@ -14,12 +14,10 @@ @DATASETS.register_module() class CustomDataset(Dataset): - """Custom dataset for semantic segmentation. - - An example of file structure is as followed. + """Custom dataset for semantic segmentation. An example of file structure + is as followed. .. code-block:: none - ├── data │ ├── my_dataset │ │ ├── img_dir @@ -34,15 +32,12 @@ class CustomDataset(Dataset): │ │ │ │ ├── yyy{seg_map_suffix} │ │ │ │ ├── zzz{seg_map_suffix} │ │ │ ├── val - The img/gt_semantic_seg pair of CustomDataset should be of the same except suffix. A valid img/gt_semantic_seg filename pair should be like ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included in the suffix). If split is given, then ``xxx`` is specified in txt file. Otherwise, all files in ``img_dir/``and ``ann_dir`` will be loaded. Please refer to ``docs/tutorials/new_dataset.md`` for more details. - - Args: pipeline (list[dict]): Processing pipeline img_dir (str): Path to image directory @@ -127,7 +122,6 @@ def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix, split (str|None): Split txt file. If split is specified, only file with suffix in the splits will be loaded. Otherwise, all images in img_dir/ann_dir will be loaded. Default: None - Returns: list[dict]: All image info of dataset. """ @@ -158,7 +152,6 @@ def get_ann_info(self, idx): Args: idx (int): Index of data. - Returns: dict: Annotation info of specified index. """ @@ -178,7 +171,6 @@ def __getitem__(self, idx): Args: idx (int): Index of data. - Returns: dict: Training/test data (with annotation if `test_mode` is set False). @@ -194,7 +186,6 @@ def prepare_train_img(self, idx): Args: idx (int): Index of data. - Returns: dict: Training data and annotation after pipeline with new keys introduced by pipeline. @@ -211,7 +202,6 @@ def prepare_test_img(self, idx): Args: idx (int): Index of data. - Returns: dict: Testing data after pipeline with new keys intorduced by piepline. @@ -318,7 +308,6 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): metric (str | list[str]): Metrics to be evaluated. logger (logging.Logger | None | str): Logger used for printing related information during evaluation. Default: None. - Returns: dict[str, float]: Default metrics. 
""" @@ -326,7 +315,7 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): if not isinstance(metric, str): assert len(metric) == 1 metric = metric[0] - allowed_metrics = ['mIoU'] + allowed_metrics = ['mIoU', 'mDice'] if metric not in allowed_metrics: raise KeyError('metric {} is not supported'.format(metric)) @@ -338,33 +327,44 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): else: num_classes = len(self.CLASSES) - all_acc, acc, iou = mean_iou( - results, gt_seg_maps, num_classes, ignore_index=self.ignore_index) + all_acc, acc, eval_metric = metrics( + results, + gt_seg_maps, + num_classes, + ignore_index=self.ignore_index, + metric=metric) summary_str = '' summary_str += 'per class results:\n' line_format = '{:<15} {:>10} {:>10}\n' - summary_str += line_format.format('Class', 'IoU', 'Acc') + if metric == 'mIoU': + summary_str += line_format.format('Class', 'IoU', 'Acc') + else: + summary_str += line_format.format('Class', 'Dice', 'Acc') if self.CLASSES is None: class_names = tuple(range(num_classes)) else: class_names = self.CLASSES for i in range(num_classes): - iou_str = '{:.2f}'.format(iou[i] * 100) + eval_metric_str = '{:.2f}'.format(eval_metric[i] * 100) acc_str = '{:.2f}'.format(acc[i] * 100) - summary_str += line_format.format(class_names[i], iou_str, acc_str) + summary_str += line_format.format(class_names[i], eval_metric_str, + acc_str) summary_str += 'Summary:\n' line_format = '{:<15} {:>10} {:>10} {:>10}\n' - summary_str += line_format.format('Scope', 'mIoU', 'mAcc', 'aAcc') + if metric == 'mIoU': + summary_str += line_format.format('Scope', 'mIoU', 'mAcc', 'aAcc') + else: + summary_str += line_format.format('Scope', 'mDice', 'mAcc', 'aAcc') - iou_str = '{:.2f}'.format(np.nanmean(iou) * 100) + eval_metric_str = '{:.2f}'.format(np.nanmean(eval_metric) * 100) acc_str = '{:.2f}'.format(np.nanmean(acc) * 100) all_acc_str = '{:.2f}'.format(all_acc * 100) - summary_str += line_format.format('global', iou_str, acc_str, + summary_str += line_format.format('global', eval_metric_str, acc_str, all_acc_str) print_log(summary_str, logger) - eval_results['mIoU'] = np.nanmean(iou) + eval_results[metric] = np.nanmean(eval_metric) eval_results['mAcc'] = np.nanmean(acc) eval_results['aAcc'] = all_acc diff --git a/tests/test_data/test_dataset.py b/tests/test_data/test_dataset.py index d7e44f50ec..709fc7c52f 100644 --- a/tests/test_data/test_dataset.py +++ b/tests/test_data/test_dataset.py @@ -159,20 +159,32 @@ def test_custom_dataset(): for gt_seg_map in gt_seg_maps: h, w = gt_seg_map.shape pseudo_results.append(np.random.randint(low=0, high=7, size=(h, w))) - eval_results = train_dataset.evaluate(pseudo_results) + eval_results = train_dataset.evaluate(pseudo_results, metric='mIoU') assert isinstance(eval_results, dict) assert 'mIoU' in eval_results assert 'mAcc' in eval_results assert 'aAcc' in eval_results + eval_results = train_dataset.evaluate(pseudo_results, metric='mDice') + assert isinstance(eval_results, dict) + assert 'mDice' in eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + # evaluation with CLASSES train_dataset.CLASSES = tuple(['a'] * 7) - eval_results = train_dataset.evaluate(pseudo_results) + eval_results = train_dataset.evaluate(pseudo_results, metric='mIoU') assert isinstance(eval_results, dict) assert 'mIoU' in eval_results assert 'mAcc' in eval_results assert 'aAcc' in eval_results + eval_results = train_dataset.evaluate(pseudo_results, metric='mDice') + assert isinstance(eval_results, dict) + assert 'mDice' in 
eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + @patch('mmseg.datasets.CustomDataset.load_annotations', MagicMock) @patch('mmseg.datasets.CustomDataset.__getitem__', diff --git a/tests/test_mean_iou.py b/tests/test_metrics.py similarity index 55% rename from tests/test_mean_iou.py rename to tests/test_metrics.py index 74a2b78617..ac29b03bb0 100644 --- a/tests/test_mean_iou.py +++ b/tests/test_metrics.py @@ -1,6 +1,6 @@ import numpy as np -from mmseg.core.evaluation import mean_iou +from mmseg.core.evaluation import metrics def get_confusion_matrix(pred_label, label, num_classes, ignore_index): @@ -41,23 +41,64 @@ def legacy_mean_iou(results, gt_seg_maps, num_classes, ignore_index): return all_acc, acc, iou -def test_mean_iou(): +# This func is deprecated since it's not memory efficient +def legacy_mean_dice(results, gt_seg_maps, num_classes, ignore_index): + num_imgs = len(results) + assert len(gt_seg_maps) == num_imgs + total_mat = np.zeros((num_classes, num_classes), dtype=np.float) + for i in range(num_imgs): + mat = get_confusion_matrix( + results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index) + total_mat += mat + all_acc = np.diag(total_mat).sum() / total_mat.sum() + acc = np.diag(total_mat) / total_mat.sum(axis=1) + dice = 2 * np.diag(total_mat) / ( + total_mat.sum(axis=1) + total_mat.sum(axis=0)) + + return all_acc, acc, dice + + +def test_metrics(): pred_size = (10, 30, 30) num_classes = 19 ignore_index = 255 results = np.random.randint(0, num_classes, size=pred_size) label = np.random.randint(0, num_classes, size=pred_size) label[:, 2, 5:10] = ignore_index - all_acc, acc, iou = mean_iou(results, label, num_classes, ignore_index) + all_acc, acc, iou = metrics( + results, label, num_classes, ignore_index, metric='mIoU') all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes, ignore_index) assert all_acc == all_acc_l assert np.allclose(acc, acc_l) assert np.allclose(iou, iou_l) + all_acc, acc, dice = metrics( + results, label, num_classes, ignore_index, metric='mDice') + all_acc_l, acc_l, dice_l = legacy_mean_dice(results, label, num_classes, + ignore_index) + assert all_acc == all_acc_l + assert np.allclose(acc, acc_l) + assert np.allclose(dice, dice_l) + results = np.random.randint(0, 5, size=pred_size) label = np.random.randint(0, 4, size=pred_size) - all_acc, acc, iou = mean_iou( - results, label, num_classes, ignore_index=255, nan_to_num=-1) + all_acc, acc, iou = metrics( + results, + label, + num_classes, + ignore_index=255, + metric='mIoU', + nan_to_num=-1) assert acc[-1] == -1 assert iou[-1] == -1 + + all_acc, acc, dice = metrics( + results, + label, + num_classes, + ignore_index=255, + metric='mDice', + nan_to_num=-1) + assert acc[-1] == -1 + assert dice[-1] == -1 From 87f6b543d7a4ef14b50daaf710e176c2ddf9e3ec Mon Sep 17 00:00:00 2001 From: Junjun2016 Date: Tue, 3 Nov 2020 22:20:36 +0800 Subject: [PATCH 2/9] add dice evaluation metric --- mmseg/core/evaluation/__init__.py | 4 ++-- mmseg/core/evaluation/metrics.py | 18 ++++++++++-------- mmseg/datasets/custom.py | 4 ++-- tests/test_metrics.py | 10 +++++----- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/mmseg/core/evaluation/__init__.py b/mmseg/core/evaluation/__init__.py index 7a5f1e0154..c58d926f06 100644 --- a/mmseg/core/evaluation/__init__.py +++ b/mmseg/core/evaluation/__init__.py @@ -1,8 +1,8 @@ from .class_names import get_classes, get_palette from .eval_hooks import DistEvalHook, EvalHook -from .metrics import mean_dice, mean_iou, metrics 
+from .metrics import eval_metrics, mean_dice, mean_iou __all__ = [ - 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'metrics', + 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'eval_metrics', 'get_classes', 'get_palette' ] diff --git a/mmseg/core/evaluation/metrics.py b/mmseg/core/evaluation/metrics.py index 849eee0a22..3be4e5c07d 100644 --- a/mmseg/core/evaluation/metrics.py +++ b/mmseg/core/evaluation/metrics.py @@ -135,12 +135,12 @@ def mean_dice(results, return all_acc, acc, dice -def metrics(results, - gt_seg_maps, - num_classes, - ignore_index, - metric='mIoU', - nan_to_num=None): +def eval_metrics(results, + gt_seg_maps, + num_classes, + ignore_index, + metric='mIoU', + nan_to_num=None): """Calculate evaluation metrics Args: results (list[ndarray]): List of prediction segmentation maps @@ -159,6 +159,8 @@ def metrics(results, allowed_metrics = {'mIoU': mean_iou, 'mDice': mean_dice} if (not isinstance(metric, str)) or (metric not in allowed_metrics): raise KeyError('metric {} is not supported'.format(metric)) - all_acc, acc, eval_metric = allowed_metrics[metric]( - results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None) + all_acc, acc, eval_metric = allowed_metrics[metric](results, gt_seg_maps, + num_classes, + ignore_index, + nan_to_num) return all_acc, acc, eval_metric diff --git a/mmseg/datasets/custom.py b/mmseg/datasets/custom.py index 5f318b1042..706723f550 100644 --- a/mmseg/datasets/custom.py +++ b/mmseg/datasets/custom.py @@ -6,7 +6,7 @@ from mmcv.utils import print_log from torch.utils.data import Dataset -from mmseg.core import metrics +from mmseg.core import eval_metrics from mmseg.utils import get_root_logger from .builder import DATASETS from .pipelines import Compose @@ -327,7 +327,7 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): else: num_classes = len(self.CLASSES) - all_acc, acc, eval_metric = metrics( + all_acc, acc, eval_metric = eval_metrics( results, gt_seg_maps, num_classes, diff --git a/tests/test_metrics.py b/tests/test_metrics.py index ac29b03bb0..b194de2593 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,6 +1,6 @@ import numpy as np -from mmseg.core.evaluation import metrics +from mmseg.core.evaluation import eval_metrics def get_confusion_matrix(pred_label, label, num_classes, ignore_index): @@ -65,7 +65,7 @@ def test_metrics(): results = np.random.randint(0, num_classes, size=pred_size) label = np.random.randint(0, num_classes, size=pred_size) label[:, 2, 5:10] = ignore_index - all_acc, acc, iou = metrics( + all_acc, acc, iou = eval_metrics( results, label, num_classes, ignore_index, metric='mIoU') all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes, ignore_index) @@ -73,7 +73,7 @@ def test_metrics(): assert np.allclose(acc, acc_l) assert np.allclose(iou, iou_l) - all_acc, acc, dice = metrics( + all_acc, acc, dice = eval_metrics( results, label, num_classes, ignore_index, metric='mDice') all_acc_l, acc_l, dice_l = legacy_mean_dice(results, label, num_classes, ignore_index) @@ -83,7 +83,7 @@ def test_metrics(): results = np.random.randint(0, 5, size=pred_size) label = np.random.randint(0, 4, size=pred_size) - all_acc, acc, iou = metrics( + all_acc, acc, iou = eval_metrics( results, label, num_classes, @@ -93,7 +93,7 @@ def test_metrics(): assert acc[-1] == -1 assert iou[-1] == -1 - all_acc, acc, dice = metrics( + all_acc, acc, dice = eval_metrics( results, label, num_classes, From db6f5d94ad20a5995dd87f307be7cf802f23b4a0 Mon Sep 17 00:00:00 2001 From: Junjun2016 Date: Fri, 
6 Nov 2020 10:48:56 +0800 Subject: [PATCH 3/9] add dice evaluation metric --- mmseg/datasets/custom.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mmseg/datasets/custom.py b/mmseg/datasets/custom.py index 706723f550..9ffb456d96 100644 --- a/mmseg/datasets/custom.py +++ b/mmseg/datasets/custom.py @@ -18,6 +18,7 @@ class CustomDataset(Dataset): is as followed. .. code-block:: none + ├── data │ ├── my_dataset │ │ ├── img_dir @@ -32,12 +33,15 @@ class CustomDataset(Dataset): │ │ │ │ ├── yyy{seg_map_suffix} │ │ │ │ ├── zzz{seg_map_suffix} │ │ │ ├── val + The img/gt_semantic_seg pair of CustomDataset should be of the same except suffix. A valid img/gt_semantic_seg filename pair should be like ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included in the suffix). If split is given, then ``xxx`` is specified in txt file. Otherwise, all files in ``img_dir/``and ``ann_dir`` will be loaded. Please refer to ``docs/tutorials/new_dataset.md`` for more details. + + Args: pipeline (list[dict]): Processing pipeline img_dir (str): Path to image directory @@ -122,6 +126,7 @@ def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix, split (str|None): Split txt file. If split is specified, only file with suffix in the splits will be loaded. Otherwise, all images in img_dir/ann_dir will be loaded. Default: None + Returns: list[dict]: All image info of dataset. """ @@ -152,6 +157,7 @@ def get_ann_info(self, idx): Args: idx (int): Index of data. + Returns: dict: Annotation info of specified index. """ @@ -171,6 +177,7 @@ def __getitem__(self, idx): Args: idx (int): Index of data. + Returns: dict: Training/test data (with annotation if `test_mode` is set False). @@ -186,6 +193,7 @@ def prepare_train_img(self, idx): Args: idx (int): Index of data. + Returns: dict: Training data and annotation after pipeline with new keys introduced by pipeline. @@ -202,6 +210,7 @@ def prepare_test_img(self, idx): Args: idx (int): Index of data. + Returns: dict: Testing data after pipeline with new keys intorduced by piepline. @@ -308,6 +317,7 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): metric (str | list[str]): Metrics to be evaluated. logger (logging.Logger | None | str): Logger used for printing related information during evaluation. Default: None. + Returns: dict[str, float]: Default metrics. 
""" From a51f97502dbb4c56bf6f6acc48460324ab59d736 Mon Sep 17 00:00:00 2001 From: Junjun2016 Date: Tue, 10 Nov 2020 19:05:01 +0800 Subject: [PATCH 4/9] support 2 metrics --- mmseg/core/evaluation/metrics.py | 72 ++++++++++++++++++-------------- mmseg/datasets/custom.py | 66 +++++++++++++---------------- setup.cfg | 2 +- tests/test_data/test_dataset.py | 16 +++++++ tests/test_metrics.py | 60 +++++++++++++++++++++++++- 5 files changed, 145 insertions(+), 71 deletions(-) diff --git a/mmseg/core/evaluation/metrics.py b/mmseg/core/evaluation/metrics.py index 3be4e5c07d..0db521a3d9 100644 --- a/mmseg/core/evaluation/metrics.py +++ b/mmseg/core/evaluation/metrics.py @@ -87,16 +87,13 @@ def mean_iou(results, gt_seg_maps, num_classes, ignore_index, nan_to_num=None): ndarray: Per category IoU, shape (num_classes, ) """ - total_area_intersect, total_area_union, total_area_pred_label, \ - total_area_label = total_intersect_and_union(results, gt_seg_maps, - num_classes, - ignore_index=ignore_index) - all_acc = total_area_intersect.sum() / total_area_label.sum() - acc = total_area_intersect / total_area_label - iou = total_area_intersect / total_area_union - if nan_to_num is not None: - return all_acc, np.nan_to_num(acc, nan=nan_to_num), \ - np.nan_to_num(iou, nan=nan_to_num) + all_acc, acc, iou = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mIoU'], + nan_to_num=nan_to_num) return all_acc, acc, iou @@ -121,17 +118,13 @@ def mean_dice(results, ndarray: Per category dice, shape (num_classes, ) """ - total_area_intersect, total_area_union, total_area_pred_label, \ - total_area_label = total_intersect_and_union(results, gt_seg_maps, - num_classes, - ignore_index=ignore_index) - all_acc = total_area_intersect.sum() / total_area_label.sum() - acc = total_area_intersect / total_area_label - dice = 2 * total_area_intersect / ( - total_area_pred_label + total_area_label) - if nan_to_num is not None: - return all_acc, np.nan_to_num(acc, nan=nan_to_num), \ - np.nan_to_num(dice, nan=nan_to_num) + all_acc, acc, dice = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mDice'], + nan_to_num=nan_to_num) return all_acc, acc, dice @@ -139,7 +132,7 @@ def eval_metrics(results, gt_seg_maps, num_classes, ignore_index, - metric='mIoU', + metrics=['mIoU'], nan_to_num=None): """Calculate evaluation metrics Args: @@ -147,7 +140,7 @@ def eval_metrics(results, gt_seg_maps (list[ndarray]): list of ground truth segmentation maps num_classes (int): Number of categories ignore_index (int): Index that will be ignored in evaluation. - metric (str): Metrics to be evaluated, 'mIoU' or 'mDice'. + metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. nan_to_num (int, optional): If specified, NaN values will be replaced by the numbers defined by the user. Default: None. 
Returns: @@ -156,11 +149,28 @@ def eval_metrics(results, ndarray: Per category evalution metrics, shape (num_classes, ) """ - allowed_metrics = {'mIoU': mean_iou, 'mDice': mean_dice} - if (not isinstance(metric, str)) or (metric not in allowed_metrics): - raise KeyError('metric {} is not supported'.format(metric)) - all_acc, acc, eval_metric = allowed_metrics[metric](results, gt_seg_maps, - num_classes, - ignore_index, - nan_to_num) - return all_acc, acc, eval_metric + if isinstance(metrics, str): + metrics = [metrics] + allowed_metrics = ['mIoU', 'mDice'] + if not set(metrics).issubset(set(allowed_metrics)): + raise KeyError('metrics {} is not supported'.format(metrics)) + total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label = total_intersect_and_union(results, gt_seg_maps, + num_classes, + ignore_index=ignore_index) + all_acc = total_area_intersect.sum() / total_area_label.sum() + acc = total_area_intersect / total_area_label + ret_metrics = [all_acc, acc] + for metric in metrics: + if 'mIoU' == metric: + iou = total_area_intersect / total_area_union + ret_metrics.append(iou) + elif 'mDice' == metric: + dice = 2 * total_area_intersect / ( + total_area_pred_label + total_area_label) + ret_metrics.append(dice) + if nan_to_num is not None: + ret_metrics = [ + np.nan_to_num(metric, nan=nan_to_num) for metric in ret_metrics + ] + return ret_metrics diff --git a/mmseg/datasets/custom.py b/mmseg/datasets/custom.py index 9ffb456d96..9d71921d5d 100644 --- a/mmseg/datasets/custom.py +++ b/mmseg/datasets/custom.py @@ -4,6 +4,7 @@ import mmcv import numpy as np from mmcv.utils import print_log +from terminaltables import AsciiTable from torch.utils.data import Dataset from mmseg.core import eval_metrics @@ -322,13 +323,11 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): dict[str, float]: Default metrics. 
""" - if not isinstance(metric, str): - assert len(metric) == 1 - metric = metric[0] + if isinstance(metric, str): + metric = [metric] allowed_metrics = ['mIoU', 'mDice'] - if metric not in allowed_metrics: + if not set(metric).issubset(set(allowed_metrics)): raise KeyError('metric {} is not supported'.format(metric)) - eval_results = {} gt_seg_maps = self.get_gt_seg_maps() if self.CLASSES is None: @@ -336,46 +335,37 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) else: num_classes = len(self.CLASSES) - - all_acc, acc, eval_metric = eval_metrics( + ret_metrics = eval_metrics( results, gt_seg_maps, num_classes, ignore_index=self.ignore_index, - metric=metric) - summary_str = '' - summary_str += 'per class results:\n' - - line_format = '{:<15} {:>10} {:>10}\n' - if metric == 'mIoU': - summary_str += line_format.format('Class', 'IoU', 'Acc') - else: - summary_str += line_format.format('Class', 'Dice', 'Acc') + metrics=metric) + class_table_data = [['Class'] + [m[1:] for m in metric] + ['Acc']] if self.CLASSES is None: class_names = tuple(range(num_classes)) else: class_names = self.CLASSES for i in range(num_classes): - eval_metric_str = '{:.2f}'.format(eval_metric[i] * 100) - acc_str = '{:.2f}'.format(acc[i] * 100) - summary_str += line_format.format(class_names[i], eval_metric_str, - acc_str) - summary_str += 'Summary:\n' - line_format = '{:<15} {:>10} {:>10} {:>10}\n' - if metric == 'mIoU': - summary_str += line_format.format('Scope', 'mIoU', 'mAcc', 'aAcc') - else: - summary_str += line_format.format('Scope', 'mDice', 'mAcc', 'aAcc') - - eval_metric_str = '{:.2f}'.format(np.nanmean(eval_metric) * 100) - acc_str = '{:.2f}'.format(np.nanmean(acc) * 100) - all_acc_str = '{:.2f}'.format(all_acc * 100) - summary_str += line_format.format('global', eval_metric_str, acc_str, - all_acc_str) - print_log(summary_str, logger) - - eval_results[metric] = np.nanmean(eval_metric) - eval_results['mAcc'] = np.nanmean(acc) - eval_results['aAcc'] = all_acc - + class_table_data.append( + [class_names[i]] + + [round(m[i] * 100, 2) for m in ret_metrics[2:]] + + [round(ret_metrics[1][i] * 100, 2)]) + summary_table_data = [['Scope'] + + ['m' + head + for head in class_table_data[0][1:]] + ['aAcc']] + summary_table_data.append( + ['global'] + + [round(np.nanmean(m) * 100, 2) for m in ret_metrics[2:]] + + [round(np.nanmean(ret_metrics[1]) * 100, 2)] + + [round(np.nanmean(ret_metrics[0]) * 100, 2)]) + print_log('per class results:', logger) + table = AsciiTable(class_table_data) + print_log('\n' + table.table, logger=logger) + print_log('Summary:', logger) + table = AsciiTable(summary_table_data) + print_log('\n' + table.table, logger=logger) + + for i in range(1, len(summary_table_data[0])): + eval_results[summary_table_data[0][i]] = summary_table_data[1][i] return eval_results diff --git a/setup.cfg b/setup.cfg index a5fb07d401..708fb4ce33 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,6 +8,6 @@ line_length = 79 multi_line_output = 0 known_standard_library = setuptools known_first_party = mmseg -known_third_party = PIL,cityscapesscripts,cv2,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,torch +known_third_party = PIL,cityscapesscripts,cv2,detail,matplotlib,mmcv,numpy,onnxruntime,oss2,pytest,scipy,terminaltables,torch no_lines_before = STDLIB,LOCALFOLDER default_section = THIRDPARTY diff --git a/tests/test_data/test_dataset.py b/tests/test_data/test_dataset.py index 0795542b5c..2e19c30f08 100644 --- 
a/tests/test_data/test_dataset.py +++ b/tests/test_data/test_dataset.py @@ -171,6 +171,14 @@ def test_custom_dataset(): assert 'mAcc' in eval_results assert 'aAcc' in eval_results + eval_results = train_dataset.evaluate( + pseudo_results, metric=['mDice', 'mIoU']) + assert isinstance(eval_results, dict) + assert 'mIoU' in eval_results + assert 'mDice' in eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + # evaluation with CLASSES train_dataset.CLASSES = tuple(['a'] * 7) eval_results = train_dataset.evaluate(pseudo_results, metric='mIoU') @@ -185,6 +193,14 @@ def test_custom_dataset(): assert 'mAcc' in eval_results assert 'aAcc' in eval_results + eval_results = train_dataset.evaluate( + pseudo_results, metric=['mIoU', 'mDice']) + assert isinstance(eval_results, dict) + assert 'mIoU' in eval_results + assert 'mDice' in eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + @patch('mmseg.datasets.CustomDataset.load_annotations', MagicMock) @patch('mmseg.datasets.CustomDataset.__getitem__', diff --git a/tests/test_metrics.py b/tests/test_metrics.py index b194de2593..f1d3028520 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,6 +1,6 @@ import numpy as np -from mmseg.core.evaluation import eval_metrics +from mmseg.core.evaluation import eval_metrics, mean_dice, mean_iou def get_confusion_matrix(pred_label, label, num_classes, ignore_index): @@ -81,6 +81,13 @@ def test_metrics(): assert np.allclose(acc, acc_l) assert np.allclose(dice, dice_l) + all_acc, acc, iou, dice = eval_metrics( + results, label, num_classes, ignore_index, metric=['mIoU', 'mDice']) + assert all_acc == all_acc_l + assert np.allclose(acc, acc_l) + assert np.allclose(iou, iou_l) + assert np.allclose(dice, dice_l) + results = np.random.randint(0, 5, size=pred_size) label = np.random.randint(0, 4, size=pred_size) all_acc, acc, iou = eval_metrics( @@ -102,3 +109,54 @@ def test_metrics(): nan_to_num=-1) assert acc[-1] == -1 assert dice[-1] == -1 + + all_acc, acc, dice, iou = eval_metrics( + results, + label, + num_classes, + ignore_index=255, + metric=['mDice', 'mIoU'], + nan_to_num=-1) + assert acc[-1] == -1 + assert dice[-1] == -1 + assert iou[-1] == -1 + + +def test_mean_iou(): + pred_size = (10, 30, 30) + num_classes = 19 + ignore_index = 255 + results = np.random.randint(0, num_classes, size=pred_size) + label = np.random.randint(0, num_classes, size=pred_size) + label[:, 2, 5:10] = ignore_index + all_acc, acc, iou = mean_iou(results, label, num_classes, ignore_index) + all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes, + ignore_index) + assert all_acc == all_acc_l + assert np.allclose(acc, acc_l) + assert np.allclose(iou, iou_l) + + all_acc, acc, iou = mean_iou( + results, label, num_classes, ignore_index=255, nan_to_num=-1) + assert acc[-1] == -1 + assert iou[-1] == -1 + + +def test_mean_dice(): + pred_size = (10, 30, 30) + num_classes = 19 + ignore_index = 255 + results = np.random.randint(0, num_classes, size=pred_size) + label = np.random.randint(0, num_classes, size=pred_size) + label[:, 2, 5:10] = ignore_index + all_acc, acc, iou = mean_dice(results, label, num_classes, ignore_index) + all_acc_l, acc_l, iou_l = legacy_mean_dice(results, label, num_classes, + ignore_index) + assert all_acc == all_acc_l + assert np.allclose(acc, acc_l) + assert np.allclose(iou, iou_l) + + all_acc, acc, iou = mean_dice( + results, label, num_classes, ignore_index=255, nan_to_num=-1) + assert acc[-1] == -1 + assert iou[-1] == -1 From 
03e0117686349a5e96c922de36f1b8061a9a10df Mon Sep 17 00:00:00 2001 From: Junjun2016 Date: Tue, 10 Nov 2020 19:17:57 +0800 Subject: [PATCH 5/9] support 2 metrics --- tests/test_metrics.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index f1d3028520..023bbb0a55 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -66,7 +66,7 @@ def test_metrics(): label = np.random.randint(0, num_classes, size=pred_size) label[:, 2, 5:10] = ignore_index all_acc, acc, iou = eval_metrics( - results, label, num_classes, ignore_index, metric='mIoU') + results, label, num_classes, ignore_index, metrics='mIoU') all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes, ignore_index) assert all_acc == all_acc_l @@ -74,7 +74,7 @@ def test_metrics(): assert np.allclose(iou, iou_l) all_acc, acc, dice = eval_metrics( - results, label, num_classes, ignore_index, metric='mDice') + results, label, num_classes, ignore_index, metrics='mDice') all_acc_l, acc_l, dice_l = legacy_mean_dice(results, label, num_classes, ignore_index) assert all_acc == all_acc_l @@ -82,7 +82,7 @@ def test_metrics(): assert np.allclose(dice, dice_l) all_acc, acc, iou, dice = eval_metrics( - results, label, num_classes, ignore_index, metric=['mIoU', 'mDice']) + results, label, num_classes, ignore_index, metrics=['mIoU', 'mDice']) assert all_acc == all_acc_l assert np.allclose(acc, acc_l) assert np.allclose(iou, iou_l) @@ -95,7 +95,7 @@ def test_metrics(): label, num_classes, ignore_index=255, - metric='mIoU', + metrics='mIoU', nan_to_num=-1) assert acc[-1] == -1 assert iou[-1] == -1 @@ -105,7 +105,7 @@ def test_metrics(): label, num_classes, ignore_index=255, - metric='mDice', + metrics='mDice', nan_to_num=-1) assert acc[-1] == -1 assert dice[-1] == -1 @@ -115,7 +115,7 @@ def test_metrics(): label, num_classes, ignore_index=255, - metric=['mDice', 'mIoU'], + metrics=['mDice', 'mIoU'], nan_to_num=-1) assert acc[-1] == -1 assert dice[-1] == -1 @@ -136,6 +136,8 @@ def test_mean_iou(): assert np.allclose(acc, acc_l) assert np.allclose(iou, iou_l) + results = np.random.randint(0, 5, size=pred_size) + label = np.random.randint(0, 4, size=pred_size) all_acc, acc, iou = mean_iou( results, label, num_classes, ignore_index=255, nan_to_num=-1) assert acc[-1] == -1 @@ -156,6 +158,8 @@ def test_mean_dice(): assert np.allclose(acc, acc_l) assert np.allclose(iou, iou_l) + results = np.random.randint(0, 5, size=pred_size) + label = np.random.randint(0, 4, size=pred_size) all_acc, acc, iou = mean_dice( results, label, num_classes, ignore_index=255, nan_to_num=-1) assert acc[-1] == -1 From d648cec246543697a398e7b92e1bae29feb2789c Mon Sep 17 00:00:00 2001 From: Junjun2016 Date: Tue, 10 Nov 2020 19:29:35 +0800 Subject: [PATCH 6/9] support 2 metrics --- mmseg/core/evaluation/metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmseg/core/evaluation/metrics.py b/mmseg/core/evaluation/metrics.py index 0db521a3d9..45c62b1641 100644 --- a/mmseg/core/evaluation/metrics.py +++ b/mmseg/core/evaluation/metrics.py @@ -162,10 +162,10 @@ def eval_metrics(results, acc = total_area_intersect / total_area_label ret_metrics = [all_acc, acc] for metric in metrics: - if 'mIoU' == metric: + if metric == 'mIoU': iou = total_area_intersect / total_area_union ret_metrics.append(iou) - elif 'mDice' == metric: + elif metric == 'mDice': dice = 2 * total_area_intersect / ( total_area_pred_label + total_area_label) ret_metrics.append(dice) From 
33ac81d0f095408fb8bb2a3d5ce8f42dbcbd5da4 Mon Sep 17 00:00:00 2001 From: Junjun2016 Date: Tue, 10 Nov 2020 20:13:57 +0800 Subject: [PATCH 7/9] support 2 metrics --- requirements/runtime.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/runtime.txt b/requirements/runtime.txt index db5d81e01e..a8347b9c0c 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,2 +1,3 @@ matplotlib numpy +terminaltables From 974588b0453ead73c38d6f3c611417f4c7cccbc9 Mon Sep 17 00:00:00 2001 From: Junjun2016 Date: Mon, 23 Nov 2020 10:21:43 +0800 Subject: [PATCH 8/9] fix docstring --- mmseg/datasets/custom.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mmseg/datasets/custom.py b/mmseg/datasets/custom.py index 9d71921d5d..ca66c85204 100644 --- a/mmseg/datasets/custom.py +++ b/mmseg/datasets/custom.py @@ -315,7 +315,8 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): Args: results (list): Testing results of the dataset. - metric (str | list[str]): Metrics to be evaluated. + metric (str | list[str]): Metrics to be evaluated. 'mIoU' and + 'mDice' are support ONLY. logger (logging.Logger | None | str): Logger used for printing related information during evaluation. Default: None. @@ -349,16 +350,16 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): for i in range(num_classes): class_table_data.append( [class_names[i]] + - [round(m[i] * 100, 2) for m in ret_metrics[2:]] + - [round(ret_metrics[1][i] * 100, 2)]) + [np.round(m[i] * 100, 2) for m in ret_metrics[2:]] + + [np.round(ret_metrics[1][i] * 100, 2)]) summary_table_data = [['Scope'] + ['m' + head for head in class_table_data[0][1:]] + ['aAcc']] summary_table_data.append( ['global'] + - [round(np.nanmean(m) * 100, 2) for m in ret_metrics[2:]] + - [round(np.nanmean(ret_metrics[1]) * 100, 2)] + - [round(np.nanmean(ret_metrics[0]) * 100, 2)]) + [np.round(np.nanmean(m) * 100, 2) for m in ret_metrics[2:]] + + [np.round(np.nanmean(ret_metrics[1]) * 100, 2)] + + [np.round(np.nanmean(ret_metrics[0]) * 100, 2)]) print_log('per class results:', logger) table = AsciiTable(class_table_data) print_log('\n' + table.table, logger=logger) From 4e354727d2f2f10b56875a6de979f52f78ddf34b Mon Sep 17 00:00:00 2001 From: Junjun2016 Date: Mon, 23 Nov 2020 15:29:08 +0800 Subject: [PATCH 9/9] use np.round once for all --- mmseg/datasets/custom.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/mmseg/datasets/custom.py b/mmseg/datasets/custom.py index ca66c85204..4e7e30e91c 100644 --- a/mmseg/datasets/custom.py +++ b/mmseg/datasets/custom.py @@ -316,7 +316,7 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): Args: results (list): Testing results of the dataset. metric (str | list[str]): Metrics to be evaluated. 'mIoU' and - 'mDice' are support ONLY. + 'mDice' are supported. logger (logging.Logger | None | str): Logger used for printing related information during evaluation. Default: None. 
@@ -347,19 +347,23 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): class_names = tuple(range(num_classes)) else: class_names = self.CLASSES + ret_metrics_round = [ + np.round(ret_metric * 100, 2) for ret_metric in ret_metrics + ] for i in range(num_classes): - class_table_data.append( - [class_names[i]] + - [np.round(m[i] * 100, 2) for m in ret_metrics[2:]] + - [np.round(ret_metrics[1][i] * 100, 2)]) + class_table_data.append([class_names[i]] + + [m[i] for m in ret_metrics_round[2:]] + + [ret_metrics_round[1][i]]) summary_table_data = [['Scope'] + ['m' + head for head in class_table_data[0][1:]] + ['aAcc']] - summary_table_data.append( - ['global'] + - [np.round(np.nanmean(m) * 100, 2) for m in ret_metrics[2:]] + - [np.round(np.nanmean(ret_metrics[1]) * 100, 2)] + - [np.round(np.nanmean(ret_metrics[0]) * 100, 2)]) + ret_metrics_mean = [ + np.round(np.nanmean(ret_metric) * 100, 2) + for ret_metric in ret_metrics + ] + summary_table_data.append(['global'] + ret_metrics_mean[2:] + + [ret_metrics_mean[1]] + + [ret_metrics_mean[0]]) print_log('per class results:', logger) table = AsciiTable(class_table_data) print_log('\n' + table.table, logger=logger) @@ -368,5 +372,6 @@ def evaluate(self, results, metric='mIoU', logger=None, **kwargs): print_log('\n' + table.table, logger=logger) for i in range(1, len(summary_table_data[0])): - eval_results[summary_table_data[0][i]] = summary_table_data[1][i] + eval_results[summary_table_data[0] + [i]] = summary_table_data[1][i] / 100.0 return eval_results
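For reference, below is a minimal usage sketch of the evaluation API as it stands after the final patch in this series. It is not part of the patches themselves; the random inputs mirror tests/test_metrics.py, and the shapes, class count, and nan_to_num value are illustrative only.

import numpy as np

from mmseg.core.evaluation import eval_metrics, mean_dice, mean_iou

num_classes = 19
ignore_index = 255

# Ten random 30x30 prediction / ground-truth pairs, as in test_metrics.py.
results = [np.random.randint(0, num_classes, size=(30, 30)) for _ in range(10)]
gt_seg_maps = [np.random.randint(0, num_classes, size=(30, 30)) for _ in range(10)]

# The single-metric wrappers return (all_acc, acc, iou) / (all_acc, acc, dice).
all_acc, acc, iou = mean_iou(results, gt_seg_maps, num_classes, ignore_index)
all_acc, acc, dice = mean_dice(results, gt_seg_maps, num_classes, ignore_index)

# Both metrics in one pass: eval_metrics returns
# [all_acc, acc, <one per-class array per requested metric, in order>].
all_acc, acc, iou, dice = eval_metrics(
    results,
    gt_seg_maps,
    num_classes,
    ignore_index,
    metrics=['mIoU', 'mDice'],
    nan_to_num=-1)  # replace NaN (e.g. for classes that never appear) with -1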