From 7f8b68bbca1cac22856bc635402316044289fbef Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Fri, 21 Aug 2020 16:38:34 -0700 Subject: [PATCH 01/16] Fix IoU score for classes not present in target or pred Fixes #3097 - Allow configurable not_present_score for IoU for classes not present in target or pred. Defaults to 1.0. - Also allow passing `num_classes` parameter through from iou metric class down to its underlying functional iou call. --- CHANGELOG.md | 1 + pytorch_lightning/metrics/classification.py | 22 ++++++++- .../metrics/functional/classification.py | 34 +++++++++---- .../metrics/functional/test_classification.py | 48 ++++++++++++++++++- 4 files changed, 93 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30bfaa56c6b92..f9269b58a23cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -156,6 +156,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed batch size auto-scaling feature to set the new value on the correct model attribute ([#3043](https://github.com/PyTorchLightning/pytorch-lightning/pull/3043)) - Fixed automatic batch scaling not working with half precision ([#3045](https://github.com/PyTorchLightning/pytorch-lightning/pull/3045)) - Fixed setting device to root gpu ([#3042](https://github.com/PyTorchLightning/pytorch-lightning/pull/3042)) +- Fixed IoU score for classes not present in target or pred ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) ## [0.8.5] - 2020-07-09 diff --git a/pytorch_lightning/metrics/classification.py b/pytorch_lightning/metrics/classification.py index aa14d48ead6ed..069d698ad6759 100644 --- a/pytorch_lightning/metrics/classification.py +++ b/pytorch_lightning/metrics/classification.py @@ -804,9 +804,18 @@ class IoU(TensorMetric): """ - def __init__(self, remove_bg: bool = False, reduction: str = "elementwise_mean"): + def __init__( + self, + not_present_score: float = 1.0, + num_classes: Optional[int] = None, + remove_bg: bool = False, + reduction: str = "elementwise_mean", + ): """ Args: + not_present_score: score to use for a class, if no instance of that class was present in either pred or + target + num_classes: Optionally specify the number of classes remove_bg: Flag to state whether a background class has been included within input parameters. If true, will remove background class. If false, return IoU over all classes. @@ -819,6 +828,8 @@ def __init__(self, remove_bg: bool = False, reduction: str = "elementwise_mean") - sum: add elements """ super().__init__(name="iou") + self.not_present_score = not_present_score + self.num_classes = num_classes self.remove_bg = remove_bg self.reduction = reduction @@ -826,4 +837,11 @@ def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor, sample_weight: Opt """ Actual metric calculation. 
""" - return iou(y_pred, y_true, remove_bg=self.remove_bg, reduction=self.reduction) + return iou( + pred=y_pred, + target=y_true, + not_present_score=self.not_present_score, + num_classes=self.num_classes, + remove_bg=self.remove_bg, + reduction=self.reduction, + ) diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index 75c0ab358798a..7db5cc1f2b7fe 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -963,9 +963,10 @@ def dice_score( def iou( pred: torch.Tensor, target: torch.Tensor, + not_present_score: float = 1.0, num_classes: Optional[int] = None, remove_bg: bool = False, - reduction: str = 'elementwise_mean' + reduction: str = 'elementwise_mean', ) -> torch.Tensor: """ Intersection over union, or Jaccard index calculation. @@ -973,6 +974,7 @@ def iou( Args: pred: Tensor containing predictions target: Tensor containing targets + not_present_score: score to use for a class, if no instance of that class was present in either pred or target num_classes: Optionally specify the number of classes remove_bg: Flag to state whether a background class has been included within input parameters. If true, will remove background class. If @@ -998,12 +1000,26 @@ def iou( tensor(0.4914) """ + num_classes = get_num_classes(pred=pred, target=target, num_classes=num_classes) + + # Determine minimum class index we will be evaluating. If using the background, then this is 0; otherwise, if + # removing background, use 1. + min_class_idx = 1 if remove_bg else 0 + tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred, target, num_classes) - if remove_bg: - tps = tps[1:] - fps = fps[1:] - fns = fns[1:] - denom = fps + fns + tps - denom[denom == 0] = torch.tensor(FLOAT16_EPSILON).type_as(denom) - iou = tps / denom - return reduce(iou, reduction=reduction) + + scores = torch.zeros(num_classes - min_class_idx, device=pred.device, dtype=torch.float32) + for class_idx in range(min_class_idx, num_classes): + # If this class is not present in either the pred or the target, then use the not_present_score for this class. + if not (target == class_idx).any() and not (pred == class_idx).any(): + scores[class_idx - min_class_idx] = not_present_score + continue + + tp = tps[class_idx] + fp = fps[class_idx] + fn = fns[class_idx] + denom = tp + fp + fn + score = tp.to(torch.float) / denom + scores[class_idx - min_class_idx] = score + + return reduce(scores, reduction=reduction) diff --git a/tests/metrics/functional/test_classification.py b/tests/metrics/functional/test_classification.py index f8269384b3477..4987e2f2a8787 100644 --- a/tests/metrics/functional/test_classification.py +++ b/tests/metrics/functional/test_classification.py @@ -373,9 +373,55 @@ def test_iou(half_ones, reduction, remove_bg, expected): target = (torch.arange(120) % 3).view(-1, 1) if half_ones: pred[:60] = 1 - iou_val = iou(pred, target, remove_bg=remove_bg, reduction=reduction) + iou_val = iou( + pred=pred, + target=target, + remove_bg=remove_bg, + reduction=reduction, + ) assert torch.allclose(iou_val, expected, atol=1e-9) +@pytest.mark.parametrize(['pred', 'target', 'not_present_score', 'num_classes', 'remove_bg', 'expected'], [ + # Note that -1 is used as the not_present_score in almost all tests here to distinguish it from the range of valid + # scores the function can return ([0., 1.] range, inclusive). + # 2 classes, class 0 is correct everywhere, class 1 is not present. 
+ pytest.param([0], [0], -1., 2, False, [1., -1.]), + pytest.param([0, 0], [0, 0], -1., 2, False, [1., -1.]), + # not_present_score not applied if only class 0 is present and it's the only class. + pytest.param([0], [0], -1., 1, False, [1.]), + # 2 classes, class 1 is correct everywhere, class 0 is not present. + pytest.param([1], [1], -1., 2, False, [-1., 1.]), + pytest.param([1, 1], [1, 1], -1., 2, False, [-1., 1.]), + # When background removed, class 0 does not get a score (not even the not_present_score). + pytest.param([1], [1], -1., 2, True, [1.0]), + # 3 classes. Only 0 and 2 are present, and are perfectly predicted. 1 should get not_present_score. + pytest.param([0, 2], [0, 2], -1., 3, False, [1., -1., 1.]), + pytest.param([2, 0], [2, 0], -1., 3, False, [1., -1., 1.]), + # 3 classes. Only 0 and 1 are present, and are perfectly predicted. 2 should get not_present_score. + pytest.param([0, 1], [0, 1], -1., 3, False, [1., 1., -1.]), + pytest.param([1, 0], [1, 0], -1., 3, False, [1., 1., -1.]), + # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in pred but not target; should not get not_present_score), class + # 2 is not present. + pytest.param([0, 1], [0, 0], -1., 3, False, [0.5, 0., -1.]), + # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in target but not pred; should not get not_present_score), class + # 2 is not present. + pytest.param([0, 0], [0, 1], -1., 3, False, [0.5, 0., -1.]), + # Sanity checks with not_present_score of 1.0. + pytest.param([0, 2], [0, 2], 1.0, 3, False, [1., 1., 1.]), + pytest.param([0, 2], [0, 2], 1.0, 3, True, [1., 1.]), +]) +def test_iou_not_present_score(pred, target, not_present_score, num_classes, remove_bg, expected): + iou_val = iou( + pred=torch.tensor(pred), + target=torch.tensor(target), + not_present_score=not_present_score, + num_classes=num_classes, + remove_bg=remove_bg, + reduction='none', + ) + assert torch.allclose(iou_val, torch.tensor(expected).to(iou_val)) + + # example data taken from # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/tests/test_ranking.py From b3f32d05bbb5cd833b6a2eddceb4271dbea46983 Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Wed, 2 Sep 2020 09:29:25 -0700 Subject: [PATCH 02/16] Changelog: move IoU not-present score fix to [unreleased] --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9269b58a23cf..1be24a0e0359d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `class_reduction` similar to sklearn for classification metrics ([#3322](https://github.com/PyTorchLightning/pytorch-lightning/pull/3322)) +- Changed IoU score behavior for classes not present in target or pred ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) + ### Deprecated @@ -156,7 +158,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed batch size auto-scaling feature to set the new value on the correct model attribute ([#3043](https://github.com/PyTorchLightning/pytorch-lightning/pull/3043)) - Fixed automatic batch scaling not working with half precision ([#3045](https://github.com/PyTorchLightning/pytorch-lightning/pull/3045)) - Fixed setting device to root gpu ([#3042](https://github.com/PyTorchLightning/pytorch-lightning/pull/3042)) -- Fixed IoU score for classes not present in target or pred ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) ## [0.8.5] - 2020-07-09 From 69fb77f4a21e19d9f28ec4612ac81bb1eb24654b Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Wed, 2 Sep 2020 12:35:39 -0700 Subject: [PATCH 03/16] IoU: avoid recomputing class presence in target and pred Use already-computed support, true positives, and false positives to determine if a class is not present in either target or pred. --- .../metrics/functional/classification.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index 7db5cc1f2b7fe..3e2b96b4cda02 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -1009,15 +1009,19 @@ def iou( tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred, target, num_classes) scores = torch.zeros(num_classes - min_class_idx, device=pred.device, dtype=torch.float32) - for class_idx in range(min_class_idx, num_classes): - # If this class is not present in either the pred or the target, then use the not_present_score for this class. - if not (target == class_idx).any() and not (pred == class_idx).any(): - scores[class_idx - min_class_idx] = not_present_score - continue + for class_idx in range(min_class_idx, num_classes): tp = tps[class_idx] fp = fps[class_idx] fn = fns[class_idx] + sup = sups[class_idx] + + # If this class is not present in either the target (no support) or the pred (no true or false positives), then + # use the not_present_score for this class. + if sup + tp + fp == 0: + scores[class_idx - min_class_idx] = not_present_score + continue + denom = tp + fp + fn score = tp.to(torch.float) / denom scores[class_idx - min_class_idx] = score From 6b5cc245af3107234a83c9253a0170f00a38c6f7 Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Wed, 2 Sep 2020 12:50:42 -0700 Subject: [PATCH 04/16] Test IoU against sklearn jaccard_score Also add TODO to test our IoU's not_present_score against sklearn's jaccard_score's zero_division when it becomes available. 
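For illustration, a sketch of roughly what that follow-up test could look like, assuming a
scikit-learn release whose `jaccard_score` accepts `zero_division` (per the PR linked in the
TODO); the tensors and the `zero_division=0` value below are example inputs, not part of the
diff that follows:

    import torch
    from sklearn.metrics import jaccard_score as sk_jaccard_score
    from pytorch_lightning.metrics.functional.classification import iou

    # Class 1 appears in neither pred nor target, so each scorer must fall back to its
    # configured default for that class: not_present_score here, zero_division there.
    pred = torch.tensor([0, 0, 2, 2])
    target = torch.tensor([0, 2, 2, 2])

    pl_scores = iou(pred=pred, target=target, not_present_score=0., num_classes=3, reduction='none')
    sk_scores = sk_jaccard_score(target.numpy(), pred.numpy(), labels=[0, 1, 2], average=None, zero_division=0)

    # Both should yield per-class scores of [1/2, 0, 2/3].
    assert torch.allclose(pl_scores, torch.tensor(sk_scores).to(pl_scores))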
--- tests/metrics/functional/test_classification.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/metrics/functional/test_classification.py b/tests/metrics/functional/test_classification.py index 4987e2f2a8787..1f4eb08ec4067 100644 --- a/tests/metrics/functional/test_classification.py +++ b/tests/metrics/functional/test_classification.py @@ -4,6 +4,7 @@ import torch from sklearn.metrics import ( accuracy_score as sk_accuracy, + jaccard_score as sk_jaccard_score, precision_score as sk_precision, recall_score as sk_recall, f1_score as sk_f1_score, @@ -37,6 +38,7 @@ @pytest.mark.parametrize(['sklearn_metric', 'torch_metric'], [ pytest.param(sk_accuracy, accuracy, id='accuracy'), + pytest.param(partial(sk_jaccard_score, average='micro'), iou, id='iou'), pytest.param(partial(sk_precision, average='micro'), precision, id='precision'), pytest.param(partial(sk_recall, average='micro'), recall, id='recall'), pytest.param(partial(sk_f1_score, average='micro'), f1_score, id='f1_score'), @@ -382,6 +384,9 @@ def test_iou(half_ones, reduction, remove_bg, expected): assert torch.allclose(iou_val, expected, atol=1e-9) +# TODO: When the jaccard_score of the sklearn version we use accepts `zero_division` (see +# https://github.com/scikit-learn/scikit-learn/pull/17866), consider adding a test here against our +# `not_present_score`. @pytest.mark.parametrize(['pred', 'target', 'not_present_score', 'num_classes', 'remove_bg', 'expected'], [ # Note that -1 is used as the not_present_score in almost all tests here to distinguish it from the range of valid # scores the function can return ([0., 1.] range, inclusive). From 0b81cb210069296d179845502c8f5f0fa0e4deaf Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Thu, 3 Sep 2020 12:00:34 -0700 Subject: [PATCH 05/16] IoU: remove_bg -> ignore_index Fixes #2736 - Rename IoU metric argument from `remove_bg` -> `ignore_index`. - Accept an optional int class index to ignore, instead of a bool and instead of always assuming the background class has index 0. - If given, ignore the class index when computing the IoU output, regardless of reduction method. --- CHANGELOG.md | 2 + pytorch_lightning/metrics/classification.py | 14 ++-- .../metrics/functional/classification.py | 33 ++++---- .../metrics/functional/test_classification.py | 78 ++++++++++++------- tests/metrics/test_classification.py | 6 +- 5 files changed, 83 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1be24a0e0359d..c852af7b78216 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed IoU score behavior for classes not present in target or pred ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) +- Changed IoU `remove_bg` bool to `ignore_index` optional int ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) + ### Deprecated diff --git a/pytorch_lightning/metrics/classification.py b/pytorch_lightning/metrics/classification.py index 069d698ad6759..bd020561d6c83 100644 --- a/pytorch_lightning/metrics/classification.py +++ b/pytorch_lightning/metrics/classification.py @@ -806,20 +806,20 @@ class IoU(TensorMetric): def __init__( self, + ignore_index: Optional[int] = None, not_present_score: float = 1.0, num_classes: Optional[int] = None, - remove_bg: bool = False, reduction: str = "elementwise_mean", ): """ Args: + ignore_index: optional int specifying a target class to ignore. 
If given, this class index does not + contribute to the returned score, regardless of reduction method. Has no effect if given an int that is + not in the range [0, num_classes-1], where num_classes is either given or derived from pred and target. + By default, no index is ignored, and all classes are used. not_present_score: score to use for a class, if no instance of that class was present in either pred or target num_classes: Optionally specify the number of classes - remove_bg: Flag to state whether a background class has been included - within input parameters. If true, will remove background class. If - false, return IoU over all classes. - Assumes that background is '0' class in input tensor reduction: a method to reduce metric score over labels (default: takes the mean) Available reduction methods: @@ -828,9 +828,9 @@ def __init__( - sum: add elements """ super().__init__(name="iou") + self.ignore_index = ignore_index self.not_present_score = not_present_score self.num_classes = num_classes - self.remove_bg = remove_bg self.reduction = reduction def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor, sample_weight: Optional[torch.Tensor] = None): @@ -840,8 +840,8 @@ def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor, sample_weight: Opt return iou( pred=y_pred, target=y_true, + ignore_index=self.ignore_index, not_present_score=self.not_present_score, num_classes=self.num_classes, - remove_bg=self.remove_bg, reduction=self.reduction, ) diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index 3e2b96b4cda02..e37ba86b580e2 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -963,9 +963,9 @@ def dice_score( def iou( pred: torch.Tensor, target: torch.Tensor, + ignore_index: Optional[int] = None, not_present_score: float = 1.0, num_classes: Optional[int] = None, - remove_bg: bool = False, reduction: str = 'elementwise_mean', ) -> torch.Tensor: """ @@ -974,12 +974,12 @@ def iou( Args: pred: Tensor containing predictions target: Tensor containing targets + ignore_index: optional int specifying a target class to ignore. If given, this class index does not contribute + to the returned score, regardless of reduction method. Has no effect if given an int that is not in the + range [0, num_classes-1], where num_classes is either given or derived from pred and target. By default, no + index is ignored, and all classes are used. not_present_score: score to use for a class, if no instance of that class was present in either pred or target num_classes: Optionally specify the number of classes - remove_bg: Flag to state whether a background class has been included - within input parameters. If true, will remove background class. If - false, return IoU over all classes - Assumes that background is '0' class in input tensor reduction: a method to reduce metric score over labels (default: takes the mean) Available reduction methods: @@ -1002,15 +1002,15 @@ def iou( """ num_classes = get_num_classes(pred=pred, target=target, num_classes=num_classes) - # Determine minimum class index we will be evaluating. If using the background, then this is 0; otherwise, if - # removing background, use 1. 
- min_class_idx = 1 if remove_bg else 0 - tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred, target, num_classes) - scores = torch.zeros(num_classes - min_class_idx, device=pred.device, dtype=torch.float32) + scores = torch.zeros(num_classes, device=pred.device, dtype=torch.float32) + + for class_idx in range(num_classes): + # Skip this class if its index is being ignored. + if class_idx == ignore_index: + continue - for class_idx in range(min_class_idx, num_classes): tp = tps[class_idx] fp = fps[class_idx] fn = fns[class_idx] @@ -1019,11 +1019,18 @@ def iou( # If this class is not present in either the target (no support) or the pred (no true or false positives), then # use the not_present_score for this class. if sup + tp + fp == 0: - scores[class_idx - min_class_idx] = not_present_score + scores[class_idx] = not_present_score continue denom = tp + fp + fn score = tp.to(torch.float) / denom - scores[class_idx - min_class_idx] = score + scores[class_idx] = score + + # Remove the ignored class index from the scores. + if ignore_index is not None and ignore_index >= 0 and ignore_index < num_classes: + scores = torch.cat([ + scores[:ignore_index], + scores[ignore_index + 1:], + ]) return reduce(scores, reduction=reduction) diff --git a/tests/metrics/functional/test_classification.py b/tests/metrics/functional/test_classification.py index 1f4eb08ec4067..87037bd25e319 100644 --- a/tests/metrics/functional/test_classification.py +++ b/tests/metrics/functional/test_classification.py @@ -362,15 +362,15 @@ def test_dice_score(pred, target, expected): assert score == expected -@pytest.mark.parametrize(['half_ones', 'reduction', 'remove_bg', 'expected'], [ - pytest.param(False, 'none', False, torch.Tensor([1, 1, 1])), - pytest.param(False, 'elementwise_mean', False, torch.Tensor([1])), - pytest.param(False, 'none', True, torch.Tensor([1, 1])), - pytest.param(True, 'none', False, torch.Tensor([0.5, 0.5, 0.5])), - pytest.param(True, 'elementwise_mean', False, torch.Tensor([0.5])), - pytest.param(True, 'none', True, torch.Tensor([0.5, 0.5])), +@pytest.mark.parametrize(['half_ones', 'reduction', 'ignore_index', 'expected'], [ + pytest.param(False, 'none', None, torch.Tensor([1, 1, 1])), + pytest.param(False, 'elementwise_mean', None, torch.Tensor([1])), + pytest.param(False, 'none', 0, torch.Tensor([1, 1])), + pytest.param(True, 'none', None, torch.Tensor([0.5, 0.5, 0.5])), + pytest.param(True, 'elementwise_mean', None, torch.Tensor([0.5])), + pytest.param(True, 'none', 0, torch.Tensor([0.5, 0.5])), ]) -def test_iou(half_ones, reduction, remove_bg, expected): +def test_iou(half_ones, reduction, ignore_index, expected): pred = (torch.arange(120) % 3).view(-1, 1) target = (torch.arange(120) % 3).view(-1, 1) if half_ones: @@ -378,7 +378,7 @@ def test_iou(half_ones, reduction, remove_bg, expected): iou_val = iou( pred=pred, target=target, - remove_bg=remove_bg, + ignore_index=ignore_index, reduction=reduction, ) assert torch.allclose(iou_val, expected, atol=1e-9) @@ -387,46 +387,70 @@ def test_iou(half_ones, reduction, remove_bg, expected): # TODO: When the jaccard_score of the sklearn version we use accepts `zero_division` (see # https://github.com/scikit-learn/scikit-learn/pull/17866), consider adding a test here against our # `not_present_score`. 
-@pytest.mark.parametrize(['pred', 'target', 'not_present_score', 'num_classes', 'remove_bg', 'expected'], [ +@pytest.mark.parametrize(['pred', 'target', 'ignore_index', 'not_present_score', 'num_classes', 'expected'], [ # Note that -1 is used as the not_present_score in almost all tests here to distinguish it from the range of valid # scores the function can return ([0., 1.] range, inclusive). # 2 classes, class 0 is correct everywhere, class 1 is not present. - pytest.param([0], [0], -1., 2, False, [1., -1.]), - pytest.param([0, 0], [0, 0], -1., 2, False, [1., -1.]), + pytest.param([0], [0], None, -1., 2, [1., -1.]), + pytest.param([0, 0], [0, 0], None, -1., 2, [1., -1.]), # not_present_score not applied if only class 0 is present and it's the only class. - pytest.param([0], [0], -1., 1, False, [1.]), + pytest.param([0], [0], None, -1., 1, [1.]), # 2 classes, class 1 is correct everywhere, class 0 is not present. - pytest.param([1], [1], -1., 2, False, [-1., 1.]), - pytest.param([1, 1], [1, 1], -1., 2, False, [-1., 1.]), - # When background removed, class 0 does not get a score (not even the not_present_score). - pytest.param([1], [1], -1., 2, True, [1.0]), + pytest.param([1], [1], None, -1., 2, [-1., 1.]), + pytest.param([1, 1], [1, 1], None, -1., 2, [-1., 1.]), + # When 0 index ignored, class 0 does not get a score (not even the not_present_score). + pytest.param([1], [1], 0, -1., 2, [1.0]), # 3 classes. Only 0 and 2 are present, and are perfectly predicted. 1 should get not_present_score. - pytest.param([0, 2], [0, 2], -1., 3, False, [1., -1., 1.]), - pytest.param([2, 0], [2, 0], -1., 3, False, [1., -1., 1.]), + pytest.param([0, 2], [0, 2], None, -1., 3, [1., -1., 1.]), + pytest.param([2, 0], [2, 0], None, -1., 3, [1., -1., 1.]), # 3 classes. Only 0 and 1 are present, and are perfectly predicted. 2 should get not_present_score. - pytest.param([0, 1], [0, 1], -1., 3, False, [1., 1., -1.]), - pytest.param([1, 0], [1, 0], -1., 3, False, [1., 1., -1.]), + pytest.param([0, 1], [0, 1], None, -1., 3, [1., 1., -1.]), + pytest.param([1, 0], [1, 0], None, -1., 3, [1., 1., -1.]), # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in pred but not target; should not get not_present_score), class # 2 is not present. - pytest.param([0, 1], [0, 0], -1., 3, False, [0.5, 0., -1.]), + pytest.param([0, 1], [0, 0], None, -1., 3, [0.5, 0., -1.]), # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in target but not pred; should not get not_present_score), class # 2 is not present. - pytest.param([0, 0], [0, 1], -1., 3, False, [0.5, 0., -1.]), + pytest.param([0, 0], [0, 1], None, -1., 3, [0.5, 0., -1.]), # Sanity checks with not_present_score of 1.0. 
- pytest.param([0, 2], [0, 2], 1.0, 3, False, [1., 1., 1.]), - pytest.param([0, 2], [0, 2], 1.0, 3, True, [1., 1.]), + pytest.param([0, 2], [0, 2], None, 1.0, 3, [1., 1., 1.]), + pytest.param([0, 2], [0, 2], 0, 1.0, 3, [1., 1.]), ]) -def test_iou_not_present_score(pred, target, not_present_score, num_classes, remove_bg, expected): +def test_iou_not_present_score(pred, target, ignore_index, not_present_score, num_classes, expected): iou_val = iou( pred=torch.tensor(pred), target=torch.tensor(target), + ignore_index=ignore_index, not_present_score=not_present_score, num_classes=num_classes, - remove_bg=remove_bg, reduction='none', ) assert torch.allclose(iou_val, torch.tensor(expected).to(iou_val)) +@pytest.mark.parametrize(['pred', 'target', 'ignore_index', 'num_classes', 'reduction', 'expected'], [ + # Ignoring an index outside of [0, num_classes-1] should have no effect. + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], None, 3, 'none', [1, 1 / 2, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], -1, 3, 'none', [1, 1 / 2, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 255, 3, 'none', [1, 1 / 2, 2 / 3]), + # Ignoring a valid index drops only that index from the result. + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, 'none', [1 / 2, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 1, 3, 'none', [1, 2 / 3]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 2, 3, 'none', [1, 1 / 2]), + # When reducing to mean or sum, the ignored index does not contribute to the output. + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, 'elementwise_mean', [7 / 12]), + pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, 'sum', [7 / 6]), +]) +def test_iou_ignore_index(pred, target, ignore_index, num_classes, reduction, expected): + iou_val = iou( + pred=torch.tensor(pred), + target=torch.tensor(target), + ignore_index=ignore_index, + num_classes=num_classes, + reduction=reduction, + ) + assert torch.allclose(iou_val, torch.tensor(expected).to(iou_val)) + + # example data taken from # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/tests/test_ranking.py diff --git a/tests/metrics/test_classification.py b/tests/metrics/test_classification.py index 1be023b2af164..0559f360e10d4 100644 --- a/tests/metrics/test_classification.py +++ b/tests/metrics/test_classification.py @@ -226,9 +226,9 @@ def test_dice_coefficient(include_background): assert isinstance(dice, torch.Tensor) -@pytest.mark.parametrize('remove_bg', [True, False]) -def test_iou(remove_bg): - iou = IoU(remove_bg=remove_bg) +@pytest.mark.parametrize('ignore_index', [0, 1, None]) +def test_iou(ignore_index): + iou = IoU(ignore_index=ignore_index) assert iou.name == 'iou' score = iou(torch.randint(0, 1, (10, 25, 25)), From d0b20b95c78334735985bf52d262a189a98fcb03 Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Tue, 8 Sep 2020 10:15:53 -0700 Subject: [PATCH 06/16] Improve documentation for IoU not_present_score --- pytorch_lightning/metrics/classification.py | 17 ++++++++++++++-- .../metrics/functional/classification.py | 20 ++++++++++++++++--- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/metrics/classification.py b/pytorch_lightning/metrics/classification.py index bd020561d6c83..607f3e19d6901 100644 --- a/pytorch_lightning/metrics/classification.py +++ b/pytorch_lightning/metrics/classification.py @@ -817,8 +817,21 @@ def __init__( contribute to the returned score, regardless of reduction method. 
Has no effect if given an int that is not in the range [0, num_classes-1], where num_classes is either given or derived from pred and target. By default, no index is ignored, and all classes are used. - not_present_score: score to use for a class, if no instance of that class was present in either pred or - target + not_present_score: score to use for an individual class, if no instances of the class index were present in + `y_pred` AND no instances of the class index were present in `y_true`. By default, assign a score of + 1.0 for this class if not present. + + Ex: if we have the following input: + + - 3 classes + - `y_pred` is [0, 0] + - `y_true` is [0, 2] + - `not_present_score` is 1.0 + + Then class 0 would get a score of 1 / 2, and class 2 would get a score of 0 / 1. However, class 1 is not + actually present in either `y_pred` or `y_true`, so it falls back to the `not_present_score` (1.0 in + this example). These 3 scores are then reduced according to the `reduction` method in the same way as if + class 1 were present and received an empirical score. num_classes: Optionally specify the number of classes reduction: a method to reduce metric score over labels (default: takes the mean) Available reduction methods: diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index e37ba86b580e2..d51d4fa0d7686 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -978,7 +978,21 @@ def iou( to the returned score, regardless of reduction method. Has no effect if given an int that is not in the range [0, num_classes-1], where num_classes is either given or derived from pred and target. By default, no index is ignored, and all classes are used. - not_present_score: score to use for a class, if no instance of that class was present in either pred or target + not_present_score: score to use for an individual class, if no instances of the class index were present in + `pred` AND no instances of the class index were present in `target`. By default, assign a score of 1.0 for + this class if not present. + + Ex: if we have the following input: + + - 3 classes + - `pred` is [0, 0] + - `target` is [0, 2] + - `not_present_score` is 1.0 + + Then class 0 would get a score of 1 / 2, and class 2 would get a score of 0 / 1. However, class 1 is not + actually present in either `pred` or `target`, so it falls back to the `not_present_score` (1.0 in + this example). These 3 scores are then reduced according to the `reduction` method in the same way as if + class 1 were present and received an empirical score. num_classes: Optionally specify the number of classes reduction: a method to reduce metric score over labels (default: takes the mean) Available reduction methods: @@ -1016,8 +1030,8 @@ def iou( fn = fns[class_idx] sup = sups[class_idx] - # If this class is not present in either the target (no support) or the pred (no true or false positives), then - # use the not_present_score for this class. + # If this class is not present in the target (no support) AND not present in the pred (no true or false + # positives), then use the not_present_score for this class. 
if sup + tp + fp == 0: scores[class_idx] = not_present_score continue From 749ee03c4098ef85e89f0e9dea17a9248f7c69d9 Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Tue, 8 Sep 2020 10:21:27 -0700 Subject: [PATCH 07/16] Update default IoU not_present_score to 0.0 --- pytorch_lightning/metrics/classification.py | 4 ++-- pytorch_lightning/metrics/functional/classification.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/metrics/classification.py b/pytorch_lightning/metrics/classification.py index 607f3e19d6901..ee416623740d6 100644 --- a/pytorch_lightning/metrics/classification.py +++ b/pytorch_lightning/metrics/classification.py @@ -807,7 +807,7 @@ class IoU(TensorMetric): def __init__( self, ignore_index: Optional[int] = None, - not_present_score: float = 1.0, + not_present_score: float = 0.0, num_classes: Optional[int] = None, reduction: str = "elementwise_mean", ): @@ -819,7 +819,7 @@ def __init__( By default, no index is ignored, and all classes are used. not_present_score: score to use for an individual class, if no instances of the class index were present in `y_pred` AND no instances of the class index were present in `y_true`. By default, assign a score of - 1.0 for this class if not present. + 0.0 for this class if not present. Ex: if we have the following input: diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index d51d4fa0d7686..4badf3bd3c7de 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -964,7 +964,7 @@ def iou( pred: torch.Tensor, target: torch.Tensor, ignore_index: Optional[int] = None, - not_present_score: float = 1.0, + not_present_score: float = 0.0, num_classes: Optional[int] = None, reduction: str = 'elementwise_mean', ) -> torch.Tensor: @@ -979,7 +979,7 @@ def iou( range [0, num_classes-1], where num_classes is either given or derived from pred and target. By default, no index is ignored, and all classes are used. not_present_score: score to use for an individual class, if no instances of the class index were present in - `pred` AND no instances of the class index were present in `target`. By default, assign a score of 1.0 for + `pred` AND no instances of the class index were present in `target`. By default, assign a score of 0.0 for this class if not present. Ex: if we have the following input: From 11a2fa418dde14a5ce8d4b2bc4332d1af7215765 Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Tue, 8 Sep 2020 11:59:32 -0700 Subject: [PATCH 08/16] Add note about IoU division by zero --- pytorch_lightning/metrics/functional/classification.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index 4badf3bd3c7de..85c5cd4e20b00 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -1037,6 +1037,9 @@ class 1 were present and received an empirical score. continue denom = tp + fp + fn + # Note that we do not need to worry about division-by-zero here since we know (sup + tp + fp != 0) from above, + # which means ((tp+fn) + tp + fp != 0), which means (2tp + fp + fn != 0). Since all vars are non-negative, we + # can conclude (tp + fp + fn > 0), meaning the denominator is non-zero for each class. 
score = tp.to(torch.float) / denom scores[class_idx] = score From 645f981f46681d0af8d06d6965f94f6d3baeabbd Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Wed, 9 Sep 2020 16:25:35 -0700 Subject: [PATCH 09/16] Rename IoU not_present_score -> absent_score --- pytorch_lightning/metrics/classification.py | 14 ++++---- .../metrics/functional/classification.py | 16 +++++----- .../metrics/functional/test_classification.py | 32 +++++++++---------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/pytorch_lightning/metrics/classification.py b/pytorch_lightning/metrics/classification.py index ee416623740d6..3f3d8b42e9ba8 100644 --- a/pytorch_lightning/metrics/classification.py +++ b/pytorch_lightning/metrics/classification.py @@ -807,7 +807,7 @@ class IoU(TensorMetric): def __init__( self, ignore_index: Optional[int] = None, - not_present_score: float = 0.0, + absent_score: float = 0.0, num_classes: Optional[int] = None, reduction: str = "elementwise_mean", ): @@ -817,19 +817,19 @@ def __init__( contribute to the returned score, regardless of reduction method. Has no effect if given an int that is not in the range [0, num_classes-1], where num_classes is either given or derived from pred and target. By default, no index is ignored, and all classes are used. - not_present_score: score to use for an individual class, if no instances of the class index were present in + absent_score: score to use for an individual class, if no instances of the class index were present in `y_pred` AND no instances of the class index were present in `y_true`. By default, assign a score of - 0.0 for this class if not present. + 0.0 for this class if absent. Ex: if we have the following input: - 3 classes - `y_pred` is [0, 0] - `y_true` is [0, 2] - - `not_present_score` is 1.0 + - `absent_score` is 1.0 Then class 0 would get a score of 1 / 2, and class 2 would get a score of 0 / 1. However, class 1 is not - actually present in either `y_pred` or `y_true`, so it falls back to the `not_present_score` (1.0 in + actually present in either `y_pred` or `y_true`, so it falls back to the `absent_score` (1.0 in this example). These 3 scores are then reduced according to the `reduction` method in the same way as if class 1 were present and received an empirical score. num_classes: Optionally specify the number of classes @@ -842,7 +842,7 @@ class 1 were present and received an empirical score. """ super().__init__(name="iou") self.ignore_index = ignore_index - self.not_present_score = not_present_score + self.absent_score = absent_score self.num_classes = num_classes self.reduction = reduction @@ -854,7 +854,7 @@ def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor, sample_weight: Opt pred=y_pred, target=y_true, ignore_index=self.ignore_index, - not_present_score=self.not_present_score, + absent_score=self.absent_score, num_classes=self.num_classes, reduction=self.reduction, ) diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index 85c5cd4e20b00..fab1bc7210aa1 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -964,7 +964,7 @@ def iou( pred: torch.Tensor, target: torch.Tensor, ignore_index: Optional[int] = None, - not_present_score: float = 0.0, + absent_score: float = 0.0, num_classes: Optional[int] = None, reduction: str = 'elementwise_mean', ) -> torch.Tensor: @@ -978,19 +978,19 @@ def iou( to the returned score, regardless of reduction method. 
Has no effect if given an int that is not in the range [0, num_classes-1], where num_classes is either given or derived from pred and target. By default, no index is ignored, and all classes are used. - not_present_score: score to use for an individual class, if no instances of the class index were present in + absent_score: score to use for an individual class, if no instances of the class index were present in `pred` AND no instances of the class index were present in `target`. By default, assign a score of 0.0 for - this class if not present. + this class if absent. Ex: if we have the following input: - 3 classes - `pred` is [0, 0] - `target` is [0, 2] - - `not_present_score` is 1.0 + - `absent_score` is 1.0 Then class 0 would get a score of 1 / 2, and class 2 would get a score of 0 / 1. However, class 1 is not - actually present in either `pred` or `target`, so it falls back to the `not_present_score` (1.0 in + actually present in either `pred` or `target`, so it falls back to the `absent_score` (1.0 in this example). These 3 scores are then reduced according to the `reduction` method in the same way as if class 1 were present and received an empirical score. num_classes: Optionally specify the number of classes @@ -1030,10 +1030,10 @@ class 1 were present and received an empirical score. fn = fns[class_idx] sup = sups[class_idx] - # If this class is not present in the target (no support) AND not present in the pred (no true or false - # positives), then use the not_present_score for this class. + # If this class is absent in the target (no support) AND absent in the pred (no true or false + # positives), then use the absent_score for this class. if sup + tp + fp == 0: - scores[class_idx] = not_present_score + scores[class_idx] = absent_score continue denom = tp + fp + fn diff --git a/tests/metrics/functional/test_classification.py b/tests/metrics/functional/test_classification.py index 87037bd25e319..0a92a3cea9fc3 100644 --- a/tests/metrics/functional/test_classification.py +++ b/tests/metrics/functional/test_classification.py @@ -386,42 +386,42 @@ def test_iou(half_ones, reduction, ignore_index, expected): # TODO: When the jaccard_score of the sklearn version we use accepts `zero_division` (see # https://github.com/scikit-learn/scikit-learn/pull/17866), consider adding a test here against our -# `not_present_score`. -@pytest.mark.parametrize(['pred', 'target', 'ignore_index', 'not_present_score', 'num_classes', 'expected'], [ - # Note that -1 is used as the not_present_score in almost all tests here to distinguish it from the range of valid +# `absent_score`. +@pytest.mark.parametrize(['pred', 'target', 'ignore_index', 'absent_score', 'num_classes', 'expected'], [ + # Note that -1 is used as the absent_score in almost all tests here to distinguish it from the range of valid # scores the function can return ([0., 1.] range, inclusive). - # 2 classes, class 0 is correct everywhere, class 1 is not present. + # 2 classes, class 0 is correct everywhere, class 1 is absent. pytest.param([0], [0], None, -1., 2, [1., -1.]), pytest.param([0, 0], [0, 0], None, -1., 2, [1., -1.]), - # not_present_score not applied if only class 0 is present and it's the only class. + # absent_score not applied if only class 0 is present and it's the only class. pytest.param([0], [0], None, -1., 1, [1.]), - # 2 classes, class 1 is correct everywhere, class 0 is not present. + # 2 classes, class 1 is correct everywhere, class 0 is absent. 
pytest.param([1], [1], None, -1., 2, [-1., 1.]), pytest.param([1, 1], [1, 1], None, -1., 2, [-1., 1.]), - # When 0 index ignored, class 0 does not get a score (not even the not_present_score). + # When 0 index ignored, class 0 does not get a score (not even the absent_score). pytest.param([1], [1], 0, -1., 2, [1.0]), - # 3 classes. Only 0 and 2 are present, and are perfectly predicted. 1 should get not_present_score. + # 3 classes. Only 0 and 2 are present, and are perfectly predicted. 1 should get absent_score. pytest.param([0, 2], [0, 2], None, -1., 3, [1., -1., 1.]), pytest.param([2, 0], [2, 0], None, -1., 3, [1., -1., 1.]), - # 3 classes. Only 0 and 1 are present, and are perfectly predicted. 2 should get not_present_score. + # 3 classes. Only 0 and 1 are present, and are perfectly predicted. 2 should get absent_score. pytest.param([0, 1], [0, 1], None, -1., 3, [1., 1., -1.]), pytest.param([1, 0], [1, 0], None, -1., 3, [1., 1., -1.]), - # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in pred but not target; should not get not_present_score), class - # 2 is not present. + # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in pred but not target; should not get absent_score), class + # 2 is absent. pytest.param([0, 1], [0, 0], None, -1., 3, [0.5, 0., -1.]), - # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in target but not pred; should not get not_present_score), class - # 2 is not present. + # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in target but not pred; should not get absent_score), class + # 2 is absent. pytest.param([0, 0], [0, 1], None, -1., 3, [0.5, 0., -1.]), - # Sanity checks with not_present_score of 1.0. + # Sanity checks with absent_score of 1.0. pytest.param([0, 2], [0, 2], None, 1.0, 3, [1., 1., 1.]), pytest.param([0, 2], [0, 2], 0, 1.0, 3, [1., 1.]), ]) -def test_iou_not_present_score(pred, target, ignore_index, not_present_score, num_classes, expected): +def test_iou_absent_score(pred, target, ignore_index, absent_score, num_classes, expected): iou_val = iou( pred=torch.tensor(pred), target=torch.tensor(target), ignore_index=ignore_index, - not_present_score=not_present_score, + absent_score=absent_score, num_classes=num_classes, reduction='none', ) From c6cdaa03b02d99a88a967a3a4b0d7c6212825719 Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Mon, 14 Sep 2020 09:25:29 -0700 Subject: [PATCH 10/16] Update IoU absent score changelog wording --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c852af7b78216..72e11b052eff5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,7 +30,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Changed `class_reduction` similar to sklearn for classification metrics ([#3322](https://github.com/PyTorchLightning/pytorch-lightning/pull/3322)) -- Changed IoU score behavior for classes not present in target or pred ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) +- Changed IoU score behavior for classes absent in target and pred ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) - Changed IoU `remove_bg` bool to `ignore_index` optional int ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) From f0cc0fa214d4456c286777952857da3e9b239a34 Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Mon, 14 Sep 2020 09:39:04 -0700 Subject: [PATCH 11/16] Condense IoU absent_score argument docstring --- pytorch_lightning/metrics/classification.py | 17 +++-------------- .../metrics/functional/classification.py | 17 +++-------------- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/pytorch_lightning/metrics/classification.py b/pytorch_lightning/metrics/classification.py index 3f3d8b42e9ba8..d639b2b39e67c 100644 --- a/pytorch_lightning/metrics/classification.py +++ b/pytorch_lightning/metrics/classification.py @@ -818,20 +818,9 @@ def __init__( not in the range [0, num_classes-1], where num_classes is either given or derived from pred and target. By default, no index is ignored, and all classes are used. absent_score: score to use for an individual class, if no instances of the class index were present in - `y_pred` AND no instances of the class index were present in `y_true`. By default, assign a score of - 0.0 for this class if absent. - - Ex: if we have the following input: - - - 3 classes - - `y_pred` is [0, 0] - - `y_true` is [0, 2] - - `absent_score` is 1.0 - - Then class 0 would get a score of 1 / 2, and class 2 would get a score of 0 / 1. However, class 1 is not - actually present in either `y_pred` or `y_true`, so it falls back to the `absent_score` (1.0 in - this example). These 3 scores are then reduced according to the `reduction` method in the same way as if - class 1 were present and received an empirical score. + `y_pred` AND no instances of the class index were present in `y_true`. For example, if we have 3 + classes, [0, 0] for `y_pred`, and [0, 2] for `y_true`, then class 1 would be assigned the + `absent_score`. Default is 0.0. num_classes: Optionally specify the number of classes reduction: a method to reduce metric score over labels (default: takes the mean) Available reduction methods: diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index fab1bc7210aa1..44cc5bc7d550c 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -979,20 +979,9 @@ def iou( range [0, num_classes-1], where num_classes is either given or derived from pred and target. By default, no index is ignored, and all classes are used. absent_score: score to use for an individual class, if no instances of the class index were present in - `pred` AND no instances of the class index were present in `target`. By default, assign a score of 0.0 for - this class if absent. - - Ex: if we have the following input: - - - 3 classes - - `pred` is [0, 0] - - `target` is [0, 2] - - `absent_score` is 1.0 - - Then class 0 would get a score of 1 / 2, and class 2 would get a score of 0 / 1. 
However, class 1 is not - actually present in either `pred` or `target`, so it falls back to the `absent_score` (1.0 in - this example). These 3 scores are then reduced according to the `reduction` method in the same way as if - class 1 were present and received an empirical score. + `pred` AND no instances of the class index were present in `target`. For example, if we have 3 classes, + [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be assigned the `absent_score`. Default is + 0.0. num_classes: Optionally specify the number of classes reduction: a method to reduce metric score over labels (default: takes the mean) Available reduction methods: From 8b84b0493b7c31b6a988737d328d27ac45632eac Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Mon, 14 Sep 2020 09:41:17 -0700 Subject: [PATCH 12/16] Remove unnecessary IoU ignore_index comment --- pytorch_lightning/metrics/functional/classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index 44cc5bc7d550c..fd768c31b2463 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -1010,7 +1010,6 @@ def iou( scores = torch.zeros(num_classes, device=pred.device, dtype=torch.float32) for class_idx in range(num_classes): - # Skip this class if its index is being ignored. if class_idx == ignore_index: continue From 4adeca5951a323d4280da79275bb1d0eb3d80ee4 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 15 Sep 2020 23:58:38 +0530 Subject: [PATCH 13/16] docstrings --- pytorch_lightning/metrics/classification.py | 19 +++++++++---------- .../metrics/functional/classification.py | 18 ++++++++---------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/pytorch_lightning/metrics/classification.py b/pytorch_lightning/metrics/classification.py index d639b2b39e67c..57b3cdbb192ee 100644 --- a/pytorch_lightning/metrics/classification.py +++ b/pytorch_lightning/metrics/classification.py @@ -748,11 +748,11 @@ def __init__( include_background: whether to also compute dice for the background nan_score: score to return, if a NaN occurs during computation (denom zero) no_fg_score: score to return, if no foreground pixel was found in target - reduction: a method to reduce metric score over labels (default: takes the mean) - Available reduction methods: - - elementwise_mean: takes the mean - - none: pass array - - sum: add elements + reduction: a method to reduce metric score over labels. + + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied reduce_group: the process group to reduce metric results from DDP """ super().__init__( @@ -822,12 +822,11 @@ def __init__( classes, [0, 0] for `y_pred`, and [0, 2] for `y_true`, then class 1 would be assigned the `absent_score`. Default is 0.0. num_classes: Optionally specify the number of classes - reduction: a method to reduce metric score over labels (default: takes the mean) - Available reduction methods: + reduction: a method to reduce metric score over labels. 
- - elementwise_mean: takes the mean - - none: pass array - - sum: add elements + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied """ super().__init__(name="iou") self.ignore_index = ignore_index diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index fd768c31b2463..d1ffab1fda72c 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -921,12 +921,11 @@ def dice_score( bg: whether to also compute dice for the background nan_score: score to return, if a NaN occurs during computation no_fg_score: score to return, if no foreground pixel was found in target - reduction: a method to reduce metric score over labels (default: takes the mean) - Available reduction methods: + reduction: a method to reduce metric score over labels. - - elementwise_mean: takes the mean - - none: pass array - - sum: add elements + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied Return: Tensor containing dice score @@ -983,12 +982,11 @@ def iou( [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be assigned the `absent_score`. Default is 0.0. num_classes: Optionally specify the number of classes - reduction: a method to reduce metric score over labels (default: takes the mean) - Available reduction methods: + reduction: a method to reduce metric score over labels. - - elementwise_mean: takes the mean - - none: pass array - - sum: add elements + - ``'elementwise_mean'``: takes the mean (default) + - ``'sum'``: takes the sum + - ``'none'``: no reduction will be applied Return: IoU score : Tensor containing single value if reduction is From 0b8fbc15cbca118ae7921f31bb3794c4c2cbd15a Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 15 Sep 2020 23:59:58 +0530 Subject: [PATCH 14/16] isort --- pytorch_lightning/metrics/functional/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index d1ffab1fda72c..3a47e7d6ad356 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -4,7 +4,7 @@ import torch from torch.nn import functional as F -from pytorch_lightning.metrics.functional.reduction import reduce, class_reduce +from pytorch_lightning.metrics.functional.reduction import class_reduce, reduce from pytorch_lightning.utilities import FLOAT16_EPSILON, rank_zero_warn From 97f95abc72bd9ac3455dcb52688f4b6be305a17a Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 16 Sep 2020 00:05:52 +0530 Subject: [PATCH 15/16] flake8 --- tests/trainer/test_trainer_tricks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/trainer/test_trainer_tricks.py b/tests/trainer/test_trainer_tricks.py index b1716485ed692..3c2f5b2fed9aa 100755 --- a/tests/trainer/test_trainer_tricks.py +++ b/tests/trainer/test_trainer_tricks.py @@ -63,7 +63,6 @@ def test_overfit_batch_limits(tmpdir): full_train_samples = len(train_loader) num_train_samples = int(0.11 * full_train_samples) - # ------------------------------------------------------ # set VAL and Test loaders # ------------------------------------------------------ From 42ef161dbc15e542bb73c04d8f4e0d879b1ad59b Mon Sep 17 00:00:00 2001 From: Abe Botros Date: Tue, 15 Sep 2020 
13:25:14 -0700 Subject: [PATCH 16/16] Fix test of IoU against sklearn jaccard Use macro instead of micro averaging in sklearn's jaccard score, to match multi-class IoU, which conventionally takes per-class scores before averaging. --- tests/metrics/functional/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/functional/test_classification.py b/tests/metrics/functional/test_classification.py index 0a92a3cea9fc3..7466c5c4fe48e 100644 --- a/tests/metrics/functional/test_classification.py +++ b/tests/metrics/functional/test_classification.py @@ -38,7 +38,7 @@ @pytest.mark.parametrize(['sklearn_metric', 'torch_metric'], [ pytest.param(sk_accuracy, accuracy, id='accuracy'), - pytest.param(partial(sk_jaccard_score, average='micro'), iou, id='iou'), + pytest.param(partial(sk_jaccard_score, average='macro'), iou, id='iou'), pytest.param(partial(sk_precision, average='micro'), precision, id='precision'), pytest.param(partial(sk_recall, average='micro'), recall, id='recall'), pytest.param(partial(sk_f1_score, average='micro'), f1_score, id='f1_score'),
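After the full series, the functional metric reads as
`iou(pred, target, ignore_index=None, absent_score=0.0, num_classes=None, reduction='elementwise_mean')`.
A short usage sketch of that end state (the tensors below are example inputs chosen to mirror
the `test_iou_ignore_index` cases above, not code from the patches):

    import torch
    from pytorch_lightning.metrics.functional.classification import iou

    pred = torch.tensor([0, 1, 1, 2, 2])
    target = torch.tensor([0, 1, 2, 2, 2])

    # Per-class scores: class 0 -> 1/1, class 1 -> 1/2, class 2 -> 2/3.
    iou(pred=pred, target=target, num_classes=3, reduction='none')

    # ignore_index drops class 0 from the result entirely, so the mean over the remaining
    # classes is (1/2 + 2/3) / 2 = 7/12 rather than a mean over all three classes.
    iou(pred=pred, target=target, ignore_index=0, num_classes=3, reduction='elementwise_mean')

    # With num_classes=4, class 3 occurs in neither tensor; instead of dividing by a zero
    # denominator, it receives absent_score (set to 1.0 here; the default is 0.0).
    iou(pred=pred, target=target, absent_score=1.0, num_classes=4, reduction='none')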