
Fix IoU score for classes not present in target or pred #3098

Merged · 17 commits · Sep 17, 2020
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -30,6 +30,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Changed `class_reduction` similar to sklearn for classification metrics ([#3322](https://github.com/PyTorchLightning/pytorch-lightning/pull/3322))

- Changed IoU score behavior for classes absent in target and pred ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098))

- Changed IoU `remove_bg` bool to `ignore_index` optional int ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098))

### Deprecated


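For context, the two entries above amount to the following call-site migration; a minimal sketch assuming the functional import path used elsewhere in this diff (`pytorch_lightning.metrics.functional.classification`):

```python
import torch

from pytorch_lightning.metrics.functional.classification import iou

pred = torch.tensor([0, 1, 1, 2, 2])
target = torch.tensor([0, 1, 2, 2, 2])

# Before this PR: background removal was a boolean flag that always dropped class 0.
# score = iou(pred, target, remove_bg=True)

# After this PR: the dropped class is named explicitly, and need not be class 0.
score = iou(pred, target, ignore_index=0)
```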
55 changes: 37 additions & 18 deletions pytorch_lightning/metrics/classification.py
@@ -748,11 +748,11 @@ def __init__(
include_background: whether to also compute dice for the background
nan_score: score to return, if a NaN occurs during computation (denom zero)
no_fg_score: score to return, if no foreground pixel was found in target
reduction: a method to reduce metric score over labels (default: takes the mean)
Available reduction methods:
- elementwise_mean: takes the mean
- none: pass array
- sum: add elements
reduction: a method to reduce metric score over labels.

- ``'elementwise_mean'``: takes the mean (default)
- ``'sum'``: takes the sum
- ``'none'``: no reduction will be applied
reduce_group: the process group to reduce metric results from DDP
"""
super().__init__(
@@ -804,26 +804,45 @@ class IoU(TensorMetric):

"""

def __init__(self, remove_bg: bool = False, reduction: str = "elementwise_mean"):
def __init__(
self,
ignore_index: Optional[int] = None,
absent_score: float = 0.0,
num_classes: Optional[int] = None,
reduction: str = "elementwise_mean",
):
"""
Args:
remove_bg: Flag to state whether a background class has been included
within input parameters. If true, will remove background class. If
false, return IoU over all classes.
Assumes that background is '0' class in input tensor
reduction: a method to reduce metric score over labels (default: takes the mean)
Available reduction methods:

- elementwise_mean: takes the mean
- none: pass array
- sum: add elements
ignore_index: optional int specifying a target class to ignore. If given, this class index does not
contribute to the returned score, regardless of reduction method. Has no effect if given an int that is
not in the range [0, num_classes-1], where num_classes is either given or derived from pred and target.
By default, no index is ignored, and all classes are used.
absent_score: score to use for an individual class, if no instances of the class index were present in
`y_pred` AND no instances of the class index were present in `y_true`. For example, if we have 3
classes, [0, 0] for `y_pred`, and [0, 2] for `y_true`, then class 1 would be assigned the
`absent_score`. Default is 0.0.
num_classes: Optionally specify the number of classes
reduction: a method to reduce metric score over labels.

- ``'elementwise_mean'``: takes the mean (default)
- ``'sum'``: takes the sum
- ``'none'``: no reduction will be applied
"""
super().__init__(name="iou")
self.remove_bg = remove_bg
self.ignore_index = ignore_index
self.absent_score = absent_score
self.num_classes = num_classes
self.reduction = reduction

def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor, sample_weight: Optional[torch.Tensor] = None):
"""
Actual metric calculation.
"""
return iou(y_pred, y_true, remove_bg=self.remove_bg, reduction=self.reduction)
return iou(
pred=y_pred,
target=y_true,
ignore_index=self.ignore_index,
absent_score=self.absent_score,
num_classes=self.num_classes,
reduction=self.reduction,
)
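A usage sketch for the updated class interface; the expected output is derived from the new `absent_score` default and is an illustration, not output copied from the PR:

```python
import torch

from pytorch_lightning.metrics.classification import IoU

metric = IoU(absent_score=0.0, reduction='none')

pred = torch.tensor([0, 2])
target = torch.tensor([0, 2])

# num_classes is inferred as 3; class 1 appears in neither pred nor target,
# so it receives absent_score (0.0) instead of the near-zero epsilon-based
# score the old implementation produced for absent classes.
print(metric(pred, target))  # expected: tensor([1., 0., 1.])
```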
80 changes: 55 additions & 25 deletions pytorch_lightning/metrics/functional/classification.py
@@ -4,7 +4,7 @@
import torch
from torch.nn import functional as F

from pytorch_lightning.metrics.functional.reduction import reduce, class_reduce
from pytorch_lightning.metrics.functional.reduction import class_reduce, reduce
from pytorch_lightning.utilities import FLOAT16_EPSILON, rank_zero_warn


@@ -921,12 +921,11 @@ def dice_score(
bg: whether to also compute dice for the background
nan_score: score to return, if a NaN occurs during computation
no_fg_score: score to return, if no foreground pixel was found in target
reduction: a method to reduce metric score over labels (default: takes the mean)
Available reduction methods:
reduction: a method to reduce metric score over labels.

- elementwise_mean: takes the mean
- none: pass array
- sum: add elements
- ``'elementwise_mean'``: takes the mean (default)
- ``'sum'``: takes the sum
- ``'none'``: no reduction will be applied

Return:
Tensor containing dice score
@@ -963,27 +962,31 @@ def dice_score(
def iou(
pred: torch.Tensor,
target: torch.Tensor,
ignore_index: Optional[int] = None,
absent_score: float = 0.0,
num_classes: Optional[int] = None,
remove_bg: bool = False,
reduction: str = 'elementwise_mean'
reduction: str = 'elementwise_mean',
) -> torch.Tensor:
"""
Intersection over union, or Jaccard index calculation.

Args:
pred: Tensor containing predictions
target: Tensor containing targets
ignore_index: optional int specifying a target class to ignore. If given, this class index does not contribute
to the returned score, regardless of reduction method. Has no effect if given an int that is not in the
range [0, num_classes-1], where num_classes is either given or derived from pred and target. By default, no
index is ignored, and all classes are used.
absent_score: score to use for an individual class, if no instances of the class index were present in
`pred` AND no instances of the class index were present in `target`. For example, if we have 3 classes,
[0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be assigned the `absent_score`. Default is
0.0.
num_classes: Optionally specify the number of classes
remove_bg: Flag to state whether a background class has been included
within input parameters. If true, will remove background class. If
false, return IoU over all classes
Assumes that background is '0' class in input tensor
reduction: a method to reduce metric score over labels (default: takes the mean)
Available reduction methods:
reduction: a method to reduce metric score over labels.

- elementwise_mean: takes the mean
- none: pass array
- sum: add elements
- ``'elementwise_mean'``: takes the mean (default)
- ``'sum'``: takes the sum
- ``'none'``: no reduction will be applied

Return:
IoU score : Tensor containing single value if reduction is
@@ -998,12 +1001,39 @@ def iou(
tensor(0.4914)

"""
num_classes = get_num_classes(pred=pred, target=target, num_classes=num_classes)

tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred, target, num_classes)
if remove_bg:
tps = tps[1:]
fps = fps[1:]
fns = fns[1:]
denom = fps + fns + tps
denom[denom == 0] = torch.tensor(FLOAT16_EPSILON).type_as(denom)
iou = tps / denom
return reduce(iou, reduction=reduction)

scores = torch.zeros(num_classes, device=pred.device, dtype=torch.float32)

for class_idx in range(num_classes):
if class_idx == ignore_index:
continue

tp = tps[class_idx]
fp = fps[class_idx]
fn = fns[class_idx]
sup = sups[class_idx]

# If this class is absent in the target (no support) AND absent in the pred (no true or false
# positives), then use the absent_score for this class.
if sup + tp + fp == 0:
scores[class_idx] = absent_score
continue

denom = tp + fp + fn
# Note that we do not need to worry about division-by-zero here since we know (sup + tp + fp != 0) from above,
# which means ((tp+fn) + tp + fp != 0), which means (2tp + fp + fn != 0). Since all vars are non-negative, we
# can conclude (tp + fp + fn > 0), meaning the denominator is non-zero for each class.
score = tp.to(torch.float) / denom
scores[class_idx] = score

# Remove the ignored class index from the scores.
if ignore_index is not None and ignore_index >= 0 and ignore_index < num_classes:
scores = torch.cat([
scores[:ignore_index],
scores[ignore_index + 1:],
])

return reduce(scores, reduction=reduction)
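To make the new semantics concrete, a short sketch exercising both new arguments against the implementation above (the expected tensors follow the same arithmetic as the test fixtures below):

```python
import torch

from pytorch_lightning.metrics.functional.classification import iou

pred = torch.tensor([0, 0])
target = torch.tensor([0, 2])

# Class 1 is absent from both pred and target, so it gets absent_score;
# class 2 is present in target only, so it scores a genuine 0.
print(iou(pred, target, absent_score=-1.0, num_classes=3, reduction='none'))
# expected: tensor([ 0.5000, -1.0000,  0.0000])

# Ignoring class 0 removes it from the returned scores entirely.
print(iou(pred, target, num_classes=3, ignore_index=0, reduction='none'))
# expected: tensor([0., 0.])
```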
93 changes: 84 additions & 9 deletions tests/metrics/functional/test_classification.py
@@ -4,6 +4,7 @@
import torch
from sklearn.metrics import (
accuracy_score as sk_accuracy,
jaccard_score as sk_jaccard_score,
precision_score as sk_precision,
recall_score as sk_recall,
f1_score as sk_f1_score,
@@ -37,6 +38,7 @@

@pytest.mark.parametrize(['sklearn_metric', 'torch_metric'], [
pytest.param(sk_accuracy, accuracy, id='accuracy'),
pytest.param(partial(sk_jaccard_score, average='micro'), iou, id='iou'),
pytest.param(partial(sk_precision, average='micro'), precision, id='precision'),
pytest.param(partial(sk_recall, average='micro'), recall, id='recall'),
pytest.param(partial(sk_f1_score, average='micro'), f1_score, id='f1_score'),
@@ -360,22 +362,95 @@ def test_dice_score(pred, target, expected):
assert score == expected


@pytest.mark.parametrize(['half_ones', 'reduction', 'remove_bg', 'expected'], [
pytest.param(False, 'none', False, torch.Tensor([1, 1, 1])),
pytest.param(False, 'elementwise_mean', False, torch.Tensor([1])),
pytest.param(False, 'none', True, torch.Tensor([1, 1])),
pytest.param(True, 'none', False, torch.Tensor([0.5, 0.5, 0.5])),
pytest.param(True, 'elementwise_mean', False, torch.Tensor([0.5])),
pytest.param(True, 'none', True, torch.Tensor([0.5, 0.5])),
@pytest.mark.parametrize(['half_ones', 'reduction', 'ignore_index', 'expected'], [
pytest.param(False, 'none', None, torch.Tensor([1, 1, 1])),
pytest.param(False, 'elementwise_mean', None, torch.Tensor([1])),
pytest.param(False, 'none', 0, torch.Tensor([1, 1])),
pytest.param(True, 'none', None, torch.Tensor([0.5, 0.5, 0.5])),
pytest.param(True, 'elementwise_mean', None, torch.Tensor([0.5])),
pytest.param(True, 'none', 0, torch.Tensor([0.5, 0.5])),
])
def test_iou(half_ones, reduction, remove_bg, expected):
def test_iou(half_ones, reduction, ignore_index, expected):
pred = (torch.arange(120) % 3).view(-1, 1)
target = (torch.arange(120) % 3).view(-1, 1)
if half_ones:
pred[:60] = 1
iou_val = iou(pred, target, remove_bg=remove_bg, reduction=reduction)
iou_val = iou(
pred=pred,
target=target,
ignore_index=ignore_index,
reduction=reduction,
)
assert torch.allclose(iou_val, expected, atol=1e-9)


# TODO: When the jaccard_score of the sklearn version we use accepts `zero_division` (see
# https://github.com/scikit-learn/scikit-learn/pull/17866), consider adding a test here against our
# `absent_score`.
@pytest.mark.parametrize(['pred', 'target', 'ignore_index', 'absent_score', 'num_classes', 'expected'], [
# Note that -1 is used as the absent_score in almost all tests here to distinguish it from the range of valid
# scores the function can return ([0., 1.] range, inclusive).
# 2 classes, class 0 is correct everywhere, class 1 is absent.
pytest.param([0], [0], None, -1., 2, [1., -1.]),
pytest.param([0, 0], [0, 0], None, -1., 2, [1., -1.]),
# absent_score not applied if only class 0 is present and it's the only class.
pytest.param([0], [0], None, -1., 1, [1.]),
# 2 classes, class 1 is correct everywhere, class 0 is absent.
pytest.param([1], [1], None, -1., 2, [-1., 1.]),
pytest.param([1, 1], [1, 1], None, -1., 2, [-1., 1.]),
# When 0 index ignored, class 0 does not get a score (not even the absent_score).
pytest.param([1], [1], 0, -1., 2, [1.0]),
# 3 classes. Only 0 and 2 are present, and are perfectly predicted. 1 should get absent_score.
pytest.param([0, 2], [0, 2], None, -1., 3, [1., -1., 1.]),
pytest.param([2, 0], [2, 0], None, -1., 3, [1., -1., 1.]),
# 3 classes. Only 0 and 1 are present, and are perfectly predicted. 2 should get absent_score.
pytest.param([0, 1], [0, 1], None, -1., 3, [1., 1., -1.]),
pytest.param([1, 0], [1, 0], None, -1., 3, [1., 1., -1.]),
# 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in pred but not target; should not get absent_score), class
# 2 is absent.
pytest.param([0, 1], [0, 0], None, -1., 3, [0.5, 0., -1.]),
# 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in target but not pred; should not get absent_score), class
# 2 is absent.
pytest.param([0, 0], [0, 1], None, -1., 3, [0.5, 0., -1.]),
# Sanity checks with absent_score of 1.0.
pytest.param([0, 2], [0, 2], None, 1.0, 3, [1., 1., 1.]),
pytest.param([0, 2], [0, 2], 0, 1.0, 3, [1., 1.]),
])
def test_iou_absent_score(pred, target, ignore_index, absent_score, num_classes, expected):
iou_val = iou(
pred=torch.tensor(pred),
target=torch.tensor(target),
ignore_index=ignore_index,
absent_score=absent_score,
num_classes=num_classes,
reduction='none',
)
assert torch.allclose(iou_val, torch.tensor(expected).to(iou_val))


@pytest.mark.parametrize(['pred', 'target', 'ignore_index', 'num_classes', 'reduction', 'expected'], [
# Ignoring an index outside of [0, num_classes-1] should have no effect.
pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], None, 3, 'none', [1, 1 / 2, 2 / 3]),
pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], -1, 3, 'none', [1, 1 / 2, 2 / 3]),
pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 255, 3, 'none', [1, 1 / 2, 2 / 3]),
# Ignoring a valid index drops only that index from the result.
pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, 'none', [1 / 2, 2 / 3]),
pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 1, 3, 'none', [1, 2 / 3]),
pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 2, 3, 'none', [1, 1 / 2]),
# When reducing to mean or sum, the ignored index does not contribute to the output.
pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, 'elementwise_mean', [7 / 12]),
pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, 'sum', [7 / 6]),
])
def test_iou_ignore_index(pred, target, ignore_index, num_classes, reduction, expected):
iou_val = iou(
pred=torch.tensor(pred),
target=torch.tensor(target),
ignore_index=ignore_index,
num_classes=num_classes,
reduction=reduction,
)
assert torch.allclose(iou_val, torch.tensor(expected).to(iou_val))
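As a sanity check on the expected values in the parametrization above, the per-class counts for pred = [0, 1, 1, 2, 2] versus target = [0, 1, 2, 2, 2] work out as follows (a hand computation, not part of the PR):

```python
# Per-class IoU = tp / (tp + fp + fn):
#   class 0: tp=1, fp=0, fn=0 -> 1/1 = 1
#   class 1: tp=1, fp=1, fn=0 -> 1/2
#   class 2: tp=2, fp=0, fn=1 -> 2/3
# With ignore_index=0, class 0 is dropped before reduction:
#   elementwise_mean: (1/2 + 2/3) / 2 = 7/12
#   sum:              1/2 + 2/3       = 7/6
```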


# example data taken from
# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/tests/test_ranking.py
6 changes: 3 additions & 3 deletions tests/metrics/test_classification.py
@@ -226,9 +226,9 @@ def test_dice_coefficient(include_background):
assert isinstance(dice, torch.Tensor)


@pytest.mark.parametrize('remove_bg', [True, False])
def test_iou(remove_bg):
iou = IoU(remove_bg=remove_bg)
@pytest.mark.parametrize('ignore_index', [0, 1, None])
def test_iou(ignore_index):
iou = IoU(ignore_index=ignore_index)
assert iou.name == 'iou'

score = iou(torch.randint(0, 1, (10, 25, 25)),
1 change: 0 additions & 1 deletion tests/trainer/test_trainer_tricks.py
@@ -63,7 +63,6 @@ def test_overfit_batch_limits(tmpdir):
full_train_samples = len(train_loader)
num_train_samples = int(0.11 * full_train_samples)


# ------------------------------------------------------
# set VAL and Test loaders
# ------------------------------------------------------