Revert MAP implementation to pycocotools backend #1832

Merged Jul 3, 2023 (33 commits; showing changes from 27 commits)

Commits:
9058fee  refactor (SkafteNicki, Jun 8, 2023)
48f3788  tests (SkafteNicki, Jun 9, 2023)
8055b8e  Merge branch 'master' into new_map (Borda, Jun 12, 2023)
b586a42  working reference (SkafteNicki, Jun 13, 2023)
b8c99e7  fix dtype casting (SkafteNicki, Jun 13, 2023)
13fd068  remove old code (SkafteNicki, Jun 13, 2023)
3c6f01c  improve docs (SkafteNicki, Jun 13, 2023)
46f1f8a  Merge branch 'master' into new_map (SkafteNicki, Jun 13, 2023)
6132051  changelog (SkafteNicki, Jun 13, 2023)
908f658  refactor (SkafteNicki, Jun 13, 2023)
fd21da0  fix doc formatting (SkafteNicki, Jun 13, 2023)
5efc562  Merge branch 'master' into new_map (SkafteNicki, Jun 13, 2023)
8c0a41d  mypy (SkafteNicki, Jun 13, 2023)
5f993e4  Merge branch 'master' into new_map (SkafteNicki, Jun 13, 2023)
8c6748d  Merge branch 'master' into new_map (SkafteNicki, Jun 14, 2023)
5651da0  skip doctest on missing import (SkafteNicki, Jun 14, 2023)
1e7bb62  remove helper (SkafteNicki, Jun 15, 2023)
188ff33  Merge branch 'master' into new_map (SkafteNicki, Jun 19, 2023)
dc875d9  fix tests (SkafteNicki, Jun 21, 2023)
2dec2db  fix tests (SkafteNicki, Jun 21, 2023)
19f8c56  Merge branch 'master' into new_map (SkafteNicki, Jun 21, 2023)
c1d665f  Merge branch 'master' into new_map (SkafteNicki, Jun 22, 2023)
fb3d0e7  Merge branch 'master' into new_map (SkafteNicki, Jun 29, 2023)
b6141dc  Merge branch 'master' into new_map (SkafteNicki, Jun 29, 2023)
b97eeb9  readd old implementation (SkafteNicki, Jun 29, 2023)
e910a6e  Merge branch 'new_map' of https://github.com/PyTorchLightning/metrics… (SkafteNicki, Jun 29, 2023)
3d244dc  Merge branch 'master' into new_map (SkafteNicki, Jun 30, 2023)
f5ba0fd  ignore mypy (SkafteNicki, Jul 1, 2023)
c1dfb27  Merge branch 'new_map' of https://github.com/PyTorchLightning/metrics… (SkafteNicki, Jul 1, 2023)
b75056d  Merge branch 'master' into new_map (SkafteNicki, Jul 3, 2023)
94587ad  Merge branch 'master' into new_map (mergify[bot], Jul 3, 2023)
bad9952  Merge branch 'master' into new_map (mergify[bot], Jul 3, 2023)
40823c4  Merge branch 'master' into new_map (mergify[bot], Jul 3, 2023)

4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -152,6 +152,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Changed calculation in `PearsonCorrCoeff` to be more robust in certain cases ([#1729](https://github.com/Lightning-AI/torchmetrics/pull/1729))


- Changed `MeanAveragePrecision` to `pycocotools` backend ([#1832](https://github.com/Lightning-AI/torchmetrics/pull/1832))


### Deprecated

- Deprecated domain metrics import from package root (
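
For orientation, a minimal sketch of how the metric is driven after this change, assuming the standard torchmetrics detection API (per-image dicts of boxes/scores/labels, as in the tests below); the box values are illustrative:

import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

# one image: predictions carry boxes (xyxy), scores and labels; targets carry boxes and labels
preds = [{
    "boxes": torch.tensor([[258.0, 41.0, 606.0, 285.0]]),
    "scores": torch.tensor([0.536]),
    "labels": torch.tensor([0], dtype=torch.int32),
}]
target = [{
    "boxes": torch.tensor([[214.0, 41.0, 562.0, 285.0]]),
    "labels": torch.tensor([0], dtype=torch.int32),
}]

metric = MeanAveragePrecision(iou_type="bbox", class_metrics=True)
metric.update(preds, target)
result = metric.compute()  # dict with map, map_50, map_75, mar_100, per-class values, ...

The public update/compute pattern is unchanged by this PR; only the backend computing the numbers moves to pycocotools.
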
970 changes: 970 additions & 0 deletions src/torchmetrics/detection/_mean_ap.py

Large diffs are not rendered by default.

1,112 changes: 441 additions & 671 deletions src/torchmetrics/detection/mean_ap.py

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion tests/unittests/conftest.py
@@ -17,10 +17,11 @@

import pytest
import torch
from torch.multiprocessing import Pool, set_start_method
from torch.multiprocessing import Pool, set_sharing_strategy, set_start_method

with contextlib.suppress(RuntimeError):
set_start_method("spawn")
set_sharing_strategy("file_system")

NUM_PROCESSES = 2 # torch.cuda.device_count() if torch.cuda.is_available() else 2
NUM_BATCHES = 2 * NUM_PROCESSES  # needs to be divisible by the number of processes
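
Note: switching to the "file_system" sharing strategy here is presumably to avoid "Too many open files" errors when the DDP test workers pass many tensors between processes; the default "file_descriptor" strategy can exhaust the open-file limit.
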
3 changes: 3 additions & 0 deletions tests/unittests/detection/__init__.py
@@ -3,3 +3,6 @@
from unittests import _PATH_ROOT

_SAMPLE_DETECTION_SEGMENTATION = os.path.join(_PATH_ROOT, "_data", "detection", "instance_segmentation_inputs.json")
_DETECTION_VAL = os.path.join(_PATH_ROOT, "_data", "detection", "instances_val2014_100.json")
_DETECTION_BBOX = os.path.join(_PATH_ROOT, "_data", "detection", "instances_val2014_fakebbox100_results.json")
_DETECTION_SEGM = os.path.join(_PATH_ROOT, "_data", "detection", "instances_val2014_fakesegm100_results.json")
334 changes: 137 additions & 197 deletions tests/unittests/detection/test_map.py
@@ -11,76 +11,157 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import contextlib
import io
from collections import namedtuple
from copy import deepcopy
from functools import partial

import numpy as np
import pytest
import torch
from pycocotools import mask
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from torch import IntTensor, Tensor
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchmetrics.utilities.imports import _TORCHVISION_AVAILABLE, _TORCHVISION_GREATER_EQUAL_0_8
from torchmetrics.utilities.imports import _PYCOCOTOOLS_AVAILABLE, _TORCHVISION_GREATER_EQUAL_0_8

from unittests.detection import _SAMPLE_DETECTION_SEGMENTATION
from unittests.detection import _DETECTION_BBOX, _DETECTION_SEGM, _DETECTION_VAL, _SAMPLE_DETECTION_SEGMENTATION
from unittests.helpers.testers import MetricTester

Input = namedtuple("Input", ["preds", "target"])
_pytest_condition = not (_PYCOCOTOOLS_AVAILABLE and _TORCHVISION_GREATER_EQUAL_0_8)


def _create_inputs_masks() -> Input:
with open(_SAMPLE_DETECTION_SEGMENTATION) as fp:
inputs_json = json.load(fp)

_mask_unsqueeze_bool = lambda m: Tensor(mask.decode(m)).unsqueeze(0).bool()
_masks_stack_bool = lambda ms: Tensor(np.stack([mask.decode(m) for m in ms])).bool()

return Input(
preds=[
[
{
"masks": _mask_unsqueeze_bool(inputs_json["preds"][0]),
"scores": Tensor([0.236]),
"labels": IntTensor([4]),
},
{
"masks": _masks_stack_bool([inputs_json["preds"][1], inputs_json["preds"][2]]),
"scores": Tensor([0.318, 0.726]),
"labels": IntTensor([3, 2]),
}, # 73
],
[
{
"masks": _mask_unsqueeze_bool(inputs_json["preds"][0]),
"scores": Tensor([0.236]),
"labels": IntTensor([4]),
},
{
"masks": _masks_stack_bool([inputs_json["preds"][1], inputs_json["preds"][2]]),
"scores": Tensor([0.318, 0.726]),
"labels": IntTensor([3, 2]),
}, # 73
],
],
target=[
[
{"masks": _mask_unsqueeze_bool(inputs_json["targets"][0]), "labels": IntTensor([4])}, # 42
{
"masks": _masks_stack_bool([inputs_json["targets"][1], inputs_json["targets"][2]]),
"labels": IntTensor([2, 2]),
}, # 73
],
[
{"masks": _mask_unsqueeze_bool(inputs_json["targets"][0]), "labels": IntTensor([4])}, # 42
{
"masks": _masks_stack_bool([inputs_json["targets"][1], inputs_json["targets"][2]]),
"labels": IntTensor([2, 2]),
}, # 73
],
],
def _generate_coco_inputs(iou_type):
"""Generates inputs for the MAP metric.

The inputs are generated from the official COCO results json files:
https://github.com/cocodataset/cocoapi/tree/master/results
and should therefore correspond directly to the results reported there.
"""
batched_preds, batched_target = MeanAveragePrecision.coco_to_tm(
_DETECTION_BBOX if iou_type == "bbox" else _DETECTION_SEGM, _DETECTION_VAL, iou_type
)

# create 10 batches of 10 preds/targets each
batched_preds = [batched_preds[10 * i : 10 * (i + 1)] for i in range(10)]
batched_target = [batched_target[10 * i : 10 * (i + 1)] for i in range(10)]
return batched_preds, batched_target


_coco_bbox_input = _generate_coco_inputs("bbox")
_coco_segm_input = _generate_coco_inputs("segm")


def _compare_against_coco_fn(preds, target, iou_type, iou_thresholds=None, rec_thresholds=None, class_metrics=True):
"""Taken from https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb."""
with contextlib.redirect_stdout(io.StringIO()):
gt = COCO(_DETECTION_VAL)
dt = gt.loadRes(_DETECTION_BBOX) if iou_type == "bbox" else gt.loadRes(_DETECTION_SEGM)

coco_eval = COCOeval(gt, dt, iou_type)
if iou_thresholds is not None:
coco_eval.params.iouThrs = np.array(iou_thresholds, dtype=np.float64)
if rec_thresholds is not None:
coco_eval.params.recThrs = np.array(rec_thresholds, dtype=np.float64)

coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
global_stats = deepcopy(coco_eval.stats)

map_per_class_values = torch.Tensor([-1])
mar_100_per_class_values = torch.Tensor([-1])
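# assumed: the category ids present in this val2014 subset (ids 0-90 minus the excluded set), i.e. the classes the metric reports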
classes = torch.tensor(
list(set(torch.arange(91).tolist()) - {0, 12, 19, 26, 29, 30, 45, 66, 68, 69, 71, 76, 83, 87, 89})
)

if class_metrics:
map_per_class_list = []
mar_100_per_class_list = []
for class_id in classes.tolist():
coco_eval.params.catIds = [class_id]
with contextlib.redirect_stdout(io.StringIO()):
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
class_stats = coco_eval.stats
map_per_class_list.append(torch.Tensor([class_stats[0]]))
mar_100_per_class_list.append(torch.Tensor([class_stats[8]]))

map_per_class_values = torch.Tensor(map_per_class_list)
mar_100_per_class_values = torch.Tensor(mar_100_per_class_list)

return {
"map": Tensor([global_stats[0]]),
"map_50": Tensor([global_stats[1]]),
"map_75": Tensor([global_stats[2]]),
"map_small": Tensor([global_stats[3]]),
"map_medium": Tensor([global_stats[4]]),
"map_large": Tensor([global_stats[5]]),
"mar_1": Tensor([global_stats[6]]),
"mar_10": Tensor([global_stats[7]]),
"mar_100": Tensor([global_stats[8]]),
"mar_small": Tensor([global_stats[9]]),
"mar_medium": Tensor([global_stats[10]]),
"mar_large": Tensor([global_stats[11]]),
"map_per_class": map_per_class_values,
"mar_100_per_class": mar_100_per_class_values,
"classes": classes,
}


@pytest.mark.skipif(_pytest_condition, reason="test requires that torchvision>=0.8.0 and pycocotools are installed")
@pytest.mark.parametrize("iou_type", ["bbox", "segm"])
@pytest.mark.parametrize("ddp", [False, True])
class TestMAPUsingCOCOReference(MetricTester):
"""Test map metric on the reference coco data."""

@pytest.mark.parametrize("iou_thresholds", [None, [0.25, 0.5, 0.75]])
@pytest.mark.parametrize("rec_thresholds", [None, [0.25, 0.5, 0.75]])
def test_map(self, iou_type, iou_thresholds, rec_thresholds, ddp):
"""Test modular implementation for correctness."""
preds, target = _coco_bbox_input if iou_type == "bbox" else _coco_segm_input
self.run_class_metric_test(
ddp=ddp,
preds=preds,
target=target,
metric_class=MeanAveragePrecision,
reference_metric=partial(
_compare_against_coco_fn,
iou_type=iou_type,
iou_thresholds=iou_thresholds,
rec_thresholds=rec_thresholds,
class_metrics=False,
),
metric_args={
"iou_type": iou_type,
"iou_thresholds": iou_thresholds,
"rec_thresholds": rec_thresholds,
"class_metrics": False,
},
check_batch=False,
atol=1e-2,
)

def test_map_classwise(self, iou_type, ddp):
"""Test modular implementation for correctness with classwise=True. Needs bigger atol to be stable."""
preds, target = _coco_bbox_input if iou_type == "bbox" else _coco_segm_input
self.run_class_metric_test(
ddp=ddp,
preds=preds,
target=target,
metric_class=MeanAveragePrecision,
reference_metric=partial(_compare_against_coco_fn, iou_type=iou_type, class_metrics=True),
metric_args={"iou_type": iou_type, "class_metrics": True},
check_batch=False,
atol=1e-1,
)


Input = namedtuple("Input", ["preds", "target"])


_inputs = Input(
preds=[
@@ -244,154 +325,13 @@ def _create_inputs_masks() -> Input:
{
"boxes": Tensor([[1.0, 2.0, 3.0, 4.0]]),
"scores": Tensor([0.8]), # target does not have scores
"labels": Tensor([1]),
"labels": IntTensor([1]),
},
],
],
)


def _compare_fn(preds, target) -> dict:
"""Comparison function for map implementation.

Official pycocotools results calculated from a subset of https://github.com/cocodataset/cocoapi/tree/master/results
All classes
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.637
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.859
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.761
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.622
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.800
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.635
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.432
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.652
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.652
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.673
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.800
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.633

Class 0
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.725
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.780

Class 1
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.800
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.800

Class 2
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.454
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.450

Class 3
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = -1.000

Class 4
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.650
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.650

Class 49
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.556
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.580
"""
return {
"map": Tensor([0.637]),
"map_50": Tensor([0.859]),
"map_75": Tensor([0.761]),
"map_small": Tensor([0.622]),
"map_medium": Tensor([0.800]),
"map_large": Tensor([0.635]),
"mar_1": Tensor([0.432]),
"mar_10": Tensor([0.652]),
"mar_100": Tensor([0.652]),
"mar_small": Tensor([0.673]),
"mar_medium": Tensor([0.800]),
"mar_large": Tensor([0.633]),
"map_per_class": Tensor([0.725, 0.800, 0.454, -1.000, 0.650, 0.556]),
"mar_100_per_class": Tensor([0.780, 0.800, 0.450, -1.000, 0.650, 0.580]),
"classes": Tensor([0, 1, 2, 3, 4, 49]),
}


def _compare_fn_segm(preds, target) -> dict:
"""Comparison function for map implementation for instance segmentation.

Official pycocotools results calculated from a subset of https://github.com/cocodataset/cocoapi/tree/master/results
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.352
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.752
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.252
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.352
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.350
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.350
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.350
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.350
"""
return {
"map": Tensor([0.352]),
"map_50": Tensor([0.752]),
"map_75": Tensor([0.252]),
"map_small": Tensor([-1]),
"map_medium": Tensor([-1]),
"map_large": Tensor([0.352]),
"mar_1": Tensor([0.35]),
"mar_10": Tensor([0.35]),
"mar_100": Tensor([0.35]),
"mar_small": Tensor([-1]),
"mar_medium": Tensor([-1]),
"mar_large": Tensor([0.35]),
"map_per_class": Tensor([0.4039604, -1.0, 0.3]),
"mar_100_per_class": Tensor([0.4, -1.0, 0.3]),
"classes": Tensor([2, 3, 4]),
}


_pytest_condition = not (_TORCHVISION_AVAILABLE and _TORCHVISION_GREATER_EQUAL_0_8)


@pytest.mark.skipif(_pytest_condition, reason="test requires that torchvision>=0.8.0 is installed")
@pytest.mark.parametrize("compute_on_cpu", [True])
class TestMAP(MetricTester):
"""Test the MAP metric for object detection predictions.

Results are compared to the original values from the pycocotools implementation, using the first 10 fake
predictions from the official repo:
https://github.com/cocodataset/cocoapi/blob/master/results/instances_val2014_fakebbox100_results.json
"""

atol = 1e-2

@pytest.mark.parametrize("ddp", [False, True])
def test_map_bbox(self, compute_on_cpu, ddp):
"""Test modular implementation for correctness."""
self.run_class_metric_test(
ddp=ddp,
preds=_inputs.preds,
target=_inputs.target,
metric_class=MeanAveragePrecision,
reference_metric=_compare_fn,
check_batch=False,
metric_args={"class_metrics": True, "compute_on_cpu": compute_on_cpu},
)

@pytest.mark.parametrize("ddp", [False, True])
def test_map_segm(self, compute_on_cpu, ddp):
"""Test modular implementation for correctness."""
_inputs_masks = _create_inputs_masks()
self.run_class_metric_test(
ddp=ddp,
preds=_inputs_masks.preds,
target=_inputs_masks.target,
metric_class=MeanAveragePrecision,
reference_metric=_compare_fn_segm,
check_batch=False,
metric_args={"class_metrics": True, "compute_on_cpu": compute_on_cpu, "iou_type": "segm"},
)


# noinspection PyTypeChecker
@pytest.mark.skipif(_pytest_condition, reason="test requires that torchvision>=0.8.0 is installed")
def test_error_on_wrong_init():
"""Test class raises the expected errors."""
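
As a usage note, the `coco_to_tm` classmethod exercised in `_generate_coco_inputs` above can also be called directly. A sketch, assuming the positional signature used in the test (detections json, ground-truth json, iou_type) and the cocoapi sample files referenced by the fixtures:

from torchmetrics.detection.mean_ap import MeanAveragePrecision

# convert official COCO-format json files into lists of torchmetrics preds/target dicts
preds, target = MeanAveragePrecision.coco_to_tm(
    "instances_val2014_fakebbox100_results.json",  # detection results
    "instances_val2014_100.json",                  # ground-truth annotations
    "bbox",                                        # iou_type
)

metric = MeanAveragePrecision(iou_type="bbox")
metric.update(preds, target)
result = metric.compute()  # should track pycocotools' COCOeval summary for the same files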