From eb7bb7c871d4dac3a5765bec35cbed69c6771497 Mon Sep 17 00:00:00 2001
From: Wonju Lee
Date: Wed, 28 Aug 2024 22:47:10 +0900
Subject: [PATCH 1/4] add rtmpose_tiny for single obj

---
 src/otx/recipe/_base_/data/keypoint_detection.yaml      | 2 +-
 tests/unit/core/data/transform_libs/test_torchvision.py | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/otx/recipe/_base_/data/keypoint_detection.yaml b/src/otx/recipe/_base_/data/keypoint_detection.yaml
index 756bddba960..0f1cc0f54ae 100644
--- a/src/otx/recipe/_base_/data/keypoint_detection.yaml
+++ b/src/otx/recipe/_base_/data/keypoint_detection.yaml
@@ -44,7 +44,7 @@ val_subset:
       mean: [123.675, 116.28, 103.53]
       std: [58.395, 57.12, 57.375]
 test_subset:
-  subset_name: test
+  subset_name: val
   batch_size: 32
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.GetBBoxCenterScale
diff --git a/tests/unit/core/data/transform_libs/test_torchvision.py b/tests/unit/core/data/transform_libs/test_torchvision.py
index 1a1363d6821..6655c535935 100644
--- a/tests/unit/core/data/transform_libs/test_torchvision.py
+++ b/tests/unit/core/data/transform_libs/test_torchvision.py
@@ -915,6 +915,11 @@ def test_forward(self, keypoint_det_entity) -> None:
         )

         results = transform(deepcopy(keypoint_det_entity))
+<<<<<<< HEAD
         assert np.array_equal(results.bbox_info.center, np.array([3.5, 3.5]))
         assert np.array_equal(results.bbox_info.scale, np.array([8.75, 8.75]))
+=======
+        assert torch.all(results.bbox_info.center == torch.Tensor([[3.5, 3.5]]))
+        assert torch.all(results.bbox_info.scale == torch.Tensor([[8.75, 8.75]]))
+>>>>>>> add rtmpose_tiny for single obj

         assert results.keypoints.shape == (4, 2)

From ae6293a562c26ce4821dd289fcd133ffc6a88b4c Mon Sep 17 00:00:00 2001
From: Wonju Lee
Date: Wed, 28 Aug 2024 23:08:05 +0900
Subject: [PATCH 2/4] modify test subset name

---
 src/otx/recipe/_base_/data/keypoint_detection.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/otx/recipe/_base_/data/keypoint_detection.yaml b/src/otx/recipe/_base_/data/keypoint_detection.yaml
index 0f1cc0f54ae..756bddba960 100644
--- a/src/otx/recipe/_base_/data/keypoint_detection.yaml
+++ b/src/otx/recipe/_base_/data/keypoint_detection.yaml
@@ -44,7 +44,7 @@ val_subset:
       mean: [123.675, 116.28, 103.53]
       std: [58.395, 57.12, 57.375]
 test_subset:
-  subset_name: val
+  subset_name: test
   batch_size: 32
   transforms:
     - class_path: otx.core.data.transform_libs.torchvision.GetBBoxCenterScale

From b24ee2ebb60a7d43c704da4523bda2b2d606e29b Mon Sep 17 00:00:00 2001
From: Wonju Lee
Date: Wed, 28 Aug 2024 23:38:28 +0900
Subject: [PATCH 3/4] fix unit test

---
 tests/unit/core/data/transform_libs/test_torchvision.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/unit/core/data/transform_libs/test_torchvision.py b/tests/unit/core/data/transform_libs/test_torchvision.py
index 6655c535935..1a1363d6821 100644
--- a/tests/unit/core/data/transform_libs/test_torchvision.py
+++ b/tests/unit/core/data/transform_libs/test_torchvision.py
@@ -915,11 +915,6 @@ def test_forward(self, keypoint_det_entity) -> None:
         )

         results = transform(deepcopy(keypoint_det_entity))
-<<<<<<< HEAD
         assert np.array_equal(results.bbox_info.center, np.array([3.5, 3.5]))
         assert np.array_equal(results.bbox_info.scale, np.array([8.75, 8.75]))
-=======
-        assert torch.all(results.bbox_info.center == torch.Tensor([[3.5, 3.5]]))
-        assert torch.all(results.bbox_info.scale == torch.Tensor([[8.75, 8.75]]))
->>>>>>> add rtmpose_tiny for single obj

         assert results.keypoints.shape == (4, 2)
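Net effect of patches 1-3: the base recipe's test subset name ends up back at "test", and the unit test keeps the numpy-based assertions once the accidentally committed conflict markers are removed. As a sanity check of the asserted values, here is a minimal, self-contained sketch of the center/scale math behind GetBBoxCenterScale; the [0, 0, 7, 7] box and the 1.25 padding factor are assumptions inferred from the expected (3.5, 3.5) center and (8.75, 8.75) scale, not values taken from the patch.

    # Sketch of a GetBBoxCenterScale-style computation (assumed inputs).
    import numpy as np

    def bbox_center_scale(bbox: np.ndarray, padding: float = 1.25) -> tuple[np.ndarray, np.ndarray]:
        """Return the center and padded (width, height) scale of an xyxy box."""
        x1, y1, x2, y2 = bbox
        center = np.array([(x1 + x2) / 2.0, (y1 + y2) / 2.0])
        scale = np.array([(x2 - x1) * padding, (y2 - y1) * padding])
        return center, scale

    center, scale = bbox_center_scale(np.array([0.0, 0.0, 7.0, 7.0]))
    assert np.array_equal(center, np.array([3.5, 3.5]))   # (0 + 7) / 2
    assert np.array_equal(scale, np.array([8.75, 8.75]))  # 7 * 1.25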
From e00276abf0b64a9cd2b262f0f54854240ba03074 Mon Sep 17 00:00:00 2001
From: Wonju Lee
Date: Fri, 30 Aug 2024 23:58:59 +0900
Subject: [PATCH 4/4] property for pck

---
 src/otx/core/metrics/pck.py              | 18 +++-
 src/otx/core/model/keypoint_detection.py |  5 +
 .../keypoint_detection/rtmpose_tiny.yaml  |  2 +-
 .../rtmpose_tiny_single_obj.yaml          | 93 ++++++++++---------
 tests/perf/test_keypoint_detection.py    | 90 ++++++++++++++++--
 5 files changed, 153 insertions(+), 55 deletions(-)

diff --git a/src/otx/core/metrics/pck.py b/src/otx/core/metrics/pck.py
index 61ab3909865..941ed679775 100644
--- a/src/otx/core/metrics/pck.py
+++ b/src/otx/core/metrics/pck.py
@@ -147,6 +147,22 @@ def __init__(
         self.label_info: LabelInfo = label_info
         self.reset()

+    @property
+    def input_size(self) -> tuple[int, int]:
+        """Getter for input_size."""
+        return self._input_size
+
+    @input_size.setter
+    def input_size(self, size: tuple[int, int]) -> None:
+        """Setter for input_size."""
+        if not isinstance(size, tuple) or len(size) != 2:
+            msg = "input_size must be a tuple of two integers."
+            raise ValueError(msg)
+        if not all(isinstance(dim, int) for dim in size):
+            msg = "input_size dimensions must be integers."
+            raise ValueError(msg)
+        self._input_size = size
+
     def reset(self) -> None:
         """Reset for every validation and test epoch.
@@ -177,7 +193,7 @@ def compute(self) -> dict:
         gt_kpts = np.stack([p[0] for p in self.targets])
         kpts_visible = np.stack([p[1] for p in self.targets])

-        normalize = np.tile(np.array([[256, 192]]), (pred_kpts.shape[0], 1))
+        normalize = np.tile(np.array([self.input_size]), (pred_kpts.shape[0], 1))
         _, avg_acc, _ = keypoint_pck_accuracy(
             pred_kpts,
             gt_kpts,
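The new property replaces the hard-coded (256, 192) normalization in compute() with a validated, configurable input size. A minimal, self-contained sketch of that validated-property pattern follows; Metric is a stand-in name, not the actual class in pck.py, and the normalization line mirrors the hunk above.

    # Stand-in illustration of the property/setter added to pck.py above.
    import numpy as np

    class Metric:
        """Toy metric exposing a validated input_size, mirroring the patch."""

        @property
        def input_size(self) -> tuple[int, int]:
            return self._input_size

        @input_size.setter
        def input_size(self, size: tuple[int, int]) -> None:
            if not isinstance(size, tuple) or len(size) != 2:
                raise ValueError("input_size must be a tuple of two integers.")
            if not all(isinstance(dim, int) for dim in size):
                raise ValueError("input_size dimensions must be integers.")
            self._input_size = size

    m = Metric()
    m.input_size = (512, 512)           # accepted: tuple of two ints
    # PCK normalization now follows the configured size, one row per sample:
    normalize = np.tile(np.array([m.input_size]), (3, 1))
    assert normalize.shape == (3, 2)
    try:
        m.input_size = [512, 512]       # rejected: list, not tuple
    except ValueError as err:
        print(err)                      # input_size must be a tuple of two integers.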
diff --git a/src/otx/core/model/keypoint_detection.py b/src/otx/core/model/keypoint_detection.py
index 406c6c8031e..ea91d00cd67 100644
--- a/src/otx/core/model/keypoint_detection.py
+++ b/src/otx/core/model/keypoint_detection.py
@@ -104,6 +104,11 @@ def _customize_outputs(
             bbox_info=[],
         )

+    def configure_metric(self) -> None:
+        """Configure the metric."""
+        super().configure_metric()
+        self._metric.input_size = self.input_size
+
     def _convert_pred_entity_to_compute_metric(
         self,
         preds: KeypointDetBatchPredEntity,
diff --git a/src/otx/recipe/keypoint_detection/rtmpose_tiny.yaml b/src/otx/recipe/keypoint_detection/rtmpose_tiny.yaml
index 1a25a2d39d4..447d4fd5218 100644
--- a/src/otx/recipe/keypoint_detection/rtmpose_tiny.yaml
+++ b/src/otx/recipe/keypoint_detection/rtmpose_tiny.yaml
@@ -6,7 +6,7 @@ model:
   optimizer:
     class_path: torch.optim.AdamW
     init_args:
-      lr: 0.004
+      lr: 0.001
       weight_decay: 0.0001

   scheduler:
diff --git a/src/otx/recipe/keypoint_detection/rtmpose_tiny_single_obj.yaml b/src/otx/recipe/keypoint_detection/rtmpose_tiny_single_obj.yaml
index 8b22c757330..8045bb5e85c 100644
--- a/src/otx/recipe/keypoint_detection/rtmpose_tiny_single_obj.yaml
+++ b/src/otx/recipe/keypoint_detection/rtmpose_tiny_single_obj.yaml
@@ -2,6 +2,9 @@ model:
   class_path: otx.algo.keypoint_detection.rtmpose.RTMPoseTiny
   init_args:
     label_info: 17
+    input_size:
+      - 512
+      - 512

 optimizer:
   class_path: torch.optim.AdamW
@@ -35,47 +38,49 @@ overrides:
     - data.train_subset.transforms
     - data.val_subset.transforms
     - data.test_subset.transforms
-  input_size:
-    - 512
-    - 512
-  train_subset:
-    transforms:
-      - class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
-        init_args:
-          input_size: $(input_size)
-      - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        init_args:
-          is_numpy_to_tvtensor: true
-      - class_path: torchvision.transforms.v2.ToDtype
-        init_args:
-          dtype: ${as_torch_dtype:torch.float32}
-      - class_path: torchvision.transforms.v2.Normalize
-        init_args:
-          mean: [123.675, 116.28, 103.53]
-          std: [58.395, 57.12, 57.375]
-  val_subset:
-    transforms:
-      - class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
-        init_args:
-          input_size: $(input_size)
-          is_numpy_to_tvtensor: true
-      - class_path: torchvision.transforms.v2.ToDtype
-        init_args:
-          dtype: ${as_torch_dtype:torch.float32}
-      - class_path: torchvision.transforms.v2.Normalize
-        init_args:
-          mean: [123.675, 116.28, 103.53]
-          std: [58.395, 57.12, 57.375]
-  test_subset:
-    transforms:
-      - class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
-        init_args:
-          input_size: $(input_size)
-          is_numpy_to_tvtensor: true
-      - class_path: torchvision.transforms.v2.ToDtype
-        init_args:
-          dtype: ${as_torch_dtype:torch.float32}
-      - class_path: torchvision.transforms.v2.Normalize
-        init_args:
-          mean: [123.675, 116.28, 103.53]
-          std: [58.395, 57.12, 57.375]
+  data:
+    input_size:
+      - 512
+      - 512
+    train_subset:
+      transforms:
+        - class_path: otx.core.data.transform_libs.torchvision.RandomBBoxTransform
+        - class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
+          init_args:
+            input_size: $(input_size)
+        - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug
+          init_args:
+            is_numpy_to_tvtensor: true
+        - class_path: torchvision.transforms.v2.ToDtype
+          init_args:
+            dtype: ${as_torch_dtype:torch.float32}
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [123.675, 116.28, 103.53]
+            std: [58.395, 57.12, 57.375]
+    val_subset:
+      transforms:
+        - class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
+          init_args:
+            input_size: $(input_size)
+            is_numpy_to_tvtensor: true
+        - class_path: torchvision.transforms.v2.ToDtype
+          init_args:
+            dtype: ${as_torch_dtype:torch.float32}
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [123.675, 116.28, 103.53]
+            std: [58.395, 57.12, 57.375]
+    test_subset:
+      transforms:
+        - class_path: otx.core.data.transform_libs.torchvision.TopdownAffine
+          init_args:
+            input_size: $(input_size)
+            is_numpy_to_tvtensor: true
+        - class_path: torchvision.transforms.v2.ToDtype
+          init_args:
+            dtype: ${as_torch_dtype:torch.float32}
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [123.675, 116.28, 103.53]
+            std: [58.395, 57.12, 57.375]
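configure_metric() is the glue between the recipe and the metric: after the base class builds the metric, the model pushes its own input_size into it, so the 512x512 resolution set in rtmpose_tiny_single_obj.yaml reaches PCK normalization without further plumbing. A self-contained sketch of that hand-off follows; FakeModel and FakeMetric are stand-ins for the real OTX classes, which are not shown in full in this patch.

    # Sketch of the configure_metric() wiring added above (assumed names).
    class FakeMetric:
        input_size = (256, 192)  # the old hard-coded normalization size

    class FakeModel:
        def __init__(self, input_size):
            self.input_size = input_size
            self._metric = FakeMetric()

        def configure_metric(self):
            # mirrors the override in src/otx/core/model/keypoint_detection.py
            self._metric.input_size = self.input_size

    model = FakeModel(input_size=(512, 512))  # as set in rtmpose_tiny_single_obj.yaml
    model.configure_metric()
    assert model._metric.input_size == (512, 512)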
diff --git a/tests/perf/test_keypoint_detection.py b/tests/perf/test_keypoint_detection.py
index 3a2f2a299c9..1ff150a03d6 100644
--- a/tests/perf/test_keypoint_detection.py
+++ b/tests/perf/test_keypoint_detection.py
@@ -5,6 +5,7 @@
 from __future__ import annotations

 from pathlib import Path
+from typing import ClassVar

 import pytest

@@ -19,26 +20,97 @@ class TestPerfKeypointDetection(PerfTestBase):
         Benchmark.Model(task="keypoint_detection", name="rtmpose_tiny", category="speed"),
     ]

-    DATASET_TEST_CASES = [
+    DATASET_TEST_CASES: ClassVar = [
         Benchmark.Dataset(
-            name=f"coco_person_keypoint_small_{idx}",
-            path=Path("keypoint_detection/coco_keypoint_small") / f"{idx}",
+            name="coco_person_keypoint_small",
+            path=Path("keypoint_detection/coco_keypoint/small"),
             group="small",
             num_repeat=5,
             extra_overrides={},
-        )
-        for idx in (1, 2, 3)
-    ] + [
+        ),
         Benchmark.Dataset(
             name="coco_person_keypoint_medium",
-            path=Path("keypoint_detection/coco_keypoint_medium"),
+            path=Path("keypoint_detection/coco_keypoint/medium"),
+            group="medium",
+            num_repeat=5,
+            extra_overrides={},
+        ),
+        Benchmark.Dataset(
+            name="coco_person_keypoint_large",
+            path=Path("keypoint_detection/coco_keypoint/large"),
+            group="large",
+            num_repeat=5,
+            extra_overrides={},
+        ),
+    ]
+
+    BENCHMARK_CRITERIA = [  # noqa: RUF012
+        Benchmark.Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
+        Benchmark.Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
+        Benchmark.Criterion(name="val/accuracy", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test/accuracy", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="export/accuracy", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="optimize/accuracy", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
+    ]
+
+    @pytest.mark.parametrize(
+        "fxt_model",
+        MODEL_TEST_CASES,
+        ids=lambda model: model.name,
+        indirect=True,
+    )
+    @pytest.mark.parametrize(
+        "fxt_dataset",
+        DATASET_TEST_CASES,
+        ids=lambda dataset: dataset.name,
+        indirect=True,
+    )
+    def test_perf(
+        self,
+        fxt_model: Benchmark.Model,
+        fxt_dataset: Benchmark.Dataset,
+        fxt_benchmark: Benchmark,
+    ):
+        self._test_perf(
+            model=fxt_model,
+            dataset=fxt_dataset,
+            benchmark=fxt_benchmark,
+            criteria=self.BENCHMARK_CRITERIA,
+        )
+
+
+class TestPerfKeypointDetectionSingleObj(PerfTestBase):
+    """Benchmark keypoint detection (single object)."""
+
+    MODEL_TEST_CASES = [  # noqa: RUF012
+        Benchmark.Model(task="keypoint_detection", name="rtmpose_tiny_single_obj", category="speed"),
+    ]
+
+    DATASET_TEST_CASES: ClassVar = [
+        Benchmark.Dataset(
+            name="coco_person_keypoint_single_obj_small",
+            path=Path("keypoint_detection/coco_keypoint_single_obj/small"),
+            group="small",
+            num_repeat=5,
+            extra_overrides={},
+        ),
+        Benchmark.Dataset(
+            name="coco_person_keypoint_single_obj_medium",
+            path=Path("keypoint_detection/coco_keypoint_single_obj/medium"),
             group="medium",
             num_repeat=5,
             extra_overrides={},
         ),
         Benchmark.Dataset(
-            name="mpii_large",
-            path=Path("keypoint_detection/mpii_large"),
+            name="coco_person_keypoint_single_obj_large",
+            path=Path("keypoint_detection/coco_keypoint_single_obj/large"),
             group="large",
             num_repeat=5,
             extra_overrides={},