From 79c7360467a5ae5a547023966197a360c87696ee Mon Sep 17 00:00:00 2001 From: jaegukhyun Date: Tue, 28 May 2024 14:25:20 +0900 Subject: [PATCH 1/5] Sync rgb order between torch and ov inference of action classification task --- src/otx/core/data/transform_libs/torchvision.py | 5 ++++- .../recipe/action/action_classification/openvino_model.yaml | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/otx/core/data/transform_libs/torchvision.py b/src/otx/core/data/transform_libs/torchvision.py index a823b87493d..169ebac5ae8 100644 --- a/src/otx/core/data/transform_libs/torchvision.py +++ b/src/otx/core/data/transform_libs/torchvision.py @@ -236,7 +236,10 @@ def _transform(self, inpt: Video, params: dict) -> tv_tensors.Video: start_index = 0 frame_inds = np.concatenate(frame_inds) + start_index - outputs = torch.stack([torch.tensor(inpt[idx].data) for idx in frame_inds], dim=0) + outputs = torch.stack( + [torch.tensor(cv2.cvtColor(inpt[idx].data, cv2.COLOR_RGB2BGR)) for idx in frame_inds], + dim=0, + ) outputs = outputs.permute(0, 3, 1, 2) outputs = tv_tensors.Video(outputs) inpt.close() diff --git a/src/otx/recipe/action/action_classification/openvino_model.yaml b/src/otx/recipe/action/action_classification/openvino_model.yaml index 95cd0766187..7d9f4b81fc7 100644 --- a/src/otx/recipe/action/action_classification/openvino_model.yaml +++ b/src/otx/recipe/action/action_classification/openvino_model.yaml @@ -18,7 +18,7 @@ overrides: data: task: ACTION_CLASSIFICATION config: - image_color_channel: BGR + image_color_channel: RGB data_format: kinetics train_subset: batch_size: 8 From 3ab625efdda66d8fee282877fc990e93183457cd Mon Sep 17 00:00:00 2001 From: jaegukhyun Date: Tue, 28 May 2024 15:57:40 +0900 Subject: [PATCH 2/5] Fix unit tests --- tests/unit/core/data/transform_libs/test_torchvision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/core/data/transform_libs/test_torchvision.py 
b/tests/unit/core/data/transform_libs/test_torchvision.py index 72ecdbd3b27..9d9c089a5a5 100644 --- a/tests/unit/core/data/transform_libs/test_torchvision.py +++ b/tests/unit/core/data/transform_libs/test_torchvision.py @@ -38,7 +38,7 @@ class MockFrame: - data = np.ndarray([3, 10, 10]) + data = np.ndarray([10, 10, 3], dtype=np.uint8) class MockVideo: From 695198995d5a064a5b09df58e22893f55d552163 Mon Sep 17 00:00:00 2001 From: jaegukhyun Date: Wed, 29 May 2024 12:35:09 +0900 Subject: [PATCH 3/5] Add error for unsupported color format --- .../data/dataset/action_classification.py | 38 ++++++++++++++++++- src/otx/recipe/_base_/data/mmaction_base.yaml | 2 +- .../action_classification/openvino_model.yaml | 2 +- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/src/otx/core/data/dataset/action_classification.py b/src/otx/core/data/dataset/action_classification.py index 6a4f97faa50..ddea3f79a0c 100644 --- a/src/otx/core/data/dataset/action_classification.py +++ b/src/otx/core/data/dataset/action_classification.py @@ -6,7 +6,7 @@ from __future__ import annotations from functools import partial -from typing import Callable +from typing import TYPE_CHECKING, Callable import torch from datumaro import Label @@ -14,11 +14,47 @@ from otx.core.data.dataset.base import OTXDataset from otx.core.data.entity.action_classification import ActionClsBatchDataEntity, ActionClsDataEntity from otx.core.data.entity.base import ImageInfo +from otx.core.data.mem_cache import NULL_MEM_CACHE_HANDLER +from otx.core.types.image import ImageColorChannel + +if TYPE_CHECKING: + from datumaro import DatasetSubset + + from otx.core.data.dataset.base import Transforms + from otx.core.data.mem_cache import MemCacheHandlerBase class OTXActionClsDataset(OTXDataset[ActionClsDataEntity]): """OTXDataset class for action classification task.""" + def __init__( + self, + dm_subset: DatasetSubset, + transforms: Transforms, + mem_cache_handler: MemCacheHandlerBase = NULL_MEM_CACHE_HANDLER, + 
mem_cache_img_max_size: tuple[int, int] | None = None, + max_refetch: int = 1000, + image_color_channel: ImageColorChannel = ImageColorChannel.BGR, + stack_images: bool = True, + to_tv_image: bool = True, + ) -> None: + super().__init__( + dm_subset, + transforms, + mem_cache_handler, + mem_cache_img_max_size, + max_refetch, + image_color_channel, + stack_images, + to_tv_image, + ) + # TODO(Someone): ImageColorChannel is not used in action classification task + # This task only supports BGR color format. + # There should be implementation that links between ImageColorChannel and action classification task. + if self.image_color_channel != ImageColorChannel.BGR: + msg = "Action classification task only supports BGR color format." + raise ValueError(msg) + def _get_item_impl(self, idx: int) -> ActionClsDataEntity | None: item = self.dm_subset[idx] diff --git a/src/otx/recipe/_base_/data/mmaction_base.yaml b/src/otx/recipe/_base_/data/mmaction_base.yaml index 4666e4ab00d..dc1297bf9c9 100644 --- a/src/otx/recipe/_base_/data/mmaction_base.yaml +++ b/src/otx/recipe/_base_/data/mmaction_base.yaml @@ -5,7 +5,7 @@ config: mem_cache_img_max_size: - 500 - 500 - image_color_channel: RGB + image_color_channel: BGR stack_images: False unannotated_items_ratio: 0.0 train_subset: diff --git a/src/otx/recipe/action/action_classification/openvino_model.yaml b/src/otx/recipe/action/action_classification/openvino_model.yaml index 7d9f4b81fc7..95cd0766187 100644 --- a/src/otx/recipe/action/action_classification/openvino_model.yaml +++ b/src/otx/recipe/action/action_classification/openvino_model.yaml @@ -18,7 +18,7 @@ overrides: data: task: ACTION_CLASSIFICATION config: - image_color_channel: RGB + image_color_channel: BGR data_format: kinetics train_subset: batch_size: 8 From 469743a4fc7ec0f77be3989f2b7386cca865fd75 Mon Sep 17 00:00:00 2001 From: jaegukhyun Date: Wed, 29 May 2024 13:14:46 +0900 Subject: [PATCH 4/5] Modify unit tests --- src/otx/recipe/_base_/data/mmaction_base.yaml | 2 
+- tests/unit/core/data/test_factory.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/otx/recipe/_base_/data/mmaction_base.yaml b/src/otx/recipe/_base_/data/mmaction_base.yaml index dc1297bf9c9..480a9e11e49 100644 --- a/src/otx/recipe/_base_/data/mmaction_base.yaml +++ b/src/otx/recipe/_base_/data/mmaction_base.yaml @@ -78,7 +78,7 @@ config: transform_lib_type: MMACTION to_tv_image: False batch_size: 8 - num_workers: 2 + num_workers: 0 transforms: - type: LoadVideoForClassification - type: DecordInit diff --git a/tests/unit/core/data/test_factory.py b/tests/unit/core/data/test_factory.py index 2f80d957464..be0ca43e5c5 100644 --- a/tests/unit/core/data/test_factory.py +++ b/tests/unit/core/data/test_factory.py @@ -25,6 +25,7 @@ from otx.core.data.transform_libs.mmpretrain import MMPretrainTransformLib from otx.core.data.transform_libs.mmseg import MMSegTransformLib from otx.core.data.transform_libs.torchvision import TorchVisionTransformLib +from otx.core.types.image import ImageColorChannel from otx.core.types.task import OTXTaskType from otx.core.types.transformer_libs import TransformLibType @@ -86,6 +87,7 @@ def test_create( cfg_data_module.vpm_config = mocker.MagicMock(spec=VisualPromptingConfig) cfg_data_module.vpm_config.use_bbox = False cfg_data_module.vpm_config.use_point = False + cfg_data_module.image_color_channel = ImageColorChannel.BGR mocker.patch.object(HLabelInfo, "from_dm_label_groups", return_value=fxt_mock_hlabelinfo) assert isinstance( OTXDatasetFactory.create( From 9509d7c0b8f14d10a8611c3aa14545e2c05357e5 Mon Sep 17 00:00:00 2001 From: jaegukhyun Date: Wed, 29 May 2024 15:35:30 +0900 Subject: [PATCH 5/5] Revert unnecessary changes --- src/otx/recipe/_base_/data/mmaction_base.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/otx/recipe/_base_/data/mmaction_base.yaml b/src/otx/recipe/_base_/data/mmaction_base.yaml index 480a9e11e49..dc1297bf9c9 100644 --- 
a/src/otx/recipe/_base_/data/mmaction_base.yaml +++ b/src/otx/recipe/_base_/data/mmaction_base.yaml @@ -78,7 +78,7 @@ config: transform_lib_type: MMACTION to_tv_image: False batch_size: 8 - num_workers: 0 + num_workers: 2 transforms: - type: LoadVideoForClassification - type: DecordInit