diff --git a/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py b/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
index 7d83e7f6e1..ddc981a6d8 100644
--- a/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
+++ b/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
index 7351c6df32..fabcd90344 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
index 7198e66968..cc9fb7a52e 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
index 34cc11e81a..d9c180fe3a 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
index a2e4c28d33..0fd70b7822 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
index 74c2af6c80..700de32aea 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
index 687dbcb132..26bd52498a 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
index 2b6226f24d..be1eeea320 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=512,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
index 2f4a61bcdd..ab87d99148 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=512,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py
index 5de555efa9..abf3692647 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=384,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py
index 8ec812876e..634a7cc04d 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=384,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py b/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
index 064efc7ab8..4f028fa1f5 100644
--- a/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=14,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
index 66a68b3fbf..dfaf384037 100644
--- a/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
+++ b/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
@@ -71,7 +71,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=16,
         input_size=codec['input_size'],
diff --git a/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py b/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
index 0749962ef6..a19569b6ba 100644
--- a/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
+++ b/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=68,
         input_size=codec['input_size'],
diff --git a/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py b/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
index 7d80c49670..1f13d434dd 100644
--- a/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
+++ b/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=98,
         input_size=codec['input_size'],
diff --git a/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py b/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
index d87cb1e964..62765ca2c7 100644
--- a/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
+++ b/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=21,
         input_size=codec['input_size'],
diff --git a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
index fd457d224e..1ad246a2b8 100644
--- a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
+++ b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
index 0adce5ab91..949cbd9c18 100644
--- a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
+++ b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
index 4894829618..eab0a46299 100644
--- a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
+++ b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/mmpose/models/heads/__init__.py b/mmpose/models/heads/__init__.py
index 449f159911..8b4d988a5f 100644
--- a/mmpose/models/heads/__init__.py
+++ b/mmpose/models/heads/__init__.py
@@ -1,8 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .base_head import BaseHead
+from .coord_cls_heads import RTMCCHead, SimCCHead
 from .heatmap_heads import (AssociativeEmbeddingHead, CIDHead, CPMHead,
-                            HeatmapHead, MSPNHead, RTMHead, SimCCHead,
-                            ViPNASHead)
+                            HeatmapHead, MSPNHead, ViPNASHead)
 from .hybrid_heads import DEKRHead
 from .regression_heads import (DSNTHead, IntegralRegressionHead,
                                RegressionHead, RLEHead)
@@ -10,5 +10,5 @@
 __all__ = [
     'BaseHead', 'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead',
     'RegressionHead', 'IntegralRegressionHead', 'SimCCHead', 'RLEHead',
-    'DSNTHead', 'AssociativeEmbeddingHead', 'DEKRHead', 'CIDHead', 'RTMHead'
+    'DSNTHead', 'AssociativeEmbeddingHead', 'DEKRHead', 'CIDHead', 'RTMCCHead'
 ]
diff --git a/mmpose/models/heads/coord_cls_heads/__init__.py b/mmpose/models/heads/coord_cls_heads/__init__.py
new file mode 100644
index 0000000000..104ff91308
--- /dev/null
+++ b/mmpose/models/heads/coord_cls_heads/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .rtmcc_head import RTMCCHead
+from .simcc_head import SimCCHead
+
+__all__ = ['SimCCHead', 'RTMCCHead']
diff --git a/mmpose/models/heads/heatmap_heads/rtm_head.py b/mmpose/models/heads/coord_cls_heads/rtmcc_head.py
similarity index 98%
rename from mmpose/models/heads/heatmap_heads/rtm_head.py
rename to mmpose/models/heads/coord_cls_heads/rtmcc_head.py
index 25acfa958d..8892abffe3 100644
--- a/mmpose/models/heads/heatmap_heads/rtm_head.py
+++ b/mmpose/models/heads/coord_cls_heads/rtmcc_head.py
@@ -6,19 +6,19 @@
 from torch import Tensor, nn
 
 from mmpose.evaluation.functional import simcc_pck_accuracy
+from mmpose.models.utils.rtmcc_block import RTMCCBlock, ScaleNorm
 from mmpose.models.utils.tta import flip_vectors
 from mmpose.registry import KEYPOINT_CODECS, MODELS
 from mmpose.utils.tensor_utils import to_numpy
 from mmpose.utils.typing import (ConfigType, InstanceList, OptConfigType,
                                  OptSampleList)
-from ...utils.rtmpose_block import RTMBlock, ScaleNorm
 from ..base_head import BaseHead
 
 OptIntSeq = Optional[Sequence[int]]
 
 
 @MODELS.register_module()
-class RTMHead(BaseHead):
+class RTMCCHead(BaseHead):
     """Top-down head introduced in RTMPose (2023). The head is composed of a
     large-kernel convolutional layer, a fully-connected layer and a Gated
     Attention Unit to generate 1d representation from low-resolution feature
@@ -136,7 +136,7 @@ def __init__(
         W = int(self.input_size[0] * self.simcc_split_ratio)
         H = int(self.input_size[1] * self.simcc_split_ratio)
 
-        self.gau = RTMBlock(
+        self.gau = RTMCCBlock(
             self.out_channels,
             gau_cfg['hidden_dims'],
             gau_cfg['hidden_dims'],
diff --git a/mmpose/models/heads/heatmap_heads/simcc_head.py b/mmpose/models/heads/coord_cls_heads/simcc_head.py
similarity index 100%
rename from mmpose/models/heads/heatmap_heads/simcc_head.py
rename to mmpose/models/heads/coord_cls_heads/simcc_head.py
diff --git a/mmpose/models/heads/heatmap_heads/__init__.py b/mmpose/models/heads/heatmap_heads/__init__.py
index 9de4ea47eb..b482216b36 100644
--- a/mmpose/models/heads/heatmap_heads/__init__.py
+++ b/mmpose/models/heads/heatmap_heads/__init__.py
@@ -4,11 +4,9 @@
 from .cpm_head import CPMHead
 from .heatmap_head import HeatmapHead
 from .mspn_head import MSPNHead
-from .rtm_head import RTMHead
-from .simcc_head import SimCCHead
 from .vipnas_head import ViPNASHead
 
 __all__ = [
-    'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead', 'SimCCHead',
-    'AssociativeEmbeddingHead', 'CIDHead', 'RTMHead'
+    'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead',
+    'AssociativeEmbeddingHead', 'CIDHead'
 ]
diff --git a/mmpose/models/utils/__init__.py b/mmpose/models/utils/__init__.py
index b7be078b3d..730d43aca0 100644
--- a/mmpose/models/utils/__init__.py
+++ b/mmpose/models/utils/__init__.py
@@ -1,9 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .ckpt_convert import pvt_convert
-from .rtmpose_block import RTMBlock, rope
+from .rtmcc_block import RTMCCBlock, rope
 from .transformer import PatchEmbed, nchw_to_nlc, nlc_to_nchw
 
 __all__ = [
-    'PatchEmbed', 'nchw_to_nlc', 'nlc_to_nchw', 'pvt_convert', 'RTMBlock',
+    'PatchEmbed', 'nchw_to_nlc', 'nlc_to_nchw', 'pvt_convert', 'RTMCCBlock',
     'rope'
 ]
diff --git a/mmpose/models/utils/rtmpose_block.py b/mmpose/models/utils/rtmcc_block.py
similarity index 96%
rename from mmpose/models/utils/rtmpose_block.py
rename to mmpose/models/utils/rtmcc_block.py
index 9f5473f5ec..0e317376b2 100644
--- a/mmpose/models/utils/rtmpose_block.py
+++ b/mmpose/models/utils/rtmcc_block.py
@@ -5,6 +5,8 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from mmcv.cnn.bricks import DropPath
+from mmengine.utils import digit_version
+from mmengine.utils.dl_utils import TORCH_VERSION
 
 
 def rope(x, dim):
@@ -107,7 +109,7 @@ def forward(self, x):
         return x / norm.clamp(min=self.eps) * self.g
 
 
-class RTMBlock(nn.Module):
+class RTMCCBlock(nn.Module):
     """Gated Attention Unit (GAU) in RTMBlock.
 
     Args:
@@ -162,7 +164,7 @@ def __init__(self,
                  use_rel_bias=True,
                  pos_enc=False):
 
-        super(RTMBlock, self).__init__()
+        super(RTMCCBlock, self).__init__()
         self.s = s
         self.num_token = num_token
         self.use_rel_bias = use_rel_bias
@@ -197,6 +199,9 @@ def __init__(self,
         nn.init.xavier_uniform_(self.uv.weight)
 
         if act_fn == 'SiLU':
+            assert digit_version(TORCH_VERSION) >= digit_version('1.7.0'), \
+                'SiLU activation requires PyTorch version >= 1.7'
+
             self.act_fn = nn.SiLU(True)
         else:
             self.act_fn = nn.ReLU(True)
diff --git a/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py b/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py
index c5e2d00667..0fa5c5d30c 100644
--- a/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py
+++ b/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py
index cb55db6c46..b44df792a1 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py
index 5040d84c2d..2468c40d53 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py
index 4776607803..c7e3061c53 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py
index 17d5a8b8a6..16a7b0c493 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py
index ca22db7c0b..dca589bef9 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=512,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-tiny_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-tiny_8xb256-420e_coco-256x192.py
index 9d12864735..cd16e0a98a 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-tiny_8xb256-420e_coco-256x192.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-tiny_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=384,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py b/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
index a559187dff..dba43a7d72 100644
--- a/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
+++ b/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=68,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py b/projects/rtmpose/rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
index 7cfefd17a4..63049aa4d1 100644
--- a/projects/rtmpose/rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
+++ b/projects/rtmpose/rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=21,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
index 7cbb93a738..83f1bdce00 100644
--- a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
+++ b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
index 04d5a9d4fd..a060d59a40 100644
--- a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
+++ b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
index 835d1c63eb..f1f86f24b7 100644
--- a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
+++ b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/tests/test_models/test_heads/test_heatmap_heads/test_rtm_head.py b/tests/test_models/test_heads/test_heatmap_heads/test_rtmcc_head.py
similarity index 93%
rename from tests/test_models/test_heads/test_heatmap_heads/test_rtm_head.py
rename to tests/test_models/test_heads/test_heatmap_heads/test_rtmcc_head.py
index b78df38605..b7f833d362 100644
--- a/tests/test_models/test_heads/test_heatmap_heads/test_rtm_head.py
+++ b/tests/test_models/test_heads/test_heatmap_heads/test_rtmcc_head.py
@@ -6,13 +6,15 @@
 import torch
 import torch.nn as nn
 from mmengine.structures import InstanceData
+from mmengine.utils import digit_version
+from mmengine.utils.dl_utils import TORCH_VERSION
 
-from mmpose.models.heads import RTMHead
-from mmpose.models.utils import RTMBlock
+from mmpose.models.heads import RTMCCHead
+from mmpose.models.utils import RTMCCBlock
 from mmpose.testing import get_packed_inputs
 
 
-class TestRTMHead(TestCase):
+class TestRTMCCHead(TestCase):
 
     def _get_feats(self,
                    batch_size: int = 2,
@@ -26,8 +28,11 @@ def _get_feats(self,
 
     def test_init(self):
 
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         # original version
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -53,12 +58,12 @@
         self.assertIsNotNone(head.decoder)
         self.assertTrue(isinstance(head.final_layer, nn.Conv2d))
         self.assertTrue(isinstance(head.mlp, nn.Sequential))
-        self.assertTrue(isinstance(head.gau, RTMBlock))
+        self.assertTrue(isinstance(head.gau, RTMCCBlock))
         self.assertTrue(isinstance(head.cls_x, nn.Linear))
         self.assertTrue(isinstance(head.cls_y, nn.Linear))
 
         # w/ 1x1 conv
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -84,12 +89,12 @@
         self.assertIsNotNone(head.decoder)
         self.assertTrue(isinstance(head.final_layer, nn.Conv2d))
         self.assertTrue(isinstance(head.mlp, nn.Sequential))
-        self.assertTrue(isinstance(head.gau, RTMBlock))
+        self.assertTrue(isinstance(head.gau, RTMCCBlock))
         self.assertTrue(isinstance(head.cls_x, nn.Linear))
         self.assertTrue(isinstance(head.cls_y, nn.Linear))
 
         # hidden_dims
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -115,12 +120,12 @@
         self.assertIsNotNone(head.decoder)
         self.assertTrue(isinstance(head.final_layer, nn.Conv2d))
         self.assertTrue(isinstance(head.mlp, nn.Sequential))
-        self.assertTrue(isinstance(head.gau, RTMBlock))
+        self.assertTrue(isinstance(head.gau, RTMCCBlock))
         self.assertTrue(isinstance(head.cls_x, nn.Linear))
         self.assertTrue(isinstance(head.cls_y, nn.Linear))
 
         # s = 256
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -146,11 +151,15 @@
         self.assertIsNotNone(head.decoder)
         self.assertTrue(isinstance(head.final_layer, nn.Conv2d))
         self.assertTrue(isinstance(head.mlp, nn.Sequential))
-        self.assertTrue(isinstance(head.gau, RTMBlock))
+        self.assertTrue(isinstance(head.gau, RTMCCBlock))
         self.assertTrue(isinstance(head.cls_x, nn.Linear))
         self.assertTrue(isinstance(head.cls_y, nn.Linear))
 
     def test_predict(self):
+
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         decoder_cfg_list = []
         # original version
         decoder_cfg = dict(
@@ -193,7 +202,7 @@
         decoder_cfg_list.append(decoder_cfg)
 
         for decoder_cfg in decoder_cfg_list:
-            head = RTMHead(
+            head = RTMCCHead(
                 in_channels=32,
                 out_channels=17,
                 input_size=(192, 256),
@@ -224,7 +233,7 @@
                 batch_data_samples[0].gt_instances.keypoints.shape)
 
         # 1x1 conv
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -249,7 +258,7 @@
         preds, _ = head.predict(feats, batch_data_samples)
 
         # hidden dims
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -280,7 +289,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # s
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -311,7 +320,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # expansion factor
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -342,7 +351,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # drop path
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -373,7 +382,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # act fn
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -404,7 +413,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # use_rel_bias
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -435,7 +444,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # pos_enc
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -466,7 +475,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # output_heatmaps
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -502,6 +511,9 @@
             self.assertEqual(pred_heatmaps[0].heatmaps.shape, (17, 512, 384))
 
     def test_tta(self):
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         # flip test
         decoder_cfg = dict(
             type='SimCCLabel',
@@ -511,7 +523,7 @@
             simcc_split_ratio=2.0,
             normalize=False)
 
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -542,6 +554,9 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
     def test_loss(self):
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         decoder_cfg_list = []
         decoder_cfg = dict(
             type='SimCCLabel',
@@ -563,7 +578,7 @@
 
         # decoder
         for decoder_cfg in decoder_cfg_list:
-            head = RTMHead(
+            head = RTMCCHead(
                 in_channels=32,
                 out_channels=17,
                 input_size=(192, 256),
@@ -597,7 +612,7 @@
             self.assertIsInstance(losses['acc_pose'], torch.Tensor)
 
         # beta = 10
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -631,7 +646,7 @@
             self.assertIsInstance(losses['acc_pose'], torch.Tensor)
 
         # label softmax
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -665,9 +680,12 @@
             self.assertIsInstance(losses['acc_pose'], torch.Tensor)
 
     def test_errors(self):
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         # Invalid arguments
         with self.assertRaisesRegex(ValueError, 'multiple input features'):
-            _ = RTMHead(
+            _ = RTMCCHead(
                 in_channels=(16, 32),
                 out_channels=17,
                 input_size=(192, 256),
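Editor's note, not part of the patch above: a minimal sketch of what downstream code looks like after this rename, assuming the import re-exports and registry key shown in the hunks. The numeric values are copied from the rtmpose-m/COCO hunks for illustration; the remaining head fields (in_featuremap_size, simcc_split_ratio, gau_cfg, loss, decoder) are elided here and must be taken from the actual configs.

# Illustrative only: new import locations introduced by this patch.
from mmpose.models.heads import RTMCCHead   # re-exported via coord_cls_heads
from mmpose.models.utils import RTMCCBlock  # renamed from RTMBlock

# Config fragment mirroring the "- type='RTMHead'" / "+ type='RTMCCHead'" hunks;
# other head fields are omitted for brevity.
head = dict(
    type='RTMCCHead',  # registry key renamed by this patch
    in_channels=768,   # CSPNeXt-m backbone output channels, as in the rtmpose-m configs
    out_channels=17,   # number of keypoints (COCO body)
)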