open-mmlab · ly015 · Mar 9, 2023 · Mar 8, 2023 · Mar 8, 2023 · Mar 9, 2023
diff --git a/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py b/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=512,
         out_channels=17,
         input_size=codec['input_size'],

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=512,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=384,
         out_channels=17,
         input_size=codec['input_size'],

diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=384,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,

diff --git a/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py b/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=14,
         input_size=codec['input_size'],

diff --git a/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
@@ -71,7 +71,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=16,
         input_size=codec['input_size'],

diff --git a/...d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py b/...d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=68,
         input_size=codec['input_size'],

diff --git a/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py b/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=98,
         input_size=codec['input_size'],

diff --git a/..._keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py b/..._keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=21,
         input_size=codec['input_size'],

diff --git a/...olebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py b/...olebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],

diff --git a/...olebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py b/...olebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],

diff --git a/...olebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py b/...olebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=133,
         input_size=codec['input_size'],

diff --git a/mmpose/models/heads/__init__.py b/mmpose/models/heads/__init__.py
@@ -1,14 +1,14 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .base_head import BaseHead
+from .coord_cls_heads import RTMCCHead, SimCCHead
 from .heatmap_heads import (AssociativeEmbeddingHead, CIDHead, CPMHead,
-                            HeatmapHead, MSPNHead, RTMHead, SimCCHead,
-                            ViPNASHead)
+                            HeatmapHead, MSPNHead, ViPNASHead)
 from .hybrid_heads import DEKRHead
 from .regression_heads import (DSNTHead, IntegralRegressionHead,
                                RegressionHead, RLEHead)
 
 __all__ = [
     'BaseHead', 'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead',
     'RegressionHead', 'IntegralRegressionHead', 'SimCCHead', 'RLEHead',
-    'DSNTHead', 'AssociativeEmbeddingHead', 'DEKRHead', 'CIDHead', 'RTMHead'
+    'DSNTHead', 'AssociativeEmbeddingHead', 'DEKRHead', 'CIDHead', 'RTMCCHead'
 ]
diff --git a/mmpose/models/heads/coord_cls_heads/__init__.py b/mmpose/models/heads/coord_cls_heads/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .rtmcc_head import RTMCCHead
+from .simcc_head import SimCCHead
+
+__all__ = ['SimCCHead', 'RTMCCHead']
diff --git a/...se/models/heads/heatmap_heads/rtm_head.py → ...odels/heads/coord_cls_heads/rtmcc_head.py b/...se/models/heads/heatmap_heads/rtm_head.py → ...odels/heads/coord_cls_heads/rtmcc_head.py
@@ -6,19 +6,19 @@
 from torch import Tensor, nn
 
 from mmpose.evaluation.functional import simcc_pck_accuracy
+from mmpose.models.utils.rtmcc_block import RTMCCBlock, ScaleNorm
 from mmpose.models.utils.tta import flip_vectors
 from mmpose.registry import KEYPOINT_CODECS, MODELS
 from mmpose.utils.tensor_utils import to_numpy
 from mmpose.utils.typing import (ConfigType, InstanceList, OptConfigType,
                                  OptSampleList)
-from ...utils.rtmpose_block import RTMBlock, ScaleNorm
 from ..base_head import BaseHead
 
 OptIntSeq = Optional[Sequence[int]]
 
 
 @MODELS.register_module()
-class RTMHead(BaseHead):
+class RTMCCHead(BaseHead):
     """Top-down head introduced in RTMPose (2023). The head is composed of a
     large-kernel convolutional layer, a fully-connected layer and a Gated
     Attention Unit to generate 1d representation from low-resolution feature
@@ -136,7 +136,7 @@ def __init__(
         W = int(self.input_size[0] * self.simcc_split_ratio)
         H = int(self.input_size[1] * self.simcc_split_ratio)
 
-        self.gau = RTMBlock(
+        self.gau = RTMCCBlock(
             self.out_channels,
             gau_cfg['hidden_dims'],
             gau_cfg['hidden_dims'],

diff --git a/.../models/heads/heatmap_heads/simcc_head.py → ...odels/heads/coord_cls_heads/simcc_head.py b/.../models/heads/heatmap_heads/simcc_head.py → ...odels/heads/coord_cls_heads/simcc_head.py
diff --git a/mmpose/models/heads/heatmap_heads/__init__.py b/mmpose/models/heads/heatmap_heads/__init__.py
@@ -4,11 +4,9 @@
 from .cpm_head import CPMHead
 from .heatmap_head import HeatmapHead
 from .mspn_head import MSPNHead
-from .rtm_head import RTMHead
-from .simcc_head import SimCCHead
 from .vipnas_head import ViPNASHead
 
 __all__ = [
-    'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead', 'SimCCHead',
-    'AssociativeEmbeddingHead', 'CIDHead', 'RTMHead'
+    'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead',
+    'AssociativeEmbeddingHead', 'CIDHead'
 ]
diff --git a/mmpose/models/utils/__init__.py b/mmpose/models/utils/__init__.py
@@ -1,9 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .ckpt_convert import pvt_convert
-from .rtmpose_block import RTMBlock, rope
+from .rtmcc_block import RTMCCBlock, rope
 from .transformer import PatchEmbed, nchw_to_nlc, nlc_to_nchw
 
 __all__ = [
-    'PatchEmbed', 'nchw_to_nlc', 'nlc_to_nchw', 'pvt_convert', 'RTMBlock',
+    'PatchEmbed', 'nchw_to_nlc', 'nlc_to_nchw', 'pvt_convert', 'RTMCCBlock',
     'rope'
 ]
diff --git a/mmpose/models/utils/rtmpose_block.py → mmpose/models/utils/rtmcc_block.py b/mmpose/models/utils/rtmpose_block.py → mmpose/models/utils/rtmcc_block.py
@@ -5,6 +5,8 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from mmcv.cnn.bricks import DropPath
+from mmengine.utils import digit_version
+from mmengine.utils.dl_utils import TORCH_VERSION
 
 
 def rope(x, dim):
@@ -107,7 +109,7 @@ def forward(self, x):
         return x / norm.clamp(min=self.eps) * self.g
 
 
-class RTMBlock(nn.Module):
-    """Gated Attention Unit (GAU) in RTMBlock.
+class RTMCCBlock(nn.Module):
+    """Gated Attention Unit (GAU) in RTMCCBlock.
 
     Args:
@@ -162,7 +164,7 @@ def __init__(self,
                  use_rel_bias=True,
                  pos_enc=False):
 
-        super(RTMBlock, self).__init__()
+        super(RTMCCBlock, self).__init__()
         self.s = s
         self.num_token = num_token
         self.use_rel_bias = use_rel_bias
@@ -197,6 +199,9 @@ def __init__(self,
         nn.init.xavier_uniform_(self.uv.weight)
 
         if act_fn == 'SiLU':
+            assert digit_version(TORCH_VERSION) >= digit_version('1.7.0'), \
+                'SiLU activation requires PyTorch version >= 1.7'
+
             self.act_fn = nn.SiLU(True)
         else:
             self.act_fn = nn.ReLU(True)

diff --git a/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py b/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],

diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,

diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,

diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,

diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,