diff --git a/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py b/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
index 7d83e7f6e1..ddc981a6d8 100644
--- a/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
+++ b/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
index 7351c6df32..fabcd90344 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
index 7198e66968..cc9fb7a52e 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
index 34cc11e81a..d9c180fe3a 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
index a2e4c28d33..0fd70b7822 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
index 74c2af6c80..700de32aea 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
index 687dbcb132..26bd52498a 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
index 2b6226f24d..be1eeea320 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=512,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
index 2f4a61bcdd..ab87d99148 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=512,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py
index 5de555efa9..abf3692647 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_aic-coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=384,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py
index 8ec812876e..634a7cc04d 100644
--- a/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/coco/rtmpose-tiny_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=384,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py b/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
index 064efc7ab8..4f028fa1f5 100644
--- a/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
+++ b/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=14,
         input_size=codec['input_size'],
diff --git a/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py b/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
index 66a68b3fbf..dfaf384037 100644
--- a/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
+++ b/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
@@ -71,7 +71,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=16,
         input_size=codec['input_size'],
diff --git a/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py b/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
index 0749962ef6..a19569b6ba 100644
--- a/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
+++ b/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=68,
         input_size=codec['input_size'],
diff --git a/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py b/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
index 7d80c49670..1f13d434dd 100644
--- a/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
+++ b/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=98,
         input_size=codec['input_size'],
diff --git a/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py b/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
index d87cb1e964..62765ca2c7 100644
--- a/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
+++ b/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=21,
         input_size=codec['input_size'],
diff --git a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
index fd457d224e..1ad246a2b8 100644
--- a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
+++ b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
index 0adce5ab91..949cbd9c18 100644
--- a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
+++ b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
index 4894829618..eab0a46299 100644
--- a/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
+++ b/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/mmpose/models/heads/__init__.py b/mmpose/models/heads/__init__.py
index 449f159911..8b4d988a5f 100644
--- a/mmpose/models/heads/__init__.py
+++ b/mmpose/models/heads/__init__.py
@@ -1,8 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .base_head import BaseHead
+from .coord_cls_heads import RTMCCHead, SimCCHead
 from .heatmap_heads import (AssociativeEmbeddingHead, CIDHead, CPMHead,
-                            HeatmapHead, MSPNHead, RTMHead, SimCCHead,
-                            ViPNASHead)
+                            HeatmapHead, MSPNHead, ViPNASHead)
 from .hybrid_heads import DEKRHead
 from .regression_heads import (DSNTHead, IntegralRegressionHead,
                                RegressionHead, RLEHead)
@@ -10,5 +10,5 @@
 __all__ = [
     'BaseHead', 'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead',
     'RegressionHead', 'IntegralRegressionHead', 'SimCCHead', 'RLEHead',
-    'DSNTHead', 'AssociativeEmbeddingHead', 'DEKRHead', 'CIDHead', 'RTMHead'
+    'DSNTHead', 'AssociativeEmbeddingHead', 'DEKRHead', 'CIDHead', 'RTMCCHead'
 ]
diff --git a/mmpose/models/heads/coord_cls_heads/__init__.py b/mmpose/models/heads/coord_cls_heads/__init__.py
new file mode 100644
index 0000000000..104ff91308
--- /dev/null
+++ b/mmpose/models/heads/coord_cls_heads/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .rtmcc_head import RTMCCHead
+from .simcc_head import SimCCHead
+
+__all__ = ['SimCCHead', 'RTMCCHead']
diff --git a/mmpose/models/heads/heatmap_heads/rtm_head.py b/mmpose/models/heads/coord_cls_heads/rtmcc_head.py
similarity index 98%
rename from mmpose/models/heads/heatmap_heads/rtm_head.py
rename to mmpose/models/heads/coord_cls_heads/rtmcc_head.py
index 25acfa958d..8892abffe3 100644
--- a/mmpose/models/heads/heatmap_heads/rtm_head.py
+++ b/mmpose/models/heads/coord_cls_heads/rtmcc_head.py
@@ -6,19 +6,19 @@
 from torch import Tensor, nn
 
 from mmpose.evaluation.functional import simcc_pck_accuracy
+from mmpose.models.utils.rtmcc_block import RTMCCBlock, ScaleNorm
 from mmpose.models.utils.tta import flip_vectors
 from mmpose.registry import KEYPOINT_CODECS, MODELS
 from mmpose.utils.tensor_utils import to_numpy
 from mmpose.utils.typing import (ConfigType, InstanceList, OptConfigType,
                                  OptSampleList)
-from ...utils.rtmpose_block import RTMBlock, ScaleNorm
 from ..base_head import BaseHead
 
 OptIntSeq = Optional[Sequence[int]]
 
 
 @MODELS.register_module()
-class RTMHead(BaseHead):
+class RTMCCHead(BaseHead):
     """Top-down head introduced in RTMPose (2023). The head is composed of a
     large-kernel convolutional layer, a fully-connected layer and a Gated
     Attention Unit to generate 1d representation from low-resolution feature
@@ -136,7 +136,7 @@ def __init__(
         W = int(self.input_size[0] * self.simcc_split_ratio)
         H = int(self.input_size[1] * self.simcc_split_ratio)
 
-        self.gau = RTMBlock(
+        self.gau = RTMCCBlock(
             self.out_channels,
             gau_cfg['hidden_dims'],
             gau_cfg['hidden_dims'],
diff --git a/mmpose/models/heads/heatmap_heads/simcc_head.py b/mmpose/models/heads/coord_cls_heads/simcc_head.py
similarity index 100%
rename from mmpose/models/heads/heatmap_heads/simcc_head.py
rename to mmpose/models/heads/coord_cls_heads/simcc_head.py
diff --git a/mmpose/models/heads/heatmap_heads/__init__.py b/mmpose/models/heads/heatmap_heads/__init__.py
index 9de4ea47eb..b482216b36 100644
--- a/mmpose/models/heads/heatmap_heads/__init__.py
+++ b/mmpose/models/heads/heatmap_heads/__init__.py
@@ -4,11 +4,9 @@
 from .cpm_head import CPMHead
 from .heatmap_head import HeatmapHead
 from .mspn_head import MSPNHead
-from .rtm_head import RTMHead
-from .simcc_head import SimCCHead
 from .vipnas_head import ViPNASHead
 
 __all__ = [
-    'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead', 'SimCCHead',
-    'AssociativeEmbeddingHead', 'CIDHead', 'RTMHead'
+    'HeatmapHead', 'CPMHead', 'MSPNHead', 'ViPNASHead',
+    'AssociativeEmbeddingHead', 'CIDHead'
 ]
diff --git a/mmpose/models/utils/__init__.py b/mmpose/models/utils/__init__.py
index b7be078b3d..730d43aca0 100644
--- a/mmpose/models/utils/__init__.py
+++ b/mmpose/models/utils/__init__.py
@@ -1,9 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .ckpt_convert import pvt_convert
-from .rtmpose_block import RTMBlock, rope
+from .rtmcc_block import RTMCCBlock, rope
 from .transformer import PatchEmbed, nchw_to_nlc, nlc_to_nchw
 
 __all__ = [
-    'PatchEmbed', 'nchw_to_nlc', 'nlc_to_nchw', 'pvt_convert', 'RTMBlock',
+    'PatchEmbed', 'nchw_to_nlc', 'nlc_to_nchw', 'pvt_convert', 'RTMCCBlock',
     'rope'
 ]
diff --git a/mmpose/models/utils/rtmpose_block.py b/mmpose/models/utils/rtmcc_block.py
similarity index 96%
rename from mmpose/models/utils/rtmpose_block.py
rename to mmpose/models/utils/rtmcc_block.py
index 9f5473f5ec..0e317376b2 100644
--- a/mmpose/models/utils/rtmpose_block.py
+++ b/mmpose/models/utils/rtmcc_block.py
@@ -5,6 +5,8 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from mmcv.cnn.bricks import DropPath
+from mmengine.utils import digit_version
+from mmengine.utils.dl_utils import TORCH_VERSION
 
 
 def rope(x, dim):
@@ -107,7 +109,7 @@ def forward(self, x):
         return x / norm.clamp(min=self.eps) * self.g
 
 
-class RTMBlock(nn.Module):
+class RTMCCBlock(nn.Module):
     """Gated Attention Unit (GAU) in RTMBlock.
 
     Args:
@@ -162,7 +164,7 @@ def __init__(self,
                  use_rel_bias=True,
                  pos_enc=False):
 
-        super(RTMBlock, self).__init__()
+        super(RTMCCBlock, self).__init__()
         self.s = s
         self.num_token = num_token
         self.use_rel_bias = use_rel_bias
@@ -197,6 +199,9 @@ def __init__(self,
         nn.init.xavier_uniform_(self.uv.weight)
 
         if act_fn == 'SiLU':
+            assert digit_version(TORCH_VERSION) >= digit_version('1.7.0'), \
+                'SiLU activation requires PyTorch version >= 1.7'
+
             self.act_fn = nn.SiLU(True)
         else:
             self.act_fn = nn.ReLU(True)
diff --git a/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py b/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py
index c5e2d00667..0fa5c5d30c 100644
--- a/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py
+++ b/projects/rtmpose/rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py
index cb55db6c46..b44df792a1 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py
index 5040d84c2d..2468c40d53 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py
index 4776607803..c7e3061c53 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py
index 17d5a8b8a6..16a7b0c493 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py
index ca22db7c0b..dca589bef9 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=512,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-tiny_8xb256-420e_coco-256x192.py b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-tiny_8xb256-420e_coco-256x192.py
index 9d12864735..cd16e0a98a 100644
--- a/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-tiny_8xb256-420e_coco-256x192.py
+++ b/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-tiny_8xb256-420e_coco-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=384,
         out_channels=17,
         input_size=codec['input_size'],
@@ -180,7 +180,6 @@
     batch_size=256,
     num_workers=10,
     persistent_workers=True,
-    drop_last=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
diff --git a/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py b/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
index a559187dff..dba43a7d72 100644
--- a/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
+++ b/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=68,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py b/projects/rtmpose/rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
index 7cfefd17a4..63049aa4d1 100644
--- a/projects/rtmpose/rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
+++ b/projects/rtmpose/rtmpose/hand_2d_keypoint/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=21,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
index 7cbb93a738..83f1bdce00 100644
--- a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
+++ b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
index 04d5a9d4fd..a060d59a40 100644
--- a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
+++ b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=1024,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
index 835d1c63eb..f1f86f24b7 100644
--- a/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
+++ b/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
@@ -72,7 +72,7 @@
             'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'  # noqa
         )),
     head=dict(
-        type='RTMHead',
+        type='RTMCCHead',
         in_channels=768,
         out_channels=133,
         input_size=codec['input_size'],
diff --git a/tests/test_models/test_heads/test_heatmap_heads/test_rtm_head.py b/tests/test_models/test_heads/test_heatmap_heads/test_rtmcc_head.py
similarity index 93%
rename from tests/test_models/test_heads/test_heatmap_heads/test_rtm_head.py
rename to tests/test_models/test_heads/test_heatmap_heads/test_rtmcc_head.py
index b78df38605..b7f833d362 100644
--- a/tests/test_models/test_heads/test_heatmap_heads/test_rtm_head.py
+++ b/tests/test_models/test_heads/test_heatmap_heads/test_rtmcc_head.py
@@ -6,13 +6,15 @@
 import torch
 import torch.nn as nn
 from mmengine.structures import InstanceData
+from mmengine.utils import digit_version
+from mmengine.utils.dl_utils import TORCH_VERSION
 
-from mmpose.models.heads import RTMHead
-from mmpose.models.utils import RTMBlock
+from mmpose.models.heads import RTMCCHead
+from mmpose.models.utils import RTMCCBlock
 from mmpose.testing import get_packed_inputs
 
 
-class TestRTMHead(TestCase):
+class TestRTMCCHead(TestCase):
 
     def _get_feats(self,
                    batch_size: int = 2,
@@ -26,8 +28,11 @@ def _get_feats(self,
 
     def test_init(self):
 
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         # original version
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -53,12 +58,12 @@
         self.assertIsNotNone(head.decoder)
         self.assertTrue(isinstance(head.final_layer, nn.Conv2d))
         self.assertTrue(isinstance(head.mlp, nn.Sequential))
-        self.assertTrue(isinstance(head.gau, RTMBlock))
+        self.assertTrue(isinstance(head.gau, RTMCCBlock))
         self.assertTrue(isinstance(head.cls_x, nn.Linear))
         self.assertTrue(isinstance(head.cls_y, nn.Linear))
 
         # w/ 1x1 conv
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -84,12 +89,12 @@
         self.assertIsNotNone(head.decoder)
         self.assertTrue(isinstance(head.final_layer, nn.Conv2d))
         self.assertTrue(isinstance(head.mlp, nn.Sequential))
-        self.assertTrue(isinstance(head.gau, RTMBlock))
+        self.assertTrue(isinstance(head.gau, RTMCCBlock))
         self.assertTrue(isinstance(head.cls_x, nn.Linear))
         self.assertTrue(isinstance(head.cls_y, nn.Linear))
 
         # hidden_dims
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -115,12 +120,12 @@
         self.assertIsNotNone(head.decoder)
         self.assertTrue(isinstance(head.final_layer, nn.Conv2d))
         self.assertTrue(isinstance(head.mlp, nn.Sequential))
-        self.assertTrue(isinstance(head.gau, RTMBlock))
+        self.assertTrue(isinstance(head.gau, RTMCCBlock))
         self.assertTrue(isinstance(head.cls_x, nn.Linear))
         self.assertTrue(isinstance(head.cls_y, nn.Linear))
 
         # s = 256
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -146,11 +151,15 @@
         self.assertIsNotNone(head.decoder)
         self.assertTrue(isinstance(head.final_layer, nn.Conv2d))
         self.assertTrue(isinstance(head.mlp, nn.Sequential))
-        self.assertTrue(isinstance(head.gau, RTMBlock))
+        self.assertTrue(isinstance(head.gau, RTMCCBlock))
         self.assertTrue(isinstance(head.cls_x, nn.Linear))
         self.assertTrue(isinstance(head.cls_y, nn.Linear))
 
     def test_predict(self):
+
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         decoder_cfg_list = []
         # original version
         decoder_cfg = dict(
@@ -193,7 +202,7 @@
         decoder_cfg_list.append(decoder_cfg)
 
         for decoder_cfg in decoder_cfg_list:
-            head = RTMHead(
+            head = RTMCCHead(
                 in_channels=32,
                 out_channels=17,
                 input_size=(192, 256),
@@ -224,7 +233,7 @@
                 batch_data_samples[0].gt_instances.keypoints.shape)
 
         # 1x1 conv
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -249,7 +258,7 @@
         preds, _ = head.predict(feats, batch_data_samples)
 
         # hidden dims
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -280,7 +289,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # s
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -311,7 +320,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # expansion factor
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -342,7 +351,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # drop path
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -373,7 +382,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # act fn
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -404,7 +413,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # use_rel_bias
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -435,7 +444,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # pos_enc
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -466,7 +475,7 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
         # output_heatmaps
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -502,6 +511,9 @@
             self.assertEqual(pred_heatmaps[0].heatmaps.shape, (17, 512, 384))
 
     def test_tta(self):
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         # flip test
         decoder_cfg = dict(
             type='SimCCLabel',
@@ -511,7 +523,7 @@
             simcc_split_ratio=2.0,
             normalize=False)
 
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -542,6 +554,9 @@
             batch_data_samples[0].gt_instances.keypoints.shape)
 
     def test_loss(self):
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         decoder_cfg_list = []
         decoder_cfg = dict(
             type='SimCCLabel',
@@ -563,7 +578,7 @@
 
         # decoder
         for decoder_cfg in decoder_cfg_list:
-            head = RTMHead(
+            head = RTMCCHead(
                 in_channels=32,
                 out_channels=17,
                 input_size=(192, 256),
@@ -597,7 +612,7 @@
             self.assertIsInstance(losses['acc_pose'], torch.Tensor)
 
         # beta = 10
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -631,7 +646,7 @@
             self.assertIsInstance(losses['acc_pose'], torch.Tensor)
 
         # label softmax
-        head = RTMHead(
+        head = RTMCCHead(
             in_channels=32,
             out_channels=17,
             input_size=(192, 256),
@@ -665,9 +680,12 @@
             self.assertIsInstance(losses['acc_pose'], torch.Tensor)
 
     def test_errors(self):
+        if digit_version(TORCH_VERSION) < digit_version('1.7.0'):
+            return unittest.skip('RTMCCHead requires PyTorch >= 1.7')
+
         # Invalid arguments
         with self.assertRaisesRegex(ValueError, 'multiple input features'):
-            _ = RTMHead(
+            _ = RTMCCHead(
                 in_channels=(16, 32),
                 out_channels=17,
                 input_size=(192, 256),
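Editor's note, not part of the patch above: a minimal sketch of what downstream code looks like after this rename, assuming the import re-exports and registry key shown in the hunks. The numeric values are copied from the rtmpose-m/COCO hunks for illustration; the remaining head fields (in_featuremap_size, simcc_split_ratio, gau_cfg, loss, decoder) are elided here and must be taken from the actual configs.

# Illustrative only: new import locations introduced by this patch.
from mmpose.models.heads import RTMCCHead   # re-exported via coord_cls_heads
from mmpose.models.utils import RTMCCBlock  # renamed from RTMBlock

# Config fragment mirroring the "- type='RTMHead'" / "+ type='RTMCCHead'" hunks;
# other head fields are omitted for brevity.
head = dict(
    type='RTMCCHead',  # registry key renamed by this patch
    in_channels=768,   # CSPNeXt-m backbone output channels, as in the rtmpose-m configs
    out_channels=17,   # number of keypoints (COCO body)
)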