Add interhand3D pipeline. (open-mmlab#575)

zengwang430521 · web-flow · commit 78e65cc8a5d9 · 2021-04-23T10:27:36.000+08:00
* Add interhand3D pipeline.

* reorganize pipelines.

* reorganize interhand3d pipelines.

* modify bceloss.

* modify 3d heatmap generation pipeline

* modify codes according to reviews.

* modify some comments.
diff --git a/mmpose/datasets/datasets/hand/interhand3d_dataset.py b/mmpose/datasets/datasets/hand/interhand3d_dataset.py
@@ -97,6 +97,10 @@ def __init__(self,
         assert self.ann_info['num_joints'] == 42
         self.ann_info['joint_weights'] = \
             np.ones((self.ann_info['num_joints'], 1), dtype=np.float32)
+        self.ann_info['heatmap3d_depth_bound'] = data_cfg[
+            'heatmap3d_depth_bound']
+        self.ann_info['heatmap_size_root'] = data_cfg['heatmap_size_root']
+        self.ann_info['root_depth_bound'] = data_cfg['root_depth_bound']
 
         self.dataset_name = 'interhand3d'
         self.camera_file = camera_file
@@ -257,7 +261,12 @@ def _get_db(self):
                 # the bboxes have been extended
                 center, scale = self._xywh2cs(*bbox, 1.0)
                 abs_depth = rootnet_ann_data['abs_depth']
+            # 41: 'l_wrist', left hand root
+            # 20: 'r_wrist', right hand root
             rel_root_depth = joint_cam[41, 2] - joint_cam[20, 2]
+            # if root is not valid, root-relative 3D depth is also invalid.
+            rel_root_valid = joint_valid[20] * joint_valid[41]
+
             # if root is not valid -> root-relative 3D pose is also not valid.
             # Therefore, mark all joints as invalid
             joint_valid[:20] *= joint_valid[20]
@@ -280,6 +289,7 @@ def _get_db(self):
                 'hand_type': hand_type,
                 'hand_type_valid': hand_type_valid,
                 'rel_root_depth': rel_root_depth,
+                'rel_root_valid': rel_root_valid,
                 'abs_depth': abs_depth,
                 'joints_cam': joint_cam,
                 'focal': focal,
diff --git a/mmpose/datasets/pipelines/__init__.py b/mmpose/datasets/pipelines/__init__.py
@@ -1,4 +1,5 @@
 from .bottom_up_transform import *  # noqa
+from .hand_transform import *  # noqa
 from .loading import LoadImageFromFile  # noqa
 from .mesh_transform import *  # noqa
 from .pose3d_transform import *  # noqa
diff --git a/mmpose/datasets/pipelines/hand_transform.py b/mmpose/datasets/pipelines/hand_transform.py
@@ -0,0 +1,60 @@
+import numpy as np
+
+from mmpose.datasets.registry import PIPELINES
+from .top_down_transform import TopDownRandomFlip
+
+
+@PIPELINES.register_module()
+class HandRandomFlip(TopDownRandomFlip):
+    """Data augmentation with random image flip. A child class of
+    TopDownRandomFlip.
+
+    Required keys: 'img', 'joints_3d', 'joints_3d_visible', 'center',
+    'hand_type', 'rel_root_depth' and 'ann_info'.
+    Modifies key: 'img', 'joints_3d', 'joints_3d_visible', 'center',
+    'hand_type', 'rel_root_depth'.
+
+    Args:
+        flip_prob (float): Probability of flip.
+    """
+
+    def __call__(self, results):
+        """Perform data augmentation with random image flip."""
+        # base flip augmentation
+        super().__call__(results)
+
+        # flip hand type and root depth
+        hand_type = results['hand_type']
+        rel_root_depth = results['rel_root_depth']
+        flipped = results['flipped']
+        if flipped:
+            hand_type[0], hand_type[1] = hand_type[1], hand_type[0]
+            rel_root_depth = -rel_root_depth
+        results['hand_type'] = hand_type
+        results['rel_root_depth'] = rel_root_depth
+        return results
+
+
+@PIPELINES.register_module()
+class HandGenerateRelDepthTarget:
+    """Generate the target relative root depth.
+
+    Required keys: 'rel_root_depth', 'rel_root_valid', 'ann_info'. Modified
+    keys: 'target', 'target_weight'.
+    """
+
+    def __init__(self):
+        pass
+
+    def __call__(self, results):
+        """Generate the target heatmap."""
+        rel_root_depth = results['rel_root_depth']
+        rel_root_valid = results['rel_root_valid']
+        cfg = results['ann_info']
+        D = cfg['heatmap_size_root']
+        root_depth_bound = cfg['root_depth_bound']
+        target = (rel_root_depth / root_depth_bound + 0.5) * D
+        target_weight = rel_root_valid * (target >= 0) * (target <= D)
+        results['target'] = target * np.ones(1, dtype=np.float32)
+        results['target_weight'] = target_weight * np.ones(1, dtype=np.float32)
+        return results
diff --git a/mmpose/datasets/pipelines/pose3d_transform.py b/mmpose/datasets/pipelines/pose3d_transform.py
@@ -295,3 +295,64 @@ def __call__(self, results):
         results[self.item] = torch.from_numpy(seq)
 
         return results
+
+
+@PIPELINES.register_module()
+class Generate3DHeatmapTarget:
+    """Generate the target 3d heatmap.
+
+    Required keys: 'joints_3d', 'joints_3d_visible', 'ann_info'.
+    Modified keys: 'target', and 'target_weight'.
+
+    Args:
+        sigma: Sigma of heatmap gaussian.
+        joint_indices (list): Indices of joints used for heatmap generation.
+        If None (default) is given, all joints will be used.
+    """
+
+    def __init__(self, sigma=2, joint_indices=None):
+        self.sigma = sigma
+        self.joint_indices = joint_indices
+
+    def __call__(self, results):
+        """Generate the target heatmap."""
+        joints_3d = results['joints_3d']
+        joints_3d_visible = results['joints_3d_visible']
+        cfg = results['ann_info']
+        image_size = cfg['image_size']
+        W, H, D = cfg['heatmap_size']
+        heatmap3d_depth_bound = cfg['heatmap3d_depth_bound']
+        joint_weights = cfg['joint_weights']
+        use_different_joint_weights = cfg['use_different_joint_weights']
+
+        if self.joint_indices is not None:
+            joints_3d = joints_3d[self.joint_indices, ...]
+            joints_3d_visible = joints_3d_visible[self.joint_indices, ...]
+            joint_weights = joint_weights[self.joint_indices, ...]
+
+        mu_x = joints_3d[:, 0] * W / image_size[0]
+        mu_y = joints_3d[:, 1] * H / image_size[1]
+        mu_z = (joints_3d[:, 2] / heatmap3d_depth_bound + 0.5) * D
+
+        target_weight = joints_3d_visible[:, 0]
+        target_weight = target_weight * (mu_z >= 0) * (mu_z < D)
+        if use_different_joint_weights:
+            target_weight = target_weight * joint_weights
+        target_weight = target_weight[:, None]
+
+        x, y, z = np.arange(W), np.arange(H), np.arange(D)
+        zz, yy, xx = np.meshgrid(z, y, x)
+        xx = xx[None, ...].astype(np.float32)
+        yy = yy[None, ...].astype(np.float32)
+        zz = zz[None, ...].astype(np.float32)
+
+        mu_x = mu_x[..., None, None, None]
+        mu_y = mu_y[..., None, None, None]
+        mu_z = mu_z[..., None, None, None]
+
+        target = np.exp(-((xx - mu_x)**2 + (yy - mu_y)**2 + (zz - mu_z)**2) /
+                        (2 * self.sigma**2))
+
+        results['target'] = target
+        results['target_weight'] = target_weight
+        return results
diff --git a/mmpose/datasets/pipelines/shared_transform.py b/mmpose/datasets/pipelines/shared_transform.py
@@ -403,3 +403,60 @@ def __repr__(self):
                      f'{self.saturation_upper}), '
                      f'hue_delta={self.hue_delta})')
         return repr_str
+
+
+@PIPELINES.register_module()
+class MultitaskGatherTarget:
+    """Gather the targets for multitask heads.
+
+    Args:
+        pipeline_list (list[list]): List of pipelines for all heads.
+        pipeline_indices (list[int]): Pipeline index of each head.
+    """
+
+    def __init__(self, pipeline_list, pipeline_indices):
+        self.pipelines = []
+        for pipeline in pipeline_list:
+            self.pipelines.append(Compose(pipeline))
+        self.pipeline_indices = pipeline_indices
+
+    def __call__(self, results):
+        # generate target and target weights using all pipelines
+        _target, _target_weight = [], []
+        for pipeline in self.pipelines:
+            results_head = pipeline(results)
+            _target.append(results_head['target'])
+            _target_weight.append(results_head['target_weight'])
+
+        # reorganize generated target, target_weights according
+        # to self.pipelines_indices
+        target, target_weight = [], []
+        for ind in self.pipeline_indices:
+            target.append(_target[ind])
+            target_weight.append(_target_weight[ind])
+
+        results['target'] = target
+        results['target_weight'] = target_weight
+        return results
+
+
+@PIPELINES.register_module()
+class RenameKeys:
+    """Rename the keys.
+
+    Args:
+    key_pairs (Sequence[tuple]): Required keys to be renamed. If a tuple
+    (key_src, key_tgt) is given as an element, the item retrived by key_src
+    will be renamed as key_tgt.
+    """
+
+    def __init__(self, key_pairs):
+        self.key_pairs = key_pairs
+
+    def __call__(self, results):
+        """Rename keys."""
+        for key_pair in self.key_pairs:
+            assert len(key_pair) == 2
+            key_src, key_tgt = key_pair
+            results[key_tgt] = results.pop(key_src)
+        return results
diff --git a/mmpose/datasets/pipelines/top_down_transform.py b/mmpose/datasets/pipelines/top_down_transform.py
@@ -12,8 +12,9 @@ class TopDownRandomFlip:
     """Data augmentation with random image flip.
 
     Required keys: 'img', 'joints_3d', 'joints_3d_visible', 'center' and
-    'ann_info'. Modifies key: 'img', 'joints_3d', 'joints_3d_visible' and
-    'center'.
+    'ann_info'.
+    Modifies key: 'img', 'joints_3d', 'joints_3d_visible', 'center' and
+    'flipped'.
 
     Args:
         flip (bool): Option to perform random flip.
@@ -30,9 +31,12 @@ def __call__(self, results):
         joints_3d_visible = results['joints_3d_visible']
         center = results['center']
 
+        # A flag indicating whether the image is flipped,
+        # which can be used by child class.
+        flipped = False
         if np.random.rand() <= self.flip_prob:
+            flipped = True
             img = img[:, ::-1, :]
-
             joints_3d, joints_3d_visible = fliplr_joints(
                 joints_3d, joints_3d_visible, img.shape[1],
                 results['ann_info']['flip_pairs'])
@@ -42,6 +46,7 @@ def __call__(self, results):
         results['joints_3d'] = joints_3d
         results['joints_3d_visible'] = joints_3d_visible
         results['center'] = center
+        results['flipped'] = flipped
 
         return results
 
@@ -645,3 +650,31 @@ def __call__(self, results):
         results['target_weight'] = target_weight
 
         return results
+
+
+@PIPELINES.register_module()
+class TopDownRandomTranslation:
+    """Data augmentation with random translation.
+
+    Required key: 'scale' and 'center'. Modifies key: 'center'.
+
+    Notes:
+        bbox height: H
+        bbox width: W
+
+    Args:
+        trans_factor (float): Translating center to
+        ``[-trans_factor, trans_factor] * [W, H] + center``.
+    """
+
+    def __init__(self, trans_factor=0.15):
+        self.trans_factor = trans_factor
+
+    def __call__(self, results):
+        """Perform data augmentation with random translation."""
+        center = results['center']
+        scale = results['scale']
+        # reference bbox size is [200, 200] pixels
+        center += self.trans_factor * (2 * np.random.rand(2) - 1) * scale * 200
+        results['center'] = center
+        return results
diff --git a/mmpose/models/losses/classfication_loss.py b/mmpose/models/losses/classfication_loss.py
@@ -19,16 +19,19 @@ def forward(self, output, target, target_weight):
 
         Note:
             batch_size: N
-            num_keypoints: K
+            num_labels: K
 
         Args:
             output (torch.Tensor[N, K]): Output classification.
             target (torch.Tensor[N, K]): Target classification.
-            target_weight (torch.Tensor[N, K]):
-                Weights across different joint types.
+            target_weight (torch.Tensor[N, K] or torch.Tensor[N]):
+                Weights across different labels.
         """
+
         if self.use_target_weight:
             loss = self.criterion(output, target, reduction='none')
+            if target_weight.dim() == 1:
+                target_weight = target_weight[:, None]
             loss = (loss * target_weight).mean()
         else:
             loss = self.criterion(output, target)
diff --git a/tests/test_datasets/test_hand_dataset.py b/tests/test_datasets/test_hand_dataset.py
@@ -367,6 +367,9 @@ def test_top_down_InterHand3D_dataset():
     data_cfg = dict(
         image_size=[256, 256],
         heatmap_size=[64, 64, 64],
+        heatmap3d_depth_bound=400.0,
+        heatmap_size_root=64,
+        root_depth_bound=400.0,
         num_output_channels=channel_cfg['num_output_channels'],
         num_joints=channel_cfg['dataset_joints'],
         dataset_channel=channel_cfg['dataset_channel'],
diff --git a/tests/test_loss/test_classification_loss.py b/tests/test_loss/test_classification_loss.py
@@ -0,0 +1,39 @@
+import torch
+
+
+def test_bce_loss():
+    from mmpose.models import build_loss
+
+    # test BCE loss without target weight
+    loss_cfg = dict(type='BCELoss')
+    loss = build_loss(loss_cfg)
+
+    fake_pred = torch.zeros((1, 2))
+    fake_label = torch.zeros((1, 2))
+    assert torch.allclose(loss(fake_pred, fake_label, None), torch.tensor(0.))
+
+    fake_pred = torch.ones((1, 2)) * 0.5
+    fake_label = torch.zeros((1, 2))
+    assert torch.allclose(
+        loss(fake_pred, fake_label, None), -torch.log(torch.tensor(0.5)))
+
+    # test BCE loss with target weight
+    loss_cfg = dict(type='BCELoss', use_target_weight=True)
+    loss = build_loss(loss_cfg)
+
+    fake_pred = torch.ones((1, 2)) * 0.5
+    fake_label = torch.zeros((1, 2))
+    fake_weight = torch.ones((1, 2))
+    assert torch.allclose(
+        loss(fake_pred, fake_label, fake_weight),
+        -torch.log(torch.tensor(0.5)))
+
+    fake_weight[:, 0] = 0
+    assert torch.allclose(
+        loss(fake_pred, fake_label, fake_weight),
+        -0.5 * torch.log(torch.tensor(0.5)))
+
+    fake_weight = torch.ones(1)
+    assert torch.allclose(
+        loss(fake_pred, fake_label, fake_weight),
+        -torch.log(torch.tensor(0.5)))
diff --git a/tests/test_pipelines/test_hand_transform.py b/tests/test_pipelines/test_hand_transform.py
diff --git a/tests/test_pipelines/test_pose3d_transform.py b/tests/test_pipelines/test_pose3d_transform.py
diff --git a/tests/test_pipelines/test_shared_transform.py b/tests/test_pipelines/test_shared_transform.py
diff --git a/tests/test_pipelines/test_top_down_pipelines.py b/tests/test_pipelines/test_top_down_pipelines.py

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`from .bottom_up_transform import * # noqa`
	`2`	`+from .hand_transform import * # noqa`
`2`	`3`	`from .loading import LoadImageFromFile # noqa`
`3`	`4`	`from .mesh_transform import * # noqa`
`4`	`5`	`from .pose3d_transform import * # noqa`