Add 3D pose pipeline (open-mmlab#524)

* Add 3D pose pipeline
* Add transforms on joint coordinates in pipelines
* Add camera projection in pipelines
* Add camera interface in mmpose.core
* Add 3D pose pipeline
* Revise code
* fix variable name
* Add 3D pose pipeline
* update unittests for better codecov rate
* Add 3D pose pipeline
* update unittests for better codecov rate
* Add 3D pose pipeline
* update unittests for better codecov rate
* Add 3D pose pipeline
* Add PoseSequenceToTensor
* minor fix according to review comments
* Revise according to review comments
* rebase to master
* extend fliplr_regression to handle 2D/3D
* add remove_root option to JointRelativization
* Fix docstring
* update unittest
* update unittest
* update camera parameters to be in meter
* minor fix to docstrings
* minor fix
* fix importing
wjkim81 · Mar 30, 2021 · ab38160 · ab38160
1 parent 9dd8b12
commit ab38160
Show file tree

Hide file tree

Showing 14 changed files with 769 additions and 47 deletions.
diff --git a/mmpose/core/__init__.py b/mmpose/core/__init__.py
@@ -1,3 +1,4 @@
+from .camera import *  # noqa: F401, F403
 from .evaluation import *  # noqa: F401, F403
 from .fp16 import *  # noqa: F401, F403
 from .optimizer import *  # noqa: F401, F403

diff --git a/mmpose/core/camera/__init__.py b/mmpose/core/camera/__init__.py
@@ -0,0 +1,4 @@
+from .camera_base import CAMERAS
+from .single_camera import SimpleCamera
+
+__all__ = ['CAMERAS', 'SimpleCamera']
diff --git a/mmpose/core/camera/camera_base.py b/mmpose/core/camera/camera_base.py
@@ -0,0 +1,44 @@
+from abc import ABCMeta, abstractmethod
+
+from mmcv.utils import Registry
+
+CAMERAS = Registry('camera')
+
+
+class SingleCameraBase(metaclass=ABCMeta):
+    """Base class for single camera model.
+
+    Args:
+        param (dict): Camera parameters
+
+    Methods:
+        world_to_camera: Project points from world coordinates to camera
+            coordinates
+        camera_to_world: Project points from camera coordinates to world
+            coordinates
+        camera_to_pixel: Project points from camera coordinates to pixel
+            coordinates
+        world_to_pixel: Project points from world coordinates to pixel
+            coordinates
+    """
+
+    @abstractmethod
+    def __init__(self, param):
+        """Load camera parameters and check validity."""
+
+    def world_to_camera(self, X):
+        """Project points from world coordinates to camera coordinates."""
+        raise NotImplementedError
+
+    def camera_to_world(self, X):
+        """Project points from camera coordinates to world coordinates."""
+        raise NotImplementedError
+
+    def camera_to_pixel(self, X):
+        """Project points from camera coordinates to pixel coordinates."""
+        raise NotImplementedError
+
+    def world_to_pixel(self, X):
+        """Project points from world coordinates to pixel coordinates."""
+        _X = self.world_to_camera(X)
+        return self.camera_to_pixel(_X)
diff --git a/mmpose/core/camera/single_camera.py b/mmpose/core/camera/single_camera.py
@@ -0,0 +1,110 @@
+import numpy as np
+
+from .camera_base import CAMERAS, SingleCameraBase
+
+
+@CAMERAS.register_module()
+class SimpleCamera(SingleCameraBase):
+    """Camera model to calculate coordinate transformation with given
+    intrinsic/extrinsic camera parameters.
+
+    Notes:
+        The keypoint coordinate should be an np.ndarray with a shape of
+    [...,J, C] where J is the keypoint number of an instance, and C is
+    the coordinate dimension. For example:
+
+        [J, C]: shape of joint coordinates of a person with J joints.
+        [N, J, C]: shape of a batch of person joint coordinates.
+        [N, T, J, C]: shape of a batch of pose sequences.
+
+    Args:
+        param (dict): camera parameters including:
+            - R: 3x3, camera rotation matrix (camera-to-world)
+            - T: 3x1, camera translation (camera-to-world)
+            - K: (optional) 2x3, camera intrinsic matrix
+            - k: (optional) 3x1 or 6x1, camera radial distortion coefficients
+            - p: (optional) 2x1, camera tangential distortion coefficients
+            - f: (optional) 2x1, camera focal length
+            - c: (optional) 2x1, camera center
+        if K is not provided, it will be calculated from f and c.
+
+    Methods:
+        world_to_camera: Project points from world coordinates to camera
+            coordinates
+        camera_to_pixel: Project points from camera coordinates to pixel
+            coordinates
+        world_to_pixel: Project points from world coordinates to pixel
+            coordinates
+    """
+
+    def __init__(self, param):
+
+        self.param = {}
+        # extrinsic param
+        R = np.array(param['R'], dtype=np.float32)
+        T = np.array(param['T'], dtype=np.float32)
+        assert R.shape == (3, 3)
+        assert T.shape == (3, 1)
+        # The camera matrices are transposed in advance because the joint
+        # coordinates are stored as row vectors.
+        self.param['R_c2w'] = R.T
+        self.param['T_c2w'] = T.T
+        self.param['R_w2c'] = R
+        self.param['T_w2c'] = -self.param['T_c2w'] @ self.param['R_w2c']
+
+        # intrinsic param
+        if 'K' in param:
+            K = np.array(param['K'], dtype=np.float32)
+            assert K.shape == (2, 3)
+            self.param['K'] = K.T
+        elif 'f' in param and 'c' in param:
+            f = np.array(param['f'], dtype=np.float32)
+            c = np.array(param['c'], dtype=np.float32)
+            assert f.shape == (2, 1)
+            assert c.shape == (2, 1)
+            self.param['K'] = np.concatenate((np.diagflat(f), c), axis=-1).T
+        else:
+            raise ValueError('Camera intrinsic parameters are missing. '
+                             'Either "K" or "f"&"c" should be provided.')
+
+        # distortion param
+        if 'k' in param and 'p' in param:
+            self.undistortion = True
+            self.param['k'] = np.array(param['k'], dtype=np.float32).flatten()
+            self.param['p'] = np.array(param['p'], dtype=np.float32).flatten()
+            assert self.param['k'].size in {3, 6}
+            assert self.param['p'].size == 2
+        else:
+            self.undistortion = False
+
+    def world_to_camera(self, X):
+        assert isinstance(X, np.ndarray)
+        assert X.ndim >= 2 and X.shape[-1] == 3
+        return X @ self.param['R_w2c'] + self.param['T_w2c']
+
+    def camera_to_world(self, X):
+        assert isinstance(X, np.ndarray)
+        assert X.ndim >= 2 and X.shape[-1] == 3
+        return X @ self.param['R_c2w'] + self.param['T_c2w']
+
+    def camera_to_pixel(self, X):
+        assert isinstance(X, np.ndarray)
+        assert X.ndim >= 2 and X.shape[-1] == 3
+
+        _X = X / X[..., 2:]
+
+        if self.undistortion:
+            k = self.param['k']
+            p = self.param['p']
+            _X_2d = _X[..., :2]
+            r2 = (_X_2d**2).sum(-1)
+            radial = 1 + sum(ki * r2**(i + 1) for i, ki in enumerate(k[:3]))
+            if k.size == 6:
+                radial /= 1 + sum(
+                    (ki * r2**(i + 1) for i, ki in enumerate(k[3:])))
+
+            tangential = 2 * (p[1] * _X[..., 0] + p[0] * _X[..., 1])
+
+            _X[..., :2] = _X_2d * (radial + tangential)[..., None] + np.outer(
+                r2, p[::-1]).reshape(_X_2d.shape)
+        return _X @ self.param['K']
diff --git a/mmpose/core/post_processing/__init__.py b/mmpose/core/post_processing/__init__.py
@@ -1,13 +1,11 @@
 from .nms import oks_nms, soft_oks_nms
 from .post_transforms import (affine_transform, flip_back, fliplr_joints,
-                              fliplr_regression, fliplr_regression_3d,
-                              get_affine_transform, get_warp_matrix,
-                              rotate_point, transform_preds,
+                              fliplr_regression, get_affine_transform,
+                              get_warp_matrix, rotate_point, transform_preds,
                               warp_affine_joints)
 
 __all__ = [
     'oks_nms', 'soft_oks_nms', 'affine_transform', 'rotate_point', 'flip_back',
-    'fliplr_joints', 'fliplr_regression', 'fliplr_regression_3d',
-    'transform_preds', 'get_affine_transform', 'get_warp_matrix',
-    'warp_affine_joints'
+    'fliplr_joints', 'fliplr_regression', 'transform_preds',
+    'get_affine_transform', 'get_warp_matrix', 'warp_affine_joints'
 ]
diff --git a/mmpose/core/post_processing/post_transforms.py b/mmpose/core/post_processing/post_transforms.py
@@ -50,56 +50,58 @@ def fliplr_joints(joints_3d, joints_3d_visible, img_width, flip_pairs):
     return joints_3d_flipped, joints_3d_visible_flipped
 
 
-def fliplr_regression(regression, flip_pairs):
+def fliplr_regression(regression,
+                      flip_pairs,
+                      center_mode='static',
+                      center_x=0.5,
+                      center_index=0):
     """Flip human joints horizontally.
 
     Note:
         batch_size: N
         num_keypoint: K
     Args:
-        regression (np.ndarray([N, K, 2])): Coordinates of keypoints.
+        regression (np.ndarray([..., K, C])): Coordinates of keypoints, where K
+            is the joint number and C is the dimension. Example shapes are:
+            - [N, K, C]: a batch of keypoints where N is the batch size.
+            - [N, T, K, C]: a batch of pose sequences, where T is the frame
+                number.
         flip_pairs (list[tuple()]): Pairs of keypoints which are mirrored
             (for example, left ear -- right ear).
+        center_mode (str): The mode to set the center location on the x-axis
+            to flip around. Options are:
+            - static: use a static x value (see center_x also)
+            - root: use a root joint (see center_index also)
+        center_x (float): Set the x-axis location of the flip center. Only used
+            when center_mode=static.
+        center_index (int): Set the index of the root joint, whose x location
+            will be used as the flip center. Only used when center_mode=root.
 
     Returns:
         tuple: Flipped human joints.
 
-        - regression_flipped (np.ndarray([N, K, 2])): Flipped joints.
+        - regression_flipped (np.ndarray([..., K, C])): Flipped joints.
     """
-    regression_flipped = regression.copy()
-    # Swap left-right parts
-    for left, right in flip_pairs:
-        regression_flipped[:, left, :] = regression[:, right, :]
-        regression_flipped[:, right, :] = regression[:, left, :]
-
-    # Flip horizontally
-    regression_flipped[:, :, 0] = 1 - regression_flipped[:, :, 0]
-    return regression_flipped
+    assert regression.ndim >= 2, f'Invalid pose shape {regression.shape}'
 
+    allowed_center_mode = {'static', 'root'}
+    assert center_mode in allowed_center_mode, 'Get invalid center_mode ' \
+        f'{center_mode}, allowed choices are {allowed_center_mode}'
 
-def fliplr_regression_3d(regression, flip_pairs):
-    """Flip human joints horizontally.
+    if center_mode == 'static':
+        x_c = center_x
+    elif center_mode == 'root':
+        assert regression.shape[-2] > center_index
+        x_c = regression[..., center_index:center_index + 1, 0]
 
-    Note:
-        batch_size: N
-        num_keypoint: K
-    Args:
-        regression (np.ndarray([N, K, 3])): Coordinates of keypoints.
-        flip_pairs (list[tuple()]): Pairs of keypoints which are mirrored
-            (for example, left ear -- right ear).
-
-    Returns:
-        tuple: Flipped human joints.
-
-        - regression_flipped (np.ndarray([N, K, 3])): Flipped joints.
-    """
     regression_flipped = regression.copy()
     # Swap left-right parts
     for left, right in flip_pairs:
-        regression_flipped[:, left, :] = regression[:, right, :]
-        regression_flipped[:, right, :] = regression[:, left, :]
+        regression_flipped[..., left, :] = regression[..., right, :]
+        regression_flipped[..., right, :] = regression[..., left, :]
+
     # Flip horizontally
-    regression_flipped[:, :, 0] = -regression_flipped[:, :, 0]
+    regression_flipped[..., 0] = x_c * 2 - regression_flipped[..., 0]
     return regression_flipped
 
 

diff --git a/mmpose/datasets/pipelines/__init__.py b/mmpose/datasets/pipelines/__init__.py
@@ -1,5 +1,6 @@
 from .bottom_up_transform import *  # noqa
 from .loading import LoadImageFromFile  # noqa
 from .mesh_transform import *  # noqa
+from .pose3d_transform import *  # noqa
 from .shared_transform import *  # noqa
 from .top_down_transform import *  # noqa