Skip to content

Commit

Permalink
Add 3D pose pipeline (open-mmlab#524)
Browse files Browse the repository at this point in the history
* Add 3D pose pipeline

* Add transforms on joint coordinates in pipelines
* Add camera projection in pipelines
* Add camera interface in mmpose.core

* Add 3D pose pipeline

* Revise code

* fix variable name

* Add 3D pose pipeline

* update unittests for better codecov rate

* Add 3D pose pipeline

* update unittests for better codecov rate

* Add 3D pose pipeline

* update unittests for better codecov rate

* Add 3D pose pipeline

* Add PoseSequenceToTensor

* minor fix according to review comments

* Revise according to review comments

* rebase to master
* extend fliplr_regression to handle 2D/3D
* add remove_root option to JointRelativization

* Fix docstring

* update unittest

* update unittest

* update camera parameters to be in meter

* minor fix to docstrings

* minor fix

* fix importing
  • Loading branch information
ly015 authored Mar 30, 2021
1 parent 9dd8b12 commit ab38160
Show file tree
Hide file tree
Showing 14 changed files with 769 additions and 47 deletions.
1 change: 1 addition & 0 deletions mmpose/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .camera import * # noqa: F401, F403
from .evaluation import * # noqa: F401, F403
from .fp16 import * # noqa: F401, F403
from .optimizer import * # noqa: F401, F403
Expand Down
4 changes: 4 additions & 0 deletions mmpose/core/camera/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .camera_base import CAMERAS
from .single_camera import SimpleCamera

__all__ = ['CAMERAS', 'SimpleCamera']
44 changes: 44 additions & 0 deletions mmpose/core/camera/camera_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from abc import ABCMeta, abstractmethod

from mmcv.utils import Registry

# Registry for camera model classes; classes are added to it with the
# ``@CAMERAS.register_module()`` decorator.
CAMERAS = Registry('camera')


class SingleCameraBase(metaclass=ABCMeta):
    """Abstract interface of a single-camera model.

    Subclasses must provide ``__init__`` and override the coordinate
    transforms they support; transforms that are not overridden raise
    ``NotImplementedError``.

    Args:
        param (dict): Camera parameters.

    Methods:
        world_to_camera: Map points from world coordinates to camera
            coordinates.
        camera_to_world: Map points from camera coordinates to world
            coordinates.
        camera_to_pixel: Map points from camera coordinates to pixel
            coordinates.
        world_to_pixel: Map points from world coordinates to pixel
            coordinates (``world_to_camera`` followed by
            ``camera_to_pixel``).
    """

    @abstractmethod
    def __init__(self, param):
        """Read the camera parameters and validate them."""

    def world_to_camera(self, X):
        """Map points from world coordinates to camera coordinates."""
        raise NotImplementedError

    def camera_to_world(self, X):
        """Map points from camera coordinates to world coordinates."""
        raise NotImplementedError

    def camera_to_pixel(self, X):
        """Map points from camera coordinates to pixel coordinates."""
        raise NotImplementedError

    def world_to_pixel(self, X):
        """Map points from world coordinates to pixel coordinates."""
        # Chain the two elementary transforms: world -> camera -> pixel.
        return self.camera_to_pixel(self.world_to_camera(X))
110 changes: 110 additions & 0 deletions mmpose/core/camera/single_camera.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import numpy as np

from .camera_base import CAMERAS, SingleCameraBase


@CAMERAS.register_module()
class SimpleCamera(SingleCameraBase):
    """Camera model that transforms coordinates using the given
    intrinsic/extrinsic camera parameters.

    Notes:
        The keypoint coordinates should be an np.ndarray with a shape of
        [..., J, C] where J is the keypoint number of an instance, and C is
        the coordinate dimension. For example:

            [J, C]: shape of joint coordinates of a person with J joints.
            [N, J, C]: shape of a batch of person joint coordinates.
            [N, T, J, C]: shape of a batch of pose sequences.

    Args:
        param (dict): camera parameters including:
            - R: 3x3, camera rotation matrix (camera-to-world)
            - T: 3x1, camera translation (camera-to-world)
            - K: (optional) 2x3, camera intrinsic matrix
            - k: (optional) nx1, camera radial distortion coefficients
            - p: (optional) mx1, camera tangential distortion coefficients
            - f: (optional) 2x1, camera focal length
            - c: (optional) 2x1, camera center
            if K is not provided, it will be calculated from f and c.

    Methods:
        world_to_camera: Project points from world coordinates to camera
            coordinates
        camera_to_world: Project points from camera coordinates to world
            coordinates
        camera_to_pixel: Project points from camera coordinates to pixel
            coordinates
        world_to_pixel: Project points from world coordinates to pixel
            coordinates
    """

    def __init__(self, param):
        self.param = {}

        # ---- extrinsic parameters ----
        rotation = np.array(param['R'], dtype=np.float32)
        translation = np.array(param['T'], dtype=np.float32)
        assert rotation.shape == (3, 3)
        assert translation.shape == (3, 1)
        # Joint coordinates are stored as row vectors, so every matrix is
        # kept in its transposed form and applied by right-multiplication.
        self.param['R_c2w'] = rotation.T
        self.param['T_c2w'] = translation.T
        self.param['R_w2c'] = rotation
        # Inverse of ``X @ R_c2w + T_c2w``: ``X @ R_w2c - T_c2w @ R_w2c``.
        self.param['T_w2c'] = -translation.T @ rotation

        # ---- intrinsic parameters ----
        if 'K' in param:
            intrinsic = np.array(param['K'], dtype=np.float32)
            assert intrinsic.shape == (2, 3)
            self.param['K'] = intrinsic.T
        elif 'f' in param and 'c' in param:
            focal = np.array(param['f'], dtype=np.float32)
            center = np.array(param['c'], dtype=np.float32)
            assert focal.shape == (2, 1)
            assert center.shape == (2, 1)
            # Assemble the 2x3 matrix [diag(f) | c], then transpose it.
            intrinsic = np.concatenate((np.diagflat(focal), center), axis=-1)
            self.param['K'] = intrinsic.T
        else:
            raise ValueError('Camera intrinsic parameters are missing. '
                             'Either "K" or "f"&"c" should be provided.')

        # ---- distortion parameters ----
        # Distortion is applied only when both coefficient sets are given.
        self.undistortion = 'k' in param and 'p' in param
        if self.undistortion:
            self.param['k'] = np.array(param['k'], dtype=np.float32).flatten()
            self.param['p'] = np.array(param['p'], dtype=np.float32).flatten()
            assert self.param['k'].size in {3, 6}
            assert self.param['p'].size == 2

    def world_to_camera(self, X):
        assert isinstance(X, np.ndarray)
        assert X.ndim >= 2 and X.shape[-1] == 3
        rot, trans = self.param['R_w2c'], self.param['T_w2c']
        return X @ rot + trans

    def camera_to_world(self, X):
        assert isinstance(X, np.ndarray)
        assert X.ndim >= 2 and X.shape[-1] == 3
        rot, trans = self.param['R_c2w'], self.param['T_c2w']
        return X @ rot + trans

    def camera_to_pixel(self, X):
        assert isinstance(X, np.ndarray)
        assert X.ndim >= 2 and X.shape[-1] == 3

        # Perspective division; the last coordinate becomes 1 so that the
        # 3x2 transposed intrinsic matrix can be applied directly.
        projected = X / X[..., 2:]
        if not self.undistortion:
            return projected @ self.param['K']

        k = self.param['k']
        p = self.param['p']
        xy = projected[..., :2]
        r2 = (xy**2).sum(-1)

        # Radial term: polynomial in r2 from the first three coefficients,
        # divided by a second polynomial when six coefficients are given.
        radial = 1 + sum(
            coef * r2**power for power, coef in enumerate(k[:3], start=1))
        if k.size == 6:
            radial /= 1 + sum(
                coef * r2**power
                for power, coef in enumerate(k[3:], start=1))

        tangential = 2 * (p[1] * projected[..., 0] + p[0] * projected[..., 1])

        scale = (radial + tangential)[..., None]
        offset = np.outer(r2, p[::-1]).reshape(xy.shape)
        projected[..., :2] = xy * scale + offset
        return projected @ self.param['K']
10 changes: 4 additions & 6 deletions mmpose/core/post_processing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
from .nms import oks_nms, soft_oks_nms
from .post_transforms import (affine_transform, flip_back, fliplr_joints,
fliplr_regression, fliplr_regression_3d,
get_affine_transform, get_warp_matrix,
rotate_point, transform_preds,
fliplr_regression, get_affine_transform,
get_warp_matrix, rotate_point, transform_preds,
warp_affine_joints)

__all__ = [
'oks_nms', 'soft_oks_nms', 'affine_transform', 'rotate_point', 'flip_back',
'fliplr_joints', 'fliplr_regression', 'fliplr_regression_3d',
'transform_preds', 'get_affine_transform', 'get_warp_matrix',
'warp_affine_joints'
'fliplr_joints', 'fliplr_regression', 'transform_preds',
'get_affine_transform', 'get_warp_matrix', 'warp_affine_joints'
]
62 changes: 32 additions & 30 deletions mmpose/core/post_processing/post_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,56 +50,58 @@ def fliplr_joints(joints_3d, joints_3d_visible, img_width, flip_pairs):
return joints_3d_flipped, joints_3d_visible_flipped


def fliplr_regression(regression,
                      flip_pairs,
                      center_mode='static',
                      center_x=0.5,
                      center_index=0):
    """Flip human joints horizontally.

    Note:
        batch_size: N
        num_keypoint: K

    Args:
        regression (np.ndarray([..., K, C])): Coordinates of keypoints, where
            K is the joint number and C is the dimension. Example shapes are:

            - [N, K, C]: a batch of keypoints where N is the batch size.
            - [N, T, K, C]: a batch of pose sequences, where T is the frame
              number.
        flip_pairs (list[tuple()]): Pairs of keypoints which are mirrored
            (for example, left ear -- right ear).
        center_mode (str): The mode to set the center location on the x-axis
            to flip around. Options are:

            - static: use a static x value (see center_x also)
            - root: use a root joint (see center_index also)
        center_x (float): Set the x-axis location of the flip center. Only
            used when center_mode=static.
        center_index (int): Set the index of the root joint, whose x location
            will be used as the flip center. Negative values count from the
            last joint. Only used when center_mode=root.

    Returns:
        np.ndarray([..., K, C]): Flipped joints. The input array is left
            unmodified.
    """
    assert regression.ndim >= 2, f'Invalid pose shape {regression.shape}'

    allowed_center_mode = {'static', 'root'}
    assert center_mode in allowed_center_mode, 'Get invalid center_mode ' \
        f'{center_mode}, allowed choices are {allowed_center_mode}'

    if center_mode == 'static':
        x_c = center_x
    elif center_mode == 'root':
        num_joints = regression.shape[-2]
        assert -num_joints <= center_index < num_joints
        # Normalize negative indices so the one-element slice below is never
        # empty; the slice keeps a trailing axis of length 1 so that x_c
        # broadcasts against the [..., K] array of x coordinates.
        root = center_index % num_joints
        x_c = regression[..., root:root + 1, 0]

    regression_flipped = regression.copy()
    # Swap left-right parts (read from the untouched input, write to the copy)
    for left, right in flip_pairs:
        regression_flipped[..., left, :] = regression[..., right, :]
        regression_flipped[..., right, :] = regression[..., left, :]

    # Flip horizontally: mirror every x coordinate around x_c
    regression_flipped[..., 0] = x_c * 2 - regression_flipped[..., 0]
    return regression_flipped


Expand Down
1 change: 1 addition & 0 deletions mmpose/datasets/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .bottom_up_transform import * # noqa
from .loading import LoadImageFromFile # noqa
from .mesh_transform import * # noqa
from .pose3d_transform import * # noqa
from .shared_transform import * # noqa
from .top_down_transform import * # noqa
Loading

0 comments on commit ab38160

Please sign in to comment.