This repository has been archived by the owner on Oct 31, 2023. It is now read-only.

Add CPU option to run frankmocap #103

Open · wants to merge 29 commits into base: main
Changes from 28 commits
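The changes below share one recurring pattern: models are constructed on an explicit `torch.device` instead of via hard-coded `.cuda()` calls, and GPU-trained checkpoints are loaded with `map_location='cpu'`. A minimal, self-contained sketch of that pattern (the model and paths are placeholders, not files from this repo):

```python
import torch
import torch.nn as nn

# Select the CPU explicitly rather than calling .cuda() on models/tensors.
device = torch.device('cpu')

model = nn.Linear(10, 2).to(device)  # any model; stands in for the pose networks below

# Checkpoints saved from a GPU run must be remapped when loaded on a CPU-only machine.
# state = torch.load('checkpoint.pth', map_location='cpu')  # placeholder path
# model.load_state_dict(state)
```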
1 change: 1 addition & 0 deletions .gitignore
@@ -34,3 +34,4 @@ demo.sh
demo_multi.sh
mocap_utils/frame_to_video.py
mocap_utils/frame_to_gif.py
mocap_utils/select_epick_kitchen.py
4 changes: 4 additions & 0 deletions README.md
@@ -65,6 +65,10 @@ FrankMocap pursues an easy-to-use single view 3D motion capture system developed
- Note:
- Above commands use openGL by default. If it does not work, you may try alternative renderers (pytorch3d or openDR).
- See the readme of each module for details


## Joint Order
- See [joint_order](docs/joint_order.md)


## Body Motion Capture Module
130 changes: 130 additions & 0 deletions bodymocap/body_bbox_detector_cpu.py
@@ -0,0 +1,130 @@
# Copyright (c) Facebook, Inc. and its affiliates.

import os
import os.path as osp
import sys
import numpy as np
import cv2

import torch
import torchvision.transforms as transforms
# from PIL import Image

# Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/blob/master/demo.py

# 2D body pose estimator
pose2d_estimator_path = './detectors/body_pose_estimator'
sys.path.append(pose2d_estimator_path)
from detectors.body_pose_estimator.pose2d_models.with_mobilenet import PoseEstimationWithMobileNet
from detectors.body_pose_estimator.modules.load_state import load_state
from detectors.body_pose_estimator.val import normalize, pad_width
from detectors.body_pose_estimator.modules.pose import Pose, track_poses
from detectors.body_pose_estimator.modules.keypoints import extract_keypoints, group_keypoints


class BodyPoseEstimator_cpu(object):
"""
    Body pose estimator for third-view input, running on CPU.
    It wraps a lightweight 2D body pose estimator (https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git)
"""
def __init__(self):
print("Loading Body Pose Estimator")
self.__load_body_estimator()


def __load_body_estimator(self):
device = torch.device('cpu')
net = PoseEstimationWithMobileNet().to(device)
pose2d_checkpoint = "./extra_data/body_module/body_pose_estimator/checkpoint_iter_370000.pth"
checkpoint = torch.load(pose2d_checkpoint, map_location='cpu')
load_state(net, checkpoint)
net = net.eval()
# net = net.cuda()
self.model = net


    # Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/blob/master/demo.py
def __infer_fast(self, img, input_height_size, stride, upsample_ratio,
cpu=False, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256):
height, width, _ = img.shape
scale = input_height_size / height

scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
scaled_img = normalize(scaled_img, img_mean, img_scale)
min_dims = [input_height_size, max(scaled_img.shape[1], input_height_size)]
padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims)

tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
        # padded_img is a CPU numpy array, so tensor_img is already on the CPU
        # (the upstream GPU version called .cuda() here when cpu was False).

stages_output = self.model(tensor_img)

stage2_heatmaps = stages_output[-2]
heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0))
heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)

stage2_pafs = stages_output[-1]
pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0))
pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)

return heatmaps, pafs, scale, pad

def detect_body_pose(self, img):
"""
Output:
current_bbox: BBOX_XYWH
"""
stride = 8
upsample_ratio = 4
orig_img = img.copy()

# forward
heatmaps, pafs, scale, pad = self.__infer_fast(img,
input_height_size=256, stride=stride, upsample_ratio=upsample_ratio)

total_keypoints_num = 0
all_keypoints_by_type = []
num_keypoints = Pose.num_kpts
        for kpt_idx in range(num_keypoints):  # the 19th heatmap channel is background
total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)

pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
for kpt_id in range(all_keypoints.shape[0]):
all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale

'''
# print(len(pose_entries))
if len(pose_entries)>1:
pose_entries = pose_entries[:1]
print("We only support one person currently")
# assert len(pose_entries) == 1, "We only support one person currently"
'''

current_poses, current_bbox = list(), list()
for n in range(len(pose_entries)):
if len(pose_entries[n]) == 0:
continue
pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
for kpt_id in range(num_keypoints):
if pose_entries[n][kpt_id] != -1.0: # keypoint was found
pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
pose = Pose(pose_keypoints, pose_entries[n][18])
current_poses.append(pose.keypoints)
current_bbox.append(np.array(pose.bbox))

# enlarge the bbox
for i, bbox in enumerate(current_bbox):
x, y, w, h = bbox
margin = 0.05
x_margin = int(w * margin)
y_margin = int(h * margin)
x0 = max(x-x_margin, 0)
y0 = max(y-y_margin, 0)
x1 = min(x+w+x_margin, orig_img.shape[1])
y1 = min(y+h+y_margin, orig_img.shape[0])
current_bbox[i] = np.array((x0, y0, x1-x0, y1-y0)).astype(np.int32)

return current_poses, current_bbox
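For orientation, a minimal usage sketch of the detector above — assuming the import path matches this file's location and the MobileNet checkpoint referenced in `__load_body_estimator` is in place; the image path is a placeholder:

```python
import cv2
from bodymocap.body_bbox_detector_cpu import BodyPoseEstimator_cpu

detector = BodyPoseEstimator_cpu()              # loads checkpoint_iter_370000.pth on CPU
img = cv2.imread('sample.jpg')                  # placeholder input image (BGR, as cv2 loads it)
poses, bboxes = detector.detect_body_pose(img)  # per-person keypoints and XYWH boxes

for x, y, w, h in bboxes:
    cv2.rectangle(img, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2)
```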
2 changes: 1 addition & 1 deletion bodymocap/body_mocap_api.py
@@ -112,7 +112,7 @@ def regress(self, img_original, body_bbox_list):
pred_output['pred_joints_img'] = pred_joints_vis_img # SMPL joints in image space

pred_aa_tensor = gu.rotation_matrix_to_angle_axis(pred_rotmat.detach().cpu()[0])
pred_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy().reshape(1, 72)
pred_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy().reshape(1, 72) # (1, 72)

pred_output['pred_rotmat'] = pred_rotmat.detach().cpu().numpy() # (1, 24, 3, 3)
pred_output['pred_betas'] = pred_betas.detach().cpu().numpy() # (1, 10)
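The shape comment added here records the axis-angle layout: SMPL has 24 joints, each a 3-vector, hence 24 × 3 = 72 values. A quick sanity check using the same helper as the diff (identity rotations stand in for real predictions):

```python
import torch
import mocap_utils.geometry_utils as gu

pred_rotmat = torch.eye(3).repeat(24, 1, 1)              # (24, 3, 3) identity rotations
pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat)  # (24, 3) axis-angle vectors
print(pred_aa.reshape(1, 72).shape)                      # torch.Size([1, 72])
```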
183 changes: 183 additions & 0 deletions bodymocap/body_mocap_api_cpu.py
@@ -0,0 +1,183 @@
# Copyright (c) Facebook, Inc. and its affiliates.

import cv2
import sys
import torch
import numpy as np
import pickle
from torchvision.transforms import Normalize

from bodymocap.models import hmr, SMPL, SMPLX
from bodymocap import constants
from bodymocap.utils.imutils import crop, crop_bboxInfo, process_image_bbox, process_image_keypoints, bbox_from_keypoints
from mocap_utils.coordconv import convert_smpl_to_bbox, convert_bbox_to_oriIm
import mocap_utils.geometry_utils as gu


class BodyMocap_cpu(object):
def __init__(self, regressor_checkpoint, smpl_dir, device=torch.device('cpu'), use_smplx=False):

        self.device = device  # default is CPU; note torch's GPU device string is 'cuda', not 'gpu'

# Load parametric model (SMPLX or SMPL)
if use_smplx:
smplModelPath = smpl_dir + '/SMPLX_NEUTRAL.pkl'
self.smpl = SMPLX(smpl_dir,
batch_size=1,
num_betas = 10,
use_pca = False,
create_transl=False).to(self.device)
self.use_smplx = True
else:
smplModelPath = smpl_dir + '/basicModel_neutral_lbs_10_207_0_v1.0.0.pkl'
self.smpl = SMPL(smplModelPath, batch_size=1, create_transl=False).to(self.device)
self.use_smplx = False

#Load pre-trained neural network
SMPL_MEAN_PARAMS = './extra_data/body_module/data_from_spin/smpl_mean_params.npz'
self.model_regressor = hmr(SMPL_MEAN_PARAMS).to(self.device)
        checkpoint = torch.load(regressor_checkpoint, map_location=torch.device('cpu'))
self.model_regressor.load_state_dict(checkpoint['model'], strict=False)
self.model_regressor.eval()


def regress(self, img_original, body_bbox_list):
"""
args:
img_original: original raw image (BGR order by using cv2.imread)
body_bbox: bounding box around the target: (minX, minY, width, height)
outputs:
pred_vertices_img:
pred_joints_vis_img:
pred_rotmat
pred_betas
pred_camera
            bbox: [bbr[0], bbr[1], bbr[0]+bbr[2], bbr[1]+bbr[3]]
bboxTopLeft: bbox top left (redundant)
boxScale_o2n: bbox scaling factor (redundant)
"""
pred_output_list = list()

for body_bbox in body_bbox_list:
img, norm_img, boxScale_o2n, bboxTopLeft, bbox = process_image_bbox(
img_original, body_bbox, input_res=constants.IMG_RES)
bboxTopLeft = np.array(bboxTopLeft)

# bboxTopLeft = bbox['bboxXYWH'][:2]
if img is None:
pred_output_list.append(None)
continue

with torch.no_grad():
# model forward
pred_rotmat, pred_betas, pred_camera = self.model_regressor(norm_img.to(self.device))

#Convert rot_mat to aa since hands are always in aa
# pred_aa = rotmat3x3_to_angle_axis(pred_rotmat)
pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cpu()
pred_aa = pred_aa.reshape(pred_aa.shape[0], 72)
smpl_output = self.smpl(
betas=pred_betas,
body_pose=pred_aa[:,3:],
global_orient=pred_aa[:,:3],
pose2rot=True)
pred_vertices = smpl_output.vertices
pred_joints_3d = smpl_output.joints

pred_vertices = pred_vertices[0].cpu().numpy()

pred_camera = pred_camera.cpu().numpy().ravel()
camScale = pred_camera[0] # *1.15
camTrans = pred_camera[1:]

pred_output = dict()
# Convert mesh to original image space (X,Y are aligned to image)
# 1. SMPL -> 2D bbox
# 2. 2D bbox -> original 2D image
pred_vertices_bbox = convert_smpl_to_bbox(pred_vertices, camScale, camTrans)
pred_vertices_img = convert_bbox_to_oriIm(
pred_vertices_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])

# Convert joint to original image space (X,Y are aligned to image)
            pred_joints_3d = pred_joints_3d[0].cpu().numpy() # (49, 3) after dropping the batch dim
            pred_joints_vis = pred_joints_3d[:, :3] # (49, 3)
pred_joints_vis_bbox = convert_smpl_to_bbox(pred_joints_vis, camScale, camTrans)
pred_joints_vis_img = convert_bbox_to_oriIm(
pred_joints_vis_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])

# Output
pred_output['img_cropped'] = img[:, :, ::-1]
pred_output['pred_vertices_smpl'] = smpl_output.vertices[0].cpu().numpy() # SMPL vertex in original smpl space
pred_output['pred_vertices_img'] = pred_vertices_img # SMPL vertex in image space
pred_output['pred_joints_img'] = pred_joints_vis_img # SMPL joints in image space

pred_aa_tensor = gu.rotation_matrix_to_angle_axis(pred_rotmat.detach().cpu()[0])
pred_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy().reshape(1, 72)

pred_output['pred_rotmat'] = pred_rotmat.detach().cpu().numpy() # (1, 24, 3, 3)
pred_output['pred_betas'] = pred_betas.detach().cpu().numpy() # (1, 10)

pred_output['pred_camera'] = pred_camera
pred_output['bbox_top_left'] = bboxTopLeft
pred_output['bbox_scale_ratio'] = boxScale_o2n
pred_output['faces'] = self.smpl.faces

if self.use_smplx:
                img_center = np.array((img_original.shape[1], img_original.shape[0])) * 0.5
# right hand
pred_joints = smpl_output.right_hand_joints[0].cpu().numpy()
pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans)
pred_joints_img = convert_bbox_to_oriIm(
pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])
pred_output['right_hand_joints_img_coord'] = pred_joints_img
# left hand
pred_joints = smpl_output.left_hand_joints[0].cpu().numpy()
pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans)
pred_joints_img = convert_bbox_to_oriIm(
pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])
pred_output['left_hand_joints_img_coord'] = pred_joints_img

pred_output_list.append(pred_output)

return pred_output_list


def get_hand_bboxes(self, pred_body_list, img_shape):
"""
args:
            pred_body_list: output of body regression
img_shape: img_height, img_width
outputs:
hand_bbox_list:
"""
hand_bbox_list = list()
for pred_body in pred_body_list:
hand_bbox = dict(
left_hand = None,
right_hand = None
)
if pred_body is None:
hand_bbox_list.append(hand_bbox)
else:
for hand_type in hand_bbox:
key = f'{hand_type}_joints_img_coord'
pred_joints_vis_img = pred_body[key]

if pred_joints_vis_img is not None:
# get initial bbox
x0, x1 = np.min(pred_joints_vis_img[:, 0]), np.max(pred_joints_vis_img[:, 0])
y0, y1 = np.min(pred_joints_vis_img[:, 1]), np.max(pred_joints_vis_img[:, 1])
width, height = x1-x0, y1-y0
# extend the obtained bbox
margin = int(max(height, width) * 0.2)
img_height, img_width = img_shape
x0 = max(x0 - margin, 0)
y0 = max(y0 - margin, 0)
x1 = min(x1 + margin, img_width)
y1 = min(y1 + margin, img_height)
                        # resulting bbox in (x, y, w, h) format
                        hand_bbox[hand_type] = np.array([x0, y0, x1-x0, y1-y0])

hand_bbox_list.append(hand_bbox)

return hand_bbox_list
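A sketch of how the class above might be driven end to end — the checkpoint, SMPL directory, and image paths are placeholders; note that `get_hand_bboxes` reads the `*_joints_img_coord` keys, which `regress` only fills when `use_smplx=True`:

```python
import cv2
import torch
from bodymocap.body_mocap_api_cpu import BodyMocap_cpu

body_mocap = BodyMocap_cpu(
    regressor_checkpoint='body_checkpoint.pt',  # placeholder checkpoint path
    smpl_dir='./extra_data/smpl',               # directory holding SMPLX_NEUTRAL.pkl
    device=torch.device('cpu'),
    use_smplx=True)

img = cv2.imread('sample.jpg')                  # placeholder image (BGR)
body_bbox_list = [[50, 50, 200, 400]]           # one XYWH box, e.g. from the detector above
pred_output_list = body_mocap.regress(img, body_bbox_list)
hand_bbox_list = body_mocap.get_hand_bboxes(pred_output_list, img.shape[:2])
```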
9 changes: 4 additions & 5 deletions bodymocap/models/smpl.py
@@ -77,15 +77,14 @@ def forward(self, *args, **kwargs):
extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices)
# extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices[:,:6890]) *0 #TODO: implement this correctly


#SMPL-X Joint order: https://docs.google.com/spreadsheets/d/1_1dLdaX-sbMkCKr_JzJW_RZCpwBwd7rcKkWT_VgAQ_0/edit#gid=0
smplx_to_smpl = list(range(0,22)) + [28,43] + list(range(55,76)) #28 left middle finger , 43: right middle finger 1
smpl_joints = smpl_output.joints[:,smplx_to_smpl,:] #Convert SMPL-X to SMPL 127 ->45
joints = torch.cat([smpl_joints, extra_joints], dim=1) #[N, 127, 3]->[N, 45, 3] + [N, 9, 3] #SMPL-X has more joints. should convert 45
smplx_to_smpl = list(range(0,22)) + [28,43] + list(range(55,76)) # 28 left middle finger , 43: right middle finger 1
smpl_joints = smpl_output.joints[:,smplx_to_smpl,:] # Convert SMPL-X to SMPL 127 ->45
joints = torch.cat([smpl_joints, extra_joints], dim=1) # [N, 127, 3]->[N, 45, 3] + [N, 9, 3] # SMPL-X has more joints. should convert 45
joints = joints[:, self.joint_map, :]

# Hand joints
smplx_hand_to_panoptic = [0, 13,14,15,16, 1,2,3,17, 4,5,6,18, 10,11,12,19, 7,8,9,20] #Wrist Thumb to Pinky
smplx_hand_to_panoptic = [0, 13,14,15,16, 1,2,3,17, 4,5,6,18, 10,11,12,19, 7,8,9,20] #Wrist Thumb to Pinky

smplx_lhand = [20] + list(range(25,40)) + list(range(66, 71)) #20 for left wrist. 20 finger joints
lhand_joints = smpl_output.joints[:,smplx_lhand, :] #(N,21,3)
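The index lists in this hunk compress SMPL-X's joint set down to SMPL's: 22 body joints, two middle-finger joints standing in for the hands, and 21 additional joints, for 45 in total; the hand lists each select 21 joints (wrist plus 20 finger joints). A quick check of those sizes:

```python
smplx_to_smpl = list(range(0, 22)) + [28, 43] + list(range(55, 76))
print(len(smplx_to_smpl))  # 45 = 22 body + 2 middle-finger + 21 extra joints

smplx_lhand = [20] + list(range(25, 40)) + list(range(66, 71))
print(len(smplx_lhand))    # 21 = left wrist + 20 finger joints
```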
3 changes: 2 additions & 1 deletion demo/demo_bodymocap.py
@@ -19,6 +19,7 @@
import mocap_utils.general_utils as gnu
from mocap_utils.timer import Timer

import renderer.image_utils as imu
from renderer.viewer2D import ImShow

def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer):
@@ -120,7 +121,7 @@ def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer):
img_original_bgr,
pred_mesh_list = pred_mesh_list,
body_bbox_list = body_bbox_list)

# show result in the screen
if not args.no_display:
res_img = res_img.astype(np.uint8)
Expand Down