This repository was archived by the owner on Oct 31, 2023. It is now read-only.

Add CPU option to run frankmocap #103

Open · wants to merge 29 commits into base: main
130 changes: 130 additions & 0 deletions bodymocap/body_bbox_detector_cpu.py
@@ -0,0 +1,130 @@
# Copyright (c) Facebook, Inc. and its affiliates.

import os
import os.path as osp
import sys
import numpy as np
import cv2

import torch
import torchvision.transforms as transforms
# from PIL import Image

# Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/blob/master/demo.py

# 2D body pose estimator
pose2d_estimator_path = './detectors/body_pose_estimator'
sys.path.append(pose2d_estimator_path)
from detectors.body_pose_estimator.pose2d_models.with_mobilenet import PoseEstimationWithMobileNet
from detectors.body_pose_estimator.modules.load_state import load_state
from detectors.body_pose_estimator.val import normalize, pad_width
from detectors.body_pose_estimator.modules.pose import Pose, track_poses
from detectors.body_pose_estimator.modules.keypoints import extract_keypoints, group_keypoints


class BodyPoseEstimator_cpu(object):
"""
Hand Detector for third-view input.
It combines a body pose estimator (https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git)
"""
def __init__(self):
print("Loading Body Pose Estimator")
self.__load_body_estimator()


def __load_body_estimator(self):
device = torch.device('cpu')
net = PoseEstimationWithMobileNet().to(device)
pose2d_checkpoint = "./extra_data/body_module/body_pose_estimator/checkpoint_iter_370000.pth"
checkpoint = torch.load(pose2d_checkpoint, map_location='cpu')
load_state(net, checkpoint)
net = net.eval()
# net = net.cuda()
self.model = net


    # Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/blob/master/demo.py
def __infer_fast(self, img, input_height_size, stride, upsample_ratio,
cpu=False, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256):
height, width, _ = img.shape
scale = input_height_size / height

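        # Resize so the image height matches the network input height, normalize
        # pixel values, and pad so both sides are multiples of the network stride.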
scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
scaled_img = normalize(scaled_img, img_mean, img_scale)
min_dims = [input_height_size, max(scaled_img.shape[1], input_height_size)]
padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims)

tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
        # The model was loaded on CPU, so keep the input tensor on CPU as well
        # (the upstream demo moved it to CUDA at this point).
        tensor_img = tensor_img.cpu()

stages_output = self.model(tensor_img)

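        # The last two stage outputs are the refined keypoint heatmaps and the
        # part affinity fields (PAFs); upsample both back toward input resolution.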
stage2_heatmaps = stages_output[-2]
heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0))
heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)

stage2_pafs = stages_output[-1]
pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0))
pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)

return heatmaps, pafs, scale, pad

def detect_body_pose(self, img):
"""
Output:
current_bbox: BBOX_XYWH
"""
stride = 8
upsample_ratio = 4
orig_img = img.copy()

# forward
heatmaps, pafs, scale, pad = self.__infer_fast(img,
input_height_size=256, stride=stride, upsample_ratio=upsample_ratio)

total_keypoints_num = 0
all_keypoints_by_type = []
num_keypoints = Pose.num_kpts
for kpt_idx in range(num_keypoints): # 19th for bg
total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)

pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
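        # Map keypoints from heatmap space back to original image coordinates:
        # undo the network stride / upsampling, remove the padding, then undo the resize scale.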
for kpt_id in range(all_keypoints.shape[0]):
all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale

'''
# print(len(pose_entries))
if len(pose_entries)>1:
pose_entries = pose_entries[:1]
print("We only support one person currently")
# assert len(pose_entries) == 1, "We only support one person currently"
'''

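        # Assemble per-person keypoint arrays and derive a bounding box for each pose.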
current_poses, current_bbox = list(), list()
for n in range(len(pose_entries)):
if len(pose_entries[n]) == 0:
continue
pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
for kpt_id in range(num_keypoints):
if pose_entries[n][kpt_id] != -1.0: # keypoint was found
pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
pose = Pose(pose_keypoints, pose_entries[n][18])
current_poses.append(pose.keypoints)
current_bbox.append(np.array(pose.bbox))

# enlarge the bbox
for i, bbox in enumerate(current_bbox):
x, y, w, h = bbox
margin = 0.05
x_margin = int(w * margin)
y_margin = int(h * margin)
x0 = max(x-x_margin, 0)
y0 = max(y-y_margin, 0)
x1 = min(x+w+x_margin, orig_img.shape[1])
y1 = min(y+h+y_margin, orig_img.shape[0])
current_bbox[i] = np.array((x0, y0, x1-x0, y1-y0)).astype(np.int32)

return current_poses, current_bbox
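For reference, a minimal sketch of how this new detector could be driven on its own (the image path below is a placeholder, not part of this PR):

import cv2
from bodymocap.body_bbox_detector_cpu import BodyPoseEstimator_cpu

detector = BodyPoseEstimator_cpu()                    # loads the MobileNet pose model on CPU
img = cv2.imread('./sample_data/person.jpg')          # placeholder input image (BGR, as from cv2.imread)
keypoints_list, body_bbox_list = detector.detect_body_pose(img)
print(body_bbox_list)                                 # one (x, y, w, h) array per detected person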
183 changes: 183 additions & 0 deletions bodymocap/body_mocap_api_cpu.py
@@ -0,0 +1,183 @@
# Copyright (c) Facebook, Inc. and its affiliates.

import cv2
import sys
import torch
import numpy as np
import pickle
from torchvision.transforms import Normalize

from bodymocap.models import hmr, SMPL, SMPLX
from bodymocap import constants
from bodymocap.utils.imutils import crop, crop_bboxInfo, process_image_bbox, process_image_keypoints, bbox_from_keypoints
from mocap_utils.coordconv import convert_smpl_to_bbox, convert_bbox_to_oriIm
import mocap_utils.geometry_utils as gu


class BodyMocap_cpu(object):
def __init__(self, regressor_checkpoint, smpl_dir, device=torch.device('cpu'), use_smplx=False):

        # Note: 'gpu' is not a valid torch device string; this CPU variant keeps
        # everything on CPU regardless of CUDA availability.
        self.device = torch.device('cpu')

# Load parametric model (SMPLX or SMPL)
if use_smplx:
smplModelPath = smpl_dir + '/SMPLX_NEUTRAL.pkl'
self.smpl = SMPLX(smpl_dir,
batch_size=1,
num_betas = 10,
use_pca = False,
create_transl=False).to(self.device)
self.use_smplx = True
else:
smplModelPath = smpl_dir + '/basicModel_neutral_lbs_10_207_0_v1.0.0.pkl'
self.smpl = SMPL(smplModelPath, batch_size=1, create_transl=False).to(self.device)
self.use_smplx = False

#Load pre-trained neural network
SMPL_MEAN_PARAMS = './extra_data/body_module/data_from_spin/smpl_mean_params.npz'
self.model_regressor = hmr(SMPL_MEAN_PARAMS).to(self.device)
checkpoint = torch.load(regressor_checkpoint,map_location=torch.device('cpu'))
self.model_regressor.load_state_dict(checkpoint['model'], strict=False)
self.model_regressor.eval()


def regress(self, img_original, body_bbox_list):
"""
args:
img_original: original raw image (BGR order by using cv2.imread)
body_bbox: bounding box around the target: (minX, minY, width, height)
outputs:
pred_vertices_img:
pred_joints_vis_img:
pred_rotmat
pred_betas
pred_camera
            bbox: [bbr[0], bbr[1], bbr[0]+bbr[2], bbr[1]+bbr[3]]
bboxTopLeft: bbox top left (redundant)
boxScale_o2n: bbox scaling factor (redundant)
"""
pred_output_list = list()

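        # For each detected person: crop and normalize the bbox region, run the HMR
        # regressor, then convert the SMPL output back to original image coordinates.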
for body_bbox in body_bbox_list:
img, norm_img, boxScale_o2n, bboxTopLeft, bbox = process_image_bbox(
img_original, body_bbox, input_res=constants.IMG_RES)
bboxTopLeft = np.array(bboxTopLeft)

# bboxTopLeft = bbox['bboxXYWH'][:2]
if img is None:
pred_output_list.append(None)
continue

with torch.no_grad():
# model forward
pred_rotmat, pred_betas, pred_camera = self.model_regressor(norm_img.to(self.device))

#Convert rot_mat to aa since hands are always in aa
# pred_aa = rotmat3x3_to_angle_axis(pred_rotmat)
pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cpu()
pred_aa = pred_aa.reshape(pred_aa.shape[0], 72)
smpl_output = self.smpl(
betas=pred_betas,
body_pose=pred_aa[:,3:],
global_orient=pred_aa[:,:3],
pose2rot=True)
pred_vertices = smpl_output.vertices
pred_joints_3d = smpl_output.joints

pred_vertices = pred_vertices[0].cpu().numpy()

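                # Weak-perspective camera predicted by the regressor: [scale, trans_x, trans_y]
                # defined in the normalized bbox space.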
pred_camera = pred_camera.cpu().numpy().ravel()
camScale = pred_camera[0] # *1.15
camTrans = pred_camera[1:]

pred_output = dict()
# Convert mesh to original image space (X,Y are aligned to image)
# 1. SMPL -> 2D bbox
# 2. 2D bbox -> original 2D image
pred_vertices_bbox = convert_smpl_to_bbox(pred_vertices, camScale, camTrans)
pred_vertices_img = convert_bbox_to_oriIm(
pred_vertices_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])

# Convert joint to original image space (X,Y are aligned to image)
pred_joints_3d = pred_joints_3d[0].cpu().numpy() # (1,49,3)
pred_joints_vis = pred_joints_3d[:,:3] # (49,3)
pred_joints_vis_bbox = convert_smpl_to_bbox(pred_joints_vis, camScale, camTrans)
pred_joints_vis_img = convert_bbox_to_oriIm(
pred_joints_vis_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])

# Output
pred_output['img_cropped'] = img[:, :, ::-1]
pred_output['pred_vertices_smpl'] = smpl_output.vertices[0].cpu().numpy() # SMPL vertex in original smpl space
pred_output['pred_vertices_img'] = pred_vertices_img # SMPL vertex in image space
pred_output['pred_joints_img'] = pred_joints_vis_img # SMPL joints in image space

pred_aa_tensor = gu.rotation_matrix_to_angle_axis(pred_rotmat.detach().cpu()[0])
pred_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy().reshape(1, 72)

pred_output['pred_rotmat'] = pred_rotmat.detach().cpu().numpy() # (1, 24, 3, 3)
pred_output['pred_betas'] = pred_betas.detach().cpu().numpy() # (1, 10)

pred_output['pred_camera'] = pred_camera
pred_output['bbox_top_left'] = bboxTopLeft
pred_output['bbox_scale_ratio'] = boxScale_o2n
pred_output['faces'] = self.smpl.faces

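                # SMPL-X additionally provides hand joints; project them to image space
                # so hand bounding boxes can be derived later (see get_hand_bboxes).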
if self.use_smplx:
img_center = np.array((img_original.shape[1], img_original.shape[0]) ) * 0.5
# right hand
pred_joints = smpl_output.right_hand_joints[0].cpu().numpy()
pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans)
pred_joints_img = convert_bbox_to_oriIm(
pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])
pred_output['right_hand_joints_img_coord'] = pred_joints_img
# left hand
pred_joints = smpl_output.left_hand_joints[0].cpu().numpy()
pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans)
pred_joints_img = convert_bbox_to_oriIm(
pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])
pred_output['left_hand_joints_img_coord'] = pred_joints_img

pred_output_list.append(pred_output)

return pred_output_list


def get_hand_bboxes(self, pred_body_list, img_shape):
"""
args:
            pred_body_list: output of body regression
img_shape: img_height, img_width
outputs:
hand_bbox_list:
"""
hand_bbox_list = list()
for pred_body in pred_body_list:
hand_bbox = dict(
left_hand = None,
right_hand = None
)
if pred_body is None:
hand_bbox_list.append(hand_bbox)
else:
for hand_type in hand_bbox:
key = f'{hand_type}_joints_img_coord'
pred_joints_vis_img = pred_body[key]

if pred_joints_vis_img is not None:
# get initial bbox
x0, x1 = np.min(pred_joints_vis_img[:, 0]), np.max(pred_joints_vis_img[:, 0])
y0, y1 = np.min(pred_joints_vis_img[:, 1]), np.max(pred_joints_vis_img[:, 1])
width, height = x1-x0, y1-y0
# extend the obtained bbox
margin = int(max(height, width) * 0.2)
img_height, img_width = img_shape
x0 = max(x0 - margin, 0)
y0 = max(y0 - margin, 0)
x1 = min(x1 + margin, img_width)
y1 = min(y1 + margin, img_height)
                        # resulting bbox in (x, y, w, h) format
                        hand_bbox[hand_type] = np.array([x0, y0, x1-x0, y1-y0])

hand_bbox_list.append(hand_bbox)

return hand_bbox_list
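Taken together, a hedged end-to-end sketch of how these two CPU classes could be combined (checkpoint, SMPL, and image paths are placeholders and must match your local setup):

import cv2
import torch
from bodymocap.body_bbox_detector_cpu import BodyPoseEstimator_cpu
from bodymocap.body_mocap_api_cpu import BodyMocap_cpu

detector = BodyPoseEstimator_cpu()
body_mocap = BodyMocap_cpu(
    regressor_checkpoint='./extra_data/body_module/pretrained_weights/body_regressor.pt',  # placeholder path
    smpl_dir='./extra_data/smpl',                                                          # placeholder path
    device=torch.device('cpu'),
    use_smplx=False)

img = cv2.imread('./sample_data/person.jpg')          # placeholder input image
_, body_bbox_list = detector.detect_body_pose(img)
pred_output_list = body_mocap.regress(img, body_bbox_list)
# With use_smplx=True, hand bounding boxes can then be derived via:
#   hand_bbox_list = body_mocap.get_hand_bboxes(pred_output_list, img.shape[:2])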