-
Notifications
You must be signed in to change notification settings - Fork 1
/
demo_frankmocap.py
207 lines (163 loc) · 7.28 KB
/
demo_frankmocap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# Copyright (c) Facebook, Inc. and its affiliates.
import os
import sys
import os.path as osp
import torch
from torchvision.transforms import Normalize
import numpy as np
import cv2
import argparse
import json
import pickle
############# input parameters #############
from demo.demo_options import DemoOptions
from bodymocap.body_mocap_api import BodyMocap
from handmocap.hand_mocap_api import HandMocap
import mocap_utils.demo_utils as demo_utils
import mocap_utils.general_utils as gnu
from mocap_utils.timer import Timer
from datetime import datetime
from bodymocap.body_bbox_detector import BodyPoseEstimator
from handmocap.hand_bbox_detector import HandBboxDetector
from integration.copy_and_paste import integration_copy_paste
from renderer.viewer2D import ImShow
def __filter_bbox_list(body_bbox_list, hand_bbox_list, single_person):
# (to make the order as consistent as possible without tracking)
bbox_size = [ (x[2] * x[3]) for x in body_bbox_list]
idx_big2small = np.argsort(bbox_size)[::-1]
body_bbox_list = [ body_bbox_list[i] for i in idx_big2small ]
hand_bbox_list = [hand_bbox_list[i] for i in idx_big2small]
if single_person and len(body_bbox_list)>0:
body_bbox_list = [body_bbox_list[0], ]
hand_bbox_list = [hand_bbox_list[0], ]
return body_bbox_list, hand_bbox_list
def run_regress(
args, img_original_bgr,
body_bbox_list, hand_bbox_list, bbox_detector,
body_mocap, hand_mocap
):
cond1 = len(body_bbox_list) > 0 and len(hand_bbox_list) > 0
cond2 = not args.frankmocap_fast_mode
# use pre-computed bbox or use slow detection mode
if cond1 or cond2:
if not cond1 and cond2:
# run detection only when bbox is not available
body_pose_list, body_bbox_list, hand_bbox_list, _ = \
bbox_detector.detect_hand_bbox(img_original_bgr.copy())
else:
print("Use pre-computed bounding boxes")
assert len(body_bbox_list) == len(hand_bbox_list)
if len(body_bbox_list) < 1:
return list(), list(), list()
# sort the bbox using bbox size
# only keep on bbox if args.single_person is set
body_bbox_list, hand_bbox_list = __filter_bbox_list(
body_bbox_list, hand_bbox_list, args.single_person)
# hand & body pose regression
pred_hand_list = hand_mocap.regress(
img_original_bgr, hand_bbox_list, add_margin=True)
pred_body_list = body_mocap.regress(img_original_bgr, body_bbox_list)
assert len(hand_bbox_list) == len(pred_hand_list)
assert len(pred_hand_list) == len(pred_body_list)
else:
_, body_bbox_list = bbox_detector.detect_body_bbox(img_original_bgr.copy())
if len(body_bbox_list) < 1:
return list(), list(), list()
# sort the bbox using bbox size
# only keep on bbox if args.single_person is set
hand_bbox_list = [None, ] * len(body_bbox_list)
body_bbox_list, _ = __filter_bbox_list(
body_bbox_list, hand_bbox_list, args.single_person)
# body regression first
pred_body_list = body_mocap.regress(img_original_bgr, body_bbox_list)
assert len(body_bbox_list) == len(pred_body_list)
# get hand bbox from body
hand_bbox_list = body_mocap.get_hand_bboxes(pred_body_list, img_original_bgr.shape[:2])
assert len(pred_body_list) == len(hand_bbox_list)
# hand regression
pred_hand_list = hand_mocap.regress(
img_original_bgr, hand_bbox_list, add_margin=True)
assert len(hand_bbox_list) == len(pred_hand_list)
# integration by copy-and-paste
integral_output_list = integration_copy_paste(
pred_body_list, pred_hand_list, body_mocap.smpl, img_original_bgr.shape)
return body_bbox_list, hand_bbox_list, integral_output_list
def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap):
#Setup input data to handle different types of inputs
input_type, input_data = demo_utils.setup_input(args)
cur_frame = args.start_frame
video_frame = 0
while True:
# load data
load_bbox = False
if input_type =='image_dir':
if cur_frame < len(input_data):
image_path = input_data[cur_frame]
img_original_bgr = cv2.imread(image_path)
else:
img_original_bgr = None
elif input_type == 'video':
_, img_original_bgr = input_data.read()
if video_frame < cur_frame:
video_frame += 1
continue
# save the obtained video frames
image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg")
if img_original_bgr is not None:
video_frame += 1
if args.save_frame:
gnu.make_subdir(image_path)
cv2.imwrite(image_path, img_original_bgr)
elif input_type == 'webcam':
_, img_original_bgr = input_data.read()
if video_frame < cur_frame:
video_frame += 1
continue
# save the obtained video frames
image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg")
if img_original_bgr is not None:
video_frame += 1
if args.save_frame:
gnu.make_subdir(image_path)
cv2.imwrite(image_path, img_original_bgr)
else:
assert False, "Unknown input_type"
cur_frame +=1
if img_original_bgr is None or cur_frame > args.end_frame:
break
print("--------------------------------------")
# bbox detection
if not load_bbox:
body_bbox_list, hand_bbox_list = list(), list()
# regression (includes integration)
body_bbox_list, hand_bbox_list, pred_output_list = run_regress(
args, img_original_bgr,
body_bbox_list, hand_bbox_list, bbox_detector,
body_mocap, hand_mocap)
# save the obtained body & hand bbox to json file
if args.save_bbox_output:
demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list)
if len(body_bbox_list) < 1:
print(f"No body deteced: {image_path}")
continue
# save predictions to pkl
if args.save_pred_pkl:
demo_type = 'frank'
demo_utils.save_pred_to_pkl(
args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list)
print(f"Processed : {image_path}")
if input_type =='webcam' and input_data is not None:
input_data.release()
cv2.destroyAllWindows()
def main():
args = DemoOptions().parse()
args.use_smplx = True
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
assert torch.cuda.is_available(), "Current version only supports GPU"
hand_bbox_detector = HandBboxDetector('third_view', device)
#Set Mocap regressor
body_mocap = BodyMocap(args.checkpoint_body_smplx, args.smpl_dir, device = device, use_smplx= True)
hand_mocap = HandMocap(args.checkpoint_hand, args.smpl_dir, device = device)
run_frank_mocap(args, hand_bbox_detector, body_mocap, hand_mocap)
if __name__ == '__main__':
main()