-
Notifications
You must be signed in to change notification settings - Fork 1
/
position_proposer.py
477 lines (393 loc) · 18.4 KB
/
position_proposer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
import random
import cv2
import numpy as np
import torch
from dataloader import fit_to_image
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(0)
random.seed(0)
def xywh_intersection(xywh1, xywh2):
'''
:param xywh1: left, top, w, h
:param xywh2:
:return:
'''
xbox1 = [xywh1[0], xywh1[0] + xywh1[2]]
ybox1 = [xywh1[1], xywh1[1] + xywh1[3]]
xbox2 = [xywh2[0], xywh2[0] + xywh2[2]]
ybox2 = [xywh2[1], xywh2[1] + xywh2[3]]
overlaps = intersect1d(xbox1, xbox2) and intersect1d(ybox1, ybox2)
return overlaps
def intersect1d(box1, box2):
return box1[1] >= box2[0] and box2[1] >= box1[0]
def exp(x, a, b, c):
res = a * np.exp(b * x) + c
return res
def bboxes_intersect(bboxes, bbox):
'''
All boxes must be in XYWH format!
:param bboxes: n bounding boxes
:param bbox: check if this bbox intersect the others
:return:
'''
for ref_bbox in bboxes:
if xywh_intersection(ref_bbox, bbox):
return True
return False
def xyxy2xywh(bbs):
xywh = []
for xyxy in bbs:
x1, y1, x2, y2 = xyxy
w = x2 - x1
h = y2 - y1
xywh.append([x1, y1, w, h])
xywh = np.asarray(xywh).reshape((-1, 4))
return xywh
class PositionProposer:
def __init__(self, gen_input_size, ground_labels=(6, 7, 8, 9),
height_coefs={'x_mu': 0.5, 'x_std': 0.24, 'y_mu': 0.5, 'y_std': 0.07},
xyxy_boxes=False, max_size=None, occlusion_classes=None, min_height=None, max_height=None,
insert2empty=False, ecp=False):
self.gen_input_size = gen_input_size
if isinstance(height_coefs, list) or isinstance(height_coefs, tuple):
self.height_fnc = lambda y_loc: height_coefs[0] * y_loc + height_coefs[1]
else:
# self.height_fnc = lambda y_loc: np.exp(coefs[0] * y_loc + coefs[1])
self.height_fnc = lambda y: -0.5682602 + 1.39825452 * y
self.height_coefs = height_coefs
self.min_height = 35 if min_height is None else min_height
self.max_height = max_height
self.allowed_labels = ground_labels # ground, road, sidewalk, parking
self.xyxy_boxes = xyxy_boxes
self.max_size = max_size
self.insert2empty = insert2empty
self.ecp = ecp
if occlusion_classes is not None:
# if we have provided occlusion classes, use them
self.allowed_labels = occlusion_classes
self.use_occlusion = True
else:
self.use_occlusion = False
def get_allowed_positions(self, segmentation, objects_all):
ground = np.zeros_like(segmentation, dtype=bool)
for gl in self.allowed_labels:
tmp = segmentation == gl
ground = np.logical_or(ground, tmp)
ground[int(ground.shape[-2] * 0.7):, :] = False
# ground_yx = np.where(ground)
# ground_xy = [[x, y] for y, x in zip(ground_yx[0], ground_yx[1])]
# mask-out areas with pedestrians
for obj in objects_all:
if isinstance(obj, dict):
x, y, w, h = obj['bbox']
else:
x, y, w, h = obj
x, y, w, h = int(x), int(y), int(w), int(h)
ground[y:(y + h), x:(x + w)] = False
return ground
# return ground_xy, ground
def choose_nearby(self, allowed_positions, person_bbs, mask, im_h_given=None, im_w_given=None):
'''
:param allowed_positions: Allowed positions where person can stand.
:param person_bbs: annotated persons in the scene
:param width: Width of the bounding box of the new pedestrian.
:param mask: mask of pedestrian to insert
:return: list of possible positions of bottom-left corner of the pedestrian bounding box
'''
nearby = []
wh_y, wh_x = np.where(mask)
sk_height = max(wh_y) - min(wh_y)
if allowed_positions is not None:
im_h, im_w = allowed_positions.shape
else:
im_h, im_w = im_h_given, im_w_given
if allowed_positions is not None:
bbs = [obj['bbox'] for obj in person_bbs] # Format of one bb: [x,y,w,h] (x,y) is top-left corner.
else:
bbs = person_bbs['bboxes']
if self.xyxy_boxes:
bbs = xyxy2xywh(bbs)
iterable = bbs
for bb_orig in iterable:
if allowed_positions is not None and isinstance(bb_orig, dict):
if bb_orig['label'] != 'pedestrian':
continue
bboxVis = bb_orig['bboxVis']
bbox_orig = bb_orig['bbox']
height = bbox_orig[-1]
if height < 50:
# pedestrian is too small
continue
areaVis = bboxVis[2] * bboxVis[3]
area = bbox_orig[2] * bbox_orig[3]
visibility = areaVis / area
if visibility < 0.65:
# pedestrian is too occluded
continue
else:
bbox_orig = bb_orig.copy()
left, right = max([bbox_orig[0], 0]), min([bbox_orig[0] + bbox_orig[2], im_w - 1])
top, bottom = max([bbox_orig[1], 0]), min([bbox_orig[1] + bbox_orig[3], im_h - 1])
height = bbox_orig[-1]
# scale the skeleton so that it is the same height as the reference bounding box
scale = height / sk_height
tgt_size = (int(mask.shape[0] * scale), int(mask.shape[1] * scale))
mask_scaled = cv2.resize(mask, tgt_size, interpolation=cv2.INTER_NEAREST)
wh_y, wh_x = np.where(mask_scaled)
if len(wh_y) == 0 or len(wh_x) == 0:
continue
bb_msk_h = max(wh_y) - min(wh_y)
bb_msk_w = max(wh_x) - min(wh_x)
# pedestrian standing on the LEFT from annotated bb
start_x = left - bb_msk_w
end_x = left
if allowed_positions is not None:
left_free = all(allowed_positions[bottom, start_x:end_x]) if end_x > 0 else False
else:
left_free = True
# where would the pedestrian stand on the LEFT
x = left - bb_msk_w - 1
y = top
proposed_bb = [x, y, bb_msk_w, bb_msk_h] # TODO: compute
right = x + bb_msk_w
bottom = min(y + bb_msk_h, im_h - 1)
if x < 0 or x >= im_w or y < 0 or y >= im_h or right >= im_w or bottom >= im_h:
left_free = False
if left_free and not bboxes_intersect(bbs, proposed_bb):
nearby.append([start_x, bottom, height, 'left']) # (x,y)
# pedestrian standing on the RIGHT from annotated bb
start_x = right
end_x = min(right + bb_msk_w, im_w - 1)
if allowed_positions is not None:
right_free = all(allowed_positions[bottom, start_x:end_x]) if end_x < allowed_positions.shape[
1] else False
else:
right_free = True
# where would the pedestrian stand on the RIGHT
x = right + 1
y = top
proposed_bb = [x, y, bb_msk_w, bb_msk_h]
right = x + bb_msk_w
bottom = y + bb_msk_h
if x < 0 or x >= im_w or y < 0 or y >= im_h or right >= im_w or bottom >= im_h:
right_free = False
if right_free and not bboxes_intersect(bbs, proposed_bb):
nearby.append([start_x, bottom, height, 'right'])
return nearby
def __call__(self, image, segmentation, objects_orig, objects_all, skeleton, person_mask, return_mask=False):
'''
Chooses a position of a pedestrian in the image.
:param image: PIL image of the scene
:param person_mask: 2D numpy array of person mask
:param segmentation: Segmentation of the scene.
:param objects_all: Bounding boxes of the pedestrians. Format: [x,y,w,h] (x,y) is top-left corner.
:param skeleton: Skeleton of the person that should be inserted.
:return:
'''
if segmentation is not None:
im_h, im_w = segmentation.shape[-2:]
else:
# retunrns image_crop, crop_size, crop_params, bbox
return self.image_based(image, objects_all, skeleton, person_mask)
sk_img_h, sk_img_w = skeleton.shape
sk_y, sk_x = np.where(skeleton)
sk_height = max(sk_y) - min(sk_y)
sk_width = max(sk_x) - min(sk_x)
# put pedestrian either:
# 1) next to some already existing pedestrian => +- same height as the nearby pedestrian
# 2) on the sidewalk or on the road => use the height fnc
# The pedestrian must (same as in the BMVC paper):
# 1) the pedestrian bounding box should not touch any boundary of the image
# 2) Every point on the bottom boundary of the candidate pedestrian bounding box should belong to road/sidewalk
ground = self.get_allowed_positions(segmentation, objects_all) # anywhere on road/sidewalk
if self.insert2empty:
positions_nearby = []
else:
positions_nearby = self.choose_nearby(ground, objects_all, person_mask) # next to some pedestrian
print('Number of nearby positions: {}'.format(len(positions_nearby)))
# change possible ground positions
ground[:, :int(ground.shape[-1] * 0.1)] = False
ground[:, int(ground.shape[-1] * 0.9):] = False
ground = ground.astype(np.uint8)
kernel = np.ones((51, 51))
ground = cv2.erode(ground, kernel, iterations=1)
ground_yx = np.where(ground)
positions_ground = [[x, y] for y, x in zip(ground_yx[0], ground_yx[1])]
use_nearby = len(positions_nearby) # random.random() < 0.5 and
positions = positions_nearby if use_nearby else positions_ground
if not len(positions):
return None
height = -1
while height < self.min_height:
pos_idx = random.choice(range(len(positions)))
if use_nearby:
x, y, height, direction = positions[pos_idx]
else:
x, y = positions[pos_idx]
direction = 'none'
if self.ecp:
height = self.height_fnc(y)
else:
y_tmp = y / im_h
height = self.height_fnc(y_tmp)
height = height * im_h
if self.max_height is not None and height > self.max_height:
height = -1
y_px = int((1 - y) * im_h) if isinstance(y, float) else y
x_px = int(x * im_w) if isinstance(x, float) else x
# how do we need to resize the inputs
resize_factor = height / sk_height
crop_size = int(sk_img_h * resize_factor)
crop_shape = (crop_size, crop_size)
crop_tmp = image[y_px - crop_size:y_px, x_px:x_px + crop_size, :]
# resize skeleton
skeleton_resized = cv2.resize(skeleton, crop_shape, interpolation=cv2.INTER_NEAREST)
# find bottom left point of the skeleton that should match with (x_px, y_px)
skr_y, skr_x = np.where(skeleton_resized)
bottom_skr = max(skr_y)
left_skr = min(skr_x)
right_skr = max(skr_x)
bottom_skr_left = max([x for x in skr_x[skr_y == bottom_skr]])
# get the bottom left point of the image crop
bottom_crop = y_px + (crop_size - bottom_skr)
left_crop = x_px
if use_nearby:
left_crop = self.shift_left_side(direction, left_crop, left_skr, right_skr, x_px)
else:
left_crop = x_px - bottom_skr_left
# crop_params in PIL crop format = (left, upper, right, lower)-tuple
left, upper, right, lower = (left_crop, bottom_crop - crop_size, left_crop + crop_size, bottom_crop)
left, upper, crop_size = fit_to_image(left, upper, crop_size, im_w, im_h)
right = left + crop_size
lower = upper + crop_size
crop_params = left, upper, right, lower
# use crop params to crop the image and then resize it to self.gen_input_size
image_crop = image[upper:lower, left:right]
image_crop_orig = image_crop.copy()
image_crop = cv2.resize(image_crop, (self.gen_input_size, self.gen_input_size), interpolation=cv2.INTER_LINEAR)
# resize the mask for puposes of insertion into the current segmentation
resized_mask = cv2.resize(person_mask, (crop_size, crop_size), interpolation=cv2.INTER_NEAREST)
augmented_segmentation, bbox = insert2segmentation(segmentation, crop_params, resized_mask)
if return_mask:
return image_crop, crop_size, crop_params, augmented_segmentation, bbox, image_crop_orig, resized_mask
return image_crop, crop_size, crop_params, augmented_segmentation, bbox, image_crop_orig
def shift_left_side(self, direction, left_crop, left_skr, right_skr, x_px):
if direction == 'left':
left_crop = x_px - left_skr # x_px + right_skr // 2
elif direction == 'right':
left_crop = x_px - left_skr # // 2
elif direction == 'none':
left_crop = x_px
else:
raise Exception('Something is wrong. Direction should be "left" or "right".')
return left_crop
def get_random_position(self, im_h, im_w):
height = -1
while height < 0:
x = np.random.normal(loc=self.height_coefs['x_mu'], scale=self.height_coefs['x_std']) * im_w
x = int(max([im_w * 0.2, min([x, im_w * 0.8])]))
y = np.random.normal(loc=self.height_coefs['y_mu'], scale=self.height_coefs['y_std']) * im_h
y = int(max([im_h // 10, min([y, im_h // 2])]))
height = self.height_fnc(y / im_h) * im_h
return x, y, height
def image_based(self, image, objects_all, skeleton, person_mask):
msk_y, msk_x = np.where(person_mask)
sk_height = max(msk_y) - min(msk_y)
sk_width = max(msk_x) - min(msk_x)
shape = image.shape
if shape[0] == 3:
_, im_h, im_w = shape
else:
im_h, im_w, _ = shape
# next to some pedestrian
positions_nearby = self.choose_nearby(None, objects_all, person_mask, im_h_given=im_h, im_w_given=im_w)
if positions_nearby:
pos_idx = random.choice([i for i in range(len(positions_nearby))])
x, y, height, direction = positions_nearby[pos_idx]
else:
x, y, height = self.get_random_position(im_h, im_w) # TODO: generate randomly some position
height = int(height)
direction = 'none'
y_px = y # bottom
x_px = x # left
resize_factor = height / sk_height
crop_size = int(self.gen_input_size * resize_factor)
crop_shape = (crop_size, crop_size)
crop_tmp = image[y_px - crop_size:y_px, x_px:x_px + crop_size, :]
# resize skeleton
skeleton_resized = cv2.resize(skeleton, crop_shape, interpolation=cv2.INTER_NEAREST)
# find bottom left point of the skeleton that should match with (x_px, y_px)
skr_y, skr_x = np.where(skeleton_resized)
bottom_skr = max(skr_y)
left_skr = min(skr_x)
right_skr = max(skr_x)
# get the bottom left point of the image crop
bottom_crop = y_px + (crop_size - bottom_skr)
left_crop = x_px
left_crop = self.shift_left_side(direction, left_crop, left_skr, right_skr, x_px)
# crop_params in PIL crop format = (left, upper, right, lower)-tuple
left, upper, right, lower = (left_crop, bottom_crop - crop_size, left_crop + crop_size, bottom_crop)
crop_params = left, upper, right, lower
left, upper, crop_size = fit_to_image(left, upper, crop_size, im_w, im_h)
if self.max_size is not None:
crop_size = min([self.max_size, crop_size])
right = left + crop_size
lower = upper + crop_size
crop_params = left, upper, right, lower
# use crop params to crop the image and then resize it to self.gen_input_size
if isinstance(image, torch.Tensor):
image = image.cpu().numpy()
image = np.moveaxis(image, 0, 2)
image_crop = (image)[upper:lower, left:right]
image_crop_orig = image_crop.copy()
image_crop = cv2.resize(image_crop, (self.gen_input_size, self.gen_input_size), interpolation=cv2.INTER_LINEAR)
# resize the mask for puposes of insertion into the current segmentation
resized_mask = cv2.resize(person_mask, (crop_size, crop_size), interpolation=cv2.INTER_NEAREST)
bbox = inserted_bbox(im_h, im_w, crop_params, resized_mask, xyxy=self.xyxy_boxes)
return image_crop, crop_size, crop_params, bbox, image_crop_orig
def insert2segmentation(segmentation, crop_params, mask, class_num=24):
left, upper, right, lower = crop_params
augmented = np.copy(segmentation)
inserted_only = np.zeros_like(segmentation)
segm_crop = augmented[upper:lower, left:right]
tmp_crop = np.zeros_like(segm_crop)
try:
segm_crop[mask > 0.99] = class_num
tmp_crop[mask > 0.99] = 1
except:
print('Some problem. Segm crop: {}, mask: {}, crop_params: {}'.format(segm_crop.shape, mask.shape, crop_params))
augmented[upper:lower, left:right] = segm_crop
inserted_only[upper:lower, left:right] = tmp_crop
# determine the bounding box
wh_y, wh_x = np.where(inserted_only)
top = min(wh_y)
left = min(wh_x)
width = max(wh_x) - min(wh_x)
height = max(wh_y) - min(wh_y)
bbox = [int(left), int(top), int(width), int(height)] # XYWH format
return augmented, bbox
def inserted_bbox(im_h, im_w, crop_params, mask, xyxy=False):
left, upper, right, lower = crop_params
inserted_only = np.zeros((im_h, im_w), dtype=np.int8)
tmp_crop = np.zeros((lower - upper, right - left), dtype=np.int8)
try:
tmp_crop[mask > 0.99] = 1
except:
print('Some problem.')
inserted_only[upper:lower, left:right] = tmp_crop
# determine the bounding box
wh_y, wh_x = np.where(inserted_only)
top = min(wh_y)
bottom = max(wh_y)
left = min(wh_x)
right = max(wh_x)
width = max(wh_x) - min(wh_x)
height = max(wh_y) - min(wh_y)
assert width > 2 and height > 30, "Width {}, height {}".format(width, height)
if xyxy:
bbox = [int(left), int(top), int(right), int(bottom)] # XYXY format
else:
bbox = [int(left), int(top), int(width), int(height)] # XYWH format
return bbox