-
Notifications
You must be signed in to change notification settings - Fork 13
/
data_generator.py
295 lines (238 loc) · 11.7 KB
/
data_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# Holy resources:
# https://github.com/virafpatrawala/COCO-Semantic-Segmentation/blob/master/COCOdataset_SemanticSegmentation_Demo.ipynb
# https://mahmoudyusof.github.io/facial-keypoint-detection/data-generator/
# https://blog.paperspace.com/data-augmentation-for-object-detection-building-input-pipelines/
import os
import cv2
import imgaug as ia
import numpy as np
import pandas as pd
import skimage.io as io
import tensorflow as tf
from imgaug import augmenters as iaa
from imgaug.augmentables.kps import Keypoint, KeypointsOnImage
from PIL import Image
from tensorflow.keras.utils import Sequence
from constants import *
from data_augmentation import *
def transform_bbox_square(bbox, slack=1):
    """
    Expands a bounding box into a square that shares the same center.

    The square's edge length is the larger of the box's width and height, multiplied by `slack`.
    Only supports square aspect ratios currently.

    ## Parameters
    bbox : {tuple or ndarray of len 4}
        Given as (x, y, w, h), where (x, y) is the top left corner of the box measured from the
        top left of the image (0,0), and w/h are the box's width and height
    slack : {int, float}
        Multiplier applied to the square's edge length, i.e. the amount of extra padding that should
        be applied to the edges of the bounding box after transforming

    ## Returns
    Tuple (left, upper, right, lower) of rounded corner coordinates of the square.
    The coordinates are not clamped, so they may be negative or extend past the image.
    """
    x, y, w, h = bbox  # (x,y,w,h) anchored to top left

    center_x = x + w / 2
    center_y = y + h / 2

    # square edge: the longer side of the original box, scaled by slack
    edge = (w if w >= h else h) * slack

    new_x = center_x - edge / 2
    new_y = center_y - edge / 2

    return (round(new_x), round(new_y), round(new_x + edge), round(new_y + edge))
# inherit from Sequence to access multicore functionality: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
class DataGenerator(Sequence):
    """
    Batch generator that turns annotation rows into (image, keypoint heatmap) training pairs.

    For every annotation the image is cropped to a square around the bounding box, resized to
    `input_dim`, optionally augmented, and its keypoints are rendered as one Gaussian heatmap per
    keypoint at `output_dim` resolution.
    """

    def __init__(self, df, base_dir, input_dim, output_dim, num_hg_blocks, shuffle=False,
                 batch_size=DEFAULT_BATCH_SIZE, online_fetch=False, img_aug_strength=None, is_eval=False):
        """
        ### Parameters:
        df : DataFrame of the annotations we want. Rows are read by position, and the columns
            'bbox' and 'keypoints' are always used; 'path' is used when reading from disk,
            'coco_url' when `online_fetch` or `is_eval` is set, and 'src_set_image_id'/'ann_id'
            when `is_eval` is set
        base_dir : directory that the 'path' column is relative to
        input_dim : (dim1, dim2) of the model's input image. Assumed square, see NOTE below
        output_dim : (dim1, dim2) of the model's output heatmap
        num_hg_blocks : number of times the heatmap batch is repeated in the returned label list
        shuffle : if true, the sample order is reshuffled after every epoch
        batch_size : number of samples per batch
        online_fetch : if true, images will be loaded from url over network rather than filesystem
        img_aug_strength : passed to `get_augmenter_pipeline`; None disables augmentation
        is_eval : if true, `__getitem__` additionally returns per-sample metadata
        """
        # Keras' Sequence base may define its own initializer (newer Keras versions warn if it
        # is skipped); calling it is harmless when it does not.
        super().__init__()

        self.df = df  # df of the annotations we want
        self.base_dir = base_dir  # where to read imgs from in collab runtime

        # NOTE update image transformation logic if input is no longer square
        self.input_dim = input_dim  # model requirement for input image dimensions
        self.output_dim = output_dim  # dimensions of output heatmap of model
        self.num_hg_blocks = num_hg_blocks
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.is_eval = is_eval

        # If true, images will be loaded from url over network rather than filesystem
        self.online_fetch = online_fetch

        if img_aug_strength is not None:
            self.augmenter = get_augmenter_pipeline(img_aug_strength)
        else:
            self.augmenter = None

        # builds self.indices (and shuffles it if requested) before the first batch is served
        self.on_epoch_end()

    def on_epoch_end(self):
        """Rebuilds the positional sample indices, shuffling them if `shuffle` is set."""
        self.indices = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        """
        Number of batches (not number of examples).

        Floors the result, so a trailing partial batch is dropped; `__getitem__` preallocates
        full `batch_size` arrays and would otherwise return uninitialized rows.
        """
        return len(self.df) // self.batch_size

    def _load_image(self, ann):
        """Loads the RGB image of one annotation row, from url or from the filesystem."""
        if self.online_fetch:
            # bottleneck opening from URL
            return Image.fromarray(io.imread(ann['coco_url'])).convert('RGB')

        # bottleneck opening from file system
        img_path = os.path.join(self.base_dir, ann['path'])
        # convert() returns a new, fully loaded image, so the source file can be closed right away
        with Image.open(img_path) as src:
            return src.convert('RGB')

    def transform_image(self, img, bbox):
        """
        Crops `img` to the squared bounding box and resizes the crop to `input_dim`.

        ### Parameters:
        img : PIL image
        bbox : bounding box in the format expected by `transform_bbox_square`
        ### Returns:
        Tuple of (resized image as ndarray, cropped width, cropped height, crop left, crop upper)
        where the last four values describe the crop before resizing
        """
        new_bbox = transform_bbox_square(bbox, slack=BBOX_SLACK)
        cropped_img = img.crop(box=new_bbox)
        cropped_width, cropped_height = cropped_img.size
        new_img = cv2.resize(np.array(cropped_img), self.input_dim,
                             interpolation=cv2.INTER_LINEAR)
        return new_img, cropped_width, cropped_height, new_bbox[0], new_bbox[1]

    def transform_label(self, label, cropped_width, cropped_height, anchor_x, anchor_y):
        """
        Maps keypoints from original image coordinates to resized crop coordinates.

        ### Parameters:
        label : flat sequence of keypoints, NUM_COCO_KP_ATTRBS values per keypoint, read as (x, y, v)
        cropped_width, cropped_height : size of the crop before resizing
        anchor_x, anchor_y : top left corner of the crop in original image coordinates
        ### Returns:
        ndarray with the same flat layout. Keypoints that fall outside the resized image are
        replaced by (0, 0, 0)
        """
        label = [int(v) for v in label]
        width, height = self.input_dim[0], self.input_dim[1]

        # adjust x/y coords to new resized img
        transformed_label = []
        # zip over the same iterator NUM_COCO_KP_ATTRBS times to walk the flat list one keypoint at a time
        for x, y, v in zip(*[iter(label)] * NUM_COCO_KP_ATTRBS):
            x = round((x - anchor_x) * width / cropped_width)
            y = round((y - anchor_y) * height / cropped_height)

            # validate kps, throw away if out of bounds
            # valid pixel coordinates are 0..dim-1, so a coordinate equal to dim is out of bounds too
            # TODO: if kp is thrown away then we must update num_keypoints
            if not (0 <= x < width and 0 <= y < height):
                x, y, v = (0, 0, 0)

            transformed_label.extend((x, y, v))
        return np.asarray(transformed_label)

    def generate_heatmaps(self, label):
        """
        Renders one Gaussian heatmap per keypoint.

        ### Parameters:
        label : flat keypoint sequence in resized crop coordinates, (x, y, v) per keypoint
        ### Returns:
        ndarray of shape (output_dim1, output_dim2, NUM_COCO_KEYPOINTS). Keypoints whose last
        attribute is 0 keep an all-zero heatmap; the others peak at HEATMAP_SCALE
        """
        heat_maps = np.zeros((*self.output_dim, NUM_COCO_KEYPOINTS))
        for i in range(NUM_COCO_KEYPOINTS):
            label_idx = i * NUM_COCO_KP_ATTRBS  # index for label

            # generate empty heatmap for unlabelled kp
            if label[label_idx + (NUM_COCO_KP_ATTRBS - 1)] == 0:
                continue

            kpx = int(label[label_idx])
            kpy = int(label[label_idx + 1])
            heat_map = self.gaussian(
                np.zeros(self.input_dim), (kpx, kpy), HEATMAP_SIGMA)

            # downscale heatmap resolution
            heat_map = cv2.resize(
                heat_map, self.output_dim, interpolation=cv2.INTER_AREA)

            peak = heat_map.max()
            # a heatmap that stayed empty would otherwise be divided by zero and turn into NaNs
            if peak > 0:
                heat_map = heat_map / peak  # normalize heatmap to [0,1]
                heat_map = heat_map * HEATMAP_SCALE  # scale up to place more importance on correctly identifying kp regions
            heat_maps[:, :, i] = heat_map
        return heat_maps

    def convert_coco_kp_to_imgaug_kp(self, label):
        """
        Converts a flat keypoint label into imgaug `Keypoint` objects.

        ### Returns:
        Tuple of (kps, valid) where kps is a list of NUM_COCO_KEYPOINTS Keypoints and valid is an
        ndarray holding 1 for labelled keypoints and 0 for unlabelled ones. Unlabelled keypoints
        get the placeholder position (-1, -1) so that list positions still match keypoint order
        """
        kps = []
        valid = np.ones(NUM_COCO_KEYPOINTS)
        invalid_xy = -1
        for i in range(NUM_COCO_KEYPOINTS):
            label_idx = i * NUM_COCO_KP_ATTRBS  # index for label

            if label[label_idx + (NUM_COCO_KP_ATTRBS - 1)] == 0:
                # invalid keypoint
                valid[i] = 0
                kps.append(Keypoint(x=invalid_xy, y=invalid_xy))
                continue

            kpx = int(label[label_idx])
            kpy = int(label[label_idx + 1])
            kps.append(Keypoint(x=kpx, y=kpy))
        return kps, valid

    def convert_imgaug_kpsoi_to_coco_kp(self, kpsoi_aug, valid, image_aug):
        """
        Converts augmented imgaug keypoints back into a flat (x, y, v) label.

        Keypoints that were marked invalid, or that ended up outside `image_aug`, become (0, 0, 0).
        Every other keypoint keeps its augmented position and gets v = 1.
        """
        transformed_label = []
        for i in range(NUM_COCO_KEYPOINTS):
            kp = kpsoi_aug[i]
            if (not valid[i]) or kp.is_out_of_image(image_aug):
                x, y, v = (0, 0, 0)
            else:
                x, y, v = kp.x, kp.y, 1
            transformed_label.extend((x, y, v))
        return np.asarray(transformed_label)

    # This func is unmodified and ripped from: https://github.com/princeton-vl/pose-hg-train/blob/master/src/pypose/draw.py
    def gaussian(self, img, pt, sigma):
        """
        Draws a 2D gaussian centered at `pt` = (x, y) into `img` and returns `img`.

        `img` is modified in place. The gaussian covers 3 * sigma pixels around the center and is
        clipped at the image border.
        """
        # Draw a 2D gaussian
        # Check that any part of the gaussian is in-bounds
        ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)]
        br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)]
        if (ul[0] > img.shape[1] or ul[1] >= img.shape[0] or
                br[0] < 0 or br[1] < 0):
            # If not, just return the image as is
            return img

        # Generate gaussian
        size = 6 * sigma + 1
        x = np.arange(0, size, 1, float)
        y = x[:, np.newaxis]
        x0 = y0 = size // 2
        # The gaussian is not normalized, we want the center value to equal 1
        g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))

        # Usable gaussian range
        g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
        g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
        # Image range
        img_x = max(0, ul[0]), min(br[0], img.shape[1])
        img_y = max(0, ul[1]), min(br[1], img.shape[0])

        img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
        return img

    def __getitem__(self, idx):
        """
        Returns a batch from the dataset

        ### Parameters:
        idx : {int-type} Batch number to retrieve
        ### Returns:
        Tuple of (X, y) where:
        X : ndarray of shape (batch number, input_dim1, input_dim2, INPUT_CHANNELS)
            This corresponds to a batch of images, normalized from [0,255] to [0,1]
        y : list of ndarrays where each list element corresponds to an intermediate (or final) layer of the hourglass,
            and has shape (batch number, output_dim1, output_dim2, NUM_COCO_KEYPOINTS). The list length is num_hg_blocks
            and every element is the same array. Each output corresponds to a Gaussian heatmap scaled by HEATMAP_SCALE

        If `is_eval` is set, a third element is returned: a list with one metadata dict per sample.
        """
        # Initialize Batch:
        X = np.empty((self.batch_size, *self.input_dim, INPUT_CHANNELS))
        y = np.empty((self.batch_size, *self.output_dim, NUM_COCO_KEYPOINTS))
        metadatas = []

        # get the indices of the requested batch
        indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        for i, data_index in enumerate(indices):
            # self.indices holds row positions (np.arange), so the lookup must be positional;
            # a label lookup breaks as soon as df does not carry a default 0..n-1 index
            ann = self.df.iloc[data_index]
            img = self._load_image(ann)

            transformed_img, cropped_width, cropped_height, anchor_x, anchor_y = self.transform_image(
                img, ann['bbox'])
            transformed_label = self.transform_label(
                ann['keypoints'], cropped_width, cropped_height, anchor_x, anchor_y)

            if self.is_eval:
                metadatas.append({
                    "src_set_image_id": ann['src_set_image_id'],
                    "ann_id": ann['ann_id'],
                    "coco_url": ann['coco_url'],
                    "cropped_width": cropped_width,
                    "cropped_height": cropped_height,
                    "anchor_x": anchor_x,
                    "anchor_y": anchor_y,
                    "input_dim": self.input_dim,
                    "output_dim": self.output_dim,
                    "transformed_label": transformed_label,  # DEBUG
                    "ground_truth_keypoints": ann['keypoints'],  # DEBUG
                })

            # if image augmentations should be applied
            if self.augmenter is not None:
                imgaug_kps, valid = self.convert_coco_kp_to_imgaug_kp(transformed_label.astype('float32'))
                # Keep track of image dimension
                kpsoi = KeypointsOnImage(imgaug_kps, shape=transformed_img.shape)
                # Perform data augmentation randomly
                image_aug, kpsoi_aug = self.augmenter(image=transformed_img, keypoints=kpsoi)
                # Perform a R/L augmentation randomly, applying R/L flip to the labels as well to maintain the right order
                image_aug, kpsoi_aug = flipRL(image=image_aug, keypoints=kpsoi_aug)
                # Filter out out-of-bounds (from rotation/cropping) and invalid (originally occluded/not present) keypoints
                augmented_label = self.convert_imgaug_kpsoi_to_coco_kp(kpsoi_aug, valid, image_aug)
                # Update data
                transformed_img = image_aug
                transformed_label = augmented_label

            X[i, ] = transformed_img / 255.0  # scale RGB channels to [0,1]
            y[i, ] = self.generate_heatmaps(transformed_label)

        # the same heatmap batch supervises every hourglass block
        y_stacked = [y] * self.num_hg_blocks

        if self.is_eval:
            return X, y_stacked, metadatas
        return X, y_stacked