# data_loader.py
import cv2
import numpy as np
class DataLoader:
    # This class imports data from the ETH and UCY datasets
    # (labels are treated as 15 FPS for ETH and 25 FPS for UCY; see __init__)
# The data are stored in two formats:
# ---If using people centered format:
# ---(indices are matched - the ith person is the same person)
# ------people_start_frame: record the start frame of each person
# ------ when the person first makes its appearance
# ------ (people_start_frame[i] means the start frame of ith person)
# ------people_end_frame: record the end frame of each person
# ------ when the person last makes its appearance
# ------ (people_end_frame[i] means the end frame of ith person)
# ------people_coords_complete: the coordinates of each person throughout its appearance
# ------ (people_coords_complete[i][j][0] means the x coordinate
# ------ of person i in frame j+people_start_frame[i])
    # ------people_velocity_complete: the velocities of each person throughout its appearance
# ------ (people_velocity_complete[i][j][0] means the x velocity
# ------ of person i in frame j+people_start_frame[i])
# ---If using frame centered format:
# ---(indices are matched - the ith frame is the same frame
# --- and the jth person is the same person)
    # ---(the *_matrix names are legacy; these are irregular nested lists, not matrices)
# ------video_position_matrix: A 3D irregular list
# ------ 1st Dimension indicates frames
# ------ 2nd Dimension indicates people
# ------ 3rd Dimension indicates coordinates of each person
# ------ (video_position_matrix[i][j][0] means the x coordinate
# ------ of person j in frame i)
# ------video_velocity_matrix: A 3D irregular list
# ------ 1st Dimension indicates frames
# ------ 2nd Dimension indicates people
# ------ 3rd Dimension indicates velocities of each person
# ------ (video_velocity_matrix[i][j][0] means the x velocity
# ------ of person j in frame i)
    # ------video_pedidx_matrix: A 2D irregular list
    # ------ 1st Dimension indicates frames
    # ------ 2nd Dimension indicates people; each entry is a pedestrian id
    # ------ (video_pedidx_matrix[i][j] means the pedestrian id
    # ------ of person j in frame i)
#
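    #
    # A minimal access sketch (hedged; the indices used here are hypothetical):
    #   loader = DataLoader('eth', 0, 15)
    #   i = 0                                          # person-centered view
    #   x0, y0 = loader.people_coords_complete[i][0]   # position at people_start_frame[i]
    #   f = loader.people_start_frame[i]               # frame-centered view
    #   positions = loader.video_position_matrix[f]    # everyone visible in frame f
    #   ped_ids = loader.video_pedidx_matrix[f]        # matching pedestrian indices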
    def __init__(self, dataset='eth', flag=0, target_fps=15):
# Initialize data processor
# Inputs:
# dataset: can only be 'eth' or 'ucy'
# flag: can only be 0 or 1 for 'eth'
# and 0-5 for 'ucy'
# target_fps: desired fps for the labels
#
# dataset - flag dataset name
# eth - 0 ETH
# eth - 1 HOTEL
# ucy - 0 ZARA1
# ucy - 1 ZARA2
# ucy - 2 UNIV (or UNIV1)
# ucy - 3 ZARA3
# ucy - 4 UNIV2
        # ucy - 5 ARXIEPISKOPI (not recommended)
self.dataset = dataset
self.flag = flag
        # index 0 of each video_*_matrix is a placeholder: frame ids are 1-based
        self.video_position_matrix = [[]]
        self.video_velocity_matrix = [[]]
        self.video_pedidx_matrix = [[]]
self.people_start_frame = []
self.people_end_frame = []
self.people_coords_complete = []
self.people_velocity_complete = []
self.frame_id_list = []
self.person_id_list = []
self.x_list = []
self.y_list = []
self.vx_list = []
self.vy_list = []
self.H = []
if dataset == 'eth':
in_fps = 15
read_success = self._read_eth_data(flag)
elif dataset == 'ucy':
in_fps = 25
read_success = self._read_ucy_data(flag)
else:
            print("dataset argument must be 'eth' or 'ucy'")
read_success = False
if read_success:
self._organize_frame()
            if in_fps != target_fps:
self._frame_matching(in_fps, target_fps)
self._data_processing()
else:
raise Exception('Wrong inputs to the data loader!')
return
def update_message(self, msg):
# Store everything obtained/computed from the datasets into the message
#if msg.if_processed_data:
# raise Exception('Data already loaded!')
msg.if_processed_data = True
msg.dataset = self.dataset
msg.flag = self.flag
msg.fname = self.fname
msg.total_num_frames = self.total_num_frames
msg.has_video = self.has_video
msg.frame_width = self.frame_width
msg.frame_height = self.frame_height
msg.video_position_matrix = self.video_position_matrix
msg.video_velocity_matrix = self.video_velocity_matrix
msg.video_pedidx_matrix = self.video_pedidx_matrix
msg.people_start_frame = self.people_start_frame
msg.people_end_frame = self.people_end_frame
msg.people_coords_complete = self.people_coords_complete
msg.people_velocity_complete = self.people_velocity_complete
msg.frame_id_list = self.frame_id_list
msg.person_id_list = self.person_id_list
msg.x_list = self.x_list
msg.y_list = self.y_list
msg.vx_list = self.vx_list
msg.vy_list = self.vy_list
msg.H = self.H
return msg
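    # A minimal sketch of a compatible message container (hypothetical; the
    # real `msg` type is defined elsewhere in this project):
    #   class Message:
    #       if_processed_data = False
    #   msg = DataLoader('eth', 0).update_message(Message())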
def _read_eth_data(self, flag):
# Read data from the ETH dataset
# Data is stored into x_list, y_list, vx_list, vy_list
# Associated person_id and frames are stored into person_id_list, frame_id_list
# Inputs:
# flag - the flag argument from __init__
# Returns:
# True if data reading is successful
if flag == 0:
folder = 'seq_eth'
elif flag == 1:
folder = 'seq_hotel'
else:
            print("Flag for 'eth' should be 0 or 1")
return False
# Create a VideoCapture object and get basic video information
self.fname = 'ewap_dataset/' + folder + '/' + folder + '.avi'
cap = cv2.VideoCapture(self.fname)
        if not cap.isOpened():
print("Error opening video stream or file")
return False
self.total_num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
self.has_video = True
        # Read the homography matrix
        fname = 'ewap_dataset/' + folder + '/H.txt'
        with open(fname) as f:
            for line in f:
                # split() drops empty fields and the trailing newline
                h1, h2, h3 = line.split()
                self.H.append([float(h1), float(h2), float(h3)])
        # Read the data from the text file
        fname = 'ewap_dataset/' + folder + '/obsmat.txt'
        with open(fname) as f:
            for line in f:
                # obsmat columns: frame, person id, pos_x, pos_z, pos_y, v_x, v_z, v_y
                frame_id, person_id, x, z, y, vx, vz, vy = line.split()
                self.frame_id_list.append(int(round(float(frame_id))))
                self.person_id_list.append(int(round(float(person_id))))
                self.x_list.append(float(x))
                self.y_list.append(float(y))
                self.vx_list.append(float(vx))
                self.vy_list.append(float(vy))
#print('File reading done!')
return True
def _read_ucy_data(self, flag):
# Read data from the UCY dataset
# Data is stored into x_list, y_list
# Associated person_id and frames are stored into person_id_list, frame_id_list
        # Unlike ETH, UCY does not provide velocity labels,
        # so vx_list and vy_list are estimated by finite differences
        # between consecutive labeled points.
# Inputs:
# flag - the flag argument from __init__
# Returns:
# True if data reading is successful
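        #
        # Worked example (hypothetical numbers): consecutive labeled points
        # x = 0.0 m at frame 0 and x = 1.0 m at frame 10 give
        # vx = (1.0 - 0.0) / (10 - 0) * 25 = 2.5 m/s at the 25 FPS label rate.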
if flag == 0:
folder = 'zara'
source = 'crowds_zara01'
elif flag == 1:
folder = 'zara'
source = 'crowds_zara02'
elif flag == 2:
folder = 'university_students'
source = 'students003'
elif flag == 3:
folder = 'zara'
source = 'crowds_zara03'
elif flag == 4:
folder = 'university_students'
source = 'students001'
elif flag == 5:
folder = 'arxiepiskopi'
source = 'arxiepiskopi1'
            print('Warning: arxiepiskopi annotations are low quality; not recommended')
else:
            print("Flag for 'ucy' should be 0 - 5")
return False
# Create a VideoCapture object and read from input file
if (flag == 3) or (flag == 4):
# zara3 and univ2 don't have videos to read
if flag == 3:
alt_source = 'crowds_zara01'
else:
alt_source = 'students003'
self.fname = 'ucy_dataset/' + folder + '/' + alt_source + '.avi'
# Dummy video to capture frame information
# ZARA1 ZARA2 ZARA3 have the same frame width and height
# UNIV1 and UNIV2 also have the same frame width and height
# number of frames will be determined by the last frame that contains information
cap = cv2.VideoCapture(self.fname)
            if not cap.isOpened():
print("Error opening video stream or file")
return False
self.total_num_frames = -1
self.has_video = False
else:
self.fname = 'ucy_dataset/' + folder + '/' + source + '.avi'
cap = cv2.VideoCapture(self.fname)
            if not cap.isOpened():
print("Error opening video stream or file")
return False
self.total_num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
self.has_video = True
        self.frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
# Obtain the approximate H matrix
offx = 17.5949
offy = 9.665722
pts_img = np.array([[476, 117], [562, 117], [562, 311],[476, 311]])
pts_wrd = np.array([[0, 0], [1.81, 0], [1.81, 4.63],[0, 4.63]])
pts_wrd[:,0] += offx
pts_wrd[:,1] += offy
self.H, status = cv2.findHomography(pts_img, pts_wrd)
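        # A minimal sketch of the same pixel-to-meter mapping done with
        # OpenCV directly (equivalent, under this approximate H, to the
        # manual matrix multiply performed on each point below):
        #   pt_px = np.array([[[476.0, 117.0]]])            # shape (1, 1, 2)
        #   pt_m = cv2.perspectiveTransform(pt_px, self.H)  # ~ [[[offx, offy]]]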
        # Read the data from the text file
fname = 'ucy_dataset/' + folder + '/data_' + folder + '/' + source + '.vsp'
with open(fname) as f:
person_id = 0
mem_x = 0
mem_y = 0
mem_frame_id = 0
record_switch = True
for idx, line in enumerate(f):
line = line.split()
                if len(line) == 6:
                    # a 6-field line starts a new person's track
                    person_id += 1
                    record_switch = False
                    if idx > 0:
                        # duplicate the previous person's last velocity so the
                        # velocity lists stay aligned with the position lists
                        vx = self.vx_list[-1]
                        vy = self.vy_list[-1]
                        self.vx_list.append(vx)
                        self.vy_list.append(vy)
                if len(line) in (8, 4):
x = float(line[0])
y = float(line[1])
frame_id = int(line[2])
# convert units from pixels to meters using the approximated H
pt = np.matmul(self.H, [[x], [y], [1.0]])
x, y = (pt[0][0] / pt[2][0]), (pt[1][0] / pt[2][0])
self.x_list.append(x)
self.y_list.append(y)
self.frame_id_list.append(frame_id)
self.person_id_list.append(person_id)
                    if record_switch:
                        # velocity labels are not included in the dataset;
                        # estimate them by backward finite differences
                        vx = (x - mem_x) / (frame_id - mem_frame_id)
                        vy = (y - mem_y) / (frame_id - mem_frame_id)
                        # scale from per-frame to per-second at the 25 FPS label rate
                        self.vx_list.append(vx * 25)
                        self.vy_list.append(vy * 25)
else:
record_switch = True
mem_x = x
mem_y = y
mem_frame_id = frame_id
        # duplicate the final velocity for the last labeled point of the last person
        vx = self.vx_list[-1]
        vy = self.vy_list[-1]
        self.vx_list.append(vx)
        self.vy_list.append(vy)
#print('File reading done!')
return True
def _organize_frame(self):
# Connect paths for each individual person
        # For each person, the frame ids and the associated path
        # coordinates and velocities are stored into people_*_complete
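        #
        # Worked example (hypothetical numbers): a person seen at frame 10
        # with x = 0.0 and next seen at frame 14 with x = 2.0 has frames
        # 11-14 filled at ratios 1/4, 2/4, 3/4, 4/4, i.e. x = 0.5, 1.0, 1.5, 2.0.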
num_people = max(self.person_id_list)
for i in range(num_people):
got_start = False
prev_frame = 0
curr_frame = 0
person_coords = []
person_velocity = []
mem_x = 0
mem_y = 0
mem_vel_x = 0
mem_vel_y = 0
for j in range(len(self.frame_id_list)):
if self.person_id_list[j] == (i + 1):
curr_frame = self.frame_id_list[j]
if not got_start:
got_start = True
self.people_start_frame.append(curr_frame)
person_coords.append((self.x_list[j], self.y_list[j]))
person_velocity.append((self.vx_list[j], self.vy_list[j]))
else:
num_frames_interpolated = curr_frame - prev_frame
for k in range(num_frames_interpolated):
                            # for frames with missing information, positions and
                            # velocities are linearly interpolated between the
                            # two nearest labeled frames
ratio = float(k + 1) / float(num_frames_interpolated)
diff_x = ratio * (self.x_list[j] - mem_x)
diff_y = ratio * (self.y_list[j] - mem_y)
person_coords.append((mem_x + diff_x, mem_y + diff_y))
diff_vx = ratio * (self.vx_list[j] - mem_vel_x)
diff_vy = ratio * (self.vy_list[j] - mem_vel_y)
person_velocity.append((mem_vel_x + diff_vx, mem_vel_y + diff_vy))
mem_x, mem_y = self.x_list[j], self.y_list[j]
mem_vel_x, mem_vel_y = self.vx_list[j], self.vy_list[j]
prev_frame = curr_frame
if got_start:
self.people_end_frame.append(curr_frame)
self.people_coords_complete.append(person_coords)
self.people_velocity_complete.append(person_velocity)
if self.total_num_frames == -1:
self.total_num_frames = max(self.people_end_frame)
#print('Frame organizing done!')
return
def _frame_matching(self, in_fps, out_fps):
# Converts information stored in people_* arrays
# from their original fps to a desired fps
# Inputs:
# in_fps - the original fps of the dataset labels
# out_fps - desired fps to convert to
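        #
        # Worked example (hypothetical numbers): with in_fps = 25 and
        # out_fps = 15, a track spanning old frames 50-100 is resampled to
        # new frames ceil(50/25*15) = 30 through floor(100/25*15) = 60,
        # and new frame j is read back at old frame position j / 15 * 25.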
        total_time = self.total_num_frames / in_fps
        # keep the frame count consistent with the resampled label rate,
        # so _data_processing iterates over the new frame indices
        self.total_num_frames = int(np.floor(total_time * out_fps))
num_people = len(self.people_start_frame)
for i in range(num_people):
curr_start = self.people_start_frame[i]
curr_end = self.people_end_frame[i]
# make sure the pedestrian exists in its new start frame and end frame
new_start = int(np.ceil(curr_start / in_fps * out_fps))
new_end = int(np.floor(curr_end / in_fps * out_fps))
new_pos_array = []
new_vel_array = []
            # linear interpolation in time to obtain pos and vel at the new frames (timestamps)
for j in range(new_start, new_end + 1):
timestamp = j / out_fps
old_frame_idx = timestamp * in_fps
if abs(round(old_frame_idx) - old_frame_idx) < 1e-10:
idx = int(round(old_frame_idx)) - curr_start
new_pos_array.append(self.people_coords_complete[i][idx])
new_vel_array.append(self.people_velocity_complete[i][idx])
else:
prev_idx = int(np.floor(old_frame_idx)) - curr_start
next_idx = int(np.ceil(old_frame_idx)) - curr_start
                    ratio = (old_frame_idx - curr_start) - prev_idx  # fractional part of old_frame_idx
new_pos = np.array(self.people_coords_complete[i][prev_idx]) * (1 - ratio) + \
np.array(self.people_coords_complete[i][next_idx]) * ratio
new_vel = np.array(self.people_velocity_complete[i][prev_idx]) * (1 - ratio) + \
np.array(self.people_velocity_complete[i][next_idx]) * ratio
new_pos_array.append((new_pos[0], new_pos[1]))
new_vel_array.append((new_vel[0], new_vel[1]))
self.people_start_frame[i] = new_start
self.people_end_frame[i] = new_end
self.people_coords_complete[i] = new_pos_array
self.people_velocity_complete[i] = new_vel_array
return
    def _data_processing(self):
        # Precompute the frame-indexed video arrays and store them into video_*_matrix
        # (legacy *_matrix naming; these are irregular nested lists)
        # Each array contains information frame by frame
        # Entries that share the same index within a frame point to the same person;
        # pedidx records which person each entry corresponds to
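        #
        # A minimal indexing sketch (hypothetical i and j): for any frame i
        # that contains at least one pedestrian,
        #   x, y = self.video_position_matrix[i][j]
        #   vx, vy = self.video_velocity_matrix[i][j]
        #   ped = self.video_pedidx_matrix[i][j]
        # all describe the same person, because the three lists are appended
        # to in lockstep below.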
for i in range(self.total_num_frames):
position_array = []
velocity_array = []
pedidx_array = []
curr_frame = i + 1
for j in range(len(self.people_start_frame)):
curr_start = self.people_start_frame[j]
curr_end = self.people_end_frame[j]
if (curr_start <= curr_frame) and (curr_frame <= curr_end):
x, y = self.people_coords_complete[j][curr_frame - curr_start]
vx, vy = self.people_velocity_complete[j][curr_frame - curr_start]
position_array.append((float(x), float(y)))
velocity_array.append((float(vx), float(vy)))
pedidx_array.append(j)
            # append the per-frame arrays (empty lists for frames with no pedestrians);
            # the three lists are appended in lockstep so indices stay aligned
            self.video_position_matrix.append(position_array)
            self.video_velocity_matrix.append(velocity_array)
            self.video_pedidx_matrix.append(pedidx_array)
#print('Initial data processing done!')
return
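
if __name__ == '__main__':
    # A minimal usage sketch (assumes the ewap_dataset/ directory referenced
    # in _read_eth_data sits next to this script):
    loader = DataLoader(dataset='eth', flag=0, target_fps=15)
    print('total frames:', loader.total_num_frames)
    print('pedestrians tracked:', len(loader.people_start_frame))
    # positions of everyone visible in frame 1 (frame ids are 1-based)
    print('frame 1 positions:', loader.video_position_matrix[1])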