-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
111 lines (82 loc) · 3.77 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import numpy as np
from paddle.io import Dataset
import paddle.vision.transforms.functional as F
from PIL import Image
class MonodepthDataset(Dataset):
    """Stereo image-pair dataset for monodepth training and testing.

    Each non-blank line of ``filenames_file`` holds whitespace-separated image
    paths relative to ``root``: the left image first, then the right image.
    Images are read as RGB, converted to float32 in ``[0, 1]``, resized to
    ``(params.height, params.width)`` and returned channel-first.

    Args:
        root: Directory the listed image paths are joined onto.
        filenames_file: Text file with one sample per line.
        params: Object exposing ``height`` and ``width`` (and ``do_stereo``,
            which is read in ``'test'`` mode).
        dataset: Dataset name; ``'cityscapes'`` enables the car-hood crop.
        mode: ``'train'`` or ``'test'``.
        use_aug: Enable random flipping and colour augmentation in
            ``'train'`` mode.
    """

    def __init__(self, root, filenames_file, params, dataset, mode, use_aug=False):
        super().__init__()
        self.root = root
        self.params = params
        self.dataset = dataset
        self.mode = mode
        self.use_aug = use_aug
        with open(filenames_file, 'r') as f:
            # Blank lines (e.g. a trailing newline at the end of the list)
            # carry no sample; keeping them would inflate __len__ and make
            # __getitem__ fail on an empty entry.
            self.paths = [line.split() for line in f if line.strip()]

    def __len__(self):
        """Return the number of samples listed in the filenames file."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            * ``'train'``: ``(left, right)``, each a ``(C, H, W)`` array.
            * ``'test'`` with ``params.do_stereo``: ``(left, right)``, each a
              ``(2, C, H, W)`` array stacking the image and its horizontal
              flip.
            * ``'test'`` without ``params.do_stereo``: the stacked left array
              only.

        Raises:
            ValueError: If the right image is needed but the entry lists
                fewer than two paths.
        """
        entry = self.paths[idx]

        # we load only one image for test, except if we trained a stereo model
        # (``do_stereo`` is only consulted in test mode, as before).
        needs_right = self.mode != 'test' or self.params.do_stereo
        if needs_right and len(entry) < 2:
            raise ValueError(
                'expected left and right image paths for sample {}, got {!r}'.format(
                    idx, entry
                )
            )

        left_image = self.read_image(os.path.join(self.root, entry[0]))
        right_image = None
        if needs_right:
            right_image = self.read_image(os.path.join(self.root, entry[1]))

        if self.mode == 'train':
            # randomly flip images; the mirrored right view becomes the new
            # left view and vice versa
            if self.use_aug and np.random.uniform(0, 1) > 0.5:
                left_image, right_image = F.hflip(right_image), F.hflip(left_image)

            # randomly augment images
            if self.use_aug and np.random.uniform(0, 1) > 0.5:
                left_image, right_image = self.augment_image_pair(left_image, right_image)

            return self.transpose(left_image), self.transpose(right_image)

        if self.mode == 'test':
            # Each test sample is the image stacked with its mirrored copy.
            left_image = np.stack([left_image, F.hflip(left_image)])
            if self.params.do_stereo:
                right_image = np.stack([right_image, F.hflip(right_image)])
                return self.transpose(left_image), self.transpose(right_image)
            return self.transpose(left_image)

        # NOTE(review): modes other than 'train'/'test' produce no sample;
        # confirm whether such modes are ever used by callers.
        return None

    def transpose(self, x):
        """Move the channel axis first: HWC -> CHW, or NHWC -> NCHW."""
        if x.ndim == 3:
            return x.transpose((2, 0, 1))
        return x.transpose((0, 3, 1, 2))

    def augment_image_pair(self, left_image, right_image):
        """Apply one shared random colour augmentation to both images.

        The same gamma, brightness and per-channel colour factors are used
        for the left and right image. Results are clipped to ``[0, 1]``; the
        inputs are not modified.
        """
        # randomly shift gamma
        random_gamma = np.random.uniform(0.8, 1.2)
        left_image_aug = left_image ** random_gamma
        right_image_aug = right_image ** random_gamma

        # randomly shift brightness
        random_brightness = np.random.uniform(0.5, 2.0)
        left_image_aug = left_image_aug * random_brightness
        right_image_aug = right_image_aug * random_brightness

        # randomly shift color (one factor per channel, broadcast over the
        # last axis)
        random_colors = np.random.uniform(0.8, 1.2, size=(3,))
        left_image_aug *= random_colors
        right_image_aug *= random_colors

        # saturate
        left_image_aug = np.clip(left_image_aug, 0, 1)
        right_image_aug = np.clip(right_image_aug, 0, 1)

        return left_image_aug, right_image_aug

    def read_image(self, image_path):
        """Read an image as a float32 RGB array in ``[0, 1]`` and resize it.

        For the ``'cityscapes'`` dataset the bottom fifth of the image is
        cropped away before resizing to ``(params.height, params.width)``.
        """
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the array.
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB')).astype(np.float32) / 255.0

        # if the dataset is cityscapes, we crop the last fifth to remove the car hood
        if self.dataset == 'cityscapes':
            o_height = image.shape[0]
            crop_height = (o_height * 4) // 5
            image = image[:crop_height, :, :]

        image = F.resize(image, (self.params.height, self.params.width), interpolation='area')
        return image
if __name__ == '__main__':
    # Smoke test: build the dataset in mono test mode and print the shape of
    # the first sample.
    from utils import AttrDict

    param = AttrDict()
    param.height, param.width = 256, 512
    param.do_stereo = False

    dataset = MonodepthDataset(
        root='eigen',
        filenames_file='eval/filenames/eigen_test_files.txt',
        params=param,
        dataset='kitti',
        mode='test',
    )
    first_sample = dataset[0]
    print(first_sample.shape)