-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload_data.py
149 lines (132 loc) · 5.82 KB
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
from scipy.io import loadmat
import pandas as pd
import numpy as np
from random import shuffle
import os
import cv2
class DataManager(object):
    """Load the fer2013 emotion, imdb gender or KDEF emotion dataset.

    Args:
        dataset_name: One of 'imdb', 'fer2013' or 'KDEF'.
        dataset_path: Location of the dataset. When None, a default
            relative path is chosen from ``dataset_name``.
        image_size: Two-element target size the fer2013 and KDEF loaders
            resize every face to.

    Raises:
        Exception: If ``dataset_path`` is None and ``dataset_name`` is not
            one of the supported names.
    """

    def __init__(self, dataset_name='imdb',
                 dataset_path=None, image_size=(48, 48)):
        self.dataset_name = dataset_name
        self.dataset_path = dataset_path
        self.image_size = image_size
        if self.dataset_path is not None:
            # An explicit path always wins over the per-dataset default;
            # the dataset name is not validated in this case.
            return
        if self.dataset_name == 'imdb':
            self.dataset_path = '../datasets/imdb_crop/imdb.mat'
        elif self.dataset_name == 'fer2013':
            self.dataset_path = '../datasets/fer2013/fer2013.csv'
        elif self.dataset_name == 'KDEF':
            self.dataset_path = '../datasets/KDEF/'
        else:
            raise Exception(
                'Incorrect dataset name, please input imdb, fer2013 or KDEF')

    def get_data(self):
        """Load the dataset selected by ``dataset_name``.

        Returns:
            For 'imdb', a dict mapping image name to gender value.
            For 'fer2013' and 'KDEF', a ``(faces, emotions)`` tuple of
            numpy arrays.

        Raises:
            Exception: If ``dataset_name`` is not a supported name. This
                can only happen when an explicit ``dataset_path`` was
                given, because the constructor does not validate the
                name in that case.
        """
        if self.dataset_name == 'imdb':
            return self._load_imdb()
        if self.dataset_name == 'fer2013':
            return self._load_fer2013()
        if self.dataset_name == 'KDEF':
            return self._load_KDEF()
        raise Exception('Invalid dataset name: %r' % (self.dataset_name,))

    def _load_imdb(self):
        """Read the imdb ``.mat`` metadata and keep only usable entries.

        An entry is kept when its face score is above the threshold, its
        second face score is NaN (presumably meaning no second face was
        detected) and its gender value is not NaN.

        Returns:
            A dict mapping each kept image name to its gender value.
        """
        face_score_threshold = 3
        dataset = loadmat(self.dataset_path)
        image_names_array = dataset['imdb']['full_path'][0, 0][0]
        gender_classes = dataset['imdb']['gender'][0, 0][0]
        face_score = dataset['imdb']['face_score'][0, 0][0]
        second_face_score = dataset['imdb']['second_face_score'][0, 0][0]
        face_score_mask = face_score > face_score_threshold
        second_face_score_mask = np.isnan(second_face_score)
        known_gender_mask = np.logical_not(np.isnan(gender_classes))
        mask = np.logical_and(face_score_mask, second_face_score_mask)
        mask = np.logical_and(mask, known_gender_mask)
        image_names_array = image_names_array[mask]
        gender_classes = gender_classes[mask].tolist()
        # Each entry wraps the actual name in a one-element container.
        image_names = [entry[0] for entry in image_names_array]
        return dict(zip(image_names, gender_classes))

    def _load_fer2013(self):
        """Read the fer2013 CSV into face images and one-hot emotions.

        Returns:
            A ``(faces, emotions)`` tuple. ``faces`` is a float32 array
            with a trailing channel axis of length one; ``emotions`` is
            the one-hot encoding of the 'emotion' column.
        """
        data = pd.read_csv(self.dataset_path)
        width, height = 48, 48
        faces = []
        for pixel_sequence in data['pixels'].tolist():
            face = [int(pixel) for pixel in pixel_sequence.split(' ')]
            face = np.asarray(face).reshape(width, height)
            # NOTE: cv2.resize reads its size argument as (width, height).
            face = cv2.resize(face.astype('uint8'), self.image_size)
            faces.append(face.astype('float32'))
        faces = np.expand_dims(np.asarray(faces), -1)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values works on
        # old and new versions. The cast keeps the labels numeric on pandas
        # versions whose get_dummies returns booleans.
        emotions = pd.get_dummies(data['emotion']).values.astype('uint8')
        return faces, emotions

    def _load_KDEF(self):
        """Read every ``.jpg`` under the KDEF folder as a grayscale face.

        The emotion code is taken from characters 4-5 of the file's base
        name. Files whose code is not a known class keep their face but
        get an all-zero emotion row.

        Returns:
            A ``(faces, emotions)`` tuple. ``faces`` has a trailing
            channel axis of length one; ``emotions`` is one-hot.
        """
        class_to_arg = get_class_to_arg(self.dataset_name)
        num_classes = len(class_to_arg)

        file_paths = []
        for folder, subfolders, filenames in os.walk(self.dataset_path):
            for filename in filenames:
                if filename.lower().endswith('.jpg'):
                    file_paths.append(os.path.join(folder, filename))

        num_faces = len(file_paths)
        y_size, x_size = self.image_size
        faces = np.zeros(shape=(num_faces, y_size, x_size))
        emotions = np.zeros(shape=(num_faces, num_classes))
        for file_arg, file_path in enumerate(file_paths):
            image_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            # cv2.resize expects (width, height); the result then has shape
            # (y_size, x_size) and fits the preallocated buffer.
            image_array = cv2.resize(image_array, (x_size, y_size))
            faces[file_arg] = image_array
            file_basename = os.path.basename(file_path)
            file_emotion = file_basename[4:6]
            # there are two file names in the dataset
            # that don't match the given classes
            try:
                emotion_arg = class_to_arg[file_emotion]
            except KeyError:
                continue
            emotions[file_arg, emotion_arg] = 1
        faces = np.expand_dims(faces, -1)
        return faces, emotions
def get_labels(dataset_name):
    """Return the class-index to label-name mapping of a dataset.

    Args:
        dataset_name: 'fer2013', 'imdb' or 'KDEF'.

    Returns:
        A newly built dict keyed by integer class index.

    Raises:
        Exception: If ``dataset_name`` is none of the supported names.
    """
    known_labels = (
        ('fer2013', {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
                     4: 'sad', 5: 'surprise', 6: 'neutral'}),
        ('imdb', {0: 'woman', 1: 'man'}),
        ('KDEF', {0: 'AN', 1: 'DI', 2: 'AF', 3: 'HA',
                  4: 'SA', 5: 'SU', 6: 'NE'}),
    )
    for candidate, labels in known_labels:
        if dataset_name == candidate:
            return labels
    raise Exception('Invalid dataset name')
def get_class_to_arg(dataset_name='fer2013'):
    """Return the label-name to class-index mapping of a dataset.

    Args:
        dataset_name: 'fer2013', 'imdb' or 'KDEF'.

    Returns:
        A newly built dict mapping each class name to its integer index.

    Raises:
        Exception: If ``dataset_name`` is none of the supported names.
    """
    # Class names are listed in index order; the position is the index.
    if dataset_name == 'fer2013':
        class_names = ('angry', 'disgust', 'fear', 'happy', 'sad',
                       'surprise', 'neutral')
    elif dataset_name == 'imdb':
        class_names = ('woman', 'man')
    elif dataset_name == 'KDEF':
        class_names = ('AN', 'DI', 'AF', 'HA', 'SA', 'SU', 'NE')
    else:
        raise Exception('Invalid dataset name')
    return {class_name: arg for arg, class_name in enumerate(class_names)}
def split_imdb_data(ground_truth_data, validation_split=.2, do_shuffle=False):
    """Split the keys of a ground-truth mapping into train and validation.

    Args:
        ground_truth_data: Mapping whose keys identify the samples.
        validation_split: Fraction of the keys assigned to validation.
        do_shuffle: When truthy, the sorted keys are shuffled in place
            before splitting; otherwise the split follows sorted order.

    Returns:
        A ``(train_keys, validation_keys)`` tuple of lists.
    """
    ground_truth_keys = sorted(ground_truth_data.keys())
    # Plain truthiness: an identity test against False would also shuffle
    # for falsy values such as 0, None or numpy.bool_(False).
    if do_shuffle:
        shuffle(ground_truth_keys)
    training_split = 1 - validation_split
    num_train = int(training_split * len(ground_truth_keys))
    train_keys = ground_truth_keys[:num_train]
    validation_keys = ground_truth_keys[num_train:]
    return train_keys, validation_keys
def split_data(x, y, validation_split=.2):
    """Cut two parallel sequences into a leading train and trailing val part.

    Args:
        x: Sliceable sequence of samples; its length sets the cut point.
        y: Sliceable sequence of targets, cut at the same index as ``x``.
        validation_split: Fraction of the samples placed in validation.

    Returns:
        ``((train_x, train_y), (val_x, val_y))``.
    """
    boundary = int((1 - validation_split) * len(x))
    head = slice(None, boundary)
    tail = slice(boundary, None)
    return (x[head], y[head]), (x[tail], y[tail])