utils.py
import os
import random

import tensorflow as tf
import matplotlib.pyplot as plt
from keras.utils import to_categorical
# scipy.misc.imresize was removed in SciPy 1.3, so this import requires an
# older SciPy release (or an equivalent resize, e.g. via PIL).
from scipy.misc import imresize


def pre_process(image):
    """Resize an image to 28x28, add a channel axis, and rescale pixels from [0, 255] to [-1, 1]."""
    new_image = imresize(image, size=(28, 28)).reshape(28, 28, 1) / 255.
    new_image = (new_image - 0.5) * 2
    return new_image
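# Example (a sketch, not part of the original file): a uint8 grayscale image
# with values in [0, 255] becomes a (28, 28, 1) float array in [-1, 1], so a
# pixel of 0 maps to -1.0 and 255 maps to 1.0:
#   x = pre_process(np.zeros((105, 105), dtype=np.uint8))  # -> shape (28, 28, 1), all -1.0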
def load_image(path):
    """Read an image from disk and return its pre-processed (28, 28, 1) form."""
    image = plt.imread(path)
    return pre_process(image)


def get_images(paths, labels, nb_samples=None, shuffle=True):
    """List (label, image_path) pairs for each concept directory in `paths`.

    If `nb_samples` is given, that many images are sampled without replacement
    from each directory; if `shuffle` is True, the combined list is shuffled.
    """
    if nb_samples is not None:
        sampler = lambda x: random.sample(x, nb_samples)
    else:
        sampler = lambda x: x
    images = [
        (i, os.path.join(path, image))
        for i, path in zip(labels, paths)
        for image in sampler(os.listdir(path))
    ]
    if shuffle:
        random.shuffle(images)
    return images
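# Example usage (a sketch, assuming a directory-per-concept layout where each
# concept directory holds that concept's image files; paths are illustrative):
#   pairs = get_images(['data/alpha/char0', 'data/alpha/char1'], labels=[0, 1], nb_samples=5)
#   # -> shuffled list of ten (label, image_path) tuples, five per concept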
def one_hot_vector(labels, concept_size):
    """Convert integer labels into one-hot vectors with `concept_size` classes."""
    return to_categorical(labels, concept_size)


def get_next_train_val_batch(train_dataset, validation_dataset, concept_size=10):
    """Draw matching train/validation batches and remap their labels to 0..concept_size - 1."""
    num = train_dataset.num_shot_per_concept
    train_batch_data, train_batch_labels = train_dataset.next_batch(concept_size=concept_size)
    # Sample the validation batch from the same concepts that appear in the training batch.
    val_batch_data, val_batch_labels = validation_dataset.next_batch(concepts=train_batch_labels[0::num].reshape(-1))
    # Map the global concept ids in this batch onto local labels 0..concept_size - 1.
    for label in range(concept_size):
        train_batch_labels[train_batch_labels == train_batch_labels[num * label]] = label
        val_batch_labels[val_batch_labels == val_batch_labels[num * label]] = label
    train_batch_labels = one_hot_vector(train_batch_labels, concept_size)
    val_batch_labels = one_hot_vector(val_batch_labels, concept_size)
    return train_batch_data, train_batch_labels, val_batch_data, val_batch_labels
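# Example usage (a sketch; `train_dataset` / `validation_dataset` are assumed
# to expose `num_shot_per_concept` and the `next_batch` interface used above):
#   tr_x, tr_y, val_x, val_y = get_next_train_val_batch(train_dataset, validation_dataset, concept_size=5)
#   # tr_y and val_y are one-hot arrays with 5 columns, aligned to the same 5 concepts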
def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
        tower_grads: List of lists of (gradient, variable) tuples. The outer list
            is over the towers; the inner list is over the (gradient, variable)
            pairs computed on that tower.

    Returns:
        List of (gradient, variable) pairs where the gradient has been averaged
        across all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        # ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add a 0th dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)
            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)
        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)
        # Keep in mind that the Variables are redundant because they are shared
        # across towers, so we just return the first tower's pointer to the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
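# Example usage (a sketch, assuming TF1-style graph code with one loss per GPU
# tower and a shared optimizer `opt`; the names below are illustrative only):
#   tower_grads = []
#   for tower_loss in tower_losses:
#       tower_grads.append(opt.compute_gradients(tower_loss))
#   train_op = opt.apply_gradients(average_gradients(tower_grads))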