forked from ed1d1a8d/6.867-final-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
training.py
99 lines (74 loc) · 2.8 KB
/
training.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data_utils
from get_data import get_audio_and_speakers
from model import LSTMVoice, dtype
# Number of distinct speakers, i.e. the number of output classes.
NUM_PEOPLE = 10

# Load parallel sequences of audio features and integer speaker labels.
# Presumably aligned index-for-index -- verify against get_data.py.
audio, speaker = get_audio_and_speakers(NUM_PEOPLE)

# Sequential 50% / 25% / 25% train / val / test split.
# NOTE(review): assumes the loaded data is already shuffled upstream --
# confirm, otherwise the splits could be biased by speaker ordering.
total_dataset_size = len(audio)
train_size = total_dataset_size // 2
val_size = total_dataset_size // 4

audio_train = torch.FloatTensor(audio[:train_size])
speaker_train = torch.IntTensor(speaker[:train_size])
audio_val = torch.FloatTensor(audio[train_size : train_size + val_size])
speaker_val = torch.IntTensor(speaker[train_size : train_size + val_size])
audio_test = torch.FloatTensor(audio[train_size + val_size:])
speaker_test = torch.IntTensor(speaker[train_size + val_size:])

# Shuffled mini-batches for training; val/test batching happens in evaluate().
train = data_utils.TensorDataset(audio_train, speaker_train)
train_loader = data_utils.DataLoader(train, batch_size=64, shuffle=True)
def evaluate(model, audio, speaker, batch_size=64):
    """Compute classification accuracy of `model` over a labelled split.

    Args:
        model: network taking a Variable of shape (seq, batch, features)
            and returning per-class scores of shape (batch, num_classes).
        audio: FloatTensor of shape (N, features, seq) -- permuted before
            the forward pass. (Shape assumed from the permute below --
            TODO confirm against get_data.py.)
        speaker: IntTensor of N ground-truth class labels.
        batch_size: fixed batch dimension required by the model; defaults
            to the training value of 64. The ragged final batch is
            skipped rather than evaluated.

    Returns:
        Fraction of evaluated examples predicted correctly, or 0.0 when
        fewer than `batch_size` examples were supplied.
    """
    dataset = data_utils.TensorDataset(audio, speaker)
    # shuffle=False keeps evaluation deterministic: ordering cannot affect
    # accuracy, and shuffling only randomized which ragged tail was dropped.
    loader = data_utils.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    correct = 0
    total = 0
    for inputs, labels in loader:
        # The model was built for a fixed batch size; skip partial batches.
        if inputs.shape[0] != batch_size:
            continue
        total += batch_size
        # (batch, features, seq) -> (seq, batch, features) for the LSTM.
        inputs = inputs.permute(2, 0, 1)
        inputs = autograd.Variable(inputs.type(dtype))
        scores = model(inputs).data
        # Argmax per row. Distinct index names fix the original's bug of
        # shadowing the outer loop index `i` with the row index.
        for row_idx, row in enumerate(scores):
            predicted = max(range(len(row)), key=lambda c: row[c])
            if predicted == labels[row_idx]:
                correct += 1
    # Guard against ZeroDivisionError when no full batch was available.
    return correct * 1.0 / total if total else 0.0
# ---- Model and optimizer hyperparameters ----
INPUT_DIM = 128    # per-timestep feature size -- presumably 128 spectrogram bins; confirm in get_data.py
HIDDEN_DIM = 256   # LSTM hidden-state size
DEPTH = 1          # number of stacked LSTM layers
BATCH_SIZE = 64    # must match the DataLoader batch size above

model = LSTMVoice(INPUT_DIM, HIDDEN_DIM, DEPTH, BATCH_SIZE, NUM_PEOPLE)
# NOTE(review): unconditional .cuda() -- this script requires a GPU.
model = model.cuda()

# NLLLoss expects log-probabilities, so LSTMVoice presumably ends in a
# log_softmax -- TODO confirm in model.py.
loss_function = nn.NLLLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.01)

losses = []          # per-mini-batch loss history across all epochs
running_loss = 0.0   # rolling loss for the periodic progress prints
NUM_EPOCHS = 5
# ---- Training loop ----
for epoch in range(NUM_EPOCHS):
    print('training epoch', epoch+1)
    running_loss = 0.0
    for i, batch in enumerate(train_loader, 0):
        inputs, labels = batch
        # The model requires a fixed batch dimension; skip the ragged final
        # mini-batch. Uses BATCH_SIZE instead of a second hard-coded 64 so
        # the constant has a single source of truth.
        if inputs.shape[0] != BATCH_SIZE:
            continue
        # (batch, features, seq) -> (seq, batch, features) for the LSTM.
        inputs = inputs.permute(2, 0, 1)
        inputs = autograd.Variable(inputs.type(dtype))
        # NLLLoss requires LongTensor targets.
        labels = autograd.Variable(labels.type(dtype).long())
        model.zero_grad()
        scores = model(inputs)
        loss = loss_function(scores, labels)
        loss.backward()
        optimizer.step()
        # Statistics (loss.data[0] is the pre-0.4 spelling of loss.item()).
        losses += [loss.data[0]]
        running_loss += loss.data[0]
        if i % 100 == 99:  # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
print("Train:", evaluate(model, audio_train, speaker_train))
print("Val:", evaluate(model, audio_val, speaker_val))
print("Test:", evaluate(model, audio_test, speaker_test))