-
Notifications
You must be signed in to change notification settings - Fork 3
/
run_svm.py
125 lines (89 loc) · 2.79 KB
/
run_svm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import numpy as np
# Seed NumPy's global RNG so the np.random.shuffle of the training set
# produces the same ordering on every run.
np.random.seed(0)
# imports
import sys
import csv
from sklearn import svm
# variables
# Command line:
#   run_svm.py <save_path> <train_path> <test_path> <output_path>
# save_path   - string prefix prepended to every feature (.npy) file name
# train_path  - tab-separated list, one "<file name>\t<label>" per line
# test_path   - list whose first tab-separated column is the file name
# output_path - file the tab-separated predictions are written to
if len(sys.argv) < 5:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('usage: ' + sys.argv[0] +
             ' <save_path> <train_path> <test_path> <output_path>')
save_path, train_path, test_path, output_path = sys.argv[1:5]
# load train, test list
# The train file is read once; each non-blank line must hold at least
# "<file name>\t<label>". Blank lines are skipped so a stray empty line
# does not abort the run or produce an empty file name.
train_list = []
train_label = []
with open(train_path) as f:
    for line_no, line in enumerate(f.read().splitlines(), 1):
        if not line.strip():
            continue
        fields = line.split('\t')
        if len(fields) < 2:
            raise ValueError('%s:%d: expected "<file name>\\t<label>", got %r'
                             % (train_path, line_no, line))
        train_list.append(fields[0])
        train_label.append(fields[1])
# Only the first column of the test file is used.
with open(test_path) as f:
    test_list = [line.split('\t')[0]
                 for line in f.read().splitlines() if line.strip()]
# unique labels
# Sorted so the label -> class-index mapping is identical on every run;
# the iteration order of a plain set of strings is not guaranteed.
unq_labels = sorted(set(train_label))
# sizes used to allocate the one-hot label matrix and the feature matrices
train_size = len(train_list)
test_size = len(test_list)
num_tags = len(unq_labels)
# shuffling training set
# Draw one random ordering of the row indices and apply it to both the
# file names and the labels, so each name stays paired with its label.
shuffled_order = np.arange(train_size)
np.random.shuffle(shuffled_order)
train_list = np.array(train_list)[shuffled_order].tolist()
train_label = np.array(train_label)[shuffled_order].tolist()
# One-hot encode the training labels:
# y_train[i, j] == 1 exactly when sample i carries label unq_labels[j].
label_to_index = dict((label, index) for index, label in enumerate(unq_labels))
y_train = np.zeros((train_size, num_tags))
# Index by row position rather than by file name, so a file that is listed
# more than once keeps the label given on each of its own rows, and the
# encoding is a single pass instead of a samples x labels scan.
for row, label in enumerate(train_label):
    y_train[row, label_to_index[label]] = 1
# load 1 sample for measure feature_length
def _feature_path(wav_name):
    """Return the path of the saved .npy feature file for *wav_name*.

    save_path is used as a plain string prefix, so it must already end with
    a path separator when it names a directory.
    """
    return save_path + wav_name.replace('.wav', '.npy')


def _load_features(names, length):
    """Load the feature of every entry in *names* into a (len(names), length) array."""
    features = np.zeros((len(names), length))
    for index, name in enumerate(names):
        features[index] = np.load(_feature_path(name))
        # progress report every 1000 files
        if index % 1000 == 0:
            print(index)
    # total number of files loaded (0 for an empty list)
    print(len(names))
    return features


if not train_list:
    raise ValueError('training list is empty: ' + train_path)
# The first training sample fixes the row width of both matrices.
# NOTE(review): presumably every feature file holds a 1-D vector of this
# same length - confirm against the feature extraction step.
feature_length = len(np.load(_feature_path(train_list[0])))
print(feature_length)
# load encoded feature
x_train = _load_features(train_list, feature_length)
x_test = _load_features(test_list, feature_length)
# normalization
# A single scalar mean and std are computed over the whole training matrix
# and the same two values are applied, in place, to train and test.
mean_value = np.mean(x_train)
std_value = np.std(x_train)
for feature_matrix in (x_train, x_test):
    feature_matrix -= mean_value
    feature_matrix /= std_value
print('mean value: ' + str(mean_value))
print('std value: ' + str(std_value))
print('Normalization done!')
# svm
# Collapse the one-hot rows back to integer class ids (column indices
# into unq_labels) before fitting.
y_train = np.argmax(y_train, axis=1)
clf = svm.SVC()
clf.fit(x_train, y_train)
y_test_tmp = clf.predict(x_test)
# Re-expand the predicted class ids into a one-hot matrix:
# row i gets a 1 in column y_test_tmp[i].
output = np.zeros((test_size, num_tags))
output[np.arange(test_size), y_test_tmp] = 1
print(output.shape)
# write result
# output_path, unq_labels
# One tab-separated row per test file: <file name>\t<predicted label>.
# The csv module needs a binary file on Python 2 but a text file opened with
# newline='' on Python 3, so pick the mode matching the running interpreter.
if sys.version_info[0] >= 3:
    out_file = open(output_path, 'w', newline='')
else:
    out_file = open(output_path, 'wb')
with out_file as f:
    wr = csv.writer(f, quoting=csv.QUOTE_NONE, delimiter='\t')
    # column of the 1 in each one-hot row == index into unq_labels
    tag_index_list = np.argmax(output, axis=1)
    for file_name, tag_index in zip(test_list, tag_index_list):
        wr.writerow([file_name, unq_labels[tag_index]])
print('write done')