-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.goutputstream-QOWXTW
90 lines (73 loc) · 2.35 KB
/
.goutputstream-QOWXTW
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
%Extracts features from a .wav file. Runs k-means to separate between voiced and unvoiced samples.
%Runs on voiced frames only.
%
% Inputs:
%   filename - path handed to preprocess(), which loads the samples.
%   options  - struct with the fields read below:
%                .pitch    truthy -> fall back to cepstral pitch when LPC pitch is 0
%                .formants number of formants to append (0 disables)
%                .mfcc     truthy -> append mfcc(frame)
%                .delta    truthy -> append a 14-value delta of the trailing coefficients
%
% Output:
%   fvector  - one row per accepted frame; empty ([]) when no frame is accepted.
function fvector = extractFeatures(filename, options)
    %TODO: remove hard coded frame size
    % NOTE(review): these presumably have to match the framing used inside
    % preprocess(), since idx/zcc_energy are indexed per frame - confirm.
    frame_shift = 160;          % hop between consecutive frames, in samples
    frame_lead = 110;           % extra samples taken before the frame start
    frame_span = 320 + 110;     % last sample offset relative to the first one
    max_zcc = 55;               % frames with zcc_energy(i,1) above this are skipped
    min_energy = 0.1;           % frames with zcc_energy(i,2) below this are skipped
    n_delta = 14;               % number of trailing coefficients used for deltas

    %load samples, compute zcc, energy, plot signal in time
    [zcc_energy, samples, fs] = preprocess(filename);
    %normalize
    samples = maxMinNormalization(samples);

    %Obtain centroids and assignments to separate between voice and unvoiced
    initial_centroids = [2 3; 50 2];   % semicolon added: no console echo
    iterations = 5;
    plot_progress = 1;
    [centroids, idx] = runkMeans(zcc_energy, initial_centroids, iterations, plot_progress);

    %The voiced class is the cluster whose centroid has the LOWER first
    %coordinate (column 1 of zcc_energy, presumably the zero-crossing count).
    if centroids(1,1) > centroids(2,1)
        voiced = 2;
    else
        voiced = 1;
    end

    % Start from an empty matrix so the function still returns (and the
    % summary below still prints) when no frame is accepted.
    fvector = [];
    j = 1;
    % Trailing coefficients of the last accepted frame, before its delta was
    % appended. Zeros for the first frame, as in the original j == 1 branch.
    prev_coeffs = zeros(1, n_delta);

    for i = 2:(length(samples)/frame_shift - 2)
        %Only VOICED frames are processed.
        if idx(i) ~= voiced
            continue;
        end
        if zcc_energy(i,1) > max_zcc
            continue;
        end
        if zcc_energy(i,2) < min_energy
            continue;
        end

        first_sample = (i-1)*frame_shift + 1 - frame_lead;
        last_sample = first_sample + frame_span;
        frame = samples(first_sample:last_sample);
        %Apply hamming window
        window = hamming(length(frame));
        frame = frame.*window;

        % Reset per frame: the vector must not carry values over from the
        % previous iteration (it used to grow when only options.mfcc was set).
        vector = [];

        %If we need pitch or formants, lets include them
        if options.pitch || options.formants
            [pitch, formants] = linearPrediction(frame, fs);
            if pitch == 0 && options.pitch
                % LPC found no pitch: retry with the cepstral estimator and
                % drop the frame if that fails as well.
                [pitch, ~] = cepstral_features(frame);
                if pitch == 0
                    continue;
                end
            end
            % NOTE(review): pitch is stored even when only options.formants is
            % set; kept as-is so the feature vector width does not change.
            vector = pitch;

            %include as many formants as required
            if length(formants) < options.formants
                continue;
            end
            vector = [vector, formants(1:options.formants)];
        end

        %options.mfcc not currently used, using 14 coefficients
        if options.mfcc
            vector = [vector, mfcc(frame)];
        end

        %Dynamic frequency features
        %For variants on the computation of these features, check 2.1.2 of http://speechlab.eece.mu.edu/papers/Ye_thesis.pdf.
        if options.delta
            if numel(vector) < n_delta
                error('extractFeatures: options.delta needs at least %d coefficients per frame, got %d', n_delta, numel(vector));
            end
            cur_coeffs = vector(end-n_delta+1:end);
            % Difference against the previous frame's coefficients. Reading
            % them back from fvector(j-1, end-13:end) would pick up that row's
            % delta columns instead. Sign convention (previous - current) kept.
            delta = prev_coeffs - cur_coeffs;
            prev_coeffs = cur_coeffs;
            vector = [vector, delta];
        end

        fvector(j, :) = vector;
        j = j + 1;
    end

    fprintf('Extracted %f feature vectors from %f frames \n', size(fvector,1), length(samples)/frame_shift);
end