-
Notifications
You must be signed in to change notification settings - Fork 3
/
main.m
340 lines (318 loc) · 12.5 KB
/
main.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
%%% Underdetimed source separation
%%% The algorithm is described in "Michael Syskind Pedersen, DeLiang Wang,
%%% Jan Larsen and Ulrik Kjems: Two-microphone Separation of Speech
%%% Mixtures", 2006, Submitted.
clear all
close all
format compact
dis=1;%%% display information while algorithm is running
if dis
disp('Initialisation...')
end
rand('state',1)
y={[]};
M=2; %number of sensors
u=0.5;%cardioid pattern controller
N=3;% number of signals in mixture %%% only used in mixing process
th=1; %mask threshold control (tau)?
stopthresholdini=3000; %One source if condition number is above this value
TC1=0.1; %Merge finalstereo signals if correlation is above TC1
TC2=0.03; %Merge finalstereo and enerstereo if correlation is above TC2
numlags=1; %%% when correlation between two signals are found, how many lags should be considered?
thepow=20; %%% tau_E
minpow=30; %%% included to save a few calculations.
%evalu=0%%% source signals as input, separated signals as output
%evalu=1%%% separation and evaluation
evalu=1;
winnumber=3; %%Selects window function
NFFT=2048;%%%DFT length
k=4; %%% window length is NFFT/k
lastfmasklength=1;
switch winnumber
case 1
WINDOW=hanning(NFFT/k);
case 2
WINDOW=hann(NFFT/k);
case 3
WINDOW=hamming(NFFT/k);
case 4
WINDOW=bartlett(NFFT/k);%(triangular)
case 5
WINDOW=triang(NFFT/k);
case 6
WINDOW=blackman(NFFT/k);
case 7
WINDOW=rectwin(NFFT/k);
end
noverlapfactor=0.75;% overlap between consecutive windows
NOVERLAP=length(WINDOW)*noverlapfactor;
if evalu %%%read source signals signals
s=[];
[s(:,1),fs]=wavread('ukma.wav');%A
[s(:,2),fs]=wavread('frma.wav');%B
[s(:,3),fs]=wavread('itfe.wav');%C
[s(:,4),fs]=wavread('cnfe.wav');%D
[s(:,5),fs]=wavread('rufe.wav');%E
[s(:,6),fs]=wavread('gema.wav');%F
[s(:,7),fs]=wavread('nlma.wav');%G
[s(:,8),fs]=wavread('jpfe.wav');%H
[s(:,9),fs]=wavread('brfe.wav');%I
[s(:,10),fs]=wavread('esma.wav');%J
[s(:,11),fs]=wavread('dkma.wav');%K
[s(:,12),fs]=wavread('ukfe.wav');%L
labelvec={' A',' B',' C',' D',' E',' F',' G',' H',' I',' J',' K',' L'};
% s=s(10001:50000,:);%%%%if signal duration should be made shorter
%%% randomly pick N source signal from the pool of size(s,2) source signals.
NS=1:size(s,2);
for i=1:N
rnd=ceil(rand(1)*(size(s,2)-i+1));
Ns(i)=NS(rnd);
NS(rnd)=[];%actually we pick by removing size(s,2)-N sources from the pool
end
N=length(Ns);
S=s(:,Ns)';
Npos=max(N,7); % at least 7 different directions
all_theta=linspace(0,pi,Npos); %directions equally spaced between 0 and 180 degrees
for i=1:N
rnd=ceil(rand(1)*(Npos-i+1));
theta(i)=all_theta(rnd);
all_theta(rnd)=[];
end
%theta*180/pi %%%% vector of direction angles
%two cardioids pointing in two directions varying from 1-u to 1+u,0<u<=1
A=calcA(theta,u); %calculate elements in mixing matrix
X=A*S; %%%Create 2 mixed signals
wavwrite(X',fs,'stereomix');%save stereo mixture, not scaled!!!
else
[X,fs]=wavread('stereomix.wav');
X=X';
end
%%estimate energy stop thresholds
powpow=10*log10((sum(X(1,:).^2)+sum(X(2,:).^2))/(2*length(X)));%power of mixture(used to determine thresholds)
thE=powpow-thepow; %power threshold
minpower=powpow-minpow;%do not save signals with very little power (makes cumputations faster)
%%% create ideal masks for evaluation purpose
if evalu
for i=1:N
vd=zeros(1,N);
vd(i)=1;
ivd=(vd-1)*(-1);
[imaskL{i},imaskR{i},SNRiL(i),SNRiR(i)]=idealmask(A*diag(vd)*S,A*diag(ivd)*S,fs,NFFT,WINDOW,NOVERLAP);
end
[cmL,cmR]=colorimask(imaskL,imaskR,fs); %%% draw each ideal BM with a certain color
end
x={X}; %initialize buffer of signals to be separated
mask={[]}; %initialize mask buffer
fmask={[]};
enermask={[]};
delete_me_again=sg(X(1,:),NFFT,fs,WINDOW,NOVERLAP);%get dimensions of mask
lastremmask=zeros(size(delete_me_again));
clear delete_me_again
%%% Start algorithm
%%%Initialize counters
countmax=30; %the maximum iteration number (to be absolutely sure that the algorithm stops)
finalcnt=1; %counts the number of separated output signals
enercnt=1; %counts the number of low energy masks
exitcnt=1; % counts the number of times the mask buffer is emptied and the remaining mask is estimated
cnt=0;%number of iterations
if dis
disp('Starting algorithm...')
end
while cnt<countmax
sx=size(x,2); %%%%size of the signal vector. the signals are grouped in sx/2 grops. When there is no more signals left in x, the separation task is over.
cnt=cnt+1;
% stopthreshold=max(stopthresholdini/cnt,minstopthreshold)%enable if stop threshold should change during iterations
stopthreshold=stopthresholdini;
%%% make empty buffers to store signal pairs and corresponding masks for evaluation
xbuffer={};
maskbuffer={};
for n=1:sx
%%% 2 by 2 ICA algorithm
[y{n},Aest] = icaML(x{n});
%%% alternative 2 by 2 ICA algorithm
%%% [Aest,y{n}]=jade(x{n});%%% available from ICA central
%%%normalization
for m=1:2%%%normalization
y{n}(m,:)=y{n}(m,:)/(10*sqrt(var(y{n}(m,:))));
end
%[Aest,W,y{n},theta_est]=normmix(Aest,x{n},u); %normalization based on kmnown microphone responses
%%%normalization
%%% ICA evaluation
if(0)%%% plots ICA output beampatterns
evalICAdB(W,theta_est,theta,N,Ns,labelvec,u)
end
%%% ICA evaluation
%%% estimate mask
if dis
disp('Finding mask...')
end
if evalu
[newX{1}(1,:),newX{1}(2,:),newX{2}(1,:),newX{2}(2,:),msk{1},msk{2}]=applymasks(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,th,NFFT,WINDOW,NOVERLAP,cmR,cmL);
else
[newX{1}(1,:),newX{1}(2,:),newX{2}(1,:),newX{2}(2,:),msk{1},msk{2}]=applymasks(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,th,NFFT,WINDOW,NOVERLAP);
end
%% binaural stopping criteria
for m=1:2
condi=oneortwo_cond(newX{m}(1,:),newX{m}(2,:),fs);%%% one or two signals based on condition number
if(enerstop(newX{m}(1,:),newX{m}(2,:),thE,minpower)==2)
if dis
disp('Not a speech signal-too low energy')
end
elseif(enerstop(newX{m}(1,:),newX{m}(2,:),thE,minpower)==1)
if dis
disp('Not a good quality speech signal')
end
[L,R,enermask{enercnt}]=getfinalmask(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,th,m,NFFT,WINDOW,NOVERLAP,1);%to save memory, enermask is a sparse matrix
stestr=strcat('enerstereo',int2str(enercnt));%not scaled
if dis
disp(strcat('enerstereo',int2str(enercnt)))
end
wavwrite([L,R],fs,stestr);
enercnt=enercnt+1;
elseif(condi>stopthreshold)
if dis
disp('Save as final signal...')
end
[L,R,fmask{finalcnt}]=getfinalmask(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,1,m,NFFT,WINDOW,NOVERLAP,0);%notice that in the last step, the threshold is set to 1.
%str=strcat('y',int2str(m),'iter',int2str(cnt),'n',int2str(n),'final',int2str(finalcnt))
%wavwrite(y{n}(m,:),fs,str);%%%save files
stestr=strcat('finalstereo',int2str(finalcnt));%not scaled
if dis
disp(strcat('finalstereo',int2str(finalcnt)))
end
wavwrite([L,R],fs,stestr);
finalcnt=finalcnt+1;%%%save files
else %continue
xbuffer={xbuffer{1:end},newX{m}};
maskbuffer={maskbuffer{1:end},msk{m}};
% str=strcat('y',int2str(m),'iter',int2str(cnt),'n',int2str(n))
% wavwrite(y{n}(m,:),fs,str);
end
end
end
clear x
x=xbuffer;
clear mask
mask=maskbuffer;
if(isempty(xbuffer))%%% all branches in the algorithm has ended, no more signal pairs to which ICA should be applied
if dis
disp('Stopping separation algorithm')
end
if length(fmask)~=lastfmasklength; %check if one source appears more than once in the final signals
fmask=multisigcheck(fmask,X(1,:)',X(2,:)',TC1,fs,NFFT,WINDOW,NOVERLAP,numlags);
lastfmasklength=length(fmask);
finalcnt=lastfmasklength+1;
end
if isempty(enermask{1})
else %check if poor quality signal is correlated with one of the final signals
fmask=nosigcorr(fmask,enermask,X(1,:)',X(2,:)',TC2,fs,NFFT,WINDOW,NOVERLAP,numlags);
end
enercnt=1;
enermask={[]};
if isempty(fmask{1})
if dis
disp('No signals segregated. Stopping separation procedure.')'
end
exitcnt=4;
break
else
if dis
disp('Finding remaining mask...')
end
if evalu
[L,R,remainingmask]=getremainingmask(X(1,:)',X(2,:)',fmask,fs,NFFT,WINDOW,NOVERLAP,cmL,cmR);
else
[L,R,remainingmask]=getremainingmask(X(1,:)',X(2,:)',fmask,fs,NFFT,WINDOW,NOVERLAP);
end
end
if lastremmask==remainingmask %%% Should not continue if background mask have not changed.
if dis
disp('No changes. Stopping separation procedure.')
end
exitcnt=4;
wavwrite([L,R],fs,'remaining');
break
else
lastremmask=remainingmask;%%% to see if the next background mask have changed.
end
wavwrite([L,R],fs,'remaining');
condi=oneortwo_cond(newX{m}(1,:),newX{m}(2,:),fs);
if(enerstop(L',R',thE,minpower,fs)>0) %% enough energy in remaining mask?
if dis
disp('Not a speech signal')
end
break
elseif(condi>stopthreshold)%%%more than one signal in remaining?
stestr=strcat('finalstereo',int2str(finalcnt));%not scaled
wavwrite([L,R],fs,stestr);
if dis
disp('save remaing')
disp(strcat('finalstereo',int2str(finalcnt)))
end
fmask{finalcnt}=remainingmask;
break
else
mask={remainingmask};
x={[L';R']};
end
if exitcnt>=4
break
end
exitcnt=exitcnt+1;
end
end
if isempty(fmask)
flag=1;
mEL=0
mNR=0
mSNRi=0
mSNRo=0
mSNRx=0
else
if length(fmask)~=lastfmasklength;
fmask=multisigcheck(fmask,X(1,:)',X(2,:)',TC1,fs,NFFT,WINDOW,NOVERLAP,numlags);
end
if isempty(enermask{1});
else
fmask=nosigcorr(fmask,enermask,X(1,:)',X(2,:)',TC2,fs,NFFT,WINDOW,NOVERLAP,numlags);
end
end
if dis
disp('Separation done.')
end
if evalu
if dis
disp('Evaluation of outputs')
end
%%%clear x and mask if algorithm should be run again on another mixture
clear x
clear mask
%% sort masks with corresponding ideal mask
[valL,valR,e1L,e1R,e2L,e2R,q,lbl,cflag]=comparemasks(fmask,imaskL,imaskR,labelvec(Ns),fs,length(s));
%standalone signals
for i=1:N
Xalone(:,:,i)=A(:,i)*S(i,:);
end
%%% calculate snr according to hu-wang 1999 paper
[PLEL,PREL,PLNR,PRNR,SNRL,SNRR,SNRiLi,SNRiRi,SNRxL,SNRxR]=calcELNR(e1L,e1R,e2L,e2R,imaskL,imaskR,q,NFFT,WINDOW,NOVERLAP,Xalone);
lbl{1:end};
datafile=strcat('data.mat');
save(datafile, 'PLEL', 'PREL', 'PLNR' ,'PRNR', 'SNRL', 'SNRR', 'SNRiL', 'SNRiR','lbl', 'th', 'stopthresholdini', 'TC1' ,'TC2', 'thepow', 'minpow', 'NFFT', 'winnumber', 'k', 'NOVERLAP', 'Ns', 'theta')
mEL=100*(mean(PLEL)+mean(PREL))/2
mNR=100*(mean(PLNR)+mean(PRNR))/2
mSNRi=(mean(SNRiL)+mean(SNRiR))/2
mSNRo=(mean(SNRL)+mean(SNRR))/2
mSNRx=(mean(SNRxL)+mean(SNRxR))/2
if length(fmask)~=N
flag=1;%%%not same number of segregated signals as input signals
elseif cflag==1%%% one of the segregated signals appears to be the same source signal.
flag=1;
else
flag=0;
end
mSNR=mSNRo-mSNRi
save mydata mEL mNR mSNRi mSNRo mSNR mSNRx flag
end
if dis
disp('Done')
end