15
15
from SimilarityFusion import doSimilarityFusion
16
16
from SongStructureGUI import saveResultsJSON
17
17
import subprocess
18
- import crema
19
18
20
19
MANUAL_AUDIO_LOAD = True
21
20
FFMPEG_BINARY = "ffmpeg"
22
- model = crema .models .chord .ChordModel ()
23
21
24
22
def plotFusionResults (Ws , vs , alllabels , times ):
25
23
"""
@@ -74,7 +72,7 @@ def plotFusionResults(Ws, vs, alllabels, times):
74
72
plt .tight_layout ()
75
73
return fig
76
74
77
- def getFusedSimilarity (filename , sr , hop_length , win_fac , wins_per_block , K , reg_diag , reg_neighbs , niters , do_animation , plot_result ):
75
+ def getFusedSimilarity (filename , sr , hop_length , win_fac , wins_per_block , K , reg_diag , reg_neighbs , niters , do_animation , plot_result , do_crema = True ):
78
76
"""
79
77
Load in filename, compute features, average/stack delay, and do similarity
80
78
network fusion (SNF) on all feature types
@@ -104,7 +102,8 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
104
102
Whether to plot and save images of the evolution of SNF
105
103
plot_result: boolean
106
104
Whether to plot the result of the fusion
107
-
105
+ do_crema: boolean
106
+ Whether to include precomputed crema features (read from "<filename>_crema.mat") in the fusion
108
107
Returns
109
108
-------
110
109
{'Ws': A dictionary of weighted adjacency matrices for individual features
@@ -119,14 +118,8 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
119
118
sr , y = sio .wavfile .read ("%s.wav" % filename )
120
119
y = y / 2.0 ** 15
121
120
os .remove ("%s.wav" % filename )
122
-
123
- subprocess .call ([FFMPEG_BINARY , "-i" , filename , "-ar" , "44100" , "-ac" , "1" , "%s.wav" % filename ])
124
- _ , y44100 = sio .wavfile .read ("%s.wav" % filename )
125
- y44100 = y44100 / 2.0 ** 15
126
- os .remove ("%s.wav" % filename )
127
121
else :
128
122
y , sr = librosa .load (filename , sr = sr )
129
- y44100 , _ = librosa .load (filename , sr = 44100 )
130
123
131
124
## Step 2: Figure out intervals to which to sync features
132
125
if win_fac > 0 :
@@ -160,44 +153,55 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
160
153
tempogram = librosa .feature .tempogram (onset_envelope = oenv , sr = sr , hop_length = hop_length )
161
154
162
155
# 4) Crema
163
- #data = model.outputs(y=y44100, sr=44100)
164
- data = model .outputs (filename = filename )
165
- fac = (float (sr )/ 44100.0 )* 4096.0 / hop_length
166
- times_orig = fac * np .arange (len (data ['chord_bass' ]))
167
- times_new = np .arange (mfcc .shape [1 ])
168
- interp = scipy .interpolate .interp1d (times_orig , data ['chord_pitch' ].T , kind = 'nearest' , fill_value = 'extrapolate' )
169
- chord_pitch = interp (times_new )
156
+ if do_crema :
157
+ matfilename = "%s_crema.mat" % filename
158
+ if not os .path .exists (matfilename ):
159
+ print ("****WARNING: PRECOMPUTED CREMA DOES NOT EXIST****" )
160
+ do_crema = False
161
+ else :
162
+ data = sio .loadmat (matfilename )
163
+ fac = (float (sr )/ 44100.0 )* 4096.0 / hop_length
164
+ times_orig = fac * np .arange (len (data ['chord_bass' ]))
165
+ times_new = np .arange (mfcc .shape [1 ])
166
+ interp = scipy .interpolate .interp1d (times_orig , data ['chord_pitch' ].T , kind = 'nearest' , fill_value = 'extrapolate' )
167
+ chord_pitch = interp (times_new )
170
168
171
169
## Step 4: Synchronize features to intervals
172
- n_frames = np .min ([chroma .shape [1 ], mfcc .shape [1 ], tempogram .shape [1 ], chord_pitch .shape [1 ]])
170
+ n_frames = np .min ([chroma .shape [1 ], mfcc .shape [1 ], tempogram .shape [1 ]])
171
+ if do_crema :
172
+ n_frames = min (n_frames , chord_pitch .shape [1 ])
173
173
# median-aggregate chroma to suppress transients and passing tones
174
174
intervals = librosa .util .fix_frames (intervals , x_min = 0 , x_max = n_frames )
175
- chroma = librosa .util .sync (chroma , intervals , aggregate = np .median )
176
- mfcc = librosa .util .sync (mfcc , intervals )
177
- tempogram = librosa .util .sync (tempogram , intervals )
178
- chord_pitch = librosa .util .sync (chord_pitch , intervals )
179
175
times = intervals * float (hop_length )/ float (sr )
180
176
181
-
182
-
177
+ chroma = librosa .util .sync (chroma , intervals , aggregate = np .median )
183
178
chroma = chroma [:, :n_frames ]
179
+ mfcc = librosa .util .sync (mfcc , intervals )
184
180
mfcc = mfcc [:, :n_frames ]
181
+ tempogram = librosa .util .sync (tempogram , intervals )
185
182
tempogram = tempogram [:, :n_frames ]
186
- chord_pitch = chord_pitch [:, :n_frames ]
183
+ if do_crema :
184
+ chord_pitch = librosa .util .sync (chord_pitch , intervals )
185
+ chord_pitch = chord_pitch [:, :n_frames ]
186
+
187
187
188
188
#Do a delay embedding and compute SSMs
189
189
XChroma = librosa .feature .stack_memory (chroma , n_steps = wins_per_block , mode = 'edge' ).T
190
- XMFCC = librosa .feature .stack_memory (mfcc , n_steps = wins_per_block , mode = 'edge' ).T
191
- XTempogram = librosa .feature .stack_memory (tempogram , n_steps = wins_per_block , mode = 'edge' ).T
192
- XChordPitch = librosa .feature .stack_memory (chord_pitch , n_steps = wins_per_block , mode = 'edge' ).T
193
190
DChroma = getCSMCosine (XChroma , XChroma ) #Cosine distance
191
+ XMFCC = librosa .feature .stack_memory (mfcc , n_steps = wins_per_block , mode = 'edge' ).T
194
192
DMFCC = getCSM (XMFCC , XMFCC ) #Euclidean distance
193
+ XTempogram = librosa .feature .stack_memory (tempogram , n_steps = wins_per_block , mode = 'edge' ).T
195
194
DTempogram = getCSM (XTempogram , XTempogram )
196
- DChordPitch = getCSMCosine (XChordPitch , XChordPitch )
195
+ if do_crema :
196
+ XChordPitch = librosa .feature .stack_memory (chord_pitch , n_steps = wins_per_block , mode = 'edge' ).T
197
+ DChordPitch = getCSMCosine (XChordPitch , XChordPitch )
197
198
198
199
#Run similarity network fusion
199
- FeatureNames = ['MFCCs' , 'Chromas' , 'Tempogram' , 'Crema' ]
200
- Ds = [DMFCC , DChroma , DTempogram , DChordPitch ]
200
+ FeatureNames = ['MFCCs' , 'Chromas' , 'Tempogram' ]
201
+ Ds = [DMFCC , DChroma , DTempogram ]
202
+ if do_crema :
203
+ FeatureNames .append ('Crema' )
204
+ Ds .append (DChordPitch )
201
205
# Edge case: If it's too small, zeropad SSMs
202
206
for i , Di in enumerate (Ds ):
203
207
if Di .shape [0 ] < 2 * K :
@@ -244,6 +248,6 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
244
248
res = getFusedSimilarity (opt .filename , sr = opt .sr , \
245
249
hop_length = opt .hop_length , win_fac = opt .win_fac , wins_per_block = opt .wins_per_block , \
246
250
K = opt .K , reg_diag = opt .reg_diag , reg_neighbs = opt .reg_neighbs , niters = opt .niters , \
247
- do_animation = opt .do_animation , plot_result = opt .plot_result )
251
+ do_animation = opt .do_animation , plot_result = opt .plot_result , do_crema = False )
248
252
sio .savemat (opt .matfilename , res )
249
253
saveResultsJSON (opt .filename , res ['times' ], res ['Ws' ], res ['K' ], opt .neigs , opt .jsonfilename , opt .diffusion_znormalize )
0 commit comments