Skip to content

Commit 20bb5aa

Browse files
committed
Precomputing Crema
1 parent 212555c commit 20bb5aa

File tree

3 files changed

+78
-58
lines changed

3 files changed

+78
-58
lines changed

CremaPrecomputer.py

+41
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,41 @@
1+
import crema
2+
import librosa
3+
import sys
4+
import warnings
5+
import numpy as np
6+
import scipy.io as sio
7+
import subprocess
8+
from multiprocessing import Pool as PPool
9+
from SongStructure import *
10+
from SalamiExperiments import *
11+
model = crema.models.chord.ChordModel()
12+
13+
def compute_crema(num):
    """
    Precompute the crema chord model outputs for a single song and save
    them to a .mat file next to the audio

    Parameters
    ----------
    num: int
        Song number.  The audio is read from AUDIO_DIR/num/audio.mp3 and
        the result is written to AUDIO_DIR/num/audio.mp3_crema.mat

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a nonzero status
    """
    # Imported locally so this function does not depend on names leaking
    # in through the star imports at the top of the file
    import os
    from scipy.io import wavfile

    filename = "%s/%i/audio.mp3"%(AUDIO_DIR, num)
    print("Doing crema on %s"%filename)
    matfilename = "%s_crema.mat"%filename
    wavfilename = "%s.wav"%filename
    try:
        # Decode to a temporary mono 44.1kHz wav file.  "-y" overwrites a
        # stale file from an interrupted run instead of stopping to ask,
        # and check_call surfaces a failed decode right away
        subprocess.check_call([FFMPEG_BINARY, "-y", "-i", filename, "-ar", "44100", "-ac", "1", wavfilename])
        _, y44100 = wavfile.read(wavfilename)
    finally:
        # Always clean up the temporary wav, even if decoding/reading failed
        if os.path.exists(wavfilename):
            os.remove(wavfilename)
    # Rescale by 2^15 (assumes ffmpeg wrote 16-bit PCM samples -- TODO confirm)
    y44100 = y44100/2.0**15

    data = model.outputs(y=y44100, sr=44100)
    sio.savemat(matfilename, data)
24+
25+
def compute_all_crema(NThreads = 12):
    """
    Precompute all crema features in the SALAMI dataset

    Parameters
    ----------
    NThreads: int
        Number of worker processes to use.  If less than 1, no pool is
        created and the songs are processed one at a time in this process
    """
    # Imported locally so this function does not depend on names leaking
    # in through the star imports at the top of the file
    import os

    # Disable inconsistent hierarchy warnings
    if not sys.warnoptions:
        warnings.simplefilter("ignore")
    # Every entry of AUDIO_DIR whose name parses as an integer is a song;
    # anything else (stray files, hidden folders) is skipped
    songnums = []
    for s in os.listdir(AUDIO_DIR):
        try:
            songnums.append(int(s))
        except ValueError:
            continue
    if NThreads > 0:
        parpool = PPool(NThreads)
        try:
            parpool.map(compute_crema, songnums)
        finally:
            # Release the worker processes whether or not the map succeeded
            parpool.close()
            parpool.join()
    else:
        for num in songnums:
            compute_crema(num)
40+
# Script entry point
if __name__ == "__main__":
    # A negative NThreads skips the process pool, so the songs are
    # processed one at a time in this process
    compute_all_crema(NThreads=-1)

SalamiExperiments.py

+1-26
Original file line number | Diff line number | Diff line change
@@ -209,33 +209,8 @@ def aggregate_experiments_results():
209209

210210
plt.savefig("Results.svg", bbox_inches='tight')
211211

212-
# Step 4: Plot distribution of improvements with fusion
213-
"""
214-
names = ['MFCCs', 'Chromas']
215-
plt.figure(figsize=(15, 5))
216-
for i, plotname in enumerate(['Precision', 'Recall', 'L-Measure']):
217-
plt.subplot(1, 3, i+1)
218-
for name in names:
219-
prl = prls[name]
220-
improvements = prls['Fused'][:, i]/prl[:, i]
221-
order = np.argsort(improvements)[0:10]
222-
s = ""
223-
for o in order:
224-
s += "\n%i: %.3g"%(idxs[o], improvements[o])
225-
print("Worst 10 %s %s: %s"%(name, plotname, s))
226-
print(improvements)
227-
#sns.kdeplot(improvements, shade=True)
228-
plt.legend(names)
229-
plt.title("SPAM %s Fusion Improvement"%plotname)
230-
plt.xlabel(plotname)
231-
plt.ylabel("Probability Density")
232-
#plt.gca().set_xscale("log")
233-
plt.savefig("Improvements.svg", bbox_inches='tight')
234-
"""
235-
236212

237213
if __name__ == '__main__':
238214
#get_inter_anno_agreement()
239215
#run_audio_experiments(NThreads=-1)
240-
aggregate_experiments_results()
241-
#compute_features(724, True)
216+
aggregate_experiments_results()

SongStructure.py

+36-32
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,9 @@
1515
from SimilarityFusion import doSimilarityFusion
1616
from SongStructureGUI import saveResultsJSON
1717
import subprocess
18-
import crema
1918

2019
MANUAL_AUDIO_LOAD = True
2120
FFMPEG_BINARY = "ffmpeg"
22-
model = crema.models.chord.ChordModel()
2321

2422
def plotFusionResults(Ws, vs, alllabels, times):
2523
"""
@@ -74,7 +72,7 @@ def plotFusionResults(Ws, vs, alllabels, times):
7472
plt.tight_layout()
7573
return fig
7674

77-
def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg_diag, reg_neighbs, niters, do_animation, plot_result):
75+
def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg_diag, reg_neighbs, niters, do_animation, plot_result, do_crema=True):
7876
"""
7977
Load in filename, compute features, average/stack delay, and do similarity
8078
network fusion (SNF) on all feature types
@@ -104,7 +102,8 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
104102
Whether to plot and save images of the evolution of SNF
105103
plot_result: boolean
106104
Whether to plot the result of the fusion
107-
105+
do_crema: boolean
106+
Whether to include precomputed crema in the fusion
108107
Returns
109108
-------
110109
{'Ws': A dictionary of weighted adjacency matrices for individual features
@@ -119,14 +118,8 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
119118
sr, y = sio.wavfile.read("%s.wav"%filename)
120119
y = y/2.0**15
121120
os.remove("%s.wav"%filename)
122-
123-
subprocess.call([FFMPEG_BINARY, "-i", filename, "-ar", "44100", "-ac", "1", "%s.wav"%filename])
124-
_, y44100 = sio.wavfile.read("%s.wav"%filename)
125-
y44100 = y44100/2.0**15
126-
os.remove("%s.wav"%filename)
127121
else:
128122
y, sr = librosa.load(filename, sr=sr)
129-
y44100, _ = librosa.load(filename, sr=44100)
130123

131124
## Step 2: Figure out intervals to which to sync features
132125
if win_fac > 0:
@@ -160,44 +153,55 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
160153
tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr, hop_length=hop_length)
161154

162155
# 4) Crema
163-
#data = model.outputs(y=y44100, sr=44100)
164-
data = model.outputs(filename=filename)
165-
fac = (float(sr)/44100.0)*4096.0/hop_length
166-
times_orig = fac*np.arange(len(data['chord_bass']))
167-
times_new = np.arange(mfcc.shape[1])
168-
interp = scipy.interpolate.interp1d(times_orig, data['chord_pitch'].T, kind='nearest', fill_value='extrapolate')
169-
chord_pitch = interp(times_new)
156+
if do_crema:
157+
matfilename = "%s_crema.mat"%filename
158+
if not os.path.exists(matfilename):
159+
print("****WARNING: PRECOMPUTED CREMA DOES NOT EXIST****")
160+
do_crema = False
161+
else:
162+
data = sio.loadmat(matfilename)
163+
fac = (float(sr)/44100.0)*4096.0/hop_length
164+
times_orig = fac*np.arange(len(data['chord_bass']))
165+
times_new = np.arange(mfcc.shape[1])
166+
interp = scipy.interpolate.interp1d(times_orig, data['chord_pitch'].T, kind='nearest', fill_value='extrapolate')
167+
chord_pitch = interp(times_new)
170168

171169
## Step 4: Synchronize features to intervals
172-
n_frames = np.min([chroma.shape[1], mfcc.shape[1], tempogram.shape[1], chord_pitch.shape[1]])
170+
n_frames = np.min([chroma.shape[1], mfcc.shape[1], tempogram.shape[1]])
171+
if do_crema:
172+
n_frames = min(n_frames, chord_pitch.shape[1])
173173
# median-aggregate chroma to suppress transients and passing tones
174174
intervals = librosa.util.fix_frames(intervals, x_min=0, x_max=n_frames)
175-
chroma = librosa.util.sync(chroma, intervals, aggregate=np.median)
176-
mfcc = librosa.util.sync(mfcc, intervals)
177-
tempogram = librosa.util.sync(tempogram, intervals)
178-
chord_pitch = librosa.util.sync(chord_pitch, intervals)
179175
times = intervals*float(hop_length)/float(sr)
180176

181-
182-
177+
chroma = librosa.util.sync(chroma, intervals, aggregate=np.median)
183178
chroma = chroma[:, :n_frames]
179+
mfcc = librosa.util.sync(mfcc, intervals)
184180
mfcc = mfcc[:, :n_frames]
181+
tempogram = librosa.util.sync(tempogram, intervals)
185182
tempogram = tempogram[:, :n_frames]
186-
chord_pitch = chord_pitch[:, :n_frames]
183+
if do_crema:
184+
chord_pitch = librosa.util.sync(chord_pitch, intervals)
185+
chord_pitch = chord_pitch[:, :n_frames]
186+
187187

188188
#Do a delay embedding and compute SSMs
189189
XChroma = librosa.feature.stack_memory(chroma, n_steps=wins_per_block, mode='edge').T
190-
XMFCC = librosa.feature.stack_memory(mfcc, n_steps=wins_per_block, mode='edge').T
191-
XTempogram = librosa.feature.stack_memory(tempogram, n_steps=wins_per_block, mode='edge').T
192-
XChordPitch = librosa.feature.stack_memory(chord_pitch, n_steps=wins_per_block, mode='edge').T
193190
DChroma = getCSMCosine(XChroma, XChroma) #Cosine distance
191+
XMFCC = librosa.feature.stack_memory(mfcc, n_steps=wins_per_block, mode='edge').T
194192
DMFCC = getCSM(XMFCC, XMFCC) #Euclidean distance
193+
XTempogram = librosa.feature.stack_memory(tempogram, n_steps=wins_per_block, mode='edge').T
195194
DTempogram = getCSM(XTempogram, XTempogram)
196-
DChordPitch = getCSMCosine(XChordPitch, XChordPitch)
195+
if do_crema:
196+
XChordPitch = librosa.feature.stack_memory(chord_pitch, n_steps=wins_per_block, mode='edge').T
197+
DChordPitch = getCSMCosine(XChordPitch, XChordPitch)
197198

198199
#Run similarity network fusion
199-
FeatureNames = ['MFCCs', 'Chromas', 'Tempogram', 'Crema']
200-
Ds = [DMFCC, DChroma, DTempogram, DChordPitch]
200+
FeatureNames = ['MFCCs', 'Chromas', 'Tempogram']
201+
Ds = [DMFCC, DChroma, DTempogram]
202+
if do_crema:
203+
FeatureNames.append('Crema')
204+
Ds.append(DChordPitch)
201205
# Edge case: If it's too small, zeropad SSMs
202206
for i, Di in enumerate(Ds):
203207
if Di.shape[0] < 2*K:
@@ -244,6 +248,6 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
244248
res = getFusedSimilarity(opt.filename, sr=opt.sr, \
245249
hop_length=opt.hop_length, win_fac=opt.win_fac, wins_per_block=opt.wins_per_block, \
246250
K=opt.K, reg_diag=opt.reg_diag, reg_neighbs=opt.reg_neighbs, niters=opt.niters, \
247-
do_animation=opt.do_animation, plot_result=opt.plot_result)
251+
do_animation=opt.do_animation, plot_result=opt.plot_result, do_crema=False)
248252
sio.savemat(opt.matfilename, res)
249253
saveResultsJSON(opt.filename, res['times'], res['Ws'], res['K'], opt.neigs, opt.jsonfilename, opt.diffusion_znormalize)

0 commit comments

Comments
 (0)