Skip to content

Commit a609e93

Browse files
committed
Adding tempogram, and adding custom ffmpeg option for SLURM cluster
1 parent e797b08 commit a609e93

File tree

2 files changed

+35
-13
lines changed

2 files changed

+35
-13
lines changed

SALAMISLURM.py

+1
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,5 @@
1414
if num in songnums:
1515
if not sys.warnoptions:
1616
warnings.simplefilter("ignore")
17+
print("Doing %i"%num)
1718
compute_features(num)

SongStructure.py

+34-13
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,17 @@
66
import numpy as np
77
import matplotlib.pyplot as plt
88
import scipy.io as sio
9+
import os
910
import librosa
11+
import librosa.display
1012
import argparse
1113
from CSMSSMTools import getCSM, getCSMCosine
1214
from SimilarityFusion import doSimilarityFusion
1315
from SongStructureGUI import saveResultsJSON
16+
import subprocess
17+
18+
MANUAL_AUDIO_LOAD = False
19+
FFMPEG_BINARY = "ffmpeg"
1420

1521
def plotFusionResults(Ws, vs, alllabels, times):
1622
"""
@@ -83,7 +89,8 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
8389
wins_per_block: int
8490
Number of aggregated windows per sliding window block
8591
K: int
86-
Number of nearest neighbors in SNF
92+
Number of nearest neighbors in SNF. If -1, then autotuned to sqrt(N)
93+
for an NxN similarity matrix
8794
reg_diag: float
8895
Regularization for self-similarity promotion
8996
reg_neighbs: float
@@ -99,11 +106,18 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
99106
-------
100107
{'Ws': An dictionary of weighted adjacency matrices for individual features
101108
and the fused adjacency matrix,
102-
'times': Time in seconds of each row in the similarity matrices}
109+
'times': Time in seconds of each row in the similarity matrices,
110+
'K': The number of nearest neighbors actually used}
103111
"""
104112
## Step 1: Load audio
105113
print("Loading %s..."%filename)
106-
y, sr = librosa.load(filename, sr=sr)
114+
if MANUAL_AUDIO_LOAD:
115+
subprocess.call([FFMPEG_BINARY, "-i", filename, "-ar", "%i"%sr, "-ac", "1", "%s.wav"%filename])
116+
sr, y = sio.wavfile.read("%s.wav"%filename)
117+
y = y/2.0**15
118+
os.remove("%s.wav"%filename)
119+
else:
120+
y, sr = librosa.load(filename, sr=sr)
107121

108122
## Step 2: Figure out intervals to which to sync features
109123
if win_fac > 0:
@@ -119,7 +133,6 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
119133
intervals = librosa.util.fix_frames(beats, x_max=C.shape[1])
120134
intervals = librosa.segment.subsegment(C, intervals, n_segments=abs(win_fac))
121135

122-
123136
## Step 3: Compute features
124137
# 1) CQT chroma with 3x oversampling in pitch
125138
chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length, bins_per_octave=12*3)
@@ -134,39 +147,47 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
134147
mfcc = coeffs[:, None]*mfcc
135148

136149
# 3) Tempograms
137-
150+
oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
151+
tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr, hop_length=hop_length)
138152

139153

140154
## Step 4: Synchronize features to intervals
141-
n_frames = min(chroma.shape[1], mfcc.shape[1])
155+
n_frames = min(min(chroma.shape[1], mfcc.shape[1]), tempogram.shape[1])
142156
# median-aggregate chroma to suppress transients and passing tones
143157
intervals = librosa.util.fix_frames(intervals, x_min=0, x_max=n_frames)
144158
chroma = librosa.util.sync(chroma, intervals, aggregate=np.median)
145159
mfcc = librosa.util.sync(mfcc, intervals)
160+
tempogram = librosa.util.sync(tempogram, intervals)
146161
times = intervals*float(hop_length)/float(sr)
147162

148163

149164

150165
chroma = chroma[:, :n_frames]
151166
mfcc = mfcc[:, :n_frames]
167+
tempogram = tempogram[:, :n_frames]
152168

153169
#Do a delay embedding and compute SSMs
154170
XChroma = librosa.feature.stack_memory(chroma, n_steps=wins_per_block, mode='edge').T
155171
XMFCC = librosa.feature.stack_memory(mfcc, n_steps=wins_per_block, mode='edge').T
172+
XTempogram = librosa.feature.stack_memory(tempogram, n_steps=wins_per_block, mode='edge').T
156173
DChroma = getCSMCosine(XChroma, XChroma) #Cosine distance
157174
DMFCC = getCSM(XMFCC, XMFCC) #Euclidean distance
175+
DTempogram = getCSM(XTempogram, XTempogram)
158176

159177
#Run similarity network fusion
160-
FeatureNames = ['MFCCs', 'Chromas']
161-
Ds = [DMFCC, DChroma]
178+
FeatureNames = ['MFCCs', 'Chromas', 'Tempogram']
179+
Ds = [DMFCC, DChroma, DTempogram]
162180
# Edge case: zeropad if it's too small
163181
for i, Di in enumerate(Ds):
164182
if Di.shape[0] < 2*K:
165183
D = np.zeros((2*K, 2*K))
166184
D[0:Di.shape[0], 0:Di.shape[1]] = Di
167185
Ds[i] = D
168-
169-
(Ws, WFused) = doSimilarityFusion(Ds, K=K, niters=niters, \
186+
187+
pK = K
188+
if K == -1:
189+
pK = int(np.round(np.sqrt(Ds[0].shape[0])))
190+
(Ws, WFused) = doSimilarityFusion(Ds, K=pK, niters=niters, \
170191
reg_diag=reg_diag, reg_neighbs=reg_neighbs, \
171192
do_animation=do_animation, PlotNames=FeatureNames, \
172193
PlotExtents=[times[0], times[-1]])
@@ -177,7 +198,7 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
177198
if plot_result:
178199
plotFusionResults(WsDict, {}, {}, times)
179200
plt.savefig("%s_Plot.png"%filename, bbox_inches='tight')
180-
return {'Ws':WsDict, 'times':times}
201+
return {'Ws':WsDict, 'times':times, 'K':pK}
181202

182203
if __name__ == '__main__':
183204
parser = argparse.ArgumentParser()
@@ -188,7 +209,7 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
188209
parser.add_argument('--hop_length', type=int, default=512, help="Hop Size in samples")
189210
parser.add_argument('--win_fac', type=int, default=10, help="Number of windows to average in a frame. If negative, then do beat tracking, and subdivide by |win_fac| times within each beat")
190211
parser.add_argument('--wins_per_block', type=int, default=20, help="Number of frames to stack in sliding window for every feature")
191-
parser.add_argument('--K', type=int, default=10, help="Number of nearest neighbors in similarity network fusion")
212+
parser.add_argument('--K', type=int, default=10, help="Number of nearest neighbors in similarity network fusion. If -1, then autotune to sqrt(N) for an NxN similarity matrix")
192213
parser.add_argument('--reg_diag', type=float, default=1.0, help="Regularization for self-similarity promotion")
193214
parser.add_argument('--reg_neighbs', type=float, default=0.5, help="Regularization for direct neighbor similarity promotion")
194215
parser.add_argument('--niters', type=int, default=10, help="Number of iterations in similarity network fusion")
@@ -204,4 +225,4 @@ def getFusedSimilarity(filename, sr, hop_length, win_fac, wins_per_block, K, reg
204225
K=opt.K, reg_diag=opt.reg_diag, reg_neighbs=opt.reg_neighbs, niters=opt.niters, \
205226
do_animation=opt.do_animation, plot_result=opt.plot_result)
206227
sio.savemat(opt.matfilename, res)
207-
saveResultsJSON(opt.filename, res['times'], res['Ws'], opt.K, opt.neigs, opt.jsonfilename, opt.diffusion_znormalize)
228+
saveResultsJSON(opt.filename, res['times'], res['Ws'], res['K'], opt.neigs, opt.jsonfilename, opt.diffusion_znormalize)

0 commit comments

Comments
 (0)