-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfilterbank_shape.py
106 lines (94 loc) · 4.98 KB
/
filterbank_shape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""
This class contains different functions that generates triangular linear filter shape, mel filter bank shape. The filterbank layer uses the triangular linear filterbank shape
"""
import numpy as np
class FilterbankShape(object):
    """Builders for triangular filterbank weight matrices.

    Every public ``*_filter_shape`` method returns a 2-D ``float32`` numpy
    array that has been transposed before being returned, so each *column*
    holds one filter.
    """

    def tri_filter_shape(self, ndim, nfilter):
        """Build ``nfilter`` overlapping triangles over the indices ``0..ndim-1``.

        ``nfilter + 2`` edge points are spaced evenly between the first and
        the last index; filter ``m`` rises from edge ``m`` to edge ``m + 1``
        and falls back to zero at edge ``m + 2``. Each triangle is scaled so
        that its peak height is ``2 / (edge[m + 2] - edge[m])``.

        :param ndim: number of points each filter is sampled on (must be >= 1;
            with ``ndim == 1`` all edges coincide and the weights are NaN).
        :param nfilter: number of triangular filters.
        :returns: float32 numpy array of shape (ndim, nfilter); each column
            holds one filter.
        """
        f = np.arange(ndim)
        f_high = f[-1]
        f_low = f[0]
        H = np.zeros((nfilter, ndim))
        # nfilter + 2 evenly spaced edges: every filter needs a left foot,
        # a peak and a right foot, and neighbouring filters share edges.
        edges = f_low + np.arange(nfilter + 2) * (f_high - f_low) / (nfilter + 1)
        for m in range(nfilter):
            left, centre, right = edges[m], edges[m + 1], edges[m + 2]
            rising = np.logical_and(f >= left, f <= centre)  # up-slope
            H[m, rising] = (
                2 * (f[rising] - left) / ((right - left) * (centre - left))
            )
            falling = np.logical_and(f >= centre, f <= right)  # down-slope
            H[m, falling] = (
                2 * (right - f[falling]) / ((right - left) * (right - centre))
            )
        # ndarray.astype returns a new array, so its result has to be the
        # value that is returned for the float32 conversion to take effect.
        return np.transpose(H).astype(np.float32)

    def hz2mel(self, hz):
        """Convert a value in Hertz to Mels

        :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
        :returns: a value in Mels. If an array was passed in, an identical sized array is returned.
        """
        return 2595 * np.log10(1 + hz / 700.0)

    def mel2hz(self, mel):
        """Convert a value in Mels to Hertz

        :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
        :returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
        """
        return 700 * (10 ** (mel / 2595.0) - 1)

    @staticmethod
    def _resolve_highfreq(samplerate, highfreq):
        """Return the upper band edge, falling back to ``samplerate / 2``.

        A falsy ``highfreq`` (``None`` or ``0``) selects ``samplerate / 2``.

        :raises AssertionError: if ``highfreq`` exceeds ``samplerate / 2``.
        """
        highfreq = highfreq or samplerate / 2
        # Raised explicitly (instead of via an ``assert`` statement) so the
        # check is still performed when Python runs with -O.
        if not highfreq <= samplerate / 2:
            raise AssertionError("highfreq is greater than samplerate/2")
        return highfreq

    @staticmethod
    def _tri_fbank_from_hz(hzpoints, nfilt, nfft, samplerate):
        """Turn ``nfilt + 2`` band edges in Hz into unit-peak triangular filters.

        :returns: float32 numpy array of shape (nfft // 2 + 1, nfilt).
        """
        # our points are in Hz, but we use fft bins, so we have to convert
        # from Hz to fft bin number
        bins = np.floor((nfft + 1) * hzpoints / samplerate)
        fbank = np.zeros([nfilt, nfft // 2 + 1])
        for j in range(nfilt):
            lo, mid, hi = bins[j], bins[j + 1], bins[j + 2]
            for i in range(int(lo), int(mid)):  # rising edge
                fbank[j, i] = (i - lo) / (mid - lo)
            for i in range(int(mid), int(hi)):  # falling edge, peak at mid
                fbank[j, i] = (hi - i) / (hi - mid)
        # astype returns a copy; return it so the result really is float32.
        return np.transpose(fbank).astype(np.float32)

    def mel_tri_filter_shape(
        self, nfilt=20, nfft=512, samplerate=16000, lowfreq=0, highfreq=None
    ):
        """Compute a Mel-filterbank with triangular filters.

        The band edges are evenly spaced on the mel scale between ``lowfreq``
        and ``highfreq``. The filterbank is transposed before it is returned:
        the rows correspond to fft bins and each column holds one filter.

        :param nfilt: the number of filters in the filterbank, default 20.
        :param nfft: the FFT size. Default is 512.
        :param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
        :param lowfreq: lowest band edge of mel filters, default 0 Hz
        :param highfreq: highest band edge of mel filters, default samplerate/2
        :returns: A float32 numpy array of shape (nfft//2 + 1, nfilt) containing the filterbank. Each column holds 1 filter.
        :raises AssertionError: if highfreq is greater than samplerate/2.
        """
        highfreq = self._resolve_highfreq(samplerate, highfreq)
        # compute points evenly spaced in mels
        lowmel = self.hz2mel(lowfreq)
        highmel = self.hz2mel(highfreq)
        melpoints = np.linspace(lowmel, highmel, nfilt + 2)
        return self._tri_fbank_from_hz(
            self.mel2hz(melpoints), nfilt, nfft, samplerate
        )

    def lin_tri_filter_shape(
        self, nfilt=20, nfft=512, samplerate=16000, lowfreq=0, highfreq=None
    ):
        """Compute a linear-filterbank with triangular filters.

        The band edges are evenly spaced in Hz between ``lowfreq`` and
        ``highfreq``. The filterbank is transposed before it is returned:
        the rows correspond to fft bins and each column holds one filter.

        :param nfilt: the number of filters in the filterbank, default 20.
        :param nfft: the FFT size. Default is 512.
        :param samplerate: the samplerate of the signal we are working with. Used to map Hz to fft bins.
        :param lowfreq: lowest band edge of the filters, default 0 Hz
        :param highfreq: highest band edge of the filters, default samplerate/2
        :returns: A float32 numpy array of shape (nfft//2 + 1, nfilt) containing the filterbank. Each column holds 1 filter.
        :raises AssertionError: if highfreq is greater than samplerate/2.
        """
        highfreq = self._resolve_highfreq(samplerate, highfreq)
        # compute points evenly spaced in Hz
        hzpoints = np.linspace(lowfreq, highfreq, nfilt + 2)
        return self._tri_fbank_from_hz(hzpoints, nfilt, nfft, samplerate)