-
Notifications
You must be signed in to change notification settings - Fork 49
/
Copy pathSpectrogramGenerator.py
132 lines (92 loc) · 3.8 KB
/
SpectrogramGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import random
import numpy as np
from PIL import Image
import fnmatch
import sys
from subprocess import Popen, PIPE, STDOUT
if (sys.version_info >= (3,0)):
from queue import Queue
else:
from Queue import Queue
def recursive_glob(path, pattern):
    """Lazily yield absolute paths of regular files below *path* matching *pattern*.

    The directory tree is traversed with ``os.walk`` and *pattern* is an
    ``fnmatch``-style wildcard applied to the bare file name only (never to the
    directory part). Entries that are not regular files, e.g. dangling
    symlinks, are skipped.
    """
    for directory, _subdirs, names in os.walk(path):
        for matched in fnmatch.filter(names, pattern):
            candidate = os.path.abspath(os.path.join(directory, matched))
            if not os.path.isfile(candidate):
                continue
            yield candidate
class SpectrogramGenerator(object):
    """Produce fixed-width spectrogram segments from audio files using SoX.

    ``source`` is either a directory, which is searched recursively for
    ``*.wav``, ``*.mp3`` and ``*.m4a`` files, or a path that is used as the
    single input file. ``config`` must provide ``"input_shape"`` (a
    ``(height, width, channels)`` triple) and ``"pixel_per_second"``.
    """

    def __init__(self, source, config, shuffle=False, max_size=100, run_only_once=False):
        """Collect the input files and store the generator settings.

        Args:
            source: Directory to scan recursively, or a single file path.
            config: Mapping read by ``get_generator`` for ``"input_shape"``
                and ``"pixel_per_second"``.
            shuffle: Reshuffle the file list each time it has been exhausted.
            max_size: Capacity of the ``queue`` attribute.
            run_only_once: Stop after one pass over the files instead of
                cycling forever.
        """
        self.source = source
        self.config = config
        # Not used by this class itself; kept as a public attribute for callers.
        self.queue = Queue(max_size)
        self.shuffle = shuffle
        self.run_only_once = run_only_once

        if os.path.isdir(self.source):
            files = []
            for pattern in ("*.wav", "*.mp3", "*.m4a"):
                files.extend(recursive_glob(self.source, pattern))
        else:
            files = [self.source]
        self.files = files

    def audioToSpectrogram(self, file, pixel_per_sec, height):
        """Render *file* to a spectrogram with ``sox`` and return it as an array.

        SoX arguments used:
            -V0       verbosity level 0: suppress messages
            -n        no audio output file, only run the effects chain
            remix 1   use channel 1 only (mono)
            rate 10k  resample to 10 kHz, i.e. frequencies up to 5 kHz
                      (Nyquist-Shannon sampling theorem)
            -y        height of the spectrogram
            -X        pixels per second along the time axis
            -m        monochrome
            -r        raw spectrogram without axes or legend
            -o        output PNG path (a private temporary file)

        Args:
            file: Path of the audio file to convert.
            pixel_per_sec: Horizontal resolution passed to ``-X``.
            height: Value passed to ``-y``.

        Returns:
            ``numpy`` array holding the pixels of the rendered PNG.

        Raises:
            IOError: If ``sox`` cannot be started, exits with a non-zero
                status, or the resulting image cannot be read.
        """
        # Function-scope import keeps this block self-contained.
        import tempfile

        # mkstemp gives a collision-free name, unlike a random integer suffix
        # in the working directory that concurrent workers could share.
        fd, file_name = tempfile.mkstemp(prefix="tmp_", suffix=".png")
        os.close(fd)
        try:
            # Argument list without a shell: paths containing quotes, spaces
            # or shell metacharacters are passed to sox verbatim.
            command = [
                "sox", "-V0", file, "-n",
                "remix", "1",
                "rate", "10k",
                "spectrogram",
                "-y", str(height),
                "-X", str(pixel_per_sec),
                "-m", "-r",
                "-o", file_name,
            ]
            p = Popen(command, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
            # stderr is merged into stdout, so all diagnostics arrive in `output`.
            output, _ = p.communicate()
            if p.returncode != 0:
                raise IOError("sox failed with exit code {} for {!r}: {!r}".format(
                    p.returncode, file, output))
            if output:
                print(output)

            # Convert to an array before the temporary file is deleted.
            with Image.open(file_name) as image:
                return np.array(image)
        finally:
            try:
                os.remove(file_name)
            except OSError:
                # Best-effort cleanup; never mask the primary error.
                pass

    def get_generator(self):
        """Return a generator of spectrogram segments for all input files.

        Each file is rendered with ``audioToSpectrogram`` and cut into
        consecutive, non-overlapping segments of the configured width; a
        trailing remainder narrower than that width is dropped and all-zero
        (black) segments are skipped. Files that fail to process are reported
        on stdout and skipped.

        After the last file the generator stops when ``run_only_once`` is
        set; otherwise it starts over, reshuffling ``self.files`` in place
        first when ``shuffle`` is set. With no input files it ends
        immediately.

        Yields:
            Arrays of shape ``(height, width, 1)``.

        Raises:
            KeyError, ValueError: On first iteration, if ``config`` lacks
                ``"input_shape"`` / ``"pixel_per_second"`` or the shape is not
                a triple.
        """
        if not self.files:
            return

        # Loop-invariant configuration. Reading it once also surfaces a broken
        # config immediately instead of reporting it again for every file.
        target_height, target_width, _ = self.config["input_shape"]
        pixel_per_second = self.config["pixel_per_second"]

        index = 0
        while True:
            path = self.files[index]
            try:
                image = self.audioToSpectrogram(path, pixel_per_second, target_height)
                image = np.expand_dims(image, -1)  # add dimension for mono channel
                height, width, _ = image.shape
                if target_height != height:
                    raise ValueError(
                        "Heigh mismatch {} vs {}".format(target_height, height))

                for i in range(width // target_width):
                    left = i * target_width
                    segment = image[:, left:left + target_width]
                    # Ignore black images
                    if not segment.any():
                        continue
                    yield segment
            except Exception as e:
                # One unreadable file must not stop the stream.
                print("SpectrogramGenerator Exception: ", e, path)

            # Advance outside of `finally` so that GeneratorExit and
            # KeyboardInterrupt propagate instead of being swallowed by a
            # `break` in the cleanup clause.
            index += 1
            if index >= len(self.files):
                if self.run_only_once:
                    return
                index = 0
                if self.shuffle:
                    np.random.shuffle(self.files)

    def get_num_files(self):
        """Return the number of input files collected at construction time."""
        return len(self.files)
if __name__ == "__main__":
    # Manual smoke test: render every audio file below the source directory
    # and discard the segments. The directory may be given as the first
    # command-line argument; the original hard-coded path is the default.
    # Because run_only_once is left at False this loops until interrupted.
    demo_source = sys.argv[1] if len(sys.argv) > 1 else "/extra/tom/news2/raw"
    demo_config = {
        "pixel_per_second": 50,
        "input_shape": [129, 100, 1],
        "batch_size": 32,
        "num_classes": 4,
    }
    spectrograms = SpectrogramGenerator(demo_source, demo_config, shuffle=True)
    # A separate loop variable avoids rebinding the generator object.
    for _segment in spectrograms.get_generator():
        pass