-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspot_keywords.py
83 lines (63 loc) · 2.09 KB
/
spot_keywords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from tensorflow import keras
import numpy as np
import librosa as lb
# Path of the saved Keras model file that spotting_keywords() loads.
model_path = "model.h5"
# Maximum number of audio samples kept per clip; preprocess() truncates longer
# signals to this length.
# NOTE(review): 22050 equals librosa's default load sample rate, so this is
# presumably one second of audio — confirm against the training setup.
NUM_SAMPLES = 22050
class _spotting_keywords:
    """Keyword-spotting service that maps an audio file to a keyword label.

    Instances are meant to be obtained through the module-level
    ``spotting_keywords()`` factory, which caches one instance in
    ``_instance`` and stores the loaded model on the ``model`` class
    attribute.
    """

    # Classifier used by predict(); None until the factory assigns a model.
    model = None
    # Keyword labels; the label at position i corresponds to model output
    # index i (see predict()).
    mappings = [
        "bed",
        "bird",
        "cat",
        "dog",
        "down",
        "eight",
        "five",
        "four",
        "go",
        "happy",
        "house",
        "left",
        "marvin",
        "nine",
        "no",
        "off",
        "on",
        "one",
        "right",
        "seven",
        "sheila",
        "six",
        "stop",
        "three",
        "tree",
        "two",
        "up",
        "wow",
        "yes",
        "zero",
        "_background_noise_",
    ]
    # Cached singleton instance, managed by the factory function.
    _instance = None

    def predict(self, file_path):
        """Return the keyword predicted for the audio file at *file_path*.

        Args:
            file_path: Path of the audio file to classify.

        Returns:
            The entry of ``mappings`` whose index has the highest score in
            the model's output.
        """
        MFCCs = self.preprocess(file_path)
        # Wrap the 2-D MFCC matrix with a leading and a trailing axis before
        # handing it to the model.
        # NOTE(review): presumably (batch, frames, coefficients, channels) —
        # confirm against the model's input layer.
        MFCCs = MFCCs[np.newaxis, ..., np.newaxis]
        predictions = self.model.predict(MFCCs)
        predicted_index = np.argmax(predictions)
        # The cached singleton is deliberately left untouched here: clearing
        # it after a prediction would force the factory to rebuild the
        # instance and reload the model from disk on every call.
        return self.mappings[predicted_index]

    def preprocess(self, file_path, n_mfcc=13, n_fft=2048, hop_length=512):
        """Load an audio file and compute its MFCC features.

        Args:
            file_path: Path of the audio file to load.
            n_mfcc: Number of MFCC coefficients to extract.
            n_fft: FFT window length passed to librosa.
            hop_length: Hop length passed to librosa.

        Returns:
            The transposed MFCC matrix returned by ``librosa.feature.mfcc``.
        """
        signal, sr = lb.load(file_path)
        # Keep at most NUM_SAMPLES samples so long clips are cut to a fixed
        # length.
        # NOTE(review): clips shorter than NUM_SAMPLES are passed through
        # unpadded and therefore yield fewer frames — confirm the model
        # accepts that.
        if len(signal) > NUM_SAMPLES:
            signal = signal[:NUM_SAMPLES]
        # Pass the audio and sample rate by keyword: recent librosa releases
        # make these arguments keyword-only and reject a positional signal.
        MFCCs = lb.feature.mfcc(
            y=signal,
            sr=sr,
            n_mfcc=n_mfcc,
            n_fft=n_fft,
            hop_length=hop_length,
        )
        return MFCCs.T
def spotting_keywords():
    """Return the cached ``_spotting_keywords`` instance, creating it if needed.

    When no instance is cached, the Keras model at ``model_path`` is loaded
    and stored on the class before the new instance is cached. Loading first
    means a failed load never leaves behind a cached instance without a
    model.

    Returns:
        The cached ``_spotting_keywords`` instance.

    Raises:
        Any exception raised by ``keras.models.load_model`` (for example when
        the model file is missing); in that case nothing is cached and the
        next call retries the load.
    """
    if _spotting_keywords._instance is None:
        # Load before publishing the instance so that an exception here keeps
        # _instance at None instead of caching a model-less service.
        _spotting_keywords.model = keras.models.load_model(model_path)
        _spotting_keywords._instance = _spotting_keywords()
    return _spotting_keywords._instance
# if __name__ == "__main__":
# sp = spotting_keywords()
# keyword1 = sp.predict("test/up.wav")
# print(f"The audio file corresponds to : {keyword1}")