pitchcoach.py
from pydub import AudioSegment
from pydub.utils import get_array_type
import numpy as np
from matplotlib import pyplot as plt
import scipy
from note_freq import note_freq_dict, note, freq
import array

# Pitch detector for the piano part
def fft_spectrum(segment, array_type):  # apply an FFT to find the frequencies and their magnitudes in the segment
    raw_data = array.array(array_type, segment._data)
    n = len(raw_data)
    freq_array = np.arange(n) * (float(segment.frame_rate) / n)
    freq_array = freq_array[:n // 2]
    raw_data = raw_data - np.average(raw_data)  # remove the DC offset
    freq_magnitude = np.fft.fft(raw_data)
    freq_magnitude = freq_magnitude[:n // 2]
    freq_magnitude = np.abs(freq_magnitude)
    freq_magnitude /= np.sum(freq_magnitude)  # normalise so the magnitudes sum to 1
    return freq_array, freq_magnitude
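
# A worked example of the frequency resolution, assuming a mono 44.1 kHz input
# (an assumption, not something this file enforces): a 150 ms segment contains
# n = 0.150 * 44100 = 6615 samples, so the FFT bin spacing frame_rate / n is
# about 6.7 Hz.  Adjacent low piano notes (e.g. C2 at ~65.4 Hz vs. C#2 at
# ~69.3 Hz) differ by less than 4 Hz, so very low notes may be snapped to a
# neighbouring key.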

def find_note(freq):  # find the note whose frequency is closest to freq
    # Clamp frequencies that fall outside the note table
    if freq < note_freq_dict[0][0]:
        return note_freq_dict[0][1]
    if freq > note_freq_dict[-1][0]:
        return note_freq_dict[-1][1]
    mindiff_index = 0
    mindiff = np.abs(freq - note_freq_dict[0][0])
    for i in range(1, len(note_freq_dict)):
        thisdiff = np.abs(freq - note_freq_dict[i][0])
        if thisdiff < mindiff:
            mindiff = thisdiff
            mindiff_index = i
    return note_freq_dict[mindiff_index][1]
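
# Example (assuming note_freq_dict is a list of (frequency, name) pairs sorted
# by ascending frequency, which is how it is indexed above): a spectral peak at
# 443 Hz is closest to 440 Hz, i.e. A4, so find_note(443) returns the name
# stored for that entry.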

def audio_pitch(filename):
    audio = AudioSegment.from_file('input_data/' + filename + '.wav')
    audio = audio.high_pass_filter(16.35).low_pass_filter(8000)  # keep only frequencies within roughly C0 - B8
    bit_depth = audio.sample_width * 8
    array_type = get_array_type(bit_depth)
    window_len = 10  # break the audio into 10 ms segments
    volume = [seg.dBFS for seg in audio[::window_len]]  # volume of each segment
    time = np.arange(len(volume)) * (window_len / 1000)  # time of each segment (seconds)

    VOLUME_THRESHOLD = -40  # segments quieter than this are treated as background noise
    EDGE_THRESHOLD = 3.4  # minimum rise in volume that counts as a new note
    TIME_BEFORE = 100  # minimum time between two consecutive notes (ms)
    notes = []  # onset times of the detected notes (ms)
    notes_amp = []  # volume of each detected note (dBFS)
    for i in range(1, len(volume)):
        if volume[i] > VOLUME_THRESHOLD and volume[i] - volume[i - 1] > EDGE_THRESHOLD:  # loud enough, with a sudden rise in volume
            ms = i * window_len  # onset time of the note
            if len(notes) == 0 or ms - notes[-1] > TIME_BEFORE:  # not too close to the previous note
                notes.append(ms)
                notes_amp.append(volume[i])

    notes_name = []
    notes_freq = []
    for start in notes:
        st, en = start, start + 150  # take a 150 ms segment starting at the onset
        segment = audio[st:en]  # extract the segment
        freqs, mags = fft_spectrum(segment, array_type)  # compute the frequency spectrum with the FFT
        note_freq = freqs[np.argmax(mags)]  # frequency with the maximum magnitude
        note_name = find_note(note_freq)  # name of the closest note
        notes_name.append(note_name)
        notes_freq.append(note_freq)
        # print(start / 1000, note_name)

    plt.figure(figsize=(30, 10), facecolor='white')
    plt.tick_params(labelsize=20)
    plt.plot(time, volume)  # plot volume vs. time
    plt.title(filename, fontsize=25)
    for onset, amp, name in zip(notes, notes_amp, notes_name):
        plt.axvline(x=onset / 1000, color='r', linewidth=0.8, linestyle="-")  # mark each detected note with a vertical line
        plt.text(onset / 1000 - 0.2, amp + 3, name, fontsize=20)  # label the note's name
    plt.savefig('output_images/' + filename + '.png')
    return notes_freq, notes_name
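
# A minimal usage sketch, assuming a hypothetical recording input_data/melody.wav:
#   notes_freq, notes_name = audio_pitch('melody')
#   print(notes_name)  # detected note names, in onset order
# The annotated volume plot is saved to output_images/melody.png.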

def steps_diff(key1, key2):
    '''
    Return the number of semitones between two given keys.
    A positive value means key1 is above key2 (key1 must be lowered to reach key2);
    a negative value means key1 must be raised to reach key2.
    '''
    info1, info2 = freq(key1, index=True), freq(key2, index=True)
    return info1[1] - info2[1]
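
# Example, given as comments only because it depends on note_freq's conventions
# (assuming freq(key, index=True) returns (frequency, semitone_index) with
# indices ascending in pitch, and that 'C5'/'C4' match its note-name format):
#   steps_diff('C5', 'C4')  ->  12, i.e. C5 must be lowered an octave to reach C4.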

def transpose_checker(note_list1, note_list2, trans_step):
    '''
    Iterate over the note lists of the two audio files and check whether
    note_list2 is a correct transposition of note_list1 by trans_step semitones.
    '''
    mlen = min(len(note_list1), len(note_list2))
    wrong_notes = []
    if len(note_list1) != len(note_list2):
        print("Warning: note_list1 and note_list2 are of unequal length")
        print("Either the two files contain a different number of notes")
        print("or some notes were not correctly identified")
        print()
    for i in range(mlen):
        note1, note2 = note_list1[i], note_list2[i]
        # print("in transpose checker", note1, note2, i)
        if steps_diff(note1, note2) != trans_step:
            tmp = freq(note1, index=True)
            note1_index = tmp[1]
            correct_note = note_freq_dict[note1_index - trans_step][1]  # the note that should have been played
            wrong_notes.append([note1, note2, correct_note, i])
    score = str(mlen - len(wrong_notes)) + '/' + str(mlen)
    return wrong_notes, score
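
# A minimal end-to-end sketch of how these functions might be combined, assuming
# hypothetical recordings input_data/original.wav and input_data/transposed.wav,
# where the second is meant to be the first transposed down two semitones:
if __name__ == '__main__':
    _, names1 = audio_pitch('original')    # notes in the reference recording
    _, names2 = audio_pitch('transposed')  # notes in the transposed recording
    wrong, score = transpose_checker(names1, names2, trans_step=2)
    print('score:', score)
    for ref_note, played_note, expected_note, idx in wrong:
        print(f'note {idx}: played {played_note}, expected {expected_note} (reference {ref_note})')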