From 203a9e1fd34497d776b054d9fd906767a33ba2ff Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Mon, 21 Aug 2017 12:20:56 -0300 Subject: [PATCH 1/2] Add seek step to enable faster silence detection --- pydub/silence.py | 35 ++++++++++++++++++++--------------- setup.cfg | 5 ++++- test/test.py | 5 +++++ 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/pydub/silence.py b/pydub/silence.py index 8d47a73b..2363b8cf 100644 --- a/pydub/silence.py +++ b/pydub/silence.py @@ -1,9 +1,7 @@ -from .utils import ( - db_to_float, -) +from .utils import db_to_float -def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16): +def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1): seg_len = len(audio_segment) # you can't have a silent portion of a sound that is longer than the sound @@ -16,11 +14,18 @@ def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16): # find silence and add start and end indicies to the to_cut list silence_starts = [] - # check every (1 sec by default) chunk of sound for silence - slice_starts = seg_len - min_silence_len - - for i in range(slice_starts + 1): - audio_slice = audio_segment[i:i+min_silence_len] + # check successive (1 sec by default) chunk of sound for silence + # try a chunk at every "seek step" (or every chunk for a seek step == 1) + # + # make sure the last portion of the audio is included in the seach + # even when seek step is greater than 1 + last_slice_start = seg_len - min_silence_len + slice_starts = range(0, last_slice_start + 1, seek_step) + if slice_starts[-1] != last_slice_start: + slice_starts.append(last_slice_start) + + for i in slice_starts: + audio_slice = audio_segment[i:i + min_silence_len] if audio_slice.rms < silence_thresh: silence_starts.append(i) @@ -54,8 +59,8 @@ def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16): return silent_ranges -def detect_nonsilent(audio_segment, min_silence_len=1000, 
silence_thresh=-16): - silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh) +def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1): + silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step) len_seg = len(audio_segment) # if there is no silence, the whole thing is nonsilent @@ -81,8 +86,8 @@ def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16): return nonsilent_ranges - -def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100): +def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100, + seek_step=1): """ audio_segment - original pydub.AudioSegment() object @@ -97,7 +102,7 @@ def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, ke abruptly cut off. (default: 100ms) """ - not_silence_ranges = detect_nonsilent(audio_segment, min_silence_len, silence_thresh) + not_silence_ranges = detect_nonsilent(audio_segment, min_silence_len, silence_thresh, seek_step) chunks = [] for start_i, end_i in not_silence_ranges: @@ -106,4 +111,4 @@ def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, ke chunks.append(audio_segment[start_i:end_i]) - return chunks \ No newline at end of file + return chunks diff --git a/setup.cfg b/setup.cfg index 0a8df87a..cf3aad63 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,5 @@ [wheel] -universal = 1 \ No newline at end of file +universal = 1 + +[pep8] +max-line-length = 100 diff --git a/test/test.py b/test/test.py index 9eea4716..6c268500 100644 --- a/test/test.py +++ b/test/test.py @@ -868,6 +868,11 @@ def test_detect_silence_seg1(self): silent_ranges = detect_silence(self.seg1, min_silence_len=500, silence_thresh=-20) self.assertEqual(silent_ranges, [[0, 775], [3141, 4033], [5516, 6051]]) + def test_detect_silence_seg1_with_seek_split(self): + silent_ranges = detect_silence(self.seg1, 
min_silence_len=500, silence_thresh=-20, + seek_step=10) + self.assertEqual(silent_ranges, [[0, 770], [3150, 4030], [5520, 6050]]) + def test_realistic_audio(self): silent_ranges = detect_silence(self.seg4, min_silence_len=1000, silence_thresh=self.seg4.dBFS) From 2a48cba1ee33c1f22a3fea360bdbd7bd6d1d9297 Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Mon, 21 Aug 2017 14:50:59 -0300 Subject: [PATCH 2/2] Fix python 3 range compatibility --- pydub/silence.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pydub/silence.py b/pydub/silence.py index 2363b8cf..c89eb233 100644 --- a/pydub/silence.py +++ b/pydub/silence.py @@ -1,3 +1,5 @@ +import itertools + from .utils import db_to_float @@ -16,13 +18,13 @@ def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek # check successive (1 sec by default) chunk of sound for silence # try a chunk at every "seek step" (or every chunk for a seek step == 1) - # - # make sure the last portion of the audio is included in the seach - # even when seek step is greater than 1 last_slice_start = seg_len - min_silence_len slice_starts = range(0, last_slice_start + 1, seek_step) - if slice_starts[-1] != last_slice_start: - slice_starts.append(last_slice_start) + + # guarantee last_slice_start is included in the range + # to make sure the last portion of the audio is searched + if last_slice_start % seek_step: + slice_starts = itertools.chain(slice_starts, [last_slice_start]) for i in slice_starts: audio_slice = audio_segment[i:i + min_silence_len]