Skip to content

Commit

Permalink
conditionally use optimized method
Browse files Browse the repository at this point in the history
  • Loading branch information
benniekiss committed Jan 29, 2025
1 parent 43bb258 commit 649c060
Showing 1 changed file with 47 additions and 13 deletions.
60 changes: 47 additions & 13 deletions pyannote/audio/utils/signal.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,19 +269,35 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
frames = scores.sliding_window
timestamps = [frames[i].middle for i in range(num_frames)]

# annotation meant to store 'active' regions
if self.onset == self.offset:
active = self._opt_binarize(scores, timestamps)
else:
active = self._binarize(scores, timestamps)

# because of padding, some active regions might be overlapping: merge them.
# also: fill same speaker gaps shorter than min_duration_off
if self.pad_offset > 0.0 or self.pad_onset > 0.0 or self.min_duration_off > 0.0:
active = active.support(collar=self.min_duration_off)

# remove tracks shorter than min_duration_on
if self.min_duration_on > 0:
for segment, track in list(active.itertracks()):
if segment.duration < self.min_duration_on:
del active[segment, track]

return active

def _binarize(self, scores, timestamps):
active = Annotation()

for k, k_scores in enumerate(scores.data.T):

label = k if scores.labels is None else scores.labels[k]

# initial state
start = timestamps[0]
is_active = k_scores[0] > self.onset

for t, y in zip(timestamps[1:], k_scores[1:]):

# currently active
if is_active:
# switching from active to inactive
Expand All @@ -303,19 +319,37 @@ def __call__(self, scores: SlidingWindowFeature) -> Annotation:
region = Segment(start - self.pad_onset, t + self.pad_offset)
active[region, k] = label

# because of padding, some active regions might be overlapping: merge them.
# also: fill same speaker gaps shorter than min_duration_off
if self.pad_offset > 0.0 or self.pad_onset > 0.0 or self.min_duration_off > 0.0:
active = active.support(collar=self.min_duration_off)
return active

# remove tracks shorter than min_duration_on
if self.min_duration_on > 0:
for segment, track in list(active.itertracks()):
if segment.duration < self.min_duration_on:
del active[segment, track]
def _opt_binarize(self, scores, timestamps):
active = Annotation()

return active
for k, k_scores in enumerate(scores.data.T):
label = k if scores.labels is None else scores.labels[k]

# Detect transitions
is_active = k_scores > self.onset
transitions = np.diff(is_active.astype(int))
starts = np.where(transitions == 1)[0] + 1
ends = np.where(transitions == -1)[0] + 1

# If the first frame is active, add it as a start
if is_active[0]:
starts = np.insert(starts, 0, 0)

# If the last frame is active, add it as an end
if is_active[-1]:
ends = np.append(ends, len(is_active) - 1)

# Create segments
for start, end in zip(starts, ends):
region = Segment(
timestamps[start] - self.pad_onset,
timestamps[end] + self.pad_offset,
)
active[region, k] = label

return active

class Peak:
"""Peak detection
Expand Down

0 comments on commit 649c060

Please sign in to comment.