From dce605d5dd5f31f679f9edeafc69176b27a4d327 Mon Sep 17 00:00:00 2001 From: Nickolay Shmyrev Date: Sun, 28 May 2023 23:40:24 +0200 Subject: [PATCH] Expose keep_excessive_supervisions parameter to accept supervisions slightly longer than cuts. --- lhotse/cut/base.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lhotse/cut/base.py b/lhotse/cut/base.py index 8c2519687..c69c6cee9 100644 --- a/lhotse/cut/base.py +++ b/lhotse/cut/base.py @@ -393,6 +393,7 @@ def plot_alignment(self, alignment_type: str = "word"): def trim_to_supervisions( self, keep_overlapping: bool = True, + keep_excessive_supervisions: bool = True, min_duration: Optional[Seconds] = None, context_direction: Literal["center", "left", "right", "random"] = "center", keep_all_channels: bool = False, @@ -440,6 +441,8 @@ def trim_to_supervisions( :param keep_overlapping: when ``False``, it will discard parts of other supervisions that overlap with the main supervision. In the illustration above, it would discard ``Sup2`` in ``Cut1`` and ``Sup1`` in ``Cut2``. In this mode, we guarantee that there will always be exactly one supervision per cut. + :param keep_excessive_supervisions: when ``False``, it will discard supervisions which are longer than cuts. + Can result in cuts without supervisions. :param min_duration: An optional duration in seconds; specifying this argument will extend the cuts that would have been shorter than ``min_duration`` with actual acoustic context in the recording/features. If there are supervisions present in the context, they are kept when ``keep_overlapping`` is true. 
@@ -473,7 +476,7 @@ def trim_to_supervisions( trimmed = self.truncate( offset=new_start, duration=new_duration, - keep_excessive_supervisions=keep_overlapping, + keep_excessive_supervisions=keep_excessive_supervisions, _supervisions_index=supervisions_index, ) @@ -485,6 +488,11 @@ def trim_to_supervisions( # For MixedCut, we can't change the channels since it is defined by the # number of channels in underlying tracks. + # Ensure that there are supervisions. + assert (len(trimmed.supervisions) > 0), ( + "Trimmed cut has no supervisions. Make sure that supervisions " + "are not filtered out. Consider `keep_excessive_supervisions=True`." + ) # Ensure that all supervisions have the same channel. assert ( len(set(to_hashable(s.channel) for s in trimmed.supervisions)) == 1