From 9c1330a8523a2cc28c5d983a1d59ae4d4b05f117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=9C=87=E4=B8=9C?= <275331498@qq.com> Date: Sat, 23 Nov 2024 08:41:47 +0800 Subject: [PATCH] change max_frames to max_duration in docs (#1419) * change max_frames to max_duration in docs * minor fix --- docs/datasets.rst | 8 ++++---- lhotse/cut/data.py | 2 +- lhotse/cut/mixed.py | 2 +- lhotse/cut/padding.py | 2 +- lhotse/cut/set.py | 2 +- lhotse/dataset/audio_tagging.py | 2 +- lhotse/dataset/sampling/bucketing.py | 4 ++-- lhotse/dataset/sampling/cut_pairs.py | 8 ++++---- lhotse/dataset/sampling/dynamic.py | 2 +- lhotse/dataset/sampling/simple.py | 12 ++++++------ lhotse/dataset/sampling/weighted_simple.py | 2 +- lhotse/dataset/speech_recognition.py | 2 +- lhotse/dataset/speech_translation.py | 2 +- lhotse/dataset/surt.py | 2 +- 14 files changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/datasets.rst b/docs/datasets.rst index 03df15ea1..d3609e955 100644 --- a/docs/datasets.rst +++ b/docs/datasets.rst @@ -28,7 +28,7 @@ It allows for interesting collation methods - e.g. **padding the speech with noi The items for mini-batch creation are selected by the ``Sampler``. Lhotse defines ``Sampler`` classes that are initialized with :class:`~lhotse.cut.CutSet`'s, so that they can look up specific properties of an utterance to stratify the sampling. -For example, :class:`~lhotse.dataset.sampling.SimpleCutSampler` has a defined ``max_frames`` attribute, and it will keep sampling cuts for a batch until they do not exceed the specified number of frames. +For example, :class:`~lhotse.dataset.sampling.SimpleCutSampler` has a defined ``max_duration`` attribute, and it will keep sampling cuts for a batch until they do not exceed the specified number of seconds. Another strategy — used in :class:`~lhotse.dataset.sampling.BucketingSampler` — will first group the cuts of similar durations into buckets, and then randomly select a bucket to draw the whole batch from. For tasks where both input and output of the model are speech utterances, we can use the :class:`~lhotse.dataset.sampling.CutPairsSampler`, which accepts two :class:`~lhotse.cut.CutSet`'s and will match the cuts in them by their IDs. @@ -38,11 +38,11 @@ A typical Lhotse's dataset API usage might look like this: .. code-block:: from torch.utils.data import DataLoader - from lhotse.dataset import SpeechRecognitionDataset, SimpleCutSampler + from lhotse.dataset import K2SpeechRecognitionDataset, SimpleCutSampler cuts = CutSet(...) - dset = SpeechRecognitionDataset(cuts) - sampler = SimpleCutSampler(cuts, max_frames=50000) + dset = K2SpeechRecognitionDataset(cuts) + sampler = SimpleCutSampler(cuts, max_duration=500) # Dataset performs batching by itself, so we have to indicate that # to the DataLoader with batch_size=None dloader = DataLoader(dset, sampler=sampler, batch_size=None, num_workers=1) diff --git a/lhotse/cut/data.py b/lhotse/cut/data.py index ad47ca381..a939db5a2 100644 --- a/lhotse/cut/data.py +++ b/lhotse/cut/data.py @@ -723,7 +723,7 @@ def pad( """ Return a new MixedCut, padded with zeros in the recording, and ``pad_feat_value`` in each feature bin. - The user can choose to pad either to a specific `duration`; a specific number of frames `max_frames`; + The user can choose to pad either to a specific `duration`; a specific number of frames `num_frames`; or a specific number of samples `num_samples`. The three arguments are mutually exclusive. :param duration: The cut's minimal duration after padding. 
diff --git a/lhotse/cut/mixed.py b/lhotse/cut/mixed.py index 01acf248d..cd83d29e0 100644 --- a/lhotse/cut/mixed.py +++ b/lhotse/cut/mixed.py @@ -622,7 +622,7 @@ def pad( """ Return a new MixedCut, padded with zeros in the recording, and ``pad_feat_value`` in each feature bin. - The user can choose to pad either to a specific `duration`; a specific number of frames `max_frames`; + The user can choose to pad either to a specific `duration`; a specific number of frames `num_frames`; or a specific number of samples `num_samples`. The three arguments are mutually exclusive. :param duration: The cut's minimal duration after padding. diff --git a/lhotse/cut/padding.py b/lhotse/cut/padding.py index c535bde2b..a95be6062 100644 --- a/lhotse/cut/padding.py +++ b/lhotse/cut/padding.py @@ -236,7 +236,7 @@ def pad( """ Return a new MixedCut, padded with zeros in the recording, and ``pad_feat_value`` in each feature bin. - The user can choose to pad either to a specific `duration`; a specific number of frames `max_frames`; + The user can choose to pad either to a specific `duration`; a specific number of frames `num_frames`; or a specific number of samples `num_samples`. The three arguments are mutually exclusive. :param duration: The cut's minimal duration after padding. diff --git a/lhotse/cut/set.py b/lhotse/cut/set.py index 2a7afd16c..5a62ba21c 100644 --- a/lhotse/cut/set.py +++ b/lhotse/cut/set.py @@ -2821,7 +2821,7 @@ def pad( """ Return a new MixedCut, padded with zeros in the recording, and ``pad_feat_value`` in each feature bin. - The user can choose to pad either to a specific `duration`; a specific number of frames `max_frames`; + The user can choose to pad either to a specific `duration`; a specific number of frames `num_frames`; or a specific number of samples `num_samples`. The three arguments are mutually exclusive. :param cut: DataCut to be padded. diff --git a/lhotse/dataset/audio_tagging.py b/lhotse/dataset/audio_tagging.py index 0ca44a687..fbf370fd6 100644 --- a/lhotse/dataset/audio_tagging.py +++ b/lhotse/dataset/audio_tagging.py @@ -78,7 +78,7 @@ def __init__( def __getitem__(self, cuts: CutSet) -> Dict[str, Union[torch.Tensor, List[str]]]: """ Return a new batch, with the batch size automatically determined using the constraints - of max_frames and max_cuts. + of max_duration and max_cuts. """ self.hdf5_fix.update() diff --git a/lhotse/dataset/sampling/bucketing.py b/lhotse/dataset/sampling/bucketing.py index dd53551cc..b869185b6 100644 --- a/lhotse/dataset/sampling/bucketing.py +++ b/lhotse/dataset/sampling/bucketing.py @@ -30,7 +30,7 @@ class BucketingSampler(CutSampler): ... # BucketingSampler specific args ... sampler_type=SimpleCutSampler, num_buckets=20, ... # Args passed into SimpleCutSampler - ... max_frames=20000 + ... max_duration=200 ... ) Bucketing sampler with 20 buckets, sampling pairs of source-target cuts:: @@ -40,7 +40,7 @@ class BucketingSampler(CutSampler): ... # BucketingSampler specific args ... sampler_type=CutPairsSampler, num_buckets=20, ... # Args passed into CutPairsSampler - ... max_source_frames=20000, max_target_frames=15000 + ... max_source_duration=200, max_target_duration=150 ... ) """ diff --git a/lhotse/dataset/sampling/cut_pairs.py b/lhotse/dataset/sampling/cut_pairs.py index 1582158d2..cd13353d8 100644 --- a/lhotse/dataset/sampling/cut_pairs.py +++ b/lhotse/dataset/sampling/cut_pairs.py @@ -12,10 +12,10 @@ class CutPairsSampler(CutSampler): It expects that both CutSet's strictly consist of Cuts with corresponding IDs. 
It behaves like an iterable that yields lists of strings (cut IDs). - When one of :attr:`max_frames`, :attr:`max_samples`, or :attr:`max_duration` is specified, + When one of :attr:`max_source_duration`, :attr:`max_target_duration`, or :attr:`max_cuts` is specified, the batch size is dynamic. Exactly zero or one of those constraints can be specified. - Padding required to collate the batch does not contribute to max frames/samples/duration. + Padding required to collate the batch does not contribute to max source_duration/target_duration. """ def __init__( @@ -229,7 +229,7 @@ def _next_batch(self) -> Tuple[CutSet, CutSet]: self.source_constraints.add(next_source_cut) self.target_constraints.add(next_target_cut) - # Did we exceed the max_source_frames and max_cuts constraints? + # Did we exceed the max_source_duration and max_cuts constraints? if ( not self.source_constraints.exceeded() and not self.target_constraints.exceeded() @@ -249,7 +249,7 @@ def _next_batch(self) -> Tuple[CutSet, CutSet]: # and return the cut anyway. warnings.warn( "The first cut drawn in batch collection violates one of the max_... constraints" - "we'll return it anyway. Consider increasing max_source_frames/max_cuts/etc." + "we'll return it anyway. Consider increasing max_source_duration/max_cuts/etc." ) source_cuts.append(next_source_cut) target_cuts.append(next_target_cut) diff --git a/lhotse/dataset/sampling/dynamic.py b/lhotse/dataset/sampling/dynamic.py index 2d36b4130..dc5858010 100644 --- a/lhotse/dataset/sampling/dynamic.py +++ b/lhotse/dataset/sampling/dynamic.py @@ -335,7 +335,7 @@ def detuplify( else next_cut_or_tpl ) - # Did we exceed the max_frames and max_cuts constraints? + # Did we exceed the max_duration and max_cuts constraints? if self.constraint.close_to_exceeding(): # Yes. Finish sampling this batch. if self.constraint.exceeded() and len(cuts) == 1: diff --git a/lhotse/dataset/sampling/simple.py b/lhotse/dataset/sampling/simple.py index 66b56dae2..a8ca079c4 100644 --- a/lhotse/dataset/sampling/simple.py +++ b/lhotse/dataset/sampling/simple.py @@ -11,10 +11,10 @@ class SimpleCutSampler(CutSampler): Samples cuts from a CutSet to satisfy the input constraints. It behaves like an iterable that yields lists of strings (cut IDs). - When one of :attr:`max_frames`, :attr:`max_samples`, or :attr:`max_duration` is specified, + When one of :attr:`max_duration`, or :attr:`max_cuts` is specified, the batch size is dynamic. Exactly zero or one of those constraints can be specified. - Padding required to collate the batch does not contribute to max frames/samples/duration. + Padding required to collate the batch does not contribute to max duration. Example usage:: @@ -197,10 +197,10 @@ def _next_batch(self) -> CutSet: self.diagnostics.discard_single(next_cut) continue - # Track the duration/frames/etc. constraints. + # Track the duration/etc. constraints. self.time_constraint.add(next_cut) - # Did we exceed the max_frames and max_cuts constraints? + # Did we exceed the max_duration and max_cuts constraints? if not self.time_constraint.exceeded(): # No - add the next cut to the batch, and keep trying. cuts.append(next_cut) @@ -215,9 +215,9 @@ def _next_batch(self) -> CutSet: # and return the cut anyway. warnings.warn( "The first cut drawn in batch collection violates " - "the max_frames, max_cuts, or max_duration constraints - " + "the max_duration, or max_cuts constraints - " "we'll return it anyway. " - "Consider increasing max_frames/max_cuts/max_duration." + "Consider increasing max_duration/max_cuts." 
) cuts.append(next_cut) diff --git a/lhotse/dataset/sampling/weighted_simple.py b/lhotse/dataset/sampling/weighted_simple.py index 7c3f76034..4a3191b02 100644 --- a/lhotse/dataset/sampling/weighted_simple.py +++ b/lhotse/dataset/sampling/weighted_simple.py @@ -15,7 +15,7 @@ class WeightedSimpleCutSampler(SimpleCutSampler): When performing sampling, it avoids having duplicated cuts in the same batch. The sampler terminates if the number of sampled cuts reach :attr:`num_samples` - When one of :attr:`max_frames`, :attr:`max_samples`, or :attr:`max_duration` is specified, + When one of :attr:`max_duration`, or :attr:`max_cuts` is specified, the batch size is dynamic. Example usage: diff --git a/lhotse/dataset/speech_recognition.py b/lhotse/dataset/speech_recognition.py index 4c9919f99..4a3520b37 100644 --- a/lhotse/dataset/speech_recognition.py +++ b/lhotse/dataset/speech_recognition.py @@ -94,7 +94,7 @@ def __init__( def __getitem__(self, cuts: CutSet) -> Dict[str, Union[torch.Tensor, List[str]]]: """ Return a new batch, with the batch size automatically determined using the constraints - of max_frames and max_cuts. + of max_duration and max_cuts. """ validate_for_asr(cuts) diff --git a/lhotse/dataset/speech_translation.py b/lhotse/dataset/speech_translation.py index 672d27069..1def4475b 100644 --- a/lhotse/dataset/speech_translation.py +++ b/lhotse/dataset/speech_translation.py @@ -97,7 +97,7 @@ def __init__( def __getitem__(self, cuts: CutSet) -> Dict[str, Union[torch.Tensor, List[str]]]: """ Return a new batch, with the batch size automatically determined using the constraints - of max_frames and max_cuts. + of max_duration and max_cuts. """ validate_for_asr(cuts) self.hdf5_fix.update() diff --git a/lhotse/dataset/surt.py b/lhotse/dataset/surt.py index 8eda83b5f..5e424353c 100644 --- a/lhotse/dataset/surt.py +++ b/lhotse/dataset/surt.py @@ -170,7 +170,7 @@ def __init__( def __getitem__(self, cuts: CutSet) -> Dict[str, Union[torch.Tensor, List[str]]]: """ Return a new batch, with the batch size automatically determined using the constraints - of max_frames and max_cuts. + of max_duration and max_cuts. """ validate_for_asr(cuts)
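
As a usage reference for the renamed constraint, the sketch below constructs the samplers touched by this patch with their duration-based limits. It follows the examples already shown in docs/datasets.rst and the BucketingSampler docstring; the manifest paths and numeric limits are placeholders rather than values taken from the patch::

    from lhotse import CutSet
    from lhotse.dataset.sampling import (
        BucketingSampler,
        CutPairsSampler,
        SimpleCutSampler,
    )

    # Placeholder manifests -- substitute your own CutSets.
    cuts = CutSet.from_file("data/train_cuts.jsonl.gz")
    source_cuts = CutSet.from_file("data/train_source_cuts.jsonl.gz")
    target_cuts = CutSet.from_file("data/train_target_cuts.jsonl.gz")

    # Dynamic batch size: cuts are accumulated until adding another one
    # would exceed max_duration (total seconds of audio in the batch).
    simple = SimpleCutSampler(cuts, max_duration=200.0, shuffle=True)

    # Bucketing first groups cuts of similar duration, then draws each
    # batch from a single bucket; max_duration is forwarded to the
    # per-bucket SimpleCutSampler.
    bucketing = BucketingSampler(
        cuts,
        sampler_type=SimpleCutSampler,
        num_buckets=20,
        max_duration=200.0,
    )

    # Paired source/target cuts matched by ID (e.g. for tasks where both
    # input and output are speech), with a separate limit for each side.
    pairs = CutPairsSampler(
        source_cuts,
        target_cuts,
        max_source_duration=200.0,
        max_target_duration=150.0,
    )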
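
The pad() docstring fixes above replace max_frames with num_frames; for reference, this is how the three mutually exclusive padding targets are selected (the manifest path is a placeholder, and padding by num_frames or num_samples assumes the cut has precomputed features or a recording, respectively)::

    from lhotse import CutSet

    cuts = CutSet.from_file("data/train_cuts.jsonl.gz")  # placeholder path
    cut = next(iter(cuts))

    # Exactly one of `duration`, `num_frames`, `num_samples` may be given;
    # the cut is returned unchanged if it already meets the requested size.
    padded_by_time = cut.pad(duration=10.0)           # at least 10 seconds
    padded_by_frames = cut.pad(num_frames=1000)       # at least 1000 feature frames
    padded_by_samples = cut.pad(num_samples=160_000)  # at least 160k audio samples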