diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py index eafaeb3a60fa..bb8ae674cbe6 100644 --- a/python/ray/data/dataset.py +++ b/python/ray/data/dataset.py @@ -549,6 +549,13 @@ def __call__(self, batch: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: specified ``batch_size`` if ``batch_size`` doesn't evenly divide the block(s) sent to a given map task. + If ``batch_size`` is set and each input block is smaller than the + ``batch_size``, Ray Data bundles multiple blocks as the input for one + task, until their total size is equal to or greater than the given + ``batch_size``. + If ``batch_size`` is not set, no bundling is performed and each task + receives only one input block. + .. seealso:: :meth:`~Dataset.iter_batches`