diff --git a/lhotse/dataset/sampling/base.py b/lhotse/dataset/sampling/base.py
index 8d63c9e59..d2fe183b3 100644
--- a/lhotse/dataset/sampling/base.py
+++ b/lhotse/dataset/sampling/base.py
@@ -68,9 +68,7 @@ def __init__(
             It makes sense to turn it off when iterating the sampler is somewhat costly for any reason;
             e.g. because the underlying manifest is lazily loaded from the filesystem/somewhere else.
         """
-        super().__init__(
-            data_source=None
-        )  # the "data_source" arg is not used in Sampler...
+        super().__init__(data_source=None)  # the "data_source" arg is not used in Sampler...
         self.shuffle = shuffle
         self.seed = seed
         self.epoch = 0
@@ -103,7 +101,6 @@ def set_epoch(self, epoch: int) -> None:
         :param epoch: Epoch number.
         """
         self.epoch = epoch
-        self.num_batches = None
 
     def filter(self, predicate: Callable[[Cut], bool]) -> None:
         """
@@ -134,6 +131,36 @@ def _next_batch(self):
             "Sub-classes of CutSampler have to implement self._next_batch()"
         )
 
+    @property
+    def remaining_duration(self) -> Optional[float]:
+        """
+        Remaining duration of data left in the sampler (may be inexact due to float arithmetic).
+        Not available when the CutSet is read in lazy mode (returns None).
+        """
+        raise NotImplementedError(
+            'Sub-classes of CutSampler have to implement self.remaining_duration'
+        )
+
+    @property
+    def remaining_cuts(self) -> Optional[int]:
+        """
+        Remaining number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+        """
+        raise NotImplementedError(
+            'Sub-classes of CutSampler have to implement self.remaining_cuts'
+        )
+
+    @property
+    def num_cuts(self) -> Optional[int]:
+        """
+        Total number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+        """
+        raise NotImplementedError(
+            'Sub-classes of CutSampler have to implement self.num_cuts'
+        )
+
     def __len__(self) -> int:
         if not self.provide_len:
             # Fake non-existence of this attribute
diff --git a/lhotse/dataset/sampling/bucketing.py b/lhotse/dataset/sampling/bucketing.py
index 6394691b8..f3f3b1f37 100644
--- a/lhotse/dataset/sampling/bucketing.py
+++ b/lhotse/dataset/sampling/bucketing.py
@@ -3,7 +3,7 @@
 from functools import reduce
 from itertools import chain
 from operator import add
-from typing import Callable, Dict, List, Tuple, Type
+from typing import Callable, Dict, List, Optional, Tuple, Type
 
 import numpy as np
 from typing_extensions import Literal
@@ -53,6 +53,7 @@ def __init__(
             num_buckets: int = 10,
             bucket_method: Literal["equal_len", "equal_duration"] = "equal_len",
             drop_last: bool = False,
+            proportional_sampling: bool = True,
             seed: int = 0,
             **kwargs: Dict,
     ) -> None:
@@ -71,6 +72,10 @@ def __init__(
         :param drop_last: When ``True``, we will drop all incomplete batches.
             A batch is considered incomplete if it depleted a bucket before
             hitting the constraint such as max_duration, max_cuts, etc.
+        :param proportional_sampling: When ``True``, we will introduce an approximate
+            proportional sampling mechanism in the bucket selection.
+            This mechanism reduces the chance that any of the buckets gets depleted early.
+            Enabled by default.
         :param seed: random seed for bucket selection
         :param kwargs: Arguments used to create the underlying sampler for each bucket.
         """
@@ -83,6 +88,7 @@ def __init__(
         )
         self.num_buckets = num_buckets
         self.drop_last = drop_last
+        self.proportional_sampling = proportional_sampling
         self.sampler_type = sampler_type
         self.sampler_kwargs = kwargs
         self.cut_sets = cuts
@@ -119,6 +125,45 @@ def __init__(
         self.bucket_rng = random.Random(self.seed + self.epoch)
         self.depleted = [False] * num_buckets
 
+    @property
+    def remaining_duration(self) -> Optional[float]:
+        """
+        Remaining duration of data left in the sampler (may be inexact due to float arithmetic).
+        Not available when the CutSet is read in lazy mode (returns None).
+
+        .. note: For BucketingSampler, it's the sum of remaining duration in all buckets.
+        """
+        try:
+            return sum(s.remaining_duration for _, s in self._nondepleted_samplers_with_idxs)
+        except TypeError:
+            return None
+
+    @property
+    def remaining_cuts(self) -> Optional[int]:
+        """
+        Remaining number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+
+        .. note: For BucketingSampler, it's the sum of remaining cuts in all buckets.
+        """
+        try:
+            return sum(s.remaining_cuts for _, s in self._nondepleted_samplers_with_idxs)
+        except TypeError:
+            return None
+
+    @property
+    def num_cuts(self) -> Optional[int]:
+        """
+        Total number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+
+        .. note: For BucketingSampler, it's the sum of num cuts in all buckets.
+        """
+        try:
+            return sum(s.num_cuts for s in self.bucket_samplers)
+        except TypeError:
+            return None
+
     def set_epoch(self, epoch: int) -> None:
         """
         Sets the epoch for this sampler. When :attr:`shuffle=True`, this ensures all replicas
@@ -157,9 +202,45 @@ def __iter__(self) -> "BucketingSampler":
         self.depleted = [False] * self.num_buckets
         return self
 
+    def _select_bucket_with_idx(self) -> Tuple[int, CutSampler]:
+        if not self.proportional_sampling or self.cut_sets[0].is_lazy:
+            # Either proportional sampling was disabled, or the CutSet is lazy.
+            # With lazy CutSets, we simply choose a random bucket,
+            # because we can't know how much data is left in the buckets.
+            return self.bucket_rng.choice(self._nondepleted_samplers_with_idxs)
+        idx_sampler_pairs = self._nondepleted_samplers_with_idxs
+        if len(idx_sampler_pairs) == 1:
+            # Only a single bucket left -- choose it.
+            return idx_sampler_pairs[0]
+        # If we got there, it means there are at least 2 buckets we can sample from.
+        # We are going to use approximate proportional sampling:
+        # for that, we randomly select two buckets, and then assign a higher probability
+        # to the bucket that has more cumulative data duration left to sample.
+        # This helps ensure that none of the buckets is depleted much earlier than
+        # the others.
+        idx1, sampler1 = self.bucket_rng.choice(idx_sampler_pairs)
+        idx2, sampler2 = self.bucket_rng.choice(idx_sampler_pairs)
+        # Note: prob1 is the probability of selecting sampler1
+        try:
+            prob1 = sampler1.remaining_duration / (
+                    sampler1.remaining_duration + sampler2.remaining_duration
+            )
+        except ZeroDivisionError:
+            # This will happen when we have already depleted the samplers,
+            # but the BucketingSampler doesn't know it yet. We only truly
+            # know that a sampler is depleted when we try to get a batch
+            # and it raises a StopIteration, which is done after this stage.
+            # We can't depend on remaining_duration for lazy CutSets.
+            # If both samplers are zero duration, just return the first one.
+            return idx1, sampler1
+        if self.bucket_rng.random() > prob1:
+            return idx2, sampler2
+        else:
+            return idx1, sampler1
+
     def _next_batch(self):
         while not self.is_depleted:
-            idx, sampler = self.bucket_rng.choice(self._nondepleted_samplers_with_idxs)
+            idx, sampler = self._select_bucket_with_idx()
             try:
                 return next(sampler)
             except StopIteration:
diff --git a/lhotse/dataset/sampling/cut_pairs.py b/lhotse/dataset/sampling/cut_pairs.py
index 06ddfd2a5..029131ec6 100644
--- a/lhotse/dataset/sampling/cut_pairs.py
+++ b/lhotse/dataset/sampling/cut_pairs.py
@@ -80,6 +80,34 @@ def __init__(
         self.max_cuts = max_cuts
         self.drop_last = drop_last
 
+    @property
+    def remaining_duration(self) -> Optional[float]:
+        """
+        Remaining duration of data left in the sampler (may be inexact due to float arithmetic).
+        Not available when the CutSet is read in lazy mode (returns None).
+
+        .. note: For :class:`.CutPairsSampler` we return the source cuts duration.
+        """
+        return self.source_cuts.remaining_duration
+
+    @property
+    def remaining_cuts(self) -> Optional[int]:
+        """
+        Remaining number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+        """
+        return self.source_cuts.remaining_cuts
+
+    @property
+    def num_cuts(self) -> Optional[int]:
+        """
+        Total number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+        """
+        if self.source_cuts.is_lazy:
+            return None
+        return len(self.source_cuts)
+
     def __iter__(self) -> "CutPairsSampler":
         """
         Prepare the dataset for iterating over a new epoch. Will shuffle the data if requested.
diff --git a/lhotse/dataset/sampling/data_source.py b/lhotse/dataset/sampling/data_source.py
index 43294720f..fbd4a92ba 100644
--- a/lhotse/dataset/sampling/data_source.py
+++ b/lhotse/dataset/sampling/data_source.py
@@ -1,6 +1,6 @@
 import random
 from collections import deque
-from typing import Generator, Iterable
+from typing import Generator, Iterable, Optional
 
 from lhotse import CutSet
 from lhotse.cut import Cut
@@ -18,6 +18,27 @@ def __init__(self, items: CutSet):
         self._shuffled_items = self._orig_items
         self._iter = None
         self._reusable = deque()
+        # Add duration tracking for non-lazy CutSets
+        if not self.is_lazy:
+            self._total_duration = sum(c.duration for c in self._orig_items)
+            self._total_cuts = len(self._orig_items)
+        else:
+            self._total_duration = None
+            self._total_cuts = None
+        self._remaining_duration = self._total_duration
+        self.remaining_cuts = self._total_cuts
+
+    @property
+    def is_lazy(self) -> bool:
+        return self._orig_items.is_lazy
+
+    @property
+    def remaining_duration(self) -> Optional[float]:
+        # Paranoia mode: float arithmetic is imprecise, so we'll make sure
+        # that the returned duration is non-negative.
+        if self._remaining_duration is None:
+            return None
+        return max(0, self._remaining_duration)
 
     def shuffle(self, seed: int) -> "DataSource":
         """
@@ -45,11 +66,16 @@ def sort_like(self, other: "DataSource") -> "DataSource":
     def take_back(self, cut: Cut) -> None:
         """Push the cut in front of other cuts to be sampled again."""
         self._reusable.append(cut)
+        if not self.is_lazy:
+            self._remaining_duration += cut.duration
+            self.remaining_cuts += 1
 
     def reset(self) -> None:
         """Reset the iterable state of DataSource."""
         self._iter = None
         self._reusable.clear()
+        self._remaining_duration = self._total_duration
+        self.remaining_cuts = self._total_cuts
 
     def __iter__(self) -> "DataSource":
         self.reset()
@@ -58,8 +84,13 @@ def __iter__(self) -> "DataSource":
 
     def __next__(self) -> Cut:
         if self._reusable:
-            return self._reusable.popleft()
-        return next(self._iter)
+            next_cut = self._reusable.popleft()
+        else:
+            next_cut = next(self._iter)
+        if not self.is_lazy:
+            self._remaining_duration -= next_cut.duration
+            self.remaining_cuts -= 1
+        return next_cut
 
     def __len__(self) -> int:
         return len(self._shuffled_items)
diff --git a/lhotse/dataset/sampling/single_cut.py b/lhotse/dataset/sampling/single_cut.py
index d36f352d7..10b6547b2 100644
--- a/lhotse/dataset/sampling/single_cut.py
+++ b/lhotse/dataset/sampling/single_cut.py
@@ -78,6 +78,32 @@ def __init__(
         # Constraints
         assert is_none_or_gt(self.max_cuts, 0)
 
+    @property
+    def remaining_duration(self) -> Optional[float]:
+        """
+        Remaining duration of data left in the sampler (may be inexact due to float arithmetic).
+        Not available when the CutSet is read in lazy mode (returns None).
+        """
+        return self.data_source.remaining_duration
+
+    @property
+    def remaining_cuts(self) -> Optional[int]:
+        """
+        Remaining number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+        """
+        return self.data_source.remaining_cuts
+
+    @property
+    def num_cuts(self) -> Optional[int]:
+        """
+        Total number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+        """
+        if self.data_source.is_lazy:
+            return None
+        return len(self.data_source)
+
     def __iter__(self) -> "SingleCutSampler":
         """
         Prepare the dataset for iterating over a new epoch. Will shuffle the data if requested.
diff --git a/lhotse/dataset/sampling/zip.py b/lhotse/dataset/sampling/zip.py
index 73ccced8b..d98600839 100644
--- a/lhotse/dataset/sampling/zip.py
+++ b/lhotse/dataset/sampling/zip.py
@@ -1,6 +1,6 @@
 from functools import reduce
 from operator import add
-from typing import Callable
+from typing import Callable, Optional
 
 from lhotse import CutSet
 from lhotse.cut import Cut
@@ -32,6 +32,44 @@ def __init__(self, *samplers: CutSampler) -> None:
         super().__init__()
         self.samplers = samplers
 
+    @property
+    def remaining_duration(self) -> Optional[float]:
+        """
+        Remaining duration of data left in the sampler (may be inexact due to float arithmetic).
+
+        .. note: For ZipSampler, it's the minimum of remaining durations in its sub-samplers.
+        """
+        try:
+            return min(s.remaining_duration for s in self.samplers)
+        except TypeError:
+            return None
+
+    @property
+    def remaining_cuts(self) -> Optional[int]:
+        """
+        Remaining number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+
+        .. note: For ZipSampler, it's the minimum of remaining cuts in its sub-samplers.
+        """
+        try:
+            return min(s.remaining_cuts for s in self.samplers)
+        except TypeError:
+            return None
+
+    @property
+    def num_cuts(self) -> Optional[int]:
+        """
+        Total number of cuts in the sampler.
+        Not available when the CutSet is read in lazy mode (returns None).
+
+        .. note: For ZipSampler, it's the minimum of num cuts in its sub-samplers.
+        """
+        try:
+            return min(s.num_cuts for s in self.samplers)
+        except TypeError:
+            return None
+
     def __iter__(self):
         for sampler in self.samplers:
             iter(sampler)
diff --git a/test/dataset/test_sampling.py b/test/dataset/test_sampling.py
index 9ffd869dc..83463c48b 100644
--- a/test/dataset/test_sampling.py
+++ b/test/dataset/test_sampling.py
@@ -1,5 +1,6 @@
 import random
 from itertools import groupby
+from math import isclose
 from statistics import mean
 from tempfile import NamedTemporaryFile
 
@@ -122,23 +123,22 @@ def test_single_cut_sampler_order_differs_between_epochs():
         last_order = new_order
 
 
+@pytest.mark.xfail(
+    reason="len(sampler) is incorrect as it caches the num_buckets value "
+           "for a particular cut ordering -- if the cuts are re-shuffled, "
+           "the actual len might differ."
+)
 def test_single_cut_sampler_len():
     # total duration is 55 seconds
     # each second has 100 frames
-    cuts = CutSet.from_cuts(
-        dummy_cut(idx, duration=float(idx))
-        for idx in range(1, 11)
-    )
-    sampler = SingleCutSampler(
-        cuts,
-        shuffle=True,
-        max_frames=10 * 100,
-        max_cuts=6
-    )
+    cuts = CutSet.from_cuts(dummy_cut(idx, duration=float(idx)) for idx in range(1, 11))
+    sampler = SingleCutSampler(cuts, shuffle=True, max_frames=10 * 100, max_cuts=6)
 
     for epoch in range(5):
-        assert len(sampler) == len([batch for batch in sampler])
         sampler.set_epoch(epoch)
+        sampler_len = len(sampler)
+        num_batches = len([batch for batch in sampler])
+        assert sampler_len == num_batches
 
 
 def test_single_cut_sampler_low_max_frames(libri_cut_set):
@@ -286,13 +286,15 @@ def test_cut_pairs_sampler_order_differs_between_epochs():
         last_order = new_order
 
 
+@pytest.mark.xfail(
+    reason="len(sampler) is incorrect as it caches the num_buckets value "
+           "for a particular cut ordering -- if the cuts are re-shuffled, "
+           "the actual len might differ."
+)
 def test_cut_pairs_sampler_len():
     # total duration is 55 seconds
     # each second has 100 frames
-    cuts = CutSet.from_cuts(
-        dummy_cut(idx, duration=float(idx))
-        for idx in range(1, 11)
-    )
+    cuts = CutSet.from_cuts(dummy_cut(idx, duration=float(idx)) for idx in range(1, 11))
     sampler = CutPairsSampler(
         source_cuts=cuts,
         target_cuts=cuts,
@@ -351,6 +353,15 @@ def test_bucketing_sampler_single_cuts():
     assert set(cut_set.ids) == set(c.id for c in sampled_cuts)
 
 
+def test_bucketing_sampler_single_cuts_no_proportional_sampling():
+    cut_set = DummyManifest(CutSet, begin_id=0, end_id=1000)
+    sampler = BucketingSampler(cut_set, proportional_sampling=False, sampler_type=SingleCutSampler)
+    sampled_cuts = []
+    for batch in sampler:
+        sampled_cuts.extend(batch)
+    assert set(cut_set.ids) == set(c.id for c in sampled_cuts)
+
+
 def test_bucketing_sampler_single_cuts_equal_len():
     cut_set = DummyManifest(CutSet, begin_id=0, end_id=1000)
     for idx, c in enumerate(cut_set):
@@ -915,3 +926,28 @@ def test_streaming_shuffle(datasize, bufsize):
     assert len(data) == len(shuffled)
     assert len(shuffled) == len(set(shuffled))
     assert data != shuffled
+
+
+@pytest.mark.parametrize(
+    "sampler",
+    [
+        SingleCutSampler(DummyManifest(CutSet, begin_id=0, end_id=10)),
+        CutPairsSampler(
+            DummyManifest(CutSet, begin_id=0, end_id=10),
+            DummyManifest(CutSet, begin_id=0, end_id=10),
+        ),
+        BucketingSampler(DummyManifest(CutSet, begin_id=0, end_id=10)),
+        ZipSampler(
+            SingleCutSampler(DummyManifest(CutSet, begin_id=0, end_id=10)),
+            SingleCutSampler(DummyManifest(CutSet, begin_id=10, end_id=20)),
+        ),
+    ],
+)
+def test_sampler_properties(sampler):
+    assert sampler.remaining_cuts == 10
+    assert isclose(sampler.remaining_duration, 10.0)
+    assert sampler.num_cuts == 10
+    batches = [b for b in sampler]
+    assert sampler.remaining_cuts == 0
+    assert isclose(sampler.remaining_duration, 0.0)
+    assert sampler.num_cuts == 10