Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/torchcodec/_core/SingleStreamDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,16 @@ void SingleStreamDecoder::initializeDecoder() {
ptsToSeconds(formatContext_->duration, defaultTimeBase);
}

// Use container duration as fallback for streams missing stream duration
if (containerMetadata_.durationSecondsFromHeader.has_value()) {
for (auto& streamMetadata : containerMetadata_.allStreamMetadata) {
if (!streamMetadata.durationSecondsFromHeader.has_value()) {
streamMetadata.durationSecondsFromHeader =
containerMetadata_.durationSecondsFromHeader;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than assigning the container's duration to the stream's duration, I think we should do something like:

if (!streamMetadata.durationSecondsFromHeader.has_value()) {
  return containerMetadata_.durationSecondsFromHeader;
}

I think that keeps it clear where we're getting our information from. So far, we've kept that explicit in our existing metadata fields.

One wrinkle here is that the metadata we report to users in Python is VideoStreamMetadata. That is, it's explicitly the stream metadata. We don't have a hierarchically clean way to add the container metadata there. So while it's kinda odd, maybe we should add a field for duration_seconds_from_container on the Python side.

To be clear, my current thinking is that:

  1. In C++, we fall back to the container metadata where appropriate. This may be awkward because we don't pass around the container metadata, so we may want to add a durationSecondsFromContainer field to StreamMetadata.
  2. In Python, we should add a field for duration_seconds_from_container to StreamMetadata, and we have logic to fall back to it in duration_seconds.

@NicolasHug, @Dan-Flores, I'm curious to hear your thoughts as well.

}
}
}

if (formatContext_->bit_rate > 0) {
containerMetadata_.bitRate = formatContext_->bit_rate;
}
Expand Down
11 changes: 0 additions & 11 deletions test/test_decoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
SINE_MONO_S32,
SINE_MONO_S32_44100,
SINE_MONO_S32_8000,
supports_approximate_mode,
TEST_SRC_2_720P,
TEST_SRC_2_720P_H265,
TEST_SRC_2_720P_MPEG4,
Expand Down Expand Up @@ -1492,8 +1491,6 @@ def test_get_frames_at_tensor_indices(self):
def test_beta_cuda_interface_get_frame_at(
self, asset, contiguous_indices, seek_mode
):
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")

if in_fbcode() and asset is AV1_VIDEO:
pytest.skip("AV1 CUDA not supported internally")
Expand Down Expand Up @@ -1540,8 +1537,6 @@ def test_beta_cuda_interface_get_frame_at(
def test_beta_cuda_interface_get_frames_at(
self, asset, contiguous_indices, seek_mode
):
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")
if in_fbcode() and asset is AV1_VIDEO:
pytest.skip("AV1 CUDA not supported internally")

Expand Down Expand Up @@ -1585,8 +1580,6 @@ def test_beta_cuda_interface_get_frames_at(
)
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
def test_beta_cuda_interface_get_frame_played_at(self, asset, seek_mode):
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")
if in_fbcode() and asset is AV1_VIDEO:
pytest.skip("AV1 CUDA not supported internally")

Expand Down Expand Up @@ -1627,8 +1620,6 @@ def test_beta_cuda_interface_get_frame_played_at(self, asset, seek_mode):
)
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
def test_beta_cuda_interface_get_frames_played_at(self, asset, seek_mode):
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")
if in_fbcode() and asset is AV1_VIDEO:
pytest.skip("AV1 CUDA not supported internally")

Expand Down Expand Up @@ -1670,8 +1661,6 @@ def test_beta_cuda_interface_get_frames_played_at(self, asset, seek_mode):
)
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
def test_beta_cuda_interface_backwards(self, asset, seek_mode):
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")
if in_fbcode() and asset is AV1_VIDEO:
pytest.skip("AV1 CUDA not supported internally")

Expand Down
7 changes: 0 additions & 7 deletions test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,10 +798,3 @@ def sample_format(self) -> str:
},
frames={0: {}}, # Not needed for now
)


def supports_approximate_mode(asset: TestVideo) -> bool:
    """Return True if *asset* can be decoded in approximate seek mode.

    A few assets are missing the ``duration`` header field, so approximate
    mode fails for them on all devices.
    TODO: we should address this, see
    https://github.com/meta-pytorch/torchcodec/issues/945
    """
    missing_duration_assets = (AV1_VIDEO, TEST_SRC_2_720P_VP9, TEST_SRC_2_720P_VP8)
    return asset not in missing_duration_assets
Loading