diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
index 4e7ae440..a5c0fddf 100644
--- a/src/torchcodec/decoders/_core/VideoDecoder.cpp
+++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -732,6 +732,25 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
   int firstActiveStreamIndex = *activeStreamIndices_.begin();
   const auto& firstStreamInfo = streams_[firstActiveStreamIndex];
   int64_t desiredPts = *maybeDesiredPts_ * firstStreamInfo.timeBase.den;
+
+  // For some encodings like H265, FFMPEG sometimes seeks past the point we
+  // set as the max_ts. So we use our own index to give it the exact pts of
+  // the key frame that we want to seek to.
+  // See https://github.com/pytorch/torchcodec/issues/179 for more details.
+  // See https://trac.ffmpeg.org/ticket/11137 for the underlying ffmpeg bug.
+  if (!firstStreamInfo.keyFrames.empty()) {
+    int desiredKeyFrameIndex =
+        getKeyFrameIndexForPts(firstStreamInfo, desiredPts);
+    // NOTE(review): getKeyFrameIndexForPts is defined outside this patch; if it
+    // can return a negative index when desiredPts precedes the first key frame
+    // (TODO confirm), fall back to the first key frame rather than indexing
+    // keyFrames out of bounds.
+    if (desiredKeyFrameIndex < 0) {
+      desiredKeyFrameIndex = 0;
+    }
+    desiredPts = firstStreamInfo.keyFrames[desiredKeyFrameIndex].pts;
+  }
+
   int ffmepgStatus = avformat_seek_file(
       formatContext_.get(),
       firstStreamInfo.streamIndex,
diff --git a/test/decoders/test_simple_video_decoder.py b/test/decoders/test_simple_video_decoder.py
index ff87a63d..1187efbf 100644
--- a/test/decoders/test_simple_video_decoder.py
+++ b/test/decoders/test_simple_video_decoder.py
@@ -321,12 +321,9 @@ def test_get_frame_displayed_at(self):
         assert isinstance(decoder.get_frame_displayed_at(6.02).duration_seconds, float)
 
     def test_get_frame_displayed_at_h265(self):
+        # Non-regression test for https://github.com/pytorch/torchcodec/issues/179
         decoder = SimpleVideoDecoder(H265_VIDEO.path)
-        # Note that for H265, FFMPEG's seeking is not precise. Even though we ask to
-        # seek with a max_ts=0.5, FFMPEG will seek beyond that point.
-        # TODO: Revert use frame5 in the test below once it's fixed upstream:
-        # https://trac.ffmpeg.org/ticket/11137
-        ref_frame6 = H265_VIDEO.get_frame_by_name("frame000006")
-        assert_tensor_equal(ref_frame6, decoder.get_frame_displayed_at(0.5).data)
+        ref_frame5 = H265_VIDEO.get_frame_by_name("frame000005")
+        assert_tensor_equal(ref_frame5, decoder.get_frame_displayed_at(0.5).data)
 
     def test_get_frame_displayed_at_fails(self):
diff --git a/test/generate_reference_resources.sh b/test/generate_reference_resources.sh
index 1fdb84de..ccc7262e 100755
--- a/test/generate_reference_resources.sh
+++ b/test/generate_reference_resources.sh
@@ -47,7 +47,7 @@ ffmpeg -y -i "$VIDEO_PATH" -b:a 192K -vn "$VIDEO_PATH.audio.mp3"
 # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
 # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
 VIDEO_PATH=$RESOURCES_DIR/h265_video.mp4
-FRAMES=(6)
+FRAMES=(5)
 for frame in "${FRAMES[@]}"; do
     frame_name=$(printf "%06d" "$frame")
     ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.frame$frame_name.bmp"
diff --git a/test/resources/h265_video.mp4.frame000006.pt b/test/resources/h265_video.mp4.frame000005.pt
similarity index 79%
rename from test/resources/h265_video.mp4.frame000006.pt
rename to test/resources/h265_video.mp4.frame000005.pt
index 1fda757c..5afa28ba 100644
Binary files a/test/resources/h265_video.mp4.frame000006.pt and b/test/resources/h265_video.mp4.frame000005.pt differ