From ba3ed26408485bcbccaa206148d7cee3a6bc57d4 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 17:56:04 +0800 Subject: [PATCH 01/41] New upstream version 4.4.1 --- Changelog | 191 ++++++++++++++++++++++++- RELEASE | 2 +- RELEASE_NOTES | 2 +- VERSION | 2 +- configure | 4 +- doc/Doxyfile | 2 +- doc/writing_filters.txt | 2 +- ffbuild/common.mak | 2 +- fftools/ffmpeg.c | 19 ++- libavcodec/aaccoder.c | 8 +- libavcodec/aacdec_template.c | 7 +- libavcodec/aacenc.c | 5 +- libavcodec/aacpsy.c | 5 +- libavcodec/aarch64/hevcdsp_idct_neon.S | 11 +- libavcodec/alsdec.c | 9 +- libavcodec/apedec.c | 10 +- libavcodec/argo.c | 7 +- libavcodec/av1_metadata_bsf.c | 16 ++- libavcodec/cbs_h265_syntax_template.c | 2 +- libavcodec/clearvideo.c | 4 +- libavcodec/cpia.c | 1 + libavcodec/crystalhd.c | 1 + libavcodec/cuviddec.c | 1 + libavcodec/decode.c | 73 +++++----- libavcodec/dnxhddec.c | 22 ++- libavcodec/dpx.c | 5 +- libavcodec/dxva2_av1.c | 2 +- libavcodec/exr.c | 19 ++- libavcodec/faxcompr.c | 18 ++- libavcodec/flac_parser.c | 3 +- libavcodec/flicvideo.c | 2 + libavcodec/frame_thread_encoder.c | 11 +- libavcodec/frame_thread_encoder.h | 4 + libavcodec/h263.c | 12 ++ libavcodec/h263.h | 1 + libavcodec/h263data.c | 14 -- libavcodec/h263data.h | 1 - libavcodec/h264_slice.c | 3 + libavcodec/h264dec.c | 2 +- libavcodec/hevc_sei.c | 2 +- libavcodec/iff.c | 3 +- libavcodec/internal.h | 5 + libavcodec/j2kenc.c | 2 +- libavcodec/jpeg2000dec.c | 2 + libavcodec/jpeglsdec.c | 2 +- libavcodec/libdav1d.c | 20 +++ libavcodec/lpc.c | 2 +- libavcodec/lpc.h | 7 +- libavcodec/mjpegbdec.c | 10 +- libavcodec/mjpegdec.c | 3 + libavcodec/mpeg12dec.c | 8 +- libavcodec/mpegvideo_enc.c | 7 +- libavcodec/mxpegdec.c | 3 + libavcodec/nellymoserenc.c | 6 +- libavcodec/nvenc.c | 142 +++++++++--------- libavcodec/nvenc.h | 2 +- libavcodec/nvenc_hevc.c | 2 +- libavcodec/pngdec.c | 6 +- libavcodec/rv10.c | 8 +- libavcodec/sbrdsp_fixed.c | 2 +- libavcodec/snow.h | 1 + libavcodec/snowdec.c | 17 ++- libavcodec/svq1enc.c | 7 +- libavcodec/ttadata.c | 3 +- libavcodec/ttadsp.c | 6 +- libavcodec/ttmlenc.c | 2 +- libavcodec/utils.c | 49 +++++-- libavcodec/vaapi_av1.c | 2 +- libavcodec/vc1.c | 5 + libavcodec/vc1dec.c | 10 +- libavcodec/vc2enc.c | 2 + libavcodec/videotoolboxenc.c | 4 + libavcodec/webp.c | 3 + libavcodec/wma.c | 4 +- libavcodec/wmadec.c | 11 +- libavcodec/wmaprodec.c | 13 +- libavcodec/xpmdec.c | 14 +- libavfilter/af_drmeter.c | 5 + libavfilter/f_metadata.c | 3 - libavfilter/vf_ciescope.c | 3 +- libavfilter/vf_dctdnoiz.c | 3 + libavfilter/vf_fftdnoiz.c | 2 +- libavfilter/vf_mestimate.c | 3 + libavfilter/vf_overlay_cuda.c | 12 +- libavfilter/vf_scale.c | 12 ++ libavfilter/vf_scale_npp.c | 5 +- libavfilter/vf_vmafmotion.c | 3 + libavfilter/vf_yadif.c | 32 +++-- libavformat/aaxdec.c | 15 +- libavformat/adtsenc.c | 6 +- libavformat/aiffdec.c | 3 + libavformat/asfdec_f.c | 2 +- libavformat/asfdec_o.c | 21 +-- libavformat/avidec.c | 11 +- libavformat/avio.c | 7 +- libavformat/bfi.c | 2 + libavformat/cafdec.c | 2 +- libavformat/dsfdec.c | 2 +- libavformat/dsicin.c | 7 +- libavformat/dxa.c | 2 +- libavformat/fifo.c | 2 +- libavformat/ftp.c | 2 + libavformat/hlsenc.c | 11 +- libavformat/id3v2.c | 6 +- libavformat/iff.c | 2 +- libavformat/jacosubdec.c | 3 + libavformat/matroskadec.c | 22 +-- libavformat/matroskaenc.c | 1 + libavformat/mccdec.c | 3 +- libavformat/moflex.c | 3 - libavformat/mov.c | 32 ++++- libavformat/movenc.c | 21 ++- libavformat/mpc8.c | 13 +- libavformat/mpegts.c | 5 +- libavformat/msf.c | 2 + 
libavformat/mvdec.c | 5 +- libavformat/mvi.c | 14 +- libavformat/mxfdec.c | 8 +- libavformat/nutdec.c | 5 + libavformat/pp_bnk.c | 2 +- libavformat/qcp.c | 3 +- libavformat/realtextdec.c | 3 +- libavformat/rmdec.c | 16 ++- libavformat/rpl.c | 10 +- libavformat/sbgdec.c | 10 ++ libavformat/subtitles.c | 2 +- libavformat/tta.c | 2 + libavformat/utils.c | 12 +- libavformat/wavdec.c | 4 +- libavformat/wtvdec.c | 6 +- libavutil/cpu.c | 6 + libavutil/mathematics.h | 1 + libswscale/alphablend.c | 32 +++-- libswscale/slice.c | 5 +- tests/ref/fate/ts-opus-demux | 2 +- tools/cws2fws.c | 6 + 136 files changed, 950 insertions(+), 397 deletions(-) diff --git a/Changelog b/Changelog index a96e350e096..a6508cd8ac0 100644 --- a/Changelog +++ b/Changelog @@ -1,7 +1,196 @@ Entries are sorted chronologically from oldest to youngest within each release, releases are sorted from youngest to oldest. -version : +version 4.4.1: +- avcodec/flac_parser: Consider AV_INPUT_BUFFER_PADDING_SIZE +- avcodec/ttadsp: Fix integer overflows in tta_filter_process_c() +- avutil/mathematics: Document av_rescale_rnd() behavior on non int64 results +- avcodec/utils: Ensure 8x8 alignment for ARGO in avcodec_align_dimensions2() +- avformat/matroskadec: Reset state also on failure in matroska_reset_status() +- avformat/wavdec: Check smv_block_size +- avformat/rmdec: Check for multiple audio_stream_info +- avcodec/apedec: Use 64bit to avoid overflow +- avcodec/apedec: Fix undefined integer overflow in long_filter_ehigh_3830() +- oavformat/avidec: Check offset in odml +- avformat/mpegts: use actually read packet size in mpegts_resync special case +- fftools/ffmpeg: Fix crash when flushing non-fully setup output stream +- avfilter/scale_npp: fix non-aligned output frame dimensions +- Revert "avformat/hlsenc: compute video_keyframe_size after write keyframe" +- Changelog: update +- swscale/alphablend: Fix slice handling +- avcodec/apedec: Fix integer overflow in filter_fast_3320() +- avformat/mov: Fix last mfra check +- avcodec/mxpegdec: Check for AVDISCARD_ALL +- avcodec/flicvideo: Check remaining bytes in FLI*COPY +- avcodec/utils: ARGO writes 4x4 blocks without regard to the image dimensions +- avcodec/cbs_h265_syntax_template: Limit sps_num_palette_predictor_initializer_minus1 to 127 +- avcodec/snowdec: Maintain avmv buffer +- avcodec/mpeg12dec: Do not put mpeg_f_code into an invalid state on error return +- avcodec/mpegvideo_enc: Limit bitrate tolerance to the representable +- avcodec/apedec: Fix integer overflow in intermediate +- avformat/mvdec: Do not set invalid sample rate +- avformat/sbgdec: Check for t0 overflow in expand_tseq() +- avformat/rmdec: Use 64bit for intermediate for DEINT_ID_INT4 +- avformat/sbgdec: Check opt_duration and start for overflow +- avcodec/exr: Fix undefined integer multiplication +- avformat/mov: Check for duplicate clli +- avformat/utils: Ignore negative duration in codec_info_duration computation +- avformat/jacosubdec: Check for min in t overflow in get_shift() +- avformat/mxfdec: check channel number in mxf_get_d10_aes3_packet() +- (origin/release/4.4) avcodec/wmadec: handle run_level_decode error +- avcodec/wma: Return specific error code +- avcodec/dxva2_av1: fix superres_denom parameter +- avcodec/libdav1d: fix compilation after recent libdav1d API changes +- Changelog: update +- avcodec/utils: don't return negative values in av_get_audio_frame_duration() +- avcodec/jpeg2000dec: Check that atom header is within bytsetream +- avcodec/apedec: Fix 2 integer overflows in filter_3800() +- 
avcodec/xpmdec: Move allocations down after more error checks +- avcodec/argo: Move U, fix shift +- avformat/mov: Check dts for overflow in mov_read_trun() +- avformat/avidec: Use 64bit for frame number in odml index parsing +- avcodec/mjpegbdec: Skip SOS on AVDISCARD_ALL as does mjpeg +- avcodec/mjpegdec: Check for bits left in mjpeg_decode_scan_progressive_ac() +- avformat/adtsenc: return value check for init_get_bits in adts_decode_extradata +- avcodec/webp: Check available space in loop in decode_entropy_coded_image() +- avcodec/h264dec: use picture parameters in ff_print_debug_info2() +- avcodec/vc1dec: ff_print_debug_info() does not support WMV3 field_mode +- avcodec/frame_thread_encoder: Free AVCodecContext structure on error during init +- avcodec/faxcompr: Check for end of input in cmode == 1 in decode_group3_2d_line() +- avcodec/vc1dec: Disable error concealment for *IMAGE +- avcodec/sbrdsp_fixed: Fix negation overflow in sbr_neg_odd_64_c() +- avcodec/argo: Check for even dimensions +- avformat/wtvdec: Check for EOF before seeking back in parse_media_type() +- avformat/mpc8: Check first keyframe position for overflow +- avcodec/exr: Check ac_count +- avformat/wavdec: Use 64bit in new_pos computation +- avformat/sbgdec: Check for overflow in timestamp preparation +- avformat/dsicin: Check packet size for overflow +- avformat/dsfdec: Change order of operations in bitrate computation +- avformat/bfi: check nframes +- avformat/avidec: fix position overflow in avi_load_index() +- avformat/asfdec_f: Check sizeX against padding +- avformat/aiffdec: Check for size overflow in header parsing +- avcodec/aaccoder: Add minimal bias in search_for_ms() +- avformat/mov: Fix incorrect overflow detection in mov_read_sidx() +- avformat/mov: Avoid undefined overflow in time_offset calculation +- avfilter/af_drmeter: Check that there is data +- avfilter/vf_fftdnoiz: Use lrintf() in export_row8() +- avfilter/vf_mestimate: Check b_count +- avformat/mov: do not ignore errors in mov_metadata_hmmt() +- avformat/mxfdec: Check size for shrinking +- avcodec/dnxhddec: check and propagate function return value +- swscale/slice: Fix wrong return on error +- avcodec/aacdec_template: Avoid some invalid values to be set by decode_audio_specific_config_gb() +- swscale/slice: Check slice for allocation failure +- avformat/matroskadec: Fix handling of huge default durations +- avcodec/lpc: check for zero err in normalization in compute_lpc_coefs() +- avcodec/j2kenc: Check for av_strtok() failure +- avformat/ftp: Check for av_strtok() failure +- tools/cws2fws: Check read() for failure +- avcodec/cpia: Fix missing src_size update +- avcodec/exr: Better size checks +- avcodec/clearvideo: Check tile_size to be not too large +- avcodec/utils: Use 64bit for intermediate in AV_CODEC_ID_ADPCM_THP* duration calculation +- avformat/aaxdec: Check avio_seek() in header reading +- avcodec/hevc_sei: Use get_bits_long() for time_offset_value +- avformat/rmdec: Check old_format len for overflow +- avformat/realtextdec: Check the pts difference before using it for the duration computation +- avformat/qcp: Avoid negative nb_rates +- avformat/pp_bnk: Use 64bit in bitrate computation +- avformat/nutdec: Check tmp_size +- avformat/msf: Check that channels doesnt overflow during extradata construction +- avformat/subtitles: Check pts difference before use +- avformat/mpc8: Check for position overflow in mpc8_handle_chunk() +- avformat/mccdec: Fix overflows in num/den +- avformat/iff: Use 64bit in duration computation +- avformat/dxa: 
Check fps to be within the supported range more precissely +- avcodec/iff: Only write palette to plane 1 if its PAL8 +- avformat/tta: Check for EOF in index reading loop +- avfilter/vf_scale: set the RGB matrix coefficients in case of RGB +- avfilter/vf_scale: reset color matrix in case of identity & non-RGB +- ffmpeg: fix order between field order autodetection and override +- avcodec/h264_slice: clear old slice POC values on parsing failure +- avfilter/f_metadata: do not return the frame early if there is no metadata +- ffbuild: Avoid using the --preprocessor argument to windres +- avcodec/crystalhd: signal that the decoder sets all output frame properties +- avcodec/cuviddec: signal that the decoder sets all output frame properties +- avcodec/decode: reindent after the previous commit +- avcodec/decode: add an internal codec flag to signal a decoder sets all output frame properties +- avcodec/decode: fetch packets from the pkt_props FIFO on every frame returned +- Update missed irc links +- avformat/rpl: The associative law doesnt hold for signed integers in C +- avcodec/faxcompr: Check available bits in decode_uncompressed() +- avcodec/faxcompr: Check if bits are available before reading in cmode == 9 || cmode == 10 +- avformat/utils: Avoid overflow in codec_info_duration computation for subtitles +- avformat/utils: check dts/duration to be representable before using them +- avcodec/utils: do "calc from frame_bytes, channels, and block_align" in 64bit +- avcodec/ttadata: Add sentinel at the end of ff_tta_shift_1 +- avformat/mov: Check for duplicate mdcv +- avfilter/vf_dctdnoiz: Check threads +- avfilter/vf_ciescope: Fix undefined behavior in rgb_to_xy() with black +- avcodec/dpx: fix off by 1 in bits_per_color check +- avformat/rpl: Check for EOF and zero framesize +- avcodec/vc2enc: Check for non negative slice bounds +- avformat/rpl: Use 64bit in bitrate computation and check it +- avcodec/mpegvideo_enc: Reset stuffing bits if they are not supported +- avcodec/svq1enc: Do not print debug RD value before it has been computed +- avcodec/aacpsy: Check bandwidth +- avcodec/aacenc: Do not divide by lambda_count if it is 0 +- avcodec/aacenc: Use FLT_EPSILON for lambda minimum +- avfilter/vf_yadif: Fix handing of tiny images +- avfilter/vf_vmafmotion: Check dimensions +- avformat/movenc: Check pal_size before use +- avcodec/lpc: Avoid floating point division by 0 +- avcodec/aacpsy: Avoid floating point division by 0 of norm_fac +- avcodec/aacenc: Avoid 0 lambda +- avcodec/exr: More strictly check dc_count +- avcodec/exr: x/ymax cannot be INT_MAX +- avformat/avio: Check av_opt_copy() for failure +- avformat/moflex: Remove unneeded format variable +- avformat/fifo: check for flushed packets and timeshift +- avcodec/clearvideo: Check for 0 tile_shift +- avcodec/vc1: Check remaining bits in ff_vc1_parse_frame_header() +- avformat/mov: Ignore duplicate CoLL +- avformat/mov: Limit nb_chapter_tracks to input size +- avformat/utils: Use 64bit earlier in r_frame_rate check +- avcodec/alsdec: Fix decoding error with mono audio files +- avformat/mvdec: Check sample rate in parse_audio_var() +- avcodec/faxcompr: Check for end of bitstream in decode_group3_1d_line() and decode_group3_2d_line() +- avcodec/utils: treat PAL8 for jpegs similar to other colorspaces +- avcodec/jpeglsdec: Set alpha plane in PAL8 so image is not 100% transparent +- avformat/asfdec_o: Use ff_get_extradata() +- avformat/id3v2: Check end for overflow in id3v2_parse() +- avformat/mxfdec: Fix file position addition +- 
avformat/wtvdec: Improve size overflow checks in parse_chunks() +- avcodec/faxcompr: Check remaining bits on error in decode_group3_1d_line() +- avformat/mov: check for pts overflow in mov_read_sidx() +- avcodec/utils: Check ima wav duration for overflow +- avcodec/rv10: Execute whole size check earlier for rv20 +- avformat/cafdec: Check channels +- avcodec/exr: increase vlc depth +- avcodec/dpx: Check bits_per_color earlier +- avformat/mvi: Check audio_data_size to be non negative +- avcodec/nvenc: disable s12m timestamps by default +- aarch64: hevc_idct: Fix overflows in idct_dc +- avcodec/vaapi_av1: pass full buffer size for each tile +- avcodec/videotoolboxenc: #define TARGET_CPU_ARM64 to 0 if not provided by the SDK +- lavc/pngdec: fix updating reference frames for APNG_DISPOSE_OP_BACKGROUND +- ffmpeg: return no chosen output if an uninitialized stream is unavailable +- avcodec/h263, h263data: Move ff_h263_init_rl_inter to h263.c +- configure: Add missing mpegvideo dependency for IPU decoder +- avcodec/ttmlenc: Don't confuse capabilities and caps_internal +- avformat/mpegts: add missing sample_rate value to Opus extradata +- avformat/movenc: fix writing dOps atoms +- avcodec/av1_metadata: don't store the inserted TD OBU in stack +- avcodec/nellymoserenc: Fix segfault when using unsupported channels/rate +- avutil/cpu: Use HW_NCPUONLINE to detect # of online CPUs with OpenBSD +- avcodec/nvenc: fix lossless tuning logic +- avfilter/overlay_cuda: check av_buffer_ref result +- avfilter/overlay_cuda: hold explicit reference to hw_device_ctx +- avformat/matroskaenc: Fix leak when writing attachment without filename + +version 4.4: - AudioToolbox output device - MacCaption demuxer - PGX decoder diff --git a/RELEASE b/RELEASE index 515be8f918d..cca25a93cd0 100644 --- a/RELEASE +++ b/RELEASE @@ -1 +1 @@ -4.4 +4.4.1 diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 67339dca853..312ec6f2191 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -11,5 +11,5 @@ We hope you will like this release as much as we enjoyed working on it, and as usual, if you have any questions about it, or any FFmpeg related topic, - feel free to join us on the #ffmpeg IRC channel (on irc.freenode.net) or ask + feel free to join us on the #ffmpeg IRC channel (on irc.libera.chat) or ask on the mailing-lists. diff --git a/VERSION b/VERSION index 515be8f918d..cca25a93cd0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.4 +4.4.1 diff --git a/configure b/configure index d7a3f507e83..4ba72bf84b6 100755 --- a/configure +++ b/configure @@ -536,7 +536,7 @@ die(){ If you think configure made a mistake, make sure you are using the latest version from Git. If the latest version fails, report the problem to the -ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.freenode.net. +ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.libera.chat. 
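A number of the entries above apply the same fix pattern ("Use 64bit to avoid overflow", "Use 64bit for intermediate for DEINT_ID_INT4", "do "calc from frame_bytes, channels, and block_align" in 64bit"): widen one operand before the arithmetic so every intermediate is computed in 64 bits. The following stand-alone sketch is not from the patch; the function names and values are invented to show the idiom.

    #include <stdint.h>
    #include <stdio.h>

    /* int * int is evaluated in 32 bits and can overflow (undefined
     * behaviour) even when the result is assigned to an int64_t. */
    static int64_t bitrate_bad(int block_size, int rate)
    {
        return block_size * rate * 8;      /* 32-bit intermediate, may overflow */
    }

    /* Widening one operand first keeps the whole computation in 64 bits,
     * which is what the fixes above do. */
    static int64_t bitrate_good(int block_size, int rate)
    {
        return 8LL * block_size * rate;    /* 64-bit intermediate */
    }

    int main(void)
    {
        /* 1 MiB blocks at 48000 blocks/s overflow a 32-bit intermediate. */
        printf("%lld vs %lld\n",
               (long long)bitrate_bad(1 << 20, 48000),
               (long long)bitrate_good(1 << 20, 48000));
        return 0;
    }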
EOF if disabled logging; then cat <<EOF diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c --- a/fftools/ffmpeg.c +++ b/fftools/ffmpeg.c @@ ... @@ static void flush_encoders(void) AVPacket *pkt = ost->pkt; int pkt_size; + if (!pkt) + break; + switch (enc->codec_type) { case AVMEDIA_TYPE_AUDIO: desc = "audio"; @@ -3463,12 +3466,7 @@ static int init_output_stream_encode(OutputStream *ost, AVFrame *frame) enc_ctx->bits_per_raw_sample = frame_bits_per_raw_sample; } - if (ost->top_field_first == 0) { - enc_ctx->field_order = AV_FIELD_BB; - } else if (ost->top_field_first == 1) { - enc_ctx->field_order = AV_FIELD_TT; - } - + // Field order: autodetection if (frame) { if (enc_ctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME) && ost->top_field_first >= 0) @@ -3483,6 +3481,13 @@ static int init_output_stream_encode(OutputStream *ost, AVFrame *frame) enc_ctx->field_order = AV_FIELD_PROGRESSIVE; } + // Field order: override + if (ost->top_field_first == 0) { + enc_ctx->field_order = AV_FIELD_BB; + } else if (ost->top_field_first == 1) { + enc_ctx->field_order = AV_FIELD_TT; + } + if (ost->forced_keyframes) { if (!strncmp(ost->forced_keyframes, "expr:", 5)) { ret = av_expr_parse(&ost->forced_keyframes_pexpr, ost->forced_keyframes+5, @@ -3950,7 +3955,7 @@ static OutputStream *choose_output(void) ost->st->index, ost->st->id, ost->initialized, ost->inputs_done, ost->finished); if (!ost->initialized && !ost->inputs_done) - return ost; + return ost->unavailable ? NULL : ost; if (!ost->finished && opts < opts_min) { opts_min = opts; diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index baa82489b19..11b0559e1cd 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -843,25 +843,25 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe) sce0->ics.swb_sizes[g], sce0->sf_idx[w*16+g], sce0->band_type[w*16+g], - lambda / band0->threshold, INFINITY, &b1, NULL, 0); + lambda / (band0->threshold + FLT_MIN), INFINITY, &b1, NULL, 0); dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], R34, sce1->ics.swb_sizes[g], sce1->sf_idx[w*16+g], sce1->band_type[w*16+g], - lambda / band1->threshold, INFINITY, &b2, NULL, 0); + lambda / (band1->threshold + FLT_MIN), INFINITY, &b2, NULL, 0); dist2 += quantize_band_cost(s, M, M34, sce0->ics.swb_sizes[g], mididx, midcb, - lambda / minthr, INFINITY, &b3, NULL, 0); + lambda / (minthr + FLT_MIN), INFINITY, &b3, NULL, 0); dist2 += quantize_band_cost(s, S, S34, sce1->ics.swb_sizes[g], sididx, sidcb, - mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0); + mslambda / (minthr * bmax + FLT_MIN), INFINITY, &b4, NULL, 0); B0 += b1+b2; B1 += b3+b4; dist1 -= b1+b2; diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c index 98f77a3ad74..3d7f3257db9 100644 --- a/libavcodec/aacdec_template.c +++ b/libavcodec/aacdec_template.c @@ -1076,14 +1076,18 @@ static int decode_audio_specific_config_gb(AACContext *ac, { int i, ret; GetBitContext gbc = *gb; + MPEG4AudioConfig m4ac_bak = *m4ac; - if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) + if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) { + *m4ac = m4ac_bak; return AVERROR_INVALIDDATA; + } if (m4ac->sampling_index > 12) { av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index); + *m4ac = m4ac_bak; return AVERROR_INVALIDDATA; } if (m4ac->object_type == AOT_ER_AAC_LD && @@ -1091,6 +1095,7 @@ static int decode_audio_specific_config_gb(AACContext *ac, av_log(avctx, AV_LOG_ERROR, "invalid low delay sampling rate index %d\n", m4ac->sampling_index); + *m4ac = m4ac_bak; return AVERROR_INVALIDDATA; } diff --git a/libavcodec/aacenc.c
b/libavcodec/aacenc.c index 070a2e706ab..be5e8e21084 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -28,6 +28,7 @@ * TODOs: * add sane pulse detection ***********************************/ +#include <float.h> #include "libavutil/libm.h" #include "libavutil/float_dsp.h" @@ -852,7 +853,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, /* Not so fast though */ ratio = sqrtf(ratio); } - s->lambda = FFMIN(s->lambda * ratio, 65536.f); + s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f); /* Keep iterating if we must reduce and lambda is in the sky */ if (ratio > 0.9f && ratio < 1.1f) { @@ -897,7 +898,7 @@ static av_cold int aac_encode_end(AVCodecContext *avctx) { AACEncContext *s = avctx->priv_data; - av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count); + av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN); ff_mdct_end(&s->mdct1024); ff_mdct_end(&s->mdct128); diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c index fca692cb153..76458783cea 100644 --- a/libavcodec/aacpsy.c +++ b/libavcodec/aacpsy.c @@ -308,6 +308,9 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) { const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx); const float num_bark = calc_bark((float)bandwidth); + if (bandwidth <= 0) + return AVERROR(EINVAL); + ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext)); if (!ctx->model_priv_data) return AVERROR(ENOMEM); @@ -794,7 +797,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, if (pe < 1.15f * desired_pe) { /* 6.6.1.3.6 "Final threshold modification by linearization" */ - norm_fac = 1.0f / norm_fac; + norm_fac = norm_fac ? 1.0f / norm_fac : 0; for (w = 0; w < wi->num_windows*16; w += 16) { for (g = 0; g < num_bands; g++) { AacPsyBand *band = &pch->band[w+g]; diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S index 28c11e632c7..0869431294d 100644 --- a/libavcodec/aarch64/hevcdsp_idct_neon.S +++ b/libavcodec/aarch64/hevcdsp_idct_neon.S @@ -573,14 +573,13 @@ idct_16x16 10 // void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs) .macro idct_dc size, bitdepth function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1 - movi v1.8h, #((1 << (14 - \bitdepth))+1) ld1r {v4.8h}, [x0] - add v4.8h, v4.8h, v1.8h - sshr v0.8h, v4.8h, #(15 - \bitdepth) - sshr v1.8h, v4.8h, #(15 - \bitdepth) + srshr v4.8h, v4.8h, #1 + srshr v0.8h, v4.8h, #(14 - \bitdepth) + srshr v1.8h, v4.8h, #(14 - \bitdepth) .if \size > 4 - sshr v2.8h, v4.8h, #(15 - \bitdepth) - sshr v3.8h, v4.8h, #(15 - \bitdepth) + srshr v2.8h, v4.8h, #(14 - \bitdepth) + srshr v3.8h, v4.8h, #(14 - \bitdepth) .if \size > 16 /* dc 32x32 */ mov x2, #4 1: diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c index a8c3433fa86..e736905a76d 100644 --- a/libavcodec/alsdec.c +++ b/libavcodec/alsdec.c @@ -1632,7 +1632,7 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame) AVCodecContext *avctx = ctx->avctx; GetBitContext *gb = &ctx->gb; unsigned int div_blocks[32]; ///< block sizes.
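The aacenc.c and aacpsy.c hunks above share one theme: floating-point divisions guarded against zero (lambda clamped to at least FLT_EPSILON, Qavg reported as NAN for a zero lambda_count, norm_fac inverted only when non-zero). A stand-alone sketch of the idiom; the function names here are invented for illustration:

    #include <float.h>
    #include <math.h>
    #include <stdio.h>

    /* Keep a rate-control multiplier strictly positive and bounded,
     * mirroring the av_clipf(..., FLT_EPSILON, 65536.f) change above. */
    static float clamp_lambda(float lambda, float ratio)
    {
        float v = lambda * ratio;          /* ratio may legitimately be 0 */
        if (v < FLT_EPSILON)
            v = FLT_EPSILON;               /* never let lambda collapse to 0 */
        return v > 65536.0f ? 65536.0f : v;
    }

    /* Dividing by a zero count was the bug; report NAN instead. */
    static float report_qavg(float lambda_sum, int lambda_count)
    {
        return lambda_count ? lambda_sum / lambda_count : NAN;
    }

    int main(void)
    {
        printf("%g %g\n", clamp_lambda(1.0f, 0.0f), report_qavg(0.0f, 0));
        return 0;
    }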
- unsigned int c; + int c; unsigned int js_blocks[2]; uint32_t bs_info = 0; int ret; @@ -1810,14 +1810,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr, else ctx->cur_frame_length = sconf->frame_length; - ctx->highest_decoded_channel = 0; + ctx->highest_decoded_channel = -1; // decode the frame data if ((invalid_frame = read_frame_data(ctx, ra_frame)) < 0) av_log(ctx->avctx, AV_LOG_WARNING, "Reading frame data failed. Skipping RA unit.\n"); - if (ctx->highest_decoded_channel == 0) + if (ctx->highest_decoded_channel == -1) { + av_log(ctx->avctx, AV_LOG_WARNING, + "No channel data decoded.\n"); return AVERROR_INVALIDDATA; + } ctx->frame_id++; diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c index e0c6b6bb8b8..4684e40a466 100644 --- a/libavcodec/apedec.c +++ b/libavcodec/apedec.c @@ -879,7 +879,7 @@ static av_always_inline int filter_fast_3320(APEPredictor *p, } predictionA = p->buf[delayA] * 2U - p->buf[delayA - 1]; - p->lastA[filter] = decoded + ((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9); + p->lastA[filter] = decoded + (unsigned)((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9); if ((decoded ^ predictionA) > 0) p->coeffsA[filter][0]++; @@ -909,8 +909,8 @@ static av_always_inline int filter_3800(APEPredictor *p, return predictionA; } d2 = p->buf[delayA]; - d1 = (p->buf[delayA] - p->buf[delayA - 1]) * 2U; - d0 = p->buf[delayA] + ((p->buf[delayA - 2] - p->buf[delayA - 1]) * 8U); + d1 = (p->buf[delayA] - (unsigned)p->buf[delayA - 1]) * 2; + d0 = p->buf[delayA] + ((p->buf[delayA - 2] - (unsigned)p->buf[delayA - 1]) * 8); d3 = p->buf[delayB] * 2U - p->buf[delayB - 1]; d4 = p->buf[delayB]; @@ -979,7 +979,7 @@ static void long_filter_ehigh_3830(int32_t *buffer, int length) for (j = 7; j > 0; j--) delay[j] = delay[j - 1]; delay[0] = buffer[i]; - buffer[i] -= dotprod >> 9; + buffer[i] -= (unsigned)(dotprod >> 9); } } @@ -1337,7 +1337,7 @@ static void do_apply_filter(APEContext *ctx, int version, APEFilter *f, absres = FFABSU(res); if (absres) *f->adaptcoeffs = APESIGN(res) * - (8 << ((absres > f->avg * 3) + (absres > f->avg * 4 / 3))); + (8 << ((absres > f->avg * 3LL) + (absres > (f->avg + f->avg / 3)))); /* equivalent to the following code if (absres <= f->avg * 4 / 3) *f->adaptcoeffs = APESIGN(res) * 8; diff --git a/libavcodec/argo.c b/libavcodec/argo.c index 7358d102e32..f633ec2691f 100644 --- a/libavcodec/argo.c +++ b/libavcodec/argo.c @@ -59,7 +59,7 @@ static int decode_pal8(AVCodecContext *avctx, uint32_t *pal) return AVERROR_INVALIDDATA; for (int i = 0; i < count; i++) - pal[start + i] = (0xFF << 24U) | bytestream2_get_be24u(gb); + pal[start + i] = (0xFFU << 24) | bytestream2_get_be24u(gb); return 0; } @@ -685,6 +685,11 @@ static av_cold int decode_init(AVCodecContext *avctx) return AVERROR_PATCHWELCOME; } + if (avctx->width % 2 || avctx->height % 2) { + avpriv_request_sample(s, "Odd dimensions\n"); + return AVERROR_PATCHWELCOME; + } + s->frame = av_frame_alloc(); if (!s->frame) return AVERROR(ENOMEM); diff --git a/libavcodec/av1_metadata_bsf.c b/libavcodec/av1_metadata_bsf.c index 328db5c0da2..1fb85d88b7e 100644 --- a/libavcodec/av1_metadata_bsf.c +++ b/libavcodec/av1_metadata_bsf.c @@ -28,6 +28,7 @@ typedef struct AV1MetadataContext { CBSBSFContext common; int td; + AV1RawOBU td_obu; int color_primaries; int transfer_characteristics; @@ -107,12 +108,11 @@ static int av1_metadata_update_fragment(AVBSFContext *bsf, AVPacket *pkt, CodedBitstreamFragment *frag) { AV1MetadataContext *ctx = bsf->priv_data; - AV1RawOBU td, *obu; int err, i; 
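The argo.c palette hunk above changes (0xFF << 24U) to (0xFFU << 24). A minimal stand-alone illustration of why the position of the U suffix matters:

    #include <stdint.h>
    #include <stdio.h>

    /* 0xFF is a signed int, so 0xFF << 24 shifts a 1 into the sign bit of
     * a 32-bit int (undefined behaviour). Making the shifted value
     * unsigned, 0xFFU << 24, is well defined; a U on the shift count
     * alone, as in 0xFF << 24U, changes nothing. */
    int main(void)
    {
        uint32_t pal_entry = (0xFFU << 24) | 0x123456u; /* opaque ARGB */
        printf("0x%08X\n", pal_entry);
        return 0;
    }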
for (i = 0; i < frag->nb_units; i++) { if (frag->units[i].type == AV1_OBU_SEQUENCE_HEADER) { - obu = frag->units[i].content; + AV1RawOBU *obu = frag->units[i].content; err = av1_metadata_update_sequence_header(bsf, &obu->obu.sequence_header); if (err < 0) return err; @@ -124,12 +124,8 @@ static int av1_metadata_update_fragment(AVBSFContext *bsf, AVPacket *pkt, if (ctx->td == BSF_ELEMENT_REMOVE) ff_cbs_delete_unit(frag, 0); } else if (pkt && ctx->td == BSF_ELEMENT_INSERT) { - td = (AV1RawOBU) { - .header.obu_type = AV1_OBU_TEMPORAL_DELIMITER, - }; - err = ff_cbs_insert_unit_content(frag, 0, AV1_OBU_TEMPORAL_DELIMITER, - &td, NULL); + &ctx->td_obu, NULL); if (err < 0) { av_log(bsf, AV_LOG_ERROR, "Failed to insert Temporal Delimiter.\n"); return err; @@ -155,6 +151,12 @@ static const CBSBSFType av1_metadata_type = { static int av1_metadata_init(AVBSFContext *bsf) { + AV1MetadataContext *ctx = bsf->priv_data; + + ctx->td_obu = (AV1RawOBU) { + .header.obu_type = AV1_OBU_TEMPORAL_DELIMITER, + }; + return ff_cbs_bsf_generic_init(bsf, &av1_metadata_type); } diff --git a/libavcodec/cbs_h265_syntax_template.c b/libavcodec/cbs_h265_syntax_template.c index 5d216aad365..921daea68c7 100644 --- a/libavcodec/cbs_h265_syntax_template.c +++ b/libavcodec/cbs_h265_syntax_template.c @@ -728,7 +728,7 @@ static int FUNC(sps_scc_extension)(CodedBitstreamContext *ctx, RWContext *rw, flag(sps_palette_predictor_initializer_present_flag); if (current->sps_palette_predictor_initializer_present_flag) { - ue(sps_num_palette_predictor_initializer_minus1, 0, 128); + ue(sps_num_palette_predictor_initializer_minus1, 0, 127); for (comp = 0; comp < (current->chroma_format_idc ? 3 : 1); comp++) { int bit_depth = comp == 0 ? current->bit_depth_luma_minus8 + 8 : current->bit_depth_chroma_minus8 + 8; diff --git a/libavcodec/clearvideo.c b/libavcodec/clearvideo.c index 79ba88857cb..a56e09069bf 100644 --- a/libavcodec/clearvideo.c +++ b/libavcodec/clearvideo.c @@ -722,8 +722,8 @@ static av_cold int clv_decode_init(AVCodecContext *avctx) } c->tile_shift = av_log2(c->tile_size); - if (1U << c->tile_shift != c->tile_size) { - av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2.\n", c->tile_size); + if (1U << c->tile_shift != c->tile_size || c->tile_shift < 1 || c->tile_shift > 30) { + av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2 > 1 and < 2^31\n", c->tile_size); return AVERROR_INVALIDDATA; } diff --git a/libavcodec/cpia.c b/libavcodec/cpia.c index 5f12a99a83a..435834d6170 100644 --- a/libavcodec/cpia.c +++ b/libavcodec/cpia.c @@ -111,6 +111,7 @@ static int cpia_decode_frame(AVCodecContext *avctx, // Read line length, two byte little endian linelength = AV_RL16(src); src += 2; + src_size -= 2; if (src_size < linelength) { frame->decode_error_flags = FF_DECODE_ERROR_INVALID_BITSTREAM; diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c index 228803183af..886488602ed 100644 --- a/libavcodec/crystalhd.c +++ b/libavcodec/crystalhd.c @@ -785,6 +785,7 @@ static int crystalhd_receive_frame(AVCodecContext *avctx, AVFrame *frame) .flush = flush, \ .bsfs = bsf_name, \ .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \ + .caps_internal = FF_CODEC_CAP_SETS_FRAME_PROPS, \ .pix_fmts = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE}, \ .wrapper_name = "crystalhd", \ }; diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index ec57afdefe7..5c135d17144 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -1150,6 +1150,7 @@ static const 
AVCodecHWConfigInternal *const cuvid_hw_configs[] = { .flush = cuvid_flush, \ .bsfs = bsf_name, \ .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \ + .caps_internal = FF_CODEC_CAP_SETS_FRAME_PROPS, \ .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \ AV_PIX_FMT_NV12, \ AV_PIX_FMT_P010, \ diff --git a/libavcodec/decode.c b/libavcodec/decode.c index 5a00aeedaef..936e5d63da8 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -233,9 +233,11 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt) if (ret < 0) return ret; - ret = extract_packet_props(avctx->internal, pkt); - if (ret < 0) - goto finish; + if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) { + ret = extract_packet_props(avctx->internal, pkt); + if (ret < 0) + goto finish; + } ret = apply_param_change(avctx, pkt); if (ret < 0) @@ -502,11 +504,13 @@ FF_ENABLE_DEPRECATION_WARNINGS pkt->data += consumed; pkt->size -= consumed; - avci->last_pkt_props->size -= consumed; // See extract_packet_props() comment. pkt->pts = AV_NOPTS_VALUE; pkt->dts = AV_NOPTS_VALUE; - avci->last_pkt_props->pts = AV_NOPTS_VALUE; - avci->last_pkt_props->dts = AV_NOPTS_VALUE; + if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) { + avci->last_pkt_props->size -= consumed; // See extract_packet_props() comment. + avci->last_pkt_props->pts = AV_NOPTS_VALUE; + avci->last_pkt_props->dts = AV_NOPTS_VALUE; + } } if (got_frame) @@ -548,6 +552,11 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame) if (ret == AVERROR_EOF) avci->draining_done = 1; + if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS) && + IS_EMPTY(avci->last_pkt_props) && av_fifo_size(avci->pkt_props) >= sizeof(*avci->last_pkt_props)) + av_fifo_generic_read(avci->pkt_props, + avci->last_pkt_props, sizeof(*avci->last_pkt_props), NULL); + if (!ret) { frame->best_effort_timestamp = guess_correct_pts(avctx, frame->pts, @@ -1738,39 +1747,37 @@ int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame) { AV_PKT_DATA_S12M_TIMECODE, AV_FRAME_DATA_S12M_TIMECODE }, }; - if (IS_EMPTY(pkt) && av_fifo_size(avctx->internal->pkt_props) >= sizeof(*pkt)) - av_fifo_generic_read(avctx->internal->pkt_props, - pkt, sizeof(*pkt), NULL); - - frame->pts = pkt->pts; + if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) { + frame->pts = pkt->pts; #if FF_API_PKT_PTS FF_DISABLE_DEPRECATION_WARNINGS - frame->pkt_pts = pkt->pts; + frame->pkt_pts = pkt->pts; FF_ENABLE_DEPRECATION_WARNINGS #endif - frame->pkt_pos = pkt->pos; - frame->pkt_duration = pkt->duration; - frame->pkt_size = pkt->size; - - for (int i = 0; i < FF_ARRAY_ELEMS(sd); i++) { - buffer_size_t size; - uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size); - if (packet_sd) { - AVFrameSideData *frame_sd = av_frame_new_side_data(frame, - sd[i].frame, - size); - if (!frame_sd) - return AVERROR(ENOMEM); - - memcpy(frame_sd->data, packet_sd, size); + frame->pkt_pos = pkt->pos; + frame->pkt_duration = pkt->duration; + frame->pkt_size = pkt->size; + + for (int i = 0; i < FF_ARRAY_ELEMS(sd); i++) { + buffer_size_t size; + uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size); + if (packet_sd) { + AVFrameSideData *frame_sd = av_frame_new_side_data(frame, + sd[i].frame, + size); + if (!frame_sd) + return AVERROR(ENOMEM); + + memcpy(frame_sd->data, packet_sd, size); + } } - } - add_metadata_from_side_data(pkt, frame); + add_metadata_from_side_data(pkt, frame); - if (pkt->flags & 
AV_PKT_FLAG_DISCARD) { - frame->flags |= AV_FRAME_FLAG_DISCARD; - } else { - frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD); + if (pkt->flags & AV_PKT_FLAG_DISCARD) { + frame->flags |= AV_FRAME_FLAG_DISCARD; + } else { + frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD); + } } frame->reordered_opaque = avctx->reordered_opaque; diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c index c78d55aee51..9b475a6979f 100644 --- a/libavcodec/dnxhddec.c +++ b/libavcodec/dnxhddec.c @@ -112,6 +112,7 @@ static av_cold int dnxhd_decode_init(AVCodecContext *avctx) static int dnxhd_init_vlc(DNXHDContext *ctx, uint32_t cid, int bitdepth) { + int ret; if (cid != ctx->cid) { const CIDEntry *cid_table = ff_dnxhd_get_cid_table(cid); @@ -132,19 +133,26 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, uint32_t cid, int bitdepth) ff_free_vlc(&ctx->dc_vlc); ff_free_vlc(&ctx->run_vlc); - init_vlc(&ctx->ac_vlc, DNXHD_VLC_BITS, 257, + if ((ret = init_vlc(&ctx->ac_vlc, DNXHD_VLC_BITS, 257, ctx->cid_table->ac_bits, 1, 1, - ctx->cid_table->ac_codes, 2, 2, 0); - init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, bitdepth > 8 ? 14 : 12, + ctx->cid_table->ac_codes, 2, 2, 0)) < 0) + goto out; + if ((ret = init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, bitdepth > 8 ? 14 : 12, ctx->cid_table->dc_bits, 1, 1, - ctx->cid_table->dc_codes, 1, 1, 0); - init_vlc(&ctx->run_vlc, DNXHD_VLC_BITS, 62, + ctx->cid_table->dc_codes, 1, 1, 0)) < 0) + goto out; + if ((ret = init_vlc(&ctx->run_vlc, DNXHD_VLC_BITS, 62, ctx->cid_table->run_bits, 1, 1, - ctx->cid_table->run_codes, 2, 2, 0); + ctx->cid_table->run_codes, 2, 2, 0)) < 0) + goto out; ctx->cid = cid; } - return 0; + ret = 0; +out: + if (ret < 0) + av_log(ctx->avctx, AV_LOG_ERROR, "init_vlc failed\n"); + return ret; } static int dnxhd_get_profile(int cid) diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c index 5372e3d5861..915d94077e6 100644 --- a/libavcodec/dpx.c +++ b/libavcodec/dpx.c @@ -242,6 +242,9 @@ static int decode_frame(AVCodecContext *avctx, return AVERROR_PATCHWELCOME; } + if (bits_per_color > 31) + return AVERROR_INVALIDDATA; + buf += 820; avctx->sample_aspect_ratio.num = read32(&buf, endian); avctx->sample_aspect_ratio.den = read32(&buf, endian); @@ -316,7 +319,7 @@ static int decode_frame(AVCodecContext *avctx, minCV = av_int2float(i); maxCV = av_int2float(j); if (bits_per_color >= 1 && - minCV == 0.0f && maxCV == ((1<<bits_per_color) - 1)) { + minCV == 0.0f && maxCV == ((1U<<bits_per_color) - 1)) { avctx->color_range = AVCOL_RANGE_JPEG; } else if (bits_per_color >= 8 && minCV == (1 <<(bits_per_color - 4)) && diff --git a/libavcodec/dxva2_av1.c b/libavcodec/dxva2_av1.c index aa14e473dfa..c30b57799c2 100644 --- a/libavcodec/dxva2_av1.c +++ b/libavcodec/dxva2_av1.c @@ -73,7 +73,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c pp->max_height = seq->max_frame_height_minus_1 + 1; pp->CurrPicTextureIndex = ff_dxva2_get_surface_index(avctx, ctx, h->cur_frame.tf.f); - pp->superres_denom = frame_header->use_superres ? frame_header->coded_denom : AV1_SUPERRES_NUM; + pp->superres_denom = frame_header->use_superres ?
frame_header->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM; pp->bitdepth = get_bit_depth_from_seq(seq); pp->seq_profile = seq->seq_profile; diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 65e5203c317..49ba7fd6de9 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -418,7 +418,7 @@ static int huf_decode(VLC *vlc, GetByteContext *gb, int nbits, int run_sym, init_get_bits(&gbit, gb->buffer, nbits); while (get_bits_left(&gbit) > 0 && oe < no) { - uint16_t x = get_vlc2(&gbit, vlc->table, 12, 2); + uint16_t x = get_vlc2(&gbit, vlc->table, 12, 3); if (x == run_sym) { int run = get_bits(&gbit, 8); @@ -1014,7 +1014,9 @@ static int dwa_uncompress(EXRContext *s, const uint8_t *src, int compressed_size dc_count = AV_RL64(src + 72); ac_compression = AV_RL64(src + 80); - if (compressed_size < 88LL + lo_size + ac_size + dc_size + rle_csize) + if ( compressed_size < (uint64_t)(lo_size | ac_size | dc_size | rle_csize) || compressed_size < 88LL + lo_size + ac_size + dc_size + rle_csize + || ac_count > (uint64_t)INT_MAX/2 + ) return AVERROR_INVALIDDATA; bytestream2_init(&gb, src + 88, compressed_size - 88); @@ -1031,12 +1033,14 @@ static int dwa_uncompress(EXRContext *s, const uint8_t *src, int compressed_size } if (ac_size > 0) { - unsigned long dest_len = ac_count * 2LL; + unsigned long dest_len; GetByteContext agb = gb; if (ac_count > 3LL * td->xsize * s->scan_lines_per_block) return AVERROR_INVALIDDATA; + dest_len = ac_count * 2LL; + av_fast_padded_malloc(&td->ac_data, &td->ac_size, dest_len); if (!td->ac_data) return AVERROR(ENOMEM); @@ -1059,13 +1063,15 @@ static int dwa_uncompress(EXRContext *s, const uint8_t *src, int compressed_size bytestream2_skip(&gb, ac_size); } - if (dc_size > 0) { - unsigned long dest_len = dc_count * 2LL; + { + unsigned long dest_len; GetByteContext agb = gb; - if (dc_count > (6LL * td->xsize * td->ysize + 63) / 64) + if (dc_count != dc_w * dc_h * 3) return AVERROR_INVALIDDATA; + dest_len = dc_count * 2LL; + av_fast_padded_malloc(&td->dc_data, &td->dc_size, FFALIGN(dest_len, 64) * 2); if (!td->dc_data) return AVERROR(ENOMEM); @@ -1795,6 +1801,7 @@ static int decode_header(EXRContext *s, AVFrame *frame) ymax = bytestream2_get_le32(gb); if (xmin > xmax || ymin > ymax || + ymax == INT_MAX || xmax == INT_MAX || (unsigned)xmax - xmin >= INT_MAX || (unsigned)ymax - ymin >= INT_MAX) { ret = AVERROR_INVALIDDATA; diff --git a/libavcodec/faxcompr.c b/libavcodec/faxcompr.c index 3dd64cf7306..b283831dae7 100644 --- a/libavcodec/faxcompr.c +++ b/libavcodec/faxcompr.c @@ -144,6 +144,8 @@ static int decode_uncompressed(AVCodecContext *avctx, GetBitContext *gb, return AVERROR_INVALIDDATA; } cwi = 10 - av_log2(cwi); + if (get_bits_left(gb) < cwi + 1) + return AVERROR_INVALIDDATA; skip_bits(gb, cwi + 1); if (cwi > 5) { newmode = get_bits1(gb); @@ -209,6 +211,8 @@ static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb, unsigned int run = 0; unsigned int t; for (;;) { + if (get_bits_left(gb) <= 0) + return AVERROR_INVALIDDATA; t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2); run += t; if (t < 64) { @@ -227,7 +231,7 @@ static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb, run = 0; mode = !mode; } else if ((int)t == -1) { - if (show_bits(gb, 12) == 15) { + if (get_bits_left(gb) > 12 && show_bits(gb, 12) == 15) { int ret; skip_bits(gb, 12); ret = decode_uncompressed(avctx, gb, &pix_left, &runs, runend, &mode); @@ -254,7 +258,10 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb, unsigned int offs = 0, run = 0; 
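The faxcompr.c hunks in this region all add the same guard: test the number of bits left before every read, so truncated input reaches an error return instead of overreading the buffer. A stand-alone sketch with a toy reader; the type and helpers below are invented for illustration (FFmpeg itself uses GetBitContext and get_bits_left()):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct ToyBitReader {
        const uint8_t *buf;
        size_t size_bits;
        size_t pos;
    } ToyBitReader;

    static long toy_bits_left(const ToyBitReader *r)
    {
        return (long)r->size_bits - (long)r->pos;
    }

    /* Check exhaustion before reading; the caller maps the -1 to an
     * AVERROR_INVALIDDATA-style failure, as the hunks here do. */
    static int toy_get_bit(ToyBitReader *r)
    {
        if (toy_bits_left(r) <= 0)
            return -1;
        int b = (r->buf[r->pos >> 3] >> (7 - (r->pos & 7))) & 1;
        r->pos++;
        return b;
    }

    int main(void)
    {
        const uint8_t data[1] = { 0xA0 };
        ToyBitReader r = { data, 8, 0 };
        int bit, n = 0;
        while ((bit = toy_get_bit(&r)) >= 0)
            n++;                 /* stops cleanly after exactly 8 bits */
        return n == 8 ? 0 : 1;
    }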
while (offs < width) { - int cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1); + int cmode; + if (get_bits_left(gb) <= 0) + return AVERROR_INVALIDDATA; + cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1); if (cmode == -1) { av_log(avctx, AV_LOG_ERROR, "Incorrect mode VLC\n"); return AVERROR_INVALIDDATA; @@ -276,6 +283,8 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb, for (k = 0; k < 2; k++) { run = 0; for (;;) { + if (get_bits_left(gb) <= 0) + return AVERROR_INVALIDDATA; t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2); if (t == -1) { av_log(avctx, AV_LOG_ERROR, "Incorrect code\n"); @@ -299,7 +308,10 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb, mode = !mode; } } else if (cmode == 9 || cmode == 10) { - int xxx = get_bits(gb, 3); + int xxx; + if (get_bits_left(gb) < 3) + return AVERROR_INVALIDDATA; + xxx = get_bits(gb, 3); if (cmode == 9 && xxx == 7) { int ret; int pix_left = width - offs; diff --git a/libavcodec/flac_parser.c b/libavcodec/flac_parser.c index 3424583c495..b13b3b646ab 100644 --- a/libavcodec/flac_parser.c +++ b/libavcodec/flac_parser.c @@ -55,6 +55,7 @@ /** largest possible size of flac header */ #define MAX_FRAME_HEADER_SIZE 16 +#define MAX_FRAME_VERIFY_SIZE (MAX_FRAME_HEADER_SIZE) typedef struct FLACHeaderMarker { int offset; /**< byte offset from start of FLACParseContext->buffer */ @@ -170,7 +171,7 @@ static int find_headers_search_validate(FLACParseContext *fpc, int offset) uint8_t *header_buf; int size = 0; header_buf = flac_fifo_read_wrap(fpc, offset, - MAX_FRAME_HEADER_SIZE, + MAX_FRAME_VERIFY_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, &fpc->wrap_buf, &fpc->wrap_buf_allocated_size); if (frame_header_is_valid(fpc->avctx, header_buf, &fi)) { diff --git a/libavcodec/flicvideo.c b/libavcodec/flicvideo.c index 276c2ff2a62..67fbaa72490 100644 --- a/libavcodec/flicvideo.c +++ b/libavcodec/flicvideo.c @@ -735,6 +735,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx, bytestream2_skip(&g2, chunk_size - 6); } else { + if (bytestream2_get_bytes_left(&g2) < 2 * s->avctx->width * s->avctx->height ) + return AVERROR_INVALIDDATA; for (y_ptr = 0; y_ptr < s->frame->linesize[0] * s->avctx->height; y_ptr += s->frame->linesize[0]) { diff --git a/libavcodec/frame_thread_encoder.c b/libavcodec/frame_thread_encoder.c index 778317d60bb..0d52f066e58 100644 --- a/libavcodec/frame_thread_encoder.c +++ b/libavcodec/frame_thread_encoder.c @@ -124,7 +124,7 @@ static void * attribute_align_arg worker(void *v){ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options){ int i=0; ThreadContext *c; - + AVCodecContext *thread_avctx = NULL; if( !(avctx->thread_type & FF_THREAD_FRAME) || !(avctx->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)) @@ -205,16 +205,17 @@ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options){ AVDictionary *tmp = NULL; int ret; void *tmpv; - AVCodecContext *thread_avctx = avcodec_alloc_context3(avctx->codec); + thread_avctx = avcodec_alloc_context3(avctx->codec); if(!thread_avctx) goto fail; tmpv = thread_avctx->priv_data; *thread_avctx = *avctx; + thread_avctx->priv_data = tmpv; + thread_avctx->internal = NULL; + thread_avctx->hw_frames_ctx = NULL; ret = av_opt_copy(thread_avctx, avctx); if (ret < 0) goto fail; - thread_avctx->priv_data = tmpv; - thread_avctx->internal = NULL; if (avctx->codec->priv_class) { int ret = av_opt_copy(thread_avctx->priv_data, avctx->priv_data); if (ret < 0) @@ -243,6 +244,8 @@ int ff_frame_thread_encoder_init(AVCodecContext 
*avctx, AVDictionary *options){ return 0; fail: + avcodec_close(thread_avctx); + av_freep(&thread_avctx); avctx->thread_count = i; av_log(avctx, AV_LOG_ERROR, "ff_frame_thread_encoder_init failed\n"); ff_frame_thread_encoder_free(avctx); diff --git a/libavcodec/frame_thread_encoder.h b/libavcodec/frame_thread_encoder.h index c400d6b32c8..9733fcdc2de 100644 --- a/libavcodec/frame_thread_encoder.h +++ b/libavcodec/frame_thread_encoder.h @@ -23,6 +23,10 @@ #include "avcodec.h" +/** + * Initialize frame thread encoder. + * @note hardware encoders are not supported + */ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options); void ff_frame_thread_encoder_free(AVCodecContext *avctx); int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, diff --git a/libavcodec/h263.c b/libavcodec/h263.c index bc5c0d599ff..4a03c710a60 100644 --- a/libavcodec/h263.c +++ b/libavcodec/h263.c @@ -29,6 +29,7 @@ #include <limits.h> +#include "libavutil/thread.h" #include "avcodec.h" #include "mpegvideo.h" #include "h263.h" @@ -38,6 +39,17 @@ #include "flv.h" #include "mpeg4video.h" +static av_cold void h263_init_rl_inter(void) +{ + static uint8_t h263_rl_inter_table[2][2 * MAX_RUN + MAX_LEVEL + 3]; + ff_rl_init(&ff_h263_rl_inter, h263_rl_inter_table); +} + +av_cold void ff_h263_init_rl_inter(void) +{ + static AVOnce init_static_once = AV_ONCE_INIT; + ff_thread_once(&init_static_once, h263_init_rl_inter); +} void ff_h263_update_motion_val(MpegEncContext * s){ const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; diff --git a/libavcodec/h263.h b/libavcodec/h263.h index 998f7d0d59a..491f2e0aac4 100644 --- a/libavcodec/h263.h +++ b/libavcodec/h263.h @@ -66,6 +66,7 @@ int16_t *ff_h263_pred_motion(MpegEncContext * s, int block, int dir, int *px, int *py); void ff_h263_encode_init(MpegEncContext *s); void ff_h263_decode_init_vlc(void); +void ff_h263_init_rl_inter(void); int ff_h263_decode_picture_header(MpegEncContext *s); int ff_h263_decode_gob_header(MpegEncContext *s); void ff_h263_update_motion_val(MpegEncContext * s); diff --git a/libavcodec/h263data.c b/libavcodec/h263data.c index 604a0425e1a..20d0436fda5 100644 --- a/libavcodec/h263data.c +++ b/libavcodec/h263data.c @@ -25,8 +25,6 @@ #include <stdint.h> -#include "libavutil/thread.h" - #include "h263data.h" #include "mpegvideo.h" @@ -290,15 +288,3 @@ const AVRational ff_h263_pixel_aspect[16] = { { 0, 1 }, { 0, 1 }, }; -static av_cold void h263_init_rl_inter(void) -{ - static uint8_t h263_rl_inter_table[2][2 * MAX_RUN + MAX_LEVEL + 3]; - ff_rl_init(&ff_h263_rl_inter, h263_rl_inter_table); -} - -av_cold void ff_h263_init_rl_inter(void) -{ - static AVOnce init_static_once = AV_ONCE_INIT; - ff_thread_once(&init_static_once, h263_init_rl_inter); -} diff --git a/libavcodec/h263data.h b/libavcodec/h263data.h index 144704d12b1..06554bdf0d4 100644 --- a/libavcodec/h263data.h +++ b/libavcodec/h263data.h @@ -61,7 +61,6 @@ extern const int8_t ff_inter_run[102]; extern RLTable ff_h263_rl_inter; extern RLTable ff_rl_intra_aic; -void ff_h263_init_rl_inter(void); extern const uint16_t ff_h263_format[8][2]; diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 62f7a61aed9..7c69016338b 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -1831,6 +1831,8 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl, if (nal->type == H264_NAL_IDR_SLICE) get_ue_golomb_long(&sl->gb); /* idr_pic_id */ + sl->poc_lsb = 0; + sl->delta_poc_bottom = 0; if (sps->poc_type == 0) { sl->poc_lsb = get_bits(&sl->gb,
sps->log2_max_poc_lsb); @@ -1838,6 +1840,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl, sl->delta_poc_bottom = get_se_golomb(&sl->gb); } + sl->delta_poc[0] = sl->delta_poc[1] = 0; if (sps->poc_type == 1 && !sps->delta_pic_order_always_zero_flag) { sl->delta_poc[0] = get_se_golomb(&sl->gb); diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 47b9abbc5c3..485f47d36ec 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -914,7 +914,7 @@ static int finalize_frame(H264Context *h, AVFrame *dst, H264Picture *out, int *g out->qscale_table, out->motion_val, NULL, - h->mb_width, h->mb_height, h->mb_stride, 1); + out->mb_width, out->mb_height, out->mb_stride, 1); } } diff --git a/libavcodec/hevc_sei.c b/libavcodec/hevc_sei.c index c881c4338c2..e6ae777852b 100644 --- a/libavcodec/hevc_sei.c +++ b/libavcodec/hevc_sei.c @@ -386,7 +386,7 @@ static int decode_nal_sei_timecode(HEVCSEITimeCode *s, GetBitContext *gb) s->time_offset_length[i] = get_bits(gb, 5); if (s->time_offset_length[i] > 0) { - s->time_offset_value[i] = get_bits(gb, s->time_offset_length[i]); + s->time_offset_value[i] = get_bits_long(gb, s->time_offset_length[i]); } } } diff --git a/libavcodec/iff.c b/libavcodec/iff.c index 79f6215c770..76d3696bb33 100644 --- a/libavcodec/iff.c +++ b/libavcodec/iff.c @@ -1848,7 +1848,8 @@ static int decode_frame(AVCodecContext *avctx, buf += s->planesize; } } - memcpy(frame->data[1], s->pal, 256 * 4); + if (avctx->pix_fmt == AV_PIX_FMT_PAL8) + memcpy(frame->data[1], s->pal, 256 * 4); } else if (s->ham) { int i, count = 1 << s->ham; diff --git a/libavcodec/internal.h b/libavcodec/internal.h index b57b9968166..d889c1883e7 100644 --- a/libavcodec/internal.h +++ b/libavcodec/internal.h @@ -78,6 +78,11 @@ * Codec handles avctx->thread_count == 0 (auto) internally. */ #define FF_CODEC_CAP_AUTO_THREADS (1 << 7) +/** + * Codec handles output frame properties internally instead of letting the + * internal logic derive them from AVCodecInternal.last_pkt_props. + */ +#define FF_CODEC_CAP_SETS_FRAME_PROPS (1 << 8) /** * AVCodec.codec_tags termination value diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c index e3c5a32188c..212b9601c45 100644 --- a/libavcodec/j2kenc.c +++ b/libavcodec/j2kenc.c @@ -1679,7 +1679,7 @@ static int parse_layer_rates(Jpeg2000EncoderContext *s) } token = av_strtok(s->lr_str, ",", &saveptr); - if (rate = strtol(token, NULL, 10)) { + if (token && (rate = strtol(token, NULL, 10))) { s->layer_rates[0] = rate <= 1 ? 0:rate; nlayers++; } else { diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 63edbcda096..0d7ade5ce88 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -2361,6 +2361,8 @@ static int jp2_find_codestream(Jpeg2000DecoderContext *s) atom_size >= 16) { uint32_t atom2_size, atom2, atom2_end; do { + if (bytestream2_get_bytes_left(&s->g) < 8) + break; atom2_size = bytestream2_get_be32u(&s->g); atom2 = bytestream2_get_be32u(&s->g); atom2_end = bytestream2_tell(&s->g) + atom2_size - 8; diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c index 69980eaa497..c4ffa81f7d5 100644 --- a/libavcodec/jpeglsdec.c +++ b/libavcodec/jpeglsdec.c @@ -122,7 +122,7 @@ int ff_jpegls_decode_lse(MJpegDecodeContext *s) s->avctx->pix_fmt = AV_PIX_FMT_PAL8; for (i=s->palette_index; i<=maxtab; i++) { uint8_t k = i << shift; - pal[k] = 0; + pal[k] = wt < 4 ? 
0xFF000000 : 0; for (j=0; j<wt; j++) { pal[k] |= get_bits(&s->gb, 8) << (8*(wt-j-1)); } diff --git a/libavcodec/libdav1d.c b/libavcodec/libdav1d.c index 3c2a68b7e06..a9c983eacab 100644 --- a/libavcodec/libdav1d.c +++ b/libavcodec/libdav1d.c @@ -33,6 +33,9 @@ #include "decode.h" #include "internal.h" +#define FF_DAV1D_VERSION_AT_LEAST(x,y) \ + (DAV1D_API_VERSION_MAJOR > (x) || DAV1D_API_VERSION_MAJOR == (x) && DAV1D_API_VERSION_MINOR >= (y)) + typedef struct Libdav1dContext { AVClass *class; Dav1dContext *c; @@ -145,6 +148,15 @@ static av_cold int libdav1d_init(AVCodecContext *c) if (dav1d->operating_point >= 0) s.operating_point = dav1d->operating_point; +#if FF_DAV1D_VERSION_AT_LEAST(6,0) + if (dav1d->frame_threads || dav1d->tile_threads) + s.n_threads = FFMAX(dav1d->frame_threads, dav1d->tile_threads); + else + s.n_threads = FFMIN(threads, DAV1D_MAX_THREADS); + s.max_frame_delay = (c->flags & AV_CODEC_FLAG_LOW_DELAY) ? 1 : s.n_threads; + av_log(c, AV_LOG_DEBUG, "Using %d threads, %d max_frame_delay\n", + s.n_threads, s.max_frame_delay); +#else s.n_tile_threads = dav1d->tile_threads ? dav1d->tile_threads : FFMIN(floor(sqrt(threads)), DAV1D_MAX_TILE_THREADS); @@ -153,6 +165,7 @@ static av_cold int libdav1d_init(AVCodecContext *c) : FFMIN(ceil(threads / s.n_tile_threads), DAV1D_MAX_FRAME_THREADS); av_log(c, AV_LOG_DEBUG, "Using %d frame threads, %d tile threads\n", s.n_frame_threads, s.n_tile_threads); +#endif res = dav1d_open(&dav1d->c, &s); if (res < 0) @@ -456,6 +469,13 @@ static av_cold int libdav1d_close(AVCodecContext *c) return 0; } +#ifndef DAV1D_MAX_FRAME_THREADS +#define DAV1D_MAX_FRAME_THREADS DAV1D_MAX_THREADS +#endif +#ifndef DAV1D_MAX_TILE_THREADS +#define DAV1D_MAX_TILE_THREADS DAV1D_MAX_THREADS +#endif + #define OFFSET(x) offsetof(Libdav1dContext, x) #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM static const AVOption libdav1d_options[] = { diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c index 1d1d04fd801..3ed61563ee7 100644 --- a/libavcodec/lpc.c +++ b/libavcodec/lpc.c @@ -189,7 +189,7 @@ double ff_lpc_calc_ref_coefs_f(LPCContext *s, const float *samples, int len, compute_ref_coefs(autoc, order, ref, error); for (i = 0; i < order; i++) avg_err = (avg_err + error[i])/2.0f; - return signal/avg_err; + return avg_err ? signal/avg_err : NAN; } /** diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h index 88ca247f87b..e1b41bfd9ba 100644 --- a/libavcodec/lpc.h +++ b/libavcodec/lpc.h @@ -143,7 +143,7 @@ static inline void compute_ref_coefs(const LPC_TYPE *autoc, int max_order, gen0[i] = gen1[i] = autoc[i + 1]; err = autoc[0]; - ref[0] = -gen1[0] / err; + ref[0] = -gen1[0] / ((USE_FIXED || err) ? err : 1); err += gen1[0] * ref[0]; if (error) error[0] = err; @@ -152,7 +152,7 @@ static inline void compute_ref_coefs(const LPC_TYPE *autoc, int max_order, gen1[j] = gen1[j + 1] + ref[i - 1] * gen0[j]; gen0[j] = gen1[j + 1] * ref[i - 1] + gen0[j]; } - ref[i] = -gen1[0] / err; + ref[i] = -gen1[0] / ((USE_FIXED || err) ?
err : 1); err += gen1[0] * ref[i]; if (error) error[i] = err; @@ -186,7 +186,8 @@ static inline int AAC_RENAME(compute_lpc_coefs)(const LPC_TYPE *autoc, int max_o for(j=0; j<i; j++) r -= lpc_last[j] * autoc[i-j-1]; - r /= err; + if (err) + r /= err; err *= FIXR(1.0) - (r * r); } diff --git a/libavcodec/mjpegbdec.c b/libavcodec/mjpegbdec.c --- a/libavcodec/mjpegbdec.c +++ b/libavcodec/mjpegbdec.c @@ ... @@ s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16)); s->start_code = SOS; - ret = ff_mjpeg_decode_sos(s, NULL, 0, NULL); - if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE)) - return ret; + if (avctx->skip_frame == AVDISCARD_ALL) { + skip_bits(&s->gb, get_bits_left(&s->gb)); + } else { + ret = ff_mjpeg_decode_sos(s, NULL, 0, NULL); + if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE)) + return ret; + } } if (s->interlaced) { diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c index 2df6caa4405..afb117cfc61 100644 --- a/libavcodec/mjpegdec.c +++ b/libavcodec/mjpegdec.c @@ -1573,6 +1573,9 @@ static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss, else ret = decode_block_progressive(s, *block, last_nnz, s->ac_index[0], quant_matrix, ss, se, Al, &EOBRUN); + + if (ret >= 0 && get_bits_left(&s->gb) < 0) + ret = AVERROR_INVALIDDATA; if (ret < 0) { av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x); diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c index 94221da2c15..09bf01247d2 100644 --- a/libavcodec/mpeg12dec.c +++ b/libavcodec/mpeg12dec.c @@ -1538,6 +1538,10 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1) s->mpeg_f_code[0][1] = get_bits(&s->gb, 4); s->mpeg_f_code[1][0] = get_bits(&s->gb, 4); s->mpeg_f_code[1][1] = get_bits(&s->gb, 4); + s->mpeg_f_code[0][0] += !s->mpeg_f_code[0][0]; + s->mpeg_f_code[0][1] += !s->mpeg_f_code[0][1]; + s->mpeg_f_code[1][0] += !s->mpeg_f_code[1][0]; + s->mpeg_f_code[1][1] += !s->mpeg_f_code[1][1]; if (!s->pict_type && s1->mpeg_enc_ctx_allocated) { av_log(s->avctx, AV_LOG_ERROR, "Missing picture start code, guessing missing values\n"); @@ -1551,10 +1555,6 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1) s->current_picture.f->pict_type = s->pict_type; s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; } - s->mpeg_f_code[0][0] += !s->mpeg_f_code[0][0]; - s->mpeg_f_code[0][1] += !s->mpeg_f_code[0][1]; - s->mpeg_f_code[1][0] += !s->mpeg_f_code[1][0]; - s->mpeg_f_code[1][1] += !s->mpeg_f_code[1][1]; s->intra_dc_precision = get_bits(&s->gb, 2); s->picture_structure = get_bits(&s->gb, 2); diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index eb13d57d2b2..cee3fdb36bb 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -509,9 +509,13 @@ FF_ENABLE_DEPRECATION_WARNINGS if (!s->fixed_qscale && avctx->bit_rate * av_q2d(avctx->time_base) > avctx->bit_rate_tolerance) { + double nbt = avctx->bit_rate * av_q2d(avctx->time_base) * 5; av_log(avctx, AV_LOG_WARNING, "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate); - avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base); + if (nbt <= INT_MAX) { + avctx->bit_rate_tolerance = nbt; + } else + avctx->bit_rate_tolerance = INT_MAX; } if (avctx->rc_max_rate && @@ -2016,6 +2020,7 @@ FF_ENABLE_DEPRECATION_WARNINGS break; default: av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n"); + s->stuffing_bits = 0; } flush_put_bits(&s->pb); s->frame_bits = put_bits_count(&s->pb); diff --git a/libavcodec/mxpegdec.c b/libavcodec/mxpegdec.c index 763ce5871df..a068baf7586 100644 --- a/libavcodec/mxpegdec.c +++ b/libavcodec/mxpegdec.c @@ -193,6 +193,9 @@ static int mxpeg_decode_frame(AVCodecContext *avctx, int start_code; int
ret; + if (avctx->skip_frame == AVDISCARD_ALL) + return AVERROR_PATCHWELCOME; + buf_ptr = buf; buf_end = buf + buf_size; jpg->got_picture = 0; diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c index 99ede2f42b4..8670431dcc4 100644 --- a/libavcodec/nellymoserenc.c +++ b/libavcodec/nellymoserenc.c @@ -138,10 +138,8 @@ static av_cold int encode_end(AVCodecContext *avctx) ff_mdct_end(&s->mdct_ctx); - if (s->avctx->trellis) { - av_freep(&s->opt); - av_freep(&s->path); - } + av_freep(&s->opt); + av_freep(&s->path); ff_af_queue_close(&s->afq); av_freep(&s->fdsp); diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index dddee8cac1e..b09ddbe0fa0 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -144,6 +144,70 @@ static int nvenc_print_error(AVCodecContext *avctx, NVENCSTATUS err, return ret; } +typedef struct GUIDTuple { + const GUID guid; + int flags; +} GUIDTuple; + +#define PRESET_ALIAS(alias, name, ...) \ + [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ } + +#define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__) + +static void nvenc_map_preset(NvencContext *ctx) +{ + GUIDTuple presets[] = { +#ifdef NVENC_HAVE_NEW_PRESETS + PRESET(P1), + PRESET(P2), + PRESET(P3), + PRESET(P4), + PRESET(P5), + PRESET(P6), + PRESET(P7), + PRESET_ALIAS(SLOW, P7, NVENC_TWO_PASSES), + PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS), + PRESET_ALIAS(FAST, P1, NVENC_ONE_PASS), + // Compat aliases + PRESET_ALIAS(DEFAULT, P4, NVENC_DEPRECATED_PRESET), + PRESET_ALIAS(HP, P1, NVENC_DEPRECATED_PRESET), + PRESET_ALIAS(HQ, P7, NVENC_DEPRECATED_PRESET), + PRESET_ALIAS(BD, P5, NVENC_DEPRECATED_PRESET), + PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), + PRESET_ALIAS(LOW_LATENCY_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), + PRESET_ALIAS(LOW_LATENCY_HQ, P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), + PRESET_ALIAS(LOSSLESS_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), + PRESET_ALIAS(LOSSLESS_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), +#else + PRESET(DEFAULT), + PRESET(HP), + PRESET(HQ), + PRESET(BD), + PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES), + PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS), + PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS), + PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY), + PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY), + PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY), + PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS), + PRESET(LOSSLESS_HP, NVENC_LOSSLESS), +#endif + }; + + GUIDTuple *t = &presets[ctx->preset]; + + ctx->init_encode_params.presetGUID = t->guid; + ctx->flags = t->flags; + +#ifdef NVENC_HAVE_NEW_PRESETS + if (ctx->tuning_info == NV_ENC_TUNING_INFO_LOSSLESS) + ctx->flags |= NVENC_LOSSLESS; +#endif +} + +#undef PRESET +#undef PRESET_ALIAS + static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level) { #if NVENCAPI_CHECK_VERSION(11, 1) @@ -358,7 +422,7 @@ static int nvenc_check_capabilities(AVCodecContext *avctx) } ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE); - if (ctx->preset >= PRESET_LOSSLESS_DEFAULT && ret <= 0) { + if (ctx->flags & NVENC_LOSSLESS && ret <= 0) { av_log(avctx, AV_LOG_WARNING, "Lossless encoding not supported\n"); return AVERROR(ENOSYS); } @@ -548,6 +612,11 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx) return AVERROR_BUG; } + nvenc_map_preset(ctx); + + if (ctx->flags & NVENC_DEPRECATED_PRESET) + av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. 
Use p1 to p7 + -tune or fast/medium/slow.\n"); + if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) { AVHWFramesContext *frames_ctx; AVHWDeviceContext *hwdev_ctx; @@ -638,65 +707,6 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx) return 0; } -typedef struct GUIDTuple { - const GUID guid; - int flags; -} GUIDTuple; - -#define PRESET_ALIAS(alias, name, ...) \ - [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ } - -#define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__) - -static void nvenc_map_preset(NvencContext *ctx) -{ - GUIDTuple presets[] = { -#ifdef NVENC_HAVE_NEW_PRESETS - PRESET(P1), - PRESET(P2), - PRESET(P3), - PRESET(P4), - PRESET(P5), - PRESET(P6), - PRESET(P7), - PRESET_ALIAS(SLOW, P7, NVENC_TWO_PASSES), - PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS), - PRESET_ALIAS(FAST, P1, NVENC_ONE_PASS), - // Compat aliases - PRESET_ALIAS(DEFAULT, P4, NVENC_DEPRECATED_PRESET), - PRESET_ALIAS(HP, P1, NVENC_DEPRECATED_PRESET), - PRESET_ALIAS(HQ, P7, NVENC_DEPRECATED_PRESET), - PRESET_ALIAS(BD, P5, NVENC_DEPRECATED_PRESET), - PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), - PRESET_ALIAS(LOW_LATENCY_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), - PRESET_ALIAS(LOW_LATENCY_HQ, P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), - PRESET_ALIAS(LOSSLESS_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), - PRESET_ALIAS(LOSSLESS_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), -#else - PRESET(DEFAULT), - PRESET(HP), - PRESET(HQ), - PRESET(BD), - PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES), - PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS), - PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS), - PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY), - PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY), - PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY), - PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS), - PRESET(LOSSLESS_HP, NVENC_LOSSLESS), -#endif - }; - - GUIDTuple *t = &presets[ctx->preset]; - - ctx->init_encode_params.presetGUID = t->guid; - ctx->flags = t->flags; -} - -#undef PRESET -#undef PRESET_ALIAS - static av_cold void set_constqp(AVCodecContext *avctx) { NvencContext *ctx = avctx->priv_data; @@ -1254,18 +1264,15 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) ctx->init_encode_params.encodeConfig = &ctx->encode_config; - nvenc_map_preset(ctx); - - if (ctx->flags & NVENC_DEPRECATED_PRESET) - av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. 
Use p1 to p7 + -tune or fast/medium/slow.\n"); - preset_config.version = NV_ENC_PRESET_CONFIG_VER; preset_config.presetCfg.version = NV_ENC_CONFIG_VER; #ifdef NVENC_HAVE_NEW_PRESETS ctx->init_encode_params.tuningInfo = ctx->tuning_info; - if (ctx->flags & NVENC_LOWLATENCY) + if (ctx->flags & NVENC_LOSSLESS) + ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOSSLESS; + else if (ctx->flags & NVENC_LOWLATENCY) ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY; nv_status = p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder, @@ -1307,9 +1314,6 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) * */ if (ctx->rc_lookahead == 0 && ctx->encode_config.rcParams.enableLookahead) ctx->rc_lookahead = ctx->encode_config.rcParams.lookaheadDepth; - - if (ctx->init_encode_params.tuningInfo == NV_ENC_TUNING_INFO_LOSSLESS) - ctx->flags |= NVENC_LOSSLESS; #endif if (ctx->weighted_pred == 1) diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h index fefc5f7f0ba..314c270e748 100644 --- a/libavcodec/nvenc.h +++ b/libavcodec/nvenc.h @@ -103,7 +103,7 @@ enum { PRESET_LOW_LATENCY_DEFAULT , PRESET_LOW_LATENCY_HQ , PRESET_LOW_LATENCY_HP, - PRESET_LOSSLESS_DEFAULT, // lossless presets must be the last ones + PRESET_LOSSLESS_DEFAULT, PRESET_LOSSLESS_HP, #ifdef NVENC_HAVE_NEW_PRESETS PRESET_P1, diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c index 441e7871d2b..82fbb23bf73 100644 --- a/libavcodec/nvenc_hevc.c +++ b/libavcodec/nvenc_hevc.c @@ -148,7 +148,7 @@ static const AVOption options[] = { { "middle", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, VE, "b_ref_mode" }, #endif { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, - { "s12m_tc", "Use timecode (if available)", OFFSET(s12m_tc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, + { "s12m_tc", "Use timecode (if available)", OFFSET(s12m_tc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, { "dpb_size", "Specifies the DPB size used for encoding (0 means automatic)", OFFSET(dpb_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, #ifdef NVENC_HAVE_MULTIPASS diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c index f3b212d5086..6aa3c1b436b 100644 --- a/libavcodec/pngdec.c +++ b/libavcodec/pngdec.c @@ -1644,7 +1644,7 @@ static int decode_frame_apng(AVCodecContext *avctx, if (!(avctx->active_thread_type & FF_THREAD_FRAME)) { if (s->dispose_op == APNG_DISPOSE_OP_PREVIOUS) { ff_thread_release_buffer(avctx, &s->picture); - } else if (s->dispose_op == APNG_DISPOSE_OP_NONE) { + } else { ff_thread_release_buffer(avctx, &s->last_picture); FFSWAP(ThreadFrame, s->picture, s->last_picture); } @@ -1693,8 +1693,8 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src) pdst->hdr_state |= psrc->hdr_state; } - src_frame = psrc->dispose_op == APNG_DISPOSE_OP_NONE ? - &psrc->picture : &psrc->last_picture; + src_frame = psrc->dispose_op == APNG_DISPOSE_OP_PREVIOUS ? 
+ &psrc->last_picture : &psrc->picture; ff_thread_release_buffer(dst, &pdst->last_picture); if (src_frame && src_frame->f->data[0]) { diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c index bd70689caba..9c3a48c2518 100644 --- a/libavcodec/rv10.c +++ b/libavcodec/rv10.c @@ -154,7 +154,7 @@ static int rv10_decode_picture_header(MpegEncContext *s) return mb_count; } -static int rv20_decode_picture_header(RVDecContext *rv) +static int rv20_decode_picture_header(RVDecContext *rv, int whole_size) { MpegEncContext *s = &rv->m; int seq, mb_pos, i, ret; @@ -232,6 +232,10 @@ static int rv20_decode_picture_header(RVDecContext *rv) "attempting to change resolution to %dx%d\n", new_w, new_h); if (av_image_check_size(new_w, new_h, 0, s->avctx) < 0) return AVERROR_INVALIDDATA; + + if (whole_size < (new_w + 15)/16 * ((new_h + 15)/16) / 8) + return AVERROR_INVALIDDATA; + ff_mpv_common_end(s); // attempt to keep aspect during typical resolution switches @@ -447,7 +451,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, const uint8_t *buf, if (s->codec_id == AV_CODEC_ID_RV10) mb_count = rv10_decode_picture_header(s); else - mb_count = rv20_decode_picture_header(rv); + mb_count = rv20_decode_picture_header(rv, whole_size); if (mb_count < 0) { if (mb_count != ERROR_SKIP_FRAME) av_log(s->avctx, AV_LOG_ERROR, "HEADER ERROR\n"); diff --git a/libavcodec/sbrdsp_fixed.c b/libavcodec/sbrdsp_fixed.c index 91fa664c087..43fcc90ae57 100644 --- a/libavcodec/sbrdsp_fixed.c +++ b/libavcodec/sbrdsp_fixed.c @@ -87,7 +87,7 @@ static void sbr_neg_odd_64_c(int *x) { int i; for (i = 1; i < 64; i += 2) - x[i] = -x[i]; + x[i] = -(unsigned)x[i]; } static void sbr_qmf_pre_shuffle_c(int *z) diff --git a/libavcodec/snow.h b/libavcodec/snow.h index 41a3bef4dec..d705188bfdd 100644 --- a/libavcodec/snow.h +++ b/libavcodec/snow.h @@ -186,6 +186,7 @@ typedef struct SnowContext{ uint8_t *emu_edge_buffer; AVMotionVector *avmv; + unsigned avmv_size; int avmv_index; uint64_t encoding_error[AV_NUM_DATA_POINTERS]; diff --git a/libavcodec/snowdec.c b/libavcodec/snowdec.c index 68afe0df26d..177c2fa56dc 100644 --- a/libavcodec/snowdec.c +++ b/libavcodec/snowdec.c @@ -493,9 +493,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, s->spatial_decomposition_count ); - av_assert0(!s->avmv); if (s->avctx->export_side_data & AV_CODEC_EXPORT_DATA_MVS) { - s->avmv = av_malloc_array(s->b_width * s->b_height, sizeof(AVMotionVector) << (s->block_max_depth*2)); + size_t size; + res = av_size_mult(s->b_width * s->b_height, sizeof(AVMotionVector) << (s->block_max_depth*2), &size); + if (res) + return res; + av_fast_malloc(&s->avmv, &s->avmv_size, size); + if (!s->avmv) + return AVERROR(ENOMEM); + } else { + s->avmv_size = 0; + av_freep(&s->avmv); } s->avmv_index = 0; @@ -624,8 +632,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, memcpy(sd->data, s->avmv, s->avmv_index * sizeof(AVMotionVector)); } - av_freep(&s->avmv); - if (res < 0) return res; @@ -645,6 +651,9 @@ static av_cold int decode_end(AVCodecContext *avctx) ff_snow_common_end(s); + s->avmv_size = 0; + av_freep(&s->avmv); + return 0; } diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c index 4fac0c26e5c..1b5da03245a 100644 --- a/libavcodec/svq1enc.c +++ b/libavcodec/svq1enc.c @@ -487,9 +487,10 @@ static av_cold int svq1_encode_end(AVCodecContext *avctx) SVQ1EncContext *const s = avctx->priv_data; int i; - av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", - s->rd_total / (double)(avctx->width * avctx->height * - avctx->frame_number)); + if 
(avctx->frame_number) + av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", + s->rd_total / (double)(avctx->width * avctx->height * + avctx->frame_number)); s->m.mb_type = NULL; ff_mpv_common_end(&s->m); diff --git a/libavcodec/ttadata.c b/libavcodec/ttadata.c index bf793a4cc85..aa9f418a7d4 100644 --- a/libavcodec/ttadata.c +++ b/libavcodec/ttadata.c @@ -30,7 +30,8 @@ const uint32_t ff_tta_shift_1[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, - 0x80000000, 0x80000000, 0x80000000, 0x80000000 + 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0xFFFFFFFF }; const uint32_t * const ff_tta_shift_16 = ff_tta_shift_1 + 4; diff --git a/libavcodec/ttadsp.c b/libavcodec/ttadsp.c index 1d1443aee05..99dd66a0c2a 100644 --- a/libavcodec/ttadsp.c +++ b/libavcodec/ttadsp.c @@ -47,9 +47,9 @@ static void tta_filter_process_c(int32_t *qmi, int32_t *dx, int32_t *dl, *error = *in; *in += (round >> shift); - dl[4] = -dl[5]; dl[5] = -dl[6]; - dl[6] = *in - dl[7]; dl[7] = *in; - dl[5] += dl[6]; dl[4] += dl[5]; + dl[4] = -(unsigned)dl[5]; dl[5] = -(unsigned)dl[6]; + dl[6] = *in -(unsigned)dl[7]; dl[7] = *in; + dl[5] += (unsigned)dl[6]; dl[4] += (unsigned)dl[5]; } av_cold void ff_ttadsp_init(TTADSPContext *c) diff --git a/libavcodec/ttmlenc.c b/libavcodec/ttmlenc.c index 3972b4368c4..695651bb945 100644 --- a/libavcodec/ttmlenc.c +++ b/libavcodec/ttmlenc.c @@ -206,5 +206,5 @@ AVCodec ff_ttml_encoder = { .init = ttml_encode_init, .encode_sub = ttml_encode_frame, .close = ttml_encode_close, - .capabilities = FF_CODEC_CAP_INIT_CLEANUP, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, }; diff --git a/libavcodec/utils.c b/libavcodec/utils.c index 896b99dc3f2..825094d2f31 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -268,10 +268,21 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, h_align = 4; } if (s->codec_id == AV_CODEC_ID_JV || + s->codec_id == AV_CODEC_ID_ARGO || s->codec_id == AV_CODEC_ID_INTERPLAY_VIDEO) { w_align = 8; h_align = 8; } + if (s->codec_id == AV_CODEC_ID_MJPEG || + s->codec_id == AV_CODEC_ID_MJPEGB || + s->codec_id == AV_CODEC_ID_LJPEG || + s->codec_id == AV_CODEC_ID_SMVJPEG || + s->codec_id == AV_CODEC_ID_AMV || + s->codec_id == AV_CODEC_ID_SP5X || + s->codec_id == AV_CODEC_ID_JPEGLS) { + w_align = 8; + h_align = 2*8; + } break; case AV_PIX_FMT_BGR24: if ((s->codec_id == AV_CODEC_ID_MSZH) || @@ -286,6 +297,12 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, h_align = 4; } break; + case AV_PIX_FMT_BGR0: + if (s->codec_id == AV_CODEC_ID_ARGO) { + w_align = 8; + h_align = 8; + } + break; default: break; } @@ -739,7 +756,7 @@ static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba, case AV_CODEC_ID_ADPCM_THP: case AV_CODEC_ID_ADPCM_THP_LE: if (extradata) - return frame_bytes * 14 / (8 * ch); + return frame_bytes * 14LL / (8 * ch); break; case AV_CODEC_ID_ADPCM_XA: return (frame_bytes / 128) * 224 / ch; @@ -773,21 +790,33 @@ static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba, if (ba > 0) { /* calc from frame_bytes, channels, and block_align */ int blocks = frame_bytes / ba; + int64_t tmp = 0; switch (id) { case AV_CODEC_ID_ADPCM_IMA_WAV: if (bps < 2 || bps > 5) return 0; - return blocks * (1 + (ba - 4 * ch) / (bps * ch) * 8); + tmp = blocks * (1LL + (ba - 4 * ch) / (bps * ch) * 8); + break; case AV_CODEC_ID_ADPCM_IMA_DK3: - return blocks * (((ba - 16) * 2 / 3 * 4) / ch); + tmp = blocks * (((ba - 16LL) 
* 2 / 3 * 4) / ch); + break; case AV_CODEC_ID_ADPCM_IMA_DK4: - return blocks * (1 + (ba - 4 * ch) * 2 / ch); + tmp = blocks * (1 + (ba - 4LL * ch) * 2 / ch); + break; case AV_CODEC_ID_ADPCM_IMA_RAD: - return blocks * ((ba - 4 * ch) * 2 / ch); + tmp = blocks * ((ba - 4LL * ch) * 2 / ch); + break; case AV_CODEC_ID_ADPCM_MS: - return blocks * (2 + (ba - 7 * ch) * 2LL / ch); + tmp = blocks * (2 + (ba - 7LL * ch) * 2LL / ch); + break; case AV_CODEC_ID_ADPCM_MTAF: - return blocks * (ba - 16) * 2 / ch; + tmp = blocks * (ba - 16LL) * 2 / ch; + break; + } + if (tmp) { + if (tmp != (int)tmp) + return 0; + return tmp; } } @@ -825,20 +854,22 @@ static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba, int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes) { - return get_audio_frame_duration(avctx->codec_id, avctx->sample_rate, + int duration = get_audio_frame_duration(avctx->codec_id, avctx->sample_rate, avctx->channels, avctx->block_align, avctx->codec_tag, avctx->bits_per_coded_sample, avctx->bit_rate, avctx->extradata, avctx->frame_size, frame_bytes); + return FFMAX(0, duration); } int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes) { - return get_audio_frame_duration(par->codec_id, par->sample_rate, + int duration = get_audio_frame_duration(par->codec_id, par->sample_rate, par->channels, par->block_align, par->codec_tag, par->bits_per_coded_sample, par->bit_rate, par->extradata, par->frame_size, frame_bytes); + return FFMAX(0, duration); } #if !HAVE_THREADS diff --git a/libavcodec/vaapi_av1.c b/libavcodec/vaapi_av1.c index 1809b485aa1..16b7e35747c 100644 --- a/libavcodec/vaapi_av1.c +++ b/libavcodec/vaapi_av1.c @@ -292,7 +292,7 @@ static int vaapi_av1_decode_slice(AVCodecContext *avctx, err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &slice_param, sizeof(VASliceParameterBufferAV1), buffer, - s->tile_group_info[i].tile_size); + size); if (err) { ff_vaapi_decode_cancel(avctx, pic); return err; diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c index cd9975d8cfd..ab5e3008e0b 100644 --- a/libavcodec/vc1.c +++ b/libavcodec/vc1.c @@ -672,6 +672,8 @@ int ff_vc1_parse_frame_header(VC1Context *v, GetBitContext* gb) if (v->s.pict_type == AV_PICTURE_TYPE_P) v->rnd ^= 1; + if (get_bits_left(gb) < 5) + return AVERROR_INVALIDDATA; /* Quantizer stuff */ pqindex = get_bits(gb, 5); if (!pqindex) @@ -764,6 +766,9 @@ int ff_vc1_parse_frame_header(VC1Context *v, GetBitContext* gb) av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: " "Imode: %i, Invert: %i\n", status>>1, status&1); + if (get_bits_left(gb) < 4) + return AVERROR_INVALIDDATA; + /* Hopefully this is correct for P-frames */ v->s.mv_table_index = get_bits(gb, 2); //but using ff_vc1_ tables v->cbptab = get_bits(gb, 2); diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index ea93e115884..d4ceb60791b 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -1124,7 +1124,9 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ret = AVERROR_INVALIDDATA; goto err; } - if (!v->field_mode) + if ( !v->field_mode + && avctx->codec_id != AV_CODEC_ID_WMV3IMAGE + && avctx->codec_id != AV_CODEC_ID_VC1IMAGE) ff_er_frame_end(&s->er); } @@ -1152,12 +1154,14 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay) { if ((ret = av_frame_ref(pict, s->current_picture_ptr->f)) < 0) goto err; - ff_print_debug_info(s, s->current_picture_ptr, pict); + if (!v->field_mode) + ff_print_debug_info(s, s->current_picture_ptr, pict); 
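+ /* Field pictures interleave two fields whose per-macroblock debug state is not laid out like a full frame's; skipping ff_print_debug_info() in field mode presumably avoids misindexed reads of those tables. */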
*got_frame = 1; } else if (s->last_picture_ptr) { if ((ret = av_frame_ref(pict, s->last_picture_ptr->f)) < 0) goto err; - ff_print_debug_info(s, s->last_picture_ptr, pict); + if (!v->field_mode) + ff_print_debug_info(s, s->last_picture_ptr, pict); *got_frame = 1; } } diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c index 7bd2e4c2aba..295cc21dfae 100644 --- a/libavcodec/vc2enc.c +++ b/libavcodec/vc2enc.c @@ -982,6 +982,8 @@ static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, } s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); + if (s->slice_min_bytes < 0) + return AVERROR(EINVAL); ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced); if (ret) diff --git a/libavcodec/videotoolboxenc.c b/libavcodec/videotoolboxenc.c index 9b7ee6720c4..8a02d3d1a02 100644 --- a/libavcodec/videotoolboxenc.c +++ b/libavcodec/videotoolboxenc.c @@ -49,6 +49,10 @@ enum { kCVPixelFormatType_420YpCbCr10BiPlanarFullRange = 'xf20' }; enum { kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange = 'x420' }; #endif +#ifndef TARGET_CPU_ARM64 +# define TARGET_CPU_ARM64 0 +#endif + typedef OSStatus (*getParameterSetAtIndex)(CMFormatDescriptionRef videoDesc, size_t parameterSetIndex, const uint8_t **parameterSetPointerOut, diff --git a/libavcodec/webp.c b/libavcodec/webp.c index 5a7aebc5872..06a4cc04a5d 100644 --- a/libavcodec/webp.c +++ b/libavcodec/webp.c @@ -627,6 +627,9 @@ static int decode_entropy_coded_image(WebPContext *s, enum ImageRole role, while (y < img->frame->height) { int v; + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + hg = get_huffman_group(s, img, x, y); v = huff_reader_get_symbol(&hg[HUFF_IDX_GREEN], &s->gb); if (v < NUM_LITERAL_CODES) { diff --git a/libavcodec/wma.c b/libavcodec/wma.c index cfa5fa33550..a979a112bda 100644 --- a/libavcodec/wma.c +++ b/libavcodec/wma.c @@ -457,7 +457,7 @@ int ff_wma_run_level_decode(AVCodecContext *avctx, GetBitContext *gb, if (get_bits1(gb)) { av_log(avctx, AV_LOG_ERROR, "broken escape sequence\n"); - return -1; + return AVERROR_INVALIDDATA; } else offset += get_bits(gb, frame_len_bits) + 4; } else @@ -475,7 +475,7 @@ int ff_wma_run_level_decode(AVCodecContext *avctx, GetBitContext *gb, offset, num_coefs ); - return -1; + return AVERROR_INVALIDDATA; } return 0; diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c index 9c79556bb5a..8710414936c 100644 --- a/libavcodec/wmadec.c +++ b/libavcodec/wmadec.c @@ -590,15 +590,18 @@ static int wma_decode_block(WMACodecContext *s) if (s->channel_coded[ch]) { int tindex; WMACoef *ptr = &s->coefs1[ch][0]; + int ret; /* special VLC tables are used for ms stereo because * there is potentially less energy there */ tindex = (ch == 1 && s->ms_stereo); memset(ptr, 0, s->block_len * sizeof(WMACoef)); - ff_wma_run_level_decode(s->avctx, &s->gb, &s->coef_vlc[tindex], - s->level_table[tindex], s->run_table[tindex], - 0, ptr, 0, nb_coefs[ch], - s->block_len, s->frame_len_bits, coef_nb_bits); + ret = ff_wma_run_level_decode(s->avctx, &s->gb, &s->coef_vlc[tindex], + s->level_table[tindex], s->run_table[tindex], + 0, ptr, 0, nb_coefs[ch], + s->block_len, s->frame_len_bits, coef_nb_bits); + if (ret < 0) + return ret; } if (s->version == 1 && s->avctx->channels >= 2) align_get_bits(&s->gb); diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c index 8024ce16115..7c3044b0b01 100644 --- a/libavcodec/wmaprodec.c +++ b/libavcodec/wmaprodec.c @@ -985,13 +985,16 @@ static int decode_coeffs(WMAProDecodeCtx *s, int c) /** decode run level coded 
coefficients */ if (cur_coeff < s->subframe_len) { + int ret; + memset(&ci->coeffs[cur_coeff], 0, sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff)); - if (ff_wma_run_level_decode(s->avctx, &s->gb, vlc, - level, run, 1, ci->coeffs, - cur_coeff, s->subframe_len, - s->subframe_len, s->esc_len, 0)) - return AVERROR_INVALIDDATA; + ret = ff_wma_run_level_decode(s->avctx, &s->gb, vlc, + level, run, 1, ci->coeffs, + cur_coeff, s->subframe_len, + s->subframe_len, s->esc_len, 0); + if (ret < 0) + return ret; } return 0; diff --git a/libavcodec/xpmdec.c b/libavcodec/xpmdec.c index 922dfc0f672..993873c5958 100644 --- a/libavcodec/xpmdec.c +++ b/libavcodec/xpmdec.c @@ -341,9 +341,6 @@ static int xpm_decode_frame(AVCodecContext *avctx, void *data, if ((ret = ff_set_dimensions(avctx, width, height)) < 0) return ret; - if ((ret = ff_get_buffer(avctx, p, 0)) < 0) - return ret; - if (cpp <= 0 || cpp >= 5) { av_log(avctx, AV_LOG_ERROR, "unsupported/invalid number of chars per pixel: %d\n", cpp); return AVERROR_INVALIDDATA; @@ -360,14 +357,17 @@ static int xpm_decode_frame(AVCodecContext *avctx, void *data, size *= 4; - av_fast_padded_malloc(&x->pixels, &x->pixels_size, size); - if (!x->pixels) - return AVERROR(ENOMEM); - ptr += mod_strcspn(ptr, ",") + 1; if (end - ptr < 1) return AVERROR_INVALIDDATA; + if ((ret = ff_get_buffer(avctx, p, 0)) < 0) + return ret; + + av_fast_padded_malloc(&x->pixels, &x->pixels_size, size); + if (!x->pixels) + return AVERROR(ENOMEM); + for (i = 0; i < ncolors; i++) { const uint8_t *index; int len; diff --git a/libavfilter/af_drmeter.c b/libavfilter/af_drmeter.c index ecccb651863..425c25ae87e 100644 --- a/libavfilter/af_drmeter.c +++ b/libavfilter/af_drmeter.c @@ -167,6 +167,11 @@ static void print_stats(AVFilterContext *ctx) float chdr, secondpeak, rmssum = 0; int i, j, first = 0; + if (!p->nb_samples) { + av_log(ctx, AV_LOG_INFO, "No data, dynamic range not meassurable\n"); + return; + } + finish_block(p); for (i = 0; i <= 10000; i++) { diff --git a/libavfilter/f_metadata.c b/libavfilter/f_metadata.c index 598257b15b8..3332d91a3e7 100644 --- a/libavfilter/f_metadata.c +++ b/libavfilter/f_metadata.c @@ -304,9 +304,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) AVDictionary **metadata = &frame->metadata; AVDictionaryEntry *e; - if (!*metadata && s->mode != METADATA_ADD) - return ff_filter_frame(outlink, frame); - e = av_dict_get(*metadata, !s->key ? "" : s->key, NULL, !s->key ? 
AV_DICT_IGNORE_SUFFIX: 0); diff --git a/libavfilter/vf_ciescope.c b/libavfilter/vf_ciescope.c index 719e66ad0f0..2ddec3d06be 100644 --- a/libavfilter/vf_ciescope.c +++ b/libavfilter/vf_ciescope.c @@ -849,7 +849,8 @@ rgb_to_xy(double rc, *z = m[2][0] * rc + m[2][1] * gc + m[2][2] * bc; sum = *x + *y + *z; - + if (sum == 0) + sum = 1; *x = *x / sum; *y = *y / sum; } diff --git a/libavfilter/vf_dctdnoiz.c b/libavfilter/vf_dctdnoiz.c index a89f2631c82..2019a5b456e 100644 --- a/libavfilter/vf_dctdnoiz.c +++ b/libavfilter/vf_dctdnoiz.c @@ -564,6 +564,9 @@ static int config_input(AVFilterLink *inlink) inlink->h - s->pr_height); max_slice_h = s->pr_height / ((s->bsize - 1) * 2); + if (max_slice_h == 0) + return AVERROR(EINVAL); + s->nb_threads = FFMIN3(MAX_THREADS, ff_filter_get_nb_threads(ctx), max_slice_h); av_log(ctx, AV_LOG_DEBUG, "threads: [max=%d hmax=%d user=%d] => %d\n", MAX_THREADS, max_slice_h, ff_filter_get_nb_threads(ctx), s->nb_threads); diff --git a/libavfilter/vf_fftdnoiz.c b/libavfilter/vf_fftdnoiz.c index 856d716be53..eea1887e407 100644 --- a/libavfilter/vf_fftdnoiz.c +++ b/libavfilter/vf_fftdnoiz.c @@ -161,7 +161,7 @@ static void export_row8(FFTComplex *src, uint8_t *dst, int rw, float scale, int int j; for (j = 0; j < rw; j++) - dst[j] = av_clip_uint8(src[j].re * scale + 0.5f); + dst[j] = av_clip_uint8(lrintf(src[j].re * scale)); } static void import_row16(FFTComplex *dst, uint8_t *srcp, int rw) diff --git a/libavfilter/vf_mestimate.c b/libavfilter/vf_mestimate.c index 7ecfe7da603..9a2865a0cba 100644 --- a/libavfilter/vf_mestimate.c +++ b/libavfilter/vf_mestimate.c @@ -100,6 +100,9 @@ static int config_input(AVFilterLink *inlink) s->b_height = inlink->h >> s->log2_mb_size; s->b_count = s->b_width * s->b_height; + if (s->b_count == 0) + return AVERROR(EINVAL); + for (i = 0; i < 3; i++) { s->mv_table[i] = av_mallocz_array(s->b_count, sizeof(*s->mv_table[0])); if (!s->mv_table[i]) diff --git a/libavfilter/vf_overlay_cuda.c b/libavfilter/vf_overlay_cuda.c index f6ee43e9295..34241c8e1b5 100644 --- a/libavfilter/vf_overlay_cuda.c +++ b/libavfilter/vf_overlay_cuda.c @@ -63,6 +63,7 @@ typedef struct OverlayCUDAContext { enum AVPixelFormat in_format_overlay; enum AVPixelFormat in_format_main; + AVBufferRef *hw_device_ctx; AVCUDADeviceContext *hwctx; CUcontext cu_ctx; @@ -256,6 +257,9 @@ static av_cold void overlay_cuda_uninit(AVFilterContext *avctx) CHECK_CU(cu->cuModuleUnload(ctx->cu_module)); CHECK_CU(cu->cuCtxPopCurrent(&dummy)); } + + av_buffer_unref(&ctx->hw_device_ctx); + ctx->hwctx = NULL; } /** @@ -341,13 +345,19 @@ static int overlay_cuda_config_output(AVFilterLink *outlink) // initialize - ctx->hwctx = frames_ctx->device_ctx->hwctx; + ctx->hw_device_ctx = av_buffer_ref(frames_ctx->device_ref); + if (!ctx->hw_device_ctx) + return AVERROR(ENOMEM); + ctx->hwctx = ((AVHWDeviceContext*)ctx->hw_device_ctx->data)->hwctx; + cuda_ctx = ctx->hwctx->cuda_ctx; ctx->fs.time_base = inlink->time_base; ctx->cu_stream = ctx->hwctx->stream; outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); + if (!outlink->hw_frames_ctx) + return AVERROR(ENOMEM); // load functions diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c index 5ad9334d02c..3ca6ba2368d 100644 --- a/libavfilter/vf_scale.c +++ b/libavfilter/vf_scale.c @@ -739,6 +739,18 @@ static int scale_frame(AVFilterLink *link, AVFrame *in, AVFrame **frame_out) out->width = outlink->w; out->height = outlink->h; + // Sanity checks: + // 1. If the output is RGB, set the matrix coefficients to RGB. + // 2. 
If the output is not RGB and we've got the RGB/XYZ (identity) + // matrix configured, unset the matrix. + // In theory these should be in swscale itself as the AVFrame + // based API gets in, so that not every swscale API user has + // to go through duplicating such sanity checks. + if (av_pix_fmt_desc_get(out->format)->flags & AV_PIX_FMT_FLAG_RGB) + out->colorspace = AVCOL_SPC_RGB; + else if (out->colorspace == AVCOL_SPC_RGB) + out->colorspace = AVCOL_SPC_UNSPECIFIED; + if (scale->output_is_pal) avpriv_set_systematic_pal2((uint32_t*)out->data[1], outlink->format == AV_PIX_FMT_PAL8 ? AV_PIX_FMT_BGR8 : outlink->format); diff --git a/libavfilter/vf_scale_npp.c b/libavfilter/vf_scale_npp.c index 502ecfda946..34debc3135a 100644 --- a/libavfilter/vf_scale_npp.c +++ b/libavfilter/vf_scale_npp.c @@ -481,13 +481,16 @@ static int nppscale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in) src = s->stages[i].frame; last_stage = i; } - if (last_stage < 0) return AVERROR_BUG; + ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); if (ret < 0) return ret; + s->tmp_frame->width = src->width; + s->tmp_frame->height = src->height; + av_frame_move_ref(out, src); av_frame_move_ref(src, s->tmp_frame); diff --git a/libavfilter/vf_vmafmotion.c b/libavfilter/vf_vmafmotion.c index 88d0b350959..0730147e7d8 100644 --- a/libavfilter/vf_vmafmotion.c +++ b/libavfilter/vf_vmafmotion.c @@ -238,6 +238,9 @@ int ff_vmafmotion_init(VMAFMotionData *s, int i; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); + if (w < 3 || h < 3) + return AVERROR(EINVAL); + s->width = w; s->height = h; s->stride = FFALIGN(w * sizeof(uint16_t), 32); diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c index 43dea67addc..06fd24ecfaf 100644 --- a/libavfilter/vf_yadif.c +++ b/libavfilter/vf_yadif.c @@ -123,20 +123,22 @@ static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1, uint8_t *next2 = parity ? cur : next; const int edge = MAX_ALIGN - 1; + int offset = FFMAX(w - edge, 3); /* Only edge pixels need to be processed here. A constant value of false * for is_not_edge should let the compiler ignore the whole branch. */ - FILTER(0, 3, 0) + FILTER(0, FFMIN(3, w), 0) - dst = (uint8_t*)dst1 + w - edge; - prev = (uint8_t*)prev1 + w - edge; - cur = (uint8_t*)cur1 + w - edge; - next = (uint8_t*)next1 + w - edge; + dst = (uint8_t*)dst1 + offset; + prev = (uint8_t*)prev1 + offset; + cur = (uint8_t*)cur1 + offset; + next = (uint8_t*)next1 + offset; prev2 = (uint8_t*)(parity ? prev : cur); next2 = (uint8_t*)(parity ? cur : next); - FILTER(w - edge, w - 3, 1) - FILTER(w - 3, w, 0) + FILTER(offset, w - 3, 1) + offset = FFMAX(offset, w - 3); + FILTER(offset, w, 0) } @@ -170,21 +172,23 @@ static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1, uint16_t *next2 = parity ? cur : next; const int edge = MAX_ALIGN / 2 - 1; + int offset = FFMAX(w - edge, 3); mrefs /= 2; prefs /= 2; - FILTER(0, 3, 0) + FILTER(0, FFMIN(3, w), 0) - dst = (uint16_t*)dst1 + w - edge; - prev = (uint16_t*)prev1 + w - edge; - cur = (uint16_t*)cur1 + w - edge; - next = (uint16_t*)next1 + w - edge; + dst = (uint16_t*)dst1 + offset; + prev = (uint16_t*)prev1 + offset; + cur = (uint16_t*)cur1 + offset; + next = (uint16_t*)next1 + offset; prev2 = (uint16_t*)(parity ? prev : cur); next2 = (uint16_t*)(parity ? 
cur : next); - FILTER(w - edge, w - 3, 1) - FILTER(w - 3, w, 0) + FILTER(offset, w - 3, 1) + offset = FFMAX(offset, w - 3); + FILTER(offset, w, 0) } static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) diff --git a/libavformat/aaxdec.c b/libavformat/aaxdec.c index c6d2d1c8d11..e69e5615ee4 100644 --- a/libavformat/aaxdec.c +++ b/libavformat/aaxdec.c @@ -117,6 +117,7 @@ static int aax_read_header(AVFormatContext *s) int64_t column_offset = 0; int ret, extradata_size; char *codec; + int64_t ret64; avio_skip(pb, 4); a->table_size = avio_rb32(pb) + 8LL; @@ -218,7 +219,10 @@ static int aax_read_header(AVFormatContext *s) } } - avio_seek(pb, a->strings_offset, SEEK_SET); + ret = ret64 = avio_seek(pb, a->strings_offset, SEEK_SET); + if (ret64 < 0) + goto fail; + ret = avio_read(pb, a->string_table, a->strings_size); if (ret != a->strings_size) { if (ret < 0) @@ -249,7 +253,10 @@ static int aax_read_header(AVFormatContext *s) goto fail; } - avio_seek(pb, data_offset, SEEK_SET); + ret = ret64 = avio_seek(pb, data_offset, SEEK_SET); + if (ret64 < 0) + goto fail; + if (type == COLUMN_TYPE_VLDATA) { int64_t start, size; @@ -281,8 +288,8 @@ static int aax_read_header(AVFormatContext *s) codec = a->string_table + a->name_offset; if (!strcmp(codec, "AAX")) { par->codec_id = AV_CODEC_ID_ADPCM_ADX; - avio_seek(pb, a->segments[0].start, SEEK_SET); - if (avio_rb16(pb) != 0x8000) { + ret64 = avio_seek(pb, a->segments[0].start, SEEK_SET); + if (ret64 < 0 || avio_rb16(pb) != 0x8000) { ret = AVERROR_INVALIDDATA; goto fail; } diff --git a/libavformat/adtsenc.c b/libavformat/adtsenc.c index 3595cb3bb23..c35a12a6283 100644 --- a/libavformat/adtsenc.c +++ b/libavformat/adtsenc.c @@ -51,9 +51,11 @@ static int adts_decode_extradata(AVFormatContext *s, ADTSContext *adts, const ui GetBitContext gb; PutBitContext pb; MPEG4AudioConfig m4ac; - int off; + int off, ret; - init_get_bits(&gb, buf, size * 8); + ret = init_get_bits8(&gb, buf, size); + if (ret < 0) + return ret; off = avpriv_mpeg4audio_get_config2(&m4ac, buf, size, 1, s); if (off < 0) return off; diff --git a/libavformat/aiffdec.c b/libavformat/aiffdec.c index dcaf1560b60..8b85fea809c 100644 --- a/libavformat/aiffdec.c +++ b/libavformat/aiffdec.c @@ -100,6 +100,9 @@ static int get_aiff_header(AVFormatContext *s, int size, int sample_rate; unsigned int num_frames; + if (size == INT_MAX) + return AVERROR_INVALIDDATA; + if (size & 1) size++; par->codec_type = AVMEDIA_TYPE_AUDIO; diff --git a/libavformat/asfdec_f.c b/libavformat/asfdec_f.c index f784e629963..c0265af20d2 100644 --- a/libavformat/asfdec_f.c +++ b/libavformat/asfdec_f.c @@ -522,7 +522,7 @@ static int asf_read_stream_properties(AVFormatContext *s, int64_t size) tag1 = avio_rl32(pb); avio_skip(pb, 20); if (sizeX > 40) { - if (size < sizeX - 40) + if (size < sizeX - 40 || sizeX - 40 > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) return AVERROR_INVALIDDATA; st->codecpar->extradata_size = ffio_limit(pb, sizeX - 40); st->codecpar->extradata = av_mallocz(st->codecpar->extradata_size + diff --git a/libavformat/asfdec_o.c b/libavformat/asfdec_o.c index 7c085651801..f98ffc76faa 100644 --- a/libavformat/asfdec_o.c +++ b/libavformat/asfdec_o.c @@ -685,7 +685,7 @@ static int asf_read_properties(AVFormatContext *s, const GUIDParseTable *g) return 0; } -static int parse_video_info(AVIOContext *pb, AVStream *st) +static int parse_video_info(AVFormatContext *avfmt, AVIOContext *pb, AVStream *st) { uint16_t size_asf; // ASF-specific Format Data size uint32_t size_bmp; // BMP_HEADER-specific Format 
Data size @@ -700,19 +700,10 @@ static int parse_video_info(AVIOContext *pb, AVStream *st) st->codecpar->codec_id = ff_codec_get_id(ff_codec_bmp_tags, tag); size_bmp = FFMAX(size_asf, size_bmp); - if (size_bmp > BMP_HEADER_SIZE && - size_bmp < INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) { - int ret; - st->codecpar->extradata_size = size_bmp - BMP_HEADER_SIZE; - if (!(st->codecpar->extradata = av_malloc(st->codecpar->extradata_size + - AV_INPUT_BUFFER_PADDING_SIZE))) { - st->codecpar->extradata_size = 0; - return AVERROR(ENOMEM); - } - memset(st->codecpar->extradata + st->codecpar->extradata_size , 0, - AV_INPUT_BUFFER_PADDING_SIZE); - if ((ret = avio_read(pb, st->codecpar->extradata, - st->codecpar->extradata_size)) < 0) + if (size_bmp > BMP_HEADER_SIZE) { + int ret = ff_get_extradata(avfmt, st->codecpar, pb, size_bmp - BMP_HEADER_SIZE); + + if (ret < 0) return ret; } return 0; @@ -795,7 +786,7 @@ static int asf_read_stream_properties(AVFormatContext *s, const GUIDParseTable * break; case AVMEDIA_TYPE_VIDEO: asf_st->type = AVMEDIA_TYPE_VIDEO; - if ((ret = parse_video_info(pb, st)) < 0) + if ((ret = parse_video_info(s, pb, st)) < 0) return ret; break; default: diff --git a/libavformat/avidec.c b/libavformat/avidec.c index 59929afd498..542161e3600 100644 --- a/libavformat/avidec.c +++ b/libavformat/avidec.c @@ -165,7 +165,7 @@ static int get_riff(AVFormatContext *s, AVIOContext *pb) return 0; } -static int read_odml_index(AVFormatContext *s, int frame_num) +static int read_odml_index(AVFormatContext *s, int64_t frame_num) { AVIContext *avi = s->priv_data; AVIOContext *pb = s->pb; @@ -185,7 +185,7 @@ static int read_odml_index(AVFormatContext *s, int frame_num) av_log(s, AV_LOG_TRACE, "longs_per_entry:%d index_type:%d entries_in_use:%d " - "chunk_id:%X base:%16"PRIX64" frame_num:%d\n", + "chunk_id:%X base:%16"PRIX64" frame_num:%"PRId64"\n", longs_per_entry, index_type, entries_in_use, @@ -245,7 +245,7 @@ static int read_odml_index(AVFormatContext *s, int frame_num) avio_rl32(pb); /* size */ duration = avio_rl32(pb); - if (avio_feof(pb)) + if (avio_feof(pb) || offset > INT64_MAX - 8) return AVERROR_INVALIDDATA; pos = avio_tell(pb); @@ -1783,7 +1783,10 @@ static int avi_load_index(AVFormatContext *s) size = avio_rl32(pb); if (avio_feof(pb)) break; - next = avio_tell(pb) + size + (size & 1); + next = avio_tell(pb); + if (next < 0 || next > INT64_MAX - size - (size & 1)) + break; + next += size + (size & 1LL); if (tag == MKTAG('i', 'd', 'x', '1') && avi_read_idx1(s, size) >= 0) { diff --git a/libavformat/avio.c b/libavformat/avio.c index 8011482e769..cd9b5d9e7f9 100644 --- a/libavformat/avio.c +++ b/libavformat/avio.c @@ -316,8 +316,11 @@ int ffurl_open_whitelist(URLContext **puc, const char *filename, int flags, int ret = ffurl_alloc(puc, filename, flags, int_cb); if (ret < 0) return ret; - if (parent) - av_opt_copy(*puc, parent); + if (parent) { + ret = av_opt_copy(*puc, parent); + if (ret < 0) + goto fail; + } if (options && (ret = av_opt_set_dict(*puc, options)) < 0) goto fail; diff --git a/libavformat/bfi.c b/libavformat/bfi.c index 2dab986f3af..f9e0bb2e30d 100644 --- a/libavformat/bfi.c +++ b/libavformat/bfi.c @@ -73,6 +73,8 @@ static int bfi_read_header(AVFormatContext * s) return AVERROR_INVALIDDATA; bfi->nframes = avio_rl32(pb); + if (bfi->nframes < 0) + return AVERROR_INVALIDDATA; avio_rl32(pb); avio_rl32(pb); avio_rl32(pb); diff --git a/libavformat/cafdec.c b/libavformat/cafdec.c index 76bc4a46552..7f09a279779 100644 --- a/libavformat/cafdec.c +++ b/libavformat/cafdec.c @@ -79,7 +79,7 @@ 
static int read_desc_chunk(AVFormatContext *s) st->codecpar->channels = avio_rb32(pb); st->codecpar->bits_per_coded_sample = avio_rb32(pb); - if (caf->bytes_per_packet < 0 || caf->frames_per_packet < 0) + if (caf->bytes_per_packet < 0 || caf->frames_per_packet < 0 || st->codecpar->channels < 0) return AVERROR_INVALIDDATA; /* calculate bit rate for constant size packets */ diff --git a/libavformat/dsfdec.c b/libavformat/dsfdec.c index 1df163e114c..71dbf2f1125 100644 --- a/libavformat/dsfdec.c +++ b/libavformat/dsfdec.c @@ -129,7 +129,7 @@ static int dsf_read_header(AVFormatContext *s) return AVERROR_INVALIDDATA; } st->codecpar->block_align *= st->codecpar->channels; - st->codecpar->bit_rate = st->codecpar->channels * st->codecpar->sample_rate * 8LL; + st->codecpar->bit_rate = st->codecpar->channels * 8LL * st->codecpar->sample_rate; avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); avio_skip(pb, 4); diff --git a/libavformat/dsicin.c b/libavformat/dsicin.c index b18f43b9a0e..5a1f2565952 100644 --- a/libavformat/dsicin.c +++ b/libavformat/dsicin.c @@ -166,7 +166,8 @@ static int cin_read_packet(AVFormatContext *s, AVPacket *pkt) CinDemuxContext *cin = s->priv_data; AVIOContext *pb = s->pb; CinFrameHeader *hdr = &cin->frame_header; - int rc, palette_type, pkt_size; + int rc, palette_type; + int64_t pkt_size; int ret; if (cin->audio_buffer_size == 0) { @@ -182,7 +183,9 @@ static int cin_read_packet(AVFormatContext *s, AVPacket *pkt) } /* palette and video packet */ - pkt_size = (palette_type + 3) * hdr->pal_colors_count + hdr->video_frame_size; + pkt_size = (palette_type + 3LL) * hdr->pal_colors_count + hdr->video_frame_size; + if (pkt_size + 4 > INT_MAX) + return AVERROR_INVALIDDATA; pkt_size = ffio_limit(pb, pkt_size); diff --git a/libavformat/dxa.c b/libavformat/dxa.c index 909c5ba2baf..cd9c4898515 100644 --- a/libavformat/dxa.c +++ b/libavformat/dxa.c @@ -79,7 +79,7 @@ static int dxa_read_header(AVFormatContext *s) if(fps > 0){ den = 1000; num = fps; - }else if (fps < 0){ + }else if (fps < 0 && fps > INT_MIN){ den = 100000; num = -fps; }else{ diff --git a/libavformat/fifo.c b/libavformat/fifo.c index 17748e94ce5..a1dca1bc160 100644 --- a/libavformat/fifo.c +++ b/libavformat/fifo.c @@ -593,7 +593,7 @@ static int fifo_write_packet(AVFormatContext *avf, AVPacket *pkt) goto fail; } - if (fifo->timeshift && pkt->dts != AV_NOPTS_VALUE) + if (fifo->timeshift && pkt && pkt->dts != AV_NOPTS_VALUE) atomic_fetch_add_explicit(&fifo->queue_duration, next_duration(avf, pkt, &fifo->last_sent_dts), memory_order_relaxed); return ret; diff --git a/libavformat/ftp.c b/libavformat/ftp.c index caeea429209..69caa7670ce 100644 --- a/libavformat/ftp.c +++ b/libavformat/ftp.c @@ -972,6 +972,8 @@ static int ftp_parse_entry_mlsd(char *mlsd, AVIODirEntry *next) continue; } fact = av_strtok(fact, "=", &value); + if (!fact) + continue; if (!av_strcasecmp(fact, "type")) { if (!av_strcasecmp(value, "cdir") || !av_strcasecmp(value, "pdir")) return 1; diff --git a/libavformat/hlsenc.c b/libavformat/hlsenc.c index 7d97ce17891..e222b70ffa1 100644 --- a/libavformat/hlsenc.c +++ b/libavformat/hlsenc.c @@ -2672,14 +2672,13 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt) vs->packets_written++; if (oc->pb) { - int64_t keyframe_pre_pos = avio_tell(oc->pb); ret = ff_write_chained(oc, stream_index, pkt, s, 0); - if ((st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) && - (pkt->flags & AV_PKT_FLAG_KEY) && !keyframe_pre_pos) { - av_write_frame(oc, NULL); /* Flush any buffered data */ - vs->video_keyframe_size = 
avio_tell(oc->pb) - keyframe_pre_pos; + vs->video_keyframe_size += pkt->size; + if ((st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) && (pkt->flags & AV_PKT_FLAG_KEY)) { + vs->video_keyframe_size = avio_tell(oc->pb); + } else { + vs->video_keyframe_pos = avio_tell(vs->out); } - vs->video_keyframe_pos = vs->start_pos; if (hls->ignore_io_errors) ret = 0; } diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c index f33b7ba93a5..1377cef4b89 100644 --- a/libavformat/id3v2.c +++ b/libavformat/id3v2.c @@ -816,7 +816,7 @@ static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata, int isv34, unsync; unsigned tlen; char tag[5]; - int64_t next, end = avio_tell(pb) + len; + int64_t next, end = avio_tell(pb); int taghdrlen; const char *reason = NULL; AVIOContext pb_local; @@ -828,6 +828,10 @@ static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata, av_unused int uncompressed_buffer_size = 0; const char *comm_frame; + if (end > INT64_MAX - len - 10) + return; + end += len; + av_log(s, AV_LOG_DEBUG, "id3v2 ver:%d flags:%02X len:%d\n", version, flags, len); switch (version) { diff --git a/libavformat/iff.c b/libavformat/iff.c index b07b6c8b187..c15302d3c56 100644 --- a/libavformat/iff.c +++ b/libavformat/iff.c @@ -385,7 +385,7 @@ static int read_dst_frame(AVFormatContext *s, AVPacket *pkt) avio_skip(pb, 1); pkt->flags |= AV_PKT_FLAG_KEY; pkt->stream_index = 0; - pkt->duration = 588 * s->streams[0]->codecpar->sample_rate / 44100; + pkt->duration = 588LL * s->streams[0]->codecpar->sample_rate / 44100; pkt->pos = chunk_pos; chunk_pos = avio_tell(pb); diff --git a/libavformat/jacosubdec.c b/libavformat/jacosubdec.c index 9c6640eef83..2ccbf4c9dec 100644 --- a/libavformat/jacosubdec.c +++ b/libavformat/jacosubdec.c @@ -141,6 +141,9 @@ static int get_shift(int timeres, const char *buf) int n = sscanf(buf, "%d"SSEP"%d"SSEP"%d"SSEP"%d", &a, &b, &c, &d); #undef SSEP + if (a == INT_MIN) + return 0; + if (*buf == '-' || a < 0) { sign = -1; a = FFABS(a); diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index 65756ae06d4..fb1849f9c31 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -802,20 +802,22 @@ static int matroska_read_close(AVFormatContext *s); static int matroska_reset_status(MatroskaDemuxContext *matroska, uint32_t id, int64_t position) { + int64_t err = 0; if (position >= 0) { - int64_t err = avio_seek(matroska->ctx->pb, position, SEEK_SET); - if (err < 0) - return err; - } + err = avio_seek(matroska->ctx->pb, position, SEEK_SET); + if (err > 0) + err = 0; + } else + position = avio_tell(matroska->ctx->pb); matroska->current_id = id; matroska->num_levels = 1; matroska->unknown_count = 0; - matroska->resync_pos = avio_tell(matroska->ctx->pb); + matroska->resync_pos = position; if (id) matroska->resync_pos -= (av_log2(id) + 7) / 8; - return 0; + return err; } static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos) @@ -1871,6 +1873,7 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, uint32_t saved_id = matroska->current_id; int64_t before_pos = avio_tell(matroska->ctx->pb); int ret = 0; + int ret2; /* seek */ if (avio_seek(matroska->ctx->pb, pos, SEEK_SET) == pos) { @@ -1895,7 +1898,9 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, } /* Seek back - notice that in all instances where this is used * it is safe to set the level to 1. 
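* (Everything a SeekHead can reference - Info, Tracks, Chapters, Cues, Attachments, Tags - is an immediate child of the Segment, so after seeking back the parser is always directly below the Segment again.)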
*/ - matroska_reset_status(matroska, saved_id, before_pos); + ret2 = matroska_reset_status(matroska, saved_id, before_pos); + if (ret >= 0) + ret = ret2; return ret; } @@ -2807,8 +2812,9 @@ static int matroska_parse_tracks(AVFormatContext *s) st->need_parsing = AVSTREAM_PARSE_HEADERS; if (track->default_duration) { + int div = track->default_duration <= INT64_MAX ? 1 : 2; av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den, - 1000000000, track->default_duration, 30000); + 1000000000 / div, track->default_duration / div, 30000); #if FF_API_R_FRAME_RATE if ( st->avg_frame_rate.num < st->avg_frame_rate.den * 1000LL && st->avg_frame_rate.num > st->avg_frame_rate.den * 5LL) diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index bbf231f2a46..b4284a87785 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -1768,6 +1768,7 @@ static int mkv_write_attachments(AVFormatContext *s) put_ebml_string(dyn_cp, MATROSKA_ID_FILEDESC, t->value); if (!(t = av_dict_get(st->metadata, "filename", NULL, 0))) { av_log(s, AV_LOG_ERROR, "Attachment stream %d has no filename tag.\n", i); + ffio_free_dyn_buf(&dyn_cp); return AVERROR(EINVAL); } put_ebml_string(dyn_cp, MATROSKA_ID_FILENAME, t->value); diff --git a/libavformat/mccdec.c b/libavformat/mccdec.c index 2a0b7905a01..627471a1fe3 100644 --- a/libavformat/mccdec.c +++ b/libavformat/mccdec.c @@ -127,8 +127,7 @@ static int mcc_read_header(AVFormatContext *s) num = strtol(rate_str, &df, 10); den = 1; if (df && !av_strncasecmp(df, "DF", 2)) { - num *= 1000; - den = 1001; + av_reduce(&num, &den, num * 1000LL, 1001, INT_MAX); } } diff --git a/libavformat/moflex.c b/libavformat/moflex.c index 41335ada789..0706f88e641 100644 --- a/libavformat/moflex.c +++ b/libavformat/moflex.c @@ -172,7 +172,6 @@ static int moflex_read_sync(AVFormatContext *s) unsigned type, ssize, codec_id = 0; unsigned codec_type, width = 0, height = 0, sample_rate = 0, channels = 0; int stream_index = -1; - int format; AVRational fps; read_var_byte(s, &type); @@ -213,7 +212,6 @@ static int moflex_read_sync(AVFormatContext *s) fps.den = avio_rb16(pb); width = avio_rb16(pb); height = avio_rb16(pb); - format = AV_PIX_FMT_YUV420P; avio_skip(pb, type == 3 ? 
3 : 2); break; case 4: @@ -235,7 +233,6 @@ static int moflex_read_sync(AVFormatContext *s) st->codecpar->height = height; st->codecpar->sample_rate= sample_rate; st->codecpar->channels = channels; - st->codecpar->format = format; st->priv_data = av_packet_alloc(); if (!st->priv_data) return AVERROR(ENOMEM); diff --git a/libavformat/mov.c b/libavformat/mov.c index 38a70589be2..4af796ee31c 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -294,6 +294,8 @@ static int mov_metadata_hmmt(MOVContext *c, AVIOContext *pb, unsigned len) int moment_time = avio_rb32(pb); avpriv_new_chapter(c->fc, i, av_make_q(1, 1000), moment_time, AV_NOPTS_VALUE, NULL); } + if (avio_feof(pb)) + return AVERROR_INVALIDDATA; return 0; } @@ -3835,7 +3837,11 @@ static void mov_build_index(MOVContext *mov, AVStream *st) if ((empty_duration || start_time) && mov->time_scale > 0) { if (empty_duration) empty_duration = av_rescale(empty_duration, sc->time_scale, mov->time_scale); - sc->time_offset = start_time - empty_duration; + + if (av_sat_sub64(start_time, empty_duration) != start_time - (uint64_t)empty_duration) + av_log(mov->fc, AV_LOG_WARNING, "start_time - empty_duration is not representable\n"); + + sc->time_offset = start_time - (uint64_t)empty_duration; sc->min_corrected_pts = start_time; if (!mov->advanced_editlist) current_dts = -sc->time_offset; @@ -4700,6 +4706,8 @@ static int mov_read_chap(MOVContext *c, AVIOContext *pb, MOVAtom atom) for (i = 0; i < num && !pb->eof_reached; i++) c->chapter_tracks[i] = avio_rb32(pb); + c->nb_chapter_tracks = i; + return 0; } @@ -4984,6 +4992,8 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom) "size %u, distance %d, keyframe %d\n", st->index, index_entry_pos, offset, dts, sample_size, distance, keyframe); distance++; + if (av_sat_add64(dts, sample_duration) != dts + (uint64_t)sample_duration) + return AVERROR_INVALIDDATA; dts += sample_duration; offset += sample_size; sc->data_size += sample_size; @@ -5124,7 +5134,9 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom) if (frag_stream_info) frag_stream_info->sidx_pts = timestamp; - if (av_sat_add64(offset, size) != offset + size) + if (av_sat_add64(offset, size) != offset + (uint64_t)size || + av_sat_add64(pts, duration) != pts + (uint64_t)duration + ) return AVERROR_INVALIDDATA; offset += size; pts += duration; @@ -5136,7 +5148,7 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom) // See if the remaining bytes are just an mfra which we can ignore. 
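// (The check below is rewritten as offset == stream_size - c->mfra_size instead of offset + c->mfra_size == stream_size, apparently so that an mfra_size read from an untrusted file cannot overflow the addition.)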
is_complete = offset == stream_size; - if (!is_complete && (pb->seekable & AVIO_SEEKABLE_NORMAL)) { + if (!is_complete && (pb->seekable & AVIO_SEEKABLE_NORMAL) && stream_size > 0 ) { int64_t ret; int64_t original_pos = avio_tell(pb); if (!c->have_read_mfra_size) { @@ -5147,7 +5159,7 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom) if ((ret = avio_seek(pb, original_pos, SEEK_SET)) < 0) return ret; } - if (offset + c->mfra_size == stream_size) + if (offset == stream_size - c->mfra_size) is_complete = 1; } @@ -5464,7 +5476,7 @@ static int mov_read_mdcv(MOVContext *c, AVIOContext *pb, MOVAtom atom) sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data; - if (atom.size < 24) { + if (atom.size < 24 || sc->mastering) { av_log(c->fc, AV_LOG_ERROR, "Invalid Mastering Display Color Volume box\n"); return AVERROR_INVALIDDATA; } @@ -5512,6 +5524,11 @@ static int mov_read_coll(MOVContext *c, AVIOContext *pb, MOVAtom atom) } avio_skip(pb, 3); /* flags */ + if (sc->coll){ + av_log(c->fc, AV_LOG_WARNING, "Ignoring duplicate COLL\n"); + return 0; + } + sc->coll = av_content_light_metadata_alloc(&sc->coll_size); if (!sc->coll) return AVERROR(ENOMEM); @@ -5536,6 +5553,11 @@ static int mov_read_clli(MOVContext *c, AVIOContext *pb, MOVAtom atom) return AVERROR_INVALIDDATA; } + if (sc->coll){ + av_log(c->fc, AV_LOG_WARNING, "Ignoring duplicate CLLI/COLL\n"); + return 0; + } + sc->coll = av_content_light_metadata_alloc(&sc->coll_size); if (!sc->coll) return AVERROR(ENOMEM); diff --git a/libavformat/movenc.c b/libavformat/movenc.c index bade57dcea9..2cd5773dc5e 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -797,6 +797,7 @@ static int mov_write_dfla_tag(AVIOContext *pb, MOVTrack *track) static int mov_write_dops_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track) { int64_t pos = avio_tell(pb); + int channels, channel_map; avio_wb32(pb, 0); ffio_wfourcc(pb, "dOps"); avio_w8(pb, 0); /* Version */ @@ -807,12 +808,22 @@ static int mov_write_dops_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra /* extradata contains an Ogg OpusHead, other than byte-ordering and OpusHead's preceding magic/version, OpusSpecificBox is currently identical. */ - avio_w8(pb, AV_RB8(track->par->extradata + 9)); /* OutputChannelCount */ + channels = AV_RB8(track->par->extradata + 9); + channel_map = AV_RB8(track->par->extradata + 18); + + avio_w8(pb, channels); /* OutputChannelCount */ avio_wb16(pb, AV_RL16(track->par->extradata + 10)); /* PreSkip */ avio_wb32(pb, AV_RL32(track->par->extradata + 12)); /* InputSampleRate */ avio_wb16(pb, AV_RL16(track->par->extradata + 16)); /* OutputGain */ + avio_w8(pb, channel_map); /* ChannelMappingFamily */ /* Write the rest of the header out without byte-swapping. 
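Per RFC 7845, an OpusHead with a nonzero ChannelMappingFamily ends with a ChannelMappingTable (StreamCount, CoupledCount and one mapping byte per output channel), which is why 2 + channels bytes are copied from extradata offset 19 below.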
*/ - avio_write(pb, track->par->extradata + 18, track->par->extradata_size - 18); + if (channel_map) { + if (track->par->extradata_size < 21 + channels) { + av_log(s, AV_LOG_ERROR, "invalid extradata size\n"); + return AVERROR_INVALIDDATA; + } + avio_write(pb, track->par->extradata + 19, 2 + channels); /* ChannelMappingTable */ + } return update_size(pb, pos); } @@ -2166,11 +2177,13 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex avio_wb16(pb, 0x18); /* Reserved */ if (track->mode == MODE_MOV && track->par->format == AV_PIX_FMT_PAL8) { - int pal_size = 1 << track->par->bits_per_coded_sample; - int i; + int pal_size, i; avio_wb16(pb, 0); /* Color table ID */ avio_wb32(pb, 0); /* Color table seed */ avio_wb16(pb, 0x8000); /* Color table flags */ + if (track->par->bits_per_coded_sample < 0 || track->par->bits_per_coded_sample > 8) + return AVERROR(EINVAL); + pal_size = 1 << track->par->bits_per_coded_sample; avio_wb16(pb, pal_size - 1); /* Color table size (zero-relative) */ for (i = 0; i < pal_size; i++) { uint32_t rgb = track->palette[i]; diff --git a/libavformat/mpc8.c b/libavformat/mpc8.c index 88c55e3d22f..c3d7e115a75 100644 --- a/libavformat/mpc8.c +++ b/libavformat/mpc8.c @@ -177,7 +177,13 @@ static void mpc8_parse_seektable(AVFormatContext *s, int64_t off) } seekd = get_bits(&gb, 4); for(i = 0; i < 2; i++){ - pos = gb_get_v(&gb) + c->header_pos; + pos = gb_get_v(&gb); + if (av_sat_add64(pos, c->header_pos) != pos + (uint64_t)c->header_pos) { + av_free(buf); + return; + } + + pos += c->header_pos; ppos[1 - i] = pos; av_add_index_entry(s->streams[0], pos, i, 0, 0, AVINDEX_KEYFRAME); } @@ -205,8 +211,11 @@ static void mpc8_handle_chunk(AVFormatContext *s, int tag, int64_t chunk_pos, in switch(tag){ case TAG_SEEKTBLOFF: - pos = avio_tell(pb) + size; + pos = avio_tell(pb); off = ffio_read_varlen(pb); + if (pos > INT64_MAX - size || off < 0 || off > INT64_MAX - chunk_pos) + return; + pos += size; mpc8_parse_seektable(s, chunk_pos + off); avio_seek(pb, pos, SEEK_SET); break; diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index 6e0d9d74963..a3033134f71 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -2026,6 +2026,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type return AVERROR_INVALIDDATA; if (channel_config_code <= 0x8) { st->codecpar->extradata[9] = channels = channel_config_code ? channel_config_code : 2; + AV_WL32(&st->codecpar->extradata[12], 48000); st->codecpar->extradata[18] = channel_config_code ? (channels > 2) : /* Dual Mono */ 255; st->codecpar->extradata[19] = opus_stream_cnt[channel_config_code]; st->codecpar->extradata[20] = opus_coupled_stream_cnt[channel_config_code]; @@ -2861,8 +2862,8 @@ static int mpegts_resync(AVFormatContext *s, int seekback, const uint8_t *curren int64_t back = FFMIN(seekback, pos); //Special case for files like 01c56b0dc1.ts - if (current_packet[0] == 0x80 && current_packet[12] == 0x47) { - avio_seek(pb, 12 - back, SEEK_CUR); + if (current_packet[0] == 0x80 && current_packet[12] == 0x47 && pos >= TS_PACKET_SIZE) { + avio_seek(pb, 12 - TS_PACKET_SIZE, SEEK_CUR); return 0; } diff --git a/libavformat/msf.c b/libavformat/msf.c index 155f488e440..1eaed54357c 100644 --- a/libavformat/msf.c +++ b/libavformat/msf.c @@ -70,6 +70,8 @@ static int msf_read_header(AVFormatContext *s) case 4: case 5: case 6: st->codecpar->block_align = (codec == 4 ? 96 : codec == 5 ? 
152 : 192) * st->codecpar->channels; + if (st->codecpar->channels > UINT16_MAX / 2048) + return AVERROR_INVALIDDATA; ret = ff_alloc_extradata(st->codecpar, 14); if (ret < 0) return ret; diff --git a/libavformat/mvdec.c b/libavformat/mvdec.c index 045c66ac3c1..ab7bc5a328a 100644 --- a/libavformat/mvdec.c +++ b/libavformat/mvdec.c @@ -156,7 +156,10 @@ static int parse_audio_var(AVFormatContext *avctx, AVStream *st, } else if (!strcmp(name, "NUM_CHANNELS")) { return set_channels(avctx, st, var_read_int(pb, size)); } else if (!strcmp(name, "SAMPLE_RATE")) { - st->codecpar->sample_rate = var_read_int(pb, size); + int sample_rate = var_read_int(pb, size); + if (sample_rate <= 0) + return AVERROR_INVALIDDATA; + st->codecpar->sample_rate = sample_rate; avpriv_set_pts_info(st, 33, 1, st->codecpar->sample_rate); } else if (!strcmp(name, "SAMPLE_WIDTH")) { uint64_t bpc = var_read_int(pb, size) * (uint64_t)8; diff --git a/libavformat/mvi.c b/libavformat/mvi.c index cfdbe5d273b..d005001f5a4 100644 --- a/libavformat/mvi.c +++ b/libavformat/mvi.c @@ -32,7 +32,6 @@ typedef struct MviDemuxContext { unsigned int (*get_int)(AVIOContext *); - uint32_t audio_data_size; uint64_t audio_size_counter; uint64_t audio_frame_size; int audio_size_left; @@ -46,6 +45,7 @@ static int read_header(AVFormatContext *s) AVStream *ast, *vst; unsigned int version, frames_count, msecs_per_frame, player_version; int ret; + int audio_data_size; ast = avformat_new_stream(s, NULL); if (!ast) @@ -67,13 +67,13 @@ static int read_header(AVFormatContext *s) vst->codecpar->height = avio_rl16(pb); avio_r8(pb); ast->codecpar->sample_rate = avio_rl16(pb); - mvi->audio_data_size = avio_rl32(pb); + audio_data_size = avio_rl32(pb); avio_r8(pb); player_version = avio_rl32(pb); avio_rl16(pb); avio_r8(pb); - if (frames_count == 0 || mvi->audio_data_size == 0) + if (frames_count == 0 || audio_data_size <= 0) return AVERROR_INVALIDDATA; if (version != 7 || player_version > 213) { @@ -96,16 +96,16 @@ static int read_header(AVFormatContext *s) mvi->get_int = (vst->codecpar->width * (int64_t)vst->codecpar->height < (1 << 16)) ? avio_rl16 : avio_rl24; - mvi->audio_frame_size = ((uint64_t)mvi->audio_data_size << MVI_FRAC_BITS) / frames_count; + mvi->audio_frame_size = ((uint64_t)audio_data_size << MVI_FRAC_BITS) / frames_count; if (mvi->audio_frame_size <= 1 << MVI_FRAC_BITS - 1) { av_log(s, AV_LOG_ERROR, - "Invalid audio_data_size (%"PRIu32") or frames_count (%u)\n", - mvi->audio_data_size, frames_count); + "Invalid audio_data_size (%d) or frames_count (%u)\n", + audio_data_size, frames_count); return AVERROR_INVALIDDATA; } mvi->audio_size_counter = (ast->codecpar->sample_rate * 830 / mvi->audio_frame_size - 1) * mvi->audio_frame_size; - mvi->audio_size_left = mvi->audio_data_size; + mvi->audio_size_left = audio_data_size; return 0; } diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c index 1f372affcb8..50174fcd5fe 100644 --- a/libavformat/mxfdec.c +++ b/libavformat/mxfdec.c @@ -565,6 +565,10 @@ static int mxf_get_d10_aes3_packet(AVIOContext *pb, AVStream *st, AVPacket *pkt, data_ptr = pkt->data; end_ptr = pkt->data + length; buf_ptr = pkt->data + 4; /* skip SMPTE 331M header */ + + if (st->codecpar->channels > 8) + return AVERROR_INVALIDDATA; + for (; end_ptr - buf_ptr >= st->codecpar->channels * 4; ) { for (i = 0; i < st->codecpar->channels; i++) { uint32_t sample = bytestream_get_le32(&buf_ptr); @@ -624,7 +628,7 @@ static int mxf_decrypt_triplet(AVFormatContext *s, AVPacket *pkt, KLVPacket *klv return AVERROR_INVALIDDATA; // enc. 
code size = klv_decode_ber_length(pb); - if (size < 32 || size - 32 < orig_size) + if (size < 32 || size - 32 < orig_size || (int)orig_size != orig_size) return AVERROR_INVALIDDATA; avio_read(pb, ivec, 16); avio_read(pb, tmpbuf, 16); @@ -2903,7 +2907,7 @@ static int mxf_read_local_tags(MXFContext *mxf, KLVPacket *klv, MXFMetadataReadF meta = NULL; ctx = mxf; } - while (avio_tell(pb) + 4 < klv_end && !avio_feof(pb)) { + while (avio_tell(pb) + 4ULL < klv_end && !avio_feof(pb)) { int ret; int tag = avio_rb16(pb); int size = avio_rb16(pb); /* KLV specified by 0x53 */ diff --git a/libavformat/nutdec.c b/libavformat/nutdec.c index fbecf71328f..58a74612a4a 100644 --- a/libavformat/nutdec.c +++ b/libavformat/nutdec.c @@ -286,6 +286,11 @@ static int decode_main_header(NUTContext *nut) ret = AVERROR_INVALIDDATA; goto fail; } + if (tmp_size < 0 || tmp_size > INT_MAX - count) { + av_log(s, AV_LOG_ERROR, "illegal size\n"); + ret = AVERROR_INVALIDDATA; + goto fail; + } for (j = 0; j < count; j++, i++) { if (i == 'N') { diff --git a/libavformat/pp_bnk.c b/libavformat/pp_bnk.c index 07eeca3cd5b..5ffe733b18a 100644 --- a/libavformat/pp_bnk.c +++ b/libavformat/pp_bnk.c @@ -223,7 +223,7 @@ static int pp_bnk_read_header(AVFormatContext *s) par->bits_per_coded_sample = 4; par->bits_per_raw_sample = 16; par->block_align = 1; - par->bit_rate = par->sample_rate * par->bits_per_coded_sample * par->channels; + par->bit_rate = par->sample_rate * (int64_t)par->bits_per_coded_sample * par->channels; avpriv_set_pts_info(st, 64, 1, par->sample_rate); st->start_time = 0; diff --git a/libavformat/qcp.c b/libavformat/qcp.c index 168030dc161..4478875f2d8 100644 --- a/libavformat/qcp.c +++ b/libavformat/qcp.c @@ -93,7 +93,8 @@ static int qcp_read_header(AVFormatContext *s) QCPContext *c = s->priv_data; AVStream *st = avformat_new_stream(s, NULL); uint8_t buf[16]; - int i, nb_rates; + int i; + unsigned nb_rates; if (!st) return AVERROR(ENOMEM); diff --git a/libavformat/realtextdec.c b/libavformat/realtextdec.c index f534774420a..368a741240b 100644 --- a/libavformat/realtextdec.c +++ b/libavformat/realtextdec.c @@ -111,10 +111,11 @@ static int realtext_read_header(AVFormatContext *s) if (!merge) { const char *begin = ff_smil_get_attr_ptr(buf.str, "begin"); const char *end = ff_smil_get_attr_ptr(buf.str, "end"); + int64_t endi = end ? read_ts(end) : 0; sub->pos = pos; sub->pts = begin ? read_ts(begin) : 0; - sub->duration = end ? (read_ts(end) - sub->pts) : duration; + sub->duration = (end && endi > sub->pts && endi - (uint64_t)sub->pts <= INT64_MAX) ? 
endi - sub->pts : duration; } } av_bprint_clear(&buf); diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c index fc3bff48590..97378703d10 100644 --- a/libavformat/rmdec.c +++ b/libavformat/rmdec.c @@ -128,6 +128,10 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb, uint32_t version; int ret; + // Duplicate tags + if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) + return AVERROR_INVALIDDATA; + /* ra type header */ version = avio_rb16(pb); /* version */ if (version == 3) { @@ -269,9 +273,9 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb, case DEINT_ID_INT4: if (ast->coded_framesize > ast->audio_framesize || sub_packet_h <= 1 || - ast->coded_framesize * sub_packet_h > (2 + (sub_packet_h & 1)) * ast->audio_framesize) + ast->coded_framesize * (uint64_t)sub_packet_h > (2 + (sub_packet_h & 1)) * ast->audio_framesize) return AVERROR_INVALIDDATA; - if (ast->coded_framesize * sub_packet_h != 2*ast->audio_framesize) { + if (ast->coded_framesize * (uint64_t)sub_packet_h != 2*ast->audio_framesize) { avpriv_request_sample(s, "mismatching interleaver parameters"); return AVERROR_INVALIDDATA; } @@ -1012,8 +1016,8 @@ static int rm_read_packet(AVFormatContext *s, AVPacket *pkt) { RMDemuxContext *rm = s->priv_data; AVStream *st = NULL; // init to silence compiler warning - int i, len, res, seq = 1; - int64_t timestamp, pos; + int i, res, seq = 1; + int64_t timestamp, pos, len; int flags; for (;;) { @@ -1032,7 +1036,9 @@ static int rm_read_packet(AVFormatContext *s, AVPacket *pkt) ast = st->priv_data; timestamp = AV_NOPTS_VALUE; len = !ast->audio_framesize ? RAW_PACKET_SIZE : - ast->coded_framesize * ast->sub_packet_h / 2; + ast->coded_framesize * (int64_t)ast->sub_packet_h / 2; + if (len > INT_MAX) + return AVERROR_INVALIDDATA; flags = (seq++ == 1) ? 2 : 0; pos = avio_tell(s->pb); } else { diff --git a/libavformat/rpl.c b/libavformat/rpl.c index 0f00c03a52e..ad3659e9368 100644 --- a/libavformat/rpl.c +++ b/libavformat/rpl.c @@ -103,7 +103,7 @@ static AVRational read_fps(const char* line, int* error) // Truncate any numerator too large to fit into an int64_t if (num > (INT64_MAX - 9) / 10 || den > INT64_MAX / 10) break; - num = 10 * num + *line - '0'; + num = 10 * num + (*line - '0'); den *= 10; } if (!num) @@ -207,8 +207,10 @@ static int rpl_read_header(AVFormatContext *s) ast->codecpar->bits_per_coded_sample = 4; ast->codecpar->bit_rate = ast->codecpar->sample_rate * - ast->codecpar->bits_per_coded_sample * - ast->codecpar->channels; + (int64_t)ast->codecpar->channels; + if (ast->codecpar->bit_rate > INT64_MAX / ast->codecpar->bits_per_coded_sample) + return AVERROR_INVALIDDATA; + ast->codecpar->bit_rate *= ast->codecpar->bits_per_coded_sample; ast->codecpar->codec_id = AV_CODEC_ID_NONE; switch (audio_format) { @@ -334,7 +336,7 @@ static int rpl_read_packet(AVFormatContext *s, AVPacket *pkt) avio_skip(pb, 4); /* flags */ frame_size = avio_rl32(pb); - if (avio_seek(pb, -8, SEEK_CUR) < 0) + if (avio_feof(pb) || avio_seek(pb, -8, SEEK_CUR) < 0 || !frame_size) return AVERROR(EIO); ret = av_get_packet(pb, pkt, frame_size); diff --git a/libavformat/sbgdec.c b/libavformat/sbgdec.c index 83016d0c134..36cfff20fc4 100644 --- a/libavformat/sbgdec.c +++ b/libavformat/sbgdec.c @@ -935,6 +935,9 @@ static int expand_timestamps(void *log, struct sbg_script *s) } if (s->start_ts == AV_NOPTS_VALUE) s->start_ts = (s->opt_start_at_first && s->tseq) ? 
s->tseq[0].ts.t : now; + if (s->start_ts > INT64_MAX - s->opt_duration) + return AVERROR_INVALIDDATA; + s->end_ts = s->opt_duration ? s->start_ts + s->opt_duration : AV_NOPTS_VALUE; /* may be overridden later by -E option */ cur_ts = now; @@ -961,6 +964,9 @@ static int expand_tseq(void *log, struct sbg_script *s, int *nb_ev_max, tseq->name_len, tseq->name); return AVERROR(EINVAL); } + if (t0 + (uint64_t)tseq->ts.t != av_sat_add64(t0, tseq->ts.t)) + return AVERROR(EINVAL); + t0 += tseq->ts.t; for (i = 0; i < s->nb_def; i++) { if (s->def[i].name_len == tseq->name_len && @@ -1291,6 +1297,10 @@ static int generate_intervals(void *log, struct sbg_script *s, int sample_rate, ev1 = &s->events[i]; ev2 = &s->events[(i + 1) % s->nb_events]; ev1->ts_int = ev1->ts; + + if (!ev1->fade.slide && ev1 >= ev2 && ev2->ts > INT64_MAX - period) + return AVERROR_INVALIDDATA; + ev1->ts_trans = ev1->fade.slide ? ev1->ts : ev2->ts + (ev1 < ev2 ? 0 : period); } diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c index 05c07cd8524..6368ec74f9e 100644 --- a/libavformat/subtitles.c +++ b/libavformat/subtitles.c @@ -206,7 +206,7 @@ void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q) q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos : cmp_pkt_sub_pos_ts); for (i = 0; i < q->nb_subs; i++) - if (q->subs[i]->duration < 0 && i < q->nb_subs - 1) + if (q->subs[i]->duration < 0 && i < q->nb_subs - 1 && q->subs[i + 1]->pts - (uint64_t)q->subs[i]->pts <= INT64_MAX) q->subs[i]->duration = q->subs[i + 1]->pts - q->subs[i]->pts; if (!q->keep_duplicates) diff --git a/libavformat/tta.c b/libavformat/tta.c index 07faa82eb38..6aa72b5d1d5 100644 --- a/libavformat/tta.c +++ b/libavformat/tta.c @@ -119,6 +119,8 @@ static int tta_read_header(AVFormatContext *s) for (i = 0; i < c->totalframes; i++) { uint32_t size = avio_rl32(s->pb); int r; + if (avio_feof(s->pb)) + return AVERROR_INVALIDDATA; if ((r = av_add_index_entry(st, framepos, i * (int64_t)c->frame_size, size, 0, AVINDEX_KEYFRAME)) < 0) return r; diff --git a/libavformat/utils.c b/libavformat/utils.c index 1384b567714..75e5350a277 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -1208,7 +1208,9 @@ static void update_initial_durations(AVFormatContext *s, AVStream *st, (pktl->pkt.dts == AV_NOPTS_VALUE || pktl->pkt.dts == st->first_dts || pktl->pkt.dts == RELATIVE_TS_BASE) && - !pktl->pkt.duration) { + !pktl->pkt.duration && + av_sat_add64(cur_dts, duration) == cur_dts + (uint64_t)duration + ) { pktl->pkt.dts = cur_dts; if (!st->internal->avctx->has_b_frames) pktl->pkt.pts = cur_dts; @@ -3912,8 +3914,10 @@ FF_ENABLE_DEPRECATION_WARNINGS av_packet_unref(pkt1); break; } - if (pkt->duration) { - if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time) { + if (pkt->duration > 0) { + if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time + && (uint64_t)pkt->pts - st->start_time < INT64_MAX + ) { st->internal->info->codec_info_duration = FFMIN(pkt->pts - st->start_time, st->internal->info->codec_info_duration + pkt->duration); } else st->internal->info->codec_info_duration += pkt->duration; @@ -4059,7 +4063,7 @@ FF_ENABLE_DEPRECATION_WARNINGS if (!st->r_frame_rate.num) { if ( avctx->time_base.den * (int64_t) st->time_base.num - <= avctx->time_base.num * avctx->ticks_per_frame * (uint64_t) st->time_base.den) { + <= avctx->time_base.num * (uint64_t)avctx->ticks_per_frame * 
st->time_base.den) { av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, avctx->time_base.den, (int64_t)avctx->time_base.num * avctx->ticks_per_frame, INT_MAX); } else { diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c index 8214ab8498d..b11c6091bc3 100644 --- a/libavformat/wavdec.c +++ b/libavformat/wavdec.c @@ -498,6 +498,8 @@ static int wav_read_header(AVFormatContext *s) wav->smv_data_ofs = avio_tell(pb) + (size - 5) * 3; avio_rl24(pb); wav->smv_block_size = avio_rl24(pb); + if (!wav->smv_block_size) + return AVERROR_INVALIDDATA; avpriv_set_pts_info(vst, 32, 1, avio_rl24(pb)); vst->duration = avio_rl24(pb); avio_rl24(pb); @@ -718,7 +720,7 @@ static int wav_read_packet(AVFormatContext *s, AVPacket *pkt) if (wav->smv_last_stream) { uint64_t old_pos = avio_tell(s->pb); uint64_t new_pos = wav->smv_data_ofs + - wav->smv_block * wav->smv_block_size; + wav->smv_block * (int64_t)wav->smv_block_size; if (avio_seek(s->pb, new_pos, SEEK_SET) < 0) { ret = AVERROR_EOF; goto smv_out; diff --git a/libavformat/wtvdec.c b/libavformat/wtvdec.c index 4b3b7fb407b..1d5ba03befa 100644 --- a/libavformat/wtvdec.c +++ b/libavformat/wtvdec.c @@ -660,6 +660,8 @@ static AVStream * parse_media_type(AVFormatContext *s, AVStream *st, int sid, avio_skip(pb, size - 32); ff_get_guid(pb, &actual_subtype); ff_get_guid(pb, &actual_formattype); + if (avio_feof(pb)) + return NULL; avio_seek(pb, -size, SEEK_CUR); st = parse_media_type(s, st, sid, mediatype, actual_subtype, actual_formattype, size - 32); @@ -817,7 +819,7 @@ static int parse_chunks(AVFormatContext *s, int mode, int64_t seekts, int *len_p avio_skip(pb, 12); ff_get_guid(pb, &formattype); size = avio_rl32(pb); - if (size < 0 || size > INT_MAX - 92) + if (size < 0 || size > INT_MAX - 92 - consumed) return AVERROR_INVALIDDATA; parse_media_type(s, 0, sid, mediatype, subtype, formattype, size); consumed += 92 + size; @@ -833,7 +835,7 @@ static int parse_chunks(AVFormatContext *s, int mode, int64_t seekts, int *len_p avio_skip(pb, 12); ff_get_guid(pb, &formattype); size = avio_rl32(pb); - if (size < 0 || size > INT_MAX - 76) + if (size < 0 || size > INT_MAX - 76 - consumed) return AVERROR_INVALIDDATA; parse_media_type(s, s->streams[stream_index], sid, mediatype, subtype, formattype, size); consumed += 76 + size; diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 52f6b9a3bf6..6bd0f07a623 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -291,6 +291,12 @@ int av_cpu_count(void) DWORD_PTR proc_aff, sys_aff; if (GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)) nb_cpus = av_popcount64(proc_aff); +#elif HAVE_SYSCTL && defined(HW_NCPUONLINE) + int mib[2] = { CTL_HW, HW_NCPUONLINE }; + size_t len = sizeof(nb_cpus); + + if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) + nb_cpus = 0; #elif HAVE_SYSCTL && defined(HW_NCPU) int mib[2] = { CTL_HW, HW_NCPU }; size_t len = sizeof(nb_cpus); diff --git a/libavutil/mathematics.h b/libavutil/mathematics.h index 54901800ba6..64d4137a602 100644 --- a/libavutil/mathematics.h +++ b/libavutil/mathematics.h @@ -134,6 +134,7 @@ int64_t av_rescale(int64_t a, int64_t b, int64_t c) av_const; * * The operation is mathematically equivalent to `a * b / c`, but writing that * directly can overflow, and does not support different rounding methods. + * If the result is not representable then INT64_MIN is returned. 
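 *
 * A minimal usage sketch (hypothetical values; not part of this header):
 * @code
 * // Convert a 48 kHz sample count to a 90 kHz MPEG time base, rounding
 * // to nearest; per the note above, INT64_MIN signals an unrepresentable
 * // result and must be checked by the caller.
 * int64_t ts90k = av_rescale_rnd(nsamples, 90000, 48000, AV_ROUND_NEAR_INF);
 * @endcode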
* * @see av_rescale(), av_rescale_q(), av_rescale_q_rnd() */ diff --git a/libswscale/alphablend.c b/libswscale/alphablend.c index b5686599c00..b5967c889bc 100644 --- a/libswscale/alphablend.c +++ b/libswscale/alphablend.c @@ -26,7 +26,7 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); int nb_components = desc->nb_components; - int plane, x, y; + int plane, x, ysrc; int plane_count = isGray(c->srcFormat) ? 1 : 3; int sixteen_bits = desc->comp[0].depth >= 9; unsigned off = 1<<(desc->comp[0].depth - 1); @@ -50,14 +50,15 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], int w = plane ? c->chrSrcW : c->srcW; int x_subsample = plane ? desc->log2_chroma_w: 0; int y_subsample = plane ? desc->log2_chroma_h: 0; - for (y = srcSliceY >> y_subsample; y < AV_CEIL_RSHIFT(srcSliceH, y_subsample); y++) { + for (ysrc = 0; ysrc < AV_CEIL_RSHIFT(srcSliceH, y_subsample); ysrc++) { + int y = ysrc + (srcSliceY >> y_subsample); if (x_subsample || y_subsample) { int alpha; unsigned u; if (sixteen_bits) { ptrdiff_t alpha_step = srcStride[plane_count] >> 1; - const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * y); - const uint16_t *a = (const uint16_t *)(src[plane_count] + (srcStride[plane_count] * y << y_subsample)); + const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * ysrc); + const uint16_t *a = (const uint16_t *)(src[plane_count] + (srcStride[plane_count] * ysrc << y_subsample)); uint16_t *d = ( uint16_t *)(dst[plane ] + dstStride[plane ] * y); if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) { for (x = 0; x < w; x++) { @@ -82,8 +83,8 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } } else { ptrdiff_t alpha_step = srcStride[plane_count]; - const uint8_t *s = src[plane ] + srcStride[plane] * y; - const uint8_t *a = src[plane_count] + (srcStride[plane_count] * y << y_subsample); + const uint8_t *s = src[plane ] + srcStride[plane] * ysrc; + const uint8_t *a = src[plane_count] + (srcStride[plane_count] * ysrc << y_subsample); uint8_t *d = dst[plane ] + dstStride[plane] * y; for (x = 0; x < w; x++) { if (y_subsample) { @@ -97,8 +98,8 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } } else { if (sixteen_bits) { - const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * y); - const uint16_t *a = (const uint16_t *)(src[plane_count] + srcStride[plane_count] * y); + const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * ysrc); + const uint16_t *a = (const uint16_t *)(src[plane_count] + srcStride[plane_count] * ysrc); uint16_t *d = ( uint16_t *)(dst[plane ] + dstStride[plane ] * y); if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) { for (x = 0; x < w; x++) { @@ -113,8 +114,8 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } } } else { - const uint8_t *s = src[plane ] + srcStride[plane] * y; - const uint8_t *a = src[plane_count] + srcStride[plane_count] * y; + const uint8_t *s = src[plane ] + srcStride[plane] * ysrc; + const uint8_t *a = src[plane_count] + srcStride[plane_count] * ysrc; uint8_t *d = dst[plane ] + dstStride[plane] * y; for (x = 0; x < w; x++) { unsigned u = s[x]*a[x] + target_table[((x^y)>>5)&1][plane]*(255-a[x]) + 128; @@ -127,10 +128,11 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } else { int alpha_pos = desc->comp[plane_count].offset; int w = c->srcW; - for (y = srcSliceY; y < srcSliceH; y++) { + for (ysrc = 0; ysrc < srcSliceH; ysrc++) { + int y = ysrc + 
srcSliceY; if (sixteen_bits) { - const uint16_t *s = (const uint16_t *)(src[0] + srcStride[0] * y + 2*!alpha_pos); - const uint16_t *a = (const uint16_t *)(src[0] + srcStride[0] * y + alpha_pos); + const uint16_t *s = (const uint16_t *)(src[0] + srcStride[0] * ysrc + 2*!alpha_pos); + const uint16_t *a = (const uint16_t *)(src[0] + srcStride[0] * ysrc + alpha_pos); uint16_t *d = ( uint16_t *)(dst[0] + dstStride[0] * y); if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) { for (x = 0; x < w; x++) { @@ -151,8 +153,8 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } } } else { - const uint8_t *s = src[0] + srcStride[0] * y + !alpha_pos; - const uint8_t *a = src[0] + srcStride[0] * y + alpha_pos; + const uint8_t *s = src[0] + srcStride[0] * ysrc + !alpha_pos; + const uint8_t *a = src[0] + srcStride[0] * ysrc + alpha_pos; uint8_t *d = dst[0] + dstStride[0] * y; for (x = 0; x < w; x++) { for (plane = 0; plane < plane_count; plane++) { diff --git a/libswscale/slice.c b/libswscale/slice.c index d96db133648..b185b4aa189 100644 --- a/libswscale/slice.c +++ b/libswscale/slice.c @@ -288,7 +288,10 @@ int ff_init_filters(SwsContext * c) if (!c->desc) return AVERROR(ENOMEM); c->slice = av_mallocz_array(sizeof(SwsSlice), c->numSlice); - + if (!c->slice) { + res = AVERROR(ENOMEM); + goto cleanup; + } res = alloc_slice(&c->slice[0], c->srcFormat, c->srcH, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0); if (res < 0) goto cleanup; diff --git a/tests/ref/fate/ts-opus-demux b/tests/ref/fate/ts-opus-demux index 3c5edffb2cc..37534350a1f 100644 --- a/tests/ref/fate/ts-opus-demux +++ b/tests/ref/fate/ts-opus-demux @@ -1,4 +1,4 @@ -#extradata 0: 30, 0x53be0347 +#extradata 0: 30, 0x69290482 #tb 0: 1/90000 #media_type 0: audio #codec_id 0: opus diff --git a/tools/cws2fws.c b/tools/cws2fws.c index 7046b699571..9ce321fe208 100644 --- a/tools/cws2fws.c +++ b/tools/cws2fws.c @@ -89,6 +89,12 @@ int main(int argc, char *argv[]) for (i = 0; i < comp_len - 8;) { int ret, len = read(fd_in, &buf_in, 1024); + if (len == -1) { + printf("read failure\n"); + inflateEnd(&zstream); + goto out; + } + dbgprintf("read %d bytes\n", len); last_out = zstream.total_out; From de51e04012d2c3714a8c3a3d8dbbd7cfe31312c6 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:11:05 +0800 Subject: [PATCH 02/41] Bump version to 4.4.1-1 --- build.yaml | 2 +- debian/changelog | 6 ++++++ debian/control | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/build.yaml b/build.yaml index 270ce120d58..ab308e87317 100644 --- a/build.yaml +++ b/build.yaml @@ -1,7 +1,7 @@ --- # We just wrap `build` so this is really it name: "jellyfin-ffmpeg" -version: "4.4-1" +version: "4.4.1-1" packages: - stretch-amd64 - stretch-armhf diff --git a/debian/changelog b/debian/changelog index 75790999d87..93b3fd2d66c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +jellyfin-ffmpeg (4.4.1-1) unstable; urgency=medium + + * New upstream version 4.4.1 + + -- nyanmisaka Sun, 7 Nov 2021 15:09:03 +0800 + jellyfin-ffmpeg (4.4-1) unstable; urgency=medium * New upstream version 4.4 diff --git a/debian/control b/debian/control index c4c66d37954..9602939eb4b 100644 --- a/debian/control +++ b/debian/control @@ -5,7 +5,7 @@ Maintainer: Jellyfin Packaging Team Uploaders: Jellyfin Packaging Team Rules-Requires-Root: no Homepage: https://ffmpeg.org/ -Standards-Version: 4.4 +Standards-Version: 4.4.1 Vcs-Git: https://github.com/jellyfin/jellyfin-ffmpeg.git Vcs-Browser: https://github.com/jellyfin/jellyfin-ffmpeg Build-Depends: 
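Several hunks in the 4.4.1 merge above (mpc8.c, sbgdec.c, utils.c) rely on the
same overflow-detection idiom: compute the sum once with av_sat_add64(), which
clamps to INT64_MAX/INT64_MIN, and once as a wrapping uint64_t addition; the
two results differ exactly when the signed addition would overflow. A minimal
sketch of the idiom, with hypothetical names (not a function from the tree):

    #include <stdint.h>
    #include "libavutil/common.h"   /* av_sat_add64() */

    /* Return 1 if a + b fits in int64_t, 0 if it would overflow.
     * The saturated sum equals the wrapped sum only when no
     * clamping occurred, i.e. when the true sum is representable. */
    static int add64_in_range(int64_t a, int64_t b)
    {
        return av_sat_add64(a, b) == a + (uint64_t)b;
    }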
From a2562e2941ec4a8343ef838cd248dfa0400f1b82 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Tue, 14 Sep 2021 18:28:12 +0800 Subject: [PATCH 03/41] only disable x86 asm in dav1d if nasm is outdated --- debian/rules | 8 +------- docker-build.sh | 9 +++++---- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/debian/rules b/debian/rules index 62c2fdcfb9a..ad7a87b435b 100755 --- a/debian/rules +++ b/debian/rules @@ -35,6 +35,7 @@ CONFIG := --prefix=${TARGET_DIR} \ --enable-libopus \ --enable-libtheora \ --enable-libvorbis \ + --enable-libdav1d \ --enable-libwebp \ --enable-libvpx \ --enable-libx264 \ @@ -44,7 +45,6 @@ CONFIG := --prefix=${TARGET_DIR} \ CONFIG_ARM_COMMON := --toolchain=hardened \ --enable-cross-compile \ - --enable-libdav1d \ --enable-omx \ --enable-omx-rpi \ @@ -70,14 +70,8 @@ CONFIG_x86 := --arch=amd64 \ --enable-nvdec \ --enable-ffnvcodec \ -CONFIG_DAV1D := --enable-libdav1d \ - HOST_ARCH := $(shell arch) BUILD_ARCH := ${DEB_HOST_MULTIARCH} - -ifeq ($(ENABLE_X86_DAV1D),true) - CONFIG_x86 += $(CONFIG_DAV1D) -endif ifeq ($(BUILD_ARCH),x86_64-linux-gnu) # Native amd64 build CONFIG += $(CONFIG_x86) diff --git a/docker-build.sh b/docker-build.sh index f9ad5012a0d..60040b952ed 100755 --- a/docker-build.sh +++ b/docker-build.sh @@ -42,17 +42,18 @@ prepare_extra_common() { nasmminver="2.13.02" nasmavx512ver="2.14.0" if [ "$(printf '%s\n' "$nasmminver" "$nasmver" | sort -V | head -n1)" = "$nasmminver" ]; then - export ENABLE_X86_DAV1D=true + x86asm=true if [ "$(printf '%s\n' "$nasmavx512ver" "$nasmver" | sort -V | head -n1)" = "$nasmavx512ver" ]; then avx512=true else avx512=false fi else - export ENABLE_X86_DAV1D=false + x86asm=false + avx512=false fi - if [ "${ENABLE_X86_DAV1D}" = "true" ] && [ "${ARCH}" = "amd64" ]; then - meson -Denable_asm=true \ + if [ "${ARCH}" = "amd64" ]; then + meson -Denable_asm=$x86asm \ -Denable_avx512=$avx512 \ -Denable_tests=false \ -Ddefault_library=shared \ From d1819de544ae4bb567956453489350c4cba493c9 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sat, 27 Nov 2021 17:55:54 +0800 Subject: [PATCH 04/41] update deps for linux build --- debian/rules | 11 +++++------ docker-build.sh | 16 +++++++++------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/debian/rules b/debian/rules index ad7a87b435b..2cf0e991b4c 100755 --- a/debian/rules +++ b/debian/rules @@ -22,15 +22,14 @@ CONFIG := --prefix=${TARGET_DIR} \ --enable-gpl \ --enable-version3 \ --enable-static \ - --enable-libfontconfig \ - --enable-fontconfig \ --enable-gmp \ --enable-gnutls \ - --enable-libass \ - --enable-libbluray \ --enable-libdrm \ + --enable-libass \ --enable-libfreetype \ --enable-libfribidi \ + --enable-libfontconfig \ + --enable-libbluray \ --enable-libmp3lame \ --enable-libopus \ --enable-libtheora \ @@ -63,12 +62,12 @@ CONFIG_x86 := --arch=amd64 \ --enable-amf \ --enable-libmfx \ --enable-vdpau \ + --enable-ffnvcodec \ --enable-cuda \ --enable-cuda-llvm \ --enable-cuvid \ - --enable-nvenc \ --enable-nvdec \ - --enable-ffnvcodec \ + --enable-nvenc \ HOST_ARCH := $(shell arch) BUILD_ARCH := ${DEB_HOST_MULTIARCH} diff --git a/docker-build.sh b/docker-build.sh index 60040b952ed..c1aaa270160 100755 --- a/docker-build.sh +++ b/docker-build.sh @@ -34,7 +34,7 @@ prepare_extra_common() { # Download and install dav1d pushd ${SOURCE_DIR} - git clone -b 0.9.1 --depth=1 https://code.videolan.org/videolan/dav1d.git + git clone -b 0.9.2 --depth=1 https://code.videolan.org/videolan/dav1d.git pushd dav1d mkdir build pushd build @@ -103,7 +103,7 @@ 
prepare_extra_amd64() { # Download and install libva pushd ${SOURCE_DIR} - git clone -b v2.12-branch --depth=1 https://github.com/intel/libva + git clone --depth=1 https://github.com/intel/libva pushd libva sed -i 's|getenv("LIBVA_DRIVERS_PATH")|"/usr/lib/jellyfin-ffmpeg/lib/dri:/usr/lib/x86_64-linux-gnu/dri:/usr/lib/dri:/usr/local/lib/dri"|g' va/va.c sed -i 's|getenv("LIBVA_DRIVER_NAME")|NULL|g' va/va.c @@ -117,7 +117,7 @@ prepare_extra_amd64() { # Download and install intel-vaapi-driver pushd ${SOURCE_DIR} - git clone -b v2.4-branch --depth=1 https://github.com/intel/intel-vaapi-driver + git clone --depth=1 https://github.com/intel/intel-vaapi-driver pushd intel-vaapi-driver ./autogen.sh ./configure LIBVA_DRIVERS_PATH=${TARGET_DIR}/lib/dri @@ -130,7 +130,7 @@ prepare_extra_amd64() { # Download and install gmmlib pushd ${SOURCE_DIR} - git clone -b intel-gmmlib-21.2.1 --depth=1 https://github.com/intel/gmmlib + git clone -b intel-gmmlib-21.3.5 --depth=1 https://github.com/intel/gmmlib pushd gmmlib mkdir build && pushd build cmake -DCMAKE_INSTALL_PREFIX=${TARGET_DIR} .. @@ -143,11 +143,13 @@ prepare_extra_amd64() { # Download and install MediaSDK pushd ${SOURCE_DIR} - git clone -b intel-mediasdk-21.2.3 --depth=1 https://github.com/Intel-Media-SDK/MediaSDK + git clone -b intel-mediasdk-21.4.3 --depth=1 https://github.com/Intel-Media-SDK/MediaSDK pushd MediaSDK sed -i 's|MFX_PLUGINS_CONF_DIR "/plugins.cfg"|"/usr/lib/jellyfin-ffmpeg/lib/mfx/plugins.cfg"|g' api/mfx_dispatch/linux/mfxloader.cpp mkdir build && pushd build - cmake -DCMAKE_INSTALL_PREFIX=${TARGET_DIR} .. + cmake -DCMAKE_INSTALL_PREFIX=${TARGET_DIR} \ + -DBUILD_SAMPLES=OFF \ + .. make -j$(nproc) && make install && make install DESTDIR=${SOURCE_DIR}/intel echo "intel${TARGET_DIR}/lib/libmfx* usr/lib/jellyfin-ffmpeg/lib" >> ${SOURCE_DIR}/debian/jellyfin-ffmpeg.install echo "intel${TARGET_DIR}/lib/mfx/*.so usr/lib/jellyfin-ffmpeg/lib/mfx" >> ${SOURCE_DIR}/debian/jellyfin-ffmpeg.install @@ -161,7 +163,7 @@ prepare_extra_amd64() { # Full Feature Build: ENABLE_KERNELS=ON(Default) ENABLE_NONFREE_KERNELS=ON(Default) # Free Kernel Build: ENABLE_KERNELS=ON ENABLE_NONFREE_KERNELS=OFF #pushd ${SOURCE_DIR} - #git clone -b intel-media-21.2.3 --depth=1 https://github.com/intel/media-driver + #git clone -b intel-media-21.4.3 --depth=1 https://github.com/intel/media-driver #pushd media-driver #mkdir build && pushd build #cmake -DCMAKE_INSTALL_PREFIX=${TARGET_DIR} \ From 86b92d1804e1afc585309bd16516f4aeb59531fb Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Thu, 14 Oct 2021 17:29:50 +0800 Subject: [PATCH 05/41] cleanup for adding windows patches --- debian/patches/0001_fix-segment-muxer.patch | 37 - ...for-uploading-normal-frames-to-VAAPI.patch | 241 --- ...-for-the-broken-tonemap_vaapi-filter.patch | 259 --- .../0004-cuda-format-converter-impl.patch | 1438 ------------- debian/patches/0005-cuda-tonemap-impl.patch | 1824 ----------------- ...for-peak-detection-in-opencl-tonemap.patch | 755 ------- .../patches/0007-fix-for-fmp4-in-hlsenc.patch | 24 - ...fix-nvdec-exceeded-32-surfaces-error.patch | 17 - .../0009-fix-for-nvenc-from-upstream.patch | 1716 ---------------- debian/patches/series | 9 - 10 files changed, 6320 deletions(-) delete mode 100644 debian/patches/0001_fix-segment-muxer.patch delete mode 100644 debian/patches/0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch delete mode 100644 debian/patches/0003-fix-for-the-broken-tonemap_vaapi-filter.patch delete mode 100644 debian/patches/0004-cuda-format-converter-impl.patch 
delete mode 100644 debian/patches/0005-cuda-tonemap-impl.patch delete mode 100644 debian/patches/0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch delete mode 100644 debian/patches/0007-fix-for-fmp4-in-hlsenc.patch delete mode 100644 debian/patches/0008-fix-nvdec-exceeded-32-surfaces-error.patch delete mode 100644 debian/patches/0009-fix-for-nvenc-from-upstream.patch delete mode 100644 debian/patches/series diff --git a/debian/patches/0001_fix-segment-muxer.patch b/debian/patches/0001_fix-segment-muxer.patch deleted file mode 100644 index d3b57793d91..00000000000 --- a/debian/patches/0001_fix-segment-muxer.patch +++ /dev/null @@ -1,37 +0,0 @@ -Index: jellyfin-ffmpeg/libavformat/segment.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/segment.c -+++ jellyfin-ffmpeg/libavformat/segment.c -@@ -87,6 +87,7 @@ typedef struct SegmentContext { - int64_t last_val; ///< remember last time for wrap around detection - int cut_pending; - int header_written; ///< whether we've already called avformat_write_header -+ int64_t start_pts; ///< pts of the very first packet processed, used to compute correct segment length - - char *entry_prefix; ///< prefix to add to list entry filenames - int list_type; ///< set the list type -@@ -712,6 +713,7 @@ static int seg_init(AVFormatContext *s) - if ((ret = parse_frames(s, &seg->frames, &seg->nb_frames, seg->frames_str)) < 0) - return ret; - } else { -+ seg->start_pts = -1; - if (seg->use_clocktime) { - if (seg->time <= 0) { - av_log(s, AV_LOG_ERROR, "Invalid negative segment_time with segment_atclocktime option set\n"); -@@ -889,7 +891,15 @@ calc_times: - seg->cut_pending = 1; - seg->last_val = wrapped_val; - } else { -- end_pts = seg->time * (seg->segment_count + 1); -+ if (seg->start_pts != -1) { -+ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); -+ } else if (pkt->stream_index == seg->reference_stream_index && pkt->pts != AV_NOPTS_VALUE) { -+ // this is the first packet of the reference stream we see, initialize start point -+ seg->start_pts = av_rescale_q(pkt->pts, st->time_base, AV_TIME_BASE_Q); -+ seg->cur_entry.start_time = (double)pkt->pts * av_q2d(st->time_base); -+ seg->cur_entry.start_pts = seg->start_pts; -+ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); -+ } - } - } - diff --git a/debian/patches/0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch b/debian/patches/0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch deleted file mode 100644 index 63eea729c55..00000000000 --- a/debian/patches/0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch +++ /dev/null @@ -1,241 +0,0 @@ -#From c1fb9225a1b8e26875cb9b4e2b3ae2f4d68c5630 Mon Sep 17 00:00:00 2001 -#From: nyanmisaka -#Date: Sun, 24 Jan 2021 19:58:04 +0800 -#Subject: [PATCH] lavfi: add a filter for uploading normal frames to VAAPI -Index: jellyfin-ffmpeg/configure -=================================================================== ---- jellyfin-ffmpeg.orig/configure -+++ jellyfin-ffmpeg/configure -@@ -3577,6 +3577,7 @@ fspp_filter_deps="gpl" - headphone_filter_select="fft" - histeq_filter_deps="gpl" - hqdn3d_filter_deps="gpl" -+hwupload_vaapi_filter_deps="vaapi" - interlace_filter_deps="gpl" - kerndeint_filter_deps="gpl" - ladspa_filter_deps="ladspa libdl" -Index: jellyfin-ffmpeg/libavfilter/Makefile -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/Makefile -+++ jellyfin-ffmpeg/libavfilter/Makefile 
-@@ -297,6 +297,7 @@ OBJS-$(CONFIG_HUE_FILTER) - OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o - OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o - OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o -+OBJS-$(CONFIG_HWUPLOAD_VAAPI_FILTER) += vf_hwupload_vaapi.o - OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o - OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o - OBJS-$(CONFIG_IDENTITY_FILTER) += vf_identity.o -Index: jellyfin-ffmpeg/libavfilter/allfilters.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/allfilters.c -+++ jellyfin-ffmpeg/libavfilter/allfilters.c -@@ -282,6 +282,7 @@ extern AVFilter ff_vf_hwdownload; - extern AVFilter ff_vf_hwmap; - extern AVFilter ff_vf_hwupload; - extern AVFilter ff_vf_hwupload_cuda; -+extern AVFilter ff_vf_hwupload_vaapi; - extern AVFilter ff_vf_hysteresis; - extern AVFilter ff_vf_identity; - extern AVFilter ff_vf_idet; -Index: jellyfin-ffmpeg/libavfilter/vf_hwupload_vaapi.c -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/vf_hwupload_vaapi.c -@@ -0,0 +1,196 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavutil/buffer.h" -+#include "libavutil/hwcontext.h" -+#include "libavutil/log.h" -+#include "libavutil/opt.h" -+ -+#include "avfilter.h" -+#include "formats.h" -+#include "internal.h" -+#include "video.h" -+ -+typedef struct VaapiUploadContext { -+ const AVClass *class; -+ int device_idx; -+ -+ AVBufferRef *hwdevice; -+ AVBufferRef *hwframe; -+} VaapiUploadContext; -+ -+static av_cold int vaapiupload_init(AVFilterContext *ctx) -+{ -+ VaapiUploadContext *s = ctx->priv; -+ return av_hwdevice_ctx_create(&s->hwdevice, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0); -+} -+ -+static av_cold void vaapiupload_uninit(AVFilterContext *ctx) -+{ -+ VaapiUploadContext *s = ctx->priv; -+ -+ av_buffer_unref(&s->hwframe); -+ av_buffer_unref(&s->hwdevice); -+} -+ -+static int vaapiupload_query_formats(AVFilterContext *ctx) -+{ -+ int ret; -+ -+ static const enum AVPixelFormat input_pix_fmts[] = { -+ AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, -+ AV_PIX_FMT_UYVY422, AV_PIX_FMT_YUYV422, AV_PIX_FMT_Y210, -+ AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, -+ AV_PIX_FMT_GRAY8, AV_PIX_FMT_P010, AV_PIX_FMT_BGRA, -+ AV_PIX_FMT_BGR0, AV_PIX_FMT_RGBA, AV_PIX_FMT_RGB0, -+ AV_PIX_FMT_ABGR, AV_PIX_FMT_0BGR, AV_PIX_FMT_ARGB, -+ AV_PIX_FMT_0RGB, AV_PIX_FMT_NONE, -+ }; -+ static const enum AVPixelFormat output_pix_fmts[] = { -+ AV_PIX_FMT_VAAPI, AV_PIX_FMT_NONE, -+ }; -+ AVFilterFormats *in_fmts = ff_make_format_list(input_pix_fmts); -+ AVFilterFormats *out_fmts; -+ -+ ret = ff_formats_ref(in_fmts, &ctx->inputs[0]->outcfg.formats); -+ if (ret < 0) -+ return ret; -+ -+ 
out_fmts = ff_make_format_list(output_pix_fmts); -+ -+ ret = ff_formats_ref(out_fmts, &ctx->outputs[0]->incfg.formats); -+ if (ret < 0) -+ return ret; -+ -+ return 0; -+} -+ -+static int vaapiupload_config_output(AVFilterLink *outlink) -+{ -+ AVFilterContext *ctx = outlink->src; -+ AVFilterLink *inlink = ctx->inputs[0]; -+ VaapiUploadContext *s = ctx->priv; -+ -+ AVHWFramesContext *hwframe_ctx; -+ int ret; -+ -+ av_buffer_unref(&s->hwframe); -+ s->hwframe = av_hwframe_ctx_alloc(s->hwdevice); -+ if (!s->hwframe) -+ return AVERROR(ENOMEM); -+ -+ hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; -+ hwframe_ctx->format = AV_PIX_FMT_VAAPI; -+ if (inlink->hw_frames_ctx) { -+ AVHWFramesContext *in_hwframe_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; -+ hwframe_ctx->sw_format = in_hwframe_ctx->sw_format; -+ } else { -+ hwframe_ctx->sw_format = inlink->format; -+ } -+ hwframe_ctx->width = inlink->w; -+ hwframe_ctx->height = inlink->h; -+ -+ ret = av_hwframe_ctx_init(s->hwframe); -+ if (ret < 0) -+ return ret; -+ -+ outlink->hw_frames_ctx = av_buffer_ref(s->hwframe); -+ if (!outlink->hw_frames_ctx) -+ return AVERROR(ENOMEM); -+ -+ return 0; -+} -+ -+static int vaapiupload_filter_frame(AVFilterLink *link, AVFrame *in) -+{ -+ AVFilterContext *ctx = link->dst; -+ AVFilterLink *outlink = ctx->outputs[0]; -+ -+ AVFrame *out = NULL; -+ int ret; -+ -+ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); -+ if (!out) { -+ ret = AVERROR(ENOMEM); -+ goto fail; -+ } -+ -+ out->width = in->width; -+ out->height = in->height; -+ -+ ret = av_hwframe_transfer_data(out, in, 0); -+ if (ret < 0) { -+ av_log(ctx, AV_LOG_ERROR, "Error transferring data to the GPU\n"); -+ goto fail; -+ } -+ -+ ret = av_frame_copy_props(out, in); -+ if (ret < 0) -+ goto fail; -+ -+ av_frame_free(&in); -+ -+ return ff_filter_frame(ctx->outputs[0], out); -+fail: -+ av_frame_free(&in); -+ av_frame_free(&out); -+ return ret; -+} -+ -+static const AVClass vaapiupload_class = { -+ .class_name = "vaapiupload", -+ .item_name = av_default_item_name, -+ .option = NULL, -+ .version = LIBAVUTIL_VERSION_INT, -+}; -+ -+static const AVFilterPad vaapiupload_inputs[] = { -+ { -+ .name = "default", -+ .type = AVMEDIA_TYPE_VIDEO, -+ .filter_frame = vaapiupload_filter_frame, -+ }, -+ { NULL } -+}; -+ -+static const AVFilterPad vaapiupload_outputs[] = { -+ { -+ .name = "default", -+ .type = AVMEDIA_TYPE_VIDEO, -+ .config_props = vaapiupload_config_output, -+ }, -+ { NULL } -+}; -+ -+AVFilter ff_vf_hwupload_vaapi = { -+ .name = "hwupload_vaapi", -+ .description = NULL_IF_CONFIG_SMALL("Upload a system memory frame to a VAAPI device."), -+ -+ .init = vaapiupload_init, -+ .uninit = vaapiupload_uninit, -+ -+ .query_formats = vaapiupload_query_formats, -+ -+ .priv_size = sizeof(VaapiUploadContext), -+ .priv_class = &vaapiupload_class, -+ -+ .inputs = vaapiupload_inputs, -+ .outputs = vaapiupload_outputs, -+ -+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -+}; diff --git a/debian/patches/0003-fix-for-the-broken-tonemap_vaapi-filter.patch b/debian/patches/0003-fix-for-the-broken-tonemap_vaapi-filter.patch deleted file mode 100644 index b5be1c3afb5..00000000000 --- a/debian/patches/0003-fix-for-the-broken-tonemap_vaapi-filter.patch +++ /dev/null @@ -1,259 +0,0 @@ -# Fix for the broken tonemap_vaapi filter -# avfilter/tonemap_vaapi: pass filter parameters to VA parameter buffer -# avfilter: Add H2H support in tonemap_vaapi -Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_vaapi.c -=================================================================== 
---- jellyfin-ffmpeg.orig/libavfilter/vf_tonemap_vaapi.c -+++ jellyfin-ffmpeg/libavfilter/vf_tonemap_vaapi.c -@@ -41,7 +41,11 @@ typedef struct HDRVAAPIContext { - enum AVColorTransferCharacteristic color_transfer; - enum AVColorSpace color_matrix; - -+ char *master_display; -+ char *content_light; -+ - VAHdrMetaDataHDR10 in_metadata; -+ VAHdrMetaDataHDR10 out_metadata; - - AVFrameSideData *src_display; - AVFrameSideData *src_light; -@@ -148,6 +152,107 @@ static int tonemap_vaapi_save_metadata(A - return 0; - } - -+static int tonemap_vaapi_update_sidedata(AVFilterContext *avctx, AVFrame *output_frame) -+{ -+ HDRVAAPIContext *ctx = avctx->priv; -+ AVFrameSideData *metadata; -+ AVMasteringDisplayMetadata *hdr_meta; -+ AVFrameSideData *metadata_lt; -+ AVContentLightMetadata *hdr_meta_lt; -+ -+ int i; -+ const int mapping[3] = {1, 2, 0}; //green, blue, red -+ const int chroma_den = 50000; -+ const int luma_den = 10000; -+ -+ metadata = av_frame_get_side_data(output_frame, -+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); -+ if (metadata) { -+ av_frame_remove_side_data(output_frame, -+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); -+ metadata = av_frame_new_side_data(output_frame, -+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, -+ sizeof(AVMasteringDisplayMetadata)); -+ } else { -+ metadata = av_frame_new_side_data(output_frame, -+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, -+ sizeof(AVMasteringDisplayMetadata)); -+ } -+ -+ hdr_meta = (AVMasteringDisplayMetadata *)metadata->data; -+ -+ for (i = 0; i < 3; i++) { -+ const int j = mapping[i]; -+ hdr_meta->display_primaries[j][0].num = ctx->out_metadata.display_primaries_x[i]; -+ hdr_meta->display_primaries[j][0].den = chroma_den; -+ -+ hdr_meta->display_primaries[j][1].num = ctx->out_metadata.display_primaries_y[i]; -+ hdr_meta->display_primaries[j][1].den = chroma_den; -+ } -+ -+ hdr_meta->white_point[0].num = ctx->out_metadata.white_point_x; -+ hdr_meta->white_point[0].den = chroma_den; -+ -+ hdr_meta->white_point[1].num = ctx->out_metadata.white_point_y; -+ hdr_meta->white_point[1].den = chroma_den; -+ hdr_meta->has_primaries = 1; -+ -+ hdr_meta->max_luminance.num = ctx->out_metadata.max_display_mastering_luminance; -+ hdr_meta->max_luminance.den = luma_den; -+ -+ hdr_meta->min_luminance.num = ctx->out_metadata.min_display_mastering_luminance; -+ hdr_meta->min_luminance.den = luma_den; -+ hdr_meta->has_luminance = 1; -+ -+ av_log(avctx, AV_LOG_DEBUG, -+ "Mastering Display Metadata(out luminance):\n"); -+ av_log(avctx, AV_LOG_DEBUG, -+ "min_luminance=%u, max_luminance=%u\n", -+ ctx->out_metadata.min_display_mastering_luminance, -+ ctx->out_metadata.max_display_mastering_luminance); -+ -+ av_log(avctx, AV_LOG_DEBUG, -+ "Mastering Display Metadata(out primaries):\n"); -+ av_log(avctx, AV_LOG_DEBUG, -+ "G(%u,%u) B(%u,%u) R(%u,%u) WP(%u,%u)\n", -+ ctx->out_metadata.display_primaries_x[0], -+ ctx->out_metadata.display_primaries_y[0], -+ ctx->out_metadata.display_primaries_x[1], -+ ctx->out_metadata.display_primaries_y[1], -+ ctx->out_metadata.display_primaries_x[2], -+ ctx->out_metadata.display_primaries_y[2], -+ ctx->out_metadata.white_point_x, -+ ctx->out_metadata.white_point_y); -+ -+ metadata_lt = av_frame_get_side_data(output_frame, -+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); -+ if (metadata_lt) { -+ av_frame_remove_side_data(output_frame, -+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); -+ metadata_lt = av_frame_new_side_data(output_frame, -+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, -+ sizeof(AVContentLightMetadata)); -+ } else { -+ metadata_lt = 
av_frame_new_side_data(output_frame, -+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, -+ sizeof(AVContentLightMetadata)); -+ } -+ -+ hdr_meta_lt = (AVContentLightMetadata *)metadata_lt->data; -+ -+ hdr_meta_lt->MaxCLL = FFMIN(ctx->out_metadata.max_content_light_level, 65535); -+ hdr_meta_lt->MaxFALL = FFMIN(ctx->out_metadata.max_pic_average_light_level, 65535); -+ -+ av_log(avctx, AV_LOG_DEBUG, -+ "Mastering Content Light Level (out):\n"); -+ av_log(avctx, AV_LOG_DEBUG, -+ "MaxCLL(%u) MaxFALL(%u)\n", -+ ctx->out_metadata.max_content_light_level, -+ ctx->out_metadata.max_pic_average_light_level); -+ -+ return 0; -+} -+ - static int tonemap_vaapi_set_filter_params(AVFilterContext *avctx, AVFrame *input_frame) - { - VAAPIVPPContext *vpp_ctx = avctx->priv; -@@ -210,15 +315,26 @@ static int tonemap_vaapi_build_filter_pa - return AVERROR(EINVAL); - } - -- for (i = 0; i < num_query_caps; i++) { -- if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) -- break; -- } -- -- if (i >= num_query_caps) { -- av_log(avctx, AV_LOG_ERROR, -- "VAAPI driver doesn't support HDR to SDR\n"); -- return AVERROR(EINVAL); -+ if (ctx->color_transfer == AVCOL_TRC_SMPTE2084) { -+ for (i = 0; i < num_query_caps; i++) { -+ if (VA_TONE_MAPPING_HDR_TO_HDR & hdr_cap[i].caps_flag) -+ break; -+ } -+ if (i >= num_query_caps) { -+ av_log(avctx, AV_LOG_ERROR, -+ "VAAPI driver doesn't support HDR to HDR\n"); -+ return AVERROR(EINVAL); -+ } -+ } else { -+ for (i = 0; i < num_query_caps; i++) { -+ if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) -+ break; -+ } -+ if (i >= num_query_caps) { -+ av_log(avctx, AV_LOG_ERROR, -+ "VAAPI driver doesn't support HDR to SDR\n"); -+ return AVERROR(EINVAL); -+ } - } - - hdrtm_param.type = VAProcFilterHighDynamicRangeToneMapping; -@@ -243,6 +359,8 @@ static int tonemap_vaapi_filter_frame(AV - VAProcPipelineParameterBuffer params; - int err; - -+ VAHdrMetaData out_hdr_metadata; -+ - av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", - av_get_pix_fmt_name(input_frame->format), - input_frame->width, input_frame->height, input_frame->pts); -@@ -291,11 +409,26 @@ static int tonemap_vaapi_filter_frame(AV - if (ctx->color_matrix != AVCOL_SPC_UNSPECIFIED) - output_frame->colorspace = ctx->color_matrix; - -+ if (output_frame->color_trc == AVCOL_TRC_SMPTE2084) { -+ err = tonemap_vaapi_update_sidedata(avctx, output_frame); -+ if (err < 0) -+ goto fail; -+ -+ out_hdr_metadata.metadata_type = VAProcHighDynamicRangeMetadataHDR10; -+ out_hdr_metadata.metadata = &ctx->out_metadata; -+ out_hdr_metadata.metadata_size = sizeof(VAHdrMetaDataHDR10); -+ -+ params.output_hdr_metadata = &out_hdr_metadata; -+ } -+ - err = ff_vaapi_vpp_init_params(avctx, ¶ms, - input_frame, output_frame); - if (err < 0) - goto fail; - -+ params.filters = &vpp_ctx->filter_buffers[0]; -+ params.num_filters = vpp_ctx->nb_filter_buffers; -+ - err = ff_vaapi_vpp_render_picture(avctx, ¶ms, output_frame); - if (err < 0) - goto fail; -@@ -355,6 +488,46 @@ static av_cold int tonemap_vaapi_init(AV - STRING_OPTION(color_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); - STRING_OPTION(color_matrix, color_space, AVCOL_SPC_UNSPECIFIED); - -+ if (ctx->color_transfer == AVCOL_TRC_SMPTE2084) { -+ if (!ctx->master_display) { -+ av_log(avctx, AV_LOG_ERROR, -+ "Option mastering-display input invalid\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ if (10 != sscanf(ctx->master_display, -+ "G(%hu|%hu)B(%hu|%hu)R(%hu|%hu)WP(%hu|%hu)L(%u|%u)", -+ &ctx->out_metadata.display_primaries_x[0], -+ &ctx->out_metadata.display_primaries_y[0], -+ 
&ctx->out_metadata.display_primaries_x[1], -+ &ctx->out_metadata.display_primaries_y[1], -+ &ctx->out_metadata.display_primaries_x[2], -+ &ctx->out_metadata.display_primaries_y[2], -+ &ctx->out_metadata.white_point_x, -+ &ctx->out_metadata.white_point_y, -+ &ctx->out_metadata.min_display_mastering_luminance, -+ &ctx->out_metadata.max_display_mastering_luminance)) { -+ av_log(avctx, AV_LOG_ERROR, -+ "Option mastering-display input invalid\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ if (!ctx->content_light) { -+ av_log(avctx, AV_LOG_ERROR, -+ "Option content-light input invalid\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ if (2 != sscanf(ctx->content_light, -+ "CLL(%hu)FALL(%hu)", -+ &ctx->out_metadata.max_content_light_level, -+ &ctx->out_metadata.max_pic_average_light_level)) { -+ av_log(avctx, AV_LOG_ERROR, -+ "Option content-light input invalid\n"); -+ return AVERROR(EINVAL); -+ } -+ } -+ - return 0; - } - -@@ -380,10 +553,11 @@ static const AVOption tonemap_vaapi_opti - { "t", "Output color transfer characteristics set", - OFFSET(color_transfer_string), AV_OPT_TYPE_STRING, - { .str = NULL }, .flags = FLAGS, "transfer" }, -+ { "display", "set master display", OFFSET(master_display), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, FLAGS }, -+ { "light", "set content light", OFFSET(content_light), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, FLAGS }, - { NULL } - }; - -- - AVFILTER_DEFINE_CLASS(tonemap_vaapi); - - static const AVFilterPad tonemap_vaapi_inputs[] = { diff --git a/debian/patches/0004-cuda-format-converter-impl.patch b/debian/patches/0004-cuda-format-converter-impl.patch deleted file mode 100644 index a01c782d548..00000000000 --- a/debian/patches/0004-cuda-format-converter-impl.patch +++ /dev/null @@ -1,1438 +0,0 @@ -Index: jellyfin-ffmpeg/compat/cuda/cuda_runtime.h -=================================================================== ---- jellyfin-ffmpeg.orig/compat/cuda/cuda_runtime.h -+++ jellyfin-ffmpeg/compat/cuda/cuda_runtime.h -@@ -49,16 +49,6 @@ typedef struct __device_builtin__ __alig - unsigned short x, y; - } ushort2; - --typedef struct __device_builtin__ __align__(8) float2 --{ -- float x, y; --} float2; -- --typedef struct __device_builtin__ __align__(8) int2 --{ -- int x, y; --} int2; -- - typedef struct __device_builtin__ uint3 - { - unsigned int x, y, z; -@@ -66,6 +56,11 @@ typedef struct __device_builtin__ uint3 - - typedef struct uint3 dim3; - -+typedef struct __device_builtin__ __align__(8) int2 -+{ -+ int x, y; -+} int2; -+ - typedef struct __device_builtin__ __align__(4) uchar4 - { - unsigned char x, y, z, w; -@@ -81,11 +76,6 @@ typedef struct __device_builtin__ __alig - int x, y, z, w; - } int4; - --typedef struct __device_builtin__ __align__(16) float4 --{ -- float x, y, z, w; --} float4; -- - // Accessors for special registers - #define GETCOMP(reg, comp) \ - asm("mov.u32 %0, %%" #reg "." 
#comp ";" : "=r"(tmp)); \ -@@ -110,31 +100,24 @@ GET(getThreadIdx, tid) - #define threadIdx (getThreadIdx()) - - // Basic initializers (simple macros rather than inline functions) --#define make_int2(a, b) ((int2){.x = a, .y = b}) - #define make_uchar2(a, b) ((uchar2){.x = a, .y = b}) - #define make_ushort2(a, b) ((ushort2){.x = a, .y = b}) --#define make_float2(a, b) ((float2){.x = a, .y = b}) --#define make_int4(a, b, c, d) ((int4){.x = a, .y = b, .z = c, .w = d}) - #define make_uchar4(a, b, c, d) ((uchar4){.x = a, .y = b, .z = c, .w = d}) - #define make_ushort4(a, b, c, d) ((ushort4){.x = a, .y = b, .z = c, .w = d}) --#define make_float4(a, b, c, d) ((float4){.x = a, .y = b, .z = c, .w = d}) - - // Conversions from the tex instruction's 4-register output to various types - #define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);} - - TEX2D(unsigned char, a & 0xFF) - TEX2D(unsigned short, a & 0xFFFF) --TEX2D(float, a) --TEX2D(uchar2, make_uchar2(a & 0xFF, b & 0xFF)) --TEX2D(ushort2, make_ushort2(a & 0xFFFF, b & 0xFFFF)) --TEX2D(float2, make_float2(a, b)) --TEX2D(uchar4, make_uchar4(a & 0xFF, b & 0xFF, c & 0xFF, d & 0xFF)) --TEX2D(ushort4, make_ushort4(a & 0xFFFF, b & 0xFFFF, c & 0xFFFF, d & 0xFFFF)) --TEX2D(float4, make_float4(a, b, c, d)) -+TEX2D(uchar2, make_uchar2((unsigned char)a, (unsigned char)b)) -+TEX2D(ushort2, make_ushort2((unsigned short)a, (unsigned short)b)) -+TEX2D(uchar4, make_uchar4((unsigned char)a, (unsigned char)b, (unsigned char)c, (unsigned char)d)) -+TEX2D(ushort4, make_ushort4((unsigned short)a, (unsigned short)b, (unsigned short)c, (unsigned short)d)) - - // Template calling tex instruction and converting the output to the selected type --template --inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) -+template -+static inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) - { - T ret; - unsigned ret1, ret2, ret3, ret4; -@@ -145,44 +128,4 @@ inline __device__ T tex2D(cudaTextureObj - return ret; - } - --template<> --inline __device__ float4 tex2D(cudaTextureObject_t texObject, float x, float y) --{ -- float4 ret; -- asm("tex.2d.v4.f32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" : -- "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : -- "l"(texObject), "f"(x), "f"(y)); -- return ret; --} -- --template<> --inline __device__ float tex2D(cudaTextureObject_t texObject, float x, float y) --{ -- return tex2D(texObject, x, y).x; --} -- --template<> --inline __device__ float2 tex2D(cudaTextureObject_t texObject, float x, float y) --{ -- float4 ret = tex2D(texObject, x, y); -- return make_float2(ret.x, ret.y); --} -- --// Math helper functions --static inline __device__ float floorf(float a) { return __builtin_floorf(a); } --static inline __device__ float floor(float a) { return __builtin_floorf(a); } --static inline __device__ double floor(double a) { return __builtin_floor(a); } --static inline __device__ float ceilf(float a) { return __builtin_ceilf(a); } --static inline __device__ float ceil(float a) { return __builtin_ceilf(a); } --static inline __device__ double ceil(double a) { return __builtin_ceil(a); } --static inline __device__ float truncf(float a) { return __builtin_truncf(a); } --static inline __device__ float trunc(float a) { return __builtin_truncf(a); } --static inline __device__ double trunc(double a) { return __builtin_trunc(a); } --static inline __device__ float fabsf(float a) { return __builtin_fabsf(a); } --static inline __device__ 
float fabs(float a) { return __builtin_fabsf(a); } --static inline __device__ double fabs(double a) { return __builtin_fabs(a); } -- --static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); } --static inline __device__ float __cosf(float a) { return __nvvm_cos_approx_f(a); } -- - #endif /* COMPAT_CUDA_CUDA_RUNTIME_H */ -Index: jellyfin-ffmpeg/configure -=================================================================== ---- jellyfin-ffmpeg.orig/configure -+++ jellyfin-ffmpeg/configure -@@ -6250,7 +6250,7 @@ fi - if enabled cuda_nvcc; then - nvccflags="$nvccflags -ptx" - else -- nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -include ${source_link}/compat/cuda/cuda_runtime.h" -+ nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -std=c++14 -include ${source_link}/compat/cuda/cuda_runtime.h" - check_nvcc cuda_llvm - fi - -Index: jellyfin-ffmpeg/libavfilter/Makefile -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/Makefile -+++ jellyfin-ffmpeg/libavfilter/Makefile -@@ -392,8 +392,7 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) - OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o - OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o - OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o --OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \ -- vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o -+OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o scale_eval.o - OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o - OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o - OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o -Index: jellyfin-ffmpeg/libavfilter/cuda/vector_helpers.cuh -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/cuda/vector_helpers.cuh -+++ /dev/null -@@ -1,112 +0,0 @@ --/* -- * This file is part of FFmpeg. -- * -- * Permission is hereby granted, free of charge, to any person obtaining a -- * copy of this software and associated documentation files (the "Software"), -- * to deal in the Software without restriction, including without limitation -- * the rights to use, copy, modify, merge, publish, distribute, sublicense, -- * and/or sell copies of the Software, and to permit persons to whom the -- * Software is furnished to do so, subject to the following conditions: -- * -- * The above copyright notice and this permission notice shall be included in -- * all copies or substantial portions of the Software. -- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -- * DEALINGS IN THE SOFTWARE. 
-- */
-
--#ifndef AVFILTER_CUDA_VECTORHELPERS_H
--#define AVFILTER_CUDA_VECTORHELPERS_H
--
--typedef unsigned char uchar;
--typedef unsigned short ushort;
--
--template<typename T> struct vector_helper { };
--template<> struct vector_helper<uchar>   { typedef float  ftype; typedef int  itype; };
--template<> struct vector_helper<uchar2>  { typedef float2 ftype; typedef int2 itype; };
--template<> struct vector_helper<uchar4>  { typedef float4 ftype; typedef int4 itype; };
--template<> struct vector_helper<ushort>  { typedef float  ftype; typedef int  itype; };
--template<> struct vector_helper<ushort2> { typedef float2 ftype; typedef int2 itype; };
--template<> struct vector_helper<ushort4> { typedef float4 ftype; typedef int4 itype; };
--template<> struct vector_helper<int>     { typedef float  ftype; typedef int  itype; };
--template<> struct vector_helper<int2>    { typedef float2 ftype; typedef int2 itype; };
--template<> struct vector_helper<int4>    { typedef float4 ftype; typedef int4 itype; };
--
--#define floatT typename vector_helper<T>::ftype
--#define intT typename vector_helper<T>::itype
--
--template<typename T, typename V> inline __device__ V to_floatN(const T &a) { return (V)a; }
--template<typename T, typename V> inline __device__ T from_floatN(const V &a) { return (T)a; }
--
--#define OPERATORS2(T) \
--    template<typename V> inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.x + b.x, a.y + b.y); } \
--    template<typename V> inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.x - b.x, a.y - b.y); } \
--    template<typename V> inline __device__ T operator*(const T &a, V b) { return make_ ## T (a.x * b, a.y * b); } \
--    template<typename V> inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, a.y / b); } \
--    template<typename V> inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b, a.y >> b); } \
--    template<typename V> inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x << b, a.y << b); } \
--    template<typename V> inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; return a; } \
--    template<typename V> inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; } \
--    template<typename V> inline __device__ void vec_set_scalar(T &a, V b) { a.x = b; a.y = b; } \
--    template<> inline __device__ float2 to_floatN<T, float2>(const T &a) { return make_float2(a.x, a.y); } \
--    template<> inline __device__ T from_floatN<T, float2>(const float2 &a) { return make_ ## T(a.x, a.y); }
--#define OPERATORS4(T) \
--    template<typename V> inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); } \
--    template<typename V> inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); } \
--    template<typename V> inline __device__ T operator*(const T &a, V b) { return make_ ## T (a.x * b, a.y * b, a.z * b, a.w * b); } \
--    template<typename V> inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, a.y / b, a.z / b, a.w / b); } \
--    template<typename V> inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b, a.y >> b, a.z >> b, a.w >> b); } \
--    template<typename V> inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x << b, a.y << b, a.z << b, a.w << b); } \
--    template<typename V> inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; } \
--    template<typename V> inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; a.z = b.z; a.w = b.w; } \
--    template<typename V> inline __device__ void vec_set_scalar(T &a, V b) { a.x = b; a.y = b; a.z = b; a.w = b; } \
--    template<> inline __device__ float4 to_floatN<T, float4>(const T &a) { return make_float4(a.x, a.y, a.z, a.w); } \
--
template<> inline __device__ T from_floatN(const float4 &a) { return make_ ## T(a.x, a.y, a.z, a.w); } -- --OPERATORS2(int2) --OPERATORS2(uchar2) --OPERATORS2(ushort2) --OPERATORS2(float2) --OPERATORS4(int4) --OPERATORS4(uchar4) --OPERATORS4(ushort4) --OPERATORS4(float4) -- --template inline __device__ void vec_set(int &a, V b) { a = b; } --template inline __device__ void vec_set(float &a, V b) { a = b; } --template inline __device__ void vec_set(uchar &a, V b) { a = b; } --template inline __device__ void vec_set(ushort &a, V b) { a = b; } --template inline __device__ void vec_set_scalar(int &a, V b) { a = b; } --template inline __device__ void vec_set_scalar(float &a, V b) { a = b; } --template inline __device__ void vec_set_scalar(uchar &a, V b) { a = b; } --template inline __device__ void vec_set_scalar(ushort &a, V b) { a = b; } -- --template --inline __device__ T lerp_scalar(T v0, T v1, float t) { -- return t*v1 + (1.0f - t)*v0; --} -- --template<> --inline __device__ float2 lerp_scalar(float2 v0, float2 v1, float t) { -- return make_float2( -- lerp_scalar(v0.x, v1.x, t), -- lerp_scalar(v0.y, v1.y, t) -- ); --} -- --template<> --inline __device__ float4 lerp_scalar(float4 v0, float4 v1, float t) { -- return make_float4( -- lerp_scalar(v0.x, v1.x, t), -- lerp_scalar(v0.y, v1.y, t), -- lerp_scalar(v0.z, v1.z, t), -- lerp_scalar(v0.w, v1.w, t) -- ); --} -- --#endif -Index: jellyfin-ffmpeg/libavfilter/dither_matrix.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/dither_matrix.h -@@ -0,0 +1,74 @@ -+/* -+ * Dither matrix data -+ * -+ * This file is placed in the public domain. -+ */ -+ -+#include -+static const int ff_fruit_dither_size = 64; -+static const uint16_t ff_fruit_dither_matrix[] = { -+ 332, 2776, 1933, 42, 2598, 1796, 1000, 2978, 1677, 3452, 2164, 1564, 2644, 358, 2012, 3471, 1147, 3071, 596, 1943, 3146, 1191, 2469, 919, 3664, 2359, 441, 2691, 1179, 3027, 1408, 298, 3892, 1825, 182, 2178, 3028, 317, 2412, 858, 3097, 2205, 1145, 2880, 990, 2697, 728, 1969, 2312, 1393, 3232, 1204, 3752, 1529, 448, 3955, 2076, 833, 3856, 1, 3445, 2105, 955, 1761, -+ 4060, 1053, 3038, 1445, 3302, 430, 3702, 2119, 625, 2523, 12, 3003, 959, 3814, 2388, 829, 4059, 2236, 1417, 3447, 198, 4020, 1891, 3368, 76, 1460, 2963, 1680, 3721, 535, 2275, 2916, 1226, 2348, 3580, 823, 1897, 4032, 1245, 2728, 194, 3285, 1941, 399, 3639, 1593, 3775, 1038, 3012, 162, 2687, 2029, 559, 2983, 1809, 2378, 325, 2861, 1331, 2533, 1171, 2701, 3328, 153, -+ 2214, 3412, 501, 3934, 892, 1918, 2686, 1199, 3090, 1351, 3779, 1776, 3371, 1457, 217, 2844, 1726, 311, 2896, 1021, 2604, 1546, 569, 2758, 1818, 3967, 727, 3305, 963, 1866, 3591, 853, 3215, 496, 2651, 1453, 2808, 704, 2247, 3395, 1779, 937, 4014, 2288, 1286, 3110, 331, 3309, 1839, 3866, 932, 3566, 2499, 1005, 3346, 1192, 3712, 1743, 3399, 757, 3765, 391, 1871, 2928, -+ 1411, 820, 2531, 1622, 2192, 3478, 215, 4079, 384, 3298, 742, 2332, 436, 2685, 1963, 3273, 680, 3571, 2033, 3795, 806, 3550, 2319, 1225, 3191, 1027, 2506, 237, 2196, 3126, 28, 2148, 1772, 3959, 1009, 3507, 85, 3742, 1539, 453, 3647, 2154, 573, 2786, 156, 2020, 2569, 1425, 538, 2375, 1723, 300, 1854, 4065, 110, 2741, 678, 3138, 213, 1979, 2330, 1530, 3542, 720, -+ 3811, 1955, 3240, 126, 2909, 760, 2482, 1493, 2301, 1719, 2788, 1180, 3998, 923, 3649, 1094, 1862, 2579, 1272, 30, 3109, 1987, 255, 3816, 471, 1977, 3519, 1557, 3882, 1086, 2754, 3776, 1304, 241, 2262, 1863, 3163, 1111, 2982, 2026, 1056, 2948, 1439, 3323, 1737, 3595, 860, 3980, 
2895, 1193, 3365, 2779, 852, 2175, 3069, 1641, 2309, 1237, 2630, 4036, 965, 3052, 1096, 2487, -+ 289, 2863, 1190, 3633, 1330, 3834, 1075, 3429, 602, 3850, 174, 3242, 1657, 3080, 98, 2357, 3899, 374, 3224, 1619, 2431, 1125, 3019, 1367, 2395, 3102, 794, 2850, 426, 2451, 1687, 548, 3070, 2596, 3335, 565, 1630, 2459, 288, 3937, 2504, 51, 3740, 725, 2475, 433, 2706, 2075, 19, 3694, 633, 1982, 3149, 1345, 729, 3843, 411, 3654, 1681, 564, 2721, 104, 3875, 2110, -+ 3504, 1730, 537, 2398, 2058, 443, 2636, 1802, 2925, 953, 2568, 2005, 583, 2108, 2802, 1441, 889, 2726, 1949, 3984, 486, 3407, 624, 2698, 1648, 97, 4055, 1340, 1994, 3667, 913, 3453, 1964, 815, 1400, 4072, 2767, 873, 3457, 684, 1576, 3119, 1884, 1222, 3898, 1535, 3482, 982, 1817, 2520, 1487, 3927, 181, 3493, 2396, 1438, 2871, 985, 2070, 3498, 1370, 3279, 1655, 586, -+ 1280, 2625, 3912, 939, 2999, 1649, 3162, 64, 2130, 3606, 1428, 3469, 1256, 3824, 409, 3562, 1780, 3433, 768, 1206, 2843, 2086, 3869, 926, 3663, 2099, 1047, 2623, 3217, 148, 1810, 2498, 314, 3790, 2126, 129, 2037, 3088, 1356, 2314, 3301, 946, 2716, 2163, 250, 3091, 575, 2227, 3204, 359, 2970, 1110, 2594, 1867, 532, 3338, 43, 2492, 3172, 282, 2394, 842, 2852, 2019, -+ 3111, 31, 1813, 3256, 243, 3687, 851, 4002, 1301, 480, 3004, 264, 2493, 1586, 2947, 714, 2538, 165, 2193, 3597, 1514, 130, 1792, 3132, 348, 2875, 3434, 491, 1251, 2120, 3979, 1093, 2905, 979, 2561, 3625, 1230, 373, 3836, 1953, 197, 4028, 498, 3383, 1713, 2400, 1402, 4083, 1150, 3589, 2106, 592, 3650, 1252, 4012, 1975, 3047, 1574, 690, 3909, 1745, 3616, 349, 3976, -+ 894, 3438, 2251, 1160, 2582, 1467, 2342, 1747, 2837, 2242, 1076, 3946, 866, 3385, 1108, 1985, 4076, 1303, 2915, 438, 2589, 3312, 1085, 2367, 1916, 1403, 759, 2340, 3582, 2835, 637, 3264, 1997, 3506, 515, 1544, 3258, 2612, 1017, 2903, 1322, 1791, 3014, 1154, 3826, 871, 2858, 192, 2739, 719, 1701, 3177, 2222, 345, 2737, 770, 1181, 3774, 2264, 1060, 2667, 1262, 2318, 1584, -+ 2654, 1420, 499, 4045, 688, 3379, 319, 3490, 735, 3699, 1642, 2050, 2683, 40, 2204, 3096, 343, 3261, 1672, 3747, 900, 1981, 4025, 644, 3505, 2575, 3903, 1724, 232, 1517, 2590, 1405, 58, 1663, 3051, 2381, 751, 1756, 3646, 647, 3464, 2363, 826, 2563, 92, 3137, 1928, 3524, 1590, 2159, 3851, 84, 1423, 3082, 1684, 2417, 3417, 369, 1808, 3022, 118, 3254, 661, 3555, -+ 229, 3756, 2917, 1627, 2773, 1235, 2949, 1002, 2470, 140, 3274, 594, 3533, 1678, 3799, 933, 1812, 2457, 673, 2306, 1474, 3055, 292, 2820, 1293, 2, 1106, 3237, 2013, 3823, 439, 3611, 2093, 4015, 1264, 258, 3914, 2294, 16, 2692, 1983, 295, 3723, 1893, 3442, 1306, 619, 2371, 392, 2989, 1071, 2484, 3512, 666, 3796, 195, 1522, 2775, 3586, 875, 4086, 1921, 2883, 1163, -+ 2456, 1935, 1032, 2305, 111, 3845, 2036, 1555, 3964, 2115, 1289, 2911, 977, 2372, 465, 2616, 3618, 1211, 3933, 72, 3404, 772, 2424, 1662, 3755, 2252, 3025, 518, 2524, 855, 2953, 1102, 2811, 795, 2602, 3414, 2060, 891, 3357, 1132, 3965, 1495, 2806, 551, 1633, 2629, 3659, 1172, 3958, 1446, 3319, 798, 1787, 2768, 1271, 3184, 2210, 587, 1338, 2478, 1550, 483, 2198, 3388, -+ 832, 3219, 386, 3674, 1733, 2479, 578, 3214, 416, 3044, 1861, 291, 4033, 1449, 3477, 1940, 205, 2976, 885, 2704, 1886, 3820, 1070, 3288, 700, 1369, 1855, 4087, 1194, 3463, 1764, 2329, 297, 3296, 1742, 539, 1398, 3107, 1696, 2387, 711, 3151, 1223, 2219, 4038, 328, 2095, 2919, 845, 2595, 263, 2235, 4043, 398, 2129, 903, 3924, 1885, 3317, 249, 2732, 3685, 1383, 55, -+ 3926, 1481, 2977, 1177, 3292, 893, 3600, 1418, 2670, 927, 3637, 2477, 1127, 2665, 765, 2879, 1283, 2162, 3333, 1359, 482, 2180, 2942, 
219, 2548, 3594, 308, 2759, 2185, 106, 3174, 699, 3738, 1139, 2277, 3833, 2804, 223, 3707, 415, 3518, 1913, 150, 2995, 864, 3222, 1575, 35, 3428, 1694, 3581, 992, 2907, 1494, 3676, 2702, 11, 3103, 849, 3825, 1848, 947, 3125, 1901, -+ 2494, 600, 2017, 2658, 293, 2207, 2851, 9, 1799, 3186, 510, 2077, 3318, 102, 3196, 1571, 3982, 357, 1781, 3717, 2774, 1176, 1834, 4001, 908, 2044, 3195, 986, 1675, 3696, 1339, 2638, 1617, 3026, 96, 1984, 940, 2446, 1563, 2876, 930, 2532, 3426, 1516, 2407, 1134, 3801, 1931, 2307, 521, 2744, 1883, 138, 3356, 597, 1758, 2343, 1389, 2831, 2145, 693, 3485, 2286, 520, -+ 3031, 1601, 3710, 956, 4067, 1849, 1097, 3760, 2406, 1215, 3861, 1596, 846, 3804, 1890, 516, 2295, 3062, 747, 2337, 169, 3178, 603, 2269, 3104, 1537, 627, 3537, 2415, 786, 2988, 379, 4040, 839, 2554, 3561, 1363, 4005, 745, 2128, 3817, 1348, 617, 3910, 397, 3050, 598, 2787, 1037, 3932, 1277, 3741, 2091, 1073, 2503, 3530, 934, 4024, 352, 1610, 2969, 158, 1347, 3992, -+ 1155, 3348, 142, 2259, 3087, 500, 3380, 2096, 730, 2795, 210, 3005, 2335, 1173, 2627, 3703, 1049, 2038, 3514, 983, 3868, 1653, 3552, 1249, 74, 3891, 2586, 1452, 238, 3936, 1738, 2168, 1209, 3384, 1847, 593, 3233, 355, 3036, 1753, 63, 3250, 2150, 1732, 2613, 1259, 3629, 1483, 3120, 280, 2382, 663, 2641, 3176, 1637, 265, 3013, 1239, 2420, 3416, 1088, 3769, 1938, 2555, -+ 347, 2049, 2809, 1443, 761, 2628, 1693, 277, 3975, 1496, 3523, 1961, 547, 3106, 287, 1492, 3340, 24, 1547, 2887, 2147, 370, 2535, 1880, 2962, 2121, 479, 3077, 1922, 2618, 966, 3253, 2439, 327, 2847, 1515, 2226, 2713, 962, 3632, 2465, 1039, 2854, 245, 3179, 2080, 124, 2525, 792, 3486, 1744, 3289, 1404, 476, 3782, 2186, 715, 3579, 1971, 626, 2784, 2201, 741, 3248, -+ 1640, 3551, 1024, 3840, 1827, 3622, 1072, 3238, 2279, 835, 2562, 1295, 4085, 1816, 3535, 2495, 724, 2664, 4042, 570, 1281, 3422, 1013, 3787, 783, 1343, 3722, 1123, 3427, 599, 3634, 27, 1572, 3827, 1014, 3665, 139, 1868, 3197, 1390, 640, 4090, 1459, 3714, 902, 3421, 1676, 4004, 2246, 1128, 2720, 60, 4069, 2002, 1035, 2818, 1352, 2552, 88, 3847, 1801, 309, 3627, 1104, -+ 2894, 686, 2509, 403, 2926, 95, 1991, 2868, 469, 2052, 3370, 66, 2399, 679, 1248, 2081, 3181, 1100, 2224, 1870, 3001, 1506, 2760, 260, 3294, 2707, 2027, 146, 2355, 1323, 2783, 1956, 2946, 676, 2000, 3053, 1325, 3893, 424, 2084, 2998, 1797, 466, 2380, 1875, 507, 2939, 1054, 396, 3700, 1502, 3007, 904, 2418, 3443, 200, 3954, 1588, 3283, 1065, 3084, 1471, 2714, 2270, -+ 34, 4029, 1869, 3446, 1207, 2442, 3919, 879, 1788, 3726, 1115, 3129, 1466, 2742, 3897, 214, 1750, 3763, 405, 3459, 123, 3916, 812, 2317, 1685, 533, 3389, 1589, 3908, 1844, 376, 3990, 1158, 3437, 2377, 458, 2645, 896, 2336, 3396, 193, 2581, 3539, 1083, 2756, 3871, 1426, 2135, 3166, 1824, 566, 2104, 3559, 413, 1814, 2922, 2122, 797, 2750, 2258, 670, 3944, 869, 1729, -+ 3183, 1409, 916, 2220, 3100, 568, 1427, 3351, 2601, 336, 2803, 778, 3656, 432, 2170, 3330, 944, 2302, 2856, 970, 2416, 1766, 3249, 1218, 4066, 2438, 993, 2635, 697, 3021, 2152, 830, 2608, 234, 1421, 4051, 1705, 3577, 1507, 791, 3807, 958, 2051, 3148, 4, 2256, 712, 3529, 179, 2536, 3880, 989, 2655, 1302, 3267, 562, 1210, 3517, 303, 1950, 3393, 168, 2125, 3749, -+ 502, 2745, 3648, 256, 1620, 3684, 2240, 173, 1238, 3999, 2216, 1704, 2039, 2886, 1213, 1889, 2980, 665, 1585, 3715, 1333, 2662, 446, 3011, 41, 1906, 3772, 306, 3509, 1241, 3339, 1645, 3692, 2209, 3121, 607, 2833, 47, 3185, 2507, 1291, 2859, 400, 1580, 3675, 1328, 2676, 1658, 2857, 1183, 1965, 3190, 149, 3972, 1573, 2221, 3832, 1706, 3037, 1290, 2534, 1647, 2923, 
1161, -+ 2452, 1942, 809, 3321, 2139, 1022, 2842, 1887, 3074, 1528, 536, 3475, 188, 3837, 752, 3573, 79, 4018, 2141, 285, 3354, 738, 3680, 2173, 1473, 2927, 1144, 2816, 877, 2368, 91, 2727, 635, 1253, 1888, 3513, 974, 2280, 1990, 488, 3326, 1948, 4007, 2153, 660, 3314, 302, 4039, 784, 3626, 445, 2327, 1674, 2513, 827, 3099, 26, 2464, 749, 4095, 912, 3704, 556, 3495, -+ 225, 3962, 1567, 2550, 713, 4053, 371, 3436, 796, 2541, 3169, 1174, 2428, 1414, 2609, 1628, 2753, 1263, 3206, 1951, 1129, 2885, 1740, 911, 3376, 672, 3603, 1616, 2015, 3873, 1477, 3543, 2087, 3846, 171, 2549, 1612, 3730, 1157, 3923, 1650, 151, 1136, 3033, 2466, 996, 2931, 1450, 2391, 1763, 3362, 883, 3678, 333, 3496, 1422, 2730, 1084, 3225, 375, 2780, 1480, 2354, 1882, -+ 2961, 1028, 2814, 80, 3154, 1720, 2463, 1444, 3733, 23, 1666, 3917, 611, 3398, 388, 3235, 696, 2344, 425, 2580, 3839, 154, 2266, 3957, 381, 2056, 2476, 196, 3180, 527, 3009, 1044, 383, 2347, 2973, 781, 3227, 342, 2766, 654, 2967, 2101, 3546, 457, 1534, 3732, 1917, 616, 3134, 52, 2174, 2981, 1169, 2813, 2116, 541, 3781, 2156, 1448, 3578, 2199, 87, 3359, 1313, -+ 618, 3424, 1397, 3818, 2098, 1095, 2943, 961, 2316, 1959, 2975, 1026, 2724, 1768, 2155, 1153, 3945, 1751, 3612, 814, 1551, 3304, 1178, 2764, 1643, 3136, 1081, 3981, 2213, 1310, 2587, 1826, 3369, 1533, 1092, 3953, 1296, 2433, 1833, 3598, 1023, 2543, 1309, 3187, 2043, 125, 2519, 3528, 1324, 3757, 1511, 492, 4057, 1910, 994, 3200, 1777, 212, 2960, 702, 1811, 3122, 943, 3925, -+ 2611, 2046, 477, 2397, 650, 3541, 235, 3864, 632, 3342, 423, 2234, 3592, 109, 3758, 2933, 239, 2790, 1051, 2985, 2113, 630, 2515, 257, 3778, 843, 2708, 1549, 653, 3521, 233, 4058, 748, 3086, 2231, 484, 2062, 3075, 115, 1501, 3287, 334, 3885, 856, 2763, 3971, 1195, 414, 2822, 681, 2700, 1830, 2422, 112, 3556, 2267, 901, 3983, 2373, 1217, 3770, 454, 2481, 1845, -+ 266, 3662, 1654, 3208, 1287, 1934, 3060, 1807, 2648, 1395, 4074, 828, 1531, 2640, 1350, 872, 2454, 1512, 3499, 8, 4092, 1762, 3540, 1434, 2146, 3286, 71, 3683, 1902, 2421, 1385, 2719, 1972, 13, 3660, 1688, 3450, 1001, 4077, 2328, 907, 2913, 1690, 2353, 545, 1485, 2991, 2134, 1728, 3948, 938, 3353, 1298, 2891, 1592, 387, 3029, 1896, 524, 3378, 2092, 1560, 2972, 1089, -+ 3150, 882, 2672, 183, 3693, 2572, 506, 1109, 3218, 164, 2467, 1930, 3175, 706, 3480, 2111, 3831, 584, 2211, 2679, 1292, 2819, 442, 2935, 601, 1790, 2496, 1041, 2906, 435, 3230, 880, 3780, 2512, 909, 2770, 301, 2657, 1914, 508, 3754, 2079, 39, 3698, 1879, 3455, 800, 3322, 159, 2225, 3046, 304, 3621, 615, 3870, 2567, 1077, 3651, 1484, 2749, 190, 4008, 606, 3564, -+ 2203, 1410, 4046, 1988, 837, 1468, 3978, 2260, 1638, 3764, 929, 3547, 322, 1819, 2855, 394, 1416, 3006, 1702, 785, 3262, 960, 2249, 3624, 1224, 4023, 790, 3432, 1254, 3939, 1621, 2283, 531, 1519, 3394, 1258, 3819, 756, 3244, 1609, 2583, 1294, 3141, 1052, 2674, 261, 2427, 1553, 3653, 1165, 1962, 2606, 1040, 2320, 1937, 1284, 3160, 56, 2237, 920, 3194, 1372, 2401, 1952, -+ 0, 2901, 689, 2338, 3352, 2840, 73, 3405, 739, 2054, 2959, 1377, 2202, 3941, 1067, 2021, 3306, 167, 3905, 2042, 307, 3792, 1860, 144, 2615, 1978, 3064, 278, 1945, 2383, 119, 3544, 1874, 3131, 203, 2187, 1767, 2462, 1229, 3585, 364, 2798, 683, 3503, 1412, 4049, 921, 2762, 447, 3144, 733, 3797, 1489, 2986, 231, 3474, 780, 2793, 3935, 1670, 2526, 367, 3315, 841, -+ 3855, 1734, 3489, 305, 1240, 1836, 2195, 1011, 3078, 390, 2556, 642, 3209, 44, 2743, 3679, 917, 2695, 1133, 3400, 2436, 1098, 3056, 1415, 3277, 487, 1568, 2643, 3670, 925, 3101, 1034, 2747, 1167, 4030, 2848, 580, 3094, 99, 
2287, 1031, 3989, 1968, 2379, 514, 2053, 3092, 1234, 3894, 1623, 2472, 14, 3259, 822, 3968, 2248, 1401, 2040, 449, 3269, 766, 3706, 1603, 2944, -+ 1250, 2650, 1015, 2537, 3943, 579, 3630, 2593, 1274, 3467, 1583, 3829, 1162, 2471, 1543, 553, 2365, 1613, 3072, 677, 1748, 2752, 528, 3929, 906, 2321, 3810, 1288, 628, 2829, 2094, 3852, 401, 2392, 773, 1591, 3468, 1424, 3881, 1789, 3331, 1520, 172, 3220, 1636, 3596, 89, 1924, 2284, 1003, 3435, 1739, 2124, 2778, 1806, 517, 2621, 3403, 1205, 1829, 2869, 1062, 2293, 244, -+ 3608, 431, 3246, 1379, 2123, 2937, 1532, 296, 3922, 1865, 177, 2143, 2974, 427, 4011, 1912, 3576, 326, 2109, 4031, 75, 3655, 2206, 1541, 2048, 3401, 33, 2161, 3364, 1476, 254, 1691, 3337, 1375, 3605, 2055, 362, 2738, 716, 2634, 450, 2212, 3777, 1187, 2870, 863, 2485, 3363, 337, 3008, 634, 4063, 452, 1268, 3313, 1112, 3727, 253, 2228, 3798, 103, 1974, 4075, 1406, -+ 2430, 2061, 776, 3720, 152, 1114, 3293, 2325, 840, 2660, 3272, 988, 1725, 3366, 1305, 3020, 968, 3228, 799, 2544, 1840, 1016, 3170, 259, 2873, 777, 1838, 3142, 455, 3985, 2010, 2993, 646, 2689, 45, 2547, 3745, 1137, 3212, 1317, 3023, 825, 2566, 410, 2127, 3931, 1538, 775, 3812, 1785, 2757, 1368, 2542, 3584, 127, 3041, 2011, 1548, 2940, 723, 2490, 3307, 530, 2789, -+ 948, 3123, 1579, 2777, 2035, 3848, 542, 1946, 3057, 1433, 558, 3731, 2600, 755, 2326, 108, 2785, 1380, 3750, 1276, 3451, 2063, 737, 3567, 1247, 4062, 2671, 884, 2411, 1126, 2588, 952, 3794, 1858, 3161, 924, 1804, 2333, 218, 3974, 1679, 3456, 1394, 3572, 1757, 211, 3205, 2659, 1242, 2346, 176, 3140, 850, 2188, 1716, 2460, 560, 4019, 997, 3522, 1735, 1105, 3048, 1661, -+ 3773, 61, 3548, 609, 1273, 2553, 1682, 3609, 25, 4056, 1898, 2233, 224, 3604, 1107, 3802, 1673, 2410, 246, 2849, 459, 3066, 2578, 1625, 2425, 368, 1463, 3734, 1671, 3510, 107, 3257, 1341, 460, 2239, 3947, 550, 3441, 1993, 971, 2408, 20, 2765, 1061, 2952, 2292, 1371, 434, 3642, 972, 3460, 1659, 3736, 330, 3904, 888, 3276, 1362, 2735, 356, 2281, 3901, 227, 2171, -+ 805, 2528, 1774, 2183, 3165, 267, 2860, 1008, 2461, 1285, 2834, 935, 3081, 1509, 2694, 2073, 671, 3168, 1939, 1141, 3969, 1429, 133, 3815, 973, 3231, 2085, 201, 2918, 698, 2781, 1644, 2311, 3387, 1057, 1561, 2821, 1216, 2599, 3355, 701, 3791, 1892, 481, 4091, 651, 3310, 2191, 1700, 2899, 707, 2448, 1312, 2668, 1454, 2792, 2107, 36, 2030, 3234, 1465, 865, 2597, 3484, -+ 1518, 4000, 429, 3415, 870, 3949, 1503, 3492, 721, 3372, 361, 3789, 1831, 529, 3487, 320, 4081, 1012, 3607, 2250, 629, 2169, 3347, 1976, 2997, 722, 3620, 2351, 1200, 3889, 2144, 810, 4082, 236, 2023, 3515, 117, 3709, 444, 1835, 2845, 1299, 3198, 2140, 1510, 2546, 887, 3857, 49, 2009, 3994, 404, 3035, 1048, 3410, 461, 3668, 1618, 3828, 590, 2807, 3613, 1354, 620, -+ 3157, 1152, 2867, 1297, 2711, 2265, 490, 2131, 2955, 1712, 2350, 1214, 3203, 2179, 1069, 2577, 1857, 2924, 17, 2699, 1692, 2950, 1197, 485, 1582, 2740, 1407, 544, 3278, 1784, 382, 2941, 1208, 2510, 3063, 844, 2722, 2215, 1595, 4026, 226, 2370, 743, 3681, 145, 3465, 2068, 2782, 1392, 3153, 1006, 2255, 3638, 105, 1821, 2517, 813, 2920, 1189, 2409, 1967, 122, 3058, 2067, -+ 2403, 252, 1980, 3686, 116, 1652, 3746, 1337, 204, 3970, 669, 2715, 82, 3895, 1635, 3409, 763, 1526, 3502, 967, 3719, 268, 3900, 2393, 3558, 62, 3991, 1973, 2540, 987, 3716, 1909, 3327, 554, 1615, 3761, 1366, 595, 3300, 876, 3116, 1138, 2893, 1342, 2678, 1754, 378, 1079, 3563, 525, 2733, 1853, 1143, 2112, 4073, 1228, 3360, 1904, 377, 3391, 1042, 4050, 1721, 478, -+ 3860, 1624, 3270, 936, 2088, 2929, 610, 3145, 2444, 1486, 3534, 2007, 2996, 
808, 2402, 199, 2828, 2100, 505, 3127, 1435, 2558, 2003, 613, 1852, 2633, 1117, 3164, 271, 3476, 2271, 3, 1469, 3853, 2362, 340, 2898, 1878, 2570, 1462, 2166, 3872, 335, 3367, 639, 3988, 2964, 1908, 2374, 1577, 3835, 281, 3481, 2839, 582, 3018, 187, 2276, 3928, 1597, 2652, 831, 2557, 3425, -+ 1119, 2836, 561, 2514, 4088, 1064, 3458, 1957, 886, 2841, 341, 1101, 1683, 3236, 1365, 3617, 1033, 3921, 2447, 1846, 750, 3402, 1087, 3065, 1308, 3439, 705, 1752, 2872, 1374, 848, 3207, 2637, 1030, 2065, 3462, 910, 3911, 57, 3623, 534, 1894, 2559, 1611, 2297, 1413, 746, 3783, 175, 3211, 905, 2527, 754, 1464, 2369, 1665, 3751, 1396, 652, 3040, 272, 3266, 1307, 2102, -+ 15, 2274, 3526, 1432, 350, 1795, 2565, 48, 3883, 1269, 3411, 2136, 4027, 310, 2626, 645, 3113, 1311, 290, 3784, 2680, 113, 2268, 4068, 339, 1947, 2468, 3830, 526, 2078, 3950, 1711, 636, 3049, 166, 1731, 3108, 1184, 1999, 2669, 1349, 3500, 890, 3711, 78, 3290, 2502, 1201, 2853, 2184, 1357, 3303, 1926, 3884, 313, 2675, 789, 3171, 2437, 1265, 3718, 1920, 608, 2951, -+ 3766, 1569, 857, 2897, 2118, 3645, 762, 3182, 1629, 2360, 703, 2900, 954, 1876, 3759, 1488, 2289, 1911, 2956, 981, 1664, 3644, 918, 1562, 2904, 3661, 147, 1475, 3311, 2649, 230, 2245, 3636, 1386, 4022, 2603, 509, 2376, 3345, 847, 3059, 208, 2866, 1186, 2646, 1703, 366, 3392, 1823, 540, 4017, 6, 2987, 991, 3375, 1159, 3601, 2066, 68, 2772, 767, 2322, 3587, 1399, -+ 519, 2181, 3879, 161, 3229, 1149, 2032, 2656, 417, 3767, 2018, 134, 3549, 2223, 552, 3284, 59, 3987, 589, 3444, 2339, 422, 2805, 2151, 563, 1196, 3199, 2345, 874, 1300, 3531, 1142, 2746, 372, 2182, 821, 3744, 1631, 269, 4070, 1587, 2405, 1782, 3887, 674, 3560, 2071, 942, 3669, 1513, 2571, 2089, 691, 2453, 1877, 2874, 468, 1717, 3525, 1430, 3960, 1626, 207, 2734, -+ 1832, 3000, 1203, 2585, 1749, 463, 3986, 1355, 3349, 975, 3054, 1604, 2607, 1182, 2938, 1698, 2574, 1227, 2794, 2069, 1146, 3260, 1316, 3876, 1794, 2681, 1996, 473, 3993, 1936, 3032, 567, 2001, 3397, 1478, 3252, 1148, 2954, 1929, 2272, 623, 3167, 420, 2158, 2930, 1121, 3085, 2441, 221, 2892, 1082, 3610, 1602, 3803, 157, 1523, 4044, 2285, 1043, 2912, 456, 2516, 3454, 978, -+ 4037, 315, 3361, 622, 3691, 2194, 2984, 189, 1815, 2530, 497, 3271, 740, 3859, 270, 3671, 834, 3511, 1771, 186, 3743, 718, 3015, 29, 3373, 819, 3762, 1578, 2755, 77, 2414, 1634, 3915, 881, 2826, 94, 2560, 467, 3520, 1260, 3724, 1360, 3473, 1498, 163, 4009, 555, 1851, 3800, 787, 3251, 299, 3039, 1232, 3268, 2229, 662, 3002, 242, 1899, 3295, 1198, 1989, 3159, -+ 709, 2443, 2041, 1525, 2666, 1275, 859, 2090, 3557, 1116, 4054, 1437, 2404, 1944, 2810, 1068, 2352, 385, 3156, 2450, 1472, 2639, 1710, 2366, 1140, 2884, 262, 3239, 964, 3690, 807, 3135, 251, 2384, 1699, 3865, 1318, 3098, 779, 2486, 18, 2631, 914, 2799, 1919, 2364, 1566, 3334, 1255, 2304, 1656, 2688, 2047, 574, 2653, 1091, 3408, 1332, 3844, 2445, 631, 3737, 93, 2290, -+ 1646, 3672, 1046, 3508, 70, 3886, 2419, 3213, 346, 2815, 2138, 5, 3430, 614, 1479, 3374, 1651, 4064, 1353, 771, 3888, 344, 3545, 667, 4035, 1859, 2497, 1320, 2300, 1765, 2632, 1436, 3658, 1118, 3350, 731, 2315, 1843, 3956, 1010, 3316, 2082, 3918, 472, 3640, 744, 2690, 65, 2823, 428, 4078, 854, 3570, 1458, 3952, 354, 2014, 2624, 836, 1497, 3042, 1709, 2771, 1103, -+ 2990, 228, 2862, 811, 3189, 1736, 512, 1482, 3809, 710, 1773, 3034, 1346, 3951, 2291, 141, 2890, 585, 2696, 2022, 2910, 1055, 2197, 3139, 1382, 489, 3635, 621, 3461, 220, 4080, 546, 1881, 2958, 412, 1998, 3588, 184, 1455, 2761, 1605, 717, 1778, 3188, 1170, 3030, 1391, 3862, 1905, 3423, 1063, 2505, 101, 
1932, 2429, 1267, 3701, 37, 3488, 2083, 294, 4013, 803, 3470, -+ 1387, 3907, 1793, 2324, 1244, 2800, 2006, 3068, 980, 2385, 3628, 1050, 2729, 380, 1798, 3739, 928, 2232, 3413, 81, 1669, 3329, 1524, 216, 2703, 2097, 2936, 1639, 2045, 2712, 1344, 2167, 3494, 1019, 2458, 3158, 1168, 2592, 3448, 440, 3788, 2945, 202, 2474, 1995, 321, 3501, 1018, 577, 2176, 3083, 1373, 2865, 3344, 513, 3093, 1600, 2832, 1099, 3143, 2303, 1025, 2622, 363, -+ 2263, 668, 2717, 395, 4071, 659, 3673, 137, 2682, 1660, 275, 3210, 824, 2423, 3308, 1270, 3095, 1856, 1221, 3652, 2529, 576, 3961, 1895, 3735, 1124, 90, 3890, 941, 3255, 817, 2881, 32, 2731, 1715, 312, 4047, 649, 1986, 2358, 1246, 1927, 3574, 838, 4048, 2341, 1722, 2610, 3247, 1556, 276, 3677, 643, 1164, 3806, 2261, 774, 1903, 3913, 641, 1461, 3282, 1769, 3657, -+ 1614, 3325, 1175, 3133, 1565, 2483, 1113, 3297, 1319, 4003, 2241, 1236, 3838, 1606, 655, 2751, 248, 3920, 464, 2149, 950, 3045, 1261, 2825, 736, 2432, 3358, 1442, 2511, 323, 3725, 1527, 3940, 862, 3602, 2257, 1581, 2797, 998, 3147, 284, 2705, 1045, 2889, 1504, 648, 3073, 185, 1243, 3902, 2310, 1746, 2117, 2710, 1521, 180, 3516, 2647, 318, 2489, 3682, 143, 2908, 732, -+ 2508, 22, 3822, 1954, 222, 3483, 2137, 474, 2902, 588, 3114, 2034, 114, 2882, 3666, 1822, 1059, 2323, 3216, 1329, 3805, 155, 2298, 402, 3128, 1540, 2177, 656, 3061, 1800, 2313, 523, 1915, 3089, 1376, 694, 3281, 83, 3688, 1727, 3966, 1451, 3265, 50, 2064, 3697, 1335, 3449, 2028, 758, 2791, 949, 3996, 351, 3241, 2172, 1805, 995, 3079, 1131, 1872, 2389, 1220, 3973, -+ 1336, 2838, 931, 2361, 2992, 788, 1714, 3878, 2208, 1842, 976, 3532, 1358, 2103, 475, 2230, 3491, 1558, 734, 2817, 1741, 3431, 1384, 3705, 899, 3565, 274, 3997, 1166, 3538, 922, 3390, 2614, 209, 2390, 3877, 1188, 3010, 2157, 801, 2500, 522, 2218, 3849, 2539, 470, 2723, 897, 2473, 3614, 21, 3386, 1447, 3016, 895, 3874, 504, 3619, 1542, 4089, 692, 3440, 2057, 493, -+ 3245, 1759, 3615, 543, 1491, 3768, 2576, 1266, 46, 3643, 2545, 353, 2661, 4094, 1135, 2932, 38, 2564, 4010, 286, 2386, 802, 2684, 1770, 2522, 1321, 2914, 1689, 2673, 121, 2888, 1755, 1212, 3466, 1992, 462, 2605, 1864, 360, 3299, 1219, 3527, 1667, 685, 1185, 3223, 1966, 4006, 418, 1594, 3112, 2253, 591, 1900, 2449, 1364, 2864, 1970, 67, 2254, 2827, 329, 2971, 1058, -+ 2296, 283, 2617, 1873, 3152, 1120, 389, 3263, 2725, 1552, 726, 3226, 1686, 604, 3280, 1500, 3695, 999, 2025, 3192, 1078, 2979, 451, 4061, 10, 3221, 753, 2331, 984, 3842, 2160, 407, 4021, 878, 2966, 1545, 3536, 945, 3813, 1599, 2748, 178, 3118, 2008, 3631, 1608, 136, 1419, 2934, 2189, 682, 1326, 3729, 2736, 131, 3420, 818, 2521, 3320, 898, 1668, 3841, 1431, 3713, -+ 1536, 3406, 1007, 4034, 128, 2244, 3575, 2016, 951, 3930, 2299, 1020, 3748, 2435, 1925, 324, 2200, 3043, 571, 1775, 3785, 1456, 3275, 1151, 2165, 1803, 3793, 316, 3291, 1923, 1090, 3076, 2426, 1841, 612, 3201, 160, 2278, 2830, 549, 2132, 4093, 867, 2591, 365, 2769, 2114, 3553, 1004, 3821, 1828, 3336, 338, 1607, 4041, 1202, 3067, 419, 1327, 3583, 2190, 793, 2677, 100, -+ 2480, 764, 2921, 2133, 1282, 2801, 1388, 657, 3173, 279, 1820, 2965, 132, 1231, 2812, 3938, 861, 1440, 3590, 2663, 120, 2282, 675, 2620, 3554, 969, 2488, 1598, 2824, 495, 3641, 1505, 54, 3771, 2243, 1381, 3977, 1960, 1279, 3569, 1036, 1786, 2994, 1378, 3906, 1074, 3343, 572, 2518, 206, 2877, 868, 2413, 3155, 581, 2308, 1697, 3808, 2024, 2718, 240, 3202, 1233, 3497, -+ 1718, 3863, 406, 1632, 3381, 511, 3963, 2440, 1470, 2619, 3341, 1315, 2217, 3593, 557, 1760, 3324, 2501, 408, 1314, 3479, 1708, 3896, 1554, 247, 3017, 638, 
3418, 1334, 2455, 816, 3377, 2142, 1029, 2878, 437, 2551, 687, 2957, 7, 2491, 3332, 273, 2334, 782, 2968, 1783, 1278, 3124, 2074, 1490, 3942, 2004, 1066, 1907, 3568, 191, 2796, 605, 1122, 3995, 1850, 2273, 695, -+ 3130, 1156, 2356, 3728, 915, 3105, 2059, 170, 3753, 1080, 503, 4016, 804, 3115, 1361, 2693, 86, 1837, 3854, 2349, 769, 2846, 393, 3117, 2072, 1257, 3867, 2031, 135, 4084, 1958, 2709, 708, 3243, 1570, 3708, 1130, 3419, 1695, 3858, 1508, 658, 3786, 1707, 3472, 69, 2434, 4052, 421, 3599, 664, 2573, 53, 3382, 2642, 957, 3193, 1499, 2238, 3024, 1559, 494, 3689, 2584, -+}; -Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.c -+++ jellyfin-ffmpeg/libavfilter/vf_scale_cuda.c -@@ -1,5 +1,8 @@ - /* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. -+* Copyright (c) 2019 rcombs -+* -+* This file is part of FFmpeg. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), -@@ -20,10 +23,10 @@ - * DEALINGS IN THE SOFTWARE. - */ - --#include - #include - #include - -+#include "libavutil/avassert.h" - #include "libavutil/avstring.h" - #include "libavutil/common.h" - #include "libavutil/hwcontext.h" -@@ -34,13 +37,12 @@ - #include "libavutil/pixdesc.h" - - #include "avfilter.h" -+#include "dither_matrix.h" - #include "formats.h" - #include "internal.h" - #include "scale_eval.h" - #include "video.h" - --#include "vf_scale_cuda.h" -- - static const enum AVPixelFormat supported_formats[] = { - AV_PIX_FMT_YUV420P, - AV_PIX_FMT_NV12, -@@ -48,8 +50,6 @@ static const enum AVPixelFormat supporte - AV_PIX_FMT_P010, - AV_PIX_FMT_P016, - AV_PIX_FMT_YUV444P16, -- AV_PIX_FMT_0RGB32, -- AV_PIX_FMT_0BGR32, - }; - - #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) -@@ -58,17 +58,6 @@ static const enum AVPixelFormat supporte - - #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) - --enum { -- INTERP_ALGO_DEFAULT, -- -- INTERP_ALGO_NEAREST, -- INTERP_ALGO_BILINEAR, -- INTERP_ALGO_BICUBIC, -- INTERP_ALGO_LANCZOS, -- -- INTERP_ALGO_COUNT --}; -- - typedef struct CUDAScaleContext { - const AVClass *class; - -@@ -87,6 +76,7 @@ typedef struct CUDAScaleContext { - * Output sw format. AV_PIX_FMT_NONE for no conversion. 
- */ - enum AVPixelFormat format; -+ char *format_str; - - char *w_expr; ///< width expression string - char *h_expr; ///< height expression string -@@ -96,30 +86,56 @@ typedef struct CUDAScaleContext { - - CUcontext cu_ctx; - CUmodule cu_module; -- CUfunction cu_func_uchar; -- CUfunction cu_func_uchar2; -- CUfunction cu_func_uchar4; -- CUfunction cu_func_ushort; -- CUfunction cu_func_ushort2; -- CUfunction cu_func_ushort4; -+ -+#define VARIANT(NAME) \ -+ CUfunction cu_func_ ## NAME; -+#define VARIANTSET(NAME) \ -+ VARIANT(NAME) \ -+ VARIANT(NAME ## _c) \ -+ VARIANT(NAME ## _p2) \ -+ VARIANT(NAME ## _2) \ -+ VARIANT(NAME ## _2_u) \ -+ VARIANT(NAME ## _2_v) \ -+ VARIANT(NAME ## _4) -+ -+ VARIANTSET(8_8) -+ VARIANTSET(16_16) -+ VARIANTSET(8_16) -+ VARIANTSET(16_8) -+#undef VARIANTSET -+#undef VARIANT -+ -+ CUfunction cu_func_luma; -+ CUfunction cu_func_chroma_u; -+ CUfunction cu_func_chroma_v; -+ - CUstream cu_stream; - - CUdeviceptr srcBuffer; - CUdeviceptr dstBuffer; - int tex_alignment; - -- int interp_algo; -- int interp_use_linear; -- int interp_as_integer; -+ const AVPixFmtDescriptor *in_desc, *out_desc; -+ int in_planes, out_planes; - -- float param; -+ CUdeviceptr ditherBuffer; -+ CUtexObject ditherTex; - } CUDAScaleContext; - - static av_cold int cudascale_init(AVFilterContext *ctx) - { - CUDAScaleContext *s = ctx->priv; - -- s->format = AV_PIX_FMT_NONE; -+ if (!strcmp(s->format_str, "same")) { -+ s->format = AV_PIX_FMT_NONE; -+ } else { -+ s->format = av_get_pix_fmt(s->format_str); -+ if (s->format == AV_PIX_FMT_NONE) { -+ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); -+ return AVERROR(EINVAL); -+ } -+ } -+ - s->frame = av_frame_alloc(); - if (!s->frame) - return AVERROR(ENOMEM); -@@ -135,13 +151,22 @@ static av_cold void cudascale_uninit(AVF - { - CUDAScaleContext *s = ctx->priv; - -- if (s->hwctx && s->cu_module) { -+ if (s->hwctx) { - CudaFunctions *cu = s->hwctx->internal->cuda_dl; -- CUcontext dummy; -+ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; -+ -+ CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); -+ -+ if (s->ditherTex) { -+ CHECK_CU(cu->cuTexObjectDestroy(s->ditherTex)); -+ s->ditherTex = 0; -+ } -+ -+ if (s->ditherBuffer) { -+ CHECK_CU(cu->cuMemFree(s->ditherBuffer)); -+ s->ditherBuffer = 0; -+ } - -- CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); -- CHECK_CU(cu->cuModuleUnload(s->cu_module)); -- s->cu_module = NULL; - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - } - -@@ -262,6 +287,63 @@ static av_cold int init_processing_chain - return 0; - } - -+static av_cold int cudascale_setup_dither(AVFilterContext *ctx) -+{ -+ CUDAScaleContext *s = ctx->priv; -+ AVFilterLink *inlink = ctx->inputs[0]; -+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; -+ AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; -+ CudaFunctions *cu = device_hwctx->internal->cuda_dl; -+ CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; -+ int ret = 0; -+ -+ CUDA_MEMCPY2D cpy = { -+ .srcMemoryType = CU_MEMORYTYPE_HOST, -+ .dstMemoryType = CU_MEMORYTYPE_DEVICE, -+ .srcHost = ff_fruit_dither_matrix, -+ .dstDevice = 0, -+ .srcPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), -+ .dstPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), -+ .WidthInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), -+ .Height = ff_fruit_dither_size, -+ }; -+ -+ CUDA_TEXTURE_DESC tex_desc = { -+ .filterMode = CU_TR_FILTER_MODE_POINT, -+ .flags = CU_TRSF_READ_AS_INTEGER, -+ }; -+ -+ CUDA_RESOURCE_DESC res_desc 
= { -+ .resType = CU_RESOURCE_TYPE_PITCH2D, -+ .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT16, -+ .res.pitch2D.numChannels = 1, -+ .res.pitch2D.width = ff_fruit_dither_size, -+ .res.pitch2D.height = ff_fruit_dither_size, -+ .res.pitch2D.pitchInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), -+ .res.pitch2D.devPtr = 0, -+ }; -+ -+ av_assert0(sizeof(ff_fruit_dither_matrix) == sizeof(ff_fruit_dither_matrix[0]) * ff_fruit_dither_size * ff_fruit_dither_size); -+ -+ if ((ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx))) < 0) -+ return ret; -+ -+ if ((ret = CHECK_CU(cu->cuMemAlloc(&s->ditherBuffer, sizeof(ff_fruit_dither_matrix)))) < 0) -+ goto fail; -+ -+ res_desc.res.pitch2D.devPtr = cpy.dstDevice = s->ditherBuffer; -+ -+ if ((ret = CHECK_CU(cu->cuMemcpy2D(&cpy))) < 0) -+ goto fail; -+ -+ if ((ret = CHECK_CU(cu->cuTexObjectCreate(&s->ditherTex, &res_desc, &tex_desc, NULL))) < 0) -+ goto fail; -+ -+fail: -+ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); -+ return ret; -+} -+ - static av_cold int cudascale_config_props(AVFilterLink *outlink) - { - AVFilterContext *ctx = outlink->src; -@@ -271,46 +353,11 @@ static av_cold int cudascale_config_prop - AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; - CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; - CudaFunctions *cu = device_hwctx->internal->cuda_dl; -- char buf[64]; - int w, h; -+ int i; - int ret; - -- char *scaler_ptx; -- const char *function_infix = ""; -- - extern char vf_scale_cuda_ptx[]; -- extern char vf_scale_cuda_bicubic_ptx[]; -- -- switch(s->interp_algo) { -- case INTERP_ALGO_NEAREST: -- scaler_ptx = vf_scale_cuda_ptx; -- function_infix = "_Nearest"; -- s->interp_use_linear = 0; -- s->interp_as_integer = 1; -- break; -- case INTERP_ALGO_BILINEAR: -- scaler_ptx = vf_scale_cuda_ptx; -- function_infix = "_Bilinear"; -- s->interp_use_linear = 1; -- s->interp_as_integer = 1; -- break; -- case INTERP_ALGO_DEFAULT: -- case INTERP_ALGO_BICUBIC: -- scaler_ptx = vf_scale_cuda_bicubic_ptx; -- function_infix = "_Bicubic"; -- s->interp_use_linear = 0; -- s->interp_as_integer = 0; -- break; -- case INTERP_ALGO_LANCZOS: -- scaler_ptx = vf_scale_cuda_bicubic_ptx; -- function_infix = "_Lanczos"; -- s->interp_use_linear = 0; -- s->interp_as_integer = 0; -- break; -- default: -- av_log(ctx, AV_LOG_ERROR, "Unknown interpolation algorithm\n"); -- return AVERROR_BUG; -- } - - s->hwctx = device_hwctx; - s->cu_stream = s->hwctx->stream; -@@ -319,40 +366,30 @@ static av_cold int cudascale_config_prop - if (ret < 0) - goto fail; - -- ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, scaler_ptx)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_uchar", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_uchar2", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_uchar4", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_ushort", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_ushort2", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, 
sizeof(buf), "Subsample%s_ushort4", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, buf)); -+ ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx)); - if (ret < 0) - goto fail; - -+#define VARIANT(NAME) \ -+ CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ ## NAME, s->cu_module, "Subsample_Bilinear_" #NAME)); \ -+ if (ret < 0) \ -+ goto fail; -+ -+#define VARIANTSET(NAME) \ -+ VARIANT(NAME) \ -+ VARIANT(NAME ## _c) \ -+ VARIANT(NAME ## _2) \ -+ VARIANT(NAME ## _p2) \ -+ VARIANT(NAME ## _2_u) \ -+ VARIANT(NAME ## _2_v) \ -+ VARIANT(NAME ## _4) -+ -+ VARIANTSET(8_8) -+ VARIANTSET(16_16) -+ VARIANTSET(8_16) -+ VARIANTSET(16_8) -+#undef VARIANTSET -+#undef VARIANT - - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - -@@ -376,6 +413,53 @@ static av_cold int cudascale_config_prop - if (ret < 0) - return ret; - -+ s->in_desc = av_pix_fmt_desc_get(s->in_fmt); -+ s->out_desc = av_pix_fmt_desc_get(s->out_fmt); -+ -+ for (i = 0; i < s->in_desc->nb_components; i++) -+ s->in_planes = FFMAX(s->in_planes, s->in_desc ->comp[i].plane + 1); -+ -+ for (i = 0; i < s->in_desc->nb_components; i++) -+ s->out_planes = FFMAX(s->out_planes, s->out_desc->comp[i].plane + 1); -+ -+#define VARIANT(INDEPTH, OUTDEPTH, SUFFIX) s->cu_func_ ## INDEPTH ## _ ## OUTDEPTH ## SUFFIX -+#define BITS(n) ((n + 7) & ~7) -+#define VARIANTSET(INDEPTH, OUTDEPTH) \ -+ else if (BITS(s->in_desc->comp[0].depth) == INDEPTH && \ -+ BITS(s->out_desc->comp[0].depth) == OUTDEPTH) { \ -+ s->cu_func_luma = VARIANT(INDEPTH, OUTDEPTH,); \ -+ if (s->in_planes == 3 && s->out_planes == 3) { \ -+ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _c); \ -+ } else if (s->in_planes == 3 && s->out_planes == 2) { \ -+ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _p2); \ -+ } else if (s->in_planes == 2 && s->out_planes == 2) { \ -+ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2); \ -+ } else if (s->in_planes == 2 && s->out_planes == 3) { \ -+ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2_u); \ -+ s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _2_v); \ -+ } else { \ -+ ret = AVERROR_BUG; \ -+ goto fail; \ -+ } \ -+ } -+ -+ if (0) {} -+ VARIANTSET(8, 8) -+ VARIANTSET(16, 16) -+ VARIANTSET(8, 16) -+ VARIANTSET(16, 8) -+ else { -+ ret = AVERROR_BUG; -+ goto fail; -+ } -+#undef VARIANTSET -+#undef VARIANT -+ -+ if (s->in_desc->comp[0].depth > s->out_desc->comp[0].depth) { -+ if ((ret = cudascale_setup_dither(ctx)) < 0) -+ goto fail; -+ } -+ - av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d%s\n", - inlink->w, inlink->h, outlink->w, outlink->h, s->passthrough ? " (passthrough)" : ""); - -@@ -396,21 +480,18 @@ fail: - static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, - uint8_t *src_dptr, int src_width, int src_height, int src_pitch, - uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, -- int pixel_size, int bit_depth) -+ int pixel_size) - { - CUDAScaleContext *s = ctx->priv; - CudaFunctions *cu = s->hwctx->internal->cuda_dl; - CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; - CUtexObject tex = 0; -- void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, -- &src_width, &src_height, &bit_depth, &s->param }; -+ void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height, &s->ditherTex }; - int ret; - - CUDA_TEXTURE_DESC tex_desc = { -- .filterMode = s->interp_use_linear ? 
-- CU_TR_FILTER_MODE_LINEAR : -- CU_TR_FILTER_MODE_POINT, -- .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0, -+ .filterMode = CU_TR_FILTER_MODE_LINEAR, -+ .flags = CU_TRSF_READ_AS_INTEGER, - }; - - CUDA_RESOURCE_DESC res_desc = { -@@ -425,10 +506,6 @@ static int call_resize_kernel(AVFilterCo - .res.pitch2D.devPtr = (CUdeviceptr)src_dptr, - }; - -- // Handling of channels is done via vector-types in cuda, so their size is implicitly part of the pitch -- // Same for pixel_size, which is represented via datatypes on the cuda side of things. -- dst_pitch /= channels * pixel_size; -- - ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL)); - if (ret < 0) - goto exit; -@@ -447,91 +524,37 @@ exit: - static int scalecuda_resize(AVFilterContext *ctx, - AVFrame *out, AVFrame *in) - { -- AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data; - CUDAScaleContext *s = ctx->priv; - -- switch (in_frames_ctx->sw_format) { -- case AV_PIX_FMT_YUV420P: -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[1], in->width / 2, in->height / 2, in->linesize[1], -- out->data[1], out->width / 2, out->height / 2, out->linesize[1], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[2], in->width / 2, in->height / 2, in->linesize[2], -- out->data[2], out->width / 2, out->height / 2, out->linesize[2], -- 1, 8); -- break; -- case AV_PIX_FMT_YUV444P: -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[1], in->width, in->height, in->linesize[1], -- out->data[1], out->width, out->height, out->linesize[1], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[2], in->width, in->height, in->linesize[2], -- out->data[2], out->width, out->height, out->linesize[2], -- 1, 8); -- break; -- case AV_PIX_FMT_YUV444P16: -- call_resize_kernel(ctx, s->cu_func_ushort, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 2, 16); -- call_resize_kernel(ctx, s->cu_func_ushort, 1, -- in->data[1], in->width, in->height, in->linesize[1], -- out->data[1], out->width, out->height, out->linesize[1], -- 2, 16); -- call_resize_kernel(ctx, s->cu_func_ushort, 1, -- in->data[2], in->width, in->height, in->linesize[2], -- out->data[2], out->width, out->height, out->linesize[2], -- 2, 16); -- break; -- case AV_PIX_FMT_NV12: -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar2, 2, -- in->data[1], in->width / 2, in->height / 2, in->linesize[1], -- out->data[1], out->width / 2, out->height / 2, out->linesize[1], -- 1, 8); -- break; -- case AV_PIX_FMT_P010LE: -- call_resize_kernel(ctx, s->cu_func_ushort, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 2, 10); -- call_resize_kernel(ctx, s->cu_func_ushort2, 2, -- in->data[1], in->width / 2, in->height / 2, in->linesize[1], -- out->data[1], out->width / 2, out->height / 2, out->linesize[1], -- 2, 10); -- break; -- case AV_PIX_FMT_P016LE: -- call_resize_kernel(ctx, 
s->cu_func_ushort, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 2, 16); -- call_resize_kernel(ctx, s->cu_func_ushort2, 2, -- in->data[1], in->width / 2, in->height / 2, in->linesize[1], -- out->data[1], out->width / 2, out->height / 2, out->linesize[1], -- 2, 16); -- break; -- case AV_PIX_FMT_0RGB32: -- case AV_PIX_FMT_0BGR32: -- call_resize_kernel(ctx, s->cu_func_uchar4, 4, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 1, 8); -- break; -- default: -- return AVERROR_BUG; -+#define DEPTH_BYTES(depth) (((depth) + 7) / 8) -+ -+ call_resize_kernel(ctx, s->cu_func_luma, 1, -+ in->data[0], in->width, in->height, in->linesize[0], -+ out->data[0], out->width, out->height, out->linesize[0], -+ DEPTH_BYTES(s->in_desc->comp[0].depth)); -+ -+ call_resize_kernel(ctx, s->cu_func_chroma_u, s->in_planes == 2 ? 2 : 1, -+ in->data[1], -+ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), -+ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), -+ in->linesize[1], -+ out->data[1], -+ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), -+ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), -+ out->linesize[1], -+ DEPTH_BYTES(s->in_desc->comp[1].depth)); -+ -+ if (s->cu_func_chroma_v) { -+ call_resize_kernel(ctx, s->cu_func_chroma_v, s->in_planes == 2 ? 2 : 1, -+ in->data[s->in_desc->comp[2].plane], -+ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), -+ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), -+ in->linesize[s->in_desc->comp[2].plane], -+ out->data[s->out_desc->comp[2].plane] + s->out_desc->comp[2].offset, -+ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), -+ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), -+ out->linesize[s->out_desc->comp[2].plane], -+ DEPTH_BYTES(s->in_desc->comp[2].depth)); - } - - return 0; -@@ -621,20 +644,15 @@ static AVFrame *cudascale_get_video_buff - #define OFFSET(x) offsetof(CUDAScaleContext, x) - #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) - static const AVOption options[] = { -- { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, -- { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, -- { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" }, -- { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" }, -- { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" }, -- { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" }, -- { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" }, -+ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, -+ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, -+ { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, - { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, -- { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = 
SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS }, -- { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" }, -- { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, -- { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, -- { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, -- { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS }, -+ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, -+ { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, -+ { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, -+ { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, -+ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, - { NULL }, - }; - -Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.cu -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.cu -+++ jellyfin-ffmpeg/libavfilter/vf_scale_cuda.cu -@@ -20,35 +20,115 @@ - * DEALINGS IN THE SOFTWARE. - */ - --#include "cuda/vector_helpers.cuh" -+typedef unsigned char uchar; -+typedef unsigned short ushort; - --template --__device__ inline void Subsample_Nearest(cudaTextureObject_t tex, -- T *dst, -- int dst_width, int dst_height, int dst_pitch, -- int src_width, int src_height, -- int bit_depth) -+#define SHIFTDOWN(val) (dstbase)(val >> abs(2 + shift)) -+#define SHIFTUP(val) (dstbase)(val << abs(-shift - 2)) -+ -+template struct add_conv_shift1_d - { -- int xo = blockIdx.x * blockDim.x + threadIdx.x; -- int yo = blockIdx.y * blockDim.y + threadIdx.y; -+ typedef DST dstbase; - -- if (yo < dst_height && xo < dst_width) -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) - { -- float hscale = (float)src_width / (float)dst_width; -- float vscale = (float)src_height / (float)dst_height; -- float xi = (xo + 0.5f) * hscale; -- float yi = (yo + 0.5f) * vscale; -+ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + ((1 + d) >> (sizeof(SRC) * 8 - dither + 3)); - -- dst[yo*dst_pitch+xo] = tex2D(tex, xi, yi); -+ if (shift > -2) -+ return SHIFTDOWN(ret); -+ else -+ return SHIFTUP(ret); - } --} -+}; -+ -+template struct add_conv_shift1 -+{ -+ typedef DST dstbase; - --template --__device__ inline void Subsample_Bilinear(cudaTextureObject_t tex, -- T *dst, -- int dst_width, int dst_height, int dst_pitch, -- int src_width, int src_height, -- int bit_depth) -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + 2; -+ -+ if (shift > -2) -+ return SHIFTDOWN(ret); -+ else -+ return SHIFTUP(ret); -+ } -+}; -+ -+template struct add_conv_shift2 -+{ -+ typedef decltype(DST::x) dstbase; -+ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ unsigned retx 
= (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; -+ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; -+ -+ if (shift > -2) -+ return { SHIFTDOWN(retx), SHIFTDOWN(rety) }; -+ else -+ return { SHIFTUP(retx), SHIFTUP(rety) }; -+ } -+}; -+ -+template struct add_conv_shift2_x -+{ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ return add_conv_shift1()(i1.x, i2.x, i3.x, i4.x, d); -+ } -+}; -+ -+template struct add_conv_shift2_y -+{ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ return add_conv_shift1()(i1.y, i2.y, i3.y, i4.y, d); -+ } -+}; -+ -+template struct add_conv_shift3 -+{ -+ typedef decltype(DST::x) dstbase; -+ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; -+ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; -+ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; -+ -+ if (shift > -2) -+ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz) }; -+ else -+ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz) }; -+ } -+}; -+ -+template struct add_conv_shift4 -+{ -+ typedef decltype(DST::x) dstbase; -+ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; -+ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; -+ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; -+ unsigned retw = (unsigned)i1.w + (unsigned)i2.w + (unsigned)i3.w + (unsigned)i4.w + 2; -+ -+ if (shift > -2) -+ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz), SHIFTDOWN(retw) }; -+ else -+ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz), SHIFTUP(retw) }; -+ } -+}; -+ -+template class conv, int pitch, int shift, int dither> -+__inline__ __device__ void Subsample_Bilinear(cudaTextureObject_t tex, -+ DST *dst, -+ int dst_width, int dst_height, int dst_pitch, -+ int src_width, int src_height, -+ cudaTextureObject_t ditherTex) - { - int xo = blockIdx.x * blockDim.x + threadIdx.x; - int yo = blockIdx.y * blockDim.y + threadIdx.y; -@@ -66,58 +146,48 @@ __device__ inline void Subsample_Bilinea - float dx = wh / (0.5f + wh); - float dy = wv / (0.5f + wv); - -- intT r = { 0 }; -- vec_set_scalar(r, 2); -- r += tex2D(tex, xi - dx, yi - dy); -- r += tex2D(tex, xi + dx, yi - dy); -- r += tex2D(tex, xi - dx, yi + dy); -- r += tex2D(tex, xi + dx, yi + dy); -- vec_set(dst[yo*dst_pitch+xo], r >> 2); -+ SRC i0 = tex2D(tex, xi-dx, yi-dy); -+ SRC i1 = tex2D(tex, xi+dx, yi-dy); -+ SRC i2 = tex2D(tex, xi-dx, yi+dy); -+ SRC i3 = tex2D(tex, xi+dx, yi+dy); -+ -+ ushort ditherVal = dither ? 
tex2D(ditherTex, xo, yo) : 0; -+ -+ dst[yo*(dst_pitch / sizeof(DST))+xo*pitch] = conv()(i0, i1, i2, i3, ditherVal); - } - } - - extern "C" { - --#define NEAREST_KERNEL(T) \ -- __global__ void Subsample_Nearest_ ## T(cudaTextureObject_t src_tex, \ -- T *dst, \ -- int dst_width, int dst_height, int dst_pitch, \ -- int src_width, int src_height, \ -- int bit_depth) \ -- { \ -- Subsample_Nearest(src_tex, dst, \ -- dst_width, dst_height, dst_pitch, \ -- src_width, src_height, \ -- bit_depth); \ -- } -- --NEAREST_KERNEL(uchar) --NEAREST_KERNEL(uchar2) --NEAREST_KERNEL(uchar4) -- --NEAREST_KERNEL(ushort) --NEAREST_KERNEL(ushort2) --NEAREST_KERNEL(ushort4) -- --#define BILINEAR_KERNEL(T) \ -- __global__ void Subsample_Bilinear_ ## T(cudaTextureObject_t src_tex, \ -- T *dst, \ -- int dst_width, int dst_height, int dst_pitch, \ -- int src_width, int src_height, \ -- int bit_depth) \ -- { \ -- Subsample_Bilinear(src_tex, dst, \ -- dst_width, dst_height, dst_pitch, \ -- src_width, src_height, \ -- bit_depth); \ -- } -- --BILINEAR_KERNEL(uchar) --BILINEAR_KERNEL(uchar2) --BILINEAR_KERNEL(uchar4) -- --BILINEAR_KERNEL(ushort) --BILINEAR_KERNEL(ushort2) --BILINEAR_KERNEL(ushort4) -+#define VARIANT(SRC, DST, CONV, SHIFT, PITCH, DITHER, NAME) \ -+__global__ void Subsample_Bilinear_ ## NAME(cudaTextureObject_t tex, \ -+ DST *dst, \ -+ int dst_width, int dst_height, int dst_pitch, \ -+ int src_width, int src_height, \ -+ cudaTextureObject_t ditherTex) \ -+{ \ -+ Subsample_Bilinear(tex, dst, dst_width, dst_height, dst_pitch, \ -+ src_width, src_height, ditherTex); \ -+} -+ -+#define VARIANTSET2(SRC, DST, SHIFT, NAME) \ -+ VARIANT(SRC, DST, add_conv_shift1_d, SHIFT, 1, (sizeof(DST) < sizeof(SRC)) ? sizeof(DST) : 0, NAME) \ -+ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 1, 0, NAME ## _c) \ -+ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 2, 0, NAME ## _p2) \ -+ VARIANT(SRC ## 2, DST ## 2, add_conv_shift2, SHIFT, 1, 0, NAME ## _2) \ -+ VARIANT(SRC ## 2, DST, add_conv_shift2_x, SHIFT, 1, 0, NAME ## _2_u) \ -+ VARIANT(SRC ## 2, DST, add_conv_shift2_y, SHIFT, 1, 0, NAME ## _2_v) \ -+ VARIANT(SRC ## 4, DST ## 4, add_conv_shift4, SHIFT, 1, 0, NAME ## _4) -+ -+#define VARIANTSET(SRC, DST, SRCSIZE, DSTSIZE) \ -+ VARIANTSET2(SRC, DST, (SRCSIZE - DSTSIZE), SRCSIZE ## _ ## DSTSIZE) -+ -+// Straight no-conversion -+VARIANTSET(uchar, uchar, 8, 8) -+VARIANTSET(ushort, ushort, 16, 16) -+ -+// Conversion between 8- and 16-bit -+VARIANTSET(uchar, ushort, 8, 16) -+VARIANTSET(ushort, uchar, 16, 8) - - } -Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.h -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.h -+++ /dev/null -@@ -1,28 +0,0 @@ --/* -- * This file is part of FFmpeg. -- * -- * Permission is hereby granted, free of charge, to any person obtaining a -- * copy of this software and associated documentation files (the "Software"), -- * to deal in the Software without restriction, including without limitation -- * the rights to use, copy, modify, merge, publish, distribute, sublicense, -- * and/or sell copies of the Software, and to permit persons to whom the -- * Software is furnished to do so, subject to the following conditions: -- * -- * The above copyright notice and this permission notice shall be included in -- * all copies or substantial portions of the Software. 
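Note: the VARIANTSET() tables above encode each kernel's input depth, output depth and plane handling directly in its name (luma uses the bare name, chroma one of the suffixes), and cudascale_config_props() earlier in this patch resolves them with cuModuleGetFunction(). A host-side sketch of that naming scheme; bits() and pick_chroma_kernel_name() are hypothetical helpers standing in for the patch's macros:

    #include <stdio.h>

    /* Depths are rounded up to whole bytes, as the BITS() macro does. */
    static int bits(int depth) { return (depth + 7) & ~7; }

    static int pick_chroma_kernel_name(char *buf, size_t size,
                                       int in_depth, int out_depth,
                                       int in_planes, int out_planes)
    {
        const char *suffix;

        if      (in_planes == 3 && out_planes == 3) suffix = "_c";   /* planar -> planar */
        else if (in_planes == 3 && out_planes == 2) suffix = "_p2";  /* planar -> semi-planar */
        else if (in_planes == 2 && out_planes == 2) suffix = "_2";   /* semi-planar both sides */
        else if (in_planes == 2 && out_planes == 3) suffix = "_2_u"; /* paired with a _2_v pass */
        else return -1;                                              /* unsupported layout */

        snprintf(buf, size, "Subsample_Bilinear_%d_%d%s",
                 bits(in_depth), bits(out_depth), suffix);
        return 0;
    }

    int main(void)
    {
        char name[64];
        /* e.g. P016 (16 bit, 2 planes) -> NV12 (8 bit, 2 planes) */
        if (!pick_chroma_kernel_name(name, sizeof(name), 16, 8, 2, 2))
            printf("%s\n", name); /* Subsample_Bilinear_16_8_2 */
        return 0;
    }

Keying the lookup on rounded depth plus plane count, instead of on pixel format as the deleted code did, is what lets one kernel table serve yuv420p, nv12, p010, p016 and the 16-bit planar formats.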
-- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -- * DEALINGS IN THE SOFTWARE. -- */ -- --#ifndef AVFILTER_SCALE_CUDA_H --#define AVFILTER_SCALE_CUDA_H -- --#define SCALE_CUDA_PARAM_DEFAULT 999999.0f -- --#endif -Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda_bicubic.cu -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda_bicubic.cu -+++ /dev/null -@@ -1,224 +0,0 @@ --/* -- * This file is part of FFmpeg. -- * -- * Permission is hereby granted, free of charge, to any person obtaining a -- * copy of this software and associated documentation files (the "Software"), -- * to deal in the Software without restriction, including without limitation -- * the rights to use, copy, modify, merge, publish, distribute, sublicense, -- * and/or sell copies of the Software, and to permit persons to whom the -- * Software is furnished to do so, subject to the following conditions: -- * -- * The above copyright notice and this permission notice shall be included in -- * all copies or substantial portions of the Software. -- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -- * DEALINGS IN THE SOFTWARE. -- */ -- --#include "cuda/vector_helpers.cuh" --#include "vf_scale_cuda.h" -- --typedef float4 (*coeffs_function_t)(float, float); -- --__device__ inline float4 lanczos_coeffs(float x, float param) --{ -- const float pi = 3.141592654f; -- -- float4 res = make_float4( -- pi * (x + 1), -- pi * x, -- pi * (x - 1), -- pi * (x - 2)); -- -- res.x = res.x == 0.0f ? 1.0f : -- __sinf(res.x) * __sinf(res.x / 2.0f) / (res.x * res.x / 2.0f); -- res.y = res.y == 0.0f ? 1.0f : -- __sinf(res.y) * __sinf(res.y / 2.0f) / (res.y * res.y / 2.0f); -- res.z = res.z == 0.0f ? 1.0f : -- __sinf(res.z) * __sinf(res.z / 2.0f) / (res.z * res.z / 2.0f); -- res.w = res.w == 0.0f ? 1.0f : -- __sinf(res.w) * __sinf(res.w / 2.0f) / (res.w * res.w / 2.0f); -- -- return res / (res.x + res.y + res.z + res.w); --} -- --__device__ inline float4 bicubic_coeffs(float x, float param) --{ -- const float A = param == SCALE_CUDA_PARAM_DEFAULT ? 
0.0f : -param; -- -- float4 res; -- res.x = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A; -- res.y = ((A + 2) * x - (A + 3)) * x * x + 1; -- res.z = ((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1; -- res.w = 1.0f - res.x - res.y - res.z; -- -- return res; --} -- --__device__ inline void derived_fast_coeffs(float4 coeffs, float x, float *h0, float *h1, float *s) --{ -- float g0 = coeffs.x + coeffs.y; -- float g1 = coeffs.z + coeffs.w; -- -- *h0 = coeffs.y / g0 - 0.5f; -- *h1 = coeffs.w / g1 + 1.5f; -- *s = g0 / (g0 + g1); --} -- --template --__device__ inline V apply_coeffs(float4 coeffs, V c0, V c1, V c2, V c3) --{ -- V res = c0 * coeffs.x; -- res += c1 * coeffs.y; -- res += c2 * coeffs.z; -- res += c3 * coeffs.w; -- -- return res; --} -- --template --__device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function, -- cudaTextureObject_t src_tex, -- T *dst, -- int dst_width, int dst_height, int dst_pitch, -- int src_width, int src_height, -- int bit_depth, float param) --{ -- int xo = blockIdx.x * blockDim.x + threadIdx.x; -- int yo = blockIdx.y * blockDim.y + threadIdx.y; -- -- if (yo < dst_height && xo < dst_width) -- { -- float hscale = (float)src_width / (float)dst_width; -- float vscale = (float)src_height / (float)dst_height; -- float xi = (xo + 0.5f) * hscale - 0.5f; -- float yi = (yo + 0.5f) * vscale - 0.5f; -- float px = floor(xi); -- float py = floor(yi); -- float fx = xi - px; -- float fy = yi - py; -- -- float factor = bit_depth > 8 ? 0xFFFF : 0xFF; -- -- float4 coeffsX = coeffs_function(fx, param); -- float4 coeffsY = coeffs_function(fy, param); -- --#define PIX(x, y) tex2D(src_tex, (x), (y)) -- -- dst[yo * dst_pitch + xo] = from_floatN( -- apply_coeffs(coeffsY, -- apply_coeffs(coeffsX, PIX(px - 1, py - 1), PIX(px, py - 1), PIX(px + 1, py - 1), PIX(px + 2, py - 1)), -- apply_coeffs(coeffsX, PIX(px - 1, py ), PIX(px, py ), PIX(px + 1, py ), PIX(px + 2, py )), -- apply_coeffs(coeffsX, PIX(px - 1, py + 1), PIX(px, py + 1), PIX(px + 1, py + 1), PIX(px + 2, py + 1)), -- apply_coeffs(coeffsX, PIX(px - 1, py + 2), PIX(px, py + 2), PIX(px + 1, py + 2), PIX(px + 2, py + 2)) -- ) * factor -- ); -- --#undef PIX -- } --} -- --/* This does not yield correct results. Most likely because of low internal precision in tex2D linear interpolation */ --template --__device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function, -- cudaTextureObject_t src_tex, -- T *dst, -- int dst_width, int dst_height, int dst_pitch, -- int src_width, int src_height, -- int bit_depth, float param) --{ -- int xo = blockIdx.x * blockDim.x + threadIdx.x; -- int yo = blockIdx.y * blockDim.y + threadIdx.y; -- -- if (yo < dst_height && xo < dst_width) -- { -- float hscale = (float)src_width / (float)dst_width; -- float vscale = (float)src_height / (float)dst_height; -- float xi = (xo + 0.5f) * hscale - 0.5f; -- float yi = (yo + 0.5f) * vscale - 0.5f; -- float px = floor(xi); -- float py = floor(yi); -- float fx = xi - px; -- float fy = yi - py; -- -- float factor = bit_depth > 8 ? 
0xFFFF : 0xFF; -- -- float4 coeffsX = coeffs_function(fx, param); -- float4 coeffsY = coeffs_function(fy, param); -- -- float h0x, h1x, sx; -- float h0y, h1y, sy; -- derived_fast_coeffs(coeffsX, fx, &h0x, &h1x, &sx); -- derived_fast_coeffs(coeffsY, fy, &h0y, &h1y, &sy); -- --#define PIX(x, y) tex2D(src_tex, (x), (y)) -- -- floatT pix[4] = { -- PIX(px + h0x, py + h0y), -- PIX(px + h1x, py + h0y), -- PIX(px + h0x, py + h1y), -- PIX(px + h1x, py + h1y) -- }; -- --#undef PIX -- -- dst[yo * dst_pitch + xo] = from_floatN( -- lerp_scalar( -- lerp_scalar(pix[3], pix[2], sx), -- lerp_scalar(pix[1], pix[0], sx), -- sy) * factor -- ); -- } --} -- --extern "C" { -- --#define BICUBIC_KERNEL(T) \ -- __global__ void Subsample_Bicubic_ ## T(cudaTextureObject_t src_tex, \ -- T *dst, \ -- int dst_width, int dst_height, int dst_pitch, \ -- int src_width, int src_height, \ -- int bit_depth, float param) \ -- { \ -- Subsample_Bicubic(&bicubic_coeffs, src_tex, dst, \ -- dst_width, dst_height, dst_pitch, \ -- src_width, src_height, \ -- bit_depth, param); \ -- } -- --BICUBIC_KERNEL(uchar) --BICUBIC_KERNEL(uchar2) --BICUBIC_KERNEL(uchar4) -- --BICUBIC_KERNEL(ushort) --BICUBIC_KERNEL(ushort2) --BICUBIC_KERNEL(ushort4) -- -- --#define LANCZOS_KERNEL(T) \ -- __global__ void Subsample_Lanczos_ ## T(cudaTextureObject_t src_tex, \ -- T *dst, \ -- int dst_width, int dst_height, int dst_pitch, \ -- int src_width, int src_height, \ -- int bit_depth, float param) \ -- { \ -- Subsample_Bicubic(&lanczos_coeffs, src_tex, dst, \ -- dst_width, dst_height, dst_pitch, \ -- src_width, src_height, \ -- bit_depth, param); \ -- } -- --LANCZOS_KERNEL(uchar) --LANCZOS_KERNEL(uchar2) --LANCZOS_KERNEL(uchar4) -- --LANCZOS_KERNEL(ushort) --LANCZOS_KERNEL(ushort2) --LANCZOS_KERNEL(ushort4) -- --} diff --git a/debian/patches/0005-cuda-tonemap-impl.patch b/debian/patches/0005-cuda-tonemap-impl.patch deleted file mode 100644 index 4a4c483206b..00000000000 --- a/debian/patches/0005-cuda-tonemap-impl.patch +++ /dev/null @@ -1,1824 +0,0 @@ -Index: jellyfin-ffmpeg/compat/cuda/cuda_runtime.h -=================================================================== ---- jellyfin-ffmpeg.orig/compat/cuda/cuda_runtime.h -+++ jellyfin-ffmpeg/compat/cuda/cuda_runtime.h -@@ -33,55 +33,69 @@ - #define max(a, b) ((a) > (b) ? (a) : (b)) - #define min(a, b) ((a) < (b) ? (a) : (b)) - #define abs(x) ((x) < 0 ? 
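bicubic_coeffs above is the Keys cubic family with A = -param (A = 0 when the default sentinel is passed); the fourth weight is derived so the four taps always sum to 1. A host-side C sketch, with hypothetical names:

#include <stdio.h>

/* Keys cubic 4-tap weights for sampling phase x in [0,1); A is the
 * sharpness parameter (A = -0.5 gives Catmull-Rom). Mirrors
 * bicubic_coeffs above. */
static void keys_weights(float x, float A, float w[4])
{
    w[0] = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A;
    w[1] = ((A + 2) * x - (A + 3)) * x * x + 1;
    w[2] = ((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1;
    w[3] = 1.0f - w[0] - w[1] - w[2]; /* enforce a partition of unity */
}

int main(void)
{
    float w[4];
    keys_weights(0.5f, -0.5f, w);
    /* prints -0.0625 0.5625 0.5625 -0.0625 */
    printf("%g %g %g %g\n", w[0], w[1], w[2], w[3]);
    return 0;
}

lanczos_coeffs, by contrast, has to normalize explicitly by the weight sum, since the windowed-sinc taps do not sum to 1 by construction.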
-(x) : (x)) -+#define clamp(a, b, c) min(max((a), (b)), (c)) - - #define atomicAdd(a, b) (__atomic_fetch_add(a, b, __ATOMIC_SEQ_CST)) - - // Basic typedefs - typedef __device_builtin__ unsigned long long cudaTextureObject_t; - --typedef struct __device_builtin__ __align__(2) uchar2 --{ -- unsigned char x, y; --} uchar2; -- --typedef struct __device_builtin__ __align__(4) ushort2 --{ -- unsigned short x, y; --} ushort2; -- --typedef struct __device_builtin__ uint3 --{ -- unsigned int x, y, z; --} uint3; -- --typedef struct uint3 dim3; -- --typedef struct __device_builtin__ __align__(8) int2 --{ -- int x, y; --} int2; -+#define MAKE_VECTORS(type, base) \ -+typedef struct __device_builtin__ type##1 { \ -+ base x; \ -+} type##1; \ -+static __inline__ __device__ type##1 make_##type##1(base x) { \ -+ type##1 ret; \ -+ ret.x = x; \ -+ return ret; \ -+} \ -+typedef struct __device_builtin__ __align__(sizeof(base) * 2) type##2 { \ -+ base x, y; \ -+} type##2; \ -+static __inline__ __device__ type##2 make_##type##2(base x, base y) { \ -+ type##2 ret; \ -+ ret.x = x; \ -+ ret.y = y; \ -+ return ret; \ -+} \ -+typedef struct __device_builtin__ type##3 { \ -+ base x, y, z; \ -+} type##3; \ -+static __inline__ __device__ type##3 make_##type##3(base x, base y, base z) { \ -+ type##3 ret; \ -+ ret.x = x; \ -+ ret.y = y; \ -+ ret.z = z; \ -+ return ret; \ -+} \ -+typedef struct __device_builtin__ __align__(sizeof(base) * 4) type##4 { \ -+ base x, y, z, w; \ -+} type##4; \ -+static __inline__ __device__ type##4 make_##type##4(base x, base y, base z, base w) { \ -+ type##4 ret; \ -+ ret.x = x; \ -+ ret.y = y; \ -+ ret.z = z; \ -+ ret.w = w; \ -+ return ret; \ -+} - --typedef struct __device_builtin__ __align__(4) uchar4 --{ -- unsigned char x, y, z, w; --} uchar4; -+#define MAKE_TYPE - --typedef struct __device_builtin__ __align__(8) ushort4 --{ -- unsigned short x, y, z, w; --} ushort4; -+MAKE_VECTORS(uchar, unsigned char) -+MAKE_VECTORS(ushort, unsigned short) -+MAKE_VECTORS(int, int) -+MAKE_VECTORS(uint, unsigned int) -+MAKE_VECTORS(float, float) - --typedef struct __device_builtin__ __align__(16) int4 --{ -- int x, y, z, w; --} int4; -+typedef struct __device_builtin__ uint3 dim3; - - // Accessors for special registers - #define GETCOMP(reg, comp) \ - asm("mov.u32 %0, %%" #reg "." 
#comp ";" : "=r"(tmp)); \ - ret.comp = tmp; - --#define GET(name, reg) static inline __device__ uint3 name() {\ -+#define GET(name, reg) static __inline__ __device__ uint3 name() {\ - uint3 ret; \ - unsigned tmp; \ - GETCOMP(reg, x) \ -@@ -99,14 +113,8 @@ GET(getThreadIdx, tid) - #define blockDim (getBlockDim()) - #define threadIdx (getThreadIdx()) - --// Basic initializers (simple macros rather than inline functions) --#define make_uchar2(a, b) ((uchar2){.x = a, .y = b}) --#define make_ushort2(a, b) ((ushort2){.x = a, .y = b}) --#define make_uchar4(a, b, c, d) ((uchar4){.x = a, .y = b, .z = c, .w = d}) --#define make_ushort4(a, b, c, d) ((ushort4){.x = a, .y = b, .z = c, .w = d}) -- - // Conversions from the tex instruction's 4-register output to various types --#define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);} -+#define TEX2D(type, ret) static __inline__ __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);} - - TEX2D(unsigned char, a & 0xFF) - TEX2D(unsigned short, a & 0xFFFF) -@@ -117,15 +125,47 @@ TEX2D(ushort4, make_ushort4((unsigned sh - - // Template calling tex instruction and converting the output to the selected type - template --static inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) -+static __inline__ __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) -+{ -+ T ret; -+ unsigned ret1, ret2, ret3, ret4; -+ asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" : -+ "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) : -+ "l"(texObject), "f"(x), "f"(y)); -+ conv(&ret, ret1, ret2, ret3, ret4); -+ return ret; -+} -+ -+static __inline__ __device__ float __exp2f(float x) -+{ -+ float ret; -+ asm("ex2.approx.f32 %0, %1;" : "=f"(ret) : "f"(x)); -+ return ret; -+} -+ -+#define __expf(x) (__exp2f((x) * 1.4427f)) -+ -+static __inline__ __device__ float __log2f(float x) - { -- T ret; -- unsigned ret1, ret2, ret3, ret4; -- asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" : -- "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) : -- "l"(texObject), "f"(x), "f"(y)); -- conv(&ret, ret1, ret2, ret3, ret4); -- return ret; -+ float ret; -+ asm("lg2.approx.f32 %0, %1;" : "=f"(ret) : "f"(x)); -+ return ret; - } - --#endif /* COMPAT_CUDA_CUDA_RUNTIME_H */ -+#define __logf(x) (__log2f((x)) * 0.693147f) -+#define __log10f(x) (__log2f((x)) * 0.30103f) -+ -+static __inline__ __device__ float __powf(float x, float y) -+{ -+ return __exp2f(y * __log2f(x)); -+} -+ -+static __inline__ __device__ float __sqrtf(float x) -+{ -+ float ret; -+ asm("sqrtf.approx.f32 %0, %1;" : "=f"(ret) : "f"(x)); -+ return ret; -+} -+ -+#endif -+ -Index: jellyfin-ffmpeg/configure -=================================================================== ---- jellyfin-ffmpeg.orig/configure -+++ jellyfin-ffmpeg/configure -@@ -3057,6 +3057,8 @@ scale_cuda_filter_deps="ffnvcodec" - scale_cuda_filter_deps_any="cuda_nvcc cuda_llvm" - thumbnail_cuda_filter_deps="ffnvcodec" - thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" -+tonemap_cuda_filter_deps="ffnvcodec const_nan" -+tonemap_cuda_filter_deps_any="cuda_nvcc cuda_llvm" - transpose_npp_filter_deps="ffnvcodec libnpp" - overlay_cuda_filter_deps="ffnvcodec" - overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" -Index: jellyfin-ffmpeg/ffbuild/common.mak -=================================================================== ---- jellyfin-ffmpeg.orig/ffbuild/common.mak -+++ jellyfin-ffmpeg/ffbuild/common.mak -@@ -38,6 +38,7 
@@ OBJCCFLAGS = $(CPPFLAGS) $(CFLAGS) $(OB - ASFLAGS := $(CPPFLAGS) $(ASFLAGS) - CXXFLAGS := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS) - X86ASMFLAGS += $(IFLAGS:%=%/) -I$( 0.0f) { -+ float xpow = __powf(x, 1.0f / ST2084_M2); -+ float num = max(xpow - ST2084_C1, 0.0f); -+ float den = max(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); -+ x = __powf(num / den, 1.0f / ST2084_M1); -+ return x * ST2084_MAX_LUMINANCE / REFERENCE_WHITE; -+ } else { -+ return 0.0f; -+ } -+} -+ -+// delinearizer for PQ/ST2084 -+static __inline__ __device__ float inverse_eotf_st2084(float x) { -+ if (x > 0.0f) { -+ x *= REFERENCE_WHITE / ST2084_MAX_LUMINANCE; -+ float xpow = __powf(x, ST2084_M1); -+#if 0 -+ // Original formulation from SMPTE ST 2084:2014 publication. -+ float num = ST2084_C1 + ST2084_C2 * xpow; -+ float den = 1.0f + ST2084_C3 * xpow; -+ return __powf(num / den, ST2084_M2); -+#else -+ // More stable arrangement that avoids some cancellation error. -+ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; -+ float den = 1.0f + ST2084_C3 * xpow; -+ return __powf(1.0f + num / den, ST2084_M2); -+#endif -+ } else { -+ return 0.0f; -+ } -+} -+ -+static __inline__ __device__ float ootf_1_2(float x) { -+ return x < 0.0f ? x : __powf(x, 1.2f); -+} -+ -+static __inline__ __device__ float inverse_ootf_1_2(float x) { -+ return x < 0.0f ? x : __powf(x, 1.0f / 1.2f); -+} -+ -+static __inline__ __device__ float oetf_arib_b67(float x) { -+ x = max(x, 0.0f); -+ return x <= (1.0f / 12.0f) -+ ? __sqrtf(3.0f * x) -+ : (ARIB_B67_A * __logf(12.0f * x - ARIB_B67_B) + ARIB_B67_C); -+} -+ -+static __inline__ __device__ float inverse_oetf_arib_b67(float x) { -+ x = max(x, 0.0f); -+ return x <= 0.5f -+ ? (x * x) * (1.0f / 3.0f) -+ : (__expf((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); -+} -+ -+// linearizer for HLG/ARIB-B67 -+static __inline__ __device__ float eotf_arib_b67(float x) { -+ return ootf_1_2(inverse_oetf_arib_b67(x)); -+} -+ -+// delinearizer for HLG/ARIB-B67 -+static __inline__ __device__ float inverse_eotf_arib_b67(float x) { -+ return oetf_arib_b67(inverse_ootf_1_2(x)); -+} -+ -+static __inline__ __device__ float inverse_eotf_bt1886(float x) { -+ return x < 0.0f ? 0.0f : __powf(x, 1.0f / 2.4f); -+} -+ -+static __inline__ __device__ float oetf_bt709(float x) { -+ x = max(0.0f, x); -+ return x < BT709_BETA -+ ? (x * 4.5f) -+ : (BT709_ALPHA * __powf(x, 0.45f) - (BT709_ALPHA - 1.0f)); -+} -+ -+static __inline__ __device__ float inverse_oetf_bt709(float x) { -+ return x < (4.5f * BT709_BETA) -+ ? 
(x / 4.5f) -+ : (__powf((x + (BT709_ALPHA - 1.0f)) / BT709_ALPHA, 1.0f / 0.45f)); -+} -+ -+static __inline__ __device__ float linearize(float x) -+{ -+ if (trc_src == AVCOL_TRC_SMPTE2084) -+ return eotf_st2084(x); -+ else if (trc_src == AVCOL_TRC_ARIB_STD_B67) -+ return eotf_arib_b67(x); -+ else -+ return x; -+} -+ -+static __inline__ __device__ float delinearize(float x) -+{ -+ if (trc_dst == AVCOL_TRC_BT709 || trc_dst == AVCOL_TRC_BT2020_10) -+ return inverse_eotf_bt1886(x); -+ else -+ return x; -+} -+ -+static __inline__ __device__ float3 yuv2rgb(float y, float u, float v) { -+ if (range_src == AVCOL_RANGE_JPEG) { -+ u -= 0.5f; v -= 0.5f; -+ } else { -+ y = (y * 255.0f - 16.0f) / 219.0f; -+ u = (u * 255.0f - 128.0f) / 224.0f; -+ v = (v * 255.0f - 128.0f) / 224.0f; -+ } -+ float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; -+ float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; -+ float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; -+ return make_float3(r, g, b); -+} -+ -+static __inline__ __device__ float3 yuv2lrgb(float3 yuv) { -+ float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z); -+ return make_float3(linearize(rgb.x), -+ linearize(rgb.y), -+ linearize(rgb.z)); -+} -+ -+static __inline__ __device__ float3 rgb2yuv(float r, float g, float b) { -+ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; -+ float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; -+ float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; -+ if (range_dst == AVCOL_RANGE_JPEG) { -+ u += 0.5f; v += 0.5f; -+ } else { -+ y = (219.0f * y + 16.0f) / 255.0f; -+ u = (224.0f * u + 128.0f) / 255.0f; -+ v = (224.0f * v + 128.0f) / 255.0f; -+ } -+ return make_float3(y, u, v); -+} -+ -+static __inline__ __device__ float rgb2y(float r, float g, float b) { -+ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; -+ if (range_dst != AVCOL_RANGE_JPEG) -+ y = (219.0f * y + 16.0f) / 255.0f; -+ return y; -+} -+ -+static __inline__ __device__ float3 lrgb2yuv(float3 c) { -+ float r = delinearize(c.x); -+ float g = delinearize(c.y); -+ float b = delinearize(c.z); -+ return rgb2yuv(r, g, b); -+} -+ -+static __inline__ __device__ float3 lrgb2lrgb(float3 c) { -+ if (rgb2rgb_passthrough) { -+ return c; -+ } else { -+ float r = c.x, g = c.y, b = c.z; -+ float rr = rgb2rgb_matrix[0] * r + rgb2rgb_matrix[1] * g + rgb2rgb_matrix[2] * b; -+ float gg = rgb2rgb_matrix[3] * r + rgb2rgb_matrix[4] * g + rgb2rgb_matrix[5] * b; -+ float bb = rgb2rgb_matrix[6] * r + rgb2rgb_matrix[7] * g + rgb2rgb_matrix[8] * b; -+ return make_float3(rr, gg, bb); -+ } -+} -+ -+#endif /* AVFILTER_CUDA_COLORSPACE_COMMON_H */ -Index: jellyfin-ffmpeg/libavfilter/cuda/host_util.c -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/host_util.c -@@ -0,0 +1,35 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
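The ST 2084 linearizer above reduces to a small scalar function. A host-side C sketch, using the PQ constants that appear in the OpenCL colorspace_common.cl later in this series (the CUDA header's own constants were presumably identical, which is an assumption here), with 1e-6f standing in for FLOAT_EPS; the device __powf is the ex2/lg2 approximation from the compat header:

#include <math.h>
#include <stdio.h>

#define ST2084_M1 0.1593017578125f
#define ST2084_M2 78.84375f
#define ST2084_C1 0.8359375f
#define ST2084_C2 18.8515625f
#define ST2084_C3 18.6875f
#define ST2084_MAX_LUMINANCE 10000.0f
#define REFERENCE_WHITE 203.0f

/* PQ signal -> linear light, scaled so 1.0 == REFERENCE_WHITE nits. */
static float eotf_st2084(float x)
{
    if (x <= 0.0f)
        return 0.0f;
    float xpow = powf(x, 1.0f / ST2084_M2);
    float num  = fmaxf(xpow - ST2084_C1, 0.0f);
    float den  = fmaxf(ST2084_C2 - ST2084_C3 * xpow, 1e-6f);
    return powf(num / den, 1.0f / ST2084_M1)
           * ST2084_MAX_LUMINANCE / REFERENCE_WHITE;
}

int main(void)
{
    /* PQ code value ~0.58 is roughly 203 nits, i.e. ~1.0 after scaling. */
    printf("%f\n", eotf_st2084(0.58f));
    return 0;
}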
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavfilter/colorspace.h" -+#include "host_util.h" -+ -+int ff_make_cuda_frame(FFCUDAFrame *dst, const AVFrame *src) -+{ -+ int i = 0; -+ for (i = 0; i < 4; i++) { -+ dst->data[i] = src->data[i]; -+ dst->linesize[i] = src->linesize[i]; -+ } -+ -+ dst->width = src->width; -+ dst->height = src->height; -+ -+ return 0; -+} -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/host_util.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/host_util.h -@@ -0,0 +1,29 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_HOST_UTIL_H -+#define AVFILTER_CUDA_HOST_UTIL_H -+ -+#include "libavutil/frame.h" -+ -+#include "shared.h" -+ -+int ff_make_cuda_frame(FFCUDAFrame *dst, const AVFrame *src); -+ -+#endif /* AVFILTER_CUDA_HOST_UTIL_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/pixfmt.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/pixfmt.h -@@ -0,0 +1,209 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
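The range handling in yuv2rgb/rgb2yuv above, isolated as a host-side C sketch; samples arrive already normalized to [0,1], so the classic limited-range 219/224 offsets are applied in 255ths:

#include <stdio.h>

/* Normalize a limited-range (MPEG/TV) YCbCr triple the way yuv2rgb above
 * does before applying the 3x3 matrix: Y to [0,1] from [16/255, 235/255],
 * chroma to [-0.5, 0.5] from [16/255, 240/255]. */
static void normalize_limited(float *y, float *u, float *v)
{
    *y = (*y * 255.0f - 16.0f) / 219.0f;
    *u = (*u * 255.0f - 128.0f) / 224.0f;
    *v = (*v * 255.0f - 128.0f) / 224.0f;
}

int main(void)
{
    float y = 235.0f / 255.0f, u = 128.0f / 255.0f, v = 240.0f / 255.0f;
    normalize_limited(&y, &u, &v);
    printf("y=%f u=%f v=%f\n", y, u, v); /* y=1, u=0, v=0.5 */
    return 0;
}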
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_PIXFMT_H -+#define AVFILTER_CUDA_PIXFMT_H -+ -+#include "shared.h" -+ -+extern const enum AVPixelFormat fmt_src, fmt_dst; -+extern const int depth_src, depth_dst; -+ -+// Single-sample read function -+template -+static __inline__ __device__ T read_sample(const FFCUDAFrame& frame, int x, int y) -+{ -+ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); -+ return ptr[x]; -+} -+ -+// Per-format read functions -+static __inline__ __device__ ushort3 read_p016(const FFCUDAFrame& frame, int x, int y) -+{ -+ return make_ushort3(read_sample(frame, x, y), -+ read_sample(frame, (x & ~1), y / 2), -+ read_sample(frame, (x & ~1) + 1, y / 2)); -+} -+ -+static __inline__ __device__ ushort3 read_p010(const FFCUDAFrame& frame, int x, int y) -+{ -+ ushort3 val = read_p016(frame, x, y); -+ return make_ushort3(val.x >> 6, -+ val.y >> 6, -+ val.z >> 6); -+} -+ -+static __inline__ __device__ ushort3 read_yuv420p16(const FFCUDAFrame& frame, int x, int y) -+{ -+ return make_ushort3(read_sample(frame, x, y), -+ read_sample(frame, x / 2, y / 2), -+ read_sample(frame, x / 2, y / 2)); -+} -+ -+static __inline__ __device__ ushort3 read_yuv420p10(const FFCUDAFrame& frame, int x, int y) -+{ -+ ushort3 val = read_yuv420p16(frame, x, y); -+ return make_ushort3(val.x >> 6, -+ val.y >> 6, -+ val.z >> 6); -+} -+ -+// Generic read functions -+static __inline__ __device__ ushort3 read_px(const FFCUDAFrame& frame, int x, int y) -+{ -+ if (fmt_src == AV_PIX_FMT_P016) -+ return read_p016(frame, x, y); -+ else if (fmt_src == AV_PIX_FMT_P010) -+ return read_p010(frame, x, y); -+ else -+ return make_ushort3(0, 0, 0); -+} -+ -+static __inline__ __device__ float sample_to_float(unsigned short i) -+{ -+ return (float)i / ((1 << depth_src) - 1); -+} -+ -+static __inline__ __device__ float3 pixel_to_float3(ushort3 flt) -+{ -+ return make_float3(sample_to_float(flt.x), -+ sample_to_float(flt.y), -+ sample_to_float(flt.z)); -+} -+ -+static __inline__ __device__ float3 read_px_flt(const FFCUDAFrame& frame, int x, int y) -+{ -+ return pixel_to_float3(read_px(frame, x, y)); -+} -+ -+// Single-sample write function -+template -+static __inline__ __device__ void write_sample(const FFCUDAFrame& frame, int x, int y, T sample) -+{ -+ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); -+ ptr[x] = sample; -+} -+ -+// Per-format write functions -+static __inline__ __device__ void write_nv12_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned char)a.x); -+ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); -+ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); -+ -+ write_sample<1>(frame, (x & ~1), y / 2, (unsigned char)chroma.y); -+ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned char)chroma.z); -+} -+ -+static __inline__ __device__ void write_yuv420p_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned char)a.x); -+ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); -+ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); -+ -+ write_sample<1>(frame, x / 2, y / 2, 
(unsigned char)chroma.y); -+ write_sample<2>(frame, x / 2, y / 2, (unsigned char)chroma.z); -+} -+ -+static __inline__ __device__ void write_p016_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned short)a.x); -+ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); -+ write_sample<0>(frame, x, y + 1, (unsigned short)c.x); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); -+ -+ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)chroma.y); -+ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)chroma.z); -+} -+ -+static __inline__ __device__ void write_p010_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); -+ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); -+ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); -+ -+ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)(chroma.y << 6)); -+ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)(chroma.z << 6)); -+} -+ -+static __inline__ __device__ void write_yuv420p16_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned short)a.x); -+ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); -+ write_sample<0>(frame, x, y + 1, (unsigned short)c.x); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); -+ -+ write_sample<1>(frame, x / 2, y / 2, (unsigned short)chroma.y); -+ write_sample<2>(frame, x / 2, y / 2, (unsigned short)chroma.z); -+} -+ -+static __inline__ __device__ void write_yuv420p10_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); -+ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); -+ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); -+ -+ write_sample<1>(frame, x / 2, y / 2, (unsigned short)(chroma.y << 6)); -+ write_sample<2>(frame, x / 2, y / 2, (unsigned short)(chroma.z << 6)); -+} -+ -+// Generic write functions -+static __inline__ __device__ void write_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ if (fmt_dst == AV_PIX_FMT_YUV420P) -+ write_yuv420p_2x2(frame, x, y, a, b, c, d, chroma); -+ else if (fmt_dst == AV_PIX_FMT_NV12) -+ write_nv12_2x2(frame, x, y, a, b, c, d, chroma); -+ else if (fmt_dst == AV_PIX_FMT_P016) -+ write_p016_2x2(frame, x, y, a, b, c, d, chroma); -+ else if (fmt_dst == AV_PIX_FMT_P010) -+ write_p010_2x2(frame, x, y, a, b, c, d, chroma); -+} -+ -+static __inline__ __device__ unsigned short sample_to_ushort(float flt) -+{ -+ return (unsigned short)(flt * ((1 << depth_dst) - 1)); -+} -+ -+static __inline__ __device__ ushort3 pixel_to_ushort3(float3 flt) -+{ -+ return make_ushort3(sample_to_ushort(flt.x), -+ sample_to_ushort(flt.y), -+ sample_to_ushort(flt.z)); -+} -+ -+static __inline__ __device__ void write_2x2_flt(const FFCUDAFrame& frame, int x, int y, float3 a, float3 b, float3 c, float3 d) -+{ -+ float3 chroma = get_chroma_sample(a, b, c, d); -+ -+ ushort3 ia = pixel_to_ushort3(a); -+ ushort3 ib = pixel_to_ushort3(b); -+ ushort3 ic = pixel_to_ushort3(c); -+ ushort3 id = pixel_to_ushort3(d); 
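The >> 6 in read_p010 and the << 6 in write_p010_2x2 above come from P010 storing 10-bit samples in the high bits of a 16-bit container; a minimal C illustration:

#include <stdio.h>

/* P010 keeps the 10 significant bits in bits 15..6 of each 16-bit word,
 * so reads shift right by 6 and writes shift left by 6. */
static unsigned short p010_to_10bit(unsigned short raw)  { return raw >> 6; }
static unsigned short p010_from_10bit(unsigned short v)  { return v << 6; }

int main(void)
{
    unsigned short raw = 0xFFC0;             /* maximum 10-bit sample */
    printf("%u\n", p010_to_10bit(raw));      /* 1023 */
    printf("0x%X\n", p010_from_10bit(1023)); /* 0xFFC0 */
    return 0;
}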
-+ -+ ushort3 ichroma = pixel_to_ushort3(chroma); -+ -+ write_2x2(frame, x, y, ia, ib, ic, id, ichroma); -+} -+ -+#endif /* AVFILTER_CUDA_PIXFMT_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/shared.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/shared.h -@@ -0,0 +1,32 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_SHARED_H -+#define AVFILTER_CUDA_SHARED_H -+ -+typedef struct FFCUDAFrame { -+ unsigned char *data[4]; -+ int linesize[4]; -+ -+ int width, height; -+ -+ float peak; -+} FFCUDAFrame; -+ -+#endif /* AVFILTER_CUDA_SHARED_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/tonemap.cu -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/tonemap.cu -@@ -0,0 +1,201 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "colorspace_common.h" -+#include "pixfmt.h" -+#include "tonemap.h" -+#include "util.h" -+ -+extern const enum TonemapAlgorithm tonemap_func; -+extern const float tone_param; -+extern const float desat_param; -+ -+#define mix(x, y, a) ((x) + ((y) - (x)) * (a)) -+ -+static __inline__ __device__ -+float hable_f(float in) { -+ float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; -+ return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; -+} -+ -+static __inline__ __device__ -+float direct(float s, float peak) { -+ return s; -+} -+ -+static __inline__ __device__ -+float linear(float s, float peak) { -+ return s * tone_param / peak; -+} -+ -+static __inline__ __device__ -+float gamma(float s, float peak) { -+ float p = s > 0.05f ? s / peak : 0.05f / peak; -+ float v = __powf(p, 1.0f / tone_param); -+ return s > 0.05f ? 
v : (s * v / 0.05f); -+} -+ -+static __inline__ __device__ -+float clip(float s, float peak) { -+ return clamp(s * tone_param, 0.0f, 1.0f); -+} -+ -+static __inline__ __device__ -+float reinhard(float s, float peak) { -+ return s / (s + tone_param) * (peak + tone_param) / peak; -+} -+ -+static __inline__ __device__ -+float hable(float s, float peak) { -+ return hable_f(s) / hable_f(peak); -+} -+ -+static __inline__ __device__ -+float mobius(float s, float peak) { -+ float j = tone_param; -+ float a, b; -+ -+ if (s <= j) -+ return s; -+ -+ a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); -+ b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, FLOAT_EPS); -+ -+ return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b); -+} -+ -+static __inline__ __device__ -+float bt2390(float s, float peak, float dst_peak) { -+ float peak_pq = inverse_eotf_st2084(peak); -+ float scale = 1.0f / peak_pq; -+ -+ float s_pq = inverse_eotf_st2084(s) * scale; -+ float maxLum = inverse_eotf_st2084(dst_peak) * scale; -+ -+ float ks = 1.5f * maxLum - 0.5f; -+ float tb = (s_pq - ks) / (1.0f - ks); -+ float tb2 = tb * tb; -+ float tb3 = tb2 * tb; -+ float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + -+ (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + -+ (-2.0f * tb3 + 3.0f * tb2) * maxLum; -+ float sig = (s_pq < ks) ? s_pq : pb; -+ -+ return eotf_st2084(sig * peak_pq); -+} -+ -+static __inline__ __device__ -+float map(float s, float peak, float dst_peak) -+{ -+ switch (tonemap_func) { -+ case TONEMAP_NONE: -+ default: -+ return direct(s, peak); -+ case TONEMAP_LINEAR: -+ return linear(s, peak); -+ case TONEMAP_GAMMA: -+ return gamma(s, peak); -+ case TONEMAP_CLIP: -+ return clip(s, peak); -+ case TONEMAP_REINHARD: -+ return reinhard(s, peak); -+ case TONEMAP_HABLE: -+ return hable(s, peak); -+ case TONEMAP_MOBIUS: -+ return mobius(s, peak); -+ case TONEMAP_BT2390: -+ return bt2390(s, peak, dst_peak); -+ } -+} -+ -+static __inline__ __device__ -+float3 map_one_pixel_rgb(float3 rgb, const FFCUDAFrame& src, const FFCUDAFrame& dst) { -+ float sig = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS); -+ float peak = src.peak; -+ float dst_peak = dst.peak; -+ -+ // Rescale the variables in order to bring it into a representation where -+ // 1.0 represents the dst_peak. This is because all of the tone mapping -+ // algorithms are defined in such a way that they map to the range [0.0, 1.0]. 
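The hermite spline in bt2390 above, factored into a scalar C sketch that operates on PQ-normalized values (1.0 = source peak in PQ space); inputs below the knee start ks pass through unchanged:

#include <stdio.h>

/* BT.2390 knee: compress [ks, 1] into [ks, max_lum] with a cubic hermite,
 * where max_lum is the target peak in the same PQ-normalized scale. */
static float bt2390_knee(float s_pq, float max_lum)
{
    float ks = 1.5f * max_lum - 0.5f;   /* knee start */
    if (s_pq < ks)
        return s_pq;                    /* below the knee: identity */
    float t  = (s_pq - ks) / (1.0f - ks);
    float t2 = t * t, t3 = t2 * t;
    return (2.0f * t3 - 3.0f * t2 + 1.0f) * ks +
           (t3 - 2.0f * t2 + t) * (1.0f - ks) +
           (-2.0f * t3 + 3.0f * t2) * max_lum;
}

int main(void)
{
    /* The source peak maps exactly onto the target peak... */
    printf("%f\n", bt2390_knee(1.0f, 0.75f)); /* 0.750000 */
    /* ...while values below the knee are untouched. */
    printf("%f\n", bt2390_knee(0.3f, 0.75f)); /* 0.300000 */
    return 0;
}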
-+ if (dst.peak > 1.0f) { -+ sig *= 1.0f / dst.peak; -+ peak *= 1.0f / dst.peak; -+ } -+ -+ float sig_old = sig; -+ -+ // Desaturate the color using a coefficient dependent on the signal level -+ if (desat_param > 0.0f) { -+ float luma = get_luma_dst(rgb, luma_dst); -+ float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS); -+ coeff = __powf(coeff, 10.0f / desat_param); -+ rgb = mix(rgb, make_float3(luma, luma, luma), make_float3(coeff, coeff, coeff)); -+ } -+ -+ sig = map(sig, peak, dst_peak); -+ -+ sig = min(sig, 1.0f); -+ rgb = rgb * (sig / sig_old); -+ return rgb; -+} -+ -+// Map from source space YUV to destination space RGB -+static __inline__ __device__ -+float3 map_to_dst_space_from_yuv(float3 yuv, float peak) { -+ float3 c = yuv2lrgb(yuv); -+ c = lrgb2lrgb(c); -+ return c; -+} -+ -+extern "C" { -+ -+__global__ void tonemap(FFCUDAFrame src, FFCUDAFrame dst) -+{ -+ int xi = blockIdx.x * blockDim.x + threadIdx.x; -+ int yi = blockIdx.y * blockDim.y + threadIdx.y; -+ // each work item process four pixels -+ int x = 2 * xi; -+ int y = 2 * yi; -+ -+ if (y + 1 < src.height && x + 1 < src.width) -+ { -+ float3 yuv0 = read_px_flt(src, x, y); -+ float3 yuv1 = read_px_flt(src, x + 1, y); -+ float3 yuv2 = read_px_flt(src, x, y + 1); -+ float3 yuv3 = read_px_flt(src, x + 1, y + 1); -+ -+ float3 c0 = map_to_dst_space_from_yuv(yuv0, src.peak); -+ float3 c1 = map_to_dst_space_from_yuv(yuv1, src.peak); -+ float3 c2 = map_to_dst_space_from_yuv(yuv2, src.peak); -+ float3 c3 = map_to_dst_space_from_yuv(yuv3, src.peak); -+ -+ c0 = map_one_pixel_rgb(c0, src, dst); -+ c1 = map_one_pixel_rgb(c1, src, dst); -+ c2 = map_one_pixel_rgb(c2, src, dst); -+ c3 = map_one_pixel_rgb(c3, src, dst); -+ -+ yuv0 = lrgb2yuv(c0); -+ yuv1 = lrgb2yuv(c1); -+ yuv2 = lrgb2yuv(c2); -+ yuv3 = lrgb2yuv(c3); -+ -+ write_2x2_flt(dst, x, y, yuv0, yuv1, yuv2, yuv3); -+ } -+} -+ -+} -Index: jellyfin-ffmpeg/libavfilter/cuda/tonemap.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/tonemap.h -@@ -0,0 +1,35 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_TONEMAP_H -+#define AVFILTER_CUDA_TONEMAP_H -+ -+enum TonemapAlgorithm { -+ TONEMAP_NONE, -+ TONEMAP_LINEAR, -+ TONEMAP_GAMMA, -+ TONEMAP_CLIP, -+ TONEMAP_REINHARD, -+ TONEMAP_HABLE, -+ TONEMAP_MOBIUS, -+ TONEMAP_BT2390, -+ TONEMAP_MAX, -+}; -+ -+#endif /* AVFILTER_CUDA_TONEMAP_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/util.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/util.h -@@ -0,0 +1,55 @@ -+/* -+ * This file is part of FFmpeg. 
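The desaturation step in map_one_pixel_rgb above pulls each channel toward luma once the signal level passes roughly 0.18; a scalar C sketch of one channel, with 1e-6f standing in for FLOAT_EPS (the 0.18 reading as "around diffuse white" is an interpretation, not stated in the source):

#include <math.h>
#include <stdio.h>

#define FLT_EPS 1e-6f

static float mixf(float x, float y, float a) { return x + (y - x) * a; }

/* Highlight desaturation: the further sig is past 0.18, the closer the
 * coefficient gets to 1 and the harder the channel is blended toward
 * luma; desat_param (0.5 by default) controls the falloff exponent. */
static float desat_channel(float c, float luma, float sig, float desat_param)
{
    float coeff = fmaxf(sig - 0.18f, FLT_EPS) / fmaxf(sig, FLT_EPS);
    coeff = powf(coeff, 10.0f / desat_param);
    return mixf(c, luma, coeff);
}

int main(void)
{
    /* A bright saturated value moves noticeably toward its luma. */
    printf("%f\n", desat_channel(4.0f, 1.0f, 4.0f, 0.5f)); /* ~2.81 */
    return 0;
}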
-+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_UTIL_H -+#define AVFILTER_CUDA_UTIL_H -+ -+static inline __device__ float3 operator+(const float3 &a, const float3 &b) { -+ return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -+} -+ -+static inline __device__ float3 operator+(const float3 &a, float b) { -+ return make_float3(a.x + b, a.y + b, a.z + b); -+} -+ -+static inline __device__ float3 operator-(const float3 &a, const float3 &b) { -+ return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -+} -+ -+static inline __device__ float3 operator-(const float3 &a, float b) { -+ return make_float3(a.x - b, a.y - b, a.z - b); -+} -+ -+static inline __device__ float3 operator*(const float3 &a, const float3 &b) { -+ return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -+} -+ -+static inline __device__ float3 operator*(const float3 &a, float b) { -+ return make_float3(a.x * b, a.y * b, a.z * b); -+} -+ -+static inline __device__ float3 operator/(const float3 &a, const float3 &b) { -+ return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -+} -+ -+static inline __device__ float3 operator/(const float3 &a, float b) { -+ return make_float3(a.x / b, a.y / b, a.z / b); -+} -+ -+#endif /* AVFILTER_CUDA_UTIL_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_cuda.c -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/vf_tonemap_cuda.c -@@ -0,0 +1,712 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include -+#include -+#include -+ -+#include "libavutil/avassert.h" -+#include "libavutil/avstring.h" -+#include "libavutil/bprint.h" -+#include "libavutil/common.h" -+#include "libavutil/hwcontext.h" -+#include "libavutil/hwcontext_cuda_internal.h" -+#include "libavutil/cuda_check.h" -+#include "libavutil/internal.h" -+#include "libavutil/opt.h" -+#include "libavutil/pixdesc.h" -+ -+#include "avfilter.h" -+#include "colorspace.h" -+#include "cuda/host_util.h" -+#include "cuda/shared.h" -+#include "cuda/tonemap.h" -+#include "formats.h" -+#include "internal.h" -+#include "scale_eval.h" -+#include "video.h" -+ -+static const enum AVPixelFormat supported_formats[] = { -+ AV_PIX_FMT_YUV420P, -+ AV_PIX_FMT_NV12, -+ AV_PIX_FMT_P010, -+ AV_PIX_FMT_P016 -+}; -+ -+#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) -+#define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1)) -+#define NUM_BUFFERS 2 -+#define BLOCKX 32 -+#define BLOCKY 16 -+ -+#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) -+ -+typedef struct TonemapCUDAContext { -+ const AVClass *class; -+ -+ AVCUDADeviceContext *hwctx; -+ -+ enum AVPixelFormat in_fmt, out_fmt; -+ -+ enum AVColorTransferCharacteristic trc, in_trc, out_trc; -+ enum AVColorSpace spc, in_spc, out_spc; -+ enum AVColorPrimaries pri, in_pri, out_pri; -+ enum AVColorRange range, in_range, out_range; -+ enum AVChromaLocation in_chroma_loc, out_chroma_loc; -+ -+ AVBufferRef *frames_ctx; -+ AVFrame *frame; -+ -+ AVFrame *tmp_frame; -+ -+ /** -+ * Output sw format. AV_PIX_FMT_NONE for no conversion. 
-+ */ -+ enum AVPixelFormat format; -+ char *format_str; -+ -+ CUcontext cu_ctx; -+ CUmodule cu_module; -+ -+ CUfunction cu_func; -+ -+ CUdeviceptr srcBuffer; -+ CUdeviceptr dstBuffer; -+ -+ enum TonemapAlgorithm tonemap; -+ double param; -+ double desat_param; -+ double peak; -+ double dst_peak; -+ double scene_threshold; -+ -+ const AVPixFmtDescriptor *in_desc, *out_desc; -+} TonemapCUDAContext; -+ -+static av_cold int init(AVFilterContext *ctx) -+{ -+ TonemapCUDAContext *s = ctx->priv; -+ -+ if (!strcmp(s->format_str, "same")) { -+ s->format = AV_PIX_FMT_NONE; -+ } else { -+ s->format = av_get_pix_fmt(s->format_str); -+ if (s->format == AV_PIX_FMT_NONE) { -+ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); -+ return AVERROR(EINVAL); -+ } -+ } -+ -+ s->frame = av_frame_alloc(); -+ if (!s->frame) -+ return AVERROR(ENOMEM); -+ -+ s->tmp_frame = av_frame_alloc(); -+ if (!s->tmp_frame) -+ return AVERROR(ENOMEM); -+ -+ return 0; -+} -+ -+static av_cold void uninit(AVFilterContext *ctx) -+{ -+ TonemapCUDAContext *s = ctx->priv; -+ -+ if (s->hwctx) { -+ CudaFunctions *cu = s->hwctx->internal->cuda_dl; -+ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; -+ -+ CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); -+ -+ if (s->cu_module) { -+ CHECK_CU(cu->cuModuleUnload(s->cu_module)); -+ s->cu_func = NULL; -+ s->cu_module = NULL; -+ } -+ -+ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); -+ } -+ -+ av_frame_free(&s->frame); -+ av_buffer_unref(&s->frames_ctx); -+ av_frame_free(&s->tmp_frame); -+} -+ -+static int query_formats(AVFilterContext *ctx) -+{ -+ static const enum AVPixelFormat pixel_formats[] = { -+ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, -+ }; -+ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); -+ -+ return ff_set_common_formats(ctx, pix_fmts); -+} -+ -+static av_cold int init_stage(TonemapCUDAContext *s, AVBufferRef *device_ctx, -+ AVFilterLink *outlink) -+{ -+ AVBufferRef *out_ref = NULL; -+ AVHWFramesContext *out_ctx; -+ int ret; -+ -+ out_ref = av_hwframe_ctx_alloc(device_ctx); -+ if (!out_ref) -+ return AVERROR(ENOMEM); -+ out_ctx = (AVHWFramesContext*)out_ref->data; -+ -+ out_ctx->format = AV_PIX_FMT_CUDA; -+ out_ctx->sw_format = s->out_fmt; -+ out_ctx->width = FFALIGN(outlink->w, 32); -+ out_ctx->height = FFALIGN(outlink->h, 32); -+ -+ ret = av_hwframe_ctx_init(out_ref); -+ if (ret < 0) -+ goto fail; -+ -+ av_frame_unref(s->frame); -+ ret = av_hwframe_get_buffer(out_ref, s->frame, 0); -+ if (ret < 0) -+ goto fail; -+ -+ s->frame->width = outlink->w; -+ s->frame->height = outlink->h; -+ -+ av_buffer_unref(&s->frames_ctx); -+ s->frames_ctx = out_ref; -+ -+ return 0; -+fail: -+ av_buffer_unref(&out_ref); -+ return ret; -+} -+ -+static int format_is_supported(enum AVPixelFormat fmt) -+{ -+ int i; -+ -+ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) -+ if (supported_formats[i] == fmt) -+ return 1; -+ return 0; -+} -+ -+static av_cold int init_processing_chain(AVFilterContext *ctx, AVFilterLink *outlink) -+{ -+ TonemapCUDAContext *s = ctx->priv; -+ -+ AVHWFramesContext *in_frames_ctx; -+ -+ enum AVPixelFormat in_format; -+ enum AVPixelFormat out_format; -+ const AVPixFmtDescriptor *in_desc; -+ const AVPixFmtDescriptor *out_desc; -+ int ret; -+ -+ /* check that we have a hw context */ -+ if (!ctx->inputs[0]->hw_frames_ctx) { -+ av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); -+ return AVERROR(EINVAL); -+ } -+ in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; -+ in_format = in_frames_ctx->sw_format; -+ out_format = (s->format 
== AV_PIX_FMT_NONE) ? in_format : s->format; -+ in_desc = av_pix_fmt_desc_get(in_format); -+ out_desc = av_pix_fmt_desc_get(out_format); -+ -+ if (!format_is_supported(in_format)) { -+ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", -+ av_get_pix_fmt_name(in_format)); -+ return AVERROR(ENOSYS); -+ } -+ if (!format_is_supported(out_format)) { -+ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", -+ av_get_pix_fmt_name(out_format)); -+ return AVERROR(ENOSYS); -+ } -+ if (!(in_desc->comp[0].depth == 10 || -+ in_desc->comp[0].depth == 16)) { -+ av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", -+ in_desc->comp[0].depth); -+ return AVERROR(ENOSYS); -+ } -+ -+ s->in_fmt = in_format; -+ s->out_fmt = out_format; -+ s->in_desc = in_desc; -+ s->out_desc = out_desc; -+ -+ ret = init_stage(s, in_frames_ctx->device_ref, outlink); -+ if (ret < 0) -+ return ret; -+ -+ ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx); -+ if (!ctx->outputs[0]->hw_frames_ctx) -+ return AVERROR(ENOMEM); -+ -+ return 0; -+} -+ -+static const struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = { -+ [AVCOL_PRI_BT709] = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 }, -+ [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 }, -+}; -+ -+static const struct WhitepointCoefficients whitepoint_table[AVCOL_PRI_NB] = { -+ [AVCOL_PRI_BT709] = { 0.3127, 0.3290 }, -+ [AVCOL_PRI_BT2020] = { 0.3127, 0.3290 }, -+}; -+ -+static int get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, -+ double rgb2rgb[3][3]) { -+ double rgb2xyz[3][3], xyz2rgb[3][3]; -+ -+ ff_fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], rgb2xyz); -+ ff_matrix_invert_3x3(rgb2xyz, xyz2rgb); -+ ff_fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], rgb2xyz); -+ ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb); -+ -+ return 0; -+} -+ -+static av_cold int compile(AVFilterLink *inlink) -+{ -+ int ret = 0; -+ AVFilterContext *ctx = inlink->dst; -+ TonemapCUDAContext *s = ctx->priv; -+ CudaFunctions *cu = s->hwctx->internal->cuda_dl; -+ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; -+ AVBPrint constants; -+ CUlinkState link_state; -+ void *cubin; -+ size_t cubin_size; -+ double rgb_matrix[3][3], yuv_matrix[3][3], rgb2rgb_matrix[3][3]; -+ const struct LumaCoefficients *in_coeffs, *out_coeffs; -+ enum AVColorTransferCharacteristic in_trc = s->in_trc, out_trc = s->out_trc; -+ enum AVColorSpace in_spc = s->in_spc, out_spc = s->out_spc; -+ enum AVColorPrimaries in_pri = s->in_pri, out_pri = s->out_pri; -+ enum AVColorRange in_range = s->in_range, out_range = s->out_range; -+ char info_log[4096], error_log[4096]; -+ CUjit_option options[] = {CU_JIT_INFO_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; -+ void *option_values[] = {&info_log, &error_log, (void*)(intptr_t)sizeof(info_log), (void*)(intptr_t)sizeof(error_log)}; -+ -+ extern char tonemap_ptx[]; -+ -+ switch(s->tonemap) { -+ case TONEMAP_GAMMA: -+ if (isnan(s->param)) -+ s->param = 1.8f; -+ break; -+ case TONEMAP_REINHARD: -+ if (!isnan(s->param)) -+ s->param = (1.0f - s->param) / s->param; -+ break; -+ case TONEMAP_MOBIUS: -+ if (isnan(s->param)) -+ s->param = 0.3f; -+ break; -+ } -+ -+ if (isnan(s->param)) -+ s->param = 1.0f; -+ -+ s->dst_peak = 1.0f; -+ -+ if (in_trc == AVCOL_TRC_UNSPECIFIED) -+ in_trc = AVCOL_TRC_SMPTE2084; -+ if (out_trc == AVCOL_TRC_UNSPECIFIED) -+ out_trc = AVCOL_TRC_BT709; -+ -+ if (in_spc == AVCOL_SPC_UNSPECIFIED) -+ in_spc = 
AVCOL_SPC_BT2020_NCL;
-+    if (out_spc == AVCOL_SPC_UNSPECIFIED)
-+        out_spc = AVCOL_SPC_BT709;
-+
-+    if (in_pri == AVCOL_PRI_UNSPECIFIED)
-+        in_pri = AVCOL_PRI_BT2020;
-+    if (out_pri == AVCOL_PRI_UNSPECIFIED)
-+        out_pri = AVCOL_PRI_BT709;
-+
-+    if (in_range == AVCOL_RANGE_UNSPECIFIED)
-+        in_range = AVCOL_RANGE_MPEG;
-+    if (out_range == AVCOL_RANGE_UNSPECIFIED)
-+        out_range = AVCOL_RANGE_MPEG;
-+
-+    av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n",
-+           av_color_transfer_name(in_trc),
-+           av_color_transfer_name(out_trc));
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n",
-+           av_color_space_name(in_spc),
-+           av_color_space_name(out_spc));
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n",
-+           av_color_primaries_name(in_pri),
-+           av_color_primaries_name(out_pri));
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n",
-+           av_color_range_name(in_range),
-+           av_color_range_name(out_range));
-+
-+    if (!(in_coeffs = ff_get_luma_coefficients(in_spc)))
-+        return AVERROR(EINVAL);
-+
-+    ff_fill_rgb2yuv_table(in_coeffs, yuv_matrix);
-+    ff_matrix_invert_3x3(yuv_matrix, rgb_matrix);
-+
-+    if (!(out_coeffs = ff_get_luma_coefficients(out_spc)))
-+        return AVERROR(EINVAL);
-+
-+    ff_fill_rgb2yuv_table(out_coeffs, yuv_matrix);
-+
-+    if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0)
-+        return ret;
-+
-+    av_bprint_init(&constants, 2048, AV_BPRINT_SIZE_UNLIMITED);
-+
-+    av_bprintf(&constants, ".version 3.2\n");
-+    av_bprintf(&constants, ".target sm_30\n");
-+    av_bprintf(&constants, ".address_size %zu\n", sizeof(void*) * 8);
-+
-+#define CONSTANT_A(decl, align, ...) \
-+    av_bprintf(&constants, ".visible .const .align " #align " " decl ";\n", __VA_ARGS__)
-+#define CONSTANT(decl, ...) CONSTANT_A(decl, 4, __VA_ARGS__)
-+#define CONSTANT_M(a, b) \
-+    CONSTANT(".f32 " a "[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f}", \
-+             b[0][0], b[0][1], b[0][2], \
-+             b[1][0], b[1][1], b[1][2], \
-+             b[2][0], b[2][1], b[2][2])
-+#define CONSTANT_C(a, b) \
-+    CONSTANT(".f32 " a "[] = {%f, %f, %f}", \
-+             b->cr, b->cg, b->cb)
-+
-+    CONSTANT(".u32 depth_src = %i", (int)s->in_desc->comp[0].depth);
-+    CONSTANT(".u32 depth_dst = %i", (int)s->out_desc->comp[0].depth);
-+    CONSTANT(".u32 fmt_src = %i", (int)s->in_fmt);
-+    CONSTANT(".u32 fmt_dst = %i", (int)s->out_fmt);
-+    CONSTANT(".u32 range_src = %i", (int)in_range);
-+    CONSTANT(".u32 range_dst = %i", (int)out_range);
-+    CONSTANT(".u32 trc_src = %i", (int)in_trc);
-+    CONSTANT(".u32 trc_dst = %i", (int)out_trc);
-+    CONSTANT(".u32 chroma_loc_src = %i", (int)s->in_chroma_loc);
-+    CONSTANT(".u32 chroma_loc_dst = %i", (int)s->out_chroma_loc);
-+    CONSTANT(".u32 tonemap_func = %i", (int)s->tonemap);
-+    CONSTANT(".f32 tone_param = %f", s->param);
-+    CONSTANT(".f32 desat_param = %f", s->desat_param);
-+    CONSTANT_M("rgb_matrix", rgb_matrix);
-+    CONSTANT_M("yuv_matrix", yuv_matrix);
-+    CONSTANT_A(".u8 rgb2rgb_passthrough = %i", 1, in_pri == out_pri);
-+    CONSTANT_M("rgb2rgb_matrix", rgb2rgb_matrix);
-+    CONSTANT_C("luma_src", in_coeffs);
-+    CONSTANT_C("luma_dst", out_coeffs);
-+
-+    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
-+    if (ret < 0)
-+        return ret;
-+
-+    if (s->cu_module) {
-+        ret = CHECK_CU(cu->cuModuleUnload(s->cu_module));
-+        if (ret < 0)
-+            goto fail;
-+
-+        s->cu_func = NULL;
-+        s->cu_module = NULL;
-+    }
-+
-+    ret = CHECK_CU(cu->cuLinkCreate(sizeof(options) / sizeof(options[0]), options, option_values, &link_state));
-+    if (ret < 0)
-+        goto fail;
-+
-+    ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, constants.str,
-+                                     constants.len, "constants", 0, NULL, NULL));
-+    if (ret < 0)
-+        goto fail2;
-+
-+    ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, tonemap_ptx,
-+                                     strlen(tonemap_ptx), "tonemap.ptx", 0, NULL, NULL));
-+    if (ret < 0)
-+        goto fail2;
-+
-+    ret = CHECK_CU(cu->cuLinkComplete(link_state, &cubin, &cubin_size));
-+    if (ret < 0)
-+        goto fail2;
-+
-+    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, cubin));
-+    if (ret < 0)
-+        goto fail2;
-+
-+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func, s->cu_module, "tonemap"));
-+    if (ret < 0)
-+        goto fail2;
-+
-+fail2:
-+    CHECK_CU(cu->cuLinkDestroy(link_state));
-+
-+fail:
-+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
-+
-+    av_bprint_finalize(&constants, NULL);
-+
-+    if ((intptr_t)option_values[2] > 0)
-+        av_log(ctx, AV_LOG_INFO, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[2], info_log);
-+
-+    if ((intptr_t)option_values[3] > 0)
-+        av_log(ctx, AV_LOG_ERROR, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[3], error_log);
-+
-+    return ret;
-+}
-+
-+static av_cold int config_props(AVFilterLink *outlink)
-+{
-+    AVFilterContext *ctx = outlink->src;
-+    AVFilterLink *inlink = outlink->src->inputs[0];
-+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
-+    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
-+    TonemapCUDAContext *s = ctx->priv;
-+    int ret;
-+
-+    s->hwctx = device_hwctx;
-+
-+    outlink->w = inlink->w;
-+    outlink->h = inlink->h;
-+
-+    ret = init_processing_chain(ctx, outlink);
-+    if (ret < 0)
-+        return ret;
-+
-+    outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
-+
-+    return 0;
-+}
-+
-+static int run_kernel(AVFilterContext *ctx,
-+                      AVFrame *out, AVFrame *in)
-+{
-+    TonemapCUDAContext *s = ctx->priv;
-+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
-+    FFCUDAFrame src, dst;
-+    void *args_uchar[] = { &src, &dst };
-+    int ret;
-+
-+    ret = ff_make_cuda_frame(&src, in);
-+    if (ret < 0)
-+        goto fail;
-+
-+    ret = ff_make_cuda_frame(&dst, out);
-+    if (ret < 0)
-+        goto fail;
-+
-+    src.peak = s->peak;
-+    if (!src.peak) {
-+        src.peak = ff_determine_signal_peak(in);
-+        av_log(s, AV_LOG_DEBUG, "Computed signal peak: %f\n", src.peak);
-+    }
-+
-+    dst.peak = s->dst_peak;
-+
-+    ret = CHECK_CU(cu->cuLaunchKernel(s->cu_func,
-+                                      DIV_UP(src.width / 2, BLOCKX), DIV_UP(src.height / 2, BLOCKY), 1,
-+                                      BLOCKX, BLOCKY, 1, 0, s->hwctx->stream, args_uchar, NULL));
-+
-+fail:
-+    return ret;
-+}
-+
-+static int do_tonemap(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
-+{
-+    TonemapCUDAContext *s = ctx->priv;
-+    AVFrame *src = in;
-+    int ret;
-+
-+    ret = run_kernel(ctx, s->frame, src);
-+    if (ret < 0)
-+        return ret;
-+
-+    src = s->frame;
-+    ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
-+    if (ret < 0)
-+        return ret;
-+
-+    av_frame_move_ref(out, s->frame);
-+    av_frame_move_ref(s->frame, s->tmp_frame);
-+
-+    s->frame->width = in->width;
-+    s->frame->height = in->height;
-+
-+    ret = av_frame_copy_props(out, in);
-+    if (ret < 0)
-+        return ret;
-+
-+    if (s->out_trc != out->color_trc ||
-+        s->out_spc != out->colorspace ||
-+        s->out_pri != out->color_primaries ||
-+        s->out_range != out->color_range ||
-+        s->out_chroma_loc != out->chroma_location) {
-+        out->color_trc = s->out_trc;
-+        out->colorspace = s->out_spc;
-+        out->color_primaries = s->out_pri;
-+        out->color_range = s->out_range;
-+        out->chroma_location = s->out_chroma_loc;
-+    }
-+
-+    return 0;
-+}
-+
-+static int filter_frame(AVFilterLink *link, AVFrame *in)
-+{
-+    AVFilterContext *ctx = link->dst;
-+    TonemapCUDAContext *s = ctx->priv;
-+    AVFilterLink *outlink = ctx->outputs[0];
-+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
-+
-+    AVFrame *out = NULL;
-+    CUcontext dummy;
-+    int ret = 0;
-+
-+    out = av_frame_alloc();
-+    if (!out) {
-+        ret = AVERROR(ENOMEM);
-+        goto fail;
-+    }
-+
-+    if (!(in->color_trc == AVCOL_TRC_SMPTE2084 ||
-+          in->color_trc == AVCOL_TRC_ARIB_STD_B67)) {
-+        av_log(ctx, AV_LOG_ERROR, "Unsupported input transfer characteristic: %s\n",
-+               av_color_transfer_name(in->color_trc));
-+        ret = AVERROR(EINVAL);
-+        goto fail;
-+    }
-+
-+    if (!s->cu_func ||
-+        s->in_trc != in->color_trc ||
-+        s->in_spc != in->colorspace ||
-+        s->in_pri != in->color_primaries ||
-+        s->in_range != in->color_range ||
-+        s->in_chroma_loc != in->chroma_location) {
-+        s->in_trc = in->color_trc;
-+        s->in_spc = in->colorspace;
-+        s->in_pri = in->color_primaries;
-+        s->in_range = in->color_range;
-+        s->in_chroma_loc = in->chroma_location;
-+
-+        s->out_trc = s->trc;
-+        s->out_spc = s->spc;
-+        s->out_pri = s->pri;
-+        s->out_range = s->range;
-+        s->out_chroma_loc = s->in_chroma_loc;
-+
-+        if ((ret = compile(link)) < 0)
-+            goto fail;
-+    }
-+
-+    ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
-+    if (ret < 0)
-+        goto fail;
-+
-+    ret = do_tonemap(ctx, out, in);
-+
-+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
-+    if (ret < 0)
-+        goto fail;
-+
-+    av_frame_free(&in);
-+
-+    ff_update_hdr_metadata(out, s->dst_peak);
-+
-+    return ff_filter_frame(outlink, out);
-+fail:
-+    av_frame_free(&in);
-+    av_frame_free(&out);
-+    return ret;
-+}
-+
-+#define OFFSET(x) offsetof(TonemapCUDAContext, x)
-+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
-+static const AVOption options[] = {
-+    { "tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" },
-+    { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, "tonemap" },
-+    { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, "tonemap" },
-+    { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, "tonemap" },
-+    { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, "tonemap" },
-+    { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" },
-+    { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, "tonemap" },
-+    { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, "tonemap" },
-+    { "bt2390", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_BT2390}, 0, 0, FLAGS, "tonemap" },
-+    { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
-+    { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
-+    { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, "transfer" },
-+    { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, "transfer" },
-+    { "matrix", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, "matrix" },
-+    { "m", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, "matrix" },
-+    { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, "matrix" },
-+    { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, "matrix" },
-+    { "primaries", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, "primaries" },
-+    { "p", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, "primaries" },
-+    { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, "primaries" },
-+    { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, "primaries" },
-+    { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_MPEG}, -1, INT_MAX, FLAGS, "range" },
-+    { "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_MPEG}, -1, INT_MAX, FLAGS, "range" },
-+    { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },
-+    { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" },
-+    { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },
-+    { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" },
-+    { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
-+    { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS },
-+    { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS },
-+    { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS },
-+    { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS },
-+    { NULL },
-+};
-+
-+static const AVClass tonemap_cuda_class = {
-+    .class_name = "tonemap_cuda",
-+    .item_name = av_default_item_name,
-+    .option = options,
-+    .version = LIBAVUTIL_VERSION_INT,
-+};
-+
-+static const AVFilterPad inputs[] = {
-+    {
-+        .name = "default",
-+        .type = AVMEDIA_TYPE_VIDEO,
-+        .filter_frame = filter_frame,
-+    },
-+    { NULL }
-+};
-+
-+static const AVFilterPad outputs[] = {
-+    {
-+        .name = "default",
-+        .type = AVMEDIA_TYPE_VIDEO,
-+        .config_props = config_props,
-+    },
-+    { NULL }
-+};
-+
-+AVFilter ff_vf_tonemap_cuda = {
-+    .name = "tonemap_cuda",
-+    .description = NULL_IF_CONFIG_SMALL("GPU accelerated HDR to SDR tonemapping"),
-+
-+    .init = init,
-+    .uninit = uninit,
-+    .query_formats = query_formats,
-+
-+    .priv_size = sizeof(TonemapCUDAContext),
-+    .priv_class = &tonemap_cuda_class,
-+
-+    .inputs = inputs,
-+    .outputs = outputs,
-+
-+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
-+};
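The compile() function above embeds the filter's runtime parameters by
printing them as PTX ".const" declarations and linking that generated
module together with the precompiled tonemap kernel, so the GPU code reads
them as constants without recompiling the CUDA source. A minimal
standalone sketch of the string-building half of that technique, assuming
only libavutil's AVBPrint API (the parameter name and value here are
illustrative, not taken from the patch):

    #include <stdio.h>
    #include "libavutil/bprint.h"

    int main(void)
    {
        AVBPrint constants;
        double tone_param = 1.0;

        av_bprint_init(&constants, 0, AV_BPRINT_SIZE_UNLIMITED);
        /* PTX module preamble, as emitted by compile() above */
        av_bprintf(&constants, ".version 3.2\n.target sm_30\n");
        /* one tuning parameter, serialized as a module-level constant */
        av_bprintf(&constants, ".visible .const .align 4 .f32 tone_param = %f;\n",
                   tone_param);
        printf("%s", constants.str);
        av_bprint_finalize(&constants, NULL);
        return 0;
    }

The resulting string is what cuLinkAddData() receives as CU_JIT_INPUT_PTX
in the code above.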
diff --git a/debian/patches/0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch b/debian/patches/0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch
deleted file mode 100644
index d88e553a90d..00000000000
--- a/debian/patches/0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch
+++ /dev/null
@@ -1,755 +0,0 @@
-Index: jellyfin-ffmpeg/libavfilter/opencl/colorspace_common.cl
-===================================================================
---- jellyfin-ffmpeg.orig/libavfilter/opencl/colorspace_common.cl
-+++ jellyfin-ffmpeg/libavfilter/opencl/colorspace_common.cl
-@@ -16,8 +16,23 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
-
-+#define BT709_ALPHA 1.09929682680944f
-+#define BT709_BETA 0.018053968510807f
-+
- #define ST2084_MAX_LUMINANCE 10000.0f
--#define REFERENCE_WHITE 100.0f
-+#define REFERENCE_WHITE 203.0f
-+
-+#define ST2084_M1 0.1593017578125f
-+#define ST2084_M2 78.84375f
-+#define ST2084_C1 0.8359375f
-+#define ST2084_C2 18.8515625f
-+#define ST2084_C3 18.6875f
-+
-+#define ARIB_B67_A 0.17883277f
-+#define ARIB_B67_B 0.28466892f
-+#define ARIB_B67_C 0.55991073f
-+
-+#define FLOAT_EPS 1.175494351e-38f
-
- #if chroma_loc == 1
- #define chroma_sample(a,b,c,d) (((a) + (c)) * 0.5f)
-@@ -33,12 +48,6 @@
- #define chroma_sample(a,b,c,d) (((a) + (b) + (c) + (d)) * 0.25f)
- #endif
-
--constant const float ST2084_M1 = 0.1593017578125f;
--constant const float ST2084_M2 = 78.84375f;
--constant const float ST2084_C1 = 0.8359375f;
--constant const float ST2084_C2 = 18.8515625f;
--constant const float ST2084_C3 = 18.6875f;
--
- float get_luma_dst(float3 c) {
-     return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z;
- }
-@@ -51,61 +60,87 @@ float3 get_chroma_sample(float3 a, float
-     return chroma_sample(a, b, c, d);
- }
-
-+// linearizer for PQ/ST2084
- float eotf_st2084(float x) {
--    float p = powr(x, 1.0f / ST2084_M2);
--    float a = max(p -ST2084_C1, 0.0f);
--    float b = max(ST2084_C2 - ST2084_C3 * p, 1e-6f);
--    float c = powr(a / b, 1.0f / ST2084_M1);
--    return x > 0.0f ? c * ST2084_MAX_LUMINANCE / REFERENCE_WHITE : 0.0f;
--}
--
--__constant const float HLG_A = 0.17883277f;
--__constant const float HLG_B = 0.28466892f;
--__constant const float HLG_C = 0.55991073f;
--
--// linearizer for HLG
--float inverse_oetf_hlg(float x) {
--    float a = 4.0f * x * x;
--    float b = exp((x - HLG_C) / HLG_A) + HLG_B;
--    return x < 0.5f ? a : b;
--}
--
--// delinearizer for HLG
--float oetf_hlg(float x) {
--    float a = 0.5f * sqrt(x);
--    float b = HLG_A * log(x - HLG_B) + HLG_C;
--    return x <= 1.0f ? a : b;
--}
--
--float3 ootf_hlg(float3 c, float peak) {
--    float luma = get_luma_src(c);
--    float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f);
--    gamma = max(1.0f, gamma);
--    float factor = peak * powr(luma, gamma - 1.0f) / powr(12.0f, gamma);
--    return c * factor;
--}
--
--float3 inverse_ootf_hlg(float3 c, float peak) {
--    float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f);
--    c *= powr(12.0f, gamma) / peak;
--    c /= powr(get_luma_dst(c), (gamma - 1.0f) / gamma);
--    return c;
-+    if (x > 0.0f) {
-+        float xpow = powr(x, 1.0f / ST2084_M2);
-+        float num = max(xpow - ST2084_C1, 0.0f);
-+        float den = max(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS);
-+        x = powr(num / den, 1.0f / ST2084_M1);
-+        return x * ST2084_MAX_LUMINANCE / REFERENCE_WHITE;
-+    } else {
-+        return 0.0f;
-+    }
-+}
-+
-+// delinearizer for PQ/ST2084
-+float inverse_eotf_st2084(float x) {
-+    if (x > 0.0f) {
-+        x *= REFERENCE_WHITE / ST2084_MAX_LUMINANCE;
-+        float xpow = powr(x, ST2084_M1);
-+#if 0
-+        // Original formulation from SMPTE ST 2084:2014 publication.
-+        float num = ST2084_C1 + ST2084_C2 * xpow;
-+        float den = 1.0f + ST2084_C3 * xpow;
-+        return powr(num / den, ST2084_M2);
-+#else
-+        // More stable arrangement that avoids some cancellation error.
-+        float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow;
-+        float den = 1.0f + ST2084_C3 * xpow;
-+        return powr(1.0f + num / den, ST2084_M2);
-+#endif
-+    } else {
-+        return 0.0f;
-+    }
-+}
-+
-+float ootf_1_2(float x) {
-+    return x < 0.0f ? x : powr(x, 1.2f);
-+}
-+
-+float inverse_ootf_1_2(float x) {
-+    return x < 0.0f ? x : powr(x, 1.0f / 1.2f);
-+}
-+
-+float oetf_arib_b67(float x) {
-+    x = max(x, 0.0f);
-+    return x <= (1.0f / 12.0f)
-+           ? sqrt(3.0f * x)
-+           : (ARIB_B67_A * log(12.0f * x - ARIB_B67_B) + ARIB_B67_C);
-+}
-+
-+float inverse_oetf_arib_b67(float x) {
-+    x = max(x, 0.0f);
-+    return x <= 0.5f
-+           ? (x * x) * (1.0f / 3.0f)
-+           : (exp((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f);
- }
-
--float inverse_eotf_bt1886(float c) {
--    return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f);
-+// linearizer for HLG/ARIB-B67
-+float eotf_arib_b67(float x) {
-+    return ootf_1_2(inverse_oetf_arib_b67(x));
- }
-
--float oetf_bt709(float c) {
--    c = c < 0.0f ? 0.0f : c;
--    float r1 = 4.5f * c;
--    float r2 = 1.099f * powr(c, 0.45f) - 0.099f;
--    return c < 0.018f ? r1 : r2;
--}
--float inverse_oetf_bt709(float c) {
--    float r1 = c / 4.5f;
--    float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f);
--    return c < 0.081f ? r1 : r2;
-+// delinearizer for HLG/ARIB-B67
-+float inverse_eotf_arib_b67(float x) {
-+    return oetf_arib_b67(inverse_ootf_1_2(x));
-+}
-+
-+float inverse_eotf_bt1886(float x) {
-+    return x < 0.0f ? 0.0f : powr(x, 1.0f / 2.4f);
-+}
-+
-+float oetf_bt709(float x) {
-+    x = max(0.0f, x);
-+    return x < BT709_BETA
-+           ? (x * 4.5f)
-+           : (BT709_ALPHA * powr(x, 0.45f) - (BT709_ALPHA - 1.0f));
-+}
-+
-+float inverse_oetf_bt709(float x) {
-+    return x < (4.5f * BT709_BETA)
-+           ? (x / 4.5f)
-+           : (powr((x + (BT709_ALPHA - 1.0f)) / BT709_ALPHA, 1.0f / 0.45f));
- }
-
- float3 yuv2rgb(float y, float u, float v) {
-@@ -187,19 +222,3 @@ float3 lrgb2lrgb(float3 c) {
-     return (float3)(rr, gg, bb);
- #endif
- }
--
--float3 ootf(float3 c, float peak) {
--#ifdef ootf_impl
--    return ootf_impl(c, peak);
--#else
--    return c;
--#endif
--}
--
--float3 inverse_ootf(float3 c, float peak) {
--#ifdef inverse_ootf_impl
--    return inverse_ootf_impl(c, peak);
--#else
--    return c;
--#endif
--}
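The PQ pair above is the core of this hunk: eotf_st2084() maps a
PQ-encoded signal to linear light normalized so that 1.0 equals
REFERENCE_WHITE (now 203 nits rather than 100), and inverse_eotf_st2084()
is its exact inverse. A self-contained C translation of the same
arithmetic, with powr/max replaced by their libm equivalents and the
constants copied from the hunk:

    #include <math.h>
    #include <stdio.h>

    #define ST2084_M1 0.1593017578125f
    #define ST2084_M2 78.84375f
    #define ST2084_C1 0.8359375f
    #define ST2084_C2 18.8515625f
    #define ST2084_C3 18.6875f
    #define ST2084_MAX_LUMINANCE 10000.0f
    #define REFERENCE_WHITE 203.0f
    #define FLOAT_EPS 1.175494351e-38f

    /* PQ signal -> linear light (1.0 == reference white) */
    static float eotf_st2084(float x)
    {
        float xpow, num, den;
        if (x <= 0.0f)
            return 0.0f;
        xpow = powf(x, 1.0f / ST2084_M2);
        num  = fmaxf(xpow - ST2084_C1, 0.0f);
        den  = fmaxf(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS);
        return powf(num / den, 1.0f / ST2084_M1) *
               ST2084_MAX_LUMINANCE / REFERENCE_WHITE;
    }

    /* linear light -> PQ signal, inverse of the above */
    static float inverse_eotf_st2084(float x)
    {
        float xpow;
        if (x <= 0.0f)
            return 0.0f;
        x *= REFERENCE_WHITE / ST2084_MAX_LUMINANCE;
        xpow = powf(x, ST2084_M1);
        return powf((ST2084_C1 + ST2084_C2 * xpow) /
                    (1.0f + ST2084_C3 * xpow), ST2084_M2);
    }

    int main(void)
    {
        /* code value 1.0 is 10000 nits, i.e. about 49.26x reference white */
        printf("%f\n", eotf_st2084(1.0f));
        /* round trip should return approximately 0.58 */
        printf("%f\n", inverse_eotf_st2084(eotf_st2084(0.58f)));
        return 0;
    }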
-Index: jellyfin-ffmpeg/libavfilter/opencl/tonemap.cl
-===================================================================
---- jellyfin-ffmpeg.orig/libavfilter/opencl/tonemap.cl
-+++ jellyfin-ffmpeg/libavfilter/opencl/tonemap.cl
-@@ -16,54 +16,50 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
-
--#define REFERENCE_WHITE 100.0f
-+#define FLOAT_EPS 1.175494351e-38f
-+
- extern float3 lrgb2yuv(float3);
- extern float lrgb2y(float3);
- extern float3 yuv2lrgb(float3);
- extern float3 lrgb2lrgb(float3);
- extern float get_luma_src(float3);
- extern float get_luma_dst(float3);
--extern float3 ootf(float3 c, float peak);
--extern float3 inverse_ootf(float3 c, float peak);
-+extern float eotf_st2084(float);
-+extern float inverse_eotf_st2084(float);
- extern float3 get_chroma_sample(float3, float3, float3, float3);
-
--struct detection_result {
--    float peak;
--    float average;
--};
--
- float hable_f(float in) {
-     float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
-     return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
- }
-
--float direct(float s, float peak) {
-+float direct(float s, float peak, float target_peak) {
-     return s;
- }
-
--float linear(float s, float peak) {
-+float linear(float s, float peak, float target_peak) {
-     return s * tone_param / peak;
- }
-
--float gamma(float s, float peak) {
--    float p = s > 0.05f ? s /peak : 0.05f / peak;
-+float gamma(float s, float peak, float target_peak) {
-+    float p = s > 0.05f ? s / peak : 0.05f / peak;
-     float v = powr(p, 1.0f / tone_param);
--    return s > 0.05f ? v : (s * v /0.05f);
-+    return s > 0.05f ? v : (s * v / 0.05f);
- }
-
--float clip(float s, float peak) {
-+float clip(float s, float peak, float target_peak) {
-     return clamp(s * tone_param, 0.0f, 1.0f);
- }
-
--float reinhard(float s, float peak) {
-+float reinhard(float s, float peak, float target_peak) {
-     return s / (s + tone_param) * (peak + tone_param) / peak;
- }
-
--float hable(float s, float peak) {
--    return hable_f(s)/hable_f(peak);
-+float hable(float s, float peak, float target_peak) {
-+    return hable_f(s) / hable_f(peak);
- }
-
--float mobius(float s, float peak) {
-+float mobius(float s, float peak, float target_peak) {
-     float j = tone_param;
-     float a, b;
-
-@@ -71,102 +67,32 @@ float mobius(float s, float peak) {
-         return s;
-
-     a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
--    b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f);
-+    b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, FLOAT_EPS);
-
-     return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b);
- }
-
--// detect peak/average signal of a frame, the algorithm was ported from:
--// libplacebo (https://github.com/haasn/libplacebo)
--struct detection_result
--detect_peak_avg(global uint *util_buf, __local uint *sum_wg,
--                float signal, float peak) {
--// layout of the util buffer
--//
--// Name:              : Size (units of 4-bytes)
--// average buffer     : detection_frames + 1
--// peak buffer        : detection_frames + 1
--// workgroup counter  : 1
--// total of peak      : 1
--// total of average   : 1
--// frame index        : 1
--// frame number       : 1
--    global uint *avg_buf = util_buf;
--    global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1;
--    global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1;
--    global uint *max_total_p = counter_wg_p + 1;
--    global uint *avg_total_p = max_total_p + 1;
--    global uint *frame_idx_p = avg_total_p + 1;
--    global uint *scene_frame_num_p = frame_idx_p + 1;
--
--    uint frame_idx = *frame_idx_p;
--    uint scene_frame_num = *scene_frame_num_p;
--
--    size_t lidx = get_local_id(0);
--    size_t lidy = get_local_id(1);
--    size_t lsizex = get_local_size(0);
--    size_t lsizey = get_local_size(1);
--    uint num_wg = get_num_groups(0) * get_num_groups(1);
--    size_t group_idx = get_group_id(0);
--    size_t group_idy = get_group_id(1);
--    struct detection_result r = {peak, sdr_avg};
--    if (lidx == 0 && lidy == 0)
--        *sum_wg = 0;
--    barrier(CLK_LOCAL_MEM_FENCE);
--
--    // update workgroup sum
--    atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE));
--    barrier(CLK_LOCAL_MEM_FENCE);
--
--    // update frame peak/avg using work-group-average.
--    if (lidx == 0 && lidy == 0) {
--        uint avg_wg = *sum_wg / (lsizex * lsizey);
--        atomic_max(&peak_buf[frame_idx], avg_wg);
--        atomic_add(&avg_buf[frame_idx], avg_wg);
--    }
--
--    if (scene_frame_num > 0) {
--        float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num);
--        float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num);
--        r.peak = max(1.0f, peak);
--        r.average = max(0.25f, avg);
--    }
-+float bt2390(float s, float peak, float target_peak) {
-+    float peak_pq = inverse_eotf_st2084(peak);
-+    float scale = 1.0f / peak_pq;
-+
-+    float s_pq = inverse_eotf_st2084(s) * scale;
-+    float maxLum = inverse_eotf_st2084(target_peak) * scale;
-+
-+    float ks = 1.5f * maxLum - 0.5f;
-+    float tb = (s_pq - ks) / (1.0f - ks);
-+    float tb2 = tb * tb;
-+    float tb3 = tb2 * tb;
-+    float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks +
-+               (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) +
-+               (-2.0f * tb3 + 3.0f * tb2) * maxLum;
-+    float sig = (s_pq < ks) ? s_pq : pb;
-
--    if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) {
--        *counter_wg_p = 0;
--        avg_buf[frame_idx] /= num_wg;
--
--        if (scene_threshold > 0.0f) {
--            uint cur_max = peak_buf[frame_idx];
--            uint cur_avg = avg_buf[frame_idx];
--            int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p;
--
--            if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) {
--                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
--                    avg_buf[i] = 0;
--                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
--                    peak_buf[i] = 0;
--                *avg_total_p = *max_total_p = 0;
--                *scene_frame_num_p = 0;
--                avg_buf[frame_idx] = cur_avg;
--                peak_buf[frame_idx] = cur_max;
--            }
--        }
--        uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1);
--        // add current frame, subtract next frame
--        *max_total_p += peak_buf[frame_idx] - peak_buf[next];
--        *avg_total_p += avg_buf[frame_idx] - avg_buf[next];
--        // reset next frame
--        peak_buf[next] = avg_buf[next] = 0;
--        *frame_idx_p = next;
--        *scene_frame_num_p = min(*scene_frame_num_p + 1,
--                                 (uint)DETECTION_FRAMES);
--    }
--    return r;
-+    return eotf_st2084(sig * peak_pq);
- }
-
--float3 map_one_pixel_rgb(float3 rgb, float peak, float average) {
--    float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f);
-+float3 map_one_pixel_rgb(float3 rgb, float peak) {
-+    float sig = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS);
-
-     // Rescale the variables in order to bring it into a representation where
-     // 1.0 represents the dst_peak. This is because all of the tone mapping
-@@ -178,30 +104,24 @@ float3 map_one_pixel_rgb(float3 rgb, flo
-
-     float sig_old = sig;
-
--    // Scale the signal to compensate for differences in the average brightness
--    float slope = min(1.0f, sdr_avg / average);
--    sig *= slope;
--    peak *= slope;
--
-     // Desaturate the color using a coefficient dependent on the signal level
-     if (desat_param > 0.0f) {
-         float luma = get_luma_dst(rgb);
--        float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f);
--        coeff = native_powr(coeff, 10.0f / desat_param);
-+        float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS);
-+        coeff = powr(coeff, 10.0f / desat_param);
-         rgb = mix(rgb, (float3)luma, (float3)coeff);
--        sig = mix(sig, luma * slope, coeff);
-     }
-
--    sig = TONE_FUNC(sig, peak);
-+    sig = TONE_FUNC(sig, peak, target_peak);
-
-     sig = min(sig, 1.0f);
--    rgb *= (sig/sig_old);
-+    rgb *= (sig / sig_old);
-     return rgb;
- }
--// map from source space YUV to destination space RGB
-+
-+// Map from source space YUV to destination space RGB
- float3 map_to_dst_space_from_yuv(float3 yuv, float peak) {
-     float3 c = yuv2lrgb(yuv);
--    c = ootf(c, peak);
-     c = lrgb2lrgb(c);
-     return c;
- }
-@@ -210,7 +130,6 @@ __kernel void tonemap(__write_only image
-                       __read_only  image2d_t src1,
-                       __write_only image2d_t dst2,
-                       __read_only  image2d_t src2,
--                      global uint *util_buf,
-                       float peak
-                       )
- {
-@@ -241,23 +160,17 @@ __kernel void tonemap(__write_only image
-     float sig3 = max(c3.x, max(c3.y, c3.z));
-     float sig = max(sig0, max(sig1, max(sig2, sig3)));
-
--    struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak);
--
-     float3 c0_old = c0, c1_old = c1, c2_old = c2;
--    c0 = map_one_pixel_rgb(c0, r.peak, r.average);
--    c1 = map_one_pixel_rgb(c1, r.peak, r.average);
--    c2 = map_one_pixel_rgb(c2, r.peak, r.average);
--    c3 = map_one_pixel_rgb(c3, r.peak, r.average);
--
--    c0 = inverse_ootf(c0, target_peak);
--    c1 = inverse_ootf(c1, target_peak);
--    c2 = inverse_ootf(c2, target_peak);
--    c3 = inverse_ootf(c3, target_peak);
-+    c0 = map_one_pixel_rgb(c0, peak);
-+    c1 = map_one_pixel_rgb(c1, peak);
-+    c2 = map_one_pixel_rgb(c2, peak);
-+    c3 = map_one_pixel_rgb(c3, peak);
-
-     y0 = lrgb2y(c0);
-     y1 = lrgb2y(c1);
-     y2 = lrgb2y(c2);
-     y3 = lrgb2y(c3);
-+
-     float3 chroma_c = get_chroma_sample(c0, c1, c2, c3);
-     float3 chroma = lrgb2yuv(chroma_c);
-
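bt2390() above does all of its work in the PQ domain: both the signal and
the target peak are converted with inverse_eotf_st2084(), normalized so
the source peak is 1.0, and run through the BT.2390 Hermite-spline knee
before converting back. The spline itself is plain arithmetic; a
standalone C version of just that step, with the same coefficients as the
kernel and sample values chosen only for illustration:

    #include <stdio.h>

    /* s_pq:   PQ-encoded signal, normalized so the source peak is 1.0
     * maxLum: normalized PQ value of the target peak */
    static float bt2390_spline(float s_pq, float maxLum)
    {
        float ks = 1.5f * maxLum - 0.5f; /* knee start */
        float tb = (s_pq - ks) / (1.0f - ks);
        float tb2 = tb * tb;
        float tb3 = tb2 * tb;
        float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks +
                   (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) +
                   (-2.0f * tb3 + 3.0f * tb2) * maxLum;
        return s_pq < ks ? s_pq : pb;
    }

    int main(void)
    {
        /* below the knee the signal passes through unchanged ... */
        printf("%f\n", bt2390_spline(0.50f, 0.75f)); /* -> 0.500000 */
        /* ... while the source peak lands exactly on the target peak */
        printf("%f\n", bt2390_spline(1.00f, 0.75f)); /* -> 0.750000 */
        return 0;
    }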
-Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_opencl.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavfilter/vf_tonemap_opencl.c
-+++ jellyfin-ffmpeg/libavfilter/vf_tonemap_opencl.c
-@@ -15,6 +15,7 @@
-  * License along with FFmpeg; if not, write to the Free Software
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
-+
- #include <float.h>
-
- #include "libavutil/avassert.h"
-@@ -31,13 +32,6 @@
- #include "video.h"
- #include "colorspace.h"
-
--// TODO:
--//      - separate peak-detection from tone-mapping kernel to solve
--//        one-frame-delay issue.
--//      - more format support
--
--#define DETECTION_FRAMES 63
--
- enum TonemapAlgorithm {
-     TONEMAP_NONE,
-     TONEMAP_LINEAR,
-@@ -46,6 +40,7 @@ enum TonemapAlgorithm {
-     TONEMAP_REINHARD,
-     TONEMAP_HABLE,
-     TONEMAP_MOBIUS,
-+    TONEMAP_BT2390,
-     TONEMAP_MAX,
- };
-
-@@ -68,12 +63,11 @@ typedef struct TonemapOpenCLContext {
-     int initialised;
-     cl_kernel kernel;
-     cl_command_queue command_queue;
--    cl_mem util_mem;
- } TonemapOpenCLContext;
-
- static const char *const linearize_funcs[AVCOL_TRC_NB] = {
--    [AVCOL_TRC_SMPTE2084] = "eotf_st2084",
--    [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg",
-+    [AVCOL_TRC_SMPTE2084] = "eotf_st2084",
-+    [AVCOL_TRC_ARIB_STD_B67] = "eotf_arib_b67",
- };
-
- static const char *const delinearize_funcs[AVCOL_TRC_NB] = {
-@@ -99,6 +93,7 @@ static const char *const tonemap_func[TO
-     [TONEMAP_REINHARD] = "reinhard",
-     [TONEMAP_HABLE] = "hable",
-     [TONEMAP_MOBIUS] = "mobius",
-+    [TONEMAP_BT2390] = "bt2390",
- };
-
- static void get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out,
-@@ -112,9 +107,6 @@ static void get_rgb2rgb_matrix(enum AVCo
- }
-
- #define OPENCL_SOURCE_NB 3
---// Average light level for SDR signals. This is equal to a signal level of 0.5
---// under a typical presentation gamma of about 2.0.
--static const float sdr_avg = 0.25f;
-
- static int tonemap_opencl_init(AVFilterContext *avctx)
- {
-@@ -127,7 +119,7 @@ static int tonemap_opencl_init(AVFilterC
-     AVBPrint header;
-     const char *opencl_sources[OPENCL_SOURCE_NB];
-
--    av_bprint_init(&header, 1024, AV_BPRINT_SIZE_AUTOMATIC);
-+    av_bprint_init(&header, 2048, AV_BPRINT_SIZE_UNLIMITED);
-
-     switch(ctx->tonemap) {
-     case TONEMAP_GAMMA:
-@@ -149,18 +141,20 @@ static int tonemap_opencl_init(AVFilterC
-
-     // SDR peak is 1.0f
-     ctx->target_peak = 1.0f;
--    av_log(ctx, AV_LOG_DEBUG, "tone mapping transfer from %s to %s\n",
-+
-+    av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n",
-            av_color_transfer_name(ctx->trc_in),
-            av_color_transfer_name(ctx->trc_out));
--    av_log(ctx, AV_LOG_DEBUG, "mapping colorspace from %s to %s\n",
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n",
-            av_color_space_name(ctx->colorspace_in),
-            av_color_space_name(ctx->colorspace_out));
--    av_log(ctx, AV_LOG_DEBUG, "mapping primaries from %s to %s\n",
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n",
-            av_color_primaries_name(ctx->primaries_in),
-            av_color_primaries_name(ctx->primaries_out));
--    av_log(ctx, AV_LOG_DEBUG, "mapping range from %s to %s\n",
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n",
-            av_color_range_name(ctx->range_in),
-            av_color_range_name(ctx->range_out));
-+
-     // checking valid value just because of limited implementaion
-     // please remove when more functionalities are implemented
-     av_assert0(ctx->trc_out == AVCOL_TRC_BT709 ||
-@@ -178,11 +172,9 @@ static int tonemap_opencl_init(AVFilterC
-                ctx->desat_param);
-     av_bprintf(&header, "__constant const float target_peak = %.4ff;\n",
-                ctx->target_peak);
--    av_bprintf(&header, "__constant const float sdr_avg = %.4ff;\n", sdr_avg);
-     av_bprintf(&header, "__constant const float scene_threshold = %.4ff;\n",
-                ctx->scene_threshold);
-     av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]);
--    av_bprintf(&header, "#define DETECTION_FRAMES %d\n", DETECTION_FRAMES);
-
-     if (ctx->primaries_out != ctx->primaries_in) {
-         get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb);
-@@ -196,6 +188,16 @@ static int tonemap_opencl_init(AVFilterC
-
-     av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc);
-
-+    av_bprintf(&header, "#define powr native_powr\n");
-+
-+    av_bprintf(&header, "#define exp native_exp\n");
-+
-+    av_bprintf(&header, "#define log native_log\n");
-+
-+    av_bprintf(&header, "#define log10 native_log10\n");
-+
-+    av_bprintf(&header, "#define sqrt native_sqrt\n");
-+
-     if (rgb2rgb_passthrough)
-         av_bprintf(&header, "#define RGB2RGB_PASSTHROUGH\n");
-     else
-@@ -205,7 +207,7 @@ static int tonemap_opencl_init(AVFilterC
-     luma_src = ff_get_luma_coefficients(ctx->colorspace_in);
-     if (!luma_src) {
-         err = AVERROR(EINVAL);
--        av_log(avctx, AV_LOG_ERROR, "unsupported input colorspace %d (%s)\n",
-+        av_log(avctx, AV_LOG_ERROR, "Unsupported input colorspace %d (%s)\n",
-                ctx->colorspace_in, av_color_space_name(ctx->colorspace_in));
-         goto fail;
-     }
-@@ -213,7 +215,7 @@ static int tonemap_opencl_init(AVFilterC
-     luma_dst = ff_get_luma_coefficients(ctx->colorspace_out);
-     if (!luma_dst) {
-         err = AVERROR(EINVAL);
--        av_log(avctx, AV_LOG_ERROR, "unsupported output colorspace %d (%s)\n",
-+        av_log(avctx, AV_LOG_ERROR, "Unsupported output colorspace %d (%s)\n",
-                ctx->colorspace_out, av_color_space_name(ctx->colorspace_out));
-         goto fail;
-     }
-@@ -225,21 +227,16 @@ static int tonemap_opencl_init(AVFilterC
-     ff_matrix_invert_3x3(rgb2yuv, yuv2rgb);
-     ff_opencl_print_const_matrix_3x3(&header, "rgb_matrix", yuv2rgb);
-
--    av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n",
-+    av_bprintf(&header, "__constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n",
-                luma_src->cr, luma_src->cg, luma_src->cb);
--    av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n",
-+    av_bprintf(&header, "__constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n",
-                luma_dst->cr, luma_dst->cg, luma_dst->cb);
-
--    av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]);
-+    av_bprintf(&header, "#define linearize %s\n",
-+               linearize_funcs[ctx->trc_in]);
-     av_bprintf(&header, "#define delinearize %s\n",
-                delinearize_funcs[ctx->trc_out]);
-
--    if (ctx->trc_in == AVCOL_TRC_ARIB_STD_B67)
--        av_bprintf(&header, "#define ootf_impl ootf_hlg\n");
--
--    if (ctx->trc_out == AVCOL_TRC_ARIB_STD_B67)
--        av_bprintf(&header, "#define inverse_ootf_impl inverse_ootf_hlg\n");
--
-     av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str);
-     opencl_sources[0] = header.str;
-     opencl_sources[1] = ff_opencl_source_tonemap;
-@@ -259,19 +256,11 @@ static int tonemap_opencl_init(AVFilterC
-     ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle);
-     CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
-
--    ctx->util_mem =
--        clCreateBuffer(ctx->ocf.hwctx->context, 0,
--                       (2 * DETECTION_FRAMES + 7) * sizeof(unsigned),
--                       NULL, &cle);
--    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create util buffer: %d.\n", cle);
--
-     ctx->initialised = 1;
-     return 0;
-
- fail:
-     av_bprint_finalize(&header, NULL);
--    if (ctx->util_mem)
--        clReleaseMemObject(ctx->util_mem);
-     if (ctx->command_queue)
-         clReleaseCommandQueue(ctx->command_queue);
-     if (ctx->kernel)
-@@ -285,11 +274,11 @@ static int tonemap_opencl_config_output(
-     TonemapOpenCLContext *s = avctx->priv;
-     int ret;
-     if (s->format == AV_PIX_FMT_NONE)
--        av_log(avctx, AV_LOG_WARNING, "format not set, use default format NV12\n");
-+        av_log(avctx, AV_LOG_WARNING, "Format not set, use default format NV12\n");
-     else {
-         if (s->format != AV_PIX_FMT_P010 &&
-             s->format != AV_PIX_FMT_NV12) {
--            av_log(avctx, AV_LOG_ERROR, "unsupported output format,"
-+            av_log(avctx, AV_LOG_ERROR, "Unsupported output format,"
-                    "only p010/nv12 supported now\n");
-             return AVERROR(EINVAL);
-         }
-@@ -315,8 +304,7 @@ static int launch_kernel(AVFilterContext
-     CL_SET_KERNEL_ARG(kernel, 1, cl_mem, &input->data[0]);
-     CL_SET_KERNEL_ARG(kernel, 2, cl_mem, &output->data[1]);
-     CL_SET_KERNEL_ARG(kernel, 3, cl_mem, &input->data[1]);
--    CL_SET_KERNEL_ARG(kernel, 4, cl_mem, &ctx->util_mem);
--    CL_SET_KERNEL_ARG(kernel, 5, cl_float, &peak);
-+    CL_SET_KERNEL_ARG(kernel, 4, cl_float, &peak);
-
-     local_work[0] = 16;
-     local_work[1] = 16;
-@@ -390,13 +378,15 @@ static int tonemap_opencl_filter_frame(A
-     if (!ctx->initialised) {
-         if (!(input->color_trc == AVCOL_TRC_SMPTE2084 ||
-               input->color_trc == AVCOL_TRC_ARIB_STD_B67)) {
--            av_log(ctx, AV_LOG_ERROR, "unsupported transfer function characteristic.\n");
-+            av_log(ctx, AV_LOG_ERROR, "Unsupported transfer function characteristic: %s\n",
-+                   av_color_transfer_name(input->color_trc));
-             err = AVERROR(ENOSYS);
-             goto fail;
-         }
-
-         if (input_frames_ctx->sw_format != AV_PIX_FMT_P010) {
--            av_log(ctx, AV_LOG_ERROR, "unsupported format in tonemap_opencl.\n");
-+            av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
-+                   av_get_pix_fmt_name(input_frames_ctx->sw_format));
-             err = AVERROR(ENOSYS);
-             goto fail;
-         }
-@@ -423,31 +413,9 @@ static int tonemap_opencl_filter_frame(A
-
-     ff_update_hdr_metadata(output, ctx->target_peak);
-
--    av_log(ctx, AV_LOG_DEBUG, "Tone-mapping output: %s, %ux%u (%"PRId64").\n",
-+    av_log(ctx, AV_LOG_DEBUG, "Tonemapping output: %s, %ux%u (%"PRId64").\n",
-            av_get_pix_fmt_name(output->format),
-            output->width, output->height, output->pts);
---#ifndef NDEBUG
--    {
--        uint32_t *ptr, *max_total_p, *avg_total_p, *frame_number_p;
--        float peak_detected, avg_detected;
--        unsigned map_size = (2 * DETECTION_FRAMES + 7) * sizeof(unsigned);
--        ptr = (void *)clEnqueueMapBuffer(ctx->command_queue, ctx->util_mem,
--                                         CL_TRUE, CL_MAP_READ, 0, map_size,
--                                         0, NULL, NULL, &cle);
--        // For the layout of the util buffer, refer tonemap.cl
--        if (ptr) {
--            max_total_p = ptr + 2 * (DETECTION_FRAMES + 1) + 1;
--            avg_total_p = max_total_p + 1;
--            frame_number_p = avg_total_p + 2;
--            peak_detected = (float)*max_total_p / (REFERENCE_WHITE * (*frame_number_p));
--            avg_detected = (float)*avg_total_p / (REFERENCE_WHITE * (*frame_number_p));
--            av_log(ctx, AV_LOG_DEBUG, "peak %f, avg %f will be used for next frame\n",
--                   peak_detected, avg_detected);
--            clEnqueueUnmapMemObject(ctx->command_queue, ctx->util_mem, ptr, 0,
--                                    NULL, NULL);
--        }
--    }
--#endif
-
-     return ff_filter_frame(outlink, output);
-
-@@ -463,8 +431,6 @@ static av_cold void tonemap_opencl_unini
-     TonemapOpenCLContext *ctx = avctx->priv;
-     cl_int cle;
-
--    if (ctx->util_mem)
--        clReleaseMemObject(ctx->util_mem);
-     if (ctx->kernel) {
-         cle = clReleaseKernel(ctx->kernel);
-         if (cle != CL_SUCCESS)
-@@ -493,6 +459,7 @@ static const AVOption tonemap_opencl_opt
-     { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" },
-     { "hable",    0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE},    0, 0, FLAGS, "tonemap" },
-     { "mobius",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS},   0, 0, FLAGS, "tonemap" },
-+    { "bt2390",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_BT2390},   0, 0, FLAGS, "tonemap" },
-     { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
-     { "t",        "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
-     { "bt709",    0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709},  0, 0, FLAGS, "transfer" },
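Two of the quieter changes in the patch above have outsized effects: the
native_* defines route all transcendental math in the kernels to the fast
OpenCL builtins, and REFERENCE_WHITE moves from 100 to 203 nits, the
diffuse-white level used by recent HDR practice. Since linear values are
normalized so 1.0 equals reference white, the same absolute luminance now
yields a smaller normalized peak. A toy illustration of that
renormalization (the 1000-nit figure is an arbitrary example):

    #include <stdio.h>

    int main(void)
    {
        float nits = 1000.0f;
        printf("normalized peak, 100-nit reference: %f\n", nits / 100.0f);
        printf("normalized peak, 203-nit reference: %f\n", nits / 203.0f);
        return 0;
    }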
diff --git a/debian/patches/0007-fix-for-fmp4-in-hlsenc.patch b/debian/patches/0007-fix-for-fmp4-in-hlsenc.patch
deleted file mode 100644
index 5610f02b425..00000000000
--- a/debian/patches/0007-fix-for-fmp4-in-hlsenc.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-Index: jellyfin-ffmpeg/libavformat/hlsenc.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavformat/hlsenc.c
-+++ jellyfin-ffmpeg/libavformat/hlsenc.c
-@@ -2672,14 +2672,13 @@ static int hls_write_packet(AVFormatCont
-
-     vs->packets_written++;
-     if (oc->pb) {
--        int64_t keyframe_pre_pos = avio_tell(oc->pb);
-         ret = ff_write_chained(oc, stream_index, pkt, s, 0);
--        if ((st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) &&
--            (pkt->flags & AV_PKT_FLAG_KEY) && !keyframe_pre_pos) {
--            av_write_frame(oc, NULL); /* Flush any buffered data */
--            vs->video_keyframe_size = avio_tell(oc->pb) - keyframe_pre_pos;
-+        vs->video_keyframe_size += pkt->size;
-+        if ((st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) && (pkt->flags & AV_PKT_FLAG_KEY)) {
-+            vs->video_keyframe_size = avio_tell(oc->pb);
-+        } else {
-+            vs->video_keyframe_pos = avio_tell(vs->out);
-         }
--        vs->video_keyframe_pos = vs->start_pos;
-         if (hls->ignore_io_errors)
-             ret = 0;
-     }
diff --git a/debian/patches/0008-fix-nvdec-exceeded-32-surfaces-error.patch b/debian/patches/0008-fix-nvdec-exceeded-32-surfaces-error.patch
deleted file mode 100644
index ed02508ae5f..00000000000
--- a/debian/patches/0008-fix-nvdec-exceeded-32-surfaces-error.patch
+++ /dev/null
@@ -1,17 +0,0 @@
-Index: jellyfin-ffmpeg/libavcodec/nvdec.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/nvdec.c
-+++ jellyfin-ffmpeg/libavcodec/nvdec.c
-@@ -303,8 +303,10 @@ static int nvdec_init_hwframes(AVCodecCo
-     frames_ctx = (AVHWFramesContext*)(*out_frames_ref)->data;
-
-     if (dummy) {
--        // Copied from ff_decode_get_hw_frames_ctx for compatibility
--        frames_ctx->initial_pool_size += 3;
-+        // The function above guarantees 1 work surface; we must guarantee 4
-+        // work surfaces (the absolute minimum), so add the missing count
-+        // without exceeding the maximum recommended for nvdec.
-+        frames_ctx->initial_pool_size = FFMIN(frames_ctx->initial_pool_size + 3, 32);
-
-         frames_ctx->free = nvdec_free_dummy;
-         frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);
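The nvdec change is a one-line policy fix: keep adding the 3 extra work
surfaces the decoder needs, but clamp the pool at the 32 surfaces nvdec
recommends as a maximum. A trivial demonstration of the clamp, with FFMIN
redefined locally so the snippet stands alone:

    #include <stdio.h>

    #define FFMIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        for (int initial = 28; initial <= 32; initial++)
            printf("pool %d -> %d\n", initial, FFMIN(initial + 3, 32));
        return 0;
    }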
diff --git a/debian/patches/0009-fix-for-nvenc-from-upstream.patch b/debian/patches/0009-fix-for-nvenc-from-upstream.patch
deleted file mode 100644
index 0eca70f361f..00000000000
--- a/debian/patches/0009-fix-for-nvenc-from-upstream.patch
+++ /dev/null
@@ -1,1716 +0,0 @@
-Index: jellyfin-ffmpeg/Changelog
-===================================================================
---- jellyfin-ffmpeg.orig/Changelog
-+++ jellyfin-ffmpeg/Changelog
-@@ -1,7 +1,7 @@
- Entries are sorted chronologically from oldest to youngest within each release,
- releases are sorted from youngest to oldest.
-
--version <next>:
-+version 4.4:
- - AudioToolbox output device
- - MacCaption demuxer
- - PGX decoder
-Index: jellyfin-ffmpeg/RELEASE_NOTES
-===================================================================
---- jellyfin-ffmpeg.orig/RELEASE_NOTES
-+++ jellyfin-ffmpeg/RELEASE_NOTES
-@@ -11,5 +11,5 @@
-
-   We hope you will like this release as much as we enjoyed working on it, and
-   as usual, if you have any questions about it, or any FFmpeg related topic,
--  feel free to join us on the #ffmpeg IRC channel (on irc.freenode.net) or ask
-+  feel free to join us on the #ffmpeg IRC channel (on irc.libera.chat) or ask
-   on the mailing-lists.
-Index: jellyfin-ffmpeg/configure
-===================================================================
---- jellyfin-ffmpeg.orig/configure
-+++ jellyfin-ffmpeg/configure
-@@ -536,7 +536,7 @@ die(){
-
- If you think configure made a mistake, make sure you are using the latest
- version from Git. If the latest version fails, report the problem to the
--ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.freenode.net.
-+ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.libera.chat.
- EOF
-     if disabled logging; then
-         cat <st->index, ost->st->id, ost->initialized, ost->inputs_done, ost->finished);
-
-     if (!ost->initialized && !ost->inputs_done)
--        return ost;
-+        return ost->unavailable ? NULL : ost;
-
-     if (!ost->finished && opts < opts_min) {
-         opts_min = opts;
-Index: jellyfin-ffmpeg/libavcodec/aacenc.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/aacenc.c
-+++ jellyfin-ffmpeg/libavcodec/aacenc.c
-@@ -28,6 +28,7 @@
-  *                    TODOs:
-  * add sane pulse detection
-  ***********************************/
-+#include <float.h>
-
- #include "libavutil/libm.h"
- #include "libavutil/float_dsp.h"
-@@ -852,7 +853,7 @@ static int aac_encode_frame(AVCodecConte
-             /* Not so fast though */
-             ratio = sqrtf(ratio);
-         }
--        s->lambda = FFMIN(s->lambda * ratio, 65536.f);
-+        s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f);
-
-         /* Keep iterating if we must reduce and lambda is in the sky */
-         if (ratio > 0.9f && ratio < 1.1f) {
-@@ -897,7 +898,7 @@ static av_cold int aac_encode_end(AVCode
- {
-     AACEncContext *s = avctx->priv_data;
-
--    av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
-+    av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN);
-
-     ff_mdct_end(&s->mdct1024);
-     ff_mdct_end(&s->mdct128);
-Index: jellyfin-ffmpeg/libavcodec/aacpsy.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/aacpsy.c
-+++ jellyfin-ffmpeg/libavcodec/aacpsy.c
-@@ -308,6 +308,9 @@ static av_cold int psy_3gpp_init(FFPsyCo
-     const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
-     const float num_bark = calc_bark((float)bandwidth);
-
-+    if (bandwidth <= 0)
-+        return AVERROR(EINVAL);
-+
-     ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
-     if (!ctx->model_priv_data)
-         return AVERROR(ENOMEM);
-@@ -794,7 +797,7 @@ static void psy_3gpp_analyze_channel(FFP
-
-     if (pe < 1.15f * desired_pe) {
-         /* 6.6.1.3.6 "Final threshold modification by linearization" */
--        norm_fac = 1.0f / norm_fac;
-+        norm_fac = norm_fac ? 1.0f / norm_fac : 0;
-         for (w = 0; w < wi->num_windows*16; w += 16) {
-             for (g = 0; g < num_bands; g++) {
-                 AacPsyBand *band = &pch->band[w+g];
-Index: jellyfin-ffmpeg/libavcodec/aarch64/hevcdsp_idct_neon.S
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/aarch64/hevcdsp_idct_neon.S
-+++ jellyfin-ffmpeg/libavcodec/aarch64/hevcdsp_idct_neon.S
-@@ -573,14 +573,13 @@ idct_16x16 10
- // void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs)
- .macro idct_dc size, bitdepth
- function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1
--        movi            v1.8h, #((1 << (14 - \bitdepth))+1)
-         ld1r            {v4.8h}, [x0]
--        add             v4.8h, v4.8h, v1.8h
--        sshr            v0.8h, v4.8h, #(15 - \bitdepth)
--        sshr            v1.8h, v4.8h, #(15 - \bitdepth)
-+        srshr           v4.8h, v4.8h, #1
-+        srshr           v0.8h, v4.8h, #(14 - \bitdepth)
-+        srshr           v1.8h, v4.8h, #(14 - \bitdepth)
- .if \size > 4
--        sshr            v2.8h, v4.8h, #(15 - \bitdepth)
--        sshr            v3.8h, v4.8h, #(15 - \bitdepth)
-+        srshr           v2.8h, v4.8h, #(14 - \bitdepth)
-+        srshr           v3.8h, v4.8h, #(14 - \bitdepth)
- .if \size > 16 /* dc 32x32 */
-         mov             x2, #4
- 1:
-Index: jellyfin-ffmpeg/libavcodec/alsdec.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/alsdec.c
-+++ jellyfin-ffmpeg/libavcodec/alsdec.c
-@@ -1632,7 +1632,7 @@ static int read_frame_data(ALSDecContext
-     AVCodecContext *avctx = ctx->avctx;
-     GetBitContext *gb = &ctx->gb;
-     unsigned int div_blocks[32];                ///< block sizes.
--    unsigned int c;
-+    int c;
-     unsigned int js_blocks[2];
-     uint32_t bs_info = 0;
-     int ret;
-@@ -1810,14 +1810,17 @@ static int decode_frame(AVCodecContext *
-     else
-         ctx->cur_frame_length = sconf->frame_length;
-
--    ctx->highest_decoded_channel = 0;
-+    ctx->highest_decoded_channel = -1;
-     // decode the frame data
-     if ((invalid_frame = read_frame_data(ctx, ra_frame)) < 0)
-         av_log(ctx->avctx, AV_LOG_WARNING,
-                "Reading frame data failed. Skipping RA unit.\n");
-
--    if (ctx->highest_decoded_channel == 0)
-+    if (ctx->highest_decoded_channel == -1) {
-+        av_log(ctx->avctx, AV_LOG_WARNING,
-+               "No channel data decoded.\n");
-         return AVERROR_INVALIDDATA;
-+    }
-
-     ctx->frame_id++;
-
-Index: jellyfin-ffmpeg/libavcodec/av1_metadata_bsf.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/av1_metadata_bsf.c
-+++ jellyfin-ffmpeg/libavcodec/av1_metadata_bsf.c
-@@ -28,6 +28,7 @@ typedef struct AV1MetadataContext {
-     CBSBSFContext common;
-
-     int td;
-+    AV1RawOBU td_obu;
-
-     int color_primaries;
-     int transfer_characteristics;
-@@ -107,12 +108,11 @@ static int av1_metadata_update_fragment(
-                                         CodedBitstreamFragment *frag)
- {
-     AV1MetadataContext *ctx = bsf->priv_data;
--    AV1RawOBU td, *obu;
-     int err, i;
-
-     for (i = 0; i < frag->nb_units; i++) {
-         if (frag->units[i].type == AV1_OBU_SEQUENCE_HEADER) {
--            obu = frag->units[i].content;
-+            AV1RawOBU *obu = frag->units[i].content;
-             err = av1_metadata_update_sequence_header(bsf, &obu->obu.sequence_header);
-             if (err < 0)
-                 return err;
-@@ -124,12 +124,8 @@ static int av1_metadata_update_fragment(
-         if (ctx->td == BSF_ELEMENT_REMOVE)
-             ff_cbs_delete_unit(frag, 0);
-     } else if (pkt && ctx->td == BSF_ELEMENT_INSERT) {
--        td = (AV1RawOBU) {
--            .header.obu_type = AV1_OBU_TEMPORAL_DELIMITER,
--        };
--
-         err = ff_cbs_insert_unit_content(frag, 0, AV1_OBU_TEMPORAL_DELIMITER,
--                                         &td, NULL);
-+                                         &ctx->td_obu, NULL);
-         if (err < 0) {
-             av_log(bsf, AV_LOG_ERROR, "Failed to insert Temporal Delimiter.\n");
-             return err;
-@@ -155,6 +151,12 @@ static const CBSBSFType av1_metadata_typ
-
- static int av1_metadata_init(AVBSFContext *bsf)
- {
-+    AV1MetadataContext *ctx = bsf->priv_data;
-+
-+    ctx->td_obu = (AV1RawOBU) {
-+        .header.obu_type = AV1_OBU_TEMPORAL_DELIMITER,
-+    };
-+
-     return ff_cbs_bsf_generic_init(bsf, &av1_metadata_type);
- }
-
-Index: jellyfin-ffmpeg/libavcodec/clearvideo.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/clearvideo.c
-+++ jellyfin-ffmpeg/libavcodec/clearvideo.c
-@@ -722,8 +722,8 @@ static av_cold int clv_decode_init(AVCod
-     }
-
-     c->tile_shift = av_log2(c->tile_size);
--    if (1U << c->tile_shift != c->tile_size) {
--        av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2.\n", c->tile_size);
-+    if (1U << c->tile_shift != c->tile_size || c->tile_shift < 1) {
-+        av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2 > 1\n", c->tile_size);
-         return AVERROR_INVALIDDATA;
-     }
-
-Index: jellyfin-ffmpeg/libavcodec/crystalhd.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/crystalhd.c
-+++ jellyfin-ffmpeg/libavcodec/crystalhd.c
-@@ -785,6 +785,7 @@ static int crystalhd_receive_frame(AVCod
-         .flush          = flush, \
-         .bsfs           = bsf_name, \
-         .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
-+        .caps_internal  = FF_CODEC_CAP_SETS_FRAME_PROPS, \
-         .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE}, \
-         .wrapper_name   = "crystalhd", \
-     };
-Index: jellyfin-ffmpeg/libavcodec/cuviddec.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/cuviddec.c
-+++ jellyfin-ffmpeg/libavcodec/cuviddec.c
-@@ -1150,6 +1150,7 @@ static const AVCodecHWConfigInternal *co
-         .flush          = cuvid_flush, \
-         .bsfs           = bsf_name, \
-         .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
-+        .caps_internal  = FF_CODEC_CAP_SETS_FRAME_PROPS, \
-         .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
-                                                         AV_PIX_FMT_NV12, \
-                                                         AV_PIX_FMT_P010, \
-Index: jellyfin-ffmpeg/libavcodec/decode.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/decode.c
-+++ jellyfin-ffmpeg/libavcodec/decode.c
-@@ -233,9 +233,11 @@ int ff_decode_get_packet(AVCodecContext
-     if (ret < 0)
-         return ret;
-
--    ret = extract_packet_props(avctx->internal, pkt);
--    if (ret < 0)
--        goto finish;
-+    if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) {
-+        ret = extract_packet_props(avctx->internal, pkt);
-+        if (ret < 0)
-+            goto finish;
-+    }
-
-     ret = apply_param_change(avctx, pkt);
-     if (ret < 0)
-@@ -502,11 +504,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
-
-         pkt->data += consumed;
-         pkt->size -= consumed;
--        avci->last_pkt_props->size -= consumed; // See extract_packet_props() comment.
-         pkt->pts = AV_NOPTS_VALUE;
-         pkt->dts = AV_NOPTS_VALUE;
--        avci->last_pkt_props->pts = AV_NOPTS_VALUE;
--        avci->last_pkt_props->dts = AV_NOPTS_VALUE;
-+        if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) {
-+            avci->last_pkt_props->size -= consumed; // See extract_packet_props() comment.
-+            avci->last_pkt_props->pts = AV_NOPTS_VALUE;
-+            avci->last_pkt_props->dts = AV_NOPTS_VALUE;
-+        }
-     }
-
-     if (got_frame)
-@@ -548,6 +552,11 @@ static int decode_receive_frame_internal
-         if (ret == AVERROR_EOF)
-             avci->draining_done = 1;
-
-+        if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS) &&
-+            IS_EMPTY(avci->last_pkt_props) && av_fifo_size(avci->pkt_props) >= sizeof(*avci->last_pkt_props))
-+            av_fifo_generic_read(avci->pkt_props,
-+                                 avci->last_pkt_props, sizeof(*avci->last_pkt_props), NULL);
-+
-         if (!ret) {
-             frame->best_effort_timestamp = guess_correct_pts(avctx,
-                                                              frame->pts,
-@@ -1738,39 +1747,37 @@ int ff_decode_frame_props(AVCodecContext
-         { AV_PKT_DATA_S12M_TIMECODE, AV_FRAME_DATA_S12M_TIMECODE },
-     };
-
--    if (IS_EMPTY(pkt) && av_fifo_size(avctx->internal->pkt_props) >= sizeof(*pkt))
--        av_fifo_generic_read(avctx->internal->pkt_props,
--                             pkt, sizeof(*pkt), NULL);
--
--    frame->pts = pkt->pts;
-+    if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) {
-+        frame->pts = pkt->pts;
- #if FF_API_PKT_PTS
- FF_DISABLE_DEPRECATION_WARNINGS
--    frame->pkt_pts = pkt->pts;
-+        frame->pkt_pts = pkt->pts;
- FF_ENABLE_DEPRECATION_WARNINGS
- #endif
--    frame->pkt_pos = pkt->pos;
--    frame->pkt_duration = pkt->duration;
--    frame->pkt_size = pkt->size;
--
--    for (int i = 0; i < FF_ARRAY_ELEMS(sd); i++) {
--        buffer_size_t size;
--        uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size);
--        if (packet_sd) {
--            AVFrameSideData *frame_sd = av_frame_new_side_data(frame,
--                                                               sd[i].frame,
--                                                               size);
--            if (!frame_sd)
--                return AVERROR(ENOMEM);
-+        frame->pkt_pos = pkt->pos;
-+        frame->pkt_duration = pkt->duration;
-+        frame->pkt_size = pkt->size;
-+
-+        for (int i = 0; i < FF_ARRAY_ELEMS(sd); i++) {
-+            buffer_size_t size;
-+            uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size);
-+            if (packet_sd) {
-+                AVFrameSideData *frame_sd = av_frame_new_side_data(frame,
-+                                                                   sd[i].frame,
-+                                                                   size);
-+                if (!frame_sd)
-+                    return AVERROR(ENOMEM);
-
--            memcpy(frame_sd->data, packet_sd, size);
-+                memcpy(frame_sd->data, packet_sd, size);
-+            }
-         }
--    }
--    add_metadata_from_side_data(pkt, frame);
-+        add_metadata_from_side_data(pkt, frame);
-
--    if (pkt->flags & AV_PKT_FLAG_DISCARD) {
--        frame->flags |= AV_FRAME_FLAG_DISCARD;
--    } else {
--        frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD);
-+        if (pkt->flags & AV_PKT_FLAG_DISCARD) {
-+            frame->flags |= AV_FRAME_FLAG_DISCARD;
-+        } else {
-+            frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD);
-+        }
-     }
-     frame->reordered_opaque = avctx->reordered_opaque;
-
-Index: jellyfin-ffmpeg/libavcodec/dpx.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/dpx.c
-+++ jellyfin-ffmpeg/libavcodec/dpx.c
-@@ -242,6 +242,9 @@ static int decode_frame(AVCodecContext *
-         return AVERROR_PATCHWELCOME;
-     }
-
-+    if (bits_per_color > 31)
-+        return AVERROR_INVALIDDATA;
-+
-     buf += 820;
-     avctx->sample_aspect_ratio.num = read32(&buf, endian);
-     avctx->sample_aspect_ratio.den = read32(&buf, endian);
-@@ -316,7 +319,7 @@ static int decode_frame(AVCodecContext *
-             minCV = av_int2float(i);
-             maxCV = av_int2float(j);
-             if (bits_per_color >= 1 &&
--                minCV == 0.0f && maxCV == ((1<color_range = AVCOL_RANGE_JPEG;
-             } else if (bits_per_color >= 8 &&
-                        minCV == (1 <<(bits_per_color - 4)) &&
-Index: jellyfin-ffmpeg/libavcodec/exr.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/exr.c
-+++ jellyfin-ffmpeg/libavcodec/exr.c
-@@ -418,7 +418,7 @@ static int huf_decode(VLC *vlc, GetByteC
-
-     init_get_bits(&gbit, gb->buffer, nbits);
-     while (get_bits_left(&gbit) > 0 && oe < no) {
--        uint16_t x = get_vlc2(&gbit, vlc->table, 12, 2);
-+        uint16_t x = get_vlc2(&gbit, vlc->table, 12, 3);
-
-         if (x == run_sym) {
-             int run = get_bits(&gbit, 8);
-@@ -1059,11 +1059,11 @@ static int dwa_uncompress(EXRContext *s,
-         bytestream2_skip(&gb, ac_size);
-     }
-
--    if (dc_size > 0) {
-+    {
-         unsigned long dest_len = dc_count * 2LL;
-         GetByteContext agb = gb;
-
--        if (dc_count > (6LL * td->xsize * td->ysize + 63) / 64)
-+        if (dc_count != dc_w * dc_h * 3)
-             return AVERROR_INVALIDDATA;
-
-         av_fast_padded_malloc(&td->dc_data, &td->dc_size, FFALIGN(dest_len, 64) * 2);
-@@ -1795,6 +1795,7 @@ static int decode_header(EXRContext *s,
-         ymax = bytestream2_get_le32(gb);
-
-         if (xmin > xmax || ymin > ymax ||
-+            ymax == INT_MAX || xmax == INT_MAX ||
-             (unsigned)xmax - xmin >= INT_MAX ||
-             (unsigned)ymax - ymin >= INT_MAX) {
-             ret = AVERROR_INVALIDDATA;
-Index: jellyfin-ffmpeg/libavcodec/faxcompr.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/faxcompr.c
-+++ jellyfin-ffmpeg/libavcodec/faxcompr.c
-@@ -144,6 +144,8 @@ static int decode_uncompressed(AVCodecCo
-         return AVERROR_INVALIDDATA;
-     }
-     cwi = 10 - av_log2(cwi);
-+    if (get_bits_left(gb) < cwi + 1)
-+        return AVERROR_INVALIDDATA;
-     skip_bits(gb, cwi + 1);
-     if (cwi > 5) {
-         newmode = get_bits1(gb);
-@@ -209,6 +211,8 @@ static int decode_group3_1d_line(AVCodec
-     unsigned int run = 0;
-     unsigned int t;
-     for (;;) {
-+        if (get_bits_left(gb) <= 0)
-+            return AVERROR_INVALIDDATA;
-         t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2);
-         run += t;
-         if (t < 64) {
-@@ -227,7 +231,7 @@ static int decode_group3_1d_line(AVCodec
-             run = 0;
-             mode = !mode;
-         } else if ((int)t == -1) {
--            if (show_bits(gb, 12) == 15) {
-+            if (get_bits_left(gb) > 12 && show_bits(gb, 12) == 15) {
-                 int ret;
-                 skip_bits(gb, 12);
-                 ret = decode_uncompressed(avctx, gb, &pix_left, &runs, runend, &mode);
-@@ -254,7 +258,10 @@ static int decode_group3_2d_line(AVCodec
-     unsigned int offs = 0, run = 0;
-
-     while (offs < width) {
--        int cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1);
-+        int cmode;
-+        if (get_bits_left(gb) <= 0)
-+            return AVERROR_INVALIDDATA;
-+        cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1);
-         if (cmode == -1) {
-             av_log(avctx, AV_LOG_ERROR, "Incorrect mode VLC\n");
-             return AVERROR_INVALIDDATA;
-@@ -299,7 +306,10 @@ static int decode_group3_2d_line(AVCodec
-             mode = !mode;
-         }
-     } else if (cmode == 9 || cmode == 10) {
--        int xxx = get_bits(gb, 3);
-+        int xxx;
-+        if (get_bits_left(gb) < 3)
-+            return AVERROR_INVALIDDATA;
-+        xxx = get_bits(gb, 3);
-         if (cmode == 9 && xxx == 7) {
-             int ret;
-             int pix_left = width - offs;
-Index: jellyfin-ffmpeg/libavcodec/h263.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/h263.c
-+++ jellyfin-ffmpeg/libavcodec/h263.c
-@@ -29,6 +29,7 @@
-
- #include <limits.h>
-
-+#include "libavutil/thread.h"
- #include "avcodec.h"
- #include "mpegvideo.h"
- #include "h263.h"
-@@ -38,6 +39,17 @@
- #include "flv.h"
- #include "mpeg4video.h"
-
-+static av_cold void h263_init_rl_inter(void)
-+{
-+    static uint8_t h263_rl_inter_table[2][2 * MAX_RUN + MAX_LEVEL + 3];
-+    ff_rl_init(&ff_h263_rl_inter, h263_rl_inter_table);
-+}
-+
-+av_cold void ff_h263_init_rl_inter(void)
-+{
-+    static AVOnce init_static_once = AV_ONCE_INIT;
-+    ff_thread_once(&init_static_once, h263_init_rl_inter);
-+}
-
- void ff_h263_update_motion_val(MpegEncContext * s){
-     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
-Index: jellyfin-ffmpeg/libavcodec/h263.h
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/h263.h
-+++ jellyfin-ffmpeg/libavcodec/h263.h
-@@ -66,6 +66,7 @@ int16_t *ff_h263_pred_motion(MpegEncCont
-                              int *px, int *py);
- void ff_h263_encode_init(MpegEncContext *s);
- void ff_h263_decode_init_vlc(void);
-+void ff_h263_init_rl_inter(void);
- int ff_h263_decode_picture_header(MpegEncContext *s);
- int ff_h263_decode_gob_header(MpegEncContext *s);
- void ff_h263_update_motion_val(MpegEncContext * s);
-Index: jellyfin-ffmpeg/libavcodec/h263data.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/h263data.c
-+++ jellyfin-ffmpeg/libavcodec/h263data.c
-@@ -25,8 +25,6 @@
-
- #include <stdint.h>
-
--#include "libavutil/thread.h"
--
- #include "h263data.h"
- #include "mpegvideo.h"
-
-@@ -290,15 +288,3 @@ const AVRational ff_h263_pixel_aspect[16
-     { 0, 1 },
-     { 0, 1 },
- };
--
--static av_cold void h263_init_rl_inter(void)
--{
--    static uint8_t h263_rl_inter_table[2][2 * MAX_RUN + MAX_LEVEL + 3];
--    ff_rl_init(&ff_h263_rl_inter, h263_rl_inter_table);
--}
--
--av_cold void ff_h263_init_rl_inter(void)
--{
--    static AVOnce init_static_once = AV_ONCE_INIT;
--    ff_thread_once(&init_static_once, h263_init_rl_inter);
--}
-Index: jellyfin-ffmpeg/libavcodec/h263data.h
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/h263data.h
-+++ jellyfin-ffmpeg/libavcodec/h263data.h
-@@ -61,7 +61,6 @@ extern const int8_t ff_inter_run[102];
-
- extern RLTable ff_h263_rl_inter;
- extern RLTable ff_rl_intra_aic;
--void ff_h263_init_rl_inter(void);
-
- extern const uint16_t ff_h263_format[8][2];
-
-Index: jellyfin-ffmpeg/libavcodec/internal.h
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/internal.h
-+++ jellyfin-ffmpeg/libavcodec/internal.h
-@@ -78,6 +78,11 @@
-  * Codec handles avctx->thread_count == 0 (auto) internally.
-  */
- #define FF_CODEC_CAP_AUTO_THREADS           (1 << 7)
-+/**
-+ * Codec handles output frame properties internally instead of letting the
-+ * internal logic derive them from AVCodecInternal.last_pkt_props.
-+ */
-+#define FF_CODEC_CAP_SETS_FRAME_PROPS       (1 << 8)
-
- /**
-  * AVCodec.codec_tags termination value
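FF_CODEC_CAP_SETS_FRAME_PROPS is the pivot of the decode.c changes above:
a decoder that sets it takes over timestamp and side-data propagation
itself, and the generic last_pkt_props bookkeeping is skipped. A
hypothetical declaration (not from the patch) showing how a wrapper
decoder would opt in, in the same style as the crystalhd and cuviddec
hunks; this fragment assumes libavcodec's internal headers are available:

    AVCodec ff_example_decoder = {
        .name           = "example",
        .long_name      = NULL_IF_CONFIG_SMALL("example wrapper decoder"),
        .type           = AVMEDIA_TYPE_VIDEO,
        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
        /* output frame properties are produced by the codec itself */
        .caps_internal  = FF_CODEC_CAP_SETS_FRAME_PROPS,
    };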
PRESET_ALIAS(name, name, __VA_ARGS__) -+ -+static void nvenc_map_preset(NvencContext *ctx) -+{ -+ GUIDTuple presets[] = { -+#ifdef NVENC_HAVE_NEW_PRESETS -+ PRESET(P1), -+ PRESET(P2), -+ PRESET(P3), -+ PRESET(P4), -+ PRESET(P5), -+ PRESET(P6), -+ PRESET(P7), -+ PRESET_ALIAS(SLOW, P7, NVENC_TWO_PASSES), -+ PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS), -+ PRESET_ALIAS(FAST, P1, NVENC_ONE_PASS), -+ // Compat aliases -+ PRESET_ALIAS(DEFAULT, P4, NVENC_DEPRECATED_PRESET), -+ PRESET_ALIAS(HP, P1, NVENC_DEPRECATED_PRESET), -+ PRESET_ALIAS(HQ, P7, NVENC_DEPRECATED_PRESET), -+ PRESET_ALIAS(BD, P5, NVENC_DEPRECATED_PRESET), -+ PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -+ PRESET_ALIAS(LOW_LATENCY_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -+ PRESET_ALIAS(LOW_LATENCY_HQ, P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -+ PRESET_ALIAS(LOSSLESS_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), -+ PRESET_ALIAS(LOSSLESS_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), -+#else -+ PRESET(DEFAULT), -+ PRESET(HP), -+ PRESET(HQ), -+ PRESET(BD), -+ PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES), -+ PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS), -+ PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS), -+ PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY), -+ PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY), -+ PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY), -+ PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS), -+ PRESET(LOSSLESS_HP, NVENC_LOSSLESS), -+#endif -+ }; -+ -+ GUIDTuple *t = &presets[ctx->preset]; -+ -+ ctx->init_encode_params.presetGUID = t->guid; -+ ctx->flags = t->flags; -+ -+#ifdef NVENC_HAVE_NEW_PRESETS -+ if (ctx->tuning_info == NV_ENC_TUNING_INFO_LOSSLESS) -+ ctx->flags |= NVENC_LOSSLESS; -+#endif -+} -+ -+#undef PRESET -+#undef PRESET_ALIAS -+ - static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level) - { - #if NVENCAPI_CHECK_VERSION(11, 1) -@@ -358,7 +422,7 @@ static int nvenc_check_capabilities(AVCo - } - - ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE); -- if (ctx->preset >= PRESET_LOSSLESS_DEFAULT && ret <= 0) { -+ if (ctx->flags & NVENC_LOSSLESS && ret <= 0) { - av_log(avctx, AV_LOG_WARNING, "Lossless encoding not supported\n"); - return AVERROR(ENOSYS); - } -@@ -548,6 +612,11 @@ static av_cold int nvenc_setup_device(AV - return AVERROR_BUG; - } - -+ nvenc_map_preset(ctx); -+ -+ if (ctx->flags & NVENC_DEPRECATED_PRESET) -+ av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. Use p1 to p7 + -tune or fast/medium/slow.\n"); -+ - if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) { - AVHWFramesContext *frames_ctx; - AVHWDeviceContext *hwdev_ctx; -@@ -638,65 +707,6 @@ static av_cold int nvenc_setup_device(AV - return 0; - } - --typedef struct GUIDTuple { -- const GUID guid; -- int flags; --} GUIDTuple; -- --#define PRESET_ALIAS(alias, name, ...) \ -- [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ } -- --#define PRESET(name, ...) 
PRESET_ALIAS(name, name, __VA_ARGS__) -- --static void nvenc_map_preset(NvencContext *ctx) --{ -- GUIDTuple presets[] = { --#ifdef NVENC_HAVE_NEW_PRESETS -- PRESET(P1), -- PRESET(P2), -- PRESET(P3), -- PRESET(P4), -- PRESET(P5), -- PRESET(P6), -- PRESET(P7), -- PRESET_ALIAS(SLOW, P7, NVENC_TWO_PASSES), -- PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS), -- PRESET_ALIAS(FAST, P1, NVENC_ONE_PASS), -- // Compat aliases -- PRESET_ALIAS(DEFAULT, P4, NVENC_DEPRECATED_PRESET), -- PRESET_ALIAS(HP, P1, NVENC_DEPRECATED_PRESET), -- PRESET_ALIAS(HQ, P7, NVENC_DEPRECATED_PRESET), -- PRESET_ALIAS(BD, P5, NVENC_DEPRECATED_PRESET), -- PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -- PRESET_ALIAS(LOW_LATENCY_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -- PRESET_ALIAS(LOW_LATENCY_HQ, P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -- PRESET_ALIAS(LOSSLESS_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), -- PRESET_ALIAS(LOSSLESS_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), --#else -- PRESET(DEFAULT), -- PRESET(HP), -- PRESET(HQ), -- PRESET(BD), -- PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES), -- PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS), -- PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS), -- PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY), -- PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY), -- PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY), -- PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS), -- PRESET(LOSSLESS_HP, NVENC_LOSSLESS), --#endif -- }; -- -- GUIDTuple *t = &presets[ctx->preset]; -- -- ctx->init_encode_params.presetGUID = t->guid; -- ctx->flags = t->flags; --} -- --#undef PRESET --#undef PRESET_ALIAS -- - static av_cold void set_constqp(AVCodecContext *avctx) - { - NvencContext *ctx = avctx->priv_data; -@@ -1254,18 +1264,15 @@ static av_cold int nvenc_setup_encoder(A - - ctx->init_encode_params.encodeConfig = &ctx->encode_config; - -- nvenc_map_preset(ctx); -- -- if (ctx->flags & NVENC_DEPRECATED_PRESET) -- av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. 
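Note: the point of moving nvenc_map_preset() into nvenc_setup_device() is ordering. A simplified sketch of the resulting control flow (bodies elided, not the verbatim code):

    /* in nvenc_setup_device(), before capability checks: */
    nvenc_map_preset(ctx);                 /* ctx->flags filled from the preset table */

    /* later, in nvenc_check_capabilities(): */
    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
    if ((ctx->flags & NVENC_LOSSLESS) && ret <= 0)
        return AVERROR(ENOSYS);            /* the flag is now meaningful here */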
Use p1 to p7 + -tune or fast/medium/slow.\n"); -- - preset_config.version = NV_ENC_PRESET_CONFIG_VER; - preset_config.presetCfg.version = NV_ENC_CONFIG_VER; - - #ifdef NVENC_HAVE_NEW_PRESETS - ctx->init_encode_params.tuningInfo = ctx->tuning_info; - -- if (ctx->flags & NVENC_LOWLATENCY) -+ if (ctx->flags & NVENC_LOSSLESS) -+ ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOSSLESS; -+ else if (ctx->flags & NVENC_LOWLATENCY) - ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY; - - nv_status = p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder, -@@ -1307,9 +1314,6 @@ static av_cold int nvenc_setup_encoder(A - * */ - if (ctx->rc_lookahead == 0 && ctx->encode_config.rcParams.enableLookahead) - ctx->rc_lookahead = ctx->encode_config.rcParams.lookaheadDepth; -- -- if (ctx->init_encode_params.tuningInfo == NV_ENC_TUNING_INFO_LOSSLESS) -- ctx->flags |= NVENC_LOSSLESS; - #endif - - if (ctx->weighted_pred == 1) -Index: jellyfin-ffmpeg/libavcodec/nvenc.h -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/nvenc.h -+++ jellyfin-ffmpeg/libavcodec/nvenc.h -@@ -103,7 +103,7 @@ enum { - PRESET_LOW_LATENCY_DEFAULT , - PRESET_LOW_LATENCY_HQ , - PRESET_LOW_LATENCY_HP, -- PRESET_LOSSLESS_DEFAULT, // lossless presets must be the last ones -+ PRESET_LOSSLESS_DEFAULT, - PRESET_LOSSLESS_HP, - #ifdef NVENC_HAVE_NEW_PRESETS - PRESET_P1, -Index: jellyfin-ffmpeg/libavcodec/nvenc_hevc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/nvenc_hevc.c -+++ jellyfin-ffmpeg/libavcodec/nvenc_hevc.c -@@ -148,7 +148,7 @@ static const AVOption options[] = { - { "middle", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, VE, "b_ref_mode" }, - #endif - { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, -- { "s12m_tc", "Use timecode (if available)", OFFSET(s12m_tc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, -+ { "s12m_tc", "Use timecode (if available)", OFFSET(s12m_tc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, - { "dpb_size", "Specifies the DPB size used for encoding (0 means automatic)", - OFFSET(dpb_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, - #ifdef NVENC_HAVE_MULTIPASS -Index: jellyfin-ffmpeg/libavcodec/pngdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/pngdec.c -+++ jellyfin-ffmpeg/libavcodec/pngdec.c -@@ -1644,7 +1644,7 @@ static int decode_frame_apng(AVCodecCont - if (!(avctx->active_thread_type & FF_THREAD_FRAME)) { - if (s->dispose_op == APNG_DISPOSE_OP_PREVIOUS) { - ff_thread_release_buffer(avctx, &s->picture); -- } else if (s->dispose_op == APNG_DISPOSE_OP_NONE) { -+ } else { - ff_thread_release_buffer(avctx, &s->last_picture); - FFSWAP(ThreadFrame, s->picture, s->last_picture); - } -@@ -1693,8 +1693,8 @@ static int update_thread_context(AVCodec - pdst->hdr_state |= psrc->hdr_state; - } - -- src_frame = psrc->dispose_op == APNG_DISPOSE_OP_NONE ? -- &psrc->picture : &psrc->last_picture; -+ src_frame = psrc->dispose_op == APNG_DISPOSE_OP_PREVIOUS ? 
-+ &psrc->last_picture : &psrc->picture; - - ff_thread_release_buffer(dst, &pdst->last_picture); - if (src_frame && src_frame->f->data[0]) { -Index: jellyfin-ffmpeg/libavcodec/rv10.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/rv10.c -+++ jellyfin-ffmpeg/libavcodec/rv10.c -@@ -154,7 +154,7 @@ static int rv10_decode_picture_header(Mp - return mb_count; - } - --static int rv20_decode_picture_header(RVDecContext *rv) -+static int rv20_decode_picture_header(RVDecContext *rv, int whole_size) - { - MpegEncContext *s = &rv->m; - int seq, mb_pos, i, ret; -@@ -232,6 +232,10 @@ static int rv20_decode_picture_header(RV - "attempting to change resolution to %dx%d\n", new_w, new_h); - if (av_image_check_size(new_w, new_h, 0, s->avctx) < 0) - return AVERROR_INVALIDDATA; -+ -+ if (whole_size < (new_w + 15)/16 * ((new_h + 15)/16) / 8) -+ return AVERROR_INVALIDDATA; -+ - ff_mpv_common_end(s); - - // attempt to keep aspect during typical resolution switches -@@ -447,7 +451,7 @@ static int rv10_decode_packet(AVCodecCon - if (s->codec_id == AV_CODEC_ID_RV10) - mb_count = rv10_decode_picture_header(s); - else -- mb_count = rv20_decode_picture_header(rv); -+ mb_count = rv20_decode_picture_header(rv, whole_size); - if (mb_count < 0) { - if (mb_count != ERROR_SKIP_FRAME) - av_log(s->avctx, AV_LOG_ERROR, "HEADER ERROR\n"); -Index: jellyfin-ffmpeg/libavcodec/svq1enc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/svq1enc.c -+++ jellyfin-ffmpeg/libavcodec/svq1enc.c -@@ -487,9 +487,10 @@ static av_cold int svq1_encode_end(AVCod - SVQ1EncContext *const s = avctx->priv_data; - int i; - -- av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", -- s->rd_total / (double)(avctx->width * avctx->height * -- avctx->frame_number)); -+ if (avctx->frame_number) -+ av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", -+ s->rd_total / (double)(avctx->width * avctx->height * -+ avctx->frame_number)); - - s->m.mb_type = NULL; - ff_mpv_common_end(&s->m); -Index: jellyfin-ffmpeg/libavcodec/ttadata.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/ttadata.c -+++ jellyfin-ffmpeg/libavcodec/ttadata.c -@@ -30,7 +30,8 @@ const uint32_t ff_tta_shift_1[] = { - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x80000000, 0x80000000, 0x80000000, 0x80000000, -- 0x80000000, 0x80000000, 0x80000000, 0x80000000 -+ 0x80000000, 0x80000000, 0x80000000, 0x80000000, -+ 0xFFFFFFFF - }; - - const uint32_t * const ff_tta_shift_16 = ff_tta_shift_1 + 4; -Index: jellyfin-ffmpeg/libavcodec/ttmlenc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/ttmlenc.c -+++ jellyfin-ffmpeg/libavcodec/ttmlenc.c -@@ -206,5 +206,5 @@ AVCodec ff_ttml_encoder = { - .init = ttml_encode_init, - .encode_sub = ttml_encode_frame, - .close = ttml_encode_close, -- .capabilities = FF_CODEC_CAP_INIT_CLEANUP, -+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, - }; -Index: jellyfin-ffmpeg/libavcodec/utils.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/utils.c -+++ jellyfin-ffmpeg/libavcodec/utils.c -@@ -272,6 +272,16 @@ void avcodec_align_dimensions2(AVCodecCo - w_align = 8; - h_align = 8; - } -+ if (s->codec_id == AV_CODEC_ID_MJPEG || -+ s->codec_id == AV_CODEC_ID_MJPEGB || -+ s->codec_id == AV_CODEC_ID_LJPEG || -+ s->codec_id == AV_CODEC_ID_SMVJPEG || -+ s->codec_id == 
AV_CODEC_ID_AMV || -+ s->codec_id == AV_CODEC_ID_SP5X || -+ s->codec_id == AV_CODEC_ID_JPEGLS) { -+ w_align = 8; -+ h_align = 2*8; -+ } - break; - case AV_PIX_FMT_BGR24: - if ((s->codec_id == AV_CODEC_ID_MSZH) || -@@ -773,21 +783,33 @@ static int get_audio_frame_duration(enum - if (ba > 0) { - /* calc from frame_bytes, channels, and block_align */ - int blocks = frame_bytes / ba; -+ int64_t tmp = 0; - switch (id) { - case AV_CODEC_ID_ADPCM_IMA_WAV: - if (bps < 2 || bps > 5) - return 0; -- return blocks * (1 + (ba - 4 * ch) / (bps * ch) * 8); -+ tmp = blocks * (1LL + (ba - 4 * ch) / (bps * ch) * 8); -+ break; - case AV_CODEC_ID_ADPCM_IMA_DK3: -- return blocks * (((ba - 16) * 2 / 3 * 4) / ch); -+ tmp = blocks * (((ba - 16LL) * 2 / 3 * 4) / ch); -+ break; - case AV_CODEC_ID_ADPCM_IMA_DK4: -- return blocks * (1 + (ba - 4 * ch) * 2 / ch); -+ tmp = blocks * (1 + (ba - 4LL * ch) * 2 / ch); -+ break; - case AV_CODEC_ID_ADPCM_IMA_RAD: -- return blocks * ((ba - 4 * ch) * 2 / ch); -+ tmp = blocks * ((ba - 4LL * ch) * 2 / ch); -+ break; - case AV_CODEC_ID_ADPCM_MS: -- return blocks * (2 + (ba - 7 * ch) * 2LL / ch); -+ tmp = blocks * (2 + (ba - 7LL * ch) * 2LL / ch); -+ break; - case AV_CODEC_ID_ADPCM_MTAF: -- return blocks * (ba - 16) * 2 / ch; -+ tmp = blocks * (ba - 16LL) * 2 / ch; -+ break; -+ } -+ if (tmp) { -+ if (tmp != (int)tmp) -+ return 0; -+ return tmp; - } - } - -Index: jellyfin-ffmpeg/libavcodec/vaapi_av1.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/vaapi_av1.c -+++ jellyfin-ffmpeg/libavcodec/vaapi_av1.c -@@ -292,7 +292,7 @@ static int vaapi_av1_decode_slice(AVCode - err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &slice_param, - sizeof(VASliceParameterBufferAV1), - buffer, -- s->tile_group_info[i].tile_size); -+ size); - if (err) { - ff_vaapi_decode_cancel(avctx, pic); - return err; -Index: jellyfin-ffmpeg/libavcodec/vc1.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/vc1.c -+++ jellyfin-ffmpeg/libavcodec/vc1.c -@@ -672,6 +672,8 @@ int ff_vc1_parse_frame_header(VC1Context - if (v->s.pict_type == AV_PICTURE_TYPE_P) - v->rnd ^= 1; - -+ if (get_bits_left(gb) < 5) -+ return AVERROR_INVALIDDATA; - /* Quantizer stuff */ - pqindex = get_bits(gb, 5); - if (!pqindex) -@@ -764,6 +766,9 @@ int ff_vc1_parse_frame_header(VC1Context - av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: " - "Imode: %i, Invert: %i\n", status>>1, status&1); - -+ if (get_bits_left(gb) < 4) -+ return AVERROR_INVALIDDATA; -+ - /* Hopefully this is correct for P-frames */ - v->s.mv_table_index = get_bits(gb, 2); //but using ff_vc1_ tables - v->cbptab = get_bits(gb, 2); -Index: jellyfin-ffmpeg/libavcodec/vc2enc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/vc2enc.c -+++ jellyfin-ffmpeg/libavcodec/vc2enc.c -@@ -982,6 +982,8 @@ static av_cold int vc2_encode_frame(AVCo - } - - s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); -+ if (s->slice_min_bytes < 0) -+ return AVERROR(EINVAL); - - ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced); - if (ret) -Index: jellyfin-ffmpeg/libavfilter/f_metadata.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/f_metadata.c -+++ jellyfin-ffmpeg/libavfilter/f_metadata.c -@@ -304,9 +304,6 @@ static int filter_frame(AVFilterLink *in - AVDictionary **metadata = &frame->metadata; - 
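Note: the get_audio_frame_duration() hunks above all share one overflow rule; a condensed sketch (names shortened for illustration):

    int64_t tmp = blocks * (int64_t)samples_per_block;  /* 64-bit intermediate */
    if (tmp != (int)tmp)       /* product does not round-trip through int */
        return 0;              /* report the duration as unknown */
    return tmp;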
AVDictionaryEntry *e; - -- if (!*metadata && s->mode != METADATA_ADD) -- return ff_filter_frame(outlink, frame); -- - e = av_dict_get(*metadata, !s->key ? "" : s->key, NULL, - !s->key ? AV_DICT_IGNORE_SUFFIX: 0); - -Index: jellyfin-ffmpeg/libavfilter/vf_dctdnoiz.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_dctdnoiz.c -+++ jellyfin-ffmpeg/libavfilter/vf_dctdnoiz.c -@@ -564,6 +564,9 @@ static int config_input(AVFilterLink *in - inlink->h - s->pr_height); - - max_slice_h = s->pr_height / ((s->bsize - 1) * 2); -+ if (max_slice_h == 0) -+ return AVERROR(EINVAL); -+ - s->nb_threads = FFMIN3(MAX_THREADS, ff_filter_get_nb_threads(ctx), max_slice_h); - av_log(ctx, AV_LOG_DEBUG, "threads: [max=%d hmax=%d user=%d] => %d\n", - MAX_THREADS, max_slice_h, ff_filter_get_nb_threads(ctx), s->nb_threads); -Index: jellyfin-ffmpeg/libavfilter/vf_overlay_cuda.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_overlay_cuda.c -+++ jellyfin-ffmpeg/libavfilter/vf_overlay_cuda.c -@@ -63,6 +63,7 @@ typedef struct OverlayCUDAContext { - enum AVPixelFormat in_format_overlay; - enum AVPixelFormat in_format_main; - -+ AVBufferRef *hw_device_ctx; - AVCUDADeviceContext *hwctx; - - CUcontext cu_ctx; -@@ -256,6 +257,9 @@ static av_cold void overlay_cuda_uninit( - CHECK_CU(cu->cuModuleUnload(ctx->cu_module)); - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - } -+ -+ av_buffer_unref(&ctx->hw_device_ctx); -+ ctx->hwctx = NULL; - } - - /** -@@ -341,13 +345,19 @@ static int overlay_cuda_config_output(AV - - // initialize - -- ctx->hwctx = frames_ctx->device_ctx->hwctx; -+ ctx->hw_device_ctx = av_buffer_ref(frames_ctx->device_ref); -+ if (!ctx->hw_device_ctx) -+ return AVERROR(ENOMEM); -+ ctx->hwctx = ((AVHWDeviceContext*)ctx->hw_device_ctx->data)->hwctx; -+ - cuda_ctx = ctx->hwctx->cuda_ctx; - ctx->fs.time_base = inlink->time_base; - - ctx->cu_stream = ctx->hwctx->stream; - - outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); -+ if (!outlink->hw_frames_ctx) -+ return AVERROR(ENOMEM); - - // load functions - -Index: jellyfin-ffmpeg/libavfilter/vf_vmafmotion.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_vmafmotion.c -+++ jellyfin-ffmpeg/libavfilter/vf_vmafmotion.c -@@ -238,6 +238,9 @@ int ff_vmafmotion_init(VMAFMotionData *s - int i; - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); - -+ if (w < 3 || h < 3) -+ return AVERROR(EINVAL); -+ - s->width = w; - s->height = h; - s->stride = FFALIGN(w * sizeof(uint16_t), 32); -Index: jellyfin-ffmpeg/libavfilter/vf_yadif.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_yadif.c -+++ jellyfin-ffmpeg/libavfilter/vf_yadif.c -@@ -123,20 +123,22 @@ static void filter_edges(void *dst1, voi - uint8_t *next2 = parity ? cur : next; - - const int edge = MAX_ALIGN - 1; -+ int offset = FFMAX(w - edge, 3); - - /* Only edge pixels need to be processed here. A constant value of false - * for is_not_edge should let the compiler ignore the whole branch. */ -- FILTER(0, 3, 0) -+ FILTER(0, FFMIN(3, w), 0) - -- dst = (uint8_t*)dst1 + w - edge; -- prev = (uint8_t*)prev1 + w - edge; -- cur = (uint8_t*)cur1 + w - edge; -- next = (uint8_t*)next1 + w - edge; -+ dst = (uint8_t*)dst1 + offset; -+ prev = (uint8_t*)prev1 + offset; -+ cur = (uint8_t*)cur1 + offset; -+ next = (uint8_t*)next1 + offset; - prev2 = (uint8_t*)(parity ? 
prev : cur); - next2 = (uint8_t*)(parity ? cur : next); - -- FILTER(w - edge, w - 3, 1) -- FILTER(w - 3, w, 0) -+ FILTER(offset, w - 3, 1) -+ offset = FFMAX(offset, w - 3); -+ FILTER(offset, w, 0) - } - - -@@ -170,21 +172,23 @@ static void filter_edges_16bit(void *dst - uint16_t *next2 = parity ? cur : next; - - const int edge = MAX_ALIGN / 2 - 1; -+ int offset = FFMAX(w - edge, 3); - - mrefs /= 2; - prefs /= 2; - -- FILTER(0, 3, 0) -+ FILTER(0, FFMIN(3, w), 0) - -- dst = (uint16_t*)dst1 + w - edge; -- prev = (uint16_t*)prev1 + w - edge; -- cur = (uint16_t*)cur1 + w - edge; -- next = (uint16_t*)next1 + w - edge; -+ dst = (uint16_t*)dst1 + offset; -+ prev = (uint16_t*)prev1 + offset; -+ cur = (uint16_t*)cur1 + offset; -+ next = (uint16_t*)next1 + offset; - prev2 = (uint16_t*)(parity ? prev : cur); - next2 = (uint16_t*)(parity ? cur : next); - -- FILTER(w - edge, w - 3, 1) -- FILTER(w - 3, w, 0) -+ FILTER(offset, w - 3, 1) -+ offset = FFMAX(offset, w - 3); -+ FILTER(offset, w, 0) - } - - static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) -Index: jellyfin-ffmpeg/libavformat/asfdec_o.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/asfdec_o.c -+++ jellyfin-ffmpeg/libavformat/asfdec_o.c -@@ -685,7 +685,7 @@ static int asf_read_properties(AVFormatC - return 0; - } - --static int parse_video_info(AVIOContext *pb, AVStream *st) -+static int parse_video_info(AVFormatContext *avfmt, AVIOContext *pb, AVStream *st) - { - uint16_t size_asf; // ASF-specific Format Data size - uint32_t size_bmp; // BMP_HEADER-specific Format Data size -@@ -700,19 +700,10 @@ static int parse_video_info(AVIOContext - st->codecpar->codec_id = ff_codec_get_id(ff_codec_bmp_tags, tag); - size_bmp = FFMAX(size_asf, size_bmp); - -- if (size_bmp > BMP_HEADER_SIZE && -- size_bmp < INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) { -- int ret; -- st->codecpar->extradata_size = size_bmp - BMP_HEADER_SIZE; -- if (!(st->codecpar->extradata = av_malloc(st->codecpar->extradata_size + -- AV_INPUT_BUFFER_PADDING_SIZE))) { -- st->codecpar->extradata_size = 0; -- return AVERROR(ENOMEM); -- } -- memset(st->codecpar->extradata + st->codecpar->extradata_size , 0, -- AV_INPUT_BUFFER_PADDING_SIZE); -- if ((ret = avio_read(pb, st->codecpar->extradata, -- st->codecpar->extradata_size)) < 0) -+ if (size_bmp > BMP_HEADER_SIZE) { -+ int ret = ff_get_extradata(avfmt, st->codecpar, pb, size_bmp - BMP_HEADER_SIZE); -+ -+ if (ret < 0) - return ret; - } - return 0; -@@ -795,7 +786,7 @@ static int asf_read_stream_properties(AV - break; - case AVMEDIA_TYPE_VIDEO: - asf_st->type = AVMEDIA_TYPE_VIDEO; -- if ((ret = parse_video_info(pb, st)) < 0) -+ if ((ret = parse_video_info(s, pb, st)) < 0) - return ret; - break; - default: -Index: jellyfin-ffmpeg/libavformat/avio.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/avio.c -+++ jellyfin-ffmpeg/libavformat/avio.c -@@ -316,8 +316,11 @@ int ffurl_open_whitelist(URLContext **pu - int ret = ffurl_alloc(puc, filename, flags, int_cb); - if (ret < 0) - return ret; -- if (parent) -- av_opt_copy(*puc, parent); -+ if (parent) { -+ ret = av_opt_copy(*puc, parent); -+ if (ret < 0) -+ goto fail; -+ } - if (options && - (ret = av_opt_set_dict(*puc, options)) < 0) - goto fail; -Index: jellyfin-ffmpeg/libavformat/cafdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/cafdec.c -+++ jellyfin-ffmpeg/libavformat/cafdec.c -@@ -79,7 
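Note: a worked example of the yadif edge clamping above, assuming MAX_ALIGN is 8 so that edge == 7 in the 8-bit path. With w == 8 the old passes were FILTER(0, 3, 0), FILTER(1, 5, 1), FILTER(5, 8, 0): the middle pass began at x == 1, where the spatial checks read pixels before the start of the line. With offset = FFMAX(w - edge, 3) the passes become FILTER(0, 3, 0), FILTER(3, 5, 1), FILTER(5, 8, 0), and every access stays inside [0, w).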
+79,7 @@ static int read_desc_chunk(AVFormatConte - st->codecpar->channels = avio_rb32(pb); - st->codecpar->bits_per_coded_sample = avio_rb32(pb); - -- if (caf->bytes_per_packet < 0 || caf->frames_per_packet < 0) -+ if (caf->bytes_per_packet < 0 || caf->frames_per_packet < 0 || st->codecpar->channels < 0) - return AVERROR_INVALIDDATA; - - /* calculate bit rate for constant size packets */ -Index: jellyfin-ffmpeg/libavformat/fifo.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/fifo.c -+++ jellyfin-ffmpeg/libavformat/fifo.c -@@ -593,7 +593,7 @@ static int fifo_write_packet(AVFormatCon - goto fail; - } - -- if (fifo->timeshift && pkt->dts != AV_NOPTS_VALUE) -+ if (fifo->timeshift && pkt && pkt->dts != AV_NOPTS_VALUE) - atomic_fetch_add_explicit(&fifo->queue_duration, next_duration(avf, pkt, &fifo->last_sent_dts), memory_order_relaxed); - - return ret; -Index: jellyfin-ffmpeg/libavformat/id3v2.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/id3v2.c -+++ jellyfin-ffmpeg/libavformat/id3v2.c -@@ -816,7 +816,7 @@ static void id3v2_parse(AVIOContext *pb, - int isv34, unsync; - unsigned tlen; - char tag[5]; -- int64_t next, end = avio_tell(pb) + len; -+ int64_t next, end = avio_tell(pb); - int taghdrlen; - const char *reason = NULL; - AVIOContext pb_local; -@@ -828,6 +828,10 @@ static void id3v2_parse(AVIOContext *pb, - av_unused int uncompressed_buffer_size = 0; - const char *comm_frame; - -+ if (end > INT64_MAX - len - 10) -+ return; -+ end += len; -+ - av_log(s, AV_LOG_DEBUG, "id3v2 ver:%d flags:%02X len:%d\n", version, flags, len); - - switch (version) { -Index: jellyfin-ffmpeg/libavformat/matroskaenc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/matroskaenc.c -+++ jellyfin-ffmpeg/libavformat/matroskaenc.c -@@ -1768,6 +1768,7 @@ static int mkv_write_attachments(AVForma - put_ebml_string(dyn_cp, MATROSKA_ID_FILEDESC, t->value); - if (!(t = av_dict_get(st->metadata, "filename", NULL, 0))) { - av_log(s, AV_LOG_ERROR, "Attachment stream %d has no filename tag.\n", i); -+ ffio_free_dyn_buf(&dyn_cp); - return AVERROR(EINVAL); - } - put_ebml_string(dyn_cp, MATROSKA_ID_FILENAME, t->value); -Index: jellyfin-ffmpeg/libavformat/moflex.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/moflex.c -+++ jellyfin-ffmpeg/libavformat/moflex.c -@@ -172,7 +172,6 @@ static int moflex_read_sync(AVFormatCont - unsigned type, ssize, codec_id = 0; - unsigned codec_type, width = 0, height = 0, sample_rate = 0, channels = 0; - int stream_index = -1; -- int format; - AVRational fps; - - read_var_byte(s, &type); -@@ -213,7 +212,6 @@ static int moflex_read_sync(AVFormatCont - fps.den = avio_rb16(pb); - width = avio_rb16(pb); - height = avio_rb16(pb); -- format = AV_PIX_FMT_YUV420P; - avio_skip(pb, type == 3 ? 
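Note: the id3v2.c hunk above is the usual check-before-add idiom; a sketch (the extra 10 presumably reserves room for an ID3v2 footer):

    int64_t end = avio_tell(pb);
    if (end > INT64_MAX - len - 10)   /* end + len (+ footer) would overflow */
        return;
    end += len;                       /* now safe to form the end offset */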
3 : 2); - break; - case 4: -@@ -235,7 +233,6 @@ static int moflex_read_sync(AVFormatCont - st->codecpar->height = height; - st->codecpar->sample_rate= sample_rate; - st->codecpar->channels = channels; -- st->codecpar->format = format; - st->priv_data = av_packet_alloc(); - if (!st->priv_data) - return AVERROR(ENOMEM); -Index: jellyfin-ffmpeg/libavformat/mov.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mov.c -+++ jellyfin-ffmpeg/libavformat/mov.c -@@ -4700,6 +4700,8 @@ static int mov_read_chap(MOVContext *c, - for (i = 0; i < num && !pb->eof_reached; i++) - c->chapter_tracks[i] = avio_rb32(pb); - -+ c->nb_chapter_tracks = i; -+ - return 0; - } - -@@ -5124,7 +5126,9 @@ static int mov_read_sidx(MOVContext *c, - if (frag_stream_info) - frag_stream_info->sidx_pts = timestamp; - -- if (av_sat_add64(offset, size) != offset + size) -+ if (av_sat_add64(offset, size) != offset + size || -+ av_sat_add64(pts, duration) != pts + (uint64_t)duration -+ ) - return AVERROR_INVALIDDATA; - offset += size; - pts += duration; -@@ -5464,7 +5468,7 @@ static int mov_read_mdcv(MOVContext *c, - - sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data; - -- if (atom.size < 24) { -+ if (atom.size < 24 || sc->mastering) { - av_log(c->fc, AV_LOG_ERROR, "Invalid Mastering Display Color Volume box\n"); - return AVERROR_INVALIDDATA; - } -@@ -5512,6 +5516,11 @@ static int mov_read_coll(MOVContext *c, - } - avio_skip(pb, 3); /* flags */ - -+ if (sc->coll){ -+ av_log(c->fc, AV_LOG_WARNING, "Ignoring duplicate COLL\n"); -+ return 0; -+ } -+ - sc->coll = av_content_light_metadata_alloc(&sc->coll_size); - if (!sc->coll) - return AVERROR(ENOMEM); -Index: jellyfin-ffmpeg/libavformat/movenc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/movenc.c -+++ jellyfin-ffmpeg/libavformat/movenc.c -@@ -797,6 +797,7 @@ static int mov_write_dfla_tag(AVIOContex - static int mov_write_dops_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track) - { - int64_t pos = avio_tell(pb); -+ int channels, channel_map; - avio_wb32(pb, 0); - ffio_wfourcc(pb, "dOps"); - avio_w8(pb, 0); /* Version */ -@@ -807,12 +808,22 @@ static int mov_write_dops_tag(AVFormatCo - /* extradata contains an Ogg OpusHead, other than byte-ordering and - OpusHead's preceeding magic/version, OpusSpecificBox is currently - identical. */ -- avio_w8(pb, AV_RB8(track->par->extradata + 9)); /* OuputChannelCount */ -+ channels = AV_RB8(track->par->extradata + 9); -+ channel_map = AV_RB8(track->par->extradata + 18); -+ -+ avio_w8(pb, channels); /* OuputChannelCount */ - avio_wb16(pb, AV_RL16(track->par->extradata + 10)); /* PreSkip */ - avio_wb32(pb, AV_RL32(track->par->extradata + 12)); /* InputSampleRate */ - avio_wb16(pb, AV_RL16(track->par->extradata + 16)); /* OutputGain */ -+ avio_w8(pb, channel_map); /* ChannelMappingFamily */ - /* Write the rest of the header out without byte-swapping. 
*/ -- avio_write(pb, track->par->extradata + 18, track->par->extradata_size - 18); -+ if (channel_map) { -+ if (track->par->extradata_size < 21 + channels) { -+ av_log(s, AV_LOG_ERROR, "invalid extradata size\n"); -+ return AVERROR_INVALIDDATA; -+ } -+ avio_write(pb, track->par->extradata + 19, 2 + channels); /* ChannelMappingTable */ -+ } - - return update_size(pb, pos); - } -@@ -2166,11 +2177,13 @@ static int mov_write_video_tag(AVFormatC - avio_wb16(pb, 0x18); /* Reserved */ - - if (track->mode == MODE_MOV && track->par->format == AV_PIX_FMT_PAL8) { -- int pal_size = 1 << track->par->bits_per_coded_sample; -- int i; -+ int pal_size, i; - avio_wb16(pb, 0); /* Color table ID */ - avio_wb32(pb, 0); /* Color table seed */ - avio_wb16(pb, 0x8000); /* Color table flags */ -+ if (track->par->bits_per_coded_sample < 0 || track->par->bits_per_coded_sample > 8) -+ return AVERROR(EINVAL); -+ pal_size = 1 << track->par->bits_per_coded_sample; - avio_wb16(pb, pal_size - 1); /* Color table size (zero-relative) */ - for (i = 0; i < pal_size; i++) { - uint32_t rgb = track->palette[i]; -Index: jellyfin-ffmpeg/libavformat/mpegts.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mpegts.c -+++ jellyfin-ffmpeg/libavformat/mpegts.c -@@ -2026,6 +2026,7 @@ int ff_parse_mpeg2_descriptor(AVFormatCo - return AVERROR_INVALIDDATA; - if (channel_config_code <= 0x8) { - st->codecpar->extradata[9] = channels = channel_config_code ? channel_config_code : 2; -+ AV_WL32(&st->codecpar->extradata[12], 48000); - st->codecpar->extradata[18] = channel_config_code ? (channels > 2) : /* Dual Mono */ 255; - st->codecpar->extradata[19] = opus_stream_cnt[channel_config_code]; - st->codecpar->extradata[20] = opus_coupled_stream_cnt[channel_config_code]; -Index: jellyfin-ffmpeg/libavformat/mvdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mvdec.c -+++ jellyfin-ffmpeg/libavformat/mvdec.c -@@ -157,6 +157,8 @@ static int parse_audio_var(AVFormatConte - return set_channels(avctx, st, var_read_int(pb, size)); - } else if (!strcmp(name, "SAMPLE_RATE")) { - st->codecpar->sample_rate = var_read_int(pb, size); -+ if (st->codecpar->sample_rate <= 0) -+ return AVERROR_INVALIDDATA; - avpriv_set_pts_info(st, 33, 1, st->codecpar->sample_rate); - } else if (!strcmp(name, "SAMPLE_WIDTH")) { - uint64_t bpc = var_read_int(pb, size) * (uint64_t)8; -Index: jellyfin-ffmpeg/libavformat/mvi.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mvi.c -+++ jellyfin-ffmpeg/libavformat/mvi.c -@@ -32,7 +32,6 @@ - - typedef struct MviDemuxContext { - unsigned int (*get_int)(AVIOContext *); -- uint32_t audio_data_size; - uint64_t audio_size_counter; - uint64_t audio_frame_size; - int audio_size_left; -@@ -46,6 +45,7 @@ static int read_header(AVFormatContext * - AVStream *ast, *vst; - unsigned int version, frames_count, msecs_per_frame, player_version; - int ret; -+ int audio_data_size; - - ast = avformat_new_stream(s, NULL); - if (!ast) -@@ -67,13 +67,13 @@ static int read_header(AVFormatContext * - vst->codecpar->height = avio_rl16(pb); - avio_r8(pb); - ast->codecpar->sample_rate = avio_rl16(pb); -- mvi->audio_data_size = avio_rl32(pb); -+ audio_data_size = avio_rl32(pb); - avio_r8(pb); - player_version = avio_rl32(pb); - avio_rl16(pb); - avio_r8(pb); - -- if (frames_count == 0 || mvi->audio_data_size == 0) -+ if (frames_count == 0 || audio_data_size <= 0) - return AVERROR_INVALIDDATA; 
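Note: the dOps writer above leans on the fixed OpusHead extradata layout from the Ogg Opus spec; for reference (offsets in bytes, multi-byte fields little-endian in extradata but written big-endian into dOps):

    /*  0..7   "OpusHead" magic
     *  8      version
     *  9      output channel count
     * 10..11  pre-skip
     * 12..15  input sample rate
     * 16..17  output gain
     * 18      channel mapping family
     * 19..    mapping table (stream count, coupled count, then one byte per
     *         channel) -- present only when the mapping family is non-zero,
     *         hence the 21 + channels minimum size checked above */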
- - if (version != 7 || player_version > 213) { -@@ -96,16 +96,16 @@ static int read_header(AVFormatContext * - - mvi->get_int = (vst->codecpar->width * (int64_t)vst->codecpar->height < (1 << 16)) ? avio_rl16 : avio_rl24; - -- mvi->audio_frame_size = ((uint64_t)mvi->audio_data_size << MVI_FRAC_BITS) / frames_count; -+ mvi->audio_frame_size = ((uint64_t)audio_data_size << MVI_FRAC_BITS) / frames_count; - if (mvi->audio_frame_size <= 1 << MVI_FRAC_BITS - 1) { - av_log(s, AV_LOG_ERROR, -- "Invalid audio_data_size (%"PRIu32") or frames_count (%u)\n", -- mvi->audio_data_size, frames_count); -+ "Invalid audio_data_size (%d) or frames_count (%u)\n", -+ audio_data_size, frames_count); - return AVERROR_INVALIDDATA; - } - - mvi->audio_size_counter = (ast->codecpar->sample_rate * 830 / mvi->audio_frame_size - 1) * mvi->audio_frame_size; -- mvi->audio_size_left = mvi->audio_data_size; -+ mvi->audio_size_left = audio_data_size; - - return 0; - } -Index: jellyfin-ffmpeg/libavformat/mxfdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mxfdec.c -+++ jellyfin-ffmpeg/libavformat/mxfdec.c -@@ -2903,7 +2903,7 @@ static int mxf_read_local_tags(MXFContex - meta = NULL; - ctx = mxf; - } -- while (avio_tell(pb) + 4 < klv_end && !avio_feof(pb)) { -+ while (avio_tell(pb) + 4ULL < klv_end && !avio_feof(pb)) { - int ret; - int tag = avio_rb16(pb); - int size = avio_rb16(pb); /* KLV specified by 0x53 */ -Index: jellyfin-ffmpeg/libavformat/rpl.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/rpl.c -+++ jellyfin-ffmpeg/libavformat/rpl.c -@@ -103,7 +103,7 @@ static AVRational read_fps(const char* l - // Truncate any numerator too large to fit into an int64_t - if (num > (INT64_MAX - 9) / 10 || den > INT64_MAX / 10) - break; -- num = 10 * num + *line - '0'; -+ num = 10 * num + (*line - '0'); - den *= 10; - } - if (!num) -@@ -207,8 +207,10 @@ static int rpl_read_header(AVFormatConte - ast->codecpar->bits_per_coded_sample = 4; - - ast->codecpar->bit_rate = ast->codecpar->sample_rate * -- ast->codecpar->bits_per_coded_sample * -- ast->codecpar->channels; -+ (int64_t)ast->codecpar->channels; -+ if (ast->codecpar->bit_rate > INT64_MAX / ast->codecpar->bits_per_coded_sample) -+ return AVERROR_INVALIDDATA; -+ ast->codecpar->bit_rate *= ast->codecpar->bits_per_coded_sample; - - ast->codecpar->codec_id = AV_CODEC_ID_NONE; - switch (audio_format) { -@@ -334,7 +336,7 @@ static int rpl_read_packet(AVFormatConte - - avio_skip(pb, 4); /* flags */ - frame_size = avio_rl32(pb); -- if (avio_seek(pb, -8, SEEK_CUR) < 0) -+ if (avio_feof(pb) || avio_seek(pb, -8, SEEK_CUR) < 0 || !frame_size) - return AVERROR(EIO); - - ret = av_get_packet(pb, pkt, frame_size); -Index: jellyfin-ffmpeg/libavformat/utils.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/utils.c -+++ jellyfin-ffmpeg/libavformat/utils.c -@@ -1208,7 +1208,9 @@ static void update_initial_durations(AVF - (pktl->pkt.dts == AV_NOPTS_VALUE || - pktl->pkt.dts == st->first_dts || - pktl->pkt.dts == RELATIVE_TS_BASE) && -- !pktl->pkt.duration) { -+ !pktl->pkt.duration && -+ av_sat_add64(cur_dts, duration) == cur_dts + (uint64_t)duration -+ ) { - pktl->pkt.dts = cur_dts; - if (!st->internal->avctx->has_b_frames) - pktl->pkt.pts = cur_dts; -@@ -3913,7 +3915,9 @@ FF_ENABLE_DEPRECATION_WARNINGS - break; - } - if (pkt->duration) { -- if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && 
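Note: several hunks in this series (mov.c sidx earlier, avformat/utils.c below) use the same overflow probe; a condensed sketch:

    /* Compare the saturating sum with the wrapping sum: they differ
     * exactly when the plain int64 addition would have overflowed. */
    if (av_sat_add64(a, b) == a + (uint64_t)b)
        a += b;                        /* safe */
    else
        return AVERROR_INVALIDDATA;    /* illustrative error handling */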
st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time) { -+ if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time -+ && (uint64_t)pkt->pts - st->start_time < INT64_MAX -+ ) { - st->internal->info->codec_info_duration = FFMIN(pkt->pts - st->start_time, st->internal->info->codec_info_duration + pkt->duration); - } else - st->internal->info->codec_info_duration += pkt->duration; -@@ -4059,7 +4063,7 @@ FF_ENABLE_DEPRECATION_WARNINGS - - if (!st->r_frame_rate.num) { - if ( avctx->time_base.den * (int64_t) st->time_base.num -- <= avctx->time_base.num * avctx->ticks_per_frame * (uint64_t) st->time_base.den) { -+ <= avctx->time_base.num * (uint64_t)avctx->ticks_per_frame * st->time_base.den) { - av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, - avctx->time_base.den, (int64_t)avctx->time_base.num * avctx->ticks_per_frame, INT_MAX); - } else { -Index: jellyfin-ffmpeg/libavformat/wtvdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/wtvdec.c -+++ jellyfin-ffmpeg/libavformat/wtvdec.c -@@ -817,7 +817,7 @@ static int parse_chunks(AVFormatContext - avio_skip(pb, 12); - ff_get_guid(pb, &formattype); - size = avio_rl32(pb); -- if (size < 0 || size > INT_MAX - 92) -+ if (size < 0 || size > INT_MAX - 92 - consumed) - return AVERROR_INVALIDDATA; - parse_media_type(s, 0, sid, mediatype, subtype, formattype, size); - consumed += 92 + size; -@@ -833,7 +833,7 @@ static int parse_chunks(AVFormatContext - avio_skip(pb, 12); - ff_get_guid(pb, &formattype); - size = avio_rl32(pb); -- if (size < 0 || size > INT_MAX - 76) -+ if (size < 0 || size > INT_MAX - 76 - consumed) - return AVERROR_INVALIDDATA; - parse_media_type(s, s->streams[stream_index], sid, mediatype, subtype, formattype, size); - consumed += 76 + size; -Index: jellyfin-ffmpeg/libavutil/cpu.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavutil/cpu.c -+++ jellyfin-ffmpeg/libavutil/cpu.c -@@ -291,6 +291,12 @@ int av_cpu_count(void) - DWORD_PTR proc_aff, sys_aff; - if (GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)) - nb_cpus = av_popcount64(proc_aff); -+#elif HAVE_SYSCTL && defined(HW_NCPUONLINE) -+ int mib[2] = { CTL_HW, HW_NCPUONLINE }; -+ size_t len = sizeof(nb_cpus); -+ -+ if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) -+ nb_cpus = 0; - #elif HAVE_SYSCTL && defined(HW_NCPU) - int mib[2] = { CTL_HW, HW_NCPU }; - size_t len = sizeof(nb_cpus); -Index: jellyfin-ffmpeg/tests/ref/fate/ts-opus-demux -=================================================================== ---- jellyfin-ffmpeg.orig/tests/ref/fate/ts-opus-demux -+++ jellyfin-ffmpeg/tests/ref/fate/ts-opus-demux -@@ -1,4 +1,4 @@ --#extradata 0: 30, 0x53be0347 -+#extradata 0: 30, 0x69290482 - #tb 0: 1/90000 - #media_type 0: audio - #codec_id 0: opus diff --git a/debian/patches/series b/debian/patches/series deleted file mode 100644 index 40831a18763..00000000000 --- a/debian/patches/series +++ /dev/null @@ -1,9 +0,0 @@ -0001_fix-segment-muxer.patch -0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch -0003-fix-for-the-broken-tonemap_vaapi-filter.patch -0004-cuda-format-converter-impl.patch -0005-cuda-tonemap-impl.patch -0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch -0007-fix-for-fmp4-in-hlsenc.patch -0008-fix-nvdec-exceeded-32-surfaces-error.patch -0009-fix-for-nvenc-from-upstream.patch From 0517e76837d897c8fe85cfb1e28abcdad93f31f0 
Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Thu, 14 Oct 2021 16:27:43 +0800 Subject: [PATCH 06/41] add build script for windows win64 --- Dockerfile.win64.in | 45 ++++ Dockerfile.win64.make | 8 + build-win64 | 40 +++ cross-win64.meson | 16 ++ docker-build-win64.sh | 562 ++++++++++++++++++++++++++++++++++++++++++ toolchain-win64.cmake | 13 + 6 files changed, 684 insertions(+) create mode 100644 Dockerfile.win64.in create mode 100644 Dockerfile.win64.make create mode 100755 build-win64 create mode 100644 cross-win64.meson create mode 100755 docker-build-win64.sh create mode 100644 toolchain-win64.cmake diff --git a/Dockerfile.win64.in b/Dockerfile.win64.in new file mode 100644 index 00000000000..de08c7a9b53 --- /dev/null +++ b/Dockerfile.win64.in @@ -0,0 +1,45 @@ +FROM DISTRO + +# Docker build arguments +ARG SOURCE_DIR=/ffmpeg +ARG ARTIFACT_DIR=/dist + +# Docker run environment +ENV DEBIAN_FRONTEND=noninteractive \ + SOURCE_DIR=/ffmpeg \ + ARTIFACT_DIR=/dist \ + FF_REV=FFMPEG_REV \ + FF_PREFIX=/opt/ffmpeg \ + FF_DEPS_PREFIX=/opt/ffdeps \ + FF_TOOLCHAIN=x86_64-w64-mingw32 \ + FF_CROSS_PREFIX=x86_64-w64-mingw32- \ + FF_CMAKE_TOOLCHAIN=${SOURCE_DIR}/toolchain-win64.cmake \ + FF_MESON_TOOLCHAIN=${SOURCE_DIR}/cross-win64.meson \ + FF_TARGET_FLAGS="--arch=x86_64 --target-os=mingw32 --cross-prefix=x86_64-w64-mingw32- --pkg-config=pkg-config --pkg-config-flags=--static" \ + PKG_CONFIG=pkg-config \ + PKG_CONFIG_LIBDIR=/opt/ffdeps/lib/pkgconfig:/opt/ffdeps/share/pkgconfig \ + CFLAGS="-static-libgcc -static-libstdc++ -I/opt/ffdeps/include -mtune=generic -O2 -pipe -D_FORTIFY_SOURCE=0" \ + CXXFLAGS="-static-libgcc -static-libstdc++ -I/opt/ffdeps/include -mtune=generic -O2 -pipe -D_FORTIFY_SOURCE=0" \ + LDFLAGS="-static-libgcc -static-libstdc++ -L/opt/ffdeps/lib -O2 -pipe" \ + DLLTOOL="x86_64-w64-mingw32-dlltool" + +# Prepare Debian and mingw-w64 build environment +RUN \ + apt-get -y update && \ + apt-get -y install build-essential yasm nasm xxd pkgconf git curl wget unzip subversion autoconf automake libtool libtool-bin autopoint cmake clang texinfo texi2html help2man flex bison gperf gettext itstool ragel libc6-dev libssl-dev gtk-doc-tools gobject-introspection gawk meson ninja-build p7zip-full python3-distutils python3-apt python-is-python3 zip quilt binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64 gfortran-mingw-w64-x86-64 && \ + rm /usr/lib/gcc/*-w64-mingw32/*/libstdc++*.dll* && \ + rm /usr/lib/gcc/*-w64-mingw32/*/libgcc_s* && \ + rm /usr/lib/gcc/*-w64-mingw32/*/*.dll.a && \ + rm /usr/*-w64-mingw32/lib/*.dll.a + +# Prepare build script and patches +RUN \ + mkdir -p /opt/ffmpeg /opt/ffdeps ${SOURCE_DIR} && \ + ln -sf ${SOURCE_DIR}/debian/patches ${SOURCE_DIR} && \ + ln -sf ${SOURCE_DIR}/docker-build-win64.sh /docker-build-win64.sh + +VOLUME ${ARTIFACT_DIR}/ + +COPY . 
${SOURCE_DIR}/ + +ENTRYPOINT ["/docker-build-win64.sh"] diff --git a/Dockerfile.win64.make b/Dockerfile.win64.make new file mode 100644 index 00000000000..0ea81a73609 --- /dev/null +++ b/Dockerfile.win64.make @@ -0,0 +1,8 @@ +#!/usr/bin/make +DISTRO=ubuntu:hirsute +FF_REV=1 +.PHONY: Dockerfile +Dockerfile: Dockerfile.win64.in + sed 's/DISTRO/$(DISTRO)/; s/FFMPEG_REV/$(FF_REV)/' $< > $@ || rm -f $@ +clean: + rm -f Dockerfile diff --git a/build-win64 b/build-win64 new file mode 100755 index 00000000000..6353a192e61 --- /dev/null +++ b/build-win64 @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +set -o xtrace +set -o errexit + +# Check for dependencies +for dep in docker make; do + command -v ${dep} &>/dev/null || { echo "The command '${dep}' is required."; exit 1; } +done + +# Use the latest distro for toolchains +distro="ubuntu:impish" +ffrevision="1" +image_name="jellyfin-ffmpeg-build-windows-win64" +package_temporary_dir="$( mktemp -d )" +current_user="$( whoami )" + +# Trap cleanup for later sections +cleanup() { + # Clean up the Dockerfile + make -f Dockerfile.win64.make clean + # Remove tempdir + rm -rf "${package_temporary_dir}" +} +trap cleanup EXIT INT + +# Generate Dockerfile +make -f Dockerfile.win64.make DISTRO=${distro} FF_REV=${ffrevision} +# Set up the build environment docker image +docker build . -t "${image_name}" +# Build the ZIP packages and copy them out to ${package_temporary_dir} +docker run --rm -v "${package_temporary_dir}:/dist" "${image_name}" +# If no 1st parameter was specified, move the ZIPs to the parent directory +if [[ -z ${1} ]]; then + path="../bin" +else + path="${1}" +fi +mkdir ${path} &>/dev/null || true +mv "${package_temporary_dir}"/zip/jellyfin-ffmpeg*.{zip,sha256sum} "${path}" diff --git a/cross-win64.meson b/cross-win64.meson new file mode 100644 index 00000000000..f5743f0865b --- /dev/null +++ b/cross-win64.meson @@ -0,0 +1,16 @@ +[binaries] +c = 'x86_64-w64-mingw32-gcc' +cpp = 'x86_64-w64-mingw32-g++' +ar = 'x86_64-w64-mingw32-ar' +strip = 'x86_64-w64-mingw32-strip' +windres = 'x86_64-w64-mingw32-windres' +exe_wrapper = ['wine'] + +[properties] +needs_exe_wrapper = true + +[host_machine] +system = 'windows' +cpu_family = 'x86_64' +cpu = 'x86_64' +endian = 'little' diff --git a/docker-build-win64.sh b/docker-build-win64.sh new file mode 100755 index 00000000000..8b871829e10 --- /dev/null +++ b/docker-build-win64.sh @@ -0,0 +1,562 @@ +#!/bin/bash + +# Builds the EXE/ZIP inside the Docker container + +set -o errexit +set -o xtrace + +# Update mingw-w64 headers +git clone --depth=1 https://github.com/mirror/mingw-w64.git +pushd mingw-w64/mingw-w64-headers +./configure \ + --prefix=/usr/${FF_TOOLCHAIN} \ + --host=${FF_TOOLCHAIN} \ + --with-default-win32-winnt="0x601" \ + --enable-idl +make -j$(nproc) +make install +popd + +# ICONV +mkdir iconv +pushd iconv +iconv_ver="1.16" +iconv_link="https://ftp.gnu.org/pub/gnu/libiconv/libiconv-${iconv_ver}.tar.gz" +wget ${iconv_link} -O iconv.tar.gz +tar xaf iconv.tar.gz +pushd libiconv-${iconv_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-{static,extra-encodings} \ + --with-pic +make -j$(nproc) +make install +popd +popd + +# LIBXML2 +git clone --depth=1 https://gitlab.gnome.org/GNOME/libxml2.git +pushd libxml2 +./autogen.sh \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,maintainer-mode} \ + --enable-static \ + --without-python +make -j$(nproc) +make install +popd + +# ZLIB +git clone --depth=1 https://github.com/madler/zlib.git +pushd zlib
+./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --static +make -j$(nproc) CC=${FF_CROSS_PREFIX}gcc AR=${FF_CROSS_PREFIX}ar +make install +popd + +# FREETYPE +mkdir freetype +pushd freetype +ft_ver="2.11.0" +ft_link="https://sourceforge.net/projects/freetype/files/freetype2/${ft_ver}/freetype-${ft_ver}.tar.xz/download" +wget ${ft_link} -O ft.tar.gz +tar xaf ft.tar.gz +pushd freetype-${ft_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static +make -j$(nproc) +make install +popd +popd + +# FRIBIDI +git clone --depth=1 https://github.com/fribidi/fribidi.git +pushd fribidi +mkdir build +pushd build +meson \ + --prefix=${FF_DEPS_PREFIX} \ + --cross-file=${FF_MESON_TOOLCHAIN} \ + --buildtype=release \ + --default-library=static \ + -D{bin,docs,tests}=false \ + .. +ninja -j$(nproc) +meson install +sed -i 's/Cflags:/Cflags: -DFRIBIDI_LIB_STATIC/' ${FF_DEPS_PREFIX}/lib/pkgconfig/fribidi.pc +popd +popd + +# GMP +mkdir gmp +pushd gmp +gmp_ver="6.2.1" +gmp_link="https://ftp.gnu.org/gnu/gmp/gmp-${gmp_ver}.tar.xz" +wget ${gmp_link} -O gmp.tar.gz +tar xaf gmp.tar.gz +pushd gmp-${gmp_ver} +autoreconf -i +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static +make -j$(nproc) +make install +popd +popd + +# LZMA +mkdir xz +pushd xz +xz_ver="5.2.5" +xz_link="https://sourceforge.net/projects/lzmautils/files/xz-${xz_ver}.tar.xz/download" +wget ${xz_link} -O xz.tar.xz +tar xaf xz.tar.xz +pushd xz-${xz_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd +popd + +# SDL2 +mkdir sdl2 +pushd sdl2 +sdl2_ver="2.0.16" +sdl2_link="https://libsdl.org/release/SDL2-${sdl2_ver}.tar.gz" +wget ${sdl2_link} -O sdl2.tar.gz +tar xaf sdl2.tar.gz +pushd SDL2-${sdl2_ver} +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static +make -j$(nproc) +make install +popd +popd + +# FONTCONFIG +mkdir fontconfig +pushd fontconfig +fc_ver="2.13.94" +fc_link="https://www.freedesktop.org/software/fontconfig/release/fontconfig-${fc_ver}.tar.xz" +wget ${fc_link} -O fc.tar.gz +tar xaf fc.tar.gz +pushd fontconfig-${fc_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,docs} \ + --enable-{static,libxml2,iconv} +make -j$(nproc) +make install +popd +popd + +# HARFBUZZ +git clone --depth=1 https://github.com/harfbuzz/harfbuzz.git +pushd harfbuzz +./autogen.sh \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# LIBUDFREAD +git clone --depth=1 https://code.videolan.org/videolan/libudfread.git +pushd libudfread +./bootstrap +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# LIBASS +git clone --depth=1 https://github.com/libass/libass.git +pushd libass +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# LIBBLURAY +git clone --depth=1 https://code.videolan.org/videolan/libbluray.git +pushd libbluray +./bootstrap +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,examples,bdjava-jar} \ + --disable-doxygen-{doc,dot,html,ps,pdf} 
\ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# LAME +mkdir lame +pushd lame +lame_ver="3.100" +lame_link="https://sourceforge.net/projects/lame/files/lame/${lame_ver}/lame-${lame_ver}.tar.gz/download" +wget ${lame_link} -O lame.tar.gz +tar xaf lame.tar.gz +pushd lame-${lame_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,gtktest,cpml,frontend} \ + --enable-{static,nasm} +make -j$(nproc) +make install +popd +popd + +# OGG +git clone --depth=1 https://github.com/xiph/ogg.git +pushd ogg +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# OPUS +git clone --depth=1 https://github.com/xiph/opus.git +pushd opus +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,extra-programs} \ + --enable-static +make -j$(nproc) +make install +popd + +# THEORA +git clone --depth=1 https://github.com/xiph/theora.git +pushd theora +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,examples,extra-programs,oggtest,vorbistest,spec,doc} \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# VORBIS +git clone --depth=1 https://github.com/xiph/vorbis.git +pushd vorbis +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,oggtest} \ + --enable-static +make -j$(nproc) +make install +popd + +# LIBWEBP +git clone --depth=1 https://chromium.googlesource.com/webm/libwebp +pushd libwebp +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,libwebpextras,libwebpdemux,sdl,gl,png,jpeg,tiff,gif} \ + --enable-{static,libwebpmux} \ + --with-pic +make -j$(nproc) +make install +popd + +# LIBVPX +git clone --depth=1 https://chromium.googlesource.com/webm/libvpx +pushd libvpx +export CROSS=${FF_CROSS_PREFIX} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --target=x86_64-win64-gcc \ + --disable-{shared,unit-tests,examples,tools,docs,install-bins} \ + --enable-{static,pic,vp9-postproc,vp9-highbitdepth} +make -j$(nproc) +make install +popd + +# ZIMG +git clone --depth=1 https://github.com/sekrit-twc/zimg.git +pushd zimg +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-pic \ + --with-pic +make -j$(nproc) +make install +popd + +# X264 +git clone --depth=1 https://code.videolan.org/videolan/x264.git +pushd x264 +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --cross-prefix=${FF_CROSS_PREFIX} \ + --disable-cli \ + --enable-{static,lto,strip,pic} +make -j$(nproc) +make install +popd + +# X265 +git clone -b 3.5 --depth=1 https://bitbucket.org/multicoreware/x265_git.git +pushd x265_git +x265_conf=" + -DCMAKE_TOOLCHAIN_FILE=${FF_CMAKE_TOOLCHAIN} + -DCMAKE_INSTALL_PREFIX=${FF_DEPS_PREFIX} + -DCMAKE_ASM_NASM_FLAGS=-w-macro-params-legacy + -DENABLE_ASSEMBLY=ON + -DENABLE_SHARED=OFF + -DENABLE_TESTS=OFF + -DENABLE_CLI=OFF + -DENABLE_PIC=ON +" +mkdir 8b 10b 12b +cmake \ + ${x265_conf} \ + -DHIGH_BIT_DEPTH=ON \ + -DEXPORT_C_API=OFF \ + -DENABLE_HDR10_PLUS=ON \ + -DMAIN12=ON \ + -S source \ + -B 12b & +cmake \ + ${x265_conf} \ + -DHIGH_BIT_DEPTH=ON \ + -DEXPORT_C_API=OFF \ + -DENABLE_HDR10_PLUS=ON \ + -S source \ + -B 10b & +cmake \ + ${x265_conf} \ + -DEXTRA_LIB="x265_main10.a;x265_main12.a" \ + 
-DEXTRA_LINK_FLAGS=-L. \ + -DLINKED_{10,12}BIT=ON \ + -S source \ + -B 8b & +wait +cat > Makefile << "EOF" +all: 12b/libx265.a 10b/libx265.a 8b/libx265.a +%/libx265.a: + $(MAKE) -C $(subst /libx265.a,,$@) +.PHONY: all +EOF +make -j$(nproc) +pushd 8b +mv ../12b/libx265.a ../8b/libx265_main12.a +mv ../10b/libx265.a ../8b/libx265_main10.a +mv libx265.a libx265_main.a +${FF_CROSS_PREFIX}ar -M << "EOF" +CREATE libx265.a +ADDLIB libx265_main.a +ADDLIB libx265_main10.a +ADDLIB libx265_main12.a +SAVE +END +EOF +make install +popd +popd + +# DAV1D +git clone -b 0.9.2 --depth=1 https://code.videolan.org/videolan/dav1d.git +pushd dav1d +mkdir build +pushd build +meson \ + --prefix=${FF_DEPS_PREFIX} \ + --cross-file=${FF_MESON_TOOLCHAIN} \ + --buildtype=release \ + --default-library=static \ + -Denable_{asm,avx512}=true \ + -Denable_{tests,examples}=false \ + .. +ninja -j$(nproc) +meson install +popd +popd + +# OpenCL headers +svn checkout https://github.com/KhronosGroup/OpenCL-Headers/trunk/CL +pushd CL +mkdir -p ${FF_DEPS_PREFIX}/include/CL +mv * ${FF_DEPS_PREFIX}/include/CL +popd + +# OpenCL ICD loader +git clone -b v2021.06.30 --depth=1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git +pushd OpenCL-ICD-Loader +sed -i 's|VERSION "1.2" SOVERSION "1"|PREFIX ""|g' CMakeLists.txt +mkdir build +pushd build +cmake \ + -DCMAKE_TOOLCHAIN_FILE=${FF_CMAKE_TOOLCHAIN} \ + -DCMAKE_INSTALL_PREFIX=${FF_DEPS_PREFIX} \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_SHARED_LIBS=ON \ + -DOPENCL_ICD_LOADER_HEADERS_DIR=${FF_DEPS_PREFIX}/include \ + -DOPENCL_ICD_LOADER_{PIC,DISABLE_OPENCLON12}=ON \ + -DOPENCL_ICD_LOADER_{BUILD_TESTING,REQUIRE_WDK}=OFF \ + .. +make -j$(nproc) +make install +mv ${FF_DEPS_PREFIX}/lib/libOpenCL.dll.a ${FF_DEPS_PREFIX}/lib/libOpenCL.a +popd +mkdir -p ${FF_DEPS_PREFIX}/lib/pkgconfig +cat > ${FF_DEPS_PREFIX}/lib/pkgconfig/OpenCL.pc << EOF +prefix=${FF_DEPS_PREFIX} +exec_prefix=\${prefix} +libdir=\${prefix}/lib +includedir=\${prefix}/include +Name: OpenCL +Description: OpenCL ICD Loader +Version: 3.0 +Libs: -L\${libdir} -lOpenCL +Cflags: -I\${includedir} +EOF +popd + +# FFNVCODEC +git clone -b n11.0.10.1 --depth=1 https://github.com/FFmpeg/nv-codec-headers.git +pushd nv-codec-headers +make PREFIX=${FF_DEPS_PREFIX} install +popd + +# AMF +svn checkout https://github.com/GPUOpen-LibrariesAndSDKs/AMF/trunk/amf/public/include +pushd include +mkdir -p ${FF_DEPS_PREFIX}/include/AMF +mv * ${FF_DEPS_PREFIX}/include/AMF +popd + +# LIBMFX +git clone -b 1.35.1 --depth=1 https://github.com/lu-zero/mfx_dispatch.git +pushd mfx_dispatch +autoreconf -i +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# Jellyfin-FFmpeg +pushd ${SOURCE_DIR} +ffversion="$(cat RELEASE)-${FF_REV}" +if [[ -f "patches/series" ]]; then + quilt push -a +fi +./configure \ + --prefix=${FF_PREFIX} \ + ${FF_TARGET_FLAGS} \ + --extra-version=Jellyfin \ + --disable-ffplay \ + --disable-debug \ + --disable-doc \ + --enable-shared \ + --enable-gpl \ + --enable-version3 \ + --enable-schannel \ + --enable-iconv \ + --enable-libxml2 \ + --enable-zlib \ + --enable-lzma \ + --enable-sdl2 \ + --enable-gmp \ + --enable-libfreetype \ + --enable-libfribidi \ + --enable-libfontconfig \ + --enable-libass \ + --enable-libbluray \ + --enable-libmp3lame \ + --enable-libopus \ + --enable-libtheora \ + --enable-libvorbis \ + --enable-libwebp \ + --enable-libvpx \ + --enable-libzimg \ + --enable-libx264 \ + --enable-libx265 \ + --enable-libdav1d 
\ + --enable-opencl \ + --enable-dxva2 \ + --enable-d3d11va \ + --enable-amf \ + --enable-libmfx \ + --enable-ffnvcodec \ + --enable-cuda \ + --enable-cuda-llvm \ + --enable-cuvid \ + --enable-nvdec \ + --enable-nvenc +make -j$(nproc) +make install +popd + +# Zip and copy artifacts +mkdir -p ${ARTIFACT_DIR}/zip +pushd ${FF_PREFIX}/bin +ffpackage="jellyfin-ffmpeg_${ffversion}-windows_win64" +zip -r ${ARTIFACT_DIR}/zip/${ffpackage}.zip ./*.{exe,dll} +pushd ${ARTIFACT_DIR}/zip +sha256sum ./${ffpackage}.zip > ./${ffpackage}.zip.sha256sum +chown -Rc $(stat -c %u:%g ${ARTIFACT_DIR}) ${ARTIFACT_DIR} +popd +popd diff --git a/toolchain-win64.cmake b/toolchain-win64.cmake new file mode 100644 index 00000000000..11d1344d2e1 --- /dev/null +++ b/toolchain-win64.cmake @@ -0,0 +1,13 @@ +set(CMAKE_SYSTEM_NAME Windows) +set(CMAKE_SYSTEM_PROCESSOR x86_64) + +set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc) +set(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++) +set(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres) +set(CMAKE_RANLIB x86_64-w64-mingw32-ranlib) + +set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32 /opt/ffdeps) + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) From fe9619afc0cf043448f20174910423bb887f3c31 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Thu, 14 Oct 2021 16:28:00 +0800 Subject: [PATCH 07/41] add github workflow for windows win64 --- .github/workflows/main.yml | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 99f32e8be19..a44242cfa01 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,4 +1,4 @@ -name: Build Linux +name: Build jellyfin-ffmpeg on: push: @@ -12,7 +12,7 @@ on: workflow_dispatch: jobs: - build: + build_linux: name: Build ${{ matrix.release }} ${{ matrix.arch }} runs-on: ubuntu-latest @@ -46,3 +46,24 @@ jobs: with: name: ${{ matrix.release }} ${{ matrix.arch }} package path: dist + build_win64: + name: Build windows win64 + runs-on: ubuntu-latest + + strategy: + fail-fast: false + + steps: + - uses: actions/checkout@v2 + + - name: Install make + run: sudo apt-get install make + + - name: Build + run: ./build-win64 dist + + - name: Upload Packages + uses: actions/upload-artifact@v2.2.3 + with: + name: windows win64 package + path: dist From 8467063cbb74ff4fcdc3b0e4fba8d4f261886014 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 22:04:02 +0800 Subject: [PATCH 08/41] add support for Ubuntu Impish 21.10 --- .github/workflows/main.yml | 4 +++- build | 7 ++++++- build.yaml | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a44242cfa01..6f4573462e6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,9 +23,11 @@ jobs: - bullseye - buster - stretch - - groovy - focal - bionic + - impish + - hirsute + - groovy arch: - amd64 diff --git a/build b/build index dc6c07981c2..d2c7de092ba 100755 --- a/build +++ b/build @@ -15,6 +15,7 @@ usage() { echo -e " * focal" echo -e " * groovy" echo -e " * hirsute" + echo -e " * impish" } if [[ -z ${1} ]]; then @@ -34,7 +35,7 @@ case ${cli_release} in ;; 'bullseye') release="debian:bullseye" - gcc_version="9" + gcc_version="10" ;; 'xenial') release="ubuntu:xenial" @@ -68,6 +69,10 @@ case ${cli_release} in release="ubuntu:hirsute" gcc_version="10" ;; + 'impish') + release="ubuntu:impish" + gcc_version="11" + ;; *) echo 
"Invalid release." usage diff --git a/build.yaml b/build.yaml index ab308e87317..5a1c65c48ce 100644 --- a/build.yaml +++ b/build.yaml @@ -31,3 +31,6 @@ packages: - hirsute-amd64 - hirsute-armhf - hirsute-arm64 + - impish-amd64 + - impish-armhf + - impish-arm64 From bcea7a6e5953c305f329edca36b025757da66bb1 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 22:06:41 +0800 Subject: [PATCH 09/41] drop support for EOL Ubuntu versions --- .github/workflows/main.yml | 1 - build | 4 ---- build.yaml | 9 --------- 3 files changed, 14 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6f4573462e6..7c8b94fd437 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -27,7 +27,6 @@ jobs: - bionic - impish - hirsute - - groovy arch: - amd64 diff --git a/build b/build index d2c7de092ba..b5357dcd89d 100755 --- a/build +++ b/build @@ -9,11 +9,7 @@ usage() { echo -e " * bullseye * arm64" echo -e " * xenial" echo -e " * bionic" - echo -e " * cosmic" - echo -e " * disco" - echo -e " * eoan" echo -e " * focal" - echo -e " * groovy" echo -e " * hirsute" echo -e " * impish" } diff --git a/build.yaml b/build.yaml index 5a1c65c48ce..7656ea4f063 100644 --- a/build.yaml +++ b/build.yaml @@ -16,18 +16,9 @@ packages: - bionic-amd64 - bionic-armhf - bionic-arm64 - - disco-amd64 - - disco-armhf - - disco-arm64 - - eoan-amd64 - - eoan-armhf - - eoan-arm64 - focal-amd64 - focal-armhf - focal-arm64 - - groovy-amd64 - - groovy-armhf - - groovy-arm64 - hirsute-amd64 - hirsute-armhf - hirsute-arm64 From 2085628cd3dd6bf583b5eb5cc8721d5741a51cfc Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:36:46 +0800 Subject: [PATCH 10/41] add fixes for segement muxer --- .../0001-add-fixes-for-segement-muxer.patch | 37 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 38 insertions(+) create mode 100644 debian/patches/0001-add-fixes-for-segement-muxer.patch create mode 100644 debian/patches/series diff --git a/debian/patches/0001-add-fixes-for-segement-muxer.patch b/debian/patches/0001-add-fixes-for-segement-muxer.patch new file mode 100644 index 00000000000..d3b57793d91 --- /dev/null +++ b/debian/patches/0001-add-fixes-for-segement-muxer.patch @@ -0,0 +1,37 @@ +Index: jellyfin-ffmpeg/libavformat/segment.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavformat/segment.c ++++ jellyfin-ffmpeg/libavformat/segment.c +@@ -87,6 +87,7 @@ typedef struct SegmentContext { + int64_t last_val; ///< remember last time for wrap around detection + int cut_pending; + int header_written; ///< whether we've already called avformat_write_header ++ int64_t start_pts; ///< pts of the very first packet processed, used to compute correct segment length + + char *entry_prefix; ///< prefix to add to list entry filenames + int list_type; ///< set the list type +@@ -712,6 +713,7 @@ static int seg_init(AVFormatContext *s) + if ((ret = parse_frames(s, &seg->frames, &seg->nb_frames, seg->frames_str)) < 0) + return ret; + } else { ++ seg->start_pts = -1; + if (seg->use_clocktime) { + if (seg->time <= 0) { + av_log(s, AV_LOG_ERROR, "Invalid negative segment_time with segment_atclocktime option set\n"); +@@ -889,7 +891,15 @@ calc_times: + seg->cut_pending = 1; + seg->last_val = wrapped_val; + } else { +- end_pts = seg->time * (seg->segment_count + 1); ++ if (seg->start_pts != -1) { ++ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); ++ } else if (pkt->stream_index == seg->reference_stream_index && 
pkt->pts != AV_NOPTS_VALUE) { ++ // this is the first packet of the reference stream we see, initialize start point ++ seg->start_pts = av_rescale_q(pkt->pts, st->time_base, AV_TIME_BASE_Q); ++ seg->cur_entry.start_time = (double)pkt->pts * av_q2d(st->time_base); ++ seg->cur_entry.start_pts = seg->start_pts; ++ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); ++ } + } + } + diff --git a/debian/patches/series b/debian/patches/series new file mode 100644 index 00000000000..c2e011753c2 --- /dev/null +++ b/debian/patches/series @@ -0,0 +1 @@ +0001-add-fixes-for-segement-muxer.patch From b482f5a8b70e4a9f2c6746934222f52860ae2431 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 22:21:10 +0800 Subject: [PATCH 11/41] add cuda pixfmt converter impl --- .../0002-add-cuda-pixfmt-converter-impl.patch | 1539 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 1540 insertions(+) create mode 100644 debian/patches/0002-add-cuda-pixfmt-converter-impl.patch diff --git a/debian/patches/0002-add-cuda-pixfmt-converter-impl.patch b/debian/patches/0002-add-cuda-pixfmt-converter-impl.patch new file mode 100644 index 00000000000..b29deea9767 --- /dev/null +++ b/debian/patches/0002-add-cuda-pixfmt-converter-impl.patch @@ -0,0 +1,1539 @@ +Index: jellyfin-ffmpeg/compat/cuda/cuda_runtime.h +=================================================================== +--- jellyfin-ffmpeg.orig/compat/cuda/cuda_runtime.h ++++ jellyfin-ffmpeg/compat/cuda/cuda_runtime.h +@@ -24,6 +24,7 @@ + #define COMPAT_CUDA_CUDA_RUNTIME_H + + // Common macros ++#define __constant__ __attribute__((constant)) + #define __global__ __attribute__((global)) + #define __device__ __attribute__((device)) + #define __device_builtin__ __attribute__((device_builtin)) +@@ -33,65 +34,69 @@ + #define max(a, b) ((a) > (b) ? (a) : (b)) + #define min(a, b) ((a) < (b) ? (a) : (b)) + #define abs(x) ((x) < 0 ? 
-(x) : (x)) ++#define clamp(a, b, c) min(max((a), (b)), (c)) + + #define atomicAdd(a, b) (__atomic_fetch_add(a, b, __ATOMIC_SEQ_CST)) + + // Basic typedefs + typedef __device_builtin__ unsigned long long cudaTextureObject_t; + +-typedef struct __device_builtin__ __align__(2) uchar2 +-{ +- unsigned char x, y; +-} uchar2; +- +-typedef struct __device_builtin__ __align__(4) ushort2 +-{ +- unsigned short x, y; +-} ushort2; +- +-typedef struct __device_builtin__ __align__(8) float2 +-{ +- float x, y; +-} float2; +- +-typedef struct __device_builtin__ __align__(8) int2 +-{ +- int x, y; +-} int2; +- +-typedef struct __device_builtin__ uint3 +-{ +- unsigned int x, y, z; +-} uint3; +- +-typedef struct uint3 dim3; +- +-typedef struct __device_builtin__ __align__(4) uchar4 +-{ +- unsigned char x, y, z, w; +-} uchar4; ++#define MAKE_VECTORS(type, base) \ ++typedef struct __device_builtin__ type##1 { \ ++ base x; \ ++} type##1; \ ++static __inline__ __device__ type##1 make_##type##1(base x) { \ ++ type##1 ret; \ ++ ret.x = x; \ ++ return ret; \ ++} \ ++typedef struct __device_builtin__ __align__(sizeof(base) * 2) type##2 { \ ++ base x, y; \ ++} type##2; \ ++static __inline__ __device__ type##2 make_##type##2(base x, base y) { \ ++ type##2 ret; \ ++ ret.x = x; \ ++ ret.y = y; \ ++ return ret; \ ++} \ ++typedef struct __device_builtin__ type##3 { \ ++ base x, y, z; \ ++} type##3; \ ++static __inline__ __device__ type##3 make_##type##3(base x, base y, base z) { \ ++ type##3 ret; \ ++ ret.x = x; \ ++ ret.y = y; \ ++ ret.z = z; \ ++ return ret; \ ++} \ ++typedef struct __device_builtin__ __align__(sizeof(base) * 4) type##4 { \ ++ base x, y, z, w; \ ++} type##4; \ ++static __inline__ __device__ type##4 make_##type##4(base x, base y, base z, base w) { \ ++ type##4 ret; \ ++ ret.x = x; \ ++ ret.y = y; \ ++ ret.z = z; \ ++ ret.w = w; \ ++ return ret; \ ++} + +-typedef struct __device_builtin__ __align__(8) ushort4 +-{ +- unsigned short x, y, z, w; +-} ushort4; ++#define MAKE_TYPE + +-typedef struct __device_builtin__ __align__(16) int4 +-{ +- int x, y, z, w; +-} int4; ++MAKE_VECTORS(uchar, unsigned char) ++MAKE_VECTORS(ushort, unsigned short) ++MAKE_VECTORS(int, int) ++MAKE_VECTORS(uint, unsigned int) ++MAKE_VECTORS(float, float) + +-typedef struct __device_builtin__ __align__(16) float4 +-{ +- float x, y, z, w; +-} float4; ++typedef struct __device_builtin__ uint3 dim3; + + // Accessors for special registers + #define GETCOMP(reg, comp) \ + asm("mov.u32 %0, %%" #reg "." 
#comp ";" : "=r"(tmp)); \
+ ret.comp = tmp;
+
+-#define GET(name, reg) static inline __device__ uint3 name() {\
++#define GET(name, reg) static __inline__ __device__ uint3 name() {\
+ uint3 ret; \
+ unsigned tmp; \
+ GETCOMP(reg, x) \
+@@ -109,80 +114,59 @@ GET(getThreadIdx, tid)
+ #define blockDim (getBlockDim())
+ #define threadIdx (getThreadIdx())
+
+-// Basic initializers (simple macros rather than inline functions)
+-#define make_int2(a, b) ((int2){.x = a, .y = b})
+-#define make_uchar2(a, b) ((uchar2){.x = a, .y = b})
+-#define make_ushort2(a, b) ((ushort2){.x = a, .y = b})
+-#define make_float2(a, b) ((float2){.x = a, .y = b})
+-#define make_int4(a, b, c, d) ((int4){.x = a, .y = b, .z = c, .w = d})
+-#define make_uchar4(a, b, c, d) ((uchar4){.x = a, .y = b, .z = c, .w = d})
+-#define make_ushort4(a, b, c, d) ((ushort4){.x = a, .y = b, .z = c, .w = d})
+-#define make_float4(a, b, c, d) ((float4){.x = a, .y = b, .z = c, .w = d})
+-
+ // Conversions from the tex instruction's 4-register output to various types
+-#define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);}
++#define TEX2D(type, ret) static __inline__ __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);}
+
+ TEX2D(unsigned char, a & 0xFF)
+ TEX2D(unsigned short, a & 0xFFFF)
+-TEX2D(float, a)
+-TEX2D(uchar2, make_uchar2(a & 0xFF, b & 0xFF))
+-TEX2D(ushort2, make_ushort2(a & 0xFFFF, b & 0xFFFF))
+-TEX2D(float2, make_float2(a, b))
+-TEX2D(uchar4, make_uchar4(a & 0xFF, b & 0xFF, c & 0xFF, d & 0xFF))
+-TEX2D(ushort4, make_ushort4(a & 0xFFFF, b & 0xFFFF, c & 0xFFFF, d & 0xFFFF))
+-TEX2D(float4, make_float4(a, b, c, d))
++TEX2D(uchar2, make_uchar2((unsigned char)a, (unsigned char)b))
++TEX2D(ushort2, make_ushort2((unsigned short)a, (unsigned short)b))
++TEX2D(uchar4, make_uchar4((unsigned char)a, (unsigned char)b, (unsigned char)c, (unsigned char)d))
++TEX2D(ushort4, make_ushort4((unsigned short)a, (unsigned short)b, (unsigned short)c, (unsigned short)d))
+
+ // Template calling tex instruction and converting the output to the selected type
+-template<typename T>
+-inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y)
++template<typename T>
++static __inline__ __device__ T tex2D(cudaTextureObject_t texObject, float x, float y)
+ {
+- T ret;
+- unsigned ret1, ret2, ret3, ret4;
+- asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
+- "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) :
+- "l"(texObject), "f"(x), "f"(y));
+- conv(&ret, ret1, ret2, ret3, ret4);
+- return ret;
++ T ret;
++ unsigned ret1, ret2, ret3, ret4;
++ asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
++ "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) :
++ "l"(texObject), "f"(x), "f"(y));
++ conv(&ret, ret1, ret2, ret3, ret4);
++ return ret;
+ }
+
+-template<>
+-inline __device__ float4 tex2D<float4>(cudaTextureObject_t texObject, float x, float y)
++static __inline__ __device__ float __exp2f(float x)
+ {
+- float4 ret;
+- asm("tex.2d.v4.f32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
+- "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) :
+- "l"(texObject), "f"(x), "f"(y));
++ float ret;
++ asm("ex2.approx.f32 %0, %1;" : "=f"(ret) : "f"(x));
+ return ret;
+ }
+
+-template<>
+-inline __device__ float tex2D<float>(cudaTextureObject_t texObject, float x, float y)
++#define __expf(x) (__exp2f((x) * 1.4427f))
++
++static __inline__ __device__ float __log2f(float x)
+ {
+- return tex2D<float4>(texObject, x, y).x;
++ float ret;
++ asm("lg2.approx.f32 %0, %1;" : "=f"(ret) : "f"(x));
++ return ret;
+ }
+
+-template<>
+-inline __device__ float2 tex2D<float2>(cudaTextureObject_t texObject, float x, float y)
++#define __logf(x) (__log2f((x)) * 0.693147f)
++#define __log10f(x) (__log2f((x)) * 0.30103f)
++
++static __inline__ __device__ float __powf(float x, float y)
+ {
+- float4 ret = tex2D<float4>(texObject, x, y);
+- return make_float2(ret.x, ret.y);
++ return __exp2f(y * __log2f(x));
+ }
+
+-// Math helper functions
+-static inline __device__ float floorf(float a) { return __builtin_floorf(a); }
+-static inline __device__ float floor(float a) { return __builtin_floorf(a); }
+-static inline __device__ double floor(double a) { return __builtin_floor(a); }
+-static inline __device__ float ceilf(float a) { return __builtin_ceilf(a); }
+-static inline __device__ float ceil(float a) { return __builtin_ceilf(a); }
+-static inline __device__ double ceil(double a) { return __builtin_ceil(a); }
+-static inline __device__ float truncf(float a) { return __builtin_truncf(a); }
+-static inline __device__ float trunc(float a) { return __builtin_truncf(a); }
+-static inline __device__ double trunc(double a) { return __builtin_trunc(a); }
+-static inline __device__ float fabsf(float a) { return __builtin_fabsf(a); }
+-static inline __device__ float fabs(float a) { return __builtin_fabsf(a); }
+-static inline __device__ double fabs(double a) { return __builtin_fabs(a); }
++static __inline__ __device__ float __sqrtf(float x)
++{
++ float ret;
++ asm("sqrt.approx.f32 %0, %1;" : "=f"(ret) : "f"(x));
++ return ret;
++}
+
+-static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); }
+-static inline __device__ float __cosf(float a) { return __nvvm_cos_approx_f(a); }
++#endif
+
+-#endif /* COMPAT_CUDA_CUDA_RUNTIME_H */
+Index: jellyfin-ffmpeg/libavfilter/Makefile
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/Makefile
++++ jellyfin-ffmpeg/libavfilter/Makefile
+@@ -392,8 +392,7 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER)
+ OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o
+ OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o
+ OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o
+-OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \
+- vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o
++OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o scale_eval.o
+ OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o
+ OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o
+ OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o
+Index: jellyfin-ffmpeg/libavfilter/cuda/vector_helpers.cuh
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/cuda/vector_helpers.cuh
++++ /dev/null
+@@ -1,112 +0,0 @@
+-/*
+- * This file is part of FFmpeg.
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a
+- * copy of this software and associated documentation files (the "Software"),
+- * to deal in the Software without restriction, including without limitation
+- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+- * and/or sell copies of the Software, and to permit persons to whom the
+- * Software is furnished to do so, subject to the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included in
+- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +- * DEALINGS IN THE SOFTWARE. +- */ +- +-#ifndef AVFILTER_CUDA_VECTORHELPERS_H +-#define AVFILTER_CUDA_VECTORHELPERS_H +- +-typedef unsigned char uchar; +-typedef unsigned short ushort; +- +-template struct vector_helper { }; +-template<> struct vector_helper { typedef float ftype; typedef int itype; }; +-template<> struct vector_helper { typedef float2 ftype; typedef int2 itype; }; +-template<> struct vector_helper { typedef float4 ftype; typedef int4 itype; }; +-template<> struct vector_helper { typedef float ftype; typedef int itype; }; +-template<> struct vector_helper { typedef float2 ftype; typedef int2 itype; }; +-template<> struct vector_helper { typedef float4 ftype; typedef int4 itype; }; +-template<> struct vector_helper { typedef float ftype; typedef int itype; }; +-template<> struct vector_helper { typedef float2 ftype; typedef int2 itype; }; +-template<> struct vector_helper { typedef float4 ftype; typedef int4 itype; }; +- +-#define floatT typename vector_helper::ftype +-#define intT typename vector_helper::itype +- +-template inline __device__ V to_floatN(const T &a) { return (V)a; } +-template inline __device__ T from_floatN(const V &a) { return (T)a; } +- +-#define OPERATORS2(T) \ +- template inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.x + b.x, a.y + b.y); } \ +- template inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.x - b.x, a.y - b.y); } \ +- template inline __device__ T operator*(const T &a, V b) { return make_ ## T (a.x * b, a.y * b); } \ +- template inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, a.y / b); } \ +- template inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b, a.y >> b); } \ +- template inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x << b, a.y << b); } \ +- template inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; return a; } \ +- template inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; } \ +- template inline __device__ void vec_set_scalar(T &a, V b) { a.x = b; a.y = b; } \ +- template<> inline __device__ float2 to_floatN(const T &a) { return make_float2(a.x, a.y); } \ +- template<> inline __device__ T from_floatN(const float2 &a) { return make_ ## T(a.x, a.y); } +-#define OPERATORS4(T) \ +- template inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); } \ +- template inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); } \ +- template inline __device__ T operator*(const T &a, V b) { return make_ ## T (a.x * b, a.y * b, a.z * b, a.w * b); } \ +- template inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, a.y / b, a.z / b, a.w / b); } \ +- template inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b, a.y >> b, a.z >> b, a.w >> b); } \ +- template inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x 
<< b, a.y << b, a.z << b, a.w << b); } \ +- template inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; } \ +- template inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; a.z = b.z; a.w = b.w; } \ +- template inline __device__ void vec_set_scalar(T &a, V b) { a.x = b; a.y = b; a.z = b; a.w = b; } \ +- template<> inline __device__ float4 to_floatN(const T &a) { return make_float4(a.x, a.y, a.z, a.w); } \ +- template<> inline __device__ T from_floatN(const float4 &a) { return make_ ## T(a.x, a.y, a.z, a.w); } +- +-OPERATORS2(int2) +-OPERATORS2(uchar2) +-OPERATORS2(ushort2) +-OPERATORS2(float2) +-OPERATORS4(int4) +-OPERATORS4(uchar4) +-OPERATORS4(ushort4) +-OPERATORS4(float4) +- +-template inline __device__ void vec_set(int &a, V b) { a = b; } +-template inline __device__ void vec_set(float &a, V b) { a = b; } +-template inline __device__ void vec_set(uchar &a, V b) { a = b; } +-template inline __device__ void vec_set(ushort &a, V b) { a = b; } +-template inline __device__ void vec_set_scalar(int &a, V b) { a = b; } +-template inline __device__ void vec_set_scalar(float &a, V b) { a = b; } +-template inline __device__ void vec_set_scalar(uchar &a, V b) { a = b; } +-template inline __device__ void vec_set_scalar(ushort &a, V b) { a = b; } +- +-template +-inline __device__ T lerp_scalar(T v0, T v1, float t) { +- return t*v1 + (1.0f - t)*v0; +-} +- +-template<> +-inline __device__ float2 lerp_scalar(float2 v0, float2 v1, float t) { +- return make_float2( +- lerp_scalar(v0.x, v1.x, t), +- lerp_scalar(v0.y, v1.y, t) +- ); +-} +- +-template<> +-inline __device__ float4 lerp_scalar(float4 v0, float4 v1, float t) { +- return make_float4( +- lerp_scalar(v0.x, v1.x, t), +- lerp_scalar(v0.y, v1.y, t), +- lerp_scalar(v0.z, v1.z, t), +- lerp_scalar(v0.w, v1.w, t) +- ); +-} +- +-#endif +Index: jellyfin-ffmpeg/libavfilter/dither_matrix.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/dither_matrix.h +@@ -0,0 +1,74 @@ ++/* ++ * Dither matrix data ++ * ++ * This file is placed in the public domain. 
++ */ ++ ++#include ++static const int ff_fruit_dither_size = 64; ++static const uint16_t ff_fruit_dither_matrix[] = { ++ 332, 2776, 1933, 42, 2598, 1796, 1000, 2978, 1677, 3452, 2164, 1564, 2644, 358, 2012, 3471, 1147, 3071, 596, 1943, 3146, 1191, 2469, 919, 3664, 2359, 441, 2691, 1179, 3027, 1408, 298, 3892, 1825, 182, 2178, 3028, 317, 2412, 858, 3097, 2205, 1145, 2880, 990, 2697, 728, 1969, 2312, 1393, 3232, 1204, 3752, 1529, 448, 3955, 2076, 833, 3856, 1, 3445, 2105, 955, 1761, ++ 4060, 1053, 3038, 1445, 3302, 430, 3702, 2119, 625, 2523, 12, 3003, 959, 3814, 2388, 829, 4059, 2236, 1417, 3447, 198, 4020, 1891, 3368, 76, 1460, 2963, 1680, 3721, 535, 2275, 2916, 1226, 2348, 3580, 823, 1897, 4032, 1245, 2728, 194, 3285, 1941, 399, 3639, 1593, 3775, 1038, 3012, 162, 2687, 2029, 559, 2983, 1809, 2378, 325, 2861, 1331, 2533, 1171, 2701, 3328, 153, ++ 2214, 3412, 501, 3934, 892, 1918, 2686, 1199, 3090, 1351, 3779, 1776, 3371, 1457, 217, 2844, 1726, 311, 2896, 1021, 2604, 1546, 569, 2758, 1818, 3967, 727, 3305, 963, 1866, 3591, 853, 3215, 496, 2651, 1453, 2808, 704, 2247, 3395, 1779, 937, 4014, 2288, 1286, 3110, 331, 3309, 1839, 3866, 932, 3566, 2499, 1005, 3346, 1192, 3712, 1743, 3399, 757, 3765, 391, 1871, 2928, ++ 1411, 820, 2531, 1622, 2192, 3478, 215, 4079, 384, 3298, 742, 2332, 436, 2685, 1963, 3273, 680, 3571, 2033, 3795, 806, 3550, 2319, 1225, 3191, 1027, 2506, 237, 2196, 3126, 28, 2148, 1772, 3959, 1009, 3507, 85, 3742, 1539, 453, 3647, 2154, 573, 2786, 156, 2020, 2569, 1425, 538, 2375, 1723, 300, 1854, 4065, 110, 2741, 678, 3138, 213, 1979, 2330, 1530, 3542, 720, ++ 3811, 1955, 3240, 126, 2909, 760, 2482, 1493, 2301, 1719, 2788, 1180, 3998, 923, 3649, 1094, 1862, 2579, 1272, 30, 3109, 1987, 255, 3816, 471, 1977, 3519, 1557, 3882, 1086, 2754, 3776, 1304, 241, 2262, 1863, 3163, 1111, 2982, 2026, 1056, 2948, 1439, 3323, 1737, 3595, 860, 3980, 2895, 1193, 3365, 2779, 852, 2175, 3069, 1641, 2309, 1237, 2630, 4036, 965, 3052, 1096, 2487, ++ 289, 2863, 1190, 3633, 1330, 3834, 1075, 3429, 602, 3850, 174, 3242, 1657, 3080, 98, 2357, 3899, 374, 3224, 1619, 2431, 1125, 3019, 1367, 2395, 3102, 794, 2850, 426, 2451, 1687, 548, 3070, 2596, 3335, 565, 1630, 2459, 288, 3937, 2504, 51, 3740, 725, 2475, 433, 2706, 2075, 19, 3694, 633, 1982, 3149, 1345, 729, 3843, 411, 3654, 1681, 564, 2721, 104, 3875, 2110, ++ 3504, 1730, 537, 2398, 2058, 443, 2636, 1802, 2925, 953, 2568, 2005, 583, 2108, 2802, 1441, 889, 2726, 1949, 3984, 486, 3407, 624, 2698, 1648, 97, 4055, 1340, 1994, 3667, 913, 3453, 1964, 815, 1400, 4072, 2767, 873, 3457, 684, 1576, 3119, 1884, 1222, 3898, 1535, 3482, 982, 1817, 2520, 1487, 3927, 181, 3493, 2396, 1438, 2871, 985, 2070, 3498, 1370, 3279, 1655, 586, ++ 1280, 2625, 3912, 939, 2999, 1649, 3162, 64, 2130, 3606, 1428, 3469, 1256, 3824, 409, 3562, 1780, 3433, 768, 1206, 2843, 2086, 3869, 926, 3663, 2099, 1047, 2623, 3217, 148, 1810, 2498, 314, 3790, 2126, 129, 2037, 3088, 1356, 2314, 3301, 946, 2716, 2163, 250, 3091, 575, 2227, 3204, 359, 2970, 1110, 2594, 1867, 532, 3338, 43, 2492, 3172, 282, 2394, 842, 2852, 2019, ++ 3111, 31, 1813, 3256, 243, 3687, 851, 4002, 1301, 480, 3004, 264, 2493, 1586, 2947, 714, 2538, 165, 2193, 3597, 1514, 130, 1792, 3132, 348, 2875, 3434, 491, 1251, 2120, 3979, 1093, 2905, 979, 2561, 3625, 1230, 373, 3836, 1953, 197, 4028, 498, 3383, 1713, 2400, 1402, 4083, 1150, 3589, 2106, 592, 3650, 1252, 4012, 1975, 3047, 1574, 690, 3909, 1745, 3616, 349, 3976, ++ 894, 3438, 2251, 1160, 2582, 1467, 2342, 1747, 2837, 2242, 1076, 3946, 866, 3385, 1108, 1985, 4076, 1303, 
2915, 438, 2589, 3312, 1085, 2367, 1916, 1403, 759, 2340, 3582, 2835, 637, 3264, 1997, 3506, 515, 1544, 3258, 2612, 1017, 2903, 1322, 1791, 3014, 1154, 3826, 871, 2858, 192, 2739, 719, 1701, 3177, 2222, 345, 2737, 770, 1181, 3774, 2264, 1060, 2667, 1262, 2318, 1584, ++ 2654, 1420, 499, 4045, 688, 3379, 319, 3490, 735, 3699, 1642, 2050, 2683, 40, 2204, 3096, 343, 3261, 1672, 3747, 900, 1981, 4025, 644, 3505, 2575, 3903, 1724, 232, 1517, 2590, 1405, 58, 1663, 3051, 2381, 751, 1756, 3646, 647, 3464, 2363, 826, 2563, 92, 3137, 1928, 3524, 1590, 2159, 3851, 84, 1423, 3082, 1684, 2417, 3417, 369, 1808, 3022, 118, 3254, 661, 3555, ++ 229, 3756, 2917, 1627, 2773, 1235, 2949, 1002, 2470, 140, 3274, 594, 3533, 1678, 3799, 933, 1812, 2457, 673, 2306, 1474, 3055, 292, 2820, 1293, 2, 1106, 3237, 2013, 3823, 439, 3611, 2093, 4015, 1264, 258, 3914, 2294, 16, 2692, 1983, 295, 3723, 1893, 3442, 1306, 619, 2371, 392, 2989, 1071, 2484, 3512, 666, 3796, 195, 1522, 2775, 3586, 875, 4086, 1921, 2883, 1163, ++ 2456, 1935, 1032, 2305, 111, 3845, 2036, 1555, 3964, 2115, 1289, 2911, 977, 2372, 465, 2616, 3618, 1211, 3933, 72, 3404, 772, 2424, 1662, 3755, 2252, 3025, 518, 2524, 855, 2953, 1102, 2811, 795, 2602, 3414, 2060, 891, 3357, 1132, 3965, 1495, 2806, 551, 1633, 2629, 3659, 1172, 3958, 1446, 3319, 798, 1787, 2768, 1271, 3184, 2210, 587, 1338, 2478, 1550, 483, 2198, 3388, ++ 832, 3219, 386, 3674, 1733, 2479, 578, 3214, 416, 3044, 1861, 291, 4033, 1449, 3477, 1940, 205, 2976, 885, 2704, 1886, 3820, 1070, 3288, 700, 1369, 1855, 4087, 1194, 3463, 1764, 2329, 297, 3296, 1742, 539, 1398, 3107, 1696, 2387, 711, 3151, 1223, 2219, 4038, 328, 2095, 2919, 845, 2595, 263, 2235, 4043, 398, 2129, 903, 3924, 1885, 3317, 249, 2732, 3685, 1383, 55, ++ 3926, 1481, 2977, 1177, 3292, 893, 3600, 1418, 2670, 927, 3637, 2477, 1127, 2665, 765, 2879, 1283, 2162, 3333, 1359, 482, 2180, 2942, 219, 2548, 3594, 308, 2759, 2185, 106, 3174, 699, 3738, 1139, 2277, 3833, 2804, 223, 3707, 415, 3518, 1913, 150, 2995, 864, 3222, 1575, 35, 3428, 1694, 3581, 992, 2907, 1494, 3676, 2702, 11, 3103, 849, 3825, 1848, 947, 3125, 1901, ++ 2494, 600, 2017, 2658, 293, 2207, 2851, 9, 1799, 3186, 510, 2077, 3318, 102, 3196, 1571, 3982, 357, 1781, 3717, 2774, 1176, 1834, 4001, 908, 2044, 3195, 986, 1675, 3696, 1339, 2638, 1617, 3026, 96, 1984, 940, 2446, 1563, 2876, 930, 2532, 3426, 1516, 2407, 1134, 3801, 1931, 2307, 521, 2744, 1883, 138, 3356, 597, 1758, 2343, 1389, 2831, 2145, 693, 3485, 2286, 520, ++ 3031, 1601, 3710, 956, 4067, 1849, 1097, 3760, 2406, 1215, 3861, 1596, 846, 3804, 1890, 516, 2295, 3062, 747, 2337, 169, 3178, 603, 2269, 3104, 1537, 627, 3537, 2415, 786, 2988, 379, 4040, 839, 2554, 3561, 1363, 4005, 745, 2128, 3817, 1348, 617, 3910, 397, 3050, 598, 2787, 1037, 3932, 1277, 3741, 2091, 1073, 2503, 3530, 934, 4024, 352, 1610, 2969, 158, 1347, 3992, ++ 1155, 3348, 142, 2259, 3087, 500, 3380, 2096, 730, 2795, 210, 3005, 2335, 1173, 2627, 3703, 1049, 2038, 3514, 983, 3868, 1653, 3552, 1249, 74, 3891, 2586, 1452, 238, 3936, 1738, 2168, 1209, 3384, 1847, 593, 3233, 355, 3036, 1753, 63, 3250, 2150, 1732, 2613, 1259, 3629, 1483, 3120, 280, 2382, 663, 2641, 3176, 1637, 265, 3013, 1239, 2420, 3416, 1088, 3769, 1938, 2555, ++ 347, 2049, 2809, 1443, 761, 2628, 1693, 277, 3975, 1496, 3523, 1961, 547, 3106, 287, 1492, 3340, 24, 1547, 2887, 2147, 370, 2535, 1880, 2962, 2121, 479, 3077, 1922, 2618, 966, 3253, 2439, 327, 2847, 1515, 2226, 2713, 962, 3632, 2465, 1039, 2854, 245, 3179, 2080, 124, 2525, 792, 3486, 1744, 3289, 1404, 476, 3782, 2186, 715, 3579, 
1971, 626, 2784, 2201, 741, 3248, ++ 1640, 3551, 1024, 3840, 1827, 3622, 1072, 3238, 2279, 835, 2562, 1295, 4085, 1816, 3535, 2495, 724, 2664, 4042, 570, 1281, 3422, 1013, 3787, 783, 1343, 3722, 1123, 3427, 599, 3634, 27, 1572, 3827, 1014, 3665, 139, 1868, 3197, 1390, 640, 4090, 1459, 3714, 902, 3421, 1676, 4004, 2246, 1128, 2720, 60, 4069, 2002, 1035, 2818, 1352, 2552, 88, 3847, 1801, 309, 3627, 1104, ++ 2894, 686, 2509, 403, 2926, 95, 1991, 2868, 469, 2052, 3370, 66, 2399, 679, 1248, 2081, 3181, 1100, 2224, 1870, 3001, 1506, 2760, 260, 3294, 2707, 2027, 146, 2355, 1323, 2783, 1956, 2946, 676, 2000, 3053, 1325, 3893, 424, 2084, 2998, 1797, 466, 2380, 1875, 507, 2939, 1054, 396, 3700, 1502, 3007, 904, 2418, 3443, 200, 3954, 1588, 3283, 1065, 3084, 1471, 2714, 2270, ++ 34, 4029, 1869, 3446, 1207, 2442, 3919, 879, 1788, 3726, 1115, 3129, 1466, 2742, 3897, 214, 1750, 3763, 405, 3459, 123, 3916, 812, 2317, 1685, 533, 3389, 1589, 3908, 1844, 376, 3990, 1158, 3437, 2377, 458, 2645, 896, 2336, 3396, 193, 2581, 3539, 1083, 2756, 3871, 1426, 2135, 3166, 1824, 566, 2104, 3559, 413, 1814, 2922, 2122, 797, 2750, 2258, 670, 3944, 869, 1729, ++ 3183, 1409, 916, 2220, 3100, 568, 1427, 3351, 2601, 336, 2803, 778, 3656, 432, 2170, 3330, 944, 2302, 2856, 970, 2416, 1766, 3249, 1218, 4066, 2438, 993, 2635, 697, 3021, 2152, 830, 2608, 234, 1421, 4051, 1705, 3577, 1507, 791, 3807, 958, 2051, 3148, 4, 2256, 712, 3529, 179, 2536, 3880, 989, 2655, 1302, 3267, 562, 1210, 3517, 303, 1950, 3393, 168, 2125, 3749, ++ 502, 2745, 3648, 256, 1620, 3684, 2240, 173, 1238, 3999, 2216, 1704, 2039, 2886, 1213, 1889, 2980, 665, 1585, 3715, 1333, 2662, 446, 3011, 41, 1906, 3772, 306, 3509, 1241, 3339, 1645, 3692, 2209, 3121, 607, 2833, 47, 3185, 2507, 1291, 2859, 400, 1580, 3675, 1328, 2676, 1658, 2857, 1183, 1965, 3190, 149, 3972, 1573, 2221, 3832, 1706, 3037, 1290, 2534, 1647, 2923, 1161, ++ 2452, 1942, 809, 3321, 2139, 1022, 2842, 1887, 3074, 1528, 536, 3475, 188, 3837, 752, 3573, 79, 4018, 2141, 285, 3354, 738, 3680, 2173, 1473, 2927, 1144, 2816, 877, 2368, 91, 2727, 635, 1253, 1888, 3513, 974, 2280, 1990, 488, 3326, 1948, 4007, 2153, 660, 3314, 302, 4039, 784, 3626, 445, 2327, 1674, 2513, 827, 3099, 26, 2464, 749, 4095, 912, 3704, 556, 3495, ++ 225, 3962, 1567, 2550, 713, 4053, 371, 3436, 796, 2541, 3169, 1174, 2428, 1414, 2609, 1628, 2753, 1263, 3206, 1951, 1129, 2885, 1740, 911, 3376, 672, 3603, 1616, 2015, 3873, 1477, 3543, 2087, 3846, 171, 2549, 1612, 3730, 1157, 3923, 1650, 151, 1136, 3033, 2466, 996, 2931, 1450, 2391, 1763, 3362, 883, 3678, 333, 3496, 1422, 2730, 1084, 3225, 375, 2780, 1480, 2354, 1882, ++ 2961, 1028, 2814, 80, 3154, 1720, 2463, 1444, 3733, 23, 1666, 3917, 611, 3398, 388, 3235, 696, 2344, 425, 2580, 3839, 154, 2266, 3957, 381, 2056, 2476, 196, 3180, 527, 3009, 1044, 383, 2347, 2973, 781, 3227, 342, 2766, 654, 2967, 2101, 3546, 457, 1534, 3732, 1917, 616, 3134, 52, 2174, 2981, 1169, 2813, 2116, 541, 3781, 2156, 1448, 3578, 2199, 87, 3359, 1313, ++ 618, 3424, 1397, 3818, 2098, 1095, 2943, 961, 2316, 1959, 2975, 1026, 2724, 1768, 2155, 1153, 3945, 1751, 3612, 814, 1551, 3304, 1178, 2764, 1643, 3136, 1081, 3981, 2213, 1310, 2587, 1826, 3369, 1533, 1092, 3953, 1296, 2433, 1833, 3598, 1023, 2543, 1309, 3187, 2043, 125, 2519, 3528, 1324, 3757, 1511, 492, 4057, 1910, 994, 3200, 1777, 212, 2960, 702, 1811, 3122, 943, 3925, ++ 2611, 2046, 477, 2397, 650, 3541, 235, 3864, 632, 3342, 423, 2234, 3592, 109, 3758, 2933, 239, 2790, 1051, 2985, 2113, 630, 2515, 257, 3778, 843, 2708, 1549, 653, 3521, 233, 4058, 748, 
3086, 2231, 484, 2062, 3075, 115, 1501, 3287, 334, 3885, 856, 2763, 3971, 1195, 414, 2822, 681, 2700, 1830, 2422, 112, 3556, 2267, 901, 3983, 2373, 1217, 3770, 454, 2481, 1845, ++ 266, 3662, 1654, 3208, 1287, 1934, 3060, 1807, 2648, 1395, 4074, 828, 1531, 2640, 1350, 872, 2454, 1512, 3499, 8, 4092, 1762, 3540, 1434, 2146, 3286, 71, 3683, 1902, 2421, 1385, 2719, 1972, 13, 3660, 1688, 3450, 1001, 4077, 2328, 907, 2913, 1690, 2353, 545, 1485, 2991, 2134, 1728, 3948, 938, 3353, 1298, 2891, 1592, 387, 3029, 1896, 524, 3378, 2092, 1560, 2972, 1089, ++ 3150, 882, 2672, 183, 3693, 2572, 506, 1109, 3218, 164, 2467, 1930, 3175, 706, 3480, 2111, 3831, 584, 2211, 2679, 1292, 2819, 442, 2935, 601, 1790, 2496, 1041, 2906, 435, 3230, 880, 3780, 2512, 909, 2770, 301, 2657, 1914, 508, 3754, 2079, 39, 3698, 1879, 3455, 800, 3322, 159, 2225, 3046, 304, 3621, 615, 3870, 2567, 1077, 3651, 1484, 2749, 190, 4008, 606, 3564, ++ 2203, 1410, 4046, 1988, 837, 1468, 3978, 2260, 1638, 3764, 929, 3547, 322, 1819, 2855, 394, 1416, 3006, 1702, 785, 3262, 960, 2249, 3624, 1224, 4023, 790, 3432, 1254, 3939, 1621, 2283, 531, 1519, 3394, 1258, 3819, 756, 3244, 1609, 2583, 1294, 3141, 1052, 2674, 261, 2427, 1553, 3653, 1165, 1962, 2606, 1040, 2320, 1937, 1284, 3160, 56, 2237, 920, 3194, 1372, 2401, 1952, ++ 0, 2901, 689, 2338, 3352, 2840, 73, 3405, 739, 2054, 2959, 1377, 2202, 3941, 1067, 2021, 3306, 167, 3905, 2042, 307, 3792, 1860, 144, 2615, 1978, 3064, 278, 1945, 2383, 119, 3544, 1874, 3131, 203, 2187, 1767, 2462, 1229, 3585, 364, 2798, 683, 3503, 1412, 4049, 921, 2762, 447, 3144, 733, 3797, 1489, 2986, 231, 3474, 780, 2793, 3935, 1670, 2526, 367, 3315, 841, ++ 3855, 1734, 3489, 305, 1240, 1836, 2195, 1011, 3078, 390, 2556, 642, 3209, 44, 2743, 3679, 917, 2695, 1133, 3400, 2436, 1098, 3056, 1415, 3277, 487, 1568, 2643, 3670, 925, 3101, 1034, 2747, 1167, 4030, 2848, 580, 3094, 99, 2287, 1031, 3989, 1968, 2379, 514, 2053, 3092, 1234, 3894, 1623, 2472, 14, 3259, 822, 3968, 2248, 1401, 2040, 449, 3269, 766, 3706, 1603, 2944, ++ 1250, 2650, 1015, 2537, 3943, 579, 3630, 2593, 1274, 3467, 1583, 3829, 1162, 2471, 1543, 553, 2365, 1613, 3072, 677, 1748, 2752, 528, 3929, 906, 2321, 3810, 1288, 628, 2829, 2094, 3852, 401, 2392, 773, 1591, 3468, 1424, 3881, 1789, 3331, 1520, 172, 3220, 1636, 3596, 89, 1924, 2284, 1003, 3435, 1739, 2124, 2778, 1806, 517, 2621, 3403, 1205, 1829, 2869, 1062, 2293, 244, ++ 3608, 431, 3246, 1379, 2123, 2937, 1532, 296, 3922, 1865, 177, 2143, 2974, 427, 4011, 1912, 3576, 326, 2109, 4031, 75, 3655, 2206, 1541, 2048, 3401, 33, 2161, 3364, 1476, 254, 1691, 3337, 1375, 3605, 2055, 362, 2738, 716, 2634, 450, 2212, 3777, 1187, 2870, 863, 2485, 3363, 337, 3008, 634, 4063, 452, 1268, 3313, 1112, 3727, 253, 2228, 3798, 103, 1974, 4075, 1406, ++ 2430, 2061, 776, 3720, 152, 1114, 3293, 2325, 840, 2660, 3272, 988, 1725, 3366, 1305, 3020, 968, 3228, 799, 2544, 1840, 1016, 3170, 259, 2873, 777, 1838, 3142, 455, 3985, 2010, 2993, 646, 2689, 45, 2547, 3745, 1137, 3212, 1317, 3023, 825, 2566, 410, 2127, 3931, 1538, 775, 3812, 1785, 2757, 1368, 2542, 3584, 127, 3041, 2011, 1548, 2940, 723, 2490, 3307, 530, 2789, ++ 948, 3123, 1579, 2777, 2035, 3848, 542, 1946, 3057, 1433, 558, 3731, 2600, 755, 2326, 108, 2785, 1380, 3750, 1276, 3451, 2063, 737, 3567, 1247, 4062, 2671, 884, 2411, 1126, 2588, 952, 3794, 1858, 3161, 924, 1804, 2333, 218, 3974, 1679, 3456, 1394, 3572, 1757, 211, 3205, 2659, 1242, 2346, 176, 3140, 850, 2188, 1716, 2460, 560, 4019, 997, 3522, 1735, 1105, 3048, 1661, ++ 3773, 61, 3548, 609, 1273, 2553, 1682, 3609, 
25, 4056, 1898, 2233, 224, 3604, 1107, 3802, 1673, 2410, 246, 2849, 459, 3066, 2578, 1625, 2425, 368, 1463, 3734, 1671, 3510, 107, 3257, 1341, 460, 2239, 3947, 550, 3441, 1993, 971, 2408, 20, 2765, 1061, 2952, 2292, 1371, 434, 3642, 972, 3460, 1659, 3736, 330, 3904, 888, 3276, 1362, 2735, 356, 2281, 3901, 227, 2171, ++ 805, 2528, 1774, 2183, 3165, 267, 2860, 1008, 2461, 1285, 2834, 935, 3081, 1509, 2694, 2073, 671, 3168, 1939, 1141, 3969, 1429, 133, 3815, 973, 3231, 2085, 201, 2918, 698, 2781, 1644, 2311, 3387, 1057, 1561, 2821, 1216, 2599, 3355, 701, 3791, 1892, 481, 4091, 651, 3310, 2191, 1700, 2899, 707, 2448, 1312, 2668, 1454, 2792, 2107, 36, 2030, 3234, 1465, 865, 2597, 3484, ++ 1518, 4000, 429, 3415, 870, 3949, 1503, 3492, 721, 3372, 361, 3789, 1831, 529, 3487, 320, 4081, 1012, 3607, 2250, 629, 2169, 3347, 1976, 2997, 722, 3620, 2351, 1200, 3889, 2144, 810, 4082, 236, 2023, 3515, 117, 3709, 444, 1835, 2845, 1299, 3198, 2140, 1510, 2546, 887, 3857, 49, 2009, 3994, 404, 3035, 1048, 3410, 461, 3668, 1618, 3828, 590, 2807, 3613, 1354, 620, ++ 3157, 1152, 2867, 1297, 2711, 2265, 490, 2131, 2955, 1712, 2350, 1214, 3203, 2179, 1069, 2577, 1857, 2924, 17, 2699, 1692, 2950, 1197, 485, 1582, 2740, 1407, 544, 3278, 1784, 382, 2941, 1208, 2510, 3063, 844, 2722, 2215, 1595, 4026, 226, 2370, 743, 3681, 145, 3465, 2068, 2782, 1392, 3153, 1006, 2255, 3638, 105, 1821, 2517, 813, 2920, 1189, 2409, 1967, 122, 3058, 2067, ++ 2403, 252, 1980, 3686, 116, 1652, 3746, 1337, 204, 3970, 669, 2715, 82, 3895, 1635, 3409, 763, 1526, 3502, 967, 3719, 268, 3900, 2393, 3558, 62, 3991, 1973, 2540, 987, 3716, 1909, 3327, 554, 1615, 3761, 1366, 595, 3300, 876, 3116, 1138, 2893, 1342, 2678, 1754, 378, 1079, 3563, 525, 2733, 1853, 1143, 2112, 4073, 1228, 3360, 1904, 377, 3391, 1042, 4050, 1721, 478, ++ 3860, 1624, 3270, 936, 2088, 2929, 610, 3145, 2444, 1486, 3534, 2007, 2996, 808, 2402, 199, 2828, 2100, 505, 3127, 1435, 2558, 2003, 613, 1852, 2633, 1117, 3164, 271, 3476, 2271, 3, 1469, 3853, 2362, 340, 2898, 1878, 2570, 1462, 2166, 3872, 335, 3367, 639, 3988, 2964, 1908, 2374, 1577, 3835, 281, 3481, 2839, 582, 3018, 187, 2276, 3928, 1597, 2652, 831, 2557, 3425, ++ 1119, 2836, 561, 2514, 4088, 1064, 3458, 1957, 886, 2841, 341, 1101, 1683, 3236, 1365, 3617, 1033, 3921, 2447, 1846, 750, 3402, 1087, 3065, 1308, 3439, 705, 1752, 2872, 1374, 848, 3207, 2637, 1030, 2065, 3462, 910, 3911, 57, 3623, 534, 1894, 2559, 1611, 2297, 1413, 746, 3783, 175, 3211, 905, 2527, 754, 1464, 2369, 1665, 3751, 1396, 652, 3040, 272, 3266, 1307, 2102, ++ 15, 2274, 3526, 1432, 350, 1795, 2565, 48, 3883, 1269, 3411, 2136, 4027, 310, 2626, 645, 3113, 1311, 290, 3784, 2680, 113, 2268, 4068, 339, 1947, 2468, 3830, 526, 2078, 3950, 1711, 636, 3049, 166, 1731, 3108, 1184, 1999, 2669, 1349, 3500, 890, 3711, 78, 3290, 2502, 1201, 2853, 2184, 1357, 3303, 1926, 3884, 313, 2675, 789, 3171, 2437, 1265, 3718, 1920, 608, 2951, ++ 3766, 1569, 857, 2897, 2118, 3645, 762, 3182, 1629, 2360, 703, 2900, 954, 1876, 3759, 1488, 2289, 1911, 2956, 981, 1664, 3644, 918, 1562, 2904, 3661, 147, 1475, 3311, 2649, 230, 2245, 3636, 1386, 4022, 2603, 509, 2376, 3345, 847, 3059, 208, 2866, 1186, 2646, 1703, 366, 3392, 1823, 540, 4017, 6, 2987, 991, 3375, 1159, 3601, 2066, 68, 2772, 767, 2322, 3587, 1399, ++ 519, 2181, 3879, 161, 3229, 1149, 2032, 2656, 417, 3767, 2018, 134, 3549, 2223, 552, 3284, 59, 3987, 589, 3444, 2339, 422, 2805, 2151, 563, 1196, 3199, 2345, 874, 1300, 3531, 1142, 2746, 372, 2182, 821, 3744, 1631, 269, 4070, 1587, 2405, 1782, 3887, 674, 3560, 2071, 942, 
3669, 1513, 2571, 2089, 691, 2453, 1877, 2874, 468, 1717, 3525, 1430, 3960, 1626, 207, 2734, ++ 1832, 3000, 1203, 2585, 1749, 463, 3986, 1355, 3349, 975, 3054, 1604, 2607, 1182, 2938, 1698, 2574, 1227, 2794, 2069, 1146, 3260, 1316, 3876, 1794, 2681, 1996, 473, 3993, 1936, 3032, 567, 2001, 3397, 1478, 3252, 1148, 2954, 1929, 2272, 623, 3167, 420, 2158, 2930, 1121, 3085, 2441, 221, 2892, 1082, 3610, 1602, 3803, 157, 1523, 4044, 2285, 1043, 2912, 456, 2516, 3454, 978, ++ 4037, 315, 3361, 622, 3691, 2194, 2984, 189, 1815, 2530, 497, 3271, 740, 3859, 270, 3671, 834, 3511, 1771, 186, 3743, 718, 3015, 29, 3373, 819, 3762, 1578, 2755, 77, 2414, 1634, 3915, 881, 2826, 94, 2560, 467, 3520, 1260, 3724, 1360, 3473, 1498, 163, 4009, 555, 1851, 3800, 787, 3251, 299, 3039, 1232, 3268, 2229, 662, 3002, 242, 1899, 3295, 1198, 1989, 3159, ++ 709, 2443, 2041, 1525, 2666, 1275, 859, 2090, 3557, 1116, 4054, 1437, 2404, 1944, 2810, 1068, 2352, 385, 3156, 2450, 1472, 2639, 1710, 2366, 1140, 2884, 262, 3239, 964, 3690, 807, 3135, 251, 2384, 1699, 3865, 1318, 3098, 779, 2486, 18, 2631, 914, 2799, 1919, 2364, 1566, 3334, 1255, 2304, 1656, 2688, 2047, 574, 2653, 1091, 3408, 1332, 3844, 2445, 631, 3737, 93, 2290, ++ 1646, 3672, 1046, 3508, 70, 3886, 2419, 3213, 346, 2815, 2138, 5, 3430, 614, 1479, 3374, 1651, 4064, 1353, 771, 3888, 344, 3545, 667, 4035, 1859, 2497, 1320, 2300, 1765, 2632, 1436, 3658, 1118, 3350, 731, 2315, 1843, 3956, 1010, 3316, 2082, 3918, 472, 3640, 744, 2690, 65, 2823, 428, 4078, 854, 3570, 1458, 3952, 354, 2014, 2624, 836, 1497, 3042, 1709, 2771, 1103, ++ 2990, 228, 2862, 811, 3189, 1736, 512, 1482, 3809, 710, 1773, 3034, 1346, 3951, 2291, 141, 2890, 585, 2696, 2022, 2910, 1055, 2197, 3139, 1382, 489, 3635, 621, 3461, 220, 4080, 546, 1881, 2958, 412, 1998, 3588, 184, 1455, 2761, 1605, 717, 1778, 3188, 1170, 3030, 1391, 3862, 1905, 3423, 1063, 2505, 101, 1932, 2429, 1267, 3701, 37, 3488, 2083, 294, 4013, 803, 3470, ++ 1387, 3907, 1793, 2324, 1244, 2800, 2006, 3068, 980, 2385, 3628, 1050, 2729, 380, 1798, 3739, 928, 2232, 3413, 81, 1669, 3329, 1524, 216, 2703, 2097, 2936, 1639, 2045, 2712, 1344, 2167, 3494, 1019, 2458, 3158, 1168, 2592, 3448, 440, 3788, 2945, 202, 2474, 1995, 321, 3501, 1018, 577, 2176, 3083, 1373, 2865, 3344, 513, 3093, 1600, 2832, 1099, 3143, 2303, 1025, 2622, 363, ++ 2263, 668, 2717, 395, 4071, 659, 3673, 137, 2682, 1660, 275, 3210, 824, 2423, 3308, 1270, 3095, 1856, 1221, 3652, 2529, 576, 3961, 1895, 3735, 1124, 90, 3890, 941, 3255, 817, 2881, 32, 2731, 1715, 312, 4047, 649, 1986, 2358, 1246, 1927, 3574, 838, 4048, 2341, 1722, 2610, 3247, 1556, 276, 3677, 643, 1164, 3806, 2261, 774, 1903, 3913, 641, 1461, 3282, 1769, 3657, ++ 1614, 3325, 1175, 3133, 1565, 2483, 1113, 3297, 1319, 4003, 2241, 1236, 3838, 1606, 655, 2751, 248, 3920, 464, 2149, 950, 3045, 1261, 2825, 736, 2432, 3358, 1442, 2511, 323, 3725, 1527, 3940, 862, 3602, 2257, 1581, 2797, 998, 3147, 284, 2705, 1045, 2889, 1504, 648, 3073, 185, 1243, 3902, 2310, 1746, 2117, 2710, 1521, 180, 3516, 2647, 318, 2489, 3682, 143, 2908, 732, ++ 2508, 22, 3822, 1954, 222, 3483, 2137, 474, 2902, 588, 3114, 2034, 114, 2882, 3666, 1822, 1059, 2323, 3216, 1329, 3805, 155, 2298, 402, 3128, 1540, 2177, 656, 3061, 1800, 2313, 523, 1915, 3089, 1376, 694, 3281, 83, 3688, 1727, 3966, 1451, 3265, 50, 2064, 3697, 1335, 3449, 2028, 758, 2791, 949, 3996, 351, 3241, 2172, 1805, 995, 3079, 1131, 1872, 2389, 1220, 3973, ++ 1336, 2838, 931, 2361, 2992, 788, 1714, 3878, 2208, 1842, 976, 3532, 1358, 2103, 475, 2230, 3491, 1558, 734, 2817, 1741, 3431, 
1384, 3705, 899, 3565, 274, 3997, 1166, 3538, 922, 3390, 2614, 209, 2390, 3877, 1188, 3010, 2157, 801, 2500, 522, 2218, 3849, 2539, 470, 2723, 897, 2473, 3614, 21, 3386, 1447, 3016, 895, 3874, 504, 3619, 1542, 4089, 692, 3440, 2057, 493, ++ 3245, 1759, 3615, 543, 1491, 3768, 2576, 1266, 46, 3643, 2545, 353, 2661, 4094, 1135, 2932, 38, 2564, 4010, 286, 2386, 802, 2684, 1770, 2522, 1321, 2914, 1689, 2673, 121, 2888, 1755, 1212, 3466, 1992, 462, 2605, 1864, 360, 3299, 1219, 3527, 1667, 685, 1185, 3223, 1966, 4006, 418, 1594, 3112, 2253, 591, 1900, 2449, 1364, 2864, 1970, 67, 2254, 2827, 329, 2971, 1058, ++ 2296, 283, 2617, 1873, 3152, 1120, 389, 3263, 2725, 1552, 726, 3226, 1686, 604, 3280, 1500, 3695, 999, 2025, 3192, 1078, 2979, 451, 4061, 10, 3221, 753, 2331, 984, 3842, 2160, 407, 4021, 878, 2966, 1545, 3536, 945, 3813, 1599, 2748, 178, 3118, 2008, 3631, 1608, 136, 1419, 2934, 2189, 682, 1326, 3729, 2736, 131, 3420, 818, 2521, 3320, 898, 1668, 3841, 1431, 3713, ++ 1536, 3406, 1007, 4034, 128, 2244, 3575, 2016, 951, 3930, 2299, 1020, 3748, 2435, 1925, 324, 2200, 3043, 571, 1775, 3785, 1456, 3275, 1151, 2165, 1803, 3793, 316, 3291, 1923, 1090, 3076, 2426, 1841, 612, 3201, 160, 2278, 2830, 549, 2132, 4093, 867, 2591, 365, 2769, 2114, 3553, 1004, 3821, 1828, 3336, 338, 1607, 4041, 1202, 3067, 419, 1327, 3583, 2190, 793, 2677, 100, ++ 2480, 764, 2921, 2133, 1282, 2801, 1388, 657, 3173, 279, 1820, 2965, 132, 1231, 2812, 3938, 861, 1440, 3590, 2663, 120, 2282, 675, 2620, 3554, 969, 2488, 1598, 2824, 495, 3641, 1505, 54, 3771, 2243, 1381, 3977, 1960, 1279, 3569, 1036, 1786, 2994, 1378, 3906, 1074, 3343, 572, 2518, 206, 2877, 868, 2413, 3155, 581, 2308, 1697, 3808, 2024, 2718, 240, 3202, 1233, 3497, ++ 1718, 3863, 406, 1632, 3381, 511, 3963, 2440, 1470, 2619, 3341, 1315, 2217, 3593, 557, 1760, 3324, 2501, 408, 1314, 3479, 1708, 3896, 1554, 247, 3017, 638, 3418, 1334, 2455, 816, 3377, 2142, 1029, 2878, 437, 2551, 687, 2957, 7, 2491, 3332, 273, 2334, 782, 2968, 1783, 1278, 3124, 2074, 1490, 3942, 2004, 1066, 1907, 3568, 191, 2796, 605, 1122, 3995, 1850, 2273, 695, ++ 3130, 1156, 2356, 3728, 915, 3105, 2059, 170, 3753, 1080, 503, 4016, 804, 3115, 1361, 2693, 86, 1837, 3854, 2349, 769, 2846, 393, 3117, 2072, 1257, 3867, 2031, 135, 4084, 1958, 2709, 708, 3243, 1570, 3708, 1130, 3419, 1695, 3858, 1508, 658, 3786, 1707, 3472, 69, 2434, 4052, 421, 3599, 664, 2573, 53, 3382, 2642, 957, 3193, 1499, 2238, 3024, 1559, 494, 3689, 2584, ++}; +Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.c ++++ jellyfin-ffmpeg/libavfilter/vf_scale_cuda.c +@@ -1,5 +1,8 @@ + /* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. ++* Copyright (c) 2019 rcombs ++* ++* This file is part of FFmpeg. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -20,10 +23,10 @@ + * DEALINGS IN THE SOFTWARE. 
+ */
+
+-#include <float.h>
+ #include <stdio.h>
+ #include <string.h>
+
++#include "libavutil/avassert.h"
+ #include "libavutil/avstring.h"
+ #include "libavutil/common.h"
+ #include "libavutil/hwcontext.h"
+@@ -34,13 +37,12 @@
+ #include "libavutil/pixdesc.h"
+
+ #include "avfilter.h"
++#include "dither_matrix.h"
+ #include "formats.h"
+ #include "internal.h"
+ #include "scale_eval.h"
+ #include "video.h"
+
+-#include "vf_scale_cuda.h"
+-
+ static const enum AVPixelFormat supported_formats[] = {
+ AV_PIX_FMT_YUV420P,
+ AV_PIX_FMT_NV12,
+@@ -48,8 +50,6 @@ static const enum AVPixelFormat supporte
+ AV_PIX_FMT_P010,
+ AV_PIX_FMT_P016,
+ AV_PIX_FMT_YUV444P16,
+- AV_PIX_FMT_0RGB32,
+- AV_PIX_FMT_0BGR32,
+ };
+
+ #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
+@@ -58,17 +58,6 @@ static const enum AVPixelFormat supporte
+
+ #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
+
+-enum {
+- INTERP_ALGO_DEFAULT,
+-
+- INTERP_ALGO_NEAREST,
+- INTERP_ALGO_BILINEAR,
+- INTERP_ALGO_BICUBIC,
+- INTERP_ALGO_LANCZOS,
+-
+- INTERP_ALGO_COUNT
+-};
+-
+ typedef struct CUDAScaleContext {
+ const AVClass *class;
+
+@@ -87,6 +76,7 @@ typedef struct CUDAScaleContext {
+ * Output sw format. AV_PIX_FMT_NONE for no conversion.
+ */
+ enum AVPixelFormat format;
++ char *format_str;
+
+ char *w_expr; ///< width expression string
+ char *h_expr; ///< height expression string
+@@ -96,30 +86,56 @@ typedef struct CUDAScaleContext {
+
+ CUcontext cu_ctx;
+ CUmodule cu_module;
+- CUfunction cu_func_uchar;
+- CUfunction cu_func_uchar2;
+- CUfunction cu_func_uchar4;
+- CUfunction cu_func_ushort;
+- CUfunction cu_func_ushort2;
+- CUfunction cu_func_ushort4;
++
++#define VARIANT(NAME) \
++ CUfunction cu_func_ ## NAME;
++#define VARIANTSET(NAME) \
++ VARIANT(NAME) \
++ VARIANT(NAME ## _c) \
++ VARIANT(NAME ## _p2) \
++ VARIANT(NAME ## _2) \
++ VARIANT(NAME ## _2_u) \
++ VARIANT(NAME ## _2_v) \
++ VARIANT(NAME ## _4)
++
++ VARIANTSET(8_8)
++ VARIANTSET(16_16)
++ VARIANTSET(8_16)
++ VARIANTSET(16_8)
++#undef VARIANTSET
++#undef VARIANT
++
++ CUfunction cu_func_luma;
++ CUfunction cu_func_chroma_u;
++ CUfunction cu_func_chroma_v;
++
+ CUstream cu_stream;
+
+ CUdeviceptr srcBuffer;
+ CUdeviceptr dstBuffer;
+ int tex_alignment;
+
+- int interp_algo;
+- int interp_use_linear;
+- int interp_as_integer;
++ const AVPixFmtDescriptor *in_desc, *out_desc;
++ int in_planes, out_planes;
+
+- float param;
++ CUdeviceptr ditherBuffer;
++ CUtexObject ditherTex;
+ } CUDAScaleContext;
+
+ static av_cold int cudascale_init(AVFilterContext *ctx)
+ {
+ CUDAScaleContext *s = ctx->priv;
+
+- s->format = AV_PIX_FMT_NONE;
++ if (!strcmp(s->format_str, "same")) {
++ s->format = AV_PIX_FMT_NONE;
++ } else {
++ s->format = av_get_pix_fmt(s->format_str);
++ if (s->format == AV_PIX_FMT_NONE) {
++ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
++ return AVERROR(EINVAL);
++ }
++ }
++
+ s->frame = av_frame_alloc();
+ if (!s->frame)
+ return AVERROR(ENOMEM);
+@@ -135,13 +151,22 @@ static av_cold void cudascale_uninit(AVF
+ {
+ CUDAScaleContext *s = ctx->priv;
+
+- if (s->hwctx && s->cu_module) {
++ if (s->hwctx) {
+ CudaFunctions *cu = s->hwctx->internal->cuda_dl;
+- CUcontext dummy;
++ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
++
++ CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
++
++ if (s->ditherTex) {
++ CHECK_CU(cu->cuTexObjectDestroy(s->ditherTex));
++ s->ditherTex = 0;
++ }
++
++ if (s->ditherBuffer) {
++ CHECK_CU(cu->cuMemFree(s->ditherBuffer));
++ s->ditherBuffer = 0;
++ }
+
+- CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
+- CHECK_CU(cu->cuModuleUnload(s->cu_module)); +- s->cu_module = NULL; + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + } + +@@ -262,6 +287,63 @@ static av_cold int init_processing_chain + return 0; + } + ++static av_cold int cudascale_setup_dither(AVFilterContext *ctx) ++{ ++ CUDAScaleContext *s = ctx->priv; ++ AVFilterLink *inlink = ctx->inputs[0]; ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; ++ CudaFunctions *cu = device_hwctx->internal->cuda_dl; ++ CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; ++ int ret = 0; ++ ++ CUDA_MEMCPY2D cpy = { ++ .srcMemoryType = CU_MEMORYTYPE_HOST, ++ .dstMemoryType = CU_MEMORYTYPE_DEVICE, ++ .srcHost = ff_fruit_dither_matrix, ++ .dstDevice = 0, ++ .srcPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .dstPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .WidthInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .Height = ff_fruit_dither_size, ++ }; ++ ++ CUDA_TEXTURE_DESC tex_desc = { ++ .filterMode = CU_TR_FILTER_MODE_POINT, ++ .flags = CU_TRSF_READ_AS_INTEGER, ++ }; ++ ++ CUDA_RESOURCE_DESC res_desc = { ++ .resType = CU_RESOURCE_TYPE_PITCH2D, ++ .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT16, ++ .res.pitch2D.numChannels = 1, ++ .res.pitch2D.width = ff_fruit_dither_size, ++ .res.pitch2D.height = ff_fruit_dither_size, ++ .res.pitch2D.pitchInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .res.pitch2D.devPtr = 0, ++ }; ++ ++ av_assert0(sizeof(ff_fruit_dither_matrix) == sizeof(ff_fruit_dither_matrix[0]) * ff_fruit_dither_size * ff_fruit_dither_size); ++ ++ if ((ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx))) < 0) ++ return ret; ++ ++ if ((ret = CHECK_CU(cu->cuMemAlloc(&s->ditherBuffer, sizeof(ff_fruit_dither_matrix)))) < 0) ++ goto fail; ++ ++ res_desc.res.pitch2D.devPtr = cpy.dstDevice = s->ditherBuffer; ++ ++ if ((ret = CHECK_CU(cu->cuMemcpy2D(&cpy))) < 0) ++ goto fail; ++ ++ if ((ret = CHECK_CU(cu->cuTexObjectCreate(&s->ditherTex, &res_desc, &tex_desc, NULL))) < 0) ++ goto fail; ++ ++fail: ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ return ret; ++} ++ + static av_cold int cudascale_config_props(AVFilterLink *outlink) + { + AVFilterContext *ctx = outlink->src; +@@ -271,46 +353,11 @@ static av_cold int cudascale_config_prop + AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; + CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; + CudaFunctions *cu = device_hwctx->internal->cuda_dl; +- char buf[64]; + int w, h; ++ int i; + int ret; + +- char *scaler_ptx; +- const char *function_infix = ""; +- + extern char vf_scale_cuda_ptx[]; +- extern char vf_scale_cuda_bicubic_ptx[]; +- +- switch(s->interp_algo) { +- case INTERP_ALGO_NEAREST: +- scaler_ptx = vf_scale_cuda_ptx; +- function_infix = "_Nearest"; +- s->interp_use_linear = 0; +- s->interp_as_integer = 1; +- break; +- case INTERP_ALGO_BILINEAR: +- scaler_ptx = vf_scale_cuda_ptx; +- function_infix = "_Bilinear"; +- s->interp_use_linear = 1; +- s->interp_as_integer = 1; +- break; +- case INTERP_ALGO_DEFAULT: +- case INTERP_ALGO_BICUBIC: +- scaler_ptx = vf_scale_cuda_bicubic_ptx; +- function_infix = "_Bicubic"; +- s->interp_use_linear = 0; +- s->interp_as_integer = 0; +- break; +- case INTERP_ALGO_LANCZOS: +- scaler_ptx = vf_scale_cuda_bicubic_ptx; +- function_infix = "_Lanczos"; +- s->interp_use_linear = 0; +- s->interp_as_integer = 0; +- break; +- default: +- av_log(ctx, AV_LOG_ERROR, "Unknown interpolation 
algorithm\n"); +- return AVERROR_BUG; +- } + + s->hwctx = device_hwctx; + s->cu_stream = s->hwctx->stream; +@@ -319,40 +366,30 @@ static av_cold int cudascale_config_prop + if (ret < 0) + goto fail; + +- ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, scaler_ptx)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_uchar", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_uchar2", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_uchar4", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_ushort", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_ushort2", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_ushort4", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, buf)); ++ ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx)); + if (ret < 0) + goto fail; + ++#define VARIANT(NAME) \ ++ CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ ## NAME, s->cu_module, "Subsample_Bilinear_" #NAME)); \ ++ if (ret < 0) \ ++ goto fail; ++ ++#define VARIANTSET(NAME) \ ++ VARIANT(NAME) \ ++ VARIANT(NAME ## _c) \ ++ VARIANT(NAME ## _2) \ ++ VARIANT(NAME ## _p2) \ ++ VARIANT(NAME ## _2_u) \ ++ VARIANT(NAME ## _2_v) \ ++ VARIANT(NAME ## _4) ++ ++ VARIANTSET(8_8) ++ VARIANTSET(16_16) ++ VARIANTSET(8_16) ++ VARIANTSET(16_8) ++#undef VARIANTSET ++#undef VARIANT + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + +@@ -376,6 +413,53 @@ static av_cold int cudascale_config_prop + if (ret < 0) + return ret; + ++ s->in_desc = av_pix_fmt_desc_get(s->in_fmt); ++ s->out_desc = av_pix_fmt_desc_get(s->out_fmt); ++ ++ for (i = 0; i < s->in_desc->nb_components; i++) ++ s->in_planes = FFMAX(s->in_planes, s->in_desc ->comp[i].plane + 1); ++ ++ for (i = 0; i < s->in_desc->nb_components; i++) ++ s->out_planes = FFMAX(s->out_planes, s->out_desc->comp[i].plane + 1); ++ ++#define VARIANT(INDEPTH, OUTDEPTH, SUFFIX) s->cu_func_ ## INDEPTH ## _ ## OUTDEPTH ## SUFFIX ++#define BITS(n) ((n + 7) & ~7) ++#define VARIANTSET(INDEPTH, OUTDEPTH) \ ++ else if (BITS(s->in_desc->comp[0].depth) == INDEPTH && \ ++ BITS(s->out_desc->comp[0].depth) == OUTDEPTH) { \ ++ s->cu_func_luma = VARIANT(INDEPTH, OUTDEPTH,); \ ++ if (s->in_planes == 3 && s->out_planes == 3) { \ ++ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _c); \ ++ } else if (s->in_planes == 3 && s->out_planes == 2) { \ ++ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _p2); \ ++ } else if (s->in_planes == 2 && s->out_planes == 2) { \ ++ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2); \ ++ } else if (s->in_planes == 2 && s->out_planes == 3) { \ ++ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2_u); \ ++ s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _2_v); \ ++ } else { \ ++ ret = AVERROR_BUG; \ ++ goto fail; \ ++ } \ ++ } ++ ++ if (0) {} ++ VARIANTSET(8, 8) ++ VARIANTSET(16, 16) ++ VARIANTSET(8, 16) ++ VARIANTSET(16, 8) ++ else { ++ ret = AVERROR_BUG; ++ goto fail; ++ } 
++#undef VARIANTSET ++#undef VARIANT ++ ++ if (s->in_desc->comp[0].depth > s->out_desc->comp[0].depth) { ++ if ((ret = cudascale_setup_dither(ctx)) < 0) ++ goto fail; ++ } ++ + av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d%s\n", + inlink->w, inlink->h, outlink->w, outlink->h, s->passthrough ? " (passthrough)" : ""); + +@@ -396,21 +480,18 @@ fail: + static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, + uint8_t *src_dptr, int src_width, int src_height, int src_pitch, + uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, +- int pixel_size, int bit_depth) ++ int pixel_size) + { + CUDAScaleContext *s = ctx->priv; + CudaFunctions *cu = s->hwctx->internal->cuda_dl; + CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; + CUtexObject tex = 0; +- void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, +- &src_width, &src_height, &bit_depth, &s->param }; ++ void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height, &s->ditherTex }; + int ret; + + CUDA_TEXTURE_DESC tex_desc = { +- .filterMode = s->interp_use_linear ? +- CU_TR_FILTER_MODE_LINEAR : +- CU_TR_FILTER_MODE_POINT, +- .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0, ++ .filterMode = CU_TR_FILTER_MODE_LINEAR, ++ .flags = CU_TRSF_READ_AS_INTEGER, + }; + + CUDA_RESOURCE_DESC res_desc = { +@@ -425,10 +506,6 @@ static int call_resize_kernel(AVFilterCo + .res.pitch2D.devPtr = (CUdeviceptr)src_dptr, + }; + +- // Handling of channels is done via vector-types in cuda, so their size is implicitly part of the pitch +- // Same for pixel_size, which is represented via datatypes on the cuda side of things. +- dst_pitch /= channels * pixel_size; +- + ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL)); + if (ret < 0) + goto exit; +@@ -447,91 +524,37 @@ exit: + static int scalecuda_resize(AVFilterContext *ctx, + AVFrame *out, AVFrame *in) + { +- AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data; + CUDAScaleContext *s = ctx->priv; + +- switch (in_frames_ctx->sw_format) { +- case AV_PIX_FMT_YUV420P: +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[1], in->width / 2, in->height / 2, in->linesize[1], +- out->data[1], out->width / 2, out->height / 2, out->linesize[1], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[2], in->width / 2, in->height / 2, in->linesize[2], +- out->data[2], out->width / 2, out->height / 2, out->linesize[2], +- 1, 8); +- break; +- case AV_PIX_FMT_YUV444P: +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[1], in->width, in->height, in->linesize[1], +- out->data[1], out->width, out->height, out->linesize[1], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[2], in->width, in->height, in->linesize[2], +- out->data[2], out->width, out->height, out->linesize[2], +- 1, 8); +- break; +- case AV_PIX_FMT_YUV444P16: +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 2, 16); +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[1], in->width, 
in->height, in->linesize[1], +- out->data[1], out->width, out->height, out->linesize[1], +- 2, 16); +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[2], in->width, in->height, in->linesize[2], +- out->data[2], out->width, out->height, out->linesize[2], +- 2, 16); +- break; +- case AV_PIX_FMT_NV12: +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar2, 2, +- in->data[1], in->width / 2, in->height / 2, in->linesize[1], +- out->data[1], out->width / 2, out->height / 2, out->linesize[1], +- 1, 8); +- break; +- case AV_PIX_FMT_P010LE: +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 2, 10); +- call_resize_kernel(ctx, s->cu_func_ushort2, 2, +- in->data[1], in->width / 2, in->height / 2, in->linesize[1], +- out->data[1], out->width / 2, out->height / 2, out->linesize[1], +- 2, 10); +- break; +- case AV_PIX_FMT_P016LE: +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 2, 16); +- call_resize_kernel(ctx, s->cu_func_ushort2, 2, +- in->data[1], in->width / 2, in->height / 2, in->linesize[1], +- out->data[1], out->width / 2, out->height / 2, out->linesize[1], +- 2, 16); +- break; +- case AV_PIX_FMT_0RGB32: +- case AV_PIX_FMT_0BGR32: +- call_resize_kernel(ctx, s->cu_func_uchar4, 4, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 1, 8); +- break; +- default: +- return AVERROR_BUG; ++#define DEPTH_BYTES(depth) (((depth) + 7) / 8) ++ ++ call_resize_kernel(ctx, s->cu_func_luma, 1, ++ in->data[0], in->width, in->height, in->linesize[0], ++ out->data[0], out->width, out->height, out->linesize[0], ++ DEPTH_BYTES(s->in_desc->comp[0].depth)); ++ ++ call_resize_kernel(ctx, s->cu_func_chroma_u, s->in_planes == 2 ? 2 : 1, ++ in->data[1], ++ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), ++ in->linesize[1], ++ out->data[1], ++ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), ++ out->linesize[1], ++ DEPTH_BYTES(s->in_desc->comp[1].depth)); ++ ++ if (s->cu_func_chroma_v) { ++ call_resize_kernel(ctx, s->cu_func_chroma_v, s->in_planes == 2 ? 
2 : 1, ++ in->data[s->in_desc->comp[2].plane], ++ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), ++ in->linesize[s->in_desc->comp[2].plane], ++ out->data[s->out_desc->comp[2].plane] + s->out_desc->comp[2].offset, ++ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), ++ out->linesize[s->out_desc->comp[2].plane], ++ DEPTH_BYTES(s->in_desc->comp[2].depth)); + } + + return 0; +@@ -621,20 +644,15 @@ static AVFrame *cudascale_get_video_buff + #define OFFSET(x) offsetof(CUDAScaleContext, x) + #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) + static const AVOption options[] = { +- { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, +- { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, +- { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" }, +- { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" }, +- { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" }, +- { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" }, +- { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" }, ++ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, + { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, +- { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS }, +- { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" }, +- { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, +- { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, +- { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, +- { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS }, ++ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, ++ { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, ++ { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, ++ { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, ++ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, + { NULL }, + }; + +Index: 
jellyfin-ffmpeg/libavfilter/vf_scale_cuda.cu +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.cu ++++ jellyfin-ffmpeg/libavfilter/vf_scale_cuda.cu +@@ -20,35 +20,115 @@ + * DEALINGS IN THE SOFTWARE. + */ + +-#include "cuda/vector_helpers.cuh" ++typedef unsigned char uchar; ++typedef unsigned short ushort; + +-template<typename T> +-__device__ inline void Subsample_Nearest(cudaTextureObject_t tex, +- T *dst, +- int dst_width, int dst_height, int dst_pitch, +- int src_width, int src_height, +- int bit_depth) ++#define SHIFTDOWN(val) (dstbase)(val >> abs(2 + shift)) ++#define SHIFTUP(val) (dstbase)(val << abs(-shift - 2)) ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift1_d + { +- int xo = blockIdx.x * blockDim.x + threadIdx.x; +- int yo = blockIdx.y * blockDim.y + threadIdx.y; ++ typedef DST dstbase; + +- if (yo < dst_height && xo < dst_width) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) + { +- float hscale = (float)src_width / (float)dst_width; +- float vscale = (float)src_height / (float)dst_height; +- float xi = (xo + 0.5f) * hscale; +- float yi = (yo + 0.5f) * vscale; ++ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + ((1 + d) >> (sizeof(SRC) * 8 - dither + 3)); + +- dst[yo*dst_pitch+xo] = tex2D<T>(tex, xi, yi); ++ if (shift > -2) ++ return SHIFTDOWN(ret); ++ else ++ return SHIFTUP(ret); + } +-} ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift1 ++{ ++ typedef DST dstbase; + +-template<typename T> +-__device__ inline void Subsample_Bilinear(cudaTextureObject_t tex, +- T *dst, +- int dst_width, int dst_height, int dst_pitch, +- int src_width, int src_height, +- int bit_depth) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + 2; ++ ++ if (shift > -2) ++ return SHIFTDOWN(ret); ++ else ++ return SHIFTUP(ret); ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift2 ++{ ++ typedef decltype(DST::x) dstbase; ++ ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; ++ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; ++ ++ if (shift > -2) ++ return { SHIFTDOWN(retx), SHIFTDOWN(rety) }; ++ else ++ return { SHIFTUP(retx), SHIFTUP(rety) }; ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift2_x ++{ ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ return add_conv_shift1<decltype(SRC::x), DST, shift, dither>()(i1.x, i2.x, i3.x, i4.x, d); ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift2_y ++{ ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ return add_conv_shift1<decltype(SRC::y), DST, shift, dither>()(i1.y, i2.y, i3.y, i4.y, d); ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift3 ++{ ++ typedef decltype(DST::x) dstbase; ++ ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; ++ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; ++ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; ++ ++ if (shift > -2) ++ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz) }; ++ else ++ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz) }; ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift4 ++{ ++ typedef decltype(DST::x) dstbase; ++ ++ __inline__ __device__ DST operator()(SRC i1, SRC
i2, SRC i3, SRC i4, ushort d) ++ { ++ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; ++ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; ++ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; ++ unsigned retw = (unsigned)i1.w + (unsigned)i2.w + (unsigned)i3.w + (unsigned)i4.w + 2; ++ ++ if (shift > -2) ++ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz), SHIFTDOWN(retw) }; ++ else ++ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz), SHIFTUP(retw) }; ++ } ++}; ++ ++template<typename SRC, typename DST, template<typename, typename, int, int> class conv, int pitch, int shift, int dither> ++__inline__ __device__ void Subsample_Bilinear(cudaTextureObject_t tex, ++ DST *dst, ++ int dst_width, int dst_height, int dst_pitch, ++ int src_width, int src_height, ++ cudaTextureObject_t ditherTex) + { + int xo = blockIdx.x * blockDim.x + threadIdx.x; + int yo = blockIdx.y * blockDim.y + threadIdx.y; +@@ -66,58 +146,48 @@ __device__ inline void Subsample_Bilinea + float dx = wh / (0.5f + wh); + float dy = wv / (0.5f + wv); + +- intT r = { 0 }; +- vec_set_scalar(r, 2); +- r += tex2D<T>(tex, xi - dx, yi - dy); +- r += tex2D<T>(tex, xi + dx, yi - dy); +- r += tex2D<T>(tex, xi - dx, yi + dy); +- r += tex2D<T>(tex, xi + dx, yi + dy); +- vec_set(dst[yo*dst_pitch+xo], r >> 2); ++ SRC i0 = tex2D<SRC>(tex, xi-dx, yi-dy); ++ SRC i1 = tex2D<SRC>(tex, xi+dx, yi-dy); ++ SRC i2 = tex2D<SRC>(tex, xi-dx, yi+dy); ++ SRC i3 = tex2D<SRC>(tex, xi+dx, yi+dy); ++ ++ ushort ditherVal = dither ? tex2D<ushort>(ditherTex, xo, yo) : 0; ++ ++ dst[yo*(dst_pitch / sizeof(DST))+xo*pitch] = conv<SRC, DST, shift, dither>()(i0, i1, i2, i3, ditherVal); + } + } + + extern "C" { + +-#define NEAREST_KERNEL(T) \ +- __global__ void Subsample_Nearest_ ## T(cudaTextureObject_t src_tex, \ +- T *dst, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, \ +- int bit_depth) \ +- { \ +- Subsample_Nearest<T>(src_tex, dst, \ +- dst_width, dst_height, dst_pitch, \ +- src_width, src_height, \ +- bit_depth); \ +- } +- +-NEAREST_KERNEL(uchar) +-NEAREST_KERNEL(uchar2) +-NEAREST_KERNEL(uchar4) +- +-NEAREST_KERNEL(ushort) +-NEAREST_KERNEL(ushort2) +-NEAREST_KERNEL(ushort4) +- +-#define BILINEAR_KERNEL(T) \ +- __global__ void Subsample_Bilinear_ ## T(cudaTextureObject_t src_tex, \ +- T *dst, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, \ +- int bit_depth) \ +- { \ +- Subsample_Bilinear<T>(src_tex, dst, \ +- dst_width, dst_height, dst_pitch, \ +- src_width, src_height, \ +- bit_depth); \ +- } +- +-BILINEAR_KERNEL(uchar) +-BILINEAR_KERNEL(uchar2) +-BILINEAR_KERNEL(uchar4) +- +-BILINEAR_KERNEL(ushort) +-BILINEAR_KERNEL(ushort2) +-BILINEAR_KERNEL(ushort4) ++#define VARIANT(SRC, DST, CONV, SHIFT, PITCH, DITHER, NAME) \ ++__global__ void Subsample_Bilinear_ ## NAME(cudaTextureObject_t tex, \ ++ DST *dst, \ ++ int dst_width, int dst_height, int dst_pitch, \ ++ int src_width, int src_height, \ ++ cudaTextureObject_t ditherTex) \ ++{ \ ++ Subsample_Bilinear<SRC, DST, CONV, PITCH, SHIFT, DITHER>(tex, dst, dst_width, dst_height, dst_pitch, \ ++ src_width, src_height, ditherTex); \ ++} ++ ++#define VARIANTSET2(SRC, DST, SHIFT, NAME) \ ++ VARIANT(SRC, DST, add_conv_shift1_d, SHIFT, 1, (sizeof(DST) < sizeof(SRC)) ?
sizeof(DST) : 0, NAME) \ ++ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 1, 0, NAME ## _c) \ ++ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 2, 0, NAME ## _p2) \ ++ VARIANT(SRC ## 2, DST ## 2, add_conv_shift2, SHIFT, 1, 0, NAME ## _2) \ ++ VARIANT(SRC ## 2, DST, add_conv_shift2_x, SHIFT, 1, 0, NAME ## _2_u) \ ++ VARIANT(SRC ## 2, DST, add_conv_shift2_y, SHIFT, 1, 0, NAME ## _2_v) \ ++ VARIANT(SRC ## 4, DST ## 4, add_conv_shift4, SHIFT, 1, 0, NAME ## _4) ++ ++#define VARIANTSET(SRC, DST, SRCSIZE, DSTSIZE) \ ++ VARIANTSET2(SRC, DST, (SRCSIZE - DSTSIZE), SRCSIZE ## _ ## DSTSIZE) ++ ++// Straight no-conversion ++VARIANTSET(uchar, uchar, 8, 8) ++VARIANTSET(ushort, ushort, 16, 16) ++ ++// Conversion between 8- and 16-bit ++VARIANTSET(uchar, ushort, 8, 16) ++VARIANTSET(ushort, uchar, 16, 8) + + } +Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.h ++++ /dev/null +@@ -1,28 +0,0 @@ +-/* +- * This file is part of FFmpeg. +- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +- * DEALINGS IN THE SOFTWARE. +- */ +- +-#ifndef AVFILTER_SCALE_CUDA_H +-#define AVFILTER_SCALE_CUDA_H +- +-#define SCALE_CUDA_PARAM_DEFAULT 999999.0f +- +-#endif +Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda_bicubic.cu +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda_bicubic.cu ++++ /dev/null +@@ -1,224 +0,0 @@ +-/* +- * This file is part of FFmpeg. +- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +- * DEALINGS IN THE SOFTWARE. +- */ +- +-#include "cuda/vector_helpers.cuh" +-#include "vf_scale_cuda.h" +- +-typedef float4 (*coeffs_function_t)(float, float); +- +-__device__ inline float4 lanczos_coeffs(float x, float param) +-{ +- const float pi = 3.141592654f; +- +- float4 res = make_float4( +- pi * (x + 1), +- pi * x, +- pi * (x - 1), +- pi * (x - 2)); +- +- res.x = res.x == 0.0f ? 1.0f : +- __sinf(res.x) * __sinf(res.x / 2.0f) / (res.x * res.x / 2.0f); +- res.y = res.y == 0.0f ? 1.0f : +- __sinf(res.y) * __sinf(res.y / 2.0f) / (res.y * res.y / 2.0f); +- res.z = res.z == 0.0f ? 1.0f : +- __sinf(res.z) * __sinf(res.z / 2.0f) / (res.z * res.z / 2.0f); +- res.w = res.w == 0.0f ? 1.0f : +- __sinf(res.w) * __sinf(res.w / 2.0f) / (res.w * res.w / 2.0f); +- +- return res / (res.x + res.y + res.z + res.w); +-} +- +-__device__ inline float4 bicubic_coeffs(float x, float param) +-{ +- const float A = param == SCALE_CUDA_PARAM_DEFAULT ? 0.0f : -param; +- +- float4 res; +- res.x = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A; +- res.y = ((A + 2) * x - (A + 3)) * x * x + 1; +- res.z = ((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1; +- res.w = 1.0f - res.x - res.y - res.z; +- +- return res; +-} +- +-__device__ inline void derived_fast_coeffs(float4 coeffs, float x, float *h0, float *h1, float *s) +-{ +- float g0 = coeffs.x + coeffs.y; +- float g1 = coeffs.z + coeffs.w; +- +- *h0 = coeffs.y / g0 - 0.5f; +- *h1 = coeffs.w / g1 + 1.5f; +- *s = g0 / (g0 + g1); +-} +- +-template<typename V> +-__device__ inline V apply_coeffs(float4 coeffs, V c0, V c1, V c2, V c3) +-{ +- V res = c0 * coeffs.x; +- res += c1 * coeffs.y; +- res += c2 * coeffs.z; +- res += c3 * coeffs.w; +- +- return res; +-} +- +-template<typename T> +-__device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function, +- cudaTextureObject_t src_tex, +- T *dst, +- int dst_width, int dst_height, int dst_pitch, +- int src_width, int src_height, +- int bit_depth, float param) +-{ +- int xo = blockIdx.x * blockDim.x + threadIdx.x; +- int yo = blockIdx.y * blockDim.y + threadIdx.y; +- +- if (yo < dst_height && xo < dst_width) +- { +- float hscale = (float)src_width / (float)dst_width; +- float vscale = (float)src_height / (float)dst_height; +- float xi = (xo + 0.5f) * hscale - 0.5f; +- float yi = (yo + 0.5f) * vscale - 0.5f; +- float px = floor(xi); +- float py = floor(yi); +- float fx = xi - px; +- float fy = yi - py; +- +- float factor = bit_depth > 8 ? 0xFFFF : 0xFF; +- +- float4 coeffsX = coeffs_function(fx, param); +- float4 coeffsY = coeffs_function(fy, param); +- +-#define PIX(x, y) tex2D<floatT>(src_tex, (x), (y)) +- +- dst[yo * dst_pitch + xo] = from_floatN<T>( +- apply_coeffs(coeffsY, +- apply_coeffs(coeffsX, PIX(px - 1, py - 1), PIX(px, py - 1), PIX(px + 1, py - 1), PIX(px + 2, py - 1)), +- apply_coeffs(coeffsX, PIX(px - 1, py ), PIX(px, py ), PIX(px + 1, py ), PIX(px + 2, py )), +- apply_coeffs(coeffsX, PIX(px - 1, py + 1), PIX(px, py + 1), PIX(px + 1, py + 1), PIX(px + 2, py + 1)), +- apply_coeffs(coeffsX, PIX(px - 1, py + 2), PIX(px, py + 2), PIX(px + 1, py + 2), PIX(px + 2, py + 2)) +- ) * factor +- ); +- +-#undef PIX +- } +-} +- +-/* This does not yield correct results.
Most likely because of low internal precision in tex2D linear interpolation */ +-template<typename T> +-__device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function, +- cudaTextureObject_t src_tex, +- T *dst, +- int dst_width, int dst_height, int dst_pitch, +- int src_width, int src_height, +- int bit_depth, float param) +-{ +- int xo = blockIdx.x * blockDim.x + threadIdx.x; +- int yo = blockIdx.y * blockDim.y + threadIdx.y; +- +- if (yo < dst_height && xo < dst_width) +- { +- float hscale = (float)src_width / (float)dst_width; +- float vscale = (float)src_height / (float)dst_height; +- float xi = (xo + 0.5f) * hscale - 0.5f; +- float yi = (yo + 0.5f) * vscale - 0.5f; +- float px = floor(xi); +- float py = floor(yi); +- float fx = xi - px; +- float fy = yi - py; +- +- float factor = bit_depth > 8 ? 0xFFFF : 0xFF; +- +- float4 coeffsX = coeffs_function(fx, param); +- float4 coeffsY = coeffs_function(fy, param); +- +- float h0x, h1x, sx; +- float h0y, h1y, sy; +- derived_fast_coeffs(coeffsX, fx, &h0x, &h1x, &sx); +- derived_fast_coeffs(coeffsY, fy, &h0y, &h1y, &sy); +- +-#define PIX(x, y) tex2D<floatT>(src_tex, (x), (y)) +- +- floatT pix[4] = { +- PIX(px + h0x, py + h0y), +- PIX(px + h1x, py + h0y), +- PIX(px + h0x, py + h1y), +- PIX(px + h1x, py + h1y) +- }; +- +-#undef PIX +- +- dst[yo * dst_pitch + xo] = from_floatN<T>( +- lerp_scalar( +- lerp_scalar(pix[3], pix[2], sx), +- lerp_scalar(pix[1], pix[0], sx), +- sy) * factor +- ); +- } +-} +- +-extern "C" { +- +-#define BICUBIC_KERNEL(T) \ +- __global__ void Subsample_Bicubic_ ## T(cudaTextureObject_t src_tex, \ +- T *dst, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, \ +- int bit_depth, float param) \ +- { \ +- Subsample_Bicubic<T>(&bicubic_coeffs, src_tex, dst, \ +- dst_width, dst_height, dst_pitch, \ +- src_width, src_height, \ +- bit_depth, param); \ +- } +- +-BICUBIC_KERNEL(uchar) +-BICUBIC_KERNEL(uchar2) +-BICUBIC_KERNEL(uchar4) +- +-BICUBIC_KERNEL(ushort) +-BICUBIC_KERNEL(ushort2) +-BICUBIC_KERNEL(ushort4) +- +- +-#define LANCZOS_KERNEL(T) \ +- __global__ void Subsample_Lanczos_ ## T(cudaTextureObject_t src_tex, \ +- T *dst, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, \ +- int bit_depth, float param) \ +- { \ +- Subsample_Bicubic<T>(&lanczos_coeffs, src_tex, dst, \ +- dst_width, dst_height, dst_pitch, \ +- src_width, src_height, \ +- bit_depth, param); \ +- } +- +-LANCZOS_KERNEL(uchar) +-LANCZOS_KERNEL(uchar2) +-LANCZOS_KERNEL(uchar4) +- +-LANCZOS_KERNEL(ushort) +-LANCZOS_KERNEL(ushort2) +-LANCZOS_KERNEL(ushort4) +- +-} diff --git a/debian/patches/series b/debian/patches/series index c2e011753c2..2b13748bf23 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1 +1,2 @@ 0001-add-fixes-for-segement-muxer.patch +0002-add-cuda-pixfmt-converter-impl.patch From cd47bf1feb6235f6bc53288dc4121e04aeba436b Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 22:21:28 +0800 Subject: [PATCH 12/41] add cuda tonemap impl --- .../patches/0003-add-cuda-tonemap-impl.patch | 1639 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 1640 insertions(+) create mode 100644 debian/patches/0003-add-cuda-tonemap-impl.patch diff --git a/debian/patches/0003-add-cuda-tonemap-impl.patch b/debian/patches/0003-add-cuda-tonemap-impl.patch new file mode 100644 index 00000000000..81b702a0257 --- /dev/null +++ b/debian/patches/0003-add-cuda-tonemap-impl.patch @@ -0,0 +1,1639 @@ +Index: jellyfin-ffmpeg/configure
+=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ jellyfin-ffmpeg/configure +@@ -3058,6 +3058,8 @@ scale_cuda_filter_deps="ffnvcodec" + scale_cuda_filter_deps_any="cuda_nvcc cuda_llvm" + thumbnail_cuda_filter_deps="ffnvcodec" + thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" ++tonemap_cuda_filter_deps="ffnvcodec const_nan" ++tonemap_cuda_filter_deps_any="cuda_nvcc cuda_llvm" + transpose_npp_filter_deps="ffnvcodec libnpp" + overlay_cuda_filter_deps="ffnvcodec" + overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +@@ -6251,7 +6253,7 @@ fi + if enabled cuda_nvcc; then + nvccflags="$nvccflags -ptx" + else +- nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -include ${source_link}/compat/cuda/cuda_runtime.h" ++ nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -std=c++14 -include ${source_link}/compat/cuda/cuda_runtime.h" + check_nvcc cuda_llvm + fi + +Index: jellyfin-ffmpeg/ffbuild/common.mak +=================================================================== +--- jellyfin-ffmpeg.orig/ffbuild/common.mak ++++ jellyfin-ffmpeg/ffbuild/common.mak +@@ -38,6 +38,7 @@ OBJCCFLAGS = $(CPPFLAGS) $(CFLAGS) $(OB + ASFLAGS := $(CPPFLAGS) $(ASFLAGS) + CXXFLAGS := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS) + X86ASMFLAGS += $(IFLAGS:%=%/) -I$( 0.0f ? __powf(x, 1.2f) : x; ++} ++ ++static __inline__ __device__ float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? __powf(x, 1.0f / 1.2f) : x; ++} ++ ++static __inline__ __device__ float oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? __sqrtf(3.0f * x) ++ : (ARIB_B67_A * __logf(12.0f * x - ARIB_B67_B) + ARIB_B67_C); ++} ++ ++static __inline__ __device__ float inverse_oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= 0.5f ++ ? (x * x) * (1.0f / 3.0f) ++ : (__expf((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); ++} ++ ++// linearizer for HLG/ARIB-B67 ++static __inline__ __device__ float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)); ++} ++ ++// delinearizer for HLG/ARIB-B67 ++static __inline__ __device__ float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x)); ++} ++ ++// delinearizer for BT709, BT2020-10 ++static __inline__ __device__ float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? 
__powf(x, 1.0f / 2.4f) : 0.0f; ++} ++ ++static __inline__ __device__ float linearize(float x) ++{ ++ if (trc_src == AVCOL_TRC_SMPTE2084) ++ return eotf_st2084(x); ++ else if (trc_src == AVCOL_TRC_ARIB_STD_B67) ++ return eotf_arib_b67(x); ++ else ++ return x; ++} ++ ++static __inline__ __device__ float delinearize(float x) ++{ ++ if (trc_dst == AVCOL_TRC_BT709 || trc_dst == AVCOL_TRC_BT2020_10) ++ return inverse_eotf_bt1886(x); ++ else ++ return x; ++} ++ ++static __inline__ __device__ float3 yuv2rgb(float y, float u, float v) { ++ if (range_src == AVCOL_RANGE_JPEG) { ++ u -= 0.5f; v -= 0.5f; ++ } else { ++ y = (y * 255.0f - 16.0f) / 219.0f; ++ u = (u * 255.0f - 128.0f) / 224.0f; ++ v = (v * 255.0f - 128.0f) / 224.0f; ++ } ++ float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; ++ float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; ++ float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; ++ return make_float3(r, g, b); ++} ++ ++static __inline__ __device__ float3 yuv2lrgb(float3 yuv) { ++ float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z); ++ return make_float3(linearize(rgb.x), ++ linearize(rgb.y), ++ linearize(rgb.z)); ++} ++ ++static __inline__ __device__ float3 rgb2yuv(float r, float g, float b) { ++ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; ++ float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; ++ float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; ++ if (range_dst == AVCOL_RANGE_JPEG) { ++ u += 0.5f; v += 0.5f; ++ } else { ++ y = (219.0f * y + 16.0f) / 255.0f; ++ u = (224.0f * u + 128.0f) / 255.0f; ++ v = (224.0f * v + 128.0f) / 255.0f; ++ } ++ return make_float3(y, u, v); ++} ++ ++static __inline__ __device__ float rgb2y(float r, float g, float b) { ++ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; ++ if (range_dst != AVCOL_RANGE_JPEG) ++ y = (219.0f * y + 16.0f) / 255.0f; ++ return y; ++} ++ ++static __inline__ __device__ float3 lrgb2yuv(float3 c) { ++ float r = delinearize(c.x); ++ float g = delinearize(c.y); ++ float b = delinearize(c.z); ++ return rgb2yuv(r, g, b); ++} ++ ++static __inline__ __device__ float3 lrgb2lrgb(float3 c) { ++ if (rgb2rgb_passthrough) { ++ return c; ++ } else { ++ float r = c.x, g = c.y, b = c.z; ++ float rr = rgb2rgb_matrix[0] * r + rgb2rgb_matrix[1] * g + rgb2rgb_matrix[2] * b; ++ float gg = rgb2rgb_matrix[3] * r + rgb2rgb_matrix[4] * g + rgb2rgb_matrix[5] * b; ++ float bb = rgb2rgb_matrix[6] * r + rgb2rgb_matrix[7] * g + rgb2rgb_matrix[8] * b; ++ return make_float3(rr, gg, bb); ++ } ++} ++ ++#endif /* AVFILTER_CUDA_COLORSPACE_COMMON_H */ +Index: jellyfin-ffmpeg/libavfilter/cuda/host_util.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/host_util.c +@@ -0,0 +1,35 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavfilter/colorspace.h" ++#include "host_util.h" ++ ++int ff_make_cuda_frame(FFCUDAFrame *dst, const AVFrame *src) ++{ ++ int i = 0; ++ for (i = 0; i < 4; i++) { ++ dst->data[i] = src->data[i]; ++ dst->linesize[i] = src->linesize[i]; ++ } ++ ++ dst->width = src->width; ++ dst->height = src->height; ++ ++ return 0; ++} ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/host_util.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/host_util.h +@@ -0,0 +1,29 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_HOST_UTIL_H ++#define AVFILTER_CUDA_HOST_UTIL_H ++ ++#include "libavutil/frame.h" ++ ++#include "shared.h" ++ ++int ff_make_cuda_frame(FFCUDAFrame *dst, const AVFrame *src); ++ ++#endif /* AVFILTER_CUDA_HOST_UTIL_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/pixfmt.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/pixfmt.h +@@ -0,0 +1,209 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_PIXFMT_H ++#define AVFILTER_CUDA_PIXFMT_H ++ ++#include "shared.h" ++ ++extern __constant__ const enum AVPixelFormat fmt_src, fmt_dst; ++extern __constant__ const int depth_src, depth_dst; ++ ++// Single-sample read function ++template<typename T, int p> ++static __inline__ __device__ T read_sample(const FFCUDAFrame& frame, int x, int y) ++{ ++ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); ++ return ptr[x]; ++} ++ ++// Per-format read functions ++static __inline__ __device__ ushort3 read_p016(const FFCUDAFrame& frame, int x, int y) ++{ ++ return make_ushort3(read_sample<ushort, 0>(frame, x, y), ++ read_sample<ushort, 1>(frame, (x & ~1), y / 2), ++ read_sample<ushort, 1>(frame, (x & ~1) + 1, y / 2)); ++} ++ ++static __inline__ __device__ ushort3 read_p010(const FFCUDAFrame& frame, int x, int y) ++{ ++ ushort3 val = read_p016(frame, x, y); ++ return make_ushort3(val.x >> 6, ++ val.y >> 6, ++ val.z >> 6); ++} ++ ++static __inline__ __device__ ushort3 read_yuv420p16(const FFCUDAFrame& frame, int x, int y) ++{ ++ return make_ushort3(read_sample<ushort, 0>(frame, x, y), ++ read_sample<ushort, 1>(frame, x / 2, y / 2), ++ read_sample<ushort, 2>(frame, x / 2, y / 2)); ++} ++ ++static __inline__ __device__ ushort3 read_yuv420p10(const FFCUDAFrame& frame, int x, int y) ++{ ++ ushort3 val = read_yuv420p16(frame, x, y); ++ return make_ushort3(val.x >> 6, ++ val.y >> 6, ++ val.z >> 6); ++} ++ ++// Generic read functions ++static __inline__ __device__ ushort3 read_px(const FFCUDAFrame& frame, int x, int y) ++{ ++ if (fmt_src == AV_PIX_FMT_P016) ++ return read_p016(frame, x, y); ++ else if (fmt_src == AV_PIX_FMT_P010) ++ return read_p010(frame, x, y); ++ else ++ return make_ushort3(0, 0, 0); ++} ++ ++static __inline__ __device__ float sample_to_float(unsigned short i) ++{ ++ return (float)i / ((1 << depth_src) - 1); ++} ++ ++static __inline__ __device__ float3 pixel_to_float3(ushort3 flt) ++{ ++ return make_float3(sample_to_float(flt.x), ++ sample_to_float(flt.y), ++ sample_to_float(flt.z)); ++} ++ ++static __inline__ __device__ float3 read_px_flt(const FFCUDAFrame& frame, int x, int y) ++{ ++ return pixel_to_float3(read_px(frame, x, y)); ++} ++ ++// Single-sample write function ++template<int p, typename T> ++static __inline__ __device__ void write_sample(const FFCUDAFrame& frame, int x, int y, T sample) ++{ ++ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); ++ ptr[x] = sample; ++} ++ ++// Per-format write functions ++static __inline__ __device__ void write_nv12_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned char)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); ++ ++ write_sample<1>(frame, (x & ~1), y / 2, (unsigned char)chroma.y); ++ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned char)chroma.z); ++} ++ ++static __inline__ __device__ void write_yuv420p_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned char)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); ++ ++
write_sample<1>(frame, x / 2, y / 2, (unsigned char)chroma.y); ++ write_sample<2>(frame, x / 2, y / 2, (unsigned char)chroma.z); ++} ++ ++static __inline__ __device__ void write_p016_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned short)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); ++ ++ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)chroma.y); ++ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)chroma.z); ++} ++ ++static __inline__ __device__ void write_p010_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); ++ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); ++ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); ++ ++ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)(chroma.y << 6)); ++ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)(chroma.z << 6)); ++} ++ ++static __inline__ __device__ void write_yuv420p16_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned short)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); ++ ++ write_sample<1>(frame, x / 2, y / 2, (unsigned short)chroma.y); ++ write_sample<2>(frame, x / 2, y / 2, (unsigned short)chroma.z); ++} ++ ++static __inline__ __device__ void write_yuv420p10_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); ++ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); ++ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); ++ ++ write_sample<1>(frame, x / 2, y / 2, (unsigned short)(chroma.y << 6)); ++ write_sample<2>(frame, x / 2, y / 2, (unsigned short)(chroma.z << 6)); ++} ++ ++// Generic write functions ++static __inline__ __device__ void write_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ if (fmt_dst == AV_PIX_FMT_YUV420P) ++ write_yuv420p_2x2(frame, x, y, a, b, c, d, chroma); ++ else if (fmt_dst == AV_PIX_FMT_NV12) ++ write_nv12_2x2(frame, x, y, a, b, c, d, chroma); ++ else if (fmt_dst == AV_PIX_FMT_P016) ++ write_p016_2x2(frame, x, y, a, b, c, d, chroma); ++ else if (fmt_dst == AV_PIX_FMT_P010) ++ write_p010_2x2(frame, x, y, a, b, c, d, chroma); ++} ++ ++static __inline__ __device__ unsigned short sample_to_ushort(float flt) ++{ ++ return (unsigned short)(flt * ((1 << depth_dst) - 1)); ++} ++ ++static __inline__ __device__ ushort3 pixel_to_ushort3(float3 flt) ++{ ++ return make_ushort3(sample_to_ushort(flt.x), ++ sample_to_ushort(flt.y), ++ sample_to_ushort(flt.z)); ++} ++ ++static __inline__ __device__ void write_2x2_flt(const FFCUDAFrame& frame, int x, int y, float3 a, float3 b, float3 c, float3 d) ++{ ++ float3 chroma = get_chroma_sample(a, b, c, d); ++ ++ ushort3 ia = pixel_to_ushort3(a); ++ ushort3 ib = pixel_to_ushort3(b); ++ ushort3 ic = pixel_to_ushort3(c); 
++ ushort3 id = pixel_to_ushort3(d); ++ ++ ushort3 ichroma = pixel_to_ushort3(chroma); ++ ++ write_2x2(frame, x, y, ia, ib, ic, id, ichroma); ++} ++ ++#endif /* AVFILTER_CUDA_PIXFMT_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/shared.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/shared.h +@@ -0,0 +1,32 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_SHARED_H ++#define AVFILTER_CUDA_SHARED_H ++ ++typedef struct FFCUDAFrame { ++ unsigned char *data[4]; ++ int linesize[4]; ++ ++ int width, height; ++ ++ float peak; ++} FFCUDAFrame; ++ ++#endif /* AVFILTER_CUDA_SHARED_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/tonemap.cu +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/tonemap.cu +@@ -0,0 +1,201 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "colorspace_common.h" ++#include "pixfmt.h" ++#include "tonemap.h" ++#include "util.h" ++ ++extern __constant__ const enum TonemapAlgorithm tonemap_func; ++extern __constant__ const float tone_param; ++extern __constant__ const float desat_param; ++ ++#define mix(x, y, a) ((x) + ((y) - (x)) * (a)) ++ ++static __inline__ __device__ ++float hable_f(float in) { ++ float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; ++ return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; ++} ++ ++static __inline__ __device__ ++float direct(float s, float peak) { ++ return s; ++} ++ ++static __inline__ __device__ ++float linear(float s, float peak) { ++ return s * tone_param / peak; ++} ++ ++static __inline__ __device__ ++float gamma(float s, float peak) { ++ float p = s > 0.05f ? s / peak : 0.05f / peak; ++ float v = __powf(p, 1.0f / tone_param); ++ return s > 0.05f ? 
v : (s * v / 0.05f); ++} ++ ++static __inline__ __device__ ++float clip(float s, float peak) { ++ return clamp(s * tone_param, 0.0f, 1.0f); ++} ++ ++static __inline__ __device__ ++float reinhard(float s, float peak) { ++ return s / (s + tone_param) * (peak + tone_param) / peak; ++} ++ ++static __inline__ __device__ ++float hable(float s, float peak) { ++ return hable_f(s) / hable_f(peak); ++} ++ ++static __inline__ __device__ ++float mobius(float s, float peak) { ++ float j = tone_param; ++ float a, b; ++ ++ if (s <= j) ++ return s; ++ ++ a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); ++ b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, FLOAT_EPS); ++ ++ return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b); ++} ++ ++static __inline__ __device__ ++float bt2390(float s, float peak, float dst_peak) { ++ float peak_pq = inverse_eotf_st2084(peak); ++ float scale = peak_pq > 0.0f ? (1.0f / peak_pq) : 1.0f; ++ ++ float s_pq = inverse_eotf_st2084(s) * scale; ++ float max_lum = inverse_eotf_st2084(dst_peak) * scale; ++ ++ float ks = 1.5f * max_lum - 0.5f; ++ float tb = (s_pq - ks) / (1.0f - ks); ++ float tb2 = tb * tb; ++ float tb3 = tb2 * tb; ++ float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + ++ (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + ++ (-2.0f * tb3 + 3.0f * tb2) * max_lum; ++ float sig = mix(pb, s_pq, s_pq < ks); ++ ++ return eotf_st2084(sig * peak_pq); ++} ++ ++static __inline__ __device__ ++float map(float s, float peak, float dst_peak) ++{ ++ switch (tonemap_func) { ++ case TONEMAP_NONE: ++ default: ++ return direct(s, peak); ++ case TONEMAP_LINEAR: ++ return linear(s, peak); ++ case TONEMAP_GAMMA: ++ return gamma(s, peak); ++ case TONEMAP_CLIP: ++ return clip(s, peak); ++ case TONEMAP_REINHARD: ++ return reinhard(s, peak); ++ case TONEMAP_HABLE: ++ return hable(s, peak); ++ case TONEMAP_MOBIUS: ++ return mobius(s, peak); ++ case TONEMAP_BT2390: ++ return bt2390(s, peak, dst_peak); ++ } ++} ++ ++static __inline__ __device__ ++float3 map_one_pixel_rgb(float3 rgb, const FFCUDAFrame& src, const FFCUDAFrame& dst) { ++ float sig = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS); ++ float peak = src.peak; ++ float dst_peak = dst.peak; ++ ++ // Rescale the variables in order to bring it into a representation where ++ // 1.0 represents the dst_peak. This is because all of the tone mapping ++ // algorithms are defined in such a way that they map to the range [0.0, 1.0]. 
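++ // For example, with dst.peak == 4.0 (an HDR target) both sig and peak are divided by 4.0 below, so the curves in map() still saturate at 1.0; for an SDR target dst.peak == 1.0 and the rescale is skipped.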
++ if (dst.peak > 1.0f) { ++ sig *= 1.0f / dst.peak; ++ peak *= 1.0f / dst.peak; ++ } ++ ++ float sig_old = sig; ++ ++ // Desaturate the color using a coefficient dependent on the signal level ++ if (desat_param > 0.0f) { ++ float luma = get_luma_dst(rgb, luma_dst); ++ float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS); ++ coeff = __powf(coeff, 10.0f / desat_param); ++ rgb = mix(rgb, make_float3(luma, luma, luma), make_float3(coeff, coeff, coeff)); ++ } ++ ++ sig = map(sig, peak, dst_peak); ++ ++ sig = min(sig, 1.0f); ++ rgb = rgb * (sig / sig_old); ++ return rgb; ++} ++ ++// Map from source space YUV to destination space RGB ++static __inline__ __device__ ++float3 map_to_dst_space_from_yuv(float3 yuv) { ++ float3 c = yuv2lrgb(yuv); ++ c = lrgb2lrgb(c); ++ return c; ++} ++ ++extern "C" { ++ ++__global__ void tonemap(FFCUDAFrame src, FFCUDAFrame dst) ++{ ++ int xi = blockIdx.x * blockDim.x + threadIdx.x; ++ int yi = blockIdx.y * blockDim.y + threadIdx.y; ++ // each work item processes four pixels (a 2x2 block) ++ int x = 2 * xi; ++ int y = 2 * yi; ++ ++ if (y + 1 < src.height && x + 1 < src.width) ++ { ++ float3 yuv0 = read_px_flt(src, x, y); ++ float3 yuv1 = read_px_flt(src, x + 1, y); ++ float3 yuv2 = read_px_flt(src, x, y + 1); ++ float3 yuv3 = read_px_flt(src, x + 1, y + 1); ++ ++ float3 c0 = map_to_dst_space_from_yuv(yuv0); ++ float3 c1 = map_to_dst_space_from_yuv(yuv1); ++ float3 c2 = map_to_dst_space_from_yuv(yuv2); ++ float3 c3 = map_to_dst_space_from_yuv(yuv3); ++ ++ c0 = map_one_pixel_rgb(c0, src, dst); ++ c1 = map_one_pixel_rgb(c1, src, dst); ++ c2 = map_one_pixel_rgb(c2, src, dst); ++ c3 = map_one_pixel_rgb(c3, src, dst); ++ ++ yuv0 = lrgb2yuv(c0); ++ yuv1 = lrgb2yuv(c1); ++ yuv2 = lrgb2yuv(c2); ++ yuv3 = lrgb2yuv(c3); ++ ++ write_2x2_flt(dst, x, y, yuv0, yuv1, yuv2, yuv3); ++ } ++} ++ ++} +Index: jellyfin-ffmpeg/libavfilter/cuda/tonemap.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/tonemap.h +@@ -0,0 +1,35 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_TONEMAP_H ++#define AVFILTER_CUDA_TONEMAP_H ++ ++enum TonemapAlgorithm { ++ TONEMAP_NONE, ++ TONEMAP_LINEAR, ++ TONEMAP_GAMMA, ++ TONEMAP_CLIP, ++ TONEMAP_REINHARD, ++ TONEMAP_HABLE, ++ TONEMAP_MOBIUS, ++ TONEMAP_BT2390, ++ TONEMAP_MAX, ++}; ++ ++#endif /* AVFILTER_CUDA_TONEMAP_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/util.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/util.h +@@ -0,0 +1,55 @@ ++/* ++ * This file is part of FFmpeg.
++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_UTIL_H ++#define AVFILTER_CUDA_UTIL_H ++ ++static inline __device__ float3 operator+(const float3 &a, const float3 &b) { ++ return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); ++} ++ ++static inline __device__ float3 operator+(const float3 &a, float b) { ++ return make_float3(a.x + b, a.y + b, a.z + b); ++} ++ ++static inline __device__ float3 operator-(const float3 &a, const float3 &b) { ++ return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); ++} ++ ++static inline __device__ float3 operator-(const float3 &a, float b) { ++ return make_float3(a.x - b, a.y - b, a.z - b); ++} ++ ++static inline __device__ float3 operator*(const float3 &a, const float3 &b) { ++ return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); ++} ++ ++static inline __device__ float3 operator*(const float3 &a, float b) { ++ return make_float3(a.x * b, a.y * b, a.z * b); ++} ++ ++static inline __device__ float3 operator/(const float3 &a, const float3 &b) { ++ return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); ++} ++ ++static inline __device__ float3 operator/(const float3 &a, float b) { ++ return make_float3(a.x / b, a.y / b, a.z / b); ++} ++ ++#endif /* AVFILTER_CUDA_UTIL_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_cuda.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vf_tonemap_cuda.c +@@ -0,0 +1,720 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include <float.h> ++#include <stdio.h> ++#include <string.h> ++ ++#include "libavutil/avassert.h" ++#include "libavutil/avstring.h" ++#include "libavutil/bprint.h" ++#include "libavutil/common.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_cuda_internal.h" ++#include "libavutil/cuda_check.h" ++#include "libavutil/internal.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#include "avfilter.h" ++#include "colorspace.h" ++#include "cuda/host_util.h" ++#include "cuda/shared.h" ++#include "cuda/tonemap.h" ++#include "formats.h" ++#include "internal.h" ++#include "scale_eval.h" ++#include "video.h" ++ ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016 ++}; ++ ++#define REF_WHITE_BT2390 203.0f ++#define REF_WHITE_DEFAULT 100.0f ++ ++#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) ++#define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1)) ++#define NUM_BUFFERS 2 ++#define BLOCKX 32 ++#define BLOCKY 16 ++ ++#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) ++ ++typedef struct TonemapCUDAContext { ++ const AVClass *class; ++ ++ AVCUDADeviceContext *hwctx; ++ ++ enum AVPixelFormat in_fmt, out_fmt; ++ ++ enum AVColorTransferCharacteristic trc, in_trc, out_trc; ++ enum AVColorSpace spc, in_spc, out_spc; ++ enum AVColorPrimaries pri, in_pri, out_pri; ++ enum AVColorRange range, in_range, out_range; ++ enum AVChromaLocation in_chroma_loc, out_chroma_loc; ++ ++ AVBufferRef *frames_ctx; ++ AVFrame *frame; ++ ++ AVFrame *tmp_frame; ++ ++ /** ++ * Output sw format. AV_PIX_FMT_NONE for no conversion.
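++ * (init() maps the "format" option value "same" to AV_PIX_FMT_NONE.)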
++ */ ++ enum AVPixelFormat format; ++ char *format_str; ++ ++ CUcontext cu_ctx; ++ CUmodule cu_module; ++ ++ CUfunction cu_func; ++ ++ CUdeviceptr srcBuffer; ++ CUdeviceptr dstBuffer; ++ ++ enum TonemapAlgorithm tonemap; ++ double ref_white; ++ double param; ++ double desat_param; ++ double peak; ++ double dst_peak; ++ double scene_threshold; ++ ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++} TonemapCUDAContext; ++ ++static av_cold int init(AVFilterContext *ctx) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ ++ if (!strcmp(s->format_str, "same")) { ++ s->format = AV_PIX_FMT_NONE; ++ } else { ++ s->format = av_get_pix_fmt(s->format_str); ++ if (s->format == AV_PIX_FMT_NONE) { ++ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ s->frame = av_frame_alloc(); ++ if (!s->frame) ++ return AVERROR(ENOMEM); ++ ++ s->tmp_frame = av_frame_alloc(); ++ if (!s->tmp_frame) ++ return AVERROR(ENOMEM); ++ ++ return 0; ++} ++ ++static av_cold void uninit(AVFilterContext *ctx) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ ++ if (s->hwctx) { ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; ++ ++ CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); ++ ++ if (s->cu_module) { ++ CHECK_CU(cu->cuModuleUnload(s->cu_module)); ++ s->cu_func = NULL; ++ s->cu_module = NULL; ++ } ++ ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ } ++ ++ av_frame_free(&s->frame); ++ av_buffer_unref(&s->frames_ctx); ++ av_frame_free(&s->tmp_frame); ++} ++ ++static int query_formats(AVFilterContext *ctx) ++{ ++ static const enum AVPixelFormat pixel_formats[] = { ++ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, ++ }; ++ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); ++ ++ return ff_set_common_formats(ctx, pix_fmts); ++} ++ ++static av_cold int init_stage(TonemapCUDAContext *s, AVBufferRef *device_ctx, ++ AVFilterLink *outlink) ++{ ++ AVBufferRef *out_ref = NULL; ++ AVHWFramesContext *out_ctx; ++ int ret; ++ ++ out_ref = av_hwframe_ctx_alloc(device_ctx); ++ if (!out_ref) ++ return AVERROR(ENOMEM); ++ out_ctx = (AVHWFramesContext*)out_ref->data; ++ ++ out_ctx->format = AV_PIX_FMT_CUDA; ++ out_ctx->sw_format = s->out_fmt; ++ out_ctx->width = FFALIGN(outlink->w, 32); ++ out_ctx->height = FFALIGN(outlink->h, 32); ++ ++ ret = av_hwframe_ctx_init(out_ref); ++ if (ret < 0) ++ goto fail; ++ ++ av_frame_unref(s->frame); ++ ret = av_hwframe_get_buffer(out_ref, s->frame, 0); ++ if (ret < 0) ++ goto fail; ++ ++ s->frame->width = outlink->w; ++ s->frame->height = outlink->h; ++ ++ av_buffer_unref(&s->frames_ctx); ++ s->frames_ctx = out_ref; ++ ++ return 0; ++fail: ++ av_buffer_unref(&out_ref); ++ return ret; ++} ++ ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ int i; ++ ++ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static av_cold int init_processing_chain(AVFilterContext *ctx, AVFilterLink *outlink) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ ++ AVHWFramesContext *in_frames_ctx; ++ ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ const AVPixFmtDescriptor *in_desc; ++ const AVPixFmtDescriptor *out_desc; ++ int ret; ++ ++ /* check that we have a hw context */ ++ if (!ctx->inputs[0]->hw_frames_ctx) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); ++ return AVERROR(EINVAL); ++ } ++ in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; ++ in_format = in_frames_ctx->sw_format; ++ 
out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format; ++ in_desc = av_pix_fmt_desc_get(in_format); ++ out_desc = av_pix_fmt_desc_get(out_format); ++ ++ if (!format_is_supported(in_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(in_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!format_is_supported(out_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!(in_desc->comp[0].depth == 10 || ++ in_desc->comp[0].depth == 16)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", ++ in_desc->comp[0].depth); ++ return AVERROR(ENOSYS); ++ } ++ ++ s->in_fmt = in_format; ++ s->out_fmt = out_format; ++ s->in_desc = in_desc; ++ s->out_desc = out_desc; ++ ++ ret = init_stage(s, in_frames_ctx->device_ref, outlink); ++ if (ret < 0) ++ return ret; ++ ++ ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx); ++ if (!ctx->outputs[0]->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ ++ return 0; ++} ++ ++static const struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = { ++ [AVCOL_PRI_BT709] = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 }, ++ [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 }, ++}; ++ ++static const struct WhitepointCoefficients whitepoint_table[AVCOL_PRI_NB] = { ++ [AVCOL_PRI_BT709] = { 0.3127, 0.3290 }, ++ [AVCOL_PRI_BT2020] = { 0.3127, 0.3290 }, ++}; ++ ++static int get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, ++ double rgb2rgb[3][3]) { ++ double rgb2xyz[3][3], xyz2rgb[3][3]; ++ ++ ff_fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], rgb2xyz); ++ ff_matrix_invert_3x3(rgb2xyz, xyz2rgb); ++ ff_fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], rgb2xyz); ++ ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb); ++ ++ return 0; ++} ++ ++static av_cold int compile(AVFilterLink *inlink) ++{ ++ int ret = 0; ++ AVFilterContext *ctx = inlink->dst; ++ TonemapCUDAContext *s = ctx->priv; ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; ++ AVBPrint constants; ++ CUlinkState link_state; ++ void *cubin; ++ size_t cubin_size; ++ double rgb_matrix[3][3], yuv_matrix[3][3], rgb2rgb_matrix[3][3]; ++ const struct LumaCoefficients *in_coeffs, *out_coeffs; ++ enum AVColorTransferCharacteristic in_trc = s->in_trc, out_trc = s->out_trc; ++ enum AVColorSpace in_spc = s->in_spc, out_spc = s->out_spc; ++ enum AVColorPrimaries in_pri = s->in_pri, out_pri = s->out_pri; ++ enum AVColorRange in_range = s->in_range, out_range = s->out_range; ++ char info_log[4096], error_log[4096]; ++ CUjit_option options[] = {CU_JIT_INFO_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; ++ void *option_values[] = {&info_log, &error_log, (void*)(intptr_t)sizeof(info_log), (void*)(intptr_t)sizeof(error_log)}; ++ ++ extern char tonemap_ptx[]; ++ ++ switch(s->tonemap) { ++ case TONEMAP_GAMMA: ++ if (isnan(s->param)) ++ s->param = 1.8f; ++ break; ++ case TONEMAP_REINHARD: ++ if (!isnan(s->param)) ++ s->param = (1.0f - s->param) / s->param; ++ break; ++ case TONEMAP_MOBIUS: ++ if (isnan(s->param)) ++ s->param = 0.3f; ++ break; ++ } ++ ++ if (isnan(s->param)) ++ s->param = 1.0f; ++ ++ s->ref_white = s->tonemap == TONEMAP_BT2390 ? 
REF_WHITE_BT2390 ++ : REF_WHITE_DEFAULT; ++ ++ s->dst_peak = 1.0f; ++ ++ if (in_trc == AVCOL_TRC_UNSPECIFIED) ++ in_trc = AVCOL_TRC_SMPTE2084; ++ if (out_trc == AVCOL_TRC_UNSPECIFIED) ++ out_trc = AVCOL_TRC_BT709; ++ ++ if (in_spc == AVCOL_SPC_UNSPECIFIED) ++ in_spc = AVCOL_SPC_BT2020_NCL; ++ if (out_spc == AVCOL_SPC_UNSPECIFIED) ++ out_spc = AVCOL_SPC_BT709; ++ ++ if (in_pri == AVCOL_PRI_UNSPECIFIED) ++ in_pri = AVCOL_PRI_BT2020; ++ if (out_pri == AVCOL_PRI_UNSPECIFIED) ++ out_pri = AVCOL_PRI_BT709; ++ ++ if (in_range == AVCOL_RANGE_UNSPECIFIED) ++ in_range = AVCOL_RANGE_MPEG; ++ if (out_range == AVCOL_RANGE_UNSPECIFIED) ++ out_range = AVCOL_RANGE_MPEG; ++ ++ av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n", ++ av_color_transfer_name(in_trc), ++ av_color_transfer_name(out_trc)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n", ++ av_color_space_name(in_spc), ++ av_color_space_name(out_spc)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n", ++ av_color_primaries_name(in_pri), ++ av_color_primaries_name(out_pri)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n", ++ av_color_range_name(in_range), ++ av_color_range_name(out_range)); ++ ++ if (!(in_coeffs = ff_get_luma_coefficients(in_spc))) ++ return AVERROR(EINVAL); ++ ++ ff_fill_rgb2yuv_table(in_coeffs, yuv_matrix); ++ ff_matrix_invert_3x3(yuv_matrix, rgb_matrix); ++ ++ if (!(out_coeffs = ff_get_luma_coefficients(out_spc))) ++ return AVERROR(EINVAL); ++ ++ ff_fill_rgb2yuv_table(out_coeffs, yuv_matrix); ++ ++ if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0) ++ return ret; ++ ++ av_bprint_init(&constants, 2048, AV_BPRINT_SIZE_UNLIMITED); ++ ++ av_bprintf(&constants, ".version 3.2\n"); ++ av_bprintf(&constants, ".target sm_30\n"); ++ av_bprintf(&constants, ".address_size %zu\n", sizeof(void*) * 8); ++ ++#define CONSTANT_A(decl, align, ...) \ ++ av_bprintf(&constants, ".visible .const .align " #align " " decl ";\n", __VA_ARGS__) ++#define CONSTANT(decl, ...) 
CONSTANT_A(decl, 4, __VA_ARGS__)
++#define CONSTANT_M(a, b) \
++    CONSTANT(".f32 " a "[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f}", \
++             b[0][0], b[0][1], b[0][2], \
++             b[1][0], b[1][1], b[1][2], \
++             b[2][0], b[2][1], b[2][2])
++#define CONSTANT_C(a, b) \
++    CONSTANT(".f32 " a "[] = {%f, %f, %f}", \
++             b->cr, b->cg, b->cb)
++
++    CONSTANT(".u32 depth_src = %i", (int)s->in_desc->comp[0].depth);
++    CONSTANT(".u32 depth_dst = %i", (int)s->out_desc->comp[0].depth);
++    CONSTANT(".u32 fmt_src = %i", (int)s->in_fmt);
++    CONSTANT(".u32 fmt_dst = %i", (int)s->out_fmt);
++    CONSTANT(".u32 range_src = %i", (int)in_range);
++    CONSTANT(".u32 range_dst = %i", (int)out_range);
++    CONSTANT(".u32 trc_src = %i", (int)in_trc);
++    CONSTANT(".u32 trc_dst = %i", (int)out_trc);
++    CONSTANT(".u32 chroma_loc_src = %i", (int)s->in_chroma_loc);
++    CONSTANT(".u32 chroma_loc_dst = %i", (int)s->out_chroma_loc);
++    CONSTANT(".u32 tonemap_func = %i", (int)s->tonemap);
++    CONSTANT(".f32 ref_white = %f", s->ref_white);
++    CONSTANT(".f32 tone_param = %f", s->param);
++    CONSTANT(".f32 desat_param = %f", s->desat_param);
++    CONSTANT_M("rgb_matrix", rgb_matrix);
++    CONSTANT_M("yuv_matrix", yuv_matrix);
++    CONSTANT_A(".u8 rgb2rgb_passthrough = %i", 1, in_pri == out_pri);
++    CONSTANT_M("rgb2rgb_matrix", rgb2rgb_matrix);
++    CONSTANT_C("luma_src", in_coeffs);
++    CONSTANT_C("luma_dst", out_coeffs);
++
++    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
++    if (ret < 0)
++        return ret;
++
++    if (s->cu_module) {
++        ret = CHECK_CU(cu->cuModuleUnload(s->cu_module));
++        if (ret < 0)
++            goto fail;
++
++        s->cu_func = NULL;
++        s->cu_module = NULL;
++    }
++
++    ret = CHECK_CU(cu->cuLinkCreate(sizeof(options) / sizeof(options[0]), options, option_values, &link_state));
++    if (ret < 0)
++        goto fail;
++
++    ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, constants.str,
++                                     constants.len, "constants", 0, NULL, NULL));
++    if (ret < 0)
++        goto fail2;
++
++    ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, tonemap_ptx,
++                                     strlen(tonemap_ptx), "tonemap.ptx", 0, NULL, NULL));
++    if (ret < 0)
++        goto fail2;
++
++    ret = CHECK_CU(cu->cuLinkComplete(link_state, &cubin, &cubin_size));
++    if (ret < 0)
++        goto fail2;
++
++    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, cubin));
++    if (ret < 0)
++        goto fail2;
++
++    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func, s->cu_module, "tonemap"));
++    if (ret < 0)
++        goto fail2;
++
++fail2:
++    CHECK_CU(cu->cuLinkDestroy(link_state));
++
++fail:
++    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
++
++    av_bprint_finalize(&constants, NULL);
++
++    if ((intptr_t)option_values[2] > 0)
++        av_log(ctx, AV_LOG_INFO, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[2], info_log);
++
++    if ((intptr_t)option_values[3] > 0)
++        av_log(ctx, AV_LOG_ERROR, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[3], error_log);
++
++    return ret;
++}
++
++static av_cold int config_props(AVFilterLink *outlink)
++{
++    AVFilterContext *ctx = outlink->src;
++    AVFilterLink *inlink = outlink->src->inputs[0];
++    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
++    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
++    TonemapCUDAContext *s = ctx->priv;
++    int ret;
++
++    s->hwctx = device_hwctx;
++
++    outlink->w = inlink->w;
++    outlink->h = inlink->h;
++
++    ret = init_processing_chain(ctx, outlink);
++    if (ret < 0)
++        return ret;
++
++    outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
++
++    return 0;
++}
++
++static int run_kernel(AVFilterContext *ctx,
++                      AVFrame
*out, AVFrame *in) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ FFCUDAFrame src, dst; ++ void *args_uchar[] = { &src, &dst }; ++ int ret; ++ ++ ret = ff_make_cuda_frame(&src, in); ++ if (ret < 0) ++ goto fail; ++ ++ ret = ff_make_cuda_frame(&dst, out); ++ if (ret < 0) ++ goto fail; ++ ++ src.peak = s->peak; ++ if (!src.peak) { ++ src.peak = ff_determine_signal_peak(in); ++ av_log(s, AV_LOG_DEBUG, "Computed signal peak: %f\n", src.peak); ++ } ++ ++ dst.peak = s->dst_peak; ++ ++ ret = CHECK_CU(cu->cuLaunchKernel(s->cu_func, ++ DIV_UP(src.width / 2, BLOCKX), DIV_UP(src.height / 2, BLOCKY), 1, ++ BLOCKX, BLOCKY, 1, 0, s->hwctx->stream, args_uchar, NULL)); ++ ++fail: ++ return ret; ++} ++ ++static int do_tonemap(AVFilterContext *ctx, AVFrame *out, AVFrame *in) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ AVFrame *src = in; ++ int ret; ++ ++ ret = run_kernel(ctx, s->frame, src); ++ if (ret < 0) ++ return ret; ++ ++ src = s->frame; ++ ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); ++ if (ret < 0) ++ return ret; ++ ++ av_frame_move_ref(out, s->frame); ++ av_frame_move_ref(s->frame, s->tmp_frame); ++ ++ s->frame->width = in->width; ++ s->frame->height = in->height; ++ ++ ret = av_frame_copy_props(out, in); ++ if (ret < 0) ++ return ret; ++ ++ if (s->out_trc != out->color_trc || ++ s->out_spc != out->colorspace || ++ s->out_pri != out->color_primaries || ++ s->out_range != out->color_range || ++ s->out_chroma_loc != out->chroma_location) { ++ out->color_trc = s->out_trc; ++ out->colorspace = s->out_spc; ++ out->color_primaries = s->out_pri; ++ out->color_range = s->out_range; ++ out->chroma_location = s->out_chroma_loc; ++ } ++ ++ return 0; ++} ++ ++static int filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterContext *ctx = link->dst; ++ TonemapCUDAContext *s = ctx->priv; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ ++ AVFrame *out = NULL; ++ CUcontext dummy; ++ int ret = 0; ++ ++ out = av_frame_alloc(); ++ if (!out) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ if (!(in->color_trc == AVCOL_TRC_SMPTE2084 || ++ in->color_trc == AVCOL_TRC_ARIB_STD_B67)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input transfer characteristic: %s\n", ++ av_color_transfer_name(in->color_trc)); ++ ret = AVERROR(EINVAL); ++ goto fail; ++ } ++ ++ if (!s->cu_func || ++ s->in_trc != in->color_trc || ++ s->in_spc != in->colorspace || ++ s->in_pri != in->color_primaries || ++ s->in_range != in->color_range || ++ s->in_chroma_loc != in->chroma_location) { ++ s->in_trc = in->color_trc; ++ s->in_spc = in->colorspace; ++ s->in_pri = in->color_primaries; ++ s->in_range = in->color_range; ++ s->in_chroma_loc = in->chroma_location; ++ ++ s->out_trc = s->trc; ++ s->out_spc = s->spc; ++ s->out_pri = s->pri; ++ s->out_range = s->range; ++ s->out_chroma_loc = s->in_chroma_loc; ++ ++ if ((ret = compile(link)) < 0) ++ goto fail; ++ } ++ ++ ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); ++ if (ret < 0) ++ goto fail; ++ ++ ret = do_tonemap(ctx, out, in); ++ ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ if (ret < 0) ++ goto fail; ++ ++ av_frame_free(&in); ++ ++ ff_update_hdr_metadata(out, s->dst_peak); ++ ++ return ff_filter_frame(outlink, out); ++fail: ++ av_frame_free(&in); ++ av_frame_free(&out); ++ return ret; ++} ++ ++#define OFFSET(x) offsetof(TonemapCUDAContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) ++static const AVOption options[] = { ++ { 
"tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" }, ++ { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, "tonemap" }, ++ { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, "tonemap" }, ++ { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, "tonemap" }, ++ { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, "tonemap" }, ++ { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" }, ++ { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, "tonemap" }, ++ { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, "tonemap" }, ++ { "bt2390", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_BT2390}, 0, 0, FLAGS, "tonemap" }, ++ { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, ++ { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, "transfer" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, "transfer" }, ++ { "matrix", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, "matrix" }, ++ { "m", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, "matrix" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, "matrix" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, "matrix" }, ++ { "primaries", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, "primaries" }, ++ { "p", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, "primaries" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, "primaries" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, "primaries" }, ++ { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_MPEG}, -1, INT_MAX, FLAGS, "range" }, ++ { "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_MPEG}, -1, INT_MAX, FLAGS, "range" }, ++ { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, ++ { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, ++ { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, ++ { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, ++ { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, ++ { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, ++ { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, ++ { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, ++ { NULL }, ++}; ++ ++static const AVClass tonemap_cuda_class = { ++ .class_name = "tonemap_cuda", ++ .item_name = av_default_item_name, ++ .option = options, ++ .version = 
LIBAVUTIL_VERSION_INT, ++}; ++ ++static const AVFilterPad inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = filter_frame, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = config_props, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vf_tonemap_cuda = { ++ .name = "tonemap_cuda", ++ .description = NULL_IF_CONFIG_SMALL("GPU accelerated HDR to SDR tonemapping"), ++ ++ .init = init, ++ .uninit = uninit, ++ .query_formats = query_formats, ++ ++ .priv_size = sizeof(TonemapCUDAContext), ++ .priv_class = &tonemap_cuda_class, ++ ++ .inputs = inputs, ++ .outputs = outputs, ++ ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/debian/patches/series b/debian/patches/series index 2b13748bf23..5f90ee2c53b 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,2 +1,3 @@ 0001-add-fixes-for-segement-muxer.patch 0002-add-cuda-pixfmt-converter-impl.patch +0003-add-cuda-tonemap-impl.patch From 0a67055b046aff93c77b0af4b8aab7f5c9e7d97b Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 5 Dec 2021 00:07:28 +0800 Subject: [PATCH 13/41] add amf refactor and hevc 10-bit encoding --- ...mf-refactor-and-hevc-10-bit-encoding.patch | 2513 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 2514 insertions(+) create mode 100644 debian/patches/0004-add-amf-refactor-and-hevc-10-bit-encoding.patch diff --git a/debian/patches/0004-add-amf-refactor-and-hevc-10-bit-encoding.patch b/debian/patches/0004-add-amf-refactor-and-hevc-10-bit-encoding.patch new file mode 100644 index 00000000000..689f8a818e8 --- /dev/null +++ b/debian/patches/0004-add-amf-refactor-and-hevc-10-bit-encoding.patch @@ -0,0 +1,2513 @@ +Index: jellyfin-ffmpeg/libavcodec/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/Makefile ++++ jellyfin-ffmpeg/libavcodec/Makefile +@@ -63,7 +63,7 @@ OBJS = ac3_parser.o + OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o + OBJS-$(CONFIG_AC3DSP) += ac3dsp.o ac3.o ac3tab.o + OBJS-$(CONFIG_ADTS_HEADER) += adts_header.o mpeg4audio.o +-OBJS-$(CONFIG_AMF) += amfenc.o ++OBJS-$(CONFIG_AMF) += amfenc.o amf.o + OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o + OBJS-$(CONFIG_ATSC_A53) += atsc_a53.o + OBJS-$(CONFIG_AUDIODSP) += audiodsp.o +@@ -1196,7 +1196,7 @@ SKIPHEADERS + + aacenc_quantization_misc.h \ + $(ARCH)/vp56_arith.h \ + +-SKIPHEADERS-$(CONFIG_AMF) += amfenc.h ++SKIPHEADERS-$(CONFIG_AMF) += amfenc.h amf.h + SKIPHEADERS-$(CONFIG_D3D11VA) += d3d11va.h dxva2_internal.h + SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h + SKIPHEADERS-$(CONFIG_JNI) += ffjni.h +Index: jellyfin-ffmpeg/libavcodec/amf.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavcodec/amf.c +@@ -0,0 +1,371 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "amf.h"
++
++#define FFMPEG_AMF_WRITER_ID L"ffmpeg_amf"
++
++const FormatMap format_map[] =
++{
++    { AV_PIX_FMT_NONE,    AMF_SURFACE_UNKNOWN },
++    { AV_PIX_FMT_NV12,    AMF_SURFACE_NV12 },
++    { AV_PIX_FMT_P010,    AMF_SURFACE_P010 },
++    { AV_PIX_FMT_BGR0,    AMF_SURFACE_BGRA },
++    { AV_PIX_FMT_RGB0,    AMF_SURFACE_RGBA },
++    { AV_PIX_FMT_GRAY8,   AMF_SURFACE_GRAY8 },
++    { AV_PIX_FMT_YUV420P, AMF_SURFACE_YUV420P },
++    { AV_PIX_FMT_YUYV422, AMF_SURFACE_YUY2 },
++};
++
++enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt)
++{
++    int i;
++    for (i = 0; i < amf_countof(format_map); i++) {
++        if (format_map[i].av_format == fmt) {
++            return format_map[i].amf_format;
++        }
++    }
++    return AMF_SURFACE_UNKNOWN;
++}
++
++enum AVPixelFormat amf_to_av_format(enum AMF_SURFACE_FORMAT fmt)
++{
++    int i;
++    for (i = 0; i < amf_countof(format_map); i++) {
++        if (format_map[i].amf_format == fmt) {
++            return format_map[i].av_format;
++        }
++    }
++    return AV_PIX_FMT_NONE;
++}
++
++const ColorTransferMap color_trc_map[] =
++{
++    { AVCOL_TRC_RESERVED0,    AMF_COLOR_TRANSFER_CHARACTERISTIC_UNDEFINED },
++    { AVCOL_TRC_BT709,        AMF_COLOR_TRANSFER_CHARACTERISTIC_BT709 },
++    { AVCOL_TRC_UNSPECIFIED,  AMF_COLOR_TRANSFER_CHARACTERISTIC_UNSPECIFIED },
++    { AVCOL_TRC_RESERVED,     AMF_COLOR_TRANSFER_CHARACTERISTIC_RESERVED },
++    { AVCOL_TRC_GAMMA22,      AMF_COLOR_TRANSFER_CHARACTERISTIC_GAMMA22 },
++    { AVCOL_TRC_GAMMA28,      AMF_COLOR_TRANSFER_CHARACTERISTIC_GAMMA28 },
++    { AVCOL_TRC_SMPTE170M,    AMF_COLOR_TRANSFER_CHARACTERISTIC_SMPTE170M },
++    { AVCOL_TRC_SMPTE240M,    AMF_COLOR_TRANSFER_CHARACTERISTIC_SMPTE240M },
++    { AVCOL_TRC_LINEAR,       AMF_COLOR_TRANSFER_CHARACTERISTIC_LINEAR },
++    { AVCOL_TRC_LOG,          AMF_COLOR_TRANSFER_CHARACTERISTIC_LOG },
++    { AVCOL_TRC_LOG_SQRT,     AMF_COLOR_TRANSFER_CHARACTERISTIC_LOG_SQRT },
++    { AVCOL_TRC_IEC61966_2_4, AMF_COLOR_TRANSFER_CHARACTERISTIC_IEC61966_2_4 },
++    { AVCOL_TRC_BT1361_ECG,   AMF_COLOR_TRANSFER_CHARACTERISTIC_BT1361_ECG },
++    { AVCOL_TRC_IEC61966_2_1, AMF_COLOR_TRANSFER_CHARACTERISTIC_IEC61966_2_1 },
++    { AVCOL_TRC_BT2020_10,    AMF_COLOR_TRANSFER_CHARACTERISTIC_BT2020_10 },
++    { AVCOL_TRC_BT2020_12,    AMF_COLOR_TRANSFER_CHARACTERISTIC_BT2020_12 },
++    { AVCOL_TRC_SMPTE2084,    AMF_COLOR_TRANSFER_CHARACTERISTIC_SMPTE2084 },
++    { AVCOL_TRC_SMPTE428,     AMF_COLOR_TRANSFER_CHARACTERISTIC_SMPTE428 },
++    { AVCOL_TRC_ARIB_STD_B67, AMF_COLOR_TRANSFER_CHARACTERISTIC_ARIB_STD_B67 },
++};
++
++enum AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM amf_av_to_amf_color_trc(enum AVColorTransferCharacteristic trc)
++{
++    int i;
++    for (i = 0; i < amf_countof(color_trc_map); i++) {
++        if (color_trc_map[i].av_color_trc == trc) {
++            return color_trc_map[i].amf_color_trc;
++        }
++    }
++    return AMF_COLOR_TRANSFER_CHARACTERISTIC_UNDEFINED;
++}
++
++const ColorPrimariesMap color_prm_map[] =
++{
++    { AVCOL_PRI_RESERVED0,   AMF_COLOR_PRIMARIES_UNDEFINED },
++    { AVCOL_PRI_BT709,       AMF_COLOR_PRIMARIES_BT709 },
++    { AVCOL_PRI_UNSPECIFIED, AMF_COLOR_PRIMARIES_UNSPECIFIED },
++    { AVCOL_PRI_RESERVED,    AMF_COLOR_PRIMARIES_RESERVED },
++    { AVCOL_PRI_BT470M,      AMF_COLOR_PRIMARIES_BT470M },
++    { AVCOL_PRI_BT470BG,     AMF_COLOR_PRIMARIES_BT470BG },
++    { AVCOL_PRI_SMPTE170M,   AMF_COLOR_PRIMARIES_SMPTE170M },
++    { AVCOL_PRI_SMPTE240M,   AMF_COLOR_PRIMARIES_SMPTE240M },
++    { AVCOL_PRI_FILM,        AMF_COLOR_PRIMARIES_FILM },
++    {
AVCOL_PRI_BT2020, AMF_COLOR_PRIMARIES_BT2020 }, ++ { AVCOL_PRI_SMPTE428, AMF_COLOR_PRIMARIES_SMPTE428 }, ++ { AVCOL_PRI_SMPTE431, AMF_COLOR_PRIMARIES_SMPTE431 }, ++ { AVCOL_PRI_SMPTE432, AMF_COLOR_PRIMARIES_SMPTE432 }, ++ { AVCOL_PRI_JEDEC_P22, AMF_COLOR_PRIMARIES_JEDEC_P22 }, ++}; ++ ++enum AMF_COLOR_PRIMARIES_ENUM amf_av_to_amf_color_prm(enum AVColorPrimaries prm) ++{ ++ int i; ++ for (i = 0; i < amf_countof(color_prm_map); i++) { ++ if (color_prm_map[i].av_color_prm == prm) { ++ return color_prm_map[i].amf_color_prm; ++ } ++ } ++ return AMF_COLOR_PRIMARIES_UNDEFINED; ++} ++ ++static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter *pThis, ++ const wchar_t *scope, const wchar_t *message) ++{ ++ AVAMFLogger *logger = (AVAMFLogger*)pThis; ++ av_log(logger->avcl, AV_LOG_DEBUG, "%ls: %ls", scope, message); ++} ++ ++static void AMF_CDECL_CALL AMFTraceWriter_Flush(AMFTraceWriter *pThis) {} ++ ++static AMFTraceWriterVtbl tracer_vtbl = ++{ ++ .Write = AMFTraceWriter_Write, ++ .Flush = AMFTraceWriter_Flush, ++}; ++ ++int amf_load_library(AVAMFContext *ctx) ++{ ++ AMFInit_Fn init_fun; ++ AMFQueryVersion_Fn version_fun; ++ AMF_RESULT res; ++ ++ ctx->library = dlopen(AMF_DLL_NAMEA, RTLD_NOW | RTLD_LOCAL); ++ AMF_RETURN_IF_FALSE(ctx->avclass, ctx->library != NULL, ++ AVERROR_UNKNOWN, "DLL %s failed to open\n", AMF_DLL_NAMEA); ++ ++ init_fun = (AMFInit_Fn)dlsym(ctx->library, AMF_INIT_FUNCTION_NAME); ++ AMF_RETURN_IF_FALSE(ctx->avclass, init_fun != NULL, ++ AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_INIT_FUNCTION_NAME); ++ ++ version_fun = (AMFQueryVersion_Fn)dlsym(ctx->library, AMF_QUERY_VERSION_FUNCTION_NAME); ++ AMF_RETURN_IF_FALSE(ctx->avclass, version_fun != NULL, ++ AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_QUERY_VERSION_FUNCTION_NAME); ++ ++ res = version_fun(&ctx->version); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_QUERY_VERSION_FUNCTION_NAME, res); ++ ++ res = init_fun(AMF_FULL_VERSION, &ctx->factory); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_INIT_FUNCTION_NAME, res); ++ ++ res = ctx->factory->pVtbl->GetTrace(ctx->factory, &ctx->trace); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "GetTrace() failed with error %d\n", res); ++ ++ res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "GetDebug() failed with error %d\n", res); ++ ++ return 0; ++} ++ ++int amf_create_context(AVAMFContext *ctx) ++{ ++ AMF_RESULT res; ++ ++ // configure AMF logger ++ ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, !!ctx->log_to_dbg); ++ if (ctx->log_to_dbg) ++ ctx->trace->pVtbl->SetWriterLevel(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, AMF_TRACE_TRACE); ++ ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_CONSOLE, 0); ++ ctx->trace->pVtbl->SetGlobalLevel(ctx->trace, AMF_TRACE_TRACE); ++ ++ // connect AMF logger to av_log ++ ctx->logger.vtbl = &tracer_vtbl; ++ ctx->logger.avcl = ctx->avclass; ++ ctx->trace->pVtbl->RegisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID, (AMFTraceWriter*)&ctx->logger, 1); ++ ctx->trace->pVtbl->SetWriterLevel(ctx->trace, FFMPEG_AMF_WRITER_ID, AMF_TRACE_TRACE); ++ ++ res = ctx->factory->pVtbl->CreateContext(ctx->factory, &ctx->context); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "CreateContext() failed with error %d\n", res); 
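++
++    // at this point the AMF runtime is loaded and a bare context exists;
++    // it still has no device backend, so callers must follow up with one
++    // of the amf_context_init_*() or amf_context_derive_*() helpers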
++
++    return 0;
++}
++
++void amf_unload_library(AVAMFContext *ctx)
++{
++    if (ctx->context) {
++        ctx->context->pVtbl->Terminate(ctx->context);
++        ctx->context->pVtbl->Release(ctx->context);
++        ctx->context = NULL;
++    }
++    if (ctx->trace) {
++        ctx->trace->pVtbl->UnregisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID);
++    }
++    if (ctx->library) {
++        dlclose(ctx->library);
++        ctx->library = NULL;
++    }
++    ctx->trace = NULL;
++    ctx->debug = NULL;
++    ctx->factory = NULL;
++    ctx->version = 0;
++}
++
++int amf_context_init_dx11(AVAMFContext *ctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_1);
++    if (res != AMF_OK) {
++        res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_0);
++    }
++
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via DX11\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX11 is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default DX11 device: %d\n", res);
++    }
++    return res;
++}
++
++int amf_context_init_dx9(AVAMFContext *ctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitDX9(ctx->context, NULL);
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via DX9\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX9 is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default DX9 device: %d\n", res);
++    }
++    return res;
++}
++
++int amf_context_init_vulkan(AVAMFContext *ctx)
++{
++    AMF_RESULT res;
++    AMFContext1* context1 = NULL;
++    AMFGuid guid = IID_AMFContext1();
++
++    res = ctx->context->pVtbl->QueryInterface(ctx->context, &guid, (void**)&context1);
++    AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext1() failed with error %d\n", res);
++
++    res = context1->pVtbl->InitVulkan(context1, NULL);
++    context1->pVtbl->Release(context1);
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via Vulkan\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via Vulkan is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default Vulkan device: %d\n", res);
++    }
++    return res;
++}
++
++int amf_context_init_opencl(AVAMFContext *ctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitOpenCL(ctx->context, NULL);
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via OpenCL\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via OpenCL is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default OpenCL device: %d\n", res);
++    }
++    return res;
++}
++
++#if CONFIG_D3D11VA
++int amf_context_derive_dx11(AVAMFContext *ctx, AVD3D11VADeviceContext *hwctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_1);
++    if (res != AMF_OK) {
++        res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_0);
++    }
++
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF derivation succeeded via DX11\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX11 is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to derive from the given DX11 device: %d\n", res);
++        return AVERROR(ENODEV);
++    }
++    return res;
++}
++#endif
++
++#if CONFIG_DXVA2
++int amf_context_derive_dx9(AVAMFContext *ctx, AVDXVA2DeviceContext *hwctx)
++{
++    AMF_RESULT res;
++    HRESULT hr;
++    HANDLE device_handle;
++    IDirect3DDevice9* device;
++
++    hr = IDirect3DDeviceManager9_OpenDeviceHandle(hwctx->devmgr, &device_handle);
++    if (FAILED(hr)) {
++        av_log(ctx->avclass, AV_LOG_ERROR, "Failed to open device handle for DX9 device: %lx\n", (unsigned long)hr);
++        return AVERROR_EXTERNAL;
++    }
++
++    hr = IDirect3DDeviceManager9_LockDevice(hwctx->devmgr, device_handle, &device, FALSE);
++    if (SUCCEEDED(hr)) {
++        IDirect3DDeviceManager9_UnlockDevice(hwctx->devmgr, device_handle, FALSE);
++    } else {
++        av_log(ctx->avclass, AV_LOG_ERROR, "Failed to lock device handle for DX9 device: %lx\n", (unsigned long)hr);
++        IDirect3DDeviceManager9_CloseDeviceHandle(hwctx->devmgr, device_handle);
++        return AVERROR_EXTERNAL;
++    }
++
++    IDirect3DDeviceManager9_CloseDeviceHandle(hwctx->devmgr, device_handle);
++
++    res = ctx->context->pVtbl->InitDX9(ctx->context, device);
++
++    IDirect3DDevice9_Release(device);
++
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF derivation succeeded via DX9\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX9 is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to derive from the given DX9 device: %d\n", res);
++        return AVERROR(ENODEV);
++    }
++    return res;
++}
++#endif
++
++#if CONFIG_OPENCL
++int amf_context_derive_opencl(AVAMFContext *ctx, AVOpenCLDeviceContext *hwctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitOpenCL(ctx->context, hwctx->command_queue);
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF derivation succeeded via OpenCL\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via OpenCL is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to derive from the given OpenCL device: %d\n", res);
++        return AVERROR(ENODEV);
++    }
++    return res;
++}
++#endif
+Index: jellyfin-ffmpeg/libavcodec/amf.h
+===================================================================
+--- /dev/null
++++ jellyfin-ffmpeg/libavcodec/amf.h
+@@ -0,0 +1,156 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVCODEC_AMF_H ++#define AVCODEC_AMF_H ++ ++#include ++#include ++#include ++ ++#include "config.h" ++#include "avcodec.h" ++ ++#include "libavutil/pixdesc.h" ++ ++#if CONFIG_D3D11VA ++#include "libavutil/hwcontext_d3d11va.h" ++#endif ++ ++#if CONFIG_DXVA2 ++#define COBJMACROS ++#include "libavutil/hwcontext_dxva2.h" ++#endif ++ ++#if CONFIG_OPENCL ++#include "libavutil/hwcontext_opencl.h" ++#endif ++ ++#ifdef _WIN32 ++#include "compat/w32dlfcn.h" ++#else ++#include ++#endif ++ ++/** ++* Error handling helper ++*/ ++#define AMF_RETURN_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) \ ++ if (!(exp)) { \ ++ av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \ ++ return ret_value; \ ++ } ++ ++#define AMF_GOTO_FAIL_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) \ ++ if (!(exp)) { \ ++ av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \ ++ ret = ret_value; \ ++ goto fail; \ ++ } ++ ++/** ++* AMF trace writer callback class ++* Used to capture all AMF logging ++*/ ++typedef struct AVAMFLogger { ++ AMFTraceWriterVtbl *vtbl; ++ void *avcl; ++} AVAMFLogger; ++ ++typedef struct AVAMFContext { ++ void *avclass; ++ int log_to_dbg; ++ ++ // access to AMF runtime ++ amf_handle library; ///< handle to DLL library ++ AMFFactory *factory; ///< pointer to AMF factory ++ AMFDebug *debug; ///< pointer to AMF debug interface ++ AMFTrace *trace; ///< pointer to AMF trace interface ++ ++ amf_uint64 version; ///< version of AMF runtime ++ AVAMFLogger logger; ///< AMF writer registered with AMF ++ AMFContext *context; ///< AMF context ++} AVAMFContext; ++ ++/** ++* Surface/Pixel format ++*/ ++typedef struct FormatMap { ++ enum AVPixelFormat av_format; ++ enum AMF_SURFACE_FORMAT amf_format; ++} FormatMap; ++ ++extern const FormatMap format_map[]; ++enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt); ++enum AVPixelFormat amf_to_av_format(enum AMF_SURFACE_FORMAT fmt); ++ ++/** ++* Color Transfer ++*/ ++typedef struct ColorTransferMap { ++ enum AVColorTransferCharacteristic av_color_trc; ++ enum AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM amf_color_trc; ++} ColorTransferMap; ++ ++extern const ColorTransferMap color_trc_map[]; ++enum AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM amf_av_to_amf_color_trc(enum AVColorTransferCharacteristic trc); ++enum AVColorTransferCharacteristic amf_to_av_color_trc(enum AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM trc); ++ ++/** ++* Color Primaries ++*/ ++typedef struct ColorPrimariesMap { ++ enum AVColorPrimaries av_color_prm; ++ enum AMF_COLOR_PRIMARIES_ENUM amf_color_prm; ++} ColorPrimariesMap; ++ ++extern const ColorPrimariesMap color_prm_map[]; ++enum AMF_COLOR_PRIMARIES_ENUM amf_av_to_amf_color_prm(enum AVColorPrimaries prm); ++enum AVColorPrimaries amf_to_av_color_prm(enum AMF_COLOR_PRIMARIES_ENUM prm); ++ ++/** ++* Load AMFContext ++*/ ++int amf_load_library(AVAMFContext *ctx); ++int amf_create_context(AVAMFContext *ctx); ++void amf_unload_library(AVAMFContext *ctx); ++ ++/** ++* Init AMFContext standalone ++*/ ++int amf_context_init_dx11(AVAMFContext *ctx); ++int amf_context_init_dx9(AVAMFContext *ctx); ++int amf_context_init_vulkan(AVAMFContext *ctx); ++int amf_context_init_opencl(AVAMFContext *ctx); ++ ++/** ++* Derive AMFContext from builtin hwcontext ++*/ ++#if CONFIG_D3D11VA ++int amf_context_derive_dx11(AVAMFContext *ctx, AVD3D11VADeviceContext 
*hwctx); ++#endif ++ ++#if CONFIG_DXVA2 ++int amf_context_derive_dx9(AVAMFContext *ctx, AVDXVA2DeviceContext *hwctx); ++#endif ++ ++#if CONFIG_OPENCL ++int amf_context_derive_opencl(AVAMFContext *ctx, AVOpenCLDeviceContext *hwctx); ++#endif ++ ++#endif /* AVCODEC_AMF_H */ +Index: jellyfin-ffmpeg/libavcodec/amfenc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/amfenc.c ++++ jellyfin-ffmpeg/libavcodec/amfenc.c +@@ -16,227 +16,54 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#include "config.h" +- + #include "libavutil/avassert.h" +-#include "libavutil/imgutils.h" + #include "libavutil/hwcontext.h" +-#if CONFIG_D3D11VA +-#include "libavutil/hwcontext_d3d11va.h" +-#endif +-#if CONFIG_DXVA2 +-#define COBJMACROS +-#include "libavutil/hwcontext_dxva2.h" +-#endif ++#include "libavutil/imgutils.h" ++ + #include "libavutil/mem.h" +-#include "libavutil/pixdesc.h" + #include "libavutil/time.h" + + #include "amfenc.h" + #include "encode.h" + #include "internal.h" + +-#if CONFIG_D3D11VA +-#include +-#endif +- +-#ifdef _WIN32 +-#include "compat/w32dlfcn.h" +-#else +-#include +-#endif +- +-#define FFMPEG_AMF_WRITER_ID L"ffmpeg_amf" +- + #define PTS_PROP L"PtsProp" + +-const enum AVPixelFormat ff_amf_pix_fmts[] = { +- AV_PIX_FMT_NV12, +- AV_PIX_FMT_YUV420P, +-#if CONFIG_D3D11VA +- AV_PIX_FMT_D3D11, +-#endif +-#if CONFIG_DXVA2 +- AV_PIX_FMT_DXVA2_VLD, +-#endif +- AV_PIX_FMT_NONE +-}; +- +-typedef struct FormatMap { +- enum AVPixelFormat av_format; +- enum AMF_SURFACE_FORMAT amf_format; +-} FormatMap; +- +-static const FormatMap format_map[] = +-{ +- { AV_PIX_FMT_NONE, AMF_SURFACE_UNKNOWN }, +- { AV_PIX_FMT_NV12, AMF_SURFACE_NV12 }, +- { AV_PIX_FMT_BGR0, AMF_SURFACE_BGRA }, +- { AV_PIX_FMT_RGB0, AMF_SURFACE_RGBA }, +- { AV_PIX_FMT_GRAY8, AMF_SURFACE_GRAY8 }, +- { AV_PIX_FMT_YUV420P, AMF_SURFACE_YUV420P }, +- { AV_PIX_FMT_YUYV422, AMF_SURFACE_YUY2 }, +-}; +- +-static enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt) +-{ +- int i; +- for (i = 0; i < amf_countof(format_map); i++) { +- if (format_map[i].av_format == fmt) { +- return format_map[i].amf_format; +- } +- } +- return AMF_SURFACE_UNKNOWN; +-} +- +-static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter *pThis, +- const wchar_t *scope, const wchar_t *message) +-{ +- AmfTraceWriter *tracer = (AmfTraceWriter*)pThis; +- av_log(tracer->avctx, AV_LOG_DEBUG, "%ls: %ls", scope, message); // \n is provided from AMF +-} +- +-static void AMF_CDECL_CALL AMFTraceWriter_Flush(AMFTraceWriter *pThis) +-{ +-} +- +-static AMFTraceWriterVtbl tracer_vtbl = ++static int amf_init_context(AVCodecContext *avctx) + { +- .Write = AMFTraceWriter_Write, +- .Flush = AMFTraceWriter_Flush, +-}; ++ AMFEncContext *ctx = avctx->priv_data; ++ AVAMFContext *amfctx = NULL; ++ AMF_RESULT res; ++ int ret; + +-static int amf_load_library(AVCodecContext *avctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- AMFInit_Fn init_fun; +- AMFQueryVersion_Fn version_fun; +- AMF_RESULT res; ++ ctx->dts_delay = 0; ++ ctx->hwsurfaces_in_queue = 0; ++ ctx->hwsurfaces_in_queue_max = 16; + + ctx->delayed_frame = av_frame_alloc(); +- if (!ctx->delayed_frame) { ++ if (!ctx->delayed_frame) + return AVERROR(ENOMEM); +- } ++ + // hardcoded to current HW queue size - will realloc in timestamp_queue_enqueue() if too small + ctx->timestamp_list = av_fifo_alloc((avctx->max_b_frames + 16) * sizeof(int64_t)); +- if (!ctx->timestamp_list) { ++ if (!ctx->timestamp_list) + return 
AVERROR(ENOMEM); +- } +- ctx->dts_delay = 0; + ++ amfctx = av_mallocz(sizeof(AVAMFContext)); ++ if (!amfctx) ++ return AVERROR(ENOMEM); + +- ctx->library = dlopen(AMF_DLL_NAMEA, RTLD_NOW | RTLD_LOCAL); +- AMF_RETURN_IF_FALSE(ctx, ctx->library != NULL, +- AVERROR_UNKNOWN, "DLL %s failed to open\n", AMF_DLL_NAMEA); +- +- init_fun = (AMFInit_Fn)dlsym(ctx->library, AMF_INIT_FUNCTION_NAME); +- AMF_RETURN_IF_FALSE(ctx, init_fun != NULL, AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_INIT_FUNCTION_NAME); +- +- version_fun = (AMFQueryVersion_Fn)dlsym(ctx->library, AMF_QUERY_VERSION_FUNCTION_NAME); +- AMF_RETURN_IF_FALSE(ctx, version_fun != NULL, AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_QUERY_VERSION_FUNCTION_NAME); +- +- res = version_fun(&ctx->version); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_QUERY_VERSION_FUNCTION_NAME, res); +- res = init_fun(AMF_FULL_VERSION, &ctx->factory); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_INIT_FUNCTION_NAME, res); +- res = ctx->factory->pVtbl->GetTrace(ctx->factory, &ctx->trace); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetTrace() failed with error %d\n", res); +- res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetDebug() failed with error %d\n", res); +- return 0; +-} +- +-#if CONFIG_D3D11VA +-static int amf_init_from_d3d11_device(AVCodecContext *avctx, AVD3D11VADeviceContext *hwctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- AMF_RESULT res; +- +- res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_1); +- if (res != AMF_OK) { +- if (res == AMF_NOT_SUPPORTED) +- av_log(avctx, AV_LOG_ERROR, "AMF via D3D11 is not supported on the given device.\n"); +- else +- av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on the given D3D11 device: %d.\n", res); +- return AVERROR(ENODEV); +- } +- +- return 0; +-} +-#endif +- +-#if CONFIG_DXVA2 +-static int amf_init_from_dxva2_device(AVCodecContext *avctx, AVDXVA2DeviceContext *hwctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- HANDLE device_handle; +- IDirect3DDevice9 *device; +- HRESULT hr; +- AMF_RESULT res; +- int ret; +- +- hr = IDirect3DDeviceManager9_OpenDeviceHandle(hwctx->devmgr, &device_handle); +- if (FAILED(hr)) { +- av_log(avctx, AV_LOG_ERROR, "Failed to open device handle for Direct3D9 device: %lx.\n", (unsigned long)hr); +- return AVERROR_EXTERNAL; +- } +- +- hr = IDirect3DDeviceManager9_LockDevice(hwctx->devmgr, device_handle, &device, FALSE); +- if (SUCCEEDED(hr)) { +- IDirect3DDeviceManager9_UnlockDevice(hwctx->devmgr, device_handle, FALSE); +- ret = 0; +- } else { +- av_log(avctx, AV_LOG_ERROR, "Failed to lock device handle for Direct3D9 device: %lx.\n", (unsigned long)hr); +- ret = AVERROR_EXTERNAL; +- } +- +- IDirect3DDeviceManager9_CloseDeviceHandle(hwctx->devmgr, device_handle); ++ ctx->amfctx = amfctx; ++ amfctx->avclass = avctx; ++ amfctx->log_to_dbg = ctx->log_to_dbg; + ++ ret = amf_load_library(amfctx); + if (ret < 0) + return ret; + +- res = ctx->context->pVtbl->InitDX9(ctx->context, device); +- +- IDirect3DDevice9_Release(device); +- +- if (res != AMF_OK) { +- if (res == AMF_NOT_SUPPORTED) +- av_log(avctx, AV_LOG_ERROR, "AMF via D3D9 is not supported on the given device.\n"); +- else +- av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on given D3D9 device: %d.\n", res); +- return AVERROR(ENODEV); +- } +- +- 
return 0; +-} +-#endif +- +-static int amf_init_context(AVCodecContext *avctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- AMFContext1 *context1 = NULL; +- AMF_RESULT res; +- av_unused int ret; +- +- ctx->hwsurfaces_in_queue = 0; +- ctx->hwsurfaces_in_queue_max = 16; +- +- // configure AMF logger +- // the return of these functions indicates old state and do not affect behaviour +- ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, ctx->log_to_dbg != 0 ); +- if (ctx->log_to_dbg) +- ctx->trace->pVtbl->SetWriterLevel(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, AMF_TRACE_TRACE); +- ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_CONSOLE, 0); +- ctx->trace->pVtbl->SetGlobalLevel(ctx->trace, AMF_TRACE_TRACE); +- +- // connect AMF logger to av_log +- ctx->tracer.vtbl = &tracer_vtbl; +- ctx->tracer.avctx = avctx; +- ctx->trace->pVtbl->RegisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID,(AMFTraceWriter*)&ctx->tracer, 1); +- ctx->trace->pVtbl->SetWriterLevel(ctx->trace, FFMPEG_AMF_WRITER_ID, AMF_TRACE_TRACE); +- +- res = ctx->factory->pVtbl->CreateContext(ctx->factory, &ctx->context); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext() failed with error %d\n", res); ++ ret = amf_create_context(amfctx); ++ if (ret < 0) ++ return ret; + + // If a device was passed to the encoder, try to initialise from that. + if (avctx->hw_frames_ctx) { +@@ -251,16 +78,16 @@ static int amf_init_context(AVCodecConte + switch (frames_ctx->device_ctx->type) { + #if CONFIG_D3D11VA + case AV_HWDEVICE_TYPE_D3D11VA: +- ret = amf_init_from_d3d11_device(avctx, frames_ctx->device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx11(amfctx, frames_ctx->device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: +- ret = amf_init_from_dxva2_device(avctx, frames_ctx->device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx9(amfctx, frames_ctx->device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + default: +@@ -282,16 +109,16 @@ static int amf_init_context(AVCodecConte + switch (device_ctx->type) { + #if CONFIG_D3D11VA + case AV_HWDEVICE_TYPE_D3D11VA: +- ret = amf_init_from_d3d11_device(avctx, device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx11(amfctx, device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: +- ret = amf_init_from_dxva2_device(avctx, device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx9(amfctx, device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + default: +@@ -305,40 +132,57 @@ static int amf_init_context(AVCodecConte + return AVERROR(ENOMEM); + + } else { +- res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_1); +- if (res == AMF_OK) { +- av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via D3D11.\n"); +- } else { +- res = ctx->context->pVtbl->InitDX9(ctx->context, NULL); +- if (res == AMF_OK) { +- av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via D3D9.\n"); +- } else { +- AMFGuid guid = IID_AMFContext1(); +- res = ctx->context->pVtbl->QueryInterface(ctx->context, &guid, (void**)&context1); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext1() failed with error %d\n", res); +- +- res = context1->pVtbl->InitVulkan(context1, NULL); +- context1->pVtbl->Release(context1); ++#ifdef _WIN32 ++ res = 
amf_context_init_dx11(amfctx);
++        if (res != AMF_OK) {
++            res = amf_context_init_dx9(amfctx);
++            if (res != AMF_OK) {
++#endif
++                res = amf_context_init_vulkan(amfctx);
+                 if (res != AMF_OK) {
+-                    if (res == AMF_NOT_SUPPORTED)
+-                        av_log(avctx, AV_LOG_ERROR, "AMF via Vulkan is not supported on the given device.\n");
+-                    else
+-                        av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on the given Vulkan device: %d.\n", res);
++                    av_log(avctx, AV_LOG_ERROR, "AMF initialisation is not supported.\n");
+                     return AVERROR(ENOSYS);
+                 }
+-                av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via Vulkan.\n");
++#ifdef _WIN32
+             }
+         }
++#endif
+     }
++
+     return 0;
+ }
+ 
++static int amf_check_hevc_encoder_10bit_support(AVCodecContext *avctx)
++{
++    AMFEncContext *ctx = avctx->priv_data;
++    AVAMFContext *amfctx = ctx->amfctx;
++    const wchar_t *codec_id = AMFVideoEncoder_HEVC;
++    AMF_RESULT res;
++
++    res = amfctx->factory->pVtbl->CreateComponent(amfctx->factory, amfctx->context, codec_id, &ctx->encoder);
++    AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_ENCODER_NOT_FOUND, "CreateComponent(%ls) failed with error %d\n", codec_id, res);
++
++    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_COLOR_BIT_DEPTH, AMF_COLOR_BIT_DEPTH_10);
++    AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(EINVAL), "Assigning 10-bit property failed with error %d\n", res);
++
++    res = ctx->encoder->pVtbl->Init(ctx->encoder, AMF_SURFACE_P010, avctx->width, avctx->height);
++    if (res == AMF_OK) {
++        ctx->encoder->pVtbl->Terminate(ctx->encoder);
++        ctx->encoder->pVtbl->Release(ctx->encoder);
++        ctx->encoder = NULL;
++    } else {
++        ctx->encoder->pVtbl->Release(ctx->encoder);
++        ctx->encoder = NULL;
++        return AVERROR(EINVAL);
++    }
++    return res;
++}
++
+ static int amf_init_encoder(AVCodecContext *avctx)
+ {
+-    AmfContext *ctx = avctx->priv_data;
++    AMFEncContext *ctx = avctx->priv_data;
++    AVAMFContext *amfctx = ctx->amfctx;
+     const wchar_t *codec_id = NULL;
+-    AMF_RESULT res;
+     enum AVPixelFormat pix_fmt;
++    AMF_RESULT res;
+ 
+     switch (avctx->codec->id) {
+     case AV_CODEC_ID_H264:
+@@ -350,26 +194,70 @@ static int amf_init_encoder(AVCodecConte
+     default:
+         break;
+     }
+-    AMF_RETURN_IF_FALSE(ctx, codec_id != NULL, AVERROR(EINVAL), "Codec %d is not supported\n", avctx->codec->id);
++    AMF_RETURN_IF_FALSE(avctx, codec_id != NULL,
++        AVERROR(EINVAL), "Codec %d is not supported\n", avctx->codec->id);
+ 
+-    if (ctx->hw_frames_ctx)
+-        pix_fmt = ((AVHWFramesContext*)ctx->hw_frames_ctx->data)->sw_format;
+-    else
+-        pix_fmt = avctx->pix_fmt;
++    pix_fmt = avctx->hw_frames_ctx ? ((AVHWFramesContext*)avctx->hw_frames_ctx->data)->sw_format
++                                   : avctx->pix_fmt;
+ 
+     ctx->format = amf_av_to_amf_format(pix_fmt);
+-    AMF_RETURN_IF_FALSE(ctx, ctx->format != AMF_SURFACE_UNKNOWN, AVERROR(EINVAL),
+-                        "Format %s is not supported\n", av_get_pix_fmt_name(pix_fmt));
++    AMF_RETURN_IF_FALSE(avctx, ctx->format != AMF_SURFACE_UNKNOWN,
++        AVERROR(EINVAL), "Format %s is not supported\n", av_get_pix_fmt_name(pix_fmt));
++
++    ctx->bit_depth = 8;
++    if (pix_fmt == AV_PIX_FMT_P010) {
++        switch (avctx->codec->id) {
++        case AV_CODEC_ID_HEVC:
++            // GPU >= Navi or APU >= Renoir is required.
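++            // The probe below spins up a temporary HEVC encoder instance,
++            // sets AMF_VIDEO_ENCODER_HEVC_COLOR_BIT_DEPTH to 10 and tries
++            // to Init() it with a P010 surface to verify device support.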
++            res = amf_check_hevc_encoder_10bit_support(avctx);
++            if (res == AMF_OK) {
++                ctx->bit_depth = 10;
++            } else {
++                av_log(avctx, AV_LOG_ERROR, "HEVC 10-bit encoding is not supported by the given AMF device\n");
++                return res;
++            }
++            break;
++        default:
++            av_log(avctx, AV_LOG_ERROR, "10-bit encoding is not supported by AMF %s encoder\n", avctx->codec->name);
++            return AVERROR(EINVAL);
++        }
++    }
++
++    ctx->out_color_trc = amf_av_to_amf_color_trc(avctx->color_trc);
++    ctx->out_color_prm = amf_av_to_amf_color_prm(avctx->color_primaries);
++
++    switch (avctx->colorspace) {
++    case AVCOL_SPC_BT470BG:
++    case AVCOL_SPC_SMPTE170M:
++    case AVCOL_SPC_SMPTE240M:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_601;
++        break;
++    case AVCOL_SPC_BT709:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_709;
++        break;
++    case AVCOL_SPC_BT2020_NCL:
++    case AVCOL_SPC_BT2020_CL:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_2020;
++        break;
++    case AVCOL_SPC_RGB:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_JPEG;
++        break;
++    default:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_UNKNOWN;
++        break;
++    }
+ 
+-    res = ctx->factory->pVtbl->CreateComponent(ctx->factory, ctx->context, codec_id, &ctx->encoder);
+-    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_ENCODER_NOT_FOUND, "CreateComponent(%ls) failed with error %d\n", codec_id, res);
++    res = amfctx->factory->pVtbl->CreateComponent(amfctx->factory, amfctx->context, codec_id, &ctx->encoder);
++    AMF_RETURN_IF_FALSE(avctx, res == AMF_OK,
++        AVERROR_ENCODER_NOT_FOUND, "CreateComponent(%ls) failed with error %d\n", codec_id, res);
+ 
+     return 0;
+ }
+ 
+-int av_cold ff_amf_encode_close(AVCodecContext *avctx)
++av_cold int ff_amf_encode_close(AVCodecContext *avctx)
+ {
+-    AmfContext *ctx = avctx->priv_data;
++    AMFEncContext *ctx = avctx->priv_data;
++    AVAMFContext *amfctx = ctx->amfctx;
+ 
+     if (ctx->delayed_surface) {
+         ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface);
+@@ -382,34 +270,33 @@ int av_cold ff_amf_encode_close(AVCodecC
+         ctx->encoder = NULL;
+     }
+ 
+-    if (ctx->context) {
+-        ctx->context->pVtbl->Terminate(ctx->context);
+-        ctx->context->pVtbl->Release(ctx->context);
+-        ctx->context = NULL;
+-    }
++    if (amfctx) {
++        amf_unload_library(amfctx);
++        av_freep(&ctx->amfctx);
++    }
++
++    ctx->delayed_drain = 0;
+     av_buffer_unref(&ctx->hw_device_ctx);
+     av_buffer_unref(&ctx->hw_frames_ctx);
+ 
+-    if (ctx->trace) {
+-        ctx->trace->pVtbl->UnregisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID);
+-    }
+-    if (ctx->library) {
+-        dlclose(ctx->library);
+-        ctx->library = NULL;
+-    }
+-    ctx->trace = NULL;
+-    ctx->debug = NULL;
+-    ctx->factory = NULL;
+-    ctx->version = 0;
+-    ctx->delayed_drain = 0;
+     av_frame_free(&ctx->delayed_frame);
+     av_fifo_freep(&ctx->timestamp_list);
+-
+     return 0;
+ }
+ 
+-static int amf_copy_surface(AVCodecContext *avctx, const AVFrame *frame,
+-    AMFSurface* surface)
++av_cold int ff_amf_encode_init(AVCodecContext *avctx)
++{
++    int ret;
++
++    if ((ret = amf_init_context(avctx)) == 0)
++        if ((ret = amf_init_encoder(avctx)) == 0)
++            return 0;
++
++    ff_amf_encode_close(avctx);
++    return ret;
++}
++
++static int amf_copy_surface(AVCodecContext *avctx,
++                            const AVFrame *frame,
++                            AMFSurface* surface)
+ {
+     AMFPlane *plane;
+     uint8_t *dst_data[4];
+@@ -425,16 +312,16 @@ static int amf_copy_surface(AVCodecConte
+         dst_data[i] = plane->pVtbl->GetNative(plane);
+         dst_linesize[i] = plane->pVtbl->GetHPitch(plane);
+     }
++
+     av_image_copy(dst_data, dst_linesize,
+                   (const uint8_t**)frame->data, frame->linesize,
frame->format, + avctx->width, avctx->height); +- + return 0; + } + + static inline int timestamp_queue_enqueue(AVCodecContext *avctx, int64_t timestamp) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; + if (av_fifo_space(ctx->timestamp_list) < sizeof(timestamp)) { + if (av_fifo_grow(ctx->timestamp_list, sizeof(timestamp)) < 0) { + return AVERROR(ENOMEM); +@@ -444,31 +331,30 @@ static inline int timestamp_queue_enqueu + return 0; + } + +-static int amf_copy_buffer(AVCodecContext *avctx, AVPacket *pkt, AMFBuffer *buffer) ++static int amf_copy_buffer(AVCodecContext *avctx, ++ AVPacket *pkt, ++ AMFBuffer *buffer) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; + int ret; +- AMFVariantStruct var = {0}; ++ AMFVariantStruct var = { 0 }; + int64_t timestamp = AV_NOPTS_VALUE; + int64_t size = buffer->pVtbl->GetSize(buffer); + +- if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) { ++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; +- } + memcpy(pkt->data, buffer->pVtbl->GetNative(buffer), size); + + switch (avctx->codec->id) { + case AV_CODEC_ID_H264: + buffer->pVtbl->GetProperty(buffer, AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE, &var); +- if(var.int64Value == AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE_IDR) { ++ if (var.int64Value == AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE_IDR) + pkt->flags = AV_PKT_FLAG_KEY; +- } + break; + case AV_CODEC_ID_HEVC: + buffer->pVtbl->GetProperty(buffer, AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE, &var); +- if (var.int64Value == AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE_IDR) { ++ if (var.int64Value == AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE_IDR) + pkt->flags = AV_PKT_FLAG_KEY; +- } + break; + default: + break; +@@ -478,48 +364,33 @@ static int amf_copy_buffer(AVCodecContex + + pkt->pts = var.int64Value; // original pts + +- +- AMF_RETURN_IF_FALSE(ctx, av_fifo_size(ctx->timestamp_list) > 0, AVERROR_UNKNOWN, "timestamp_list is empty\n"); ++ AMF_RETURN_IF_FALSE(avctx, av_fifo_size(ctx->timestamp_list) > 0, AVERROR_UNKNOWN, "timestamp_list is empty\n"); + + av_fifo_generic_read(ctx->timestamp_list, ×tamp, sizeof(timestamp), NULL); + + // calc dts shift if max_b_frames > 0 + if (avctx->max_b_frames > 0 && ctx->dts_delay == 0) { + int64_t timestamp_last = AV_NOPTS_VALUE; +- AMF_RETURN_IF_FALSE(ctx, av_fifo_size(ctx->timestamp_list) > 0, AVERROR_UNKNOWN, +- "timestamp_list is empty while max_b_frames = %d\n", avctx->max_b_frames); +- av_fifo_generic_peek_at( +- ctx->timestamp_list, +- ×tamp_last, +- (av_fifo_size(ctx->timestamp_list) / sizeof(timestamp) - 1) * sizeof(timestamp_last), +- sizeof(timestamp_last), +- NULL); +- if (timestamp < 0 || timestamp_last < AV_NOPTS_VALUE) { ++ AMF_RETURN_IF_FALSE(avctx, av_fifo_size(ctx->timestamp_list) > 0, ++ AVERROR_UNKNOWN, "timestamp_list is empty while max_b_frames = %d\n", avctx->max_b_frames); ++ ++ av_fifo_generic_peek_at(ctx->timestamp_list, ++ ×tamp_last, ++ (av_fifo_size(ctx->timestamp_list) / sizeof(timestamp) - 1) * sizeof(timestamp_last), ++ sizeof(timestamp_last), ++ NULL); ++ ++ if (timestamp < 0 || timestamp_last < AV_NOPTS_VALUE) + return AVERROR(ERANGE); +- } + ctx->dts_delay = timestamp_last - timestamp; + } + pkt->dts = timestamp - ctx->dts_delay; + return 0; + } + +-// amfenc API implementation +-int ff_amf_encode_init(AVCodecContext *avctx) +-{ +- int ret; +- +- if ((ret = amf_load_library(avctx)) == 0) { +- if ((ret = amf_init_context(avctx)) == 0) { +- if ((ret = amf_init_encoder(avctx)) == 0) { +- return 0; +- } +- } +- } +- 
ff_amf_encode_close(avctx); +- return ret; +-} +- +-static AMF_RESULT amf_set_property_buffer(AMFSurface *object, const wchar_t *name, AMFBuffer *val) ++static AMF_RESULT amf_set_property_buffer(AMFSurface *object, ++ const wchar_t *name, ++ AMFBuffer *val) + { + AMF_RESULT res; + AMFVariantStruct var; +@@ -533,15 +404,16 @@ static AMF_RESULT amf_set_property_buffe + res = AMFVariantAssignInterface(&var, amf_interface); + amf_interface->pVtbl->Release(amf_interface); + } +- if (res == AMF_OK) { ++ if (res == AMF_OK) + res = object->pVtbl->SetProperty(object, name, var); +- } + AMFVariantClear(&var); + } + return res; + } + +-static AMF_RESULT amf_get_property_buffer(AMFData *object, const wchar_t *name, AMFBuffer **val) ++static AMF_RESULT amf_get_property_buffer(AMFData *object, ++ const wchar_t *name, ++ AMFBuffer **val) + { + AMF_RESULT res; + AMFVariantStruct var; +@@ -591,7 +463,8 @@ static void amf_release_buffer_with_fram + + int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; ++ AVAMFContext *amfctx = ctx->amfctx; + AMFSurface *surface; + AMF_RESULT res; + int ret; +@@ -618,10 +491,9 @@ int ff_amf_receive_packet(AVCodecContext + if (res == AMF_INPUT_FULL) { + ctx->delayed_drain = 1; // input queue is full: resubmit Drain() in ff_amf_receive_packet + } else { +- if (res == AMF_OK) { ++ if (res == AMF_OK) + ctx->eof = 1; // drain started +- } +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Drain() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "Drain() failed with error %d\n", res); + } + } + } +@@ -642,8 +514,8 @@ int ff_amf_receive_packet(AVCodecContext + + texture->lpVtbl->SetPrivateData(texture, &AMFTextureArrayIndexGUID, sizeof(index), &index); + +- res = ctx->context->pVtbl->CreateSurfaceFromDX11Native(ctx->context, texture, &surface, NULL); // wrap to AMF surface +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX11Native() failed with error %d\n", res); ++ res = amfctx->context->pVtbl->CreateSurfaceFromDX11Native(amfctx->context, texture, &surface, NULL); // wrap to AMF surface ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX11Native() failed with error %d\n", res); + + hw_surface = 1; + } +@@ -654,8 +526,8 @@ int ff_amf_receive_packet(AVCodecContext + { + IDirect3DSurface9 *texture = (IDirect3DSurface9 *)frame->data[3]; // actual texture + +- res = ctx->context->pVtbl->CreateSurfaceFromDX9Native(ctx->context, texture, &surface, NULL); // wrap to AMF surface +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX9Native() failed with error %d\n", res); ++ res = amfctx->context->pVtbl->CreateSurfaceFromDX9Native(amfctx->context, texture, &surface, NULL); // wrap to AMF surface ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX9Native() failed with error %d\n", res); + + hw_surface = 1; + } +@@ -663,8 +535,8 @@ int ff_amf_receive_packet(AVCodecContext + #endif + default: + { +- res = ctx->context->pVtbl->AllocSurface(ctx->context, AMF_MEMORY_HOST, ctx->format, avctx->width, avctx->height, &surface); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "AllocSurface() failed with error %d\n", res); ++ res = amfctx->context->pVtbl->AllocSurface(amfctx->context, AMF_MEMORY_HOST, ctx->format, avctx->width, avctx->height, &surface); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(ENOMEM), 
"AllocSurface() failed with error %d\n", res); + amf_copy_surface(avctx, frame, surface); + } + break; +@@ -676,11 +548,12 @@ int ff_amf_receive_packet(AVCodecContext + // input HW surfaces can be vertically aligned by 16; tell AMF the real size + surface->pVtbl->SetCrop(surface, 0, 0, frame->width, frame->height); + +- frame_ref_storage_buffer = amf_create_buffer_with_frame_ref(frame, ctx->context); +- AMF_RETURN_IF_FALSE(ctx, frame_ref_storage_buffer != NULL, AVERROR(ENOMEM), "create_buffer_with_frame_ref() returned NULL\n"); ++ frame_ref_storage_buffer = amf_create_buffer_with_frame_ref(frame, amfctx->context); ++ AMF_RETURN_IF_FALSE(avctx, frame_ref_storage_buffer != NULL, AVERROR(ENOMEM), "create_buffer_with_frame_ref() returned NULL\n"); + + res = amf_set_property_buffer(surface, L"av_frame_ref", frame_ref_storage_buffer); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "SetProperty failed for \"av_frame_ref\" with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "SetProperty failed for \"av_frame_ref\" with error %d\n", res); ++ + ctx->hwsurfaces_in_queue++; + frame_ref_storage_buffer->pVtbl->Release(frame_ref_storage_buffer); + } +@@ -690,13 +563,9 @@ int ff_amf_receive_packet(AVCodecContext + + switch (avctx->codec->id) { + case AV_CODEC_ID_H264: +- AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_INSERT_AUD, !!ctx->aud); +- break; ++ AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_INSERT_AUD, !!ctx->aud); break; + case AV_CODEC_ID_HEVC: +- AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_HEVC_INSERT_AUD, !!ctx->aud); +- break; +- default: +- break; ++ AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_HEVC_INSERT_AUD, !!ctx->aud); break; + } + + // submit surface +@@ -707,16 +576,14 @@ int ff_amf_receive_packet(AVCodecContext + } else { + int64_t pts = frame->pts; + surface->pVtbl->Release(surface); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "SubmitInput() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "SubmitInput() failed with error %d\n", res); + + av_frame_unref(frame); +- if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) { ++ if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) + return ret; +- } + } + } + +- + do { + block_and_wait = 0; + // poll data +@@ -733,14 +600,14 @@ int ff_amf_receive_packet(AVCodecContext + if (data->pVtbl->HasProperty(data, L"av_frame_ref")) { + AMFBuffer *frame_ref_storage_buffer; + res = amf_get_property_buffer(data, L"av_frame_ref", &frame_ref_storage_buffer); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetProperty failed for \"av_frame_ref\" with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "GetProperty failed for \"av_frame_ref\" with error %d\n", res); + amf_release_buffer_with_frame_ref(frame_ref_storage_buffer); + ctx->hwsurfaces_in_queue--; + } + + data->pVtbl->Release(data); + +- AMF_RETURN_IF_FALSE(ctx, ret >= 0, ret, "amf_copy_buffer() failed with error %d\n", ret); ++ AMF_RETURN_IF_FALSE(avctx, ret >= 0, ret, "amf_copy_buffer() failed with error %d\n", ret); + + if (ctx->delayed_surface != NULL) { // try to resubmit frame + res = ctx->encoder->pVtbl->SubmitInput(ctx->encoder, (AMFData*)ctx->delayed_surface); +@@ -749,11 +616,10 @@ int ff_amf_receive_packet(AVCodecContext + ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface); + ctx->delayed_surface = NULL; + av_frame_unref(ctx->delayed_frame); +- AMF_RETURN_IF_FALSE(ctx, res == 
AMF_OK, AVERROR_UNKNOWN, "Repeated SubmitInput() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated SubmitInput() failed with error %d\n", res); + +- if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) { ++ if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) + return ret; +- } + } else { + av_log(avctx, AV_LOG_WARNING, "Data acquired but delayed frame submission got AMF_INPUT_FULL- should not happen\n"); + } +@@ -762,24 +628,26 @@ int ff_amf_receive_packet(AVCodecContext + if (res != AMF_INPUT_FULL) { + ctx->delayed_drain = 0; + ctx->eof = 1; // drain started +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated Drain() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated Drain() failed with error %d\n", res); + } else { + av_log(avctx, AV_LOG_WARNING, "Data acquired but delayed drain submission got AMF_INPUT_FULL- should not happen\n"); + } + } +- } else if (ctx->delayed_surface != NULL || ctx->delayed_drain || (ctx->eof && res_query != AMF_EOF) || (ctx->hwsurfaces_in_queue >= ctx->hwsurfaces_in_queue_max)) { ++ } else if (ctx->delayed_surface != NULL || ++ ctx->delayed_drain || ++ (ctx->eof && res_query != AMF_EOF) || ++ (ctx->hwsurfaces_in_queue >= ctx->hwsurfaces_in_queue_max)) { + block_and_wait = 1; + av_usleep(1000); // wait and poll again + } + } while (block_and_wait); + +- if (res_query == AMF_EOF) { ++ if (res_query == AMF_EOF) + ret = AVERROR_EOF; +- } else if (data == NULL) { ++ else if (data == NULL) + ret = AVERROR(EAGAIN); +- } else { ++ else + ret = 0; +- } + return ret; + } + +Index: jellyfin-ffmpeg/libavcodec/amfenc.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/amfenc.h ++++ jellyfin-ffmpeg/libavcodec/amfenc.h +@@ -1,64 +1,46 @@ + /* +-* This file is part of FFmpeg. +-* +-* FFmpeg is free software; you can redistribute it and/or +-* modify it under the terms of the GNU Lesser General Public +-* License as published by the Free Software Foundation; either +-* version 2.1 of the License, or (at your option) any later version. +-* +-* FFmpeg is distributed in the hope that it will be useful, +-* but WITHOUT ANY WARRANTY; without even the implied warranty of +-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-* Lesser General Public License for more details. +-* +-* You should have received a copy of the GNU Lesser General Public +-* License along with FFmpeg; if not, write to the Free Software +-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +-*/ ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + + #ifndef AVCODEC_AMFENC_H + #define AVCODEC_AMFENC_H + +-#include +- + #include + #include + + #include "libavutil/fifo.h" + +-#include "avcodec.h" ++#include "amf.h" + #include "hwconfig.h" + +- +-/** +-* AMF trace writer callback class +-* Used to capture all AMF logging +-*/ +- +-typedef struct AmfTraceWriter { +- AMFTraceWriterVtbl *vtbl; +- AVCodecContext *avctx; +-} AmfTraceWriter; +- + /** + * AMF encoder context + */ +- +-typedef struct AmfContext { +- AVClass *avclass; +- // access to AMF runtime +- amf_handle library; ///< handle to DLL library +- AMFFactory *factory; ///< pointer to AMF factory +- AMFDebug *debug; ///< pointer to AMF debug interface +- AMFTrace *trace; ///< pointer to AMF trace interface +- +- amf_uint64 version; ///< version of AMF runtime +- AmfTraceWriter tracer; ///< AMF writer registered with AMF +- AMFContext *context; ///< AMF context +- //encoder +- AMFComponent *encoder; ///< AMF encoder object +- amf_bool eof; ///< flag indicating EOF happened +- AMF_SURFACE_FORMAT format; ///< AMF surface format ++typedef struct AMFEncContext { ++ void *avclass; ++ void *amfctx; ++ ++ // encoder ++ AMFComponent *encoder; ///< AMF encoder object ++ amf_bool eof; ///< flag indicating EOF happened ++ AMF_SURFACE_FORMAT format; ///< AMF surface format ++ AMF_VIDEO_CONVERTER_COLOR_PROFILE_ENUM out_color_profile; ++ AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM out_color_trc; ++ AMF_COLOR_PRIMARIES_ENUM out_color_prm; + + AVBufferRef *hw_device_ctx; ///< pointer to HW accelerator (decoder) + AVBufferRef *hw_frames_ctx; ///< pointer to HW accelerator (frame allocator) +@@ -76,24 +58,25 @@ typedef struct AmfContext { + int64_t dts_delay; + + // common encoder option options +- + int log_to_dbg; + + // Static options, have to be set before Init() call + int usage; + int profile; + int level; +- int preanalysis; ++ int pre_encode; + int quality; ++ int bit_depth; ++ int qvbr_level; + int b_frame_delta_qp; + int ref_b_frame_delta_qp; + + // Dynamic options, can be set after Init() call +- + int rate_control_mode; + int enforce_hrd; + int filler_data; + int enable_vbaq; ++ int enable_hmqb; + int skip_frame; + int qp_i; + int qp_p; +@@ -108,7 +91,6 @@ typedef struct AmfContext { + int aud; + + // HEVC - specific options +- + int gops_per_idr; + int header_insertion_mode; + int min_qp_i; +@@ -116,7 +98,7 @@ typedef struct AmfContext { + int min_qp_p; + int max_qp_p; + int tier; +-} AmfContext; ++} AMFEncContext; + + extern const AVCodecHWConfigInternal *const ff_amfenc_hw_configs[]; + +@@ -134,18 +116,4 @@ int ff_amf_encode_close(AVCodecContext * + */ + int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt); + +-/** +-* Supported formats +-*/ +-extern const enum AVPixelFormat ff_amf_pix_fmts[]; +- +-/** +-* Error handling helper +-*/ +-#define AMF_RETURN_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) 
\ +- if (!(exp)) { \ +- av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \ +- return ret_value; \ +- } +- +-#endif //AVCODEC_AMFENC_H ++#endif /* AVCODEC_AMFENC_H */ +Index: jellyfin-ffmpeg/libavcodec/amfenc_h264.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/amfenc_h264.c ++++ jellyfin-ffmpeg/libavcodec/amfenc_h264.c +@@ -16,111 +16,102 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +- + #include "libavutil/internal.h" + #include "libavutil/opt.h" + #include "amfenc.h" + #include "internal.h" + +-#define OFFSET(x) offsetof(AmfContext, x) ++#define OFFSET(x) offsetof(AMFEncContext, x) + #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM ++#define ENUM(a, b, c, d) { a, b, 0, AV_OPT_TYPE_CONST, { .i64 = c }, 0, 0, VE, d } + +-static const AVOption options[] = { +- // Static +- /// Usage +- { "usage", "Encoder Usage", OFFSET(usage), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_USAGE_TRANSCONDING }, AMF_VIDEO_ENCODER_USAGE_TRANSCONDING, AMF_VIDEO_ENCODER_USAGE_WEBCAM, VE, "usage" }, +- { "transcoding", "Generic Transcoding", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_TRANSCONDING }, 0, 0, VE, "usage" }, +- { "ultralowlatency","", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_ULTRA_LOW_LATENCY }, 0, 0, VE, "usage" }, +- { "lowlatency", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY }, 0, 0, VE, "usage" }, +- { "webcam", "Webcam", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_WEBCAM }, 0, 0, VE, "usage" }, +- +- /// Profile, +- { "profile", "Profile", OFFSET(profile),AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_PROFILE_MAIN }, AMF_VIDEO_ENCODER_PROFILE_BASELINE, AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH, VE, "profile" }, +- { "main", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_MAIN }, 0, 0, VE, "profile" }, +- { "high", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_HIGH }, 0, 0, VE, "profile" }, +- { "constrained_baseline", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_BASELINE }, 0, 0, VE, "profile" }, +- { "constrained_high", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH }, 0, 0, VE, "profile" }, +- +- /// Profile Level +- { "level", "Profile Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 62, VE, "level" }, +- { "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, VE, "level" }, +- { "1.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 10 }, 0, 0, VE, "level" }, +- { "1.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 11 }, 0, 0, VE, "level" }, +- { "1.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 12 }, 0, 0, VE, "level" }, +- { "1.3", "", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, 0, 0, VE, "level" }, +- { "2.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 20 }, 0, 0, VE, "level" }, +- { "2.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 21 }, 0, 0, VE, "level" }, +- { "2.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 22 }, 0, 0, VE, "level" }, +- { "3.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, 0, 0, VE, "level" }, +- { "3.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, 0, 0, VE, "level" }, +- { "3.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, 0, 0, VE, "level" }, +- { "4.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, 0, 0, VE, "level" }, +- { "4.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, 0, 0, VE, "level" }, +- { "4.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, 0, 0, VE, "level" }, +- { "5.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, 0, 0, VE, "level" }, +- { "5.1", "", 0, AV_OPT_TYPE_CONST, 
{ .i64 = 51 }, 0, 0, VE, "level" }, +- { "5.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, 0, 0, VE, "level" }, +- { "6.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 60 }, 0, 0, VE, "level" }, +- { "6.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 61 }, 0, 0, VE, "level" }, +- { "6.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 62 }, 0, 0, VE, "level" }, +- +- +- /// Quality Preset +- { "quality", "Quality Preference", OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED }, AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED, AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY, VE, "quality" }, +- { "speed", "Prefer Speed", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED }, 0, 0, VE, "quality" }, +- { "balanced", "Balanced", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED }, 0, 0, VE, "quality" }, +- { "quality", "Prefer Quality", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY }, 0, 0, VE, "quality" }, +- +- // Dynamic +- /// Rate Control Method +- { "rc", "Rate Control Method", OFFSET(rate_control_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR, VE, "rc" }, +- { "cqp", "Constant Quantization Parameter", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP }, 0, 0, VE, "rc" }, +- { "cbr", "Constant Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR }, 0, 0, VE, "rc" }, +- { "vbr_peak", "Peak Contrained Variable Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR }, 0, 0, VE, "rc" }, +- { "vbr_latency", "Latency Constrained Variable Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR }, 0, 0, VE, "rc" }, +- +- /// Enforce HRD, Filler Data, VBAQ, Frame Skipping +- { "enforce_hrd", "Enforce HRD", OFFSET(enforce_hrd), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, +- { "filler_data", "Filler Data Enable", OFFSET(filler_data), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, +- { "vbaq", "Enable VBAQ", OFFSET(enable_vbaq), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, +- { "frame_skipping", "Rate Control Based Frame Skip", OFFSET(skip_frame), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, +- +- /// QP Values +- { "qp_i", "Quantization Parameter for I-Frame", OFFSET(qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "qp_p", "Quantization Parameter for P-Frame", OFFSET(qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "qp_b", "Quantization Parameter for B-Frame", OFFSET(qp_b), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- +- /// Pre-Pass, Pre-Analysis, Two-Pass +- { "preanalysis", "Pre-Analysis Mode", OFFSET(preanalysis), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE, NULL }, +- +- /// Maximum Access Unit Size +- { "max_au_size", "Maximum Access Unit Size for rate control (in bits)", OFFSET(max_au_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, +- +- /// Header Insertion Spacing +- { "header_spacing", "Header Insertion Spacing", OFFSET(header_spacing), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, +- +- /// B-Frames +- // BPicturesPattern=bf +- { "bf_delta_qp", "B-Picture Delta QP", OFFSET(b_frame_delta_qp), AV_OPT_TYPE_INT, { .i64 = 4 }, -10, 10, VE }, +- { "bf_ref", "Enable Reference to B-Frames", OFFSET(b_frame_ref), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, +- { "bf_ref_delta_qp","Reference B-Picture Delta QP", 
OFFSET(ref_b_frame_delta_qp), AV_OPT_TYPE_INT, { .i64 = 4 }, -10, 10, VE }, +- +- /// Intra-Refresh +- { "intra_refresh_mb","Intra Refresh MBs Number Per Slot in Macroblocks", OFFSET(intra_refresh_mb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, +- +- /// coder +- { "coder", "Coding Type", OFFSET(coding_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_UNDEFINED }, AMF_VIDEO_ENCODER_UNDEFINED, AMF_VIDEO_ENCODER_CALV, VE, "coder" }, +- { "auto", "Automatic", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_UNDEFINED }, 0, 0, VE, "coder" }, +- { "cavlc", "Context Adaptive Variable-Length Coding", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_CALV }, 0, 0, VE, "coder" }, +- { "cabac", "Context Adaptive Binary Arithmetic Coding", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_CABAC }, 0, 0, VE, "coder" }, +- +- { "me_half_pel", "Enable ME Half Pixel", OFFSET(me_half_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, +- { "me_quarter_pel", "Enable ME Quarter Pixel", OFFSET(me_quarter_pel),AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, ++static const enum AVPixelFormat ff_amfenc_h264_pix_fmts[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++#if CONFIG_D3D11VA ++ AV_PIX_FMT_D3D11, ++#endif ++#if CONFIG_DXVA2 ++ AV_PIX_FMT_DXVA2_VLD, ++#endif ++ AV_PIX_FMT_NONE ++}; + +- { "aud", "Inserts AU Delimiter NAL unit", OFFSET(aud) ,AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++static const AVOption options[] = { ++ { "usage", "Encoder Usage", OFFSET(usage), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_USAGE_TRANSCODING }, AMF_VIDEO_ENCODER_USAGE_TRANSCODING, AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY_HIGH_QUALITY, VE, "usage" }, ++ ENUM("transcoding", "Transcoding, video editing", AMF_VIDEO_ENCODER_USAGE_TRANSCODING, "usage"), ++ ENUM("ultralowlatency", "Video game streaming", AMF_VIDEO_ENCODER_USAGE_ULTRA_LOW_LATENCY, "usage"), ++ ENUM("lowlatency", "Video collaboration, RDP", AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY, "usage"), ++ ENUM("webcam", "Video conferencing", AMF_VIDEO_ENCODER_USAGE_WEBCAM, "usage"), ++ ENUM("highquality", "High-quality encoding", AMF_VIDEO_ENCODER_USAGE_HIGH_QUALITY, "usage"), ++ ENUM("llhighquality", "High-quality encoding (low latency)", AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY_HIGH_QUALITY, "usage"), ++ ++ { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_PROFILE_MAIN }, AMF_VIDEO_ENCODER_PROFILE_BASELINE, AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH, VE, "profile" }, ++ ENUM("main", "", AMF_VIDEO_ENCODER_PROFILE_MAIN, "profile"), ++ ENUM("high", "", AMF_VIDEO_ENCODER_PROFILE_HIGH, "profile"), ++ ENUM("constrained_baseline", "", AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_BASELINE, "profile"), ++ ENUM("constrained_high", "", AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH, "profile"), ++ ++ { "level", "Profile Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 62, VE, "level" }, ++ ENUM("auto", "", 0, "level"), ++ ENUM("1.0", "", 10, "level"), ++ ENUM("1.1", "", 11, "level"), ++ ENUM("1.2", "", 12, "level"), ++ ENUM("1.3", "", 13, "level"), ++ ENUM("2.0", "", 20, "level"), ++ ENUM("2.1", "", 21, "level"), ++ ENUM("2.2", "", 22, "level"), ++ ENUM("3.0", "", 30, "level"), ++ ENUM("3.1", "", 31, "level"), ++ ENUM("3.2", "", 32, "level"), ++ ENUM("4.0", "", 40, "level"), ++ ENUM("4.1", "", 41, "level"), ++ ENUM("4.2", "", 42, "level"), ++ ENUM("5.0", "", 50, "level"), ++ ENUM("5.1", "", 51, "level"), ++ ENUM("5.2", "", 52, "level"), ++ ENUM("6.0", "", 60, "level"), ++ ENUM("6.1", "", 61, "level"), ++ ENUM("6.2", "", 62, "level"), ++ ++ { "quality", "Quality 
Preset", OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED }, AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED, AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY, VE, "quality" }, ++ ENUM("speed", "Prefer Speed", AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED, "quality"), ++ ENUM("balanced", "Balanced", AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED, "quality"), ++ ENUM("quality", "Prefer Quality", AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY, "quality"), ++ ++ { "rc", "Rate Control Method", OFFSET(rate_control_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR, VE, "rc" }, ++ ENUM("cqp", "Constant Quantization Parameter", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP, "rc"), ++ ENUM("cbr", "Constant Bitrate", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR, "rc"), ++ ENUM("vbr_peak", "Peak Constrained Variable Bitrate", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR, "rc"), ++ ENUM("vbr_latency", "Latency Constrained Variable Bitrate", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR, "rc"), ++ ENUM("qvbr", "Quality-defined Variable Bitrate", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR, "rc"), ++ ++ { "preanalysis", "Enable Pre-Encode/Analysis for Rate Control (2-Pass)", OFFSET(pre_encode), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "vbaq", "Enable VBAQ", OFFSET(enable_vbaq), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "hmqb", "Enable High Motion Quality Boost", OFFSET(enable_hmqb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "enforce_hrd", "Enforce HRD", OFFSET(enforce_hrd), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "filler_data", "Filler Data Enable", OFFSET(filler_data), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "frame_skipping", "Rate Control Based Frame Skip", OFFSET(skip_frame), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "qvbr_level", "Quality level for QVBR rate control", OFFSET(qvbr_level), AV_OPT_TYPE_INT, { .i64 = 23 }, 1, 51, VE }, ++ { "qp_i", "Quantization Parameter for I-Frame", OFFSET(qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, ++ { "qp_p", "Quantization Parameter for P-Frame", OFFSET(qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, ++ { "qp_b", "Quantization Parameter for B-Frame", OFFSET(qp_b), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, ++ { "max_au_size", "Maximum Access Unit Size for rate control (in bits)", OFFSET(max_au_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, ++ { "header_spacing", "Header Insertion Spacing", OFFSET(header_spacing), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, ++ { "bf_delta_qp", "B-Picture Delta QP", OFFSET(b_frame_delta_qp), AV_OPT_TYPE_INT, { .i64 = 4 }, -10, 10, VE }, ++ { "bf_ref", "Enable Reference to B-Frames", OFFSET(b_frame_ref), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, ++ { "bf_ref_delta_qp", "Reference B-Picture Delta QP", OFFSET(ref_b_frame_delta_qp), AV_OPT_TYPE_INT, { .i64 = 4 }, -10, 10, VE }, ++ { "intra_refresh_mb", "Intra Refresh MBs Number Per Slot in Macroblocks", OFFSET(intra_refresh_mb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, ++ ++ { "coder", "Coding Type", OFFSET(coding_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_UNDEFINED }, AMF_VIDEO_ENCODER_UNDEFINED, AMF_VIDEO_ENCODER_CALV, VE, "coder" }, ++ ENUM("auto", "Automatic", AMF_VIDEO_ENCODER_UNDEFINED, "coder"), ++ ENUM("cavlc", "Context Adaptive Variable-Length Coding", AMF_VIDEO_ENCODER_CALV, "coder"), ++ ENUM("cabac", "Context Adaptive 
Binary Arithmetic Coding", AMF_VIDEO_ENCODER_CABAC, "coder"), + +- { "log_to_dbg", "Enable AMF logging to debug output", OFFSET(log_to_dbg) , AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "me_half_pel", "Enable ME Half Pixel", OFFSET(me_half_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, ++ { "me_quarter_pel", "Enable ME Quarter Pixel", OFFSET(me_quarter_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, + ++ { "log_to_dbg", "Enable AMF logging to debug output", OFFSET(log_to_dbg), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, + { NULL } + }; + +@@ -128,7 +119,7 @@ static av_cold int amf_encode_init_h264( + { + int ret = 0; + AMF_RESULT res = AMF_OK; +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; + AMFVariantStruct var = { 0 }; + amf_int64 profile = 0; + amf_int64 profile_level = 0; +@@ -136,13 +127,13 @@ static av_cold int amf_encode_init_h264( + AMFGuid guid; + AMFRate framerate; + AMFSize framesize = AMFConstructSize(avctx->width, avctx->height); ++ int probed_rc_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN; + int deblocking_filter = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0; + +- if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { ++ if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = AMFConstructRate(avctx->framerate.num, avctx->framerate.den); +- } else { ++ else + framerate = AMFConstructRate(avctx->time_base.den, avctx->time_base.num * avctx->ticks_per_frame); +- } + + if ((ret = ff_amf_encode_init(avctx)) != 0) + return ret; +@@ -171,62 +162,84 @@ static av_cold int amf_encode_init_h264( + profile = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH; + break; + } +- if (profile == 0) { ++ if (profile == 0) + profile = ctx->profile; +- } + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PROFILE, profile); + + profile_level = avctx->level; +- if (profile_level == FF_LEVEL_UNKNOWN) { ++ if (profile_level == FF_LEVEL_UNKNOWN) + profile_level = ctx->level; +- } +- if (profile_level != 0) { ++ if (profile_level != 0) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PROFILE_LEVEL, profile_level); +- } + + // Maximum Reference Frames +- if (avctx->refs != -1) { ++ if (avctx->refs != -1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_NUM_REFRAMES, avctx->refs); +- } + if (avctx->sample_aspect_ratio.den && avctx->sample_aspect_ratio.num) { + AMFRatio ratio = AMFConstructRatio(avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den); + AMF_ASSIGN_PROPERTY_RATIO(res, ctx->encoder, AMF_VIDEO_ENCODER_ASPECT_RATIO, ratio); + } + +- /// Color Range (Partial/TV/MPEG or Full/PC/JPEG) +- if (avctx->color_range == AVCOL_RANGE_JPEG) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FULL_RANGE_COLOR, 1); ++ // Auto detect rate control method ++ if (ctx->qp_i != -1 || ctx->qp_p != -1 || ctx->qp_b != -1) { ++ probed_rc_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP; ++ } else if (avctx->rc_max_rate > 0 ) { ++ probed_rc_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; ++ } else { ++ probed_rc_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR; + } + +- // autodetect rate control method + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN) { +- if (ctx->qp_i != -1 || ctx->qp_p != -1 || ctx->qp_b != -1) { +- ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CQP\n"); +- } else if (avctx->rc_max_rate > 0 ) { +- 
ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR;
+-            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to Peak VBR\n");
+-        } else {
+-            ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR;
+-            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CBR\n");
++        switch (probed_rc_mode) {
++        case AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP:
++            ctx->rate_control_mode = probed_rc_mode;
++            av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CQP\n");
++            break;
++        case AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR:
++            ctx->rate_control_mode = probed_rc_mode;
++            av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to Peak VBR\n");
++            break;
++        case AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR:
++            ctx->rate_control_mode = probed_rc_mode;
++            av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CBR\n");
++            break;
+         }
+     }
+
++    // Pre-Encode/Two-Pass(pre-encode assisted rate control)
+     if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) {
+-        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_PREANALYSIS_ENABLE, AMF_VIDEO_ENCODER_PREENCODE_DISABLED);
+-        if (ctx->preanalysis)
+-            av_log(ctx, AV_LOG_WARNING, "Pre-Analysis is not supported by cqp Rate Control Method, automatically disabled\n");
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PREENCODE_ENABLE, AMF_VIDEO_ENCODER_PREENCODE_DISABLED);
++        if (ctx->pre_encode) {
++            ctx->pre_encode = 0;
++            av_log(ctx, AV_LOG_WARNING, "Pre-Encode is not supported by CQP rate control method, automatically disabled\n");
++        }
+     } else {
+-        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_PREANALYSIS_ENABLE, ctx->preanalysis);
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PREENCODE_ENABLE, ctx->pre_encode);
+     }
+
++    // Quality preset
+     AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QUALITY_PRESET, ctx->quality);
+
+     // Dynamic parmaters
+     AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD, ctx->rate_control_mode);
++    if (res != AMF_OK && ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR) {
++        ctx->rate_control_mode = probed_rc_mode;
++        av_log(ctx, AV_LOG_WARNING, "QVBR is not supported by this GPU, switching to the auto-detected rate control method\n");
++    }
+
+-    /// VBV Buffer
++    // High Motion Quality Boost mode
++    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR) {
++        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HIGH_MOTION_QUALITY_BOOST_ENABLE, 0);
++        if (ctx->enable_hmqb) {
++            ctx->enable_hmqb = 0;
++            av_log(ctx, AV_LOG_WARNING, "High Motion Quality Boost is not supported by QVBR rate control method, automatically disabled\n");
++        }
++    } else {
++        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HIGH_MOTION_QUALITY_BOOST_ENABLE, !!ctx->enable_hmqb);
++    }
++
++    // VBV Buffer
+     if (avctx->rc_buffer_size != 0) {
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_VBV_BUFFER_SIZE, avctx->rc_buffer_size);
+         if (avctx->rc_initial_buffer_occupancy != 0) {
+@@ -236,7 +249,8 @@ static av_cold int amf_encode_init_h264(
+             AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_INITIAL_VBV_BUFFER_FULLNESS, amf_buffer_fullness);
+         }
+     }
+-    /// Maximum Access Unit Size
++
++    // Maximum Access Unit Size
+     AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_AU_SIZE, ctx->max_au_size);
+
+     if (ctx->max_au_size)
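The fallback above is worth noting: if SetProperty() rejects QUALITY_VBR at init time, the encoder drops back to the probed mode instead of failing, so callers may request QVBR unconditionally. A minimal sketch of how the new rc=qvbr and qvbr_level options declared in this file are reached from user code (not part of the patch; assumes an AMF-capable FFmpeg build, error handling mostly elided):

#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>

/* Hypothetical helper, for illustration only: open h264_amf in QVBR mode.
 * If the GPU lacks QUALITY_VBR, the init code above logs a warning and
 * falls back to the probed rate control method. */
static AVCodecContext *open_h264_amf_qvbr(int width, int height)
{
    const AVCodec *codec = avcodec_find_encoder_by_name("h264_amf");
    AVCodecContext *enc;

    if (!codec)
        return NULL;
    enc = avcodec_alloc_context3(codec);
    if (!enc)
        return NULL;

    enc->width     = width;
    enc->height    = height;
    enc->time_base = (AVRational){ 1, 30 };
    enc->pix_fmt   = AV_PIX_FMT_NV12;

    av_opt_set    (enc->priv_data, "rc", "qvbr", 0);      /* new QVBR mode   */
    av_opt_set_int(enc->priv_data, "qvbr_level", 23, 0);  /* 1..51, def. 23  */

    if (avcodec_open2(enc, codec, NULL) < 0) {
        avcodec_free_context(&enc);
        return NULL;
    }
    return enc;
}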
+@@ -246,7 +260,25 @@ static av_cold int amf_encode_init_h264(
+     if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) {
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 0);
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 51);
++    } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR) {
++        if (ctx->qvbr_level) {
++            AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QVBR_QUALITY_LEVEL, ctx->qvbr_level);
++        }
+     } else {
++        if (avctx->qmin == -1 && avctx->qmax == -1) {
++            switch (ctx->usage) {
++            case AMF_VIDEO_ENCODER_USAGE_TRANSCONDING:
++                AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 18);
++                AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 46);
++                break;
++            case AMF_VIDEO_ENCODER_USAGE_ULTRA_LOW_LATENCY:
++            case AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY:
++            case AMF_VIDEO_ENCODER_USAGE_WEBCAM:
++                AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 22);
++                AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 48);
++                break;
++            }
++        }
+         if (avctx->qmin != -1) {
+             int qval = avctx->qmin > 51 ? 51 : avctx->qmin;
+             AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, qval);
+@@ -266,31 +298,50 @@ static av_cold int amf_encode_init_h264(
+     AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_TARGET_BITRATE, avctx->bit_rate);
+
+-    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR) {
++    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR)
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PEAK_BITRATE, avctx->bit_rate);
+-    }
++
+     if (avctx->rc_max_rate) {
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PEAK_BITRATE, avctx->rc_max_rate);
+     } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) {
+-        av_log(ctx, AV_LOG_WARNING, "rate control mode is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n");
++        av_log(ctx, AV_LOG_WARNING, "Rate control method is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n");
++    }
++
++    // Color Range (Partial/TV/MPEG or Full/PC/JPEG)
++    if (avctx->color_range == AVCOL_RANGE_JPEG) {
++        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FULL_RANGE_COLOR, 1);
++    } else {
++        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FULL_RANGE_COLOR, 0);
+     }
+
++    // Set output color profile, transfer and primaries
++    if (ctx->out_color_profile > AMF_VIDEO_CONVERTER_COLOR_PROFILE_UNKNOWN)
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_OUTPUT_COLOR_PROFILE, ctx->out_color_profile);
++    if (ctx->out_color_trc > AMF_COLOR_TRANSFER_CHARACTERISTIC_UNDEFINED)
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_OUTPUT_TRANSFER_CHARACTERISTIC, ctx->out_color_trc);
++    if (ctx->out_color_prm > AMF_COLOR_PRIMARIES_UNDEFINED)
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_OUTPUT_COLOR_PRIMARIES, ctx->out_color_prm);
++
+     // Initialize Encoder
+     res = ctx->encoder->pVtbl->Init(ctx->encoder, ctx->format, avctx->width, avctx->height);
+     AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "encoder->Init() failed with error %d\n", res);
+
+-    // Enforce HRD, Filler Data, VBAQ, Frame Skipping, Deblocking Filter
++    // Enforce HRD, Filler Data, Frame Skipping, Deblocking Filter
+     AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENFORCE_HRD, !!ctx->enforce_hrd);
+     AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder,
AMF_VIDEO_ENCODER_FILLER_DATA_ENABLE, !!ctx->filler_data); + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_SKIP_FRAME_ENABLE, !!ctx->skip_frame); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_DE_BLOCKING_FILTER, !!deblocking_filter); ++ ++ // VBAQ + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) { + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENABLE_VBAQ, 0); +- if (ctx->enable_vbaq) ++ if (ctx->enable_vbaq) { ++ ctx->enable_vbaq = 0; + av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by cqp Rate Control Method, automatically disabled\n"); ++ } + } else { + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENABLE_VBAQ, !!ctx->enable_vbaq); + } +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_DE_BLOCKING_FILTER, !!deblocking_filter); + + // B-Frames + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_PATTERN, avctx->max_b_frames); +@@ -338,9 +389,8 @@ static av_cold int amf_encode_init_h264( + guid = IID_AMFBuffer(); + + res = var.pInterface->pVtbl->QueryInterface(var.pInterface, &guid, (void**)&buffer); // query for buffer interface +- if (res != AMF_OK) { ++ if (res != AMF_OK) + var.pInterface->pVtbl->Release(var.pInterface); +- } + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "QueryInterface(IID_AMFBuffer) failed with error %d\n", res); + + avctx->extradata_size = (int)buffer->pVtbl->GetSize(buffer); +@@ -359,15 +409,15 @@ static av_cold int amf_encode_init_h264( + } + + static const AVCodecDefault defaults[] = { +- { "refs", "-1" }, +- { "aspect", "0" }, +- { "qmin", "-1" }, +- { "qmax", "-1" }, +- { "b", "2M" }, +- { "g", "250" }, +- { "slices", "1" }, +- { "flags", "+loop"}, +- { NULL }, ++ { "refs", "-1" }, ++ { "aspect", "0" }, ++ { "qmin", "-1" }, ++ { "qmax", "-1" }, ++ { "b", "2M" }, ++ { "g", "250" }, ++ { "slices", "1" }, ++ { "flags", "+loop" }, ++ { NULL }, + }; + + static const AVClass h264_amf_class = { +@@ -385,13 +435,13 @@ AVCodec ff_h264_amf_encoder = { + .init = amf_encode_init_h264, + .receive_packet = ff_amf_receive_packet, + .close = ff_amf_encode_close, +- .priv_data_size = sizeof(AmfContext), ++ .priv_data_size = sizeof(AMFEncContext), + .priv_class = &h264_amf_class, + .defaults = defaults, + .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, +- .pix_fmts = ff_amf_pix_fmts, ++ .pix_fmts = ff_amfenc_h264_pix_fmts, + .wrapper_name = "amf", + .hw_configs = ff_amfenc_hw_configs, + }; +Index: jellyfin-ffmpeg/libavcodec/amfenc_hevc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/amfenc_hevc.c ++++ jellyfin-ffmpeg/libavcodec/amfenc_hevc.c +@@ -21,73 +21,91 @@ + #include "amfenc.h" + #include "internal.h" + +-#define OFFSET(x) offsetof(AmfContext, x) ++#define OFFSET(x) offsetof(AMFEncContext, x) + #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM +-static const AVOption options[] = { +- { "usage", "Set the encoding usage", OFFSET(usage), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING }, AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING, AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM, VE, "usage" }, +- { "transcoding", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING }, 0, 0, VE, "usage" }, +- { "ultralowlatency","", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_ULTRA_LOW_LATENCY }, 0, 0, VE, "usage" }, +- { "lowlatency", "", 
0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY }, 0, 0, VE, "usage" }, +- { "webcam", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM }, 0, 0, VE, "usage" }, +- +- { "profile", "Set the profile (default main)", OFFSET(profile), AV_OPT_TYPE_INT,{ .i64 = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN }, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, VE, "profile" }, +- { "main", "", 0, AV_OPT_TYPE_CONST,{ .i64 = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN }, 0, 0, VE, "profile" }, +- +- { "profile_tier", "Set the profile tier (default main)", OFFSET(tier), AV_OPT_TYPE_INT,{ .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_MAIN }, AMF_VIDEO_ENCODER_HEVC_TIER_MAIN, AMF_VIDEO_ENCODER_HEVC_TIER_HIGH, VE, "tier" }, +- { "main", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_MAIN }, 0, 0, VE, "tier" }, +- { "high", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_HIGH }, 0, 0, VE, "tier" }, +- +- { "level", "Set the encoding level (default auto)", OFFSET(level), AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, AMF_LEVEL_6_2, VE, "level" }, +- { "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, VE, "level" }, +- { "1.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_1 }, 0, 0, VE, "level" }, +- { "2.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_2 }, 0, 0, VE, "level" }, +- { "2.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_2_1 }, 0, 0, VE, "level" }, +- { "3.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_3 }, 0, 0, VE, "level" }, +- { "3.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_3_1 }, 0, 0, VE, "level" }, +- { "4.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_4 }, 0, 0, VE, "level" }, +- { "4.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_4_1 }, 0, 0, VE, "level" }, +- { "5.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_5 }, 0, 0, VE, "level" }, +- { "5.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_5_1 }, 0, 0, VE, "level" }, +- { "5.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_5_2 }, 0, 0, VE, "level" }, +- { "6.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_6 }, 0, 0, VE, "level" }, +- { "6.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_6_1 }, 0, 0, VE, "level" }, +- { "6.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_6_2 }, 0, 0, VE, "level" }, +- +- { "quality", "Set the encoding quality", OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED }, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED, VE, "quality" }, +- { "balanced", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_BALANCED }, 0, 0, VE, "quality" }, +- { "speed", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED }, 0, 0, VE, "quality" }, +- { "quality", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY }, 0, 0, VE, "quality" }, +- +- { "rc", "Set the rate control mode", OFFSET(rate_control_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR, VE, "rc" }, +- { "cqp", "Constant Quantization Parameter", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP }, 0, 0, VE, "rc" }, +- { "cbr", "Constant Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR }, 0, 0, VE, "rc" }, +- { "vbr_peak", "Peak Contrained Variable Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = 
AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR }, 0, 0, VE, "rc" }, +- { "vbr_latency", "Latency Constrained Variable Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR }, 0, 0, VE, "rc" }, +- +- { "header_insertion_mode", "Set header insertion mode", OFFSET(header_insertion_mode), AV_OPT_TYPE_INT,{ .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE }, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED, VE, "hdrmode" }, +- { "none", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE }, 0, 0, VE, "hdrmode" }, +- { "gop", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_GOP_ALIGNED }, 0, 0, VE, "hdrmode" }, +- { "idr", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED }, 0, 0, VE, "hdrmode" }, +- +- { "gops_per_idr", "GOPs per IDR 0-no IDR will be inserted", OFFSET(gops_per_idr), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, INT_MAX, VE }, +- { "preanalysis", "Enable preanalysis", OFFSET(preanalysis), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE}, +- { "vbaq", "Enable VBAQ", OFFSET(enable_vbaq), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE}, +- { "enforce_hrd", "Enforce HRD", OFFSET(enforce_hrd), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE}, +- { "filler_data", "Filler Data Enable", OFFSET(filler_data), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE}, +- { "max_au_size", "Maximum Access Unit Size for rate control (in bits)", OFFSET(max_au_size), AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, VE}, +- { "min_qp_i", "min quantization parameter for I-frame", OFFSET(min_qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "max_qp_i", "max quantization parameter for I-frame", OFFSET(max_qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "min_qp_p", "min quantization parameter for P-frame", OFFSET(min_qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "max_qp_p", "max quantization parameter for P-frame", OFFSET(max_qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "qp_p", "quantization parameter for P-frame", OFFSET(qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "qp_i", "quantization parameter for I-frame", OFFSET(qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "skip_frame", "Rate Control Based Frame Skip", OFFSET(skip_frame), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, +- { "me_half_pel", "Enable ME Half Pixel", OFFSET(me_half_pel), AV_OPT_TYPE_BOOL,{ .i64 = 1 }, 0, 1, VE }, +- { "me_quarter_pel", "Enable ME Quarter Pixel ", OFFSET(me_quarter_pel),AV_OPT_TYPE_BOOL,{ .i64 = 1 }, 0, 1, VE }, ++#define ENUM(a, b, c, d) { a, b, 0, AV_OPT_TYPE_CONST, { .i64 = c }, 0, 0, VE, d } ++ ++static const enum AVPixelFormat ff_amfenc_hevc_pix_fmts[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_P010, ++#if CONFIG_D3D11VA ++ AV_PIX_FMT_D3D11, ++#endif ++#if CONFIG_DXVA2 ++ AV_PIX_FMT_DXVA2_VLD, ++#endif ++ AV_PIX_FMT_NONE ++}; + +- { "aud", "Inserts AU Delimiter NAL unit", OFFSET(aud) ,AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, ++static const AVOption options[] = { ++ { "usage", "Encoder Usage", OFFSET(usage), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCODING }, AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCODING, AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY_HIGH_QUALITY, VE, "usage" }, ++ ENUM("transcoding", "Transcoding, video editing", AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCODING, "usage"), ++ ENUM("ultralowlatency", "Video game streaming", 
AMF_VIDEO_ENCODER_HEVC_USAGE_ULTRA_LOW_LATENCY, "usage"),
++    ENUM("lowlatency", "Video collaboration, RDP", AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY, "usage"),
++    ENUM("webcam", "Video conferencing", AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM, "usage"),
++    ENUM("highquality", "High-quality encoding", AMF_VIDEO_ENCODER_HEVC_USAGE_HIGH_QUALITY, "usage"),
++    ENUM("llhighquality", "High-quality encoding (low latency)", AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY_HIGH_QUALITY, "usage"),
++
++    { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN }, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN_10, VE, "profile" },
++    ENUM("main", "", AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, "profile"),
++    ENUM("main10", "", AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN_10, "profile"),
++
++
++    { "profile_tier", "Profile Tier", OFFSET(tier), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_MAIN }, AMF_VIDEO_ENCODER_HEVC_TIER_MAIN, AMF_VIDEO_ENCODER_HEVC_TIER_HIGH, VE, "tier" },
++    ENUM("main", "", AMF_VIDEO_ENCODER_HEVC_TIER_MAIN, "tier"),
++    ENUM("high", "", AMF_VIDEO_ENCODER_HEVC_TIER_HIGH, "tier"),
++
++    { "level", "Profile Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, AMF_LEVEL_6_2, VE, "level" },
++    ENUM("auto", "", 0, "level"),
++    ENUM("1.0", "", AMF_LEVEL_1, "level"),
++    ENUM("2.0", "", AMF_LEVEL_2, "level"),
++    ENUM("2.1", "", AMF_LEVEL_2_1, "level"),
++    ENUM("3.0", "", AMF_LEVEL_3, "level"),
++    ENUM("3.1", "", AMF_LEVEL_3_1, "level"),
++    ENUM("4.0", "", AMF_LEVEL_4, "level"),
++    ENUM("4.1", "", AMF_LEVEL_4_1, "level"),
++    ENUM("5.0", "", AMF_LEVEL_5, "level"),
++    ENUM("5.1", "", AMF_LEVEL_5_1, "level"),
++    ENUM("5.2", "", AMF_LEVEL_5_2, "level"),
++    ENUM("6.0", "", AMF_LEVEL_6, "level"),
++    ENUM("6.1", "", AMF_LEVEL_6_1, "level"),
++    ENUM("6.2", "", AMF_LEVEL_6_2, "level"),
++
++    { "quality", "Quality Preset", OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED }, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED, VE, "quality" },
++    ENUM("speed", "Prefer Speed", AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED, "quality"),
++    ENUM("balanced", "Balanced", AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_BALANCED, "quality"),
++    ENUM("quality", "Prefer Quality", AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY, "quality"),
++
++    { "rc", "Rate Control Method", OFFSET(rate_control_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR, VE, "rc" },
++    ENUM("cqp", "Constant Quantization Parameter", AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP, "rc"),
++    ENUM("cbr", "Constant Bitrate", AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR, "rc"),
++    ENUM("vbr_peak", "Peak Constrained Variable Bitrate", AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR, "rc"),
++    ENUM("vbr_latency", "Latency Constrained Variable Bitrate", AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR, "rc"),
++
++    { "header_insertion_mode", "Set header insertion mode", OFFSET(header_insertion_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE }, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED, VE, "hdrmode" },
++    ENUM("none", "", AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE, "hdrmode"),
++    ENUM("gop", "", AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_GOP_ALIGNED, "hdrmode"),
++    ENUM("idr", "", AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED, "hdrmode"),
++
++    { "gops_per_idr", "GOPs per IDR 0-no IDR will be inserted", OFFSET(gops_per_idr), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, INT_MAX, VE },
++    { "preanalysis", "Enable Pre-Encode/Analysis for Rate Control (2-Pass)", OFFSET(pre_encode), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "vbaq", "Enable VBAQ", OFFSET(enable_vbaq), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "hmqb", "Enable High Motion Quality Boost", OFFSET(enable_hmqb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "enforce_hrd", "Enforce HRD", OFFSET(enforce_hrd), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "filler_data", "Filler Data Enable", OFFSET(filler_data), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "max_au_size", "Maximum Access Unit Size for rate control (in bits)", OFFSET(max_au_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
++    { "min_qp_i", "Min Quantization Parameter for I-frame", OFFSET(min_qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "max_qp_i", "Max Quantization Parameter for I-frame", OFFSET(max_qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "min_qp_p", "Min Quantization Parameter for P-frame", OFFSET(min_qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "max_qp_p", "Max Quantization Parameter for P-frame", OFFSET(max_qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "qp_p", "Quantization Parameter for P-frame", OFFSET(qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "qp_i", "Quantization Parameter for I-frame", OFFSET(qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "skip_frame", "Rate Control Based Frame Skip", OFFSET(skip_frame), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "me_half_pel", "Enable ME Half Pixel", OFFSET(me_half_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
++    { "me_quarter_pel", "Enable ME Quarter Pixel", OFFSET(me_quarter_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
+
+-    { "log_to_dbg", "Enable AMF logging to debug output", OFFSET(log_to_dbg), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE },
++    { "log_to_dbg", "Enable AMF logging to debug output", OFFSET(log_to_dbg), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+     { NULL }
+ };
+
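With this table, hevc_amf exposes the Main 10 profile and the new usage presets through the ordinary AVOptions interface. A minimal sketch of selecting them from user code (not part of the patch; assumes an AMF build that accepts P010 input, error handling mostly elided):

#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>

/* Hypothetical helper, for illustration only: open hevc_amf for 10-bit
 * Main 10 encoding using the options declared in the table above. */
static AVCodecContext *open_hevc_amf_main10(int width, int height)
{
    const AVCodec *codec = avcodec_find_encoder_by_name("hevc_amf");
    AVCodecContext *enc;

    if (!codec)
        return NULL;
    enc = avcodec_alloc_context3(codec);
    if (!enc)
        return NULL;

    enc->width     = width;
    enc->height    = height;
    enc->time_base = (AVRational){ 1, 30 };
    enc->pix_fmt   = AV_PIX_FMT_P010;   /* 10-bit input, from the pix_fmts list */
    enc->bit_rate  = 5000000;

    av_opt_set(enc->priv_data, "profile", "main10", 0);     /* new Main 10 entry */
    av_opt_set(enc->priv_data, "usage", "transcoding", 0);

    if (avcodec_open2(enc, codec, NULL) < 0) {
        avcodec_free_context(&enc);
        return NULL;
    }
    return enc;
}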
+@@ -95,8 +113,8 @@ static av_cold int amf_encode_init_hevc(
+ {
+     int ret = 0;
+     AMF_RESULT res = AMF_OK;
+-    AmfContext *ctx = avctx->priv_data;
+-    AMFVariantStruct var = {0};
++    AMFEncContext *ctx = avctx->priv_data;
++    AMFVariantStruct var = { 0 };
+     amf_int64 profile = 0;
+     amf_int64 profile_level = 0;
+     AMFBuffer *buffer;
+@@ -105,16 +123,15 @@ static av_cold int amf_encode_init_hevc(
+     AMFSize framesize = AMFConstructSize(avctx->width, avctx->height);
+     int deblocking_filter = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ?
1 : 0; + +- if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { ++ if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = AMFConstructRate(avctx->framerate.num, avctx->framerate.den); +- } else { ++ else + framerate = AMFConstructRate(avctx->time_base.den, avctx->time_base.num * avctx->ticks_per_frame); +- } + + if ((ret = ff_amf_encode_init(avctx)) < 0) + return ret; + +- // init static parameters ++ // Static parameters + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_USAGE, ctx->usage); + + AMF_ASSIGN_PROPERTY_SIZE(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_FRAMESIZE, framesize); +@@ -125,28 +142,28 @@ static av_cold int amf_encode_init_hevc( + case FF_PROFILE_HEVC_MAIN: + profile = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN; + break; ++ case FF_PROFILE_HEVC_MAIN_10: ++ profile = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN_10; ++ break; + default: + break; + } +- if (profile == 0) { ++ if (profile == 0) + profile = ctx->profile; +- } + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PROFILE, profile); + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_TIER, ctx->tier); + + profile_level = avctx->level; +- if (profile_level == FF_LEVEL_UNKNOWN) { ++ if (profile_level == FF_LEVEL_UNKNOWN) + profile_level = ctx->level; +- } +- if (profile_level != 0) { ++ if (profile_level != 0) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PROFILE_LEVEL, profile_level); +- } ++ + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET, ctx->quality); + // Maximum Reference Frames +- if (avctx->refs != -1) { ++ if (avctx->refs != -1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_NUM_REFRAMES, avctx->refs); +- } + // Aspect Ratio + if (avctx->sample_aspect_ratio.den && avctx->sample_aspect_ratio.num) { + AMFRatio ratio = AMFConstructRatio(avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den); +@@ -156,30 +173,28 @@ static av_cold int amf_encode_init_hevc( + // Picture control properties + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_NUM_GOPS_PER_IDR, ctx->gops_per_idr); + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_GOP_SIZE, avctx->gop_size); +- if (avctx->slices > 1) { ++ if (avctx->slices > 1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_SLICES_PER_FRAME, avctx->slices); +- } + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_DE_BLOCKING_FILTER_DISABLE, deblocking_filter); + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE, ctx->header_insertion_mode); + +- // Rate control +- // autodetect rate control method ++ // Rate control properties ++ // Auto detect rate control method + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN) { + if (ctx->min_qp_i != -1 || ctx->max_qp_i != -1 || + ctx->min_qp_p != -1 || ctx->max_qp_p != -1 || + ctx->qp_i !=-1 || ctx->qp_p != -1) { + ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CQP\n"); ++ av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CQP\n"); + } else if (avctx->rc_max_rate > 0) { + ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to Peak VBR\n"); ++ av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to Peak VBR\n"); + } else { + ctx->rate_control_mode = 
AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CBR\n"); ++ av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CBR\n"); + } + } + +- + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD, ctx->rate_control_mode); + if (avctx->rc_buffer_size) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_VBV_BUFFER_SIZE, avctx->rc_buffer_size); +@@ -191,20 +206,37 @@ static av_cold int amf_encode_init_hevc( + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_INITIAL_VBV_BUFFER_FULLNESS, amf_buffer_fullness); + } + } +- // Pre-Pass, Pre-Analysis, Two-Pass +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_PREANALYSIS_ENABLE, ctx->preanalysis); + ++ // Pre-Encode/Two-Pass(pre-encode assisted rate control) ++ if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PREENCODE_ENABLE, 0); ++ if (ctx->pre_encode) { ++ ctx->pre_encode = 0; ++ av_log(ctx, AV_LOG_WARNING, "Pre-Encode is not supported by CQP rate control method, automatically disabled\n"); ++ } ++ } else { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PREENCODE_ENABLE, ctx->pre_encode); ++ } ++ ++ // VBAQ + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, false); +- if (ctx->enable_vbaq) +- av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by cqp Rate Control Method, automatically disabled\n"); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, 0); ++ if (ctx->enable_vbaq) { ++ ctx->enable_vbaq = 0; ++ av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by CQP rate control method, automatically disabled\n"); ++ } + } else { + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, !!ctx->enable_vbaq); + } ++ ++ // High Motion Quality Boost mode ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_HIGH_MOTION_QUALITY_BOOST_ENABLE, !!ctx->enable_hmqb); ++ ++ // Motion estimation + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MOTION_HALF_PIXEL, ctx->me_half_pel); + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MOTION_QUARTERPIXEL, ctx->me_quarter_pel); + +- // init dynamic rate control params ++ // Dynamic rate control params + if (ctx->max_au_size) + ctx->enforce_hrd = 1; + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENFORCE_HRD, ctx->enforce_hrd); +@@ -212,57 +244,99 @@ static av_cold int amf_encode_init_hevc( + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_TARGET_BITRATE, avctx->bit_rate); + +- if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR) { ++ if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PEAK_BITRATE, avctx->bit_rate); +- } + if (avctx->rc_max_rate) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PEAK_BITRATE, avctx->rc_max_rate); + } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) { +- av_log(ctx, AV_LOG_WARNING, "rate control mode is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n"); ++ av_log(ctx, AV_LOG_WARNING, "Rate control method is PEAK_CONSTRAINED_VBR but 
rc_max_rate is not set\n"); ++ } ++ ++ // Color Range (Studio/Partial/TV/MPEG or Full/PC/JPEG) ++ if (avctx->color_range == AVCOL_RANGE_JPEG) { ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_NOMINAL_RANGE, AMF_VIDEO_ENCODER_HEVC_NOMINAL_RANGE_FULL); ++ } else { ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_NOMINAL_RANGE, AMF_VIDEO_ENCODER_HEVC_NOMINAL_RANGE_STUDIO); + } + +- // init encoder ++ // Output color profile, transfer and primaries ++ if (ctx->out_color_profile > AMF_VIDEO_CONVERTER_COLOR_PROFILE_UNKNOWN) ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_OUTPUT_COLOR_PROFILE, ctx->out_color_profile); ++ if (ctx->out_color_trc > AMF_COLOR_TRANSFER_CHARACTERISTIC_UNDEFINED) ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_OUTPUT_TRANSFER_CHARACTERISTIC, ctx->out_color_trc); ++ if (ctx->out_color_prm > AMF_COLOR_PRIMARIES_UNDEFINED) ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_OUTPUT_COLOR_PRIMARIES, ctx->out_color_prm); ++ ++ // Set 10-bit encoding if possible ++ if (ctx->bit_depth == 10) ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_COLOR_BIT_DEPTH, AMF_COLOR_BIT_DEPTH_10); ++ ++ // Init encoder + res = ctx->encoder->pVtbl->Init(ctx->encoder, ctx->format, avctx->width, avctx->height); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "encoder->Init() failed with error %d\n", res); + +- // init dynamic picture control params ++ // Dynamic picture control params + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_AU_SIZE, ctx->max_au_size); + +- if (ctx->min_qp_i != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, ctx->min_qp_i); +- } else if (avctx->qmin != -1) { +- int qval = avctx->qmin > 51 ? 51 : avctx->qmin; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, qval); +- } +- if (ctx->max_qp_i != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, ctx->max_qp_i); +- } else if (avctx->qmax != -1) { +- int qval = avctx->qmax > 51 ? 51 : avctx->qmax; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, qval); +- } +- if (ctx->min_qp_p != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, ctx->min_qp_p); +- } else if (avctx->qmin != -1) { +- int qval = avctx->qmin > 51 ? 51 : avctx->qmin; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, qval); +- } +- if (ctx->max_qp_p != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, ctx->max_qp_p); +- } else if (avctx->qmax != -1) { +- int qval = avctx->qmax > 51 ? 51 : avctx->qmax; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, qval); ++ // QP Minimum / Maximum ++ if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, 0); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, 51); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, 0); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, 51); ++ } else { ++ if (ctx->min_qp_i != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, ctx->min_qp_i); ++ } else if (avctx->qmin != -1) { ++ int qval = avctx->qmin > 51 ? 
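/* H.265 only defines QP 0..51, so the generic qmin/qmax options are clamped before being handed to AMF */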
51 : avctx->qmin; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, qval); ++ } ++ if (ctx->max_qp_i != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, ctx->max_qp_i); ++ } else if (avctx->qmax != -1) { ++ int qval = avctx->qmax > 51 ? 51 : avctx->qmax; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, qval); ++ } ++ if (ctx->min_qp_p != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, ctx->min_qp_p); ++ } else if (avctx->qmin != -1) { ++ int qval = avctx->qmin > 51 ? 51 : avctx->qmin; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, qval); ++ } ++ if (ctx->max_qp_p != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, ctx->max_qp_p); ++ } else if (avctx->qmax != -1) { ++ int qval = avctx->qmax > 51 ? 51 : avctx->qmax; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, qval); ++ } ++ if (ctx->min_qp_i == -1 && ctx->max_qp_i == -1 && ctx->min_qp_p == -1 && ctx->max_qp_p == -1 && ++ avctx->qmin == -1 && avctx->qmax == -1) { ++ switch (ctx->usage) { ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING: ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, 18); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, 46); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, 18); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, 46); ++ break; ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_ULTRA_LOW_LATENCY: ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY: ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM: ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, 22); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, 48); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, 22); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, 48); ++ break; ++ } ++ } + } + +- if (ctx->qp_p != -1) { ++ if (ctx->qp_p != -1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QP_P, ctx->qp_p); +- } +- if (ctx->qp_i != -1) { ++ if (ctx->qp_i != -1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QP_I, ctx->qp_i); +- } + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_SKIP_FRAME_ENABLE, ctx->skip_frame); + +- +- // fill extradata ++ // Fill extradata + res = AMFVariantInit(&var); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "AMFVariantInit() failed with error %d\n", res); + +@@ -273,9 +347,8 @@ static av_cold int amf_encode_init_hevc( + guid = IID_AMFBuffer(); + + res = var.pInterface->pVtbl->QueryInterface(var.pInterface, &guid, (void**)&buffer); // query for buffer interface +- if (res != AMF_OK) { ++ if (res != AMF_OK) + var.pInterface->pVtbl->Release(var.pInterface); +- } + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "QueryInterface(IID_AMFBuffer) failed with error %d\n", res); + + avctx->extradata_size = (int)buffer->pVtbl->GetSize(buffer); +@@ -292,6 +365,7 @@ static av_cold int amf_encode_init_hevc( + + return 0; + } ++ + static const AVCodecDefault defaults[] = { + { "refs", "-1" }, + { "aspect", "0" }, +@@ -317,13 +391,13 @@ AVCodec ff_hevc_amf_encoder = { + .init = amf_encode_init_hevc, + .receive_packet = ff_amf_receive_packet, + .close = ff_amf_encode_close, +- .priv_data_size = 
sizeof(AmfContext), ++ .priv_data_size = sizeof(AMFEncContext), + .priv_class = &hevc_amf_class, + .defaults = defaults, + .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, +- .pix_fmts = ff_amf_pix_fmts, ++ .pix_fmts = ff_amfenc_hevc_pix_fmts, + .wrapper_name = "amf", + .hw_configs = ff_amfenc_hw_configs, + }; diff --git a/debian/patches/series b/debian/patches/series index 5f90ee2c53b..2f13511a402 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1,4 @@ 0001-add-fixes-for-segement-muxer.patch 0002-add-cuda-pixfmt-converter-impl.patch 0003-add-cuda-tonemap-impl.patch +0004-add-amf-refactor-and-hevc-10-bit-encoding.patch From 15afff4147ad6453b14cc79cf0a7fb29d34ec35e Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:38:09 +0800 Subject: [PATCH 14/41] add opencl scaler and pixfmt converter impl --- ...ncl-scaler-and-pixfmt-converter-impl.patch | 985 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 986 insertions(+) create mode 100644 debian/patches/0005-add-opencl-scaler-and-pixfmt-converter-impl.patch diff --git a/debian/patches/0005-add-opencl-scaler-and-pixfmt-converter-impl.patch b/debian/patches/0005-add-opencl-scaler-and-pixfmt-converter-impl.patch new file mode 100644 index 00000000000..74486fd0108 --- /dev/null +++ b/debian/patches/0005-add-opencl-scaler-and-pixfmt-converter-impl.patch @@ -0,0 +1,985 @@ +Index: jellyfin-ffmpeg/configure +=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ jellyfin-ffmpeg/configure +@@ -3619,6 +3619,7 @@ rubberband_filter_deps="librubberband" + sab_filter_deps="gpl swscale" + scale2ref_filter_deps="swscale" + scale_filter_deps="swscale" ++scale_opencl_filter_deps="opencl" + scale_qsv_filter_deps="libmfx" + scdet_filter_select="scene_sad" + select_filter_select="scene_sad" +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ -394,6 +394,7 @@ OBJS-$(CONFIG_SAB_FILTER) + OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o + OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o scale_eval.o + OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o ++OBJS-$(CONFIG_SCALE_OPENCL_FILTER) += vf_scale_opencl.o opencl.o opencl/scale.o scale_eval.o + OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o + OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o + OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o +Index: jellyfin-ffmpeg/libavfilter/allfilters.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/allfilters.c ++++ jellyfin-ffmpeg/libavfilter/allfilters.c +@@ -376,6 +376,7 @@ extern AVFilter ff_vf_sab; + extern AVFilter ff_vf_scale; + extern AVFilter ff_vf_scale_cuda; + extern AVFilter ff_vf_scale_npp; ++extern AVFilter ff_vf_scale_opencl; + extern AVFilter ff_vf_scale_qsv; + extern AVFilter ff_vf_scale_vaapi; + extern AVFilter ff_vf_scale_vulkan; +Index: jellyfin-ffmpeg/libavfilter/opencl/scale.cl +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/opencl/scale.cl +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2018 Gabriel Machado ++ * Copyright (c) 2021 NyanMisaka ++ * ++ * This file is part of FFmpeg. 
++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++__constant sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_ADDRESS_CLAMP_TO_EDGE | ++ CLK_FILTER_NEAREST); ++ ++__constant sampler_t sampler2 = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_ADDRESS_NONE | ++ CLK_FILTER_NEAREST); ++ ++#ifdef CONV ++__kernel void conv_yuv(__write_only image2d_t dst1, ++ __read_only image2d_t src1, ++ __write_only image2d_t dst2, ++ __read_only image2d_t src2 ++#ifdef NON_SEMI_PLANAR_OUT ++ ,__write_only image2d_t dst3 ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ ,__read_only image2d_t src3 ++#endif ++ ) ++{ ++ int xi = get_global_id(0); ++ int yi = get_global_id(1); ++ // each work item process four pixels ++ int x = 2 * xi; ++ int y = 2 * yi; ++ ++ if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) { ++ float y0 = read_imagef(src1, sampler, (int2)(x, y)).x; ++ float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x; ++ float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x; ++ float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x; ++#ifdef NON_SEMI_PLANAR_IN ++ float u = read_imagef(src2, sampler, (int2)(xi, yi)).x; ++ float v = read_imagef(src3, sampler, (int2)(xi, yi)).x; ++#else ++ float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy; ++ float u = uv.x; ++ float v = uv.y; ++#endif ++ ++ write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x + 1, y), (float4)(y1, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x, y + 1), (float4)(y2, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x + 1, y + 1), (float4)(y3, 0.0f, 0.0f, 1.0f)); ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, (int2)(xi, yi), (float4)(u, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, (int2)(xi, yi), (float4)(v, 0.0f, 0.0f, 1.0f)); ++#else ++ write_imagef(dst2, (int2)(xi, yi), (float4)(u, v, 0.0f, 1.0f)); ++#endif ++ } ++} ++#endif ++ ++#ifdef NEIGHBOR ++__kernel void neighbor(__write_only image2d_t dst1, ++ __read_only image2d_t src1, ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int2 read_pos = clamp(src_pos, 0, src_size - 1); ++ float4 c = read_imagef(src1, sampler2, read_pos); ++ write_imagef(dst1, dst_pos, (float4)(c.x, 0.0f, 0.0f, 1.0f)); ++} ++ ++__kernel void neighbor_uv(__write_only image2d_t dst2, ++ __read_only image2d_t src2, ++#ifdef NON_SEMI_PLANAR_OUT ++ __write_only image2d_t dst3, ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ __read_only image2d_t src3, ++#endif ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), 
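/* one work item per output chroma pixel: the global work size equals the output chroma plane size */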
get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int2 read_pos = clamp(src_pos, 0, src_size - 1); ++#ifdef NON_SEMI_PLANAR_IN ++ float u = read_imagef(src2, sampler2, read_pos).x; ++ float v = read_imagef(src3, sampler2, read_pos).x; ++#else ++ float2 uv = read_imagef(src2, sampler2, read_pos).xy; ++ float u = uv.x; ++ float v = uv.y; ++#endif ++ ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, dst_pos, (float4)(u, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, dst_pos, (float4)(v, 0.0f, 0.0f, 1.0f)); ++#else ++ write_imagef(dst2, dst_pos, (float4)(u, v, 0.0f, 1.0f)); ++#endif ++} ++#endif ++ ++#ifdef SCALE ++__kernel void scale(__write_only image2d_t dst1, ++ __read_only image2d_t src1, ++ __constant float *cx, ++ __constant float *cy, ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int i, j; ++ int filterw2 = filterw >> 1; ++ int filterh2 = filterh >> 1; ++ int2 src_size_edge = src_size - 1; ++ float4 col1 = 0.0f, s1 = 0.0f; ++ ++ #pragma unroll ++ for (i = 0; i < filterh; ++i, s1 = 0.0f) { ++ #pragma unroll ++ for (j = 0; j < filterw; ++j) { ++ int2 read_pos = clamp(src_pos + (int2)(filterw2 - j, filterh2 - i), 0, src_size_edge); ++ float4 c1 = read_imagef(src1, sampler2, read_pos); ++ s1 += c1 * cx[dst_pos.x * filterw + j]; ++ } ++ col1 += s1 * cy[dst_pos.y * filterh + i]; ++ } ++ write_imagef(dst1, dst_pos, (float4)(col1.x, 0.0f, 0.0f, 1.0f)); ++} ++ ++__kernel void scale_uv(__write_only image2d_t dst2, ++ __read_only image2d_t src2, ++#ifdef NON_SEMI_PLANAR_OUT ++ __write_only image2d_t dst3, ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ __read_only image2d_t src3, ++#endif ++ __constant float *cx, ++ __constant float *cy, ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int i, j; ++ int filterw2 = filterw >> 1; ++ int filterh2 = filterh >> 1; ++ int2 src_size_edge = src_size - 1; ++ float4 col2 = 0.0f, col3 = 0.0f, s2 = 0.0f, s3 = 0.0f; ++ ++ #pragma unroll ++ for (i = 0; i < filterh; ++i, s2 = s3 = 0.0f) { ++ #pragma unroll ++ for (j = 0; j < filterw; ++j) { ++ int2 read_pos = clamp(src_pos + (int2)(filterw2 - j, filterh2 - i), 0, src_size_edge); ++ float4 c2 = read_imagef(src2, sampler2, read_pos); ++ s2 += c2 * cx[dst_pos.x * filterw + j]; ++#ifdef NON_SEMI_PLANAR_IN ++ float4 c3 = read_imagef(src3, sampler2, read_pos); ++ s3 += c3 * cx[dst_pos.x * filterw + j]; ++#endif ++ } ++ col2 += s2 * cy[dst_pos.y * filterh + i]; ++#ifdef NON_SEMI_PLANAR_IN ++ col3 += s3 * cy[dst_pos.y * filterh + i]; ++#endif ++ } ++ ++#ifdef NON_SEMI_PLANAR_IN ++ float u = col2.x; ++ float v = col3.x; ++#else ++ float u = col2.x; ++ float v = col2.y; ++#endif ++ ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, dst_pos, (float4)(u, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, dst_pos, (float4)(v, 0.0f, 0.0f, 1.0f)); ++#else ++ write_imagef(dst2, dst_pos, (float4)(u, v, 0.0f, 1.0f)); ++#endif ++} ++#endif +Index: 
jellyfin-ffmpeg/libavfilter/opencl_source.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/opencl_source.h ++++ jellyfin-ffmpeg/libavfilter/opencl_source.h +@@ -27,6 +27,7 @@ extern const char *ff_opencl_source_desh + extern const char *ff_opencl_source_neighbor; + extern const char *ff_opencl_source_nlmeans; + extern const char *ff_opencl_source_overlay; ++extern const char *ff_opencl_source_scale; + extern const char *ff_opencl_source_pad; + extern const char *ff_opencl_source_tonemap; + extern const char *ff_opencl_source_transpose; +Index: jellyfin-ffmpeg/libavfilter/vf_scale_opencl.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vf_scale_opencl.c +@@ -0,0 +1,710 @@ ++/* ++ * Copyright (c) 2018 Gabriel Machado ++ * Copyright (c) 2021 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/common.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/mem.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#include "avfilter.h" ++#include "internal.h" ++#include "opencl.h" ++#include "opencl_source.h" ++#include "scale_eval.h" ++#include "video.h" ++ ++#define OPENCL_SOURCE_NB 2 ++ ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUV420P16, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++}; ++ ++enum filters { ++ F_AREA, ++ F_BICUBIC, ++ F_BILINEAR, ++ F_GAUSSIAN, ++ F_LANCZOS, ++ F_NEIGHBOR, ++ F_SINC, ++ F_SPLINE, ++ F_EXPERIMENTAL ++}; ++ ++static const int filter_radius[] = { ++ [F_AREA] = 1, ++ [F_BICUBIC] = 2, ++ [F_BILINEAR] = 1, ++ [F_GAUSSIAN] = 4, ++ [F_LANCZOS] = 3, ++ [F_NEIGHBOR] = -1, ++ [F_SINC] = 10, ++ [F_SPLINE] = 10, ++ [F_EXPERIMENTAL] = 4 ++}; ++ ++typedef struct ScaleOpenCLContext { ++ OpenCLFilterContext ocf; ++ ++ cl_command_queue command_queue; ++ cl_mem cx, cy; ++ cl_kernel kernel; ++ cl_kernel kernel_uv; ++ const char *kernel_name; ++ const char *kernel_name_uv; ++ ++ char *w_expr, *h_expr; ++ int dst_w, dst_h; ++ int src_w, src_h; ++ int passthrough; ++ int algorithm; ++ int force_original_aspect_ratio; ++ int force_divisible_by; ++ enum AVPixelFormat format; ++ ++ enum AVPixelFormat in_fmt, out_fmt; ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++ int in_planes, out_planes; ++ ++ int filterw, filterh; ++ int initialised; ++} ScaleOpenCLContext; ++ ++static float netravali(float t, float B, float C) ++{ ++ if (t > 2) { ++ return 0; ++ } else { ++ float tt = t * t; ++ float ttt = t * tt; ++ if (t < 1) { ++ return ((12 - 9 * B - 6 * C) * ttt + ++ (-18 + 12 * B + 6 * C) * tt + ++ (6 - 2 * B)) / 6; ++ } else { ++ return ((-B - 6 * C) * ttt + ++ (6 * B + 30 * C) * tt + ++ (-12 * B - 48 * C) * 
t + ++ (8 * B + 24 * C)) / 6; ++ } ++ } ++} ++ ++static float sinc(float t) ++{ ++ return (t == 0) ? 1.0 : sin(t * M_PI) / (t * M_PI); ++} ++ ++static float lanczos(float t, float a) ++{ ++ return (t < a) ? sinc(t) * sinc(t / a) : 0; ++} ++ ++static double spline(double a, double b, double c, double d, double dist) ++{ ++ if (dist <= 1.0) ++ return ((d * dist + c) * dist + b) * dist + a; ++ else ++ return spline(0.0, ++ b + 2.0 * c + 3.0 * d, ++ c + 3.0 * d, ++ -b - 3.0 * c - 6.0 * d, ++ dist - 1.0); ++} ++ ++static float calc_weight(int algorithm, float ratio, float t) ++{ ++ t = fabs(t); ++ ++ switch (algorithm) { ++ case F_AREA: { ++ float t2 = t - 0.5; ++ if (t2 * ratio < -0.5) ++ return 1; ++ else if (t2 * ratio < 0.5) ++ return -t2 * ratio + 0.5; ++ else ++ return 0; ++ } ++ ++ case F_BICUBIC: { ++ const float B = 0, C = 0.6; ++ return netravali(t, B, C); ++ } ++ ++ case F_BILINEAR: ++ return t < 1 ? (1 - t) : 0; ++ ++ case F_EXPERIMENTAL: { ++ double A = 1.0; ++ double c; ++ ++ if (t < 1.0) ++ c = cos(t * M_PI); ++ else ++ c = -1.0; ++ if (c < 0.0) ++ c = -pow(-c, A); ++ else ++ c = pow(c, A); ++ return c * 0.5 + 0.5; ++ } ++ ++ case F_GAUSSIAN: { ++ const float p = 3.0; ++ return exp2(-p * t * t); ++ } ++ ++ case F_LANCZOS: { ++ return lanczos(t, filter_radius[algorithm]); ++ } ++ ++ case F_NEIGHBOR: ++ return 1; ++ ++ case F_SINC: ++ return sinc(t); ++ ++ case F_SPLINE: { ++ const double p = -2.196152422706632; ++ return spline(1.0, 0.0, p, -p - 1.0, t); ++ } ++ } ++ ++ return 0; ++} ++ ++static int scale_opencl_init(AVFilterContext *avctx) ++{ ++ ScaleOpenCLContext *ctx = avctx->priv; ++ AVBPrint header; ++ const char *opencl_sources[OPENCL_SOURCE_NB]; ++ cl_int cle; ++ int i, j, err; ++ float scalex, scaley; ++ float *cx = NULL, *cy = NULL; ++ ++ av_bprint_init(&header, 512, AV_BPRINT_SIZE_UNLIMITED); ++ ++ if (ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h) { ++ if (ctx->passthrough && ctx->in_fmt == ctx->out_fmt) { ++ ctx->initialised = 1; ++ return 0; ++ } else { ++ av_bprintf(&header, "#define CONV\n"); ++ ctx->kernel_name = "conv_yuv"; ++ } ++ } else if (ctx->algorithm == F_NEIGHBOR) { ++ av_bprintf(&header, "#define NEIGHBOR\n"); ++ ctx->kernel_name = "neighbor"; ++ ctx->kernel_name_uv = "neighbor_uv"; ++ } else { ++ av_bprintf(&header, "#define SCALE\n"); ++ ctx->kernel_name = "scale"; ++ ctx->kernel_name_uv = "scale_uv"; ++ ++ scalex = FFMAX((float)(ctx->src_w / ctx->dst_w), 1); ++ scaley = FFMAX((float)(ctx->src_h / ctx->dst_h), 1); ++ ctx->filterw = ceil(2 * filter_radius[ctx->algorithm] * scalex); ++ ctx->filterh = ceil(2 * filter_radius[ctx->algorithm] * scaley); ++ ++ ctx->filterw = FFMIN(ctx->filterw, ctx->src_w - 2); ++ ctx->filterw = FFMAX(ctx->filterw, 1); ++ ctx->filterh = FFMIN(ctx->filterh, ctx->src_h - 2); ++ ctx->filterh = FFMAX(ctx->filterh, 1); ++ ++ av_bprintf(&header, "#define filterw %d\n", ctx->filterw); ++ av_bprintf(&header, "#define filterh %d\n", ctx->filterh); ++ ++ av_log(avctx, AV_LOG_DEBUG, "Filter size: %dx%d.\n", ctx->filterw, ctx->filterh); ++ ++ cx = av_malloc_array(ctx->dst_w * ctx->filterw, sizeof(cl_float)); ++ cy = av_malloc_array(ctx->dst_h * ctx->filterh, sizeof(cl_float)); ++ ++ if (!cx || !cy) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ for (i = 0; i < ctx->dst_w; ++i) { ++ float s_x = (i + 0.5) * ctx->src_w / ctx->dst_w - 0.5; ++ float t = s_x - floor(s_x); // fract ++ ++ float sum = 0; ++ for (j = 0; j < ctx->filterw; ++j) { ++ int x = ctx->filterw / 2 - j; ++ sum += cx[i * ctx->filterw + j] = 
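/* store each tap weight while accumulating their sum; the loop below divides by it so the taps of every destination position add up to 1 */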
calc_weight(ctx->algorithm, ++ scalex, ++ (x - t) / scalex); ++ } ++ ++ for (j = 0; j < ctx->filterw; ++j) ++ cx[i * ctx->filterw + j] /= sum; ++ } ++ ++ for (i = 0; i < ctx->dst_h; ++i) { ++ float s_y = (i + 0.5) * ctx->src_h / ctx->dst_h - 0.5; ++ float t = s_y - floor(s_y); // fract ++ ++ float sum = 0; ++ for (j = 0; j < ctx->filterh; ++j) { ++ int y = ctx->filterh / 2 - j; ++ sum += cy[i * ctx->filterh + j] = calc_weight(ctx->algorithm, ++ scaley, ++ (y - t) / scaley); ++ } ++ ++ for (j = 0; j < ctx->filterh; ++j) ++ cy[i * ctx->filterh + j] /= sum; ++ } ++ ++ ctx->cx = clCreateBuffer(ctx->ocf.hwctx->context, ++ CL_MEM_READ_ONLY | ++ CL_MEM_COPY_HOST_PTR | ++ CL_MEM_HOST_NO_ACCESS, ++ ctx->dst_w * ctx->filterw * sizeof(cl_float), ++ cx, ++ &cle); ++ ++ ctx->cy = clCreateBuffer(ctx->ocf.hwctx->context, ++ CL_MEM_READ_ONLY | ++ CL_MEM_COPY_HOST_PTR | ++ CL_MEM_HOST_NO_ACCESS, ++ ctx->dst_h * ctx->filterh * sizeof(cl_float), ++ cy, ++ &cle); ++ av_free(cx); ++ av_free(cy); ++ if (!ctx->cx || !ctx->cy) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create weights buffer: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ ++ if (ctx->in_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_IN\n"); ++ ++ if (ctx->out_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_OUT\n"); ++ ++ av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); ++ opencl_sources[0] = header.str; ++ opencl_sources[1] = ff_opencl_source_scale; ++ err = ff_opencl_filter_load_program(avctx, opencl_sources, OPENCL_SOURCE_NB); ++ ++ av_bprint_finalize(&header, NULL); ++ if (err < 0) ++ goto fail; ++ ++ ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, ++ ctx->ocf.hwctx->device_id, ++ 0, &cle); ++ if (!ctx->command_queue) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL command queue: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ ctx->kernel = clCreateKernel(ctx->ocf.program, ctx->kernel_name, &cle); ++ if (!ctx->kernel) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->kernel_name_uv) { ++ ctx->kernel_uv = clCreateKernel(ctx->ocf.program, ctx->kernel_name_uv, &cle); ++ if (!ctx->kernel_uv) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create kernel_uv: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ ++ ctx->initialised = 1; ++ return 0; ++ ++fail: ++ av_bprint_finalize(&header, NULL); ++ if (ctx->command_queue) ++ clReleaseCommandQueue(ctx->command_queue); ++ if (ctx->kernel) ++ clReleaseKernel(ctx->kernel); ++ if (ctx->kernel_uv) ++ clReleaseKernel(ctx->kernel_uv); ++ if (ctx->cx) ++ clReleaseMemObject(ctx->cx); ++ if (ctx->cy) ++ clReleaseMemObject(ctx->cy); ++ if (cx) ++ av_free(cx); ++ if (cy) ++ av_free(cy); ++ return err; ++} ++ ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static int scale_opencl_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = avctx->inputs[0]; ++ ScaleOpenCLContext *ctx = avctx->priv; ++ AVHWFramesContext *in_frames_ctx; ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ const AVPixFmtDescriptor *in_desc; ++ const AVPixFmtDescriptor *out_desc; ++ int ret; ++ ++ if (!inlink->hw_frames_ctx) ++ return AVERROR(EINVAL); ++ in_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ in_format 
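/* hw frames carry their pixel data in sw_format; that layout is what the kernels actually read and write */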
= in_frames_ctx->sw_format; ++ out_format = (ctx->format == AV_PIX_FMT_NONE) ? in_format : ctx->format; ++ in_desc = av_pix_fmt_desc_get(in_format); ++ out_desc = av_pix_fmt_desc_get(out_format); ++ ++ if (!format_is_supported(in_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(in_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!format_is_supported(out_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ ++ ctx->in_fmt = in_format; ++ ctx->out_fmt = out_format; ++ ctx->in_desc = in_desc; ++ ctx->out_desc = out_desc; ++ ctx->in_planes = av_pix_fmt_count_planes(ctx->in_fmt); ++ ctx->out_planes = av_pix_fmt_count_planes(ctx->out_fmt); ++ ctx->ocf.output_format = out_format; ++ ++ if ((ret = ff_scale_eval_dimensions(ctx, ++ ctx->w_expr, ctx->h_expr, ++ inlink, outlink, ++ &ctx->dst_w, &ctx->dst_h)) < 0) ++ return ret; ++ ++ ff_scale_adjust_dimensions(inlink, &ctx->dst_w, &ctx->dst_h, ++ ctx->force_original_aspect_ratio, ctx->force_divisible_by); ++ ++ if (((int64_t)(ctx->dst_h * inlink->w)) > INT_MAX || ++ ((int64_t)(ctx->dst_w * inlink->h)) > INT_MAX) ++ av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); ++ ++ ctx->src_w = inlink->w; ++ ctx->src_h = inlink->h; ++ ctx->ocf.output_width = ctx->dst_w; ++ ctx->ocf.output_height = ctx->dst_h; ++ ++ if (ctx->passthrough && ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h && ctx->in_fmt == ctx->out_fmt) { ++ av_buffer_unref(&outlink->hw_frames_ctx); ++ outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); ++ if (!outlink->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ return 0; ++ } else { ++ ctx->passthrough = 0; ++ } ++ ++ ret = ff_opencl_filter_config_output(outlink); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static AVFrame *scale_opencl_get_video_buffer(AVFilterLink *inlink, int w, int h) ++{ ++ ScaleOpenCLContext *ctx = inlink->dst->priv; ++ ++ return ctx->passthrough ? 
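/* in passthrough mode the input frame is forwarded untouched, so no new surface needs to be allocated */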
ff_null_get_video_buffer(inlink, w, h) : ++ ff_default_get_video_buffer(inlink, w, h); ++} ++ ++static int scale_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) ++{ ++ AVFilterContext *avctx = inlink->dst; ++ AVFilterLink *outlink = avctx->outputs[0]; ++ ScaleOpenCLContext *ctx = avctx->priv; ++ int x_subsample = 1 << ctx->in_desc->log2_chroma_w; ++ int y_subsample = 1 << ctx->in_desc->log2_chroma_h; ++ AVFrame *output = NULL; ++ size_t global_work[2]; ++ cl_int cle; ++ cl_int2 src_size, uv_size; ++ int err, idx_arg1, idx_arg2; ++ ++ av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(input->format), ++ input->width, input->height, input->pts); ++ ++ if (!input->hw_frames_ctx) ++ return AVERROR(EINVAL); ++ ++ if (!ctx->initialised) { ++ err = scale_opencl_init(avctx); ++ if (err < 0) ++ goto fail; ++ } ++ ++ if (ctx->passthrough && ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h && ctx->in_fmt == ctx->out_fmt) ++ return ff_filter_frame(outlink, input); ++ ++ output = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!output) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ err = av_frame_copy_props(output, input); ++ if (err < 0) ++ goto fail; ++ output->width = outlink->w; ++ output->height = outlink->h; ++ ++ if (!output->data[0] || !input->data[0] || !output->data[1] || !input->data[1]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->out_planes > 2 && !output->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->in_planes > 2 && !input->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &output->data[0]); ++ CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &input->data[0]); ++ ++ if (ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h) { ++ CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_mem, &output->data[1]); ++ CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem, &input->data[1]); ++ ++ idx_arg1 = 4; ++ if (ctx->out_planes > 2) ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg1++, cl_mem, &output->data[2]); ++ if (ctx->in_planes > 2) ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg1++, cl_mem, &input->data[2]); ++ ++ // conv_yuv ++ global_work[0] = output->width / x_subsample; ++ global_work[1] = output->height / y_subsample; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Run kernel %s " ++ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", ++ ctx->kernel_name, global_work[0], global_work[1]); ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); ++ } else { ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, 0, cl_mem, &output->data[1]); ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, 1, cl_mem, &input->data[1]); ++ ++ idx_arg1 = 2; ++ if (ctx->out_planes > 2) ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &output->data[2]); ++ if (ctx->in_planes > 2) ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &input->data[2]); ++ ++ idx_arg2 = 2; ++ if (ctx->algorithm != F_NEIGHBOR) { ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_mem, &ctx->cx); ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_mem, &ctx->cy); ++ ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &ctx->cx); ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &ctx->cy); ++ } ++ ++ src_size.s[0] = ctx->src_w; ++ src_size.s[1] = ctx->src_h; ++ uv_size.s[0] = src_size.s[0] / x_subsample; ++ uv_size.s[1] = src_size.s[1] / y_subsample; ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_int2, &src_size); ++ 
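// All formats in supported_formats are 4:2:0, so the UV kernel runs on the subsampled chroma grid and is given uv_size (src_size divided by the chroma subsampling factors) rather than the luma plane size. ++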
CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_int2, &uv_size); ++ ++ // scale, neighbor ++ global_work[0] = output->width; ++ global_work[1] = output->height; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Run kernel %s " ++ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", ++ ctx->kernel_name, global_work[0], global_work[1]); ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); ++ ++ // scale_uv, neighbor_uv ++ global_work[0] = output->width / x_subsample; ++ global_work[1] = output->height / y_subsample; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Run kernel %s " ++ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", ++ ctx->kernel_name_uv, global_work[0], global_work[1]); ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_uv, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); ++ } ++ ++ cle = clFinish(ctx->command_queue); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle); ++ ++ av_frame_free(&input); ++ ++ av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(output->format), ++ output->width, output->height, output->pts); ++ ++ return ff_filter_frame(outlink, output); ++ ++fail: ++ clFinish(ctx->command_queue); ++ av_frame_free(&input); ++ av_frame_free(&output); ++ return err; ++} ++ ++static av_cold void scale_opencl_uninit(AVFilterContext *avctx) ++{ ++ ScaleOpenCLContext *ctx = avctx->priv; ++ cl_int cle; ++ ++ if (ctx->kernel) { ++ cle = clReleaseKernel(ctx->kernel); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "kernel: %d.\n", cle); ++ } ++ ++ if (ctx->kernel_uv) { ++ cle = clReleaseKernel(ctx->kernel_uv); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "kernel_uv: %d.\n", cle); ++ } ++ ++ if (ctx->command_queue) { ++ cle = clReleaseCommandQueue(ctx->command_queue); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "command queue: %d.\n", cle); ++ } ++ ++ if (ctx->cx) { ++ cle = clReleaseMemObject(ctx->cx); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "weights buffer: %d.\n", cle); ++ } ++ ++ if (ctx->cy) { ++ cle = clReleaseMemObject(ctx->cy); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "weights buffer: %d.\n", cle); ++ } ++ ++ ff_opencl_filter_uninit(avctx); ++} ++ ++#define OFFSET(x) offsetof(ScaleOpenCLContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++static const AVOption scale_opencl_options[] = { ++ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "format", "Output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" }, ++ { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++ { "algo", "Scaling algorithm", OFFSET(algorithm), AV_OPT_TYPE_INT, { .i64 = F_BILINEAR }, INT_MIN, INT_MAX, FLAGS, "algo" }, ++ { "area", "Area averaging", 0, AV_OPT_TYPE_CONST, { .i64 = F_AREA }, 0, 0, FLAGS, "algo" }, ++ { "bicubic", "Bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = F_BICUBIC }, 0, 0, FLAGS, "algo" }, ++ { "bilinear", 
"Bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = F_BILINEAR }, 0, 0, FLAGS, "algo" }, ++ { "gauss", "Gaussian", 0, AV_OPT_TYPE_CONST, { .i64 = F_GAUSSIAN }, 0, 0, FLAGS, "algo" }, ++ { "lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = F_LANCZOS }, 0, 0, FLAGS, "algo" }, ++ { "neighbor", "Nearest Neighbor", 0, AV_OPT_TYPE_CONST, { .i64 = F_NEIGHBOR }, 0, 0, FLAGS, "algo" }, ++ { "sinc", "Sinc", 0, AV_OPT_TYPE_CONST, { .i64 = F_SINC }, 0, 0, FLAGS, "algo" }, ++ { "spline", "Bicubic Spline", 0, AV_OPT_TYPE_CONST, { .i64 = F_SPLINE }, 0, 0, FLAGS, "algo" }, ++ { "experimental", "Experimental", 0, AV_OPT_TYPE_CONST, { .i64 = F_EXPERIMENTAL }, 0, 0, FLAGS, "algo" }, ++ { "force_original_aspect_ratio", "Decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" }, ++ { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, ++ { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, ++ { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, ++ { "force_divisible_by", "Enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS }, ++ { NULL } ++}; ++ ++AVFILTER_DEFINE_CLASS(scale_opencl); ++ ++static const AVFilterPad scale_opencl_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = &scale_opencl_filter_frame, ++ .get_video_buffer = &scale_opencl_get_video_buffer, ++ .config_props = &ff_opencl_filter_config_input, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad scale_opencl_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = &scale_opencl_config_output, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vf_scale_opencl = { ++ .name = "scale_opencl", ++ .description = NULL_IF_CONFIG_SMALL("Scale the input video size through OpenCL."), ++ .priv_size = sizeof(ScaleOpenCLContext), ++ .priv_class = &scale_opencl_class, ++ .init = &ff_opencl_filter_init, ++ .uninit = &scale_opencl_uninit, ++ .query_formats = &ff_opencl_filter_query_formats, ++ .inputs = scale_opencl_inputs, ++ .outputs = scale_opencl_outputs, ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/debian/patches/series b/debian/patches/series index 2f13511a402..a59a133267e 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -2,3 +2,4 @@ 0002-add-cuda-pixfmt-converter-impl.patch 0003-add-cuda-tonemap-impl.patch 0004-add-amf-refactor-and-hevc-10-bit-encoding.patch +0005-add-opencl-scaler-and-pixfmt-converter-impl.patch From 303e0a390ef2d6cc43cb3e09851ab57c358af834 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:38:25 +0800 Subject: [PATCH 15/41] add bt2390 eetf and code refactor to opencl tonemap --- ...-and-code-refactor-to-opencl-tonemap.patch | 1302 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 1303 insertions(+) create mode 100644 debian/patches/0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch diff --git a/debian/patches/0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch new file mode 100644 index 00000000000..19456a2b17c --- /dev/null +++ b/debian/patches/0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -0,0 +1,1302 @@ +Index: jellyfin-ffmpeg/libavfilter/opencl/colorspace_common.cl 
+=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/opencl/colorspace_common.cl ++++ jellyfin-ffmpeg/libavfilter/opencl/colorspace_common.cl +@@ -17,7 +17,24 @@ + */ + + #define ST2084_MAX_LUMINANCE 10000.0f +-#define REFERENCE_WHITE 100.0f ++ ++#if (defined(TONE_FUNC) && TONE_FUNC == bt2390) ++ #define REF_WHITE 203.0f ++#else ++ #define REF_WHITE 100.0f ++#endif ++ ++#define ST2084_M1 0.1593017578125f ++#define ST2084_M2 78.84375f ++#define ST2084_C1 0.8359375f ++#define ST2084_C2 18.8515625f ++#define ST2084_C3 18.6875f ++ ++#define ARIB_B67_A 0.17883277f ++#define ARIB_B67_B 0.28466892f ++#define ARIB_B67_C 0.55991073f ++ ++#define FLOAT_EPS 1.175494351e-38f + + #if chroma_loc == 1 + #define chroma_sample(a,b,c,d) (((a) + (c)) * 0.5f) +@@ -33,12 +50,6 @@ + #define chroma_sample(a,b,c,d) (((a) + (b) + (c) + (d)) * 0.25f) + #endif + +-constant const float ST2084_M1 = 0.1593017578125f; +-constant const float ST2084_M2 = 78.84375f; +-constant const float ST2084_C1 = 0.8359375f; +-constant const float ST2084_C2 = 18.8515625f; +-constant const float ST2084_C3 = 18.6875f; +- + float get_luma_dst(float3 c) { + return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z; + } +@@ -51,61 +62,99 @@ float3 get_chroma_sample(float3 a, float + return chroma_sample(a, b, c, d); + } + ++// linearizer for PQ/ST2084 + float eotf_st2084(float x) { +- float p = powr(x, 1.0f / ST2084_M2); +- float a = max(p -ST2084_C1, 0.0f); +- float b = max(ST2084_C2 - ST2084_C3 * p, 1e-6f); +- float c = powr(a / b, 1.0f / ST2084_M1); +- return x > 0.0f ? c * ST2084_MAX_LUMINANCE / REFERENCE_WHITE : 0.0f; +-} +- +-__constant const float HLG_A = 0.17883277f; +-__constant const float HLG_B = 0.28466892f; +-__constant const float HLG_C = 0.55991073f; +- +-// linearizer for HLG +-float inverse_oetf_hlg(float x) { +- float a = 4.0f * x * x; +- float b = exp((x - HLG_C) / HLG_A) + HLG_B; +- return x < 0.5f ? a : b; +-} +- +-// delinearizer for HLG +-float oetf_hlg(float x) { +- float a = 0.5f * sqrt(x); +- float b = HLG_A * log(x - HLG_B) + HLG_C; +- return x <= 1.0f ? a : b; +-} +- +-float3 ootf_hlg(float3 c, float peak) { +- float luma = get_luma_src(c); +- float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f); +- gamma = max(1.0f, gamma); +- float factor = peak * powr(luma, gamma - 1.0f) / powr(12.0f, gamma); +- return c * factor; +-} +- +-float3 inverse_ootf_hlg(float3 c, float peak) { +- float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f); +- c *= powr(12.0f, gamma) / peak; +- c /= powr(get_luma_dst(c), (gamma - 1.0f) / gamma); +- return c; ++ x = max(x, 0.0f); ++ float xpow = native_powr(x, 1.0f / ST2084_M2); ++ float num = max(xpow - ST2084_C1, 0.0f); ++ float den = max(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); ++ x = native_powr(num / den, 1.0f / ST2084_M1); ++ return x * ST2084_MAX_LUMINANCE / REF_WHITE; ++} ++ ++// delinearizer for PQ/ST2084 ++float inverse_eotf_st2084(float x) { ++ x = max(x, 0.0f); ++ x *= REF_WHITE / ST2084_MAX_LUMINANCE; ++ float xpow = native_powr(x, ST2084_M1); ++#if 0 ++ // Original formulation from SMPTE ST 2084:2014 publication. ++ float num = ST2084_C1 + ST2084_C2 * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return native_powr(num / den, ST2084_M2); ++#else ++ // More stable arrangement that avoids some cancellation error. 
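++ // Both branches compute the same function, since algebraically
++ // (C1 + C2*y) / (1 + C3*y) == 1 + ((C1 - 1) + (C2 - C3)*y) / (1 + C3*y), with y = x^M1.
++ // Evaluating the offset from 1.0 directly preserves precision when the quotient
++ // is close to 1.0, where powr(..., M2 = 78.84375) would amplify the rounding error.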
++ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return native_powr(1.0f + num / den, ST2084_M2); ++#endif + } + +-float inverse_eotf_bt1886(float c) { +- return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f); ++float ootf_1_2(float x) { ++ return x > 0.0f ? native_powr(x, 1.2f) : x; + } + +-float oetf_bt709(float c) { +- c = c < 0.0f ? 0.0f : c; +- float r1 = 4.5f * c; +- float r2 = 1.099f * powr(c, 0.45f) - 0.099f; +- return c < 0.018f ? r1 : r2; +-} +-float inverse_oetf_bt709(float c) { +- float r1 = c / 4.5f; +- float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f); +- return c < 0.081f ? r1 : r2; ++float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? native_powr(x, 1.0f / 1.2f) : x; ++} ++ ++float oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? native_sqrt(3.0f * x) ++ : (ARIB_B67_A * native_log(12.0f * x - ARIB_B67_B) + ARIB_B67_C); ++} ++ ++float inverse_oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= 0.5f ++ ? (x * x) * (1.0f / 3.0f) ++ : (native_exp((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); ++} ++ ++// linearizer for HLG/ARIB-B67 ++float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)); ++} ++ ++// delinearizer for HLG/ARIB-B67 ++float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x)); ++} ++ ++// delinearizer for BT709, BT2020-10 ++float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? native_powr(x, 1.0f / 2.4f) : 0.0f; ++} ++ ++#ifdef TRC_LUT ++float linearize_lut(float x) { ++ return lin_lut[clamp(convert_int(x * 1023.0f), 0, 1023)]; ++} ++ ++float delinearize_lut(float x) { ++ return delin_lut[clamp(convert_int(x * 1023.0f), 0, 1023)]; ++} ++#endif ++ ++float linearize_pq(float x) { ++#ifdef TRC_LUT_PQ ++ return pqlin_lut[clamp(convert_int(x * 1023.0f), 0, 1023)]; ++#elif defined(TRC_LUT) ++ return linearize_lut(x); ++#else ++ return eotf_st2084(x); ++#endif ++} ++ ++float delinearize_pq(float x) { ++#ifdef TRC_LUT_PQ ++ return pqdelin_lut[clamp(convert_int(x * 1023.0f), 0, 1023)]; ++#elif defined(TRC_LUT) ++ return delinearize_lut(x); ++#else ++ return inverse_eotf_st2084(x); ++#endif + } + + float3 yuv2rgb(float y, float u, float v) { +@@ -187,19 +236,3 @@ float3 lrgb2lrgb(float3 c) { + return (float3)(rr, gg, bb); + #endif + } +- +-float3 ootf(float3 c, float peak) { +-#ifdef ootf_impl +- return ootf_impl(c, peak); +-#else +- return c; +-#endif +-} +- +-float3 inverse_ootf(float3 c, float peak) { +-#ifdef inverse_ootf_impl +- return inverse_ootf_impl(c, peak); +-#else +- return c; +-#endif +-} +Index: jellyfin-ffmpeg/libavfilter/opencl/tonemap.cl +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/opencl/tonemap.cl ++++ jellyfin-ffmpeg/libavfilter/opencl/tonemap.cl +@@ -16,54 +16,51 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#define REFERENCE_WHITE 100.0f ++#define FLOAT_EPS 1.175494351e-38f ++ + extern float3 lrgb2yuv(float3); + extern float lrgb2y(float3); + extern float3 yuv2lrgb(float3); + extern float3 lrgb2lrgb(float3); ++extern float linearize_pq(float); ++extern float delinearize_pq(float); ++extern float inverse_eotf_st2084(float); + extern float get_luma_src(float3); + extern float get_luma_dst(float3); +-extern float3 ootf(float3 c, float peak); +-extern float3 inverse_ootf(float3 c, float peak); + extern float3 get_chroma_sample(float3, float3, float3, float3); + +-struct 
detection_result { +- float peak; +- float average; +-}; +- + float hable_f(float in) { + float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; + return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; + } + +-float direct(float s, float peak) { ++float direct(float s, float peak, float target_peak) { + return s; + } + +-float linear(float s, float peak) { ++float linear(float s, float peak, float target_peak) { + return s * tone_param / peak; + } + +-float gamma(float s, float peak) { +- float p = s > 0.05f ? s /peak : 0.05f / peak; +- float v = powr(p, 1.0f / tone_param); +- return s > 0.05f ? v : (s * v /0.05f); ++float gamma(float s, float peak, float target_peak) { ++ float p = s > 0.05f ? s / peak : 0.05f / peak; ++ float v = native_powr(p, 1.0f / tone_param); ++ return s > 0.05f ? v : (s * v / 0.05f); + } + +-float clip(float s, float peak) { ++float clip(float s, float peak, float target_peak) { + return clamp(s * tone_param, 0.0f, 1.0f); + } + +-float reinhard(float s, float peak) { ++float reinhard(float s, float peak, float target_peak) { + return s / (s + tone_param) * (peak + tone_param) / peak; + } + +-float hable(float s, float peak) { +- return hable_f(s)/hable_f(peak); ++float hable(float s, float peak, float target_peak) { ++ return hable_f(s) / hable_f(peak); + } + +-float mobius(float s, float peak) { ++float mobius(float s, float peak, float target_peak) { + float j = tone_param; + float a, b; + +@@ -71,102 +68,32 @@ float mobius(float s, float peak) { + return s; + + a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); +- b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f); ++ b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, FLOAT_EPS); + + return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b); + } + +-// detect peak/average signal of a frame, the algorithm was ported from: +-// libplacebo (https://github.com/haasn/libplacebo) +-struct detection_result +-detect_peak_avg(global uint *util_buf, __local uint *sum_wg, +- float signal, float peak) { +-// layout of the util buffer +-// +-// Name: : Size (units of 4-bytes) +-// average buffer : detection_frames + 1 +-// peak buffer : detection_frames + 1 +-// workgroup counter : 1 +-// total of peak : 1 +-// total of average : 1 +-// frame index : 1 +-// frame number : 1 +- global uint *avg_buf = util_buf; +- global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1; +- global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1; +- global uint *max_total_p = counter_wg_p + 1; +- global uint *avg_total_p = max_total_p + 1; +- global uint *frame_idx_p = avg_total_p + 1; +- global uint *scene_frame_num_p = frame_idx_p + 1; +- +- uint frame_idx = *frame_idx_p; +- uint scene_frame_num = *scene_frame_num_p; +- +- size_t lidx = get_local_id(0); +- size_t lidy = get_local_id(1); +- size_t lsizex = get_local_size(0); +- size_t lsizey = get_local_size(1); +- uint num_wg = get_num_groups(0) * get_num_groups(1); +- size_t group_idx = get_group_id(0); +- size_t group_idy = get_group_id(1); +- struct detection_result r = {peak, sdr_avg}; +- if (lidx == 0 && lidy == 0) +- *sum_wg = 0; +- barrier(CLK_LOCAL_MEM_FENCE); +- +- // update workgroup sum +- atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE)); +- barrier(CLK_LOCAL_MEM_FENCE); +- +- // update frame peak/avg using work-group-average. 
+- if (lidx == 0 && lidy == 0) { +- uint avg_wg = *sum_wg / (lsizex * lsizey); +- atomic_max(&peak_buf[frame_idx], avg_wg); +- atomic_add(&avg_buf[frame_idx], avg_wg); +- } +- +- if (scene_frame_num > 0) { +- float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num); +- float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num); +- r.peak = max(1.0f, peak); +- r.average = max(0.25f, avg); +- } ++float bt2390(float s, float peak, float target_peak) { ++ float peak_pq = inverse_eotf_st2084(peak); ++ float scale = peak_pq > 0.0f ? (1.0f / peak_pq) : 1.0f; ++ ++ float s_pq = inverse_eotf_st2084(s) * scale; ++ float max_lum = inverse_eotf_st2084(target_peak) * scale; ++ ++ float ks = 1.5f * max_lum - 0.5f; ++ float tb = (s_pq - ks) / (1.0f - ks); ++ float tb2 = tb * tb; ++ float tb3 = tb2 * tb; ++ float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + ++ (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + ++ (-2.0f * tb3 + 3.0f * tb2) * max_lum; ++ float sig = mix(pb, s_pq, s_pq < ks); + +- if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) { +- *counter_wg_p = 0; +- avg_buf[frame_idx] /= num_wg; +- +- if (scene_threshold > 0.0f) { +- uint cur_max = peak_buf[frame_idx]; +- uint cur_avg = avg_buf[frame_idx]; +- int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p; +- +- if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) { +- for (uint i = 0; i < DETECTION_FRAMES + 1; i++) +- avg_buf[i] = 0; +- for (uint i = 0; i < DETECTION_FRAMES + 1; i++) +- peak_buf[i] = 0; +- *avg_total_p = *max_total_p = 0; +- *scene_frame_num_p = 0; +- avg_buf[frame_idx] = cur_avg; +- peak_buf[frame_idx] = cur_max; +- } +- } +- uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1); +- // add current frame, subtract next frame +- *max_total_p += peak_buf[frame_idx] - peak_buf[next]; +- *avg_total_p += avg_buf[frame_idx] - avg_buf[next]; +- // reset next frame +- peak_buf[next] = avg_buf[next] = 0; +- *frame_idx_p = next; +- *scene_frame_num_p = min(*scene_frame_num_p + 1, +- (uint)DETECTION_FRAMES); +- } +- return r; ++ return linearize_pq(sig * peak_pq); + } + +-float3 map_one_pixel_rgb(float3 rgb, float peak, float average) { +- float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f); ++float3 map_one_pixel_rgb(float3 rgb, float peak) { ++ float sig = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS); + + // Rescale the variables in order to bring it into a representation where + // 1.0 represents the dst_peak. 
This is because all of the tone mapping +@@ -178,95 +105,91 @@ float3 map_one_pixel_rgb(float3 rgb, flo + + float sig_old = sig; + +- // Scale the signal to compensate for differences in the average brightness +- float slope = min(1.0f, sdr_avg / average); +- sig *= slope; +- peak *= slope; +- + // Desaturate the color using a coefficient dependent on the signal level + if (desat_param > 0.0f) { + float luma = get_luma_dst(rgb); +- float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f); ++ float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS); + coeff = native_powr(coeff, 10.0f / desat_param); + rgb = mix(rgb, (float3)luma, (float3)coeff); +- sig = mix(sig, luma * slope, coeff); + } + +- sig = TONE_FUNC(sig, peak); +- ++ sig = TONE_FUNC(sig, peak, target_peak); + sig = min(sig, 1.0f); +- rgb *= (sig/sig_old); ++ rgb *= (sig / sig_old); ++ + return rgb; + } +-// map from source space YUV to destination space RGB +-float3 map_to_dst_space_from_yuv(float3 yuv, float peak) { ++ ++// Map from source space YUV to destination space RGB ++float3 map_to_dst_space_from_yuv(float3 yuv) { + float3 c = yuv2lrgb(yuv); +- c = ootf(c, peak); + c = lrgb2lrgb(c); + return c; + } + ++__constant sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_ADDRESS_CLAMP_TO_EDGE | ++ CLK_FILTER_NEAREST); ++ + __kernel void tonemap(__write_only image2d_t dst1, + __read_only image2d_t src1, + __write_only image2d_t dst2, + __read_only image2d_t src2, +- global uint *util_buf, ++#ifdef NON_SEMI_PLANAR_OUT ++ __write_only image2d_t dst3, ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ __read_only image2d_t src3, ++#endif + float peak + ) + { +- __local uint sum_wg; +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_ADDRESS_CLAMP_TO_EDGE | +- CLK_FILTER_NEAREST); + int xi = get_global_id(0); + int yi = get_global_id(1); + // each work item process four pixels + int x = 2 * xi; + int y = 2 * yi; + +- float y0 = read_imagef(src1, sampler, (int2)(x, y)).x; +- float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x; +- float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x; +- float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x; +- float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy; +- +- float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y), peak); +- float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y), peak); +- float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y), peak); +- float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y), peak); +- +- float sig0 = max(c0.x, max(c0.y, c0.z)); +- float sig1 = max(c1.x, max(c1.y, c1.z)); +- float sig2 = max(c2.x, max(c2.y, c2.z)); +- float sig3 = max(c3.x, max(c3.y, c3.z)); +- float sig = max(sig0, max(sig1, max(sig2, sig3))); +- +- struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak); +- +- float3 c0_old = c0, c1_old = c1, c2_old = c2; +- c0 = map_one_pixel_rgb(c0, r.peak, r.average); +- c1 = map_one_pixel_rgb(c1, r.peak, r.average); +- c2 = map_one_pixel_rgb(c2, r.peak, r.average); +- c3 = map_one_pixel_rgb(c3, r.peak, r.average); +- +- c0 = inverse_ootf(c0, target_peak); +- c1 = inverse_ootf(c1, target_peak); +- c2 = inverse_ootf(c2, target_peak); +- c3 = inverse_ootf(c3, target_peak); +- +- y0 = lrgb2y(c0); +- y1 = lrgb2y(c1); +- y2 = lrgb2y(c2); +- y3 = lrgb2y(c3); +- float3 chroma_c = get_chroma_sample(c0, c1, c2, c3); +- float3 chroma = lrgb2yuv(chroma_c); +- + if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) { +- write_imagef(dst1, (int2)(x, y), (float4)(y0, 
0.0f, 0.0f, 1.0f));
+- write_imagef(dst1, (int2)(x+1, y), (float4)(y1, 0.0f, 0.0f, 1.0f));
+- write_imagef(dst1, (int2)(x, y+1), (float4)(y2, 0.0f, 0.0f, 1.0f));
+- write_imagef(dst1, (int2)(x+1, y+1), (float4)(y3, 0.0f, 0.0f, 1.0f));
+- write_imagef(dst2, (int2)(xi, yi),
+- (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
++ float y0 = read_imagef(src1, sampler, (int2)(x, y)).x;
++ float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x;
++ float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x;
++ float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x;
++#ifdef NON_SEMI_PLANAR_IN
++ float u = read_imagef(src2, sampler, (int2)(xi, yi)).x;
++ float v = read_imagef(src3, sampler, (int2)(xi, yi)).x;
++ float2 uv = (float2)(u, v);
++#else
++ float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy;
++#endif
++
++ float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y));
++ float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y));
++ float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y));
++ float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y));
++
++ c0 = map_one_pixel_rgb(c0, peak);
++ c1 = map_one_pixel_rgb(c1, peak);
++ c2 = map_one_pixel_rgb(c2, peak);
++ c3 = map_one_pixel_rgb(c3, peak);
++
++ y0 = lrgb2y(c0);
++ y1 = lrgb2y(c1);
++ y2 = lrgb2y(c2);
++ y3 = lrgb2y(c3);
++
++ float3 chroma_c = get_chroma_sample(c0, c1, c2, c3);
++ float3 chroma = lrgb2yuv(chroma_c);
++
++ write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f));
++ write_imagef(dst1, (int2)(x + 1, y), (float4)(y1, 0.0f, 0.0f, 1.0f));
++ write_imagef(dst1, (int2)(x, y + 1), (float4)(y2, 0.0f, 0.0f, 1.0f));
++ write_imagef(dst1, (int2)(x + 1, y + 1), (float4)(y3, 0.0f, 0.0f, 1.0f));
++#ifdef NON_SEMI_PLANAR_OUT
++ write_imagef(dst2, (int2)(xi, yi), (float4)(chroma.y, 0.0f, 0.0f, 1.0f));
++ write_imagef(dst3, (int2)(xi, yi), (float4)(chroma.z, 0.0f, 0.0f, 1.0f));
++#else
++ write_imagef(dst2, (int2)(xi, yi), (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
++#endif
+ }
+ }
+Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_opencl.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/vf_tonemap_opencl.c
++++ jellyfin-ffmpeg/libavfilter/vf_tonemap_opencl.c
+@@ -15,6 +15,7 @@
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
++
+ #include <float.h>
+
+ #include "libavutil/avassert.h"
+@@ -31,12 +32,36 @@
+ #include "video.h"
+ #include "colorspace.h"
+
+-// TODO:
+-// - separate peak-detection from tone-mapping kernel to solve
+-// one-frame-delay issue.
+-// - more format support
++#define OPENCL_SOURCE_NB 3
++
++#define FLOAT_EPS 1.175494351e-38f
+
+-#define DETECTION_FRAMES 63
++#define ST2084_MAX_LUMINANCE 10000.0f
++#define REF_WHITE_BT2390 203.0f
++#define REF_WHITE_DEFAULT 100.0f
++
++#define ST2084_M1 0.1593017578125f
++#define ST2084_M2 78.84375f
++#define ST2084_C1 0.8359375f
++#define ST2084_C2 18.8515625f
++#define ST2084_C3 18.6875f
++
++#define ARIB_B67_A 0.17883277f
++#define ARIB_B67_B 0.28466892f
++#define ARIB_B67_C 0.55991073f
++
++#define MAX(a, b) ((a) > (b) ? (a) : (b))
++#define MIN(a, b) ((a) < (b) ?
(a) : (b)) ++#define CLAMP(a, b, c) MIN(MAX((a), (b)), (c)) ++#define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a) ++ ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUV420P16, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++}; + + enum TonemapAlgorithm { + TONEMAP_NONE, +@@ -46,6 +71,7 @@ enum TonemapAlgorithm { + TONEMAP_REINHARD, + TONEMAP_HABLE, + TONEMAP_MOBIUS, ++ TONEMAP_BT2390, + TONEMAP_MAX, + }; + +@@ -57,23 +83,30 @@ typedef struct TonemapOpenCLContext { + enum AVColorPrimaries primaries, primaries_in, primaries_out; + enum AVColorRange range, range_in, range_out; + enum AVChromaLocation chroma_loc; ++ enum AVPixelFormat in_fmt, out_fmt; ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++ int in_planes, out_planes; ++ ++ float *lin_lut, *delin_lut; ++ float *pqlin_lut, *pqdelin_lut; + + enum TonemapAlgorithm tonemap; + enum AVPixelFormat format; ++ double ref_white; + double peak; + double param; + double desat_param; + double target_peak; + double scene_threshold; ++ int lut_trc; + int initialised; + cl_kernel kernel; + cl_command_queue command_queue; +- cl_mem util_mem; + } TonemapOpenCLContext; + + static const char *const linearize_funcs[AVCOL_TRC_NB] = { +- [AVCOL_TRC_SMPTE2084] = "eotf_st2084", +- [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg", ++ [AVCOL_TRC_SMPTE2084] = "eotf_st2084", ++ [AVCOL_TRC_ARIB_STD_B67] = "eotf_arib_b67", + }; + + static const char *const delinearize_funcs[AVCOL_TRC_NB] = { +@@ -99,8 +132,161 @@ static const char *const tonemap_func[TO + [TONEMAP_REINHARD] = "reinhard", + [TONEMAP_HABLE] = "hable", + [TONEMAP_MOBIUS] = "mobius", ++ [TONEMAP_BT2390] = "bt2390", + }; + ++// linearizer for PQ/ST2084 ++static float eotf_st2084(float x, float ref_white) ++{ ++ x = FFMAX(x, 0.0f); ++ float xpow = powf(x, 1.0f / ST2084_M2); ++ float num = FFMAX(xpow - ST2084_C1, 0.0f); ++ float den = FFMAX(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); ++ x = powf(num / den, 1.0f / ST2084_M1); ++ return x * ST2084_MAX_LUMINANCE / ref_white; ++} ++ ++// delinearizer for PQ/ST2084 ++static float inverse_eotf_st2084(float x, float ref_white) ++{ ++ x = FFMAX(x, 0.0f); ++ x *= ref_white / ST2084_MAX_LUMINANCE; ++ float xpow = powf(x, ST2084_M1); ++#if 0 ++ // Original formulation from SMPTE ST 2084:2014 publication. ++ float num = ST2084_C1 + ST2084_C2 * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return powf(num / den, ST2084_M2); ++#else ++ // More stable arrangement that avoids some cancellation error. ++ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return powf(1.0f + num / den, ST2084_M2); ++#endif ++} ++ ++static float ootf_1_2(float x) { ++ return x > 0.0f ? powf(x, 1.2f) : x; ++} ++ ++static float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? powf(x, 1.0f / 1.2f) : x; ++} ++ ++static float oetf_arib_b67(float x) { ++ x = FFMAX(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? sqrtf(3.0f * x) ++ : (ARIB_B67_A * logf(12.0f * x - ARIB_B67_B) + ARIB_B67_C); ++} ++ ++static float inverse_oetf_arib_b67(float x) { ++ x = FFMAX(x, 0.0f); ++ return x <= 0.5f ++ ? 
(x * x) * (1.0f / 3.0f) ++ : (expf((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); ++} ++ ++// linearizer for HLG/ARIB-B67 ++static float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)); ++} ++ ++// delinearizer for HLG/ARIB-B67 ++static float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x)); ++} ++ ++// delinearizer for BT709, BT2020-10 ++static float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? powf(x, 1.0f / 2.4f) : 0.0f; ++} ++ ++static float linearize(float x, float ref_white, enum AVColorTransferCharacteristic trc_in) ++{ ++ if (trc_in == AVCOL_TRC_SMPTE2084) ++ return eotf_st2084(x, ref_white); ++ else if (trc_in == AVCOL_TRC_ARIB_STD_B67) ++ return eotf_arib_b67(x); ++ else ++ return x; ++} ++ ++static float delinearize(float x, float ref_white, enum AVColorTransferCharacteristic trc_out) ++{ ++ if (trc_out == AVCOL_TRC_BT709 || trc_out == AVCOL_TRC_BT2020_10) ++ return inverse_eotf_bt1886(x); ++ if (trc_out == AVCOL_TRC_SMPTE2084) ++ return inverse_eotf_st2084(x, ref_white); ++ else ++ return x; ++} ++ ++static int compute_trc_luts(AVFilterContext *avctx) ++{ ++ TonemapOpenCLContext *ctx = avctx->priv; ++ int lut_pq = ctx->tonemap == TONEMAP_BT2390 && ctx->trc_in != AVCOL_TRC_SMPTE2084; ++ int i; ++ ++ if (!ctx->lin_lut && !(ctx->lin_lut = av_calloc(1024, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ if (!ctx->delin_lut && !(ctx->delin_lut = av_calloc(1024, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ if (lut_pq) { ++ if (!ctx->pqlin_lut && !(ctx->pqlin_lut = av_calloc(1024, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ if (!ctx->pqdelin_lut && !(ctx->pqdelin_lut = av_calloc(1024, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ } ++ ++ for (i = 0; i < 1024; i++) { ++ float x = i / 1023.0f; ++ ctx->lin_lut[i] = FFMAX(linearize(x, ctx->ref_white, ctx->trc_in), 0.0f); ++ ctx->delin_lut[i] = FFMAX(delinearize(x, ctx->ref_white, ctx->trc_out), 0.0f); ++ if (lut_pq) { ++ ctx->pqlin_lut[i] = FFMAX(linearize(x, ctx->ref_white, AVCOL_TRC_SMPTE2084), 0.0f); ++ ctx->pqdelin_lut[i] = FFMAX(delinearize(x, ctx->ref_white, AVCOL_TRC_SMPTE2084), 0.0f); ++ } ++ } ++ ++ return 0; ++} ++ ++static void print_opencl_const_trc_luts(AVFilterContext *avctx, AVBPrint *buf) ++{ ++ TonemapOpenCLContext *ctx = avctx->priv; ++ int i; ++ ++ if (ctx->lin_lut) { ++ av_bprintf(buf, "__constant float lin_lut[1024] = {\n"); ++ for (i = 0; i < 1024; i++) { ++ av_bprintf(buf, " %.5ff,", ctx->lin_lut[i]); ++ } ++ av_bprintf(buf, "};\n"); ++ } ++ if (ctx->delin_lut) { ++ av_bprintf(buf, "__constant float delin_lut[1024] = {\n"); ++ for (i = 0; i < 1024; i++) { ++ av_bprintf(buf, " %.5ff,", ctx->delin_lut[i]); ++ } ++ av_bprintf(buf, "};\n"); ++ } ++ if (ctx->pqlin_lut) { ++ av_bprintf(buf, "__constant float pqlin_lut[1024] = {\n"); ++ for (i = 0; i < 1024; i++) { ++ av_bprintf(buf, " %.5ff,", ctx->pqlin_lut[i]); ++ } ++ av_bprintf(buf, "};\n"); ++ } ++ if (ctx->pqdelin_lut) { ++ av_bprintf(buf, "__constant float pqdelin_lut[1024] = {\n"); ++ for (i = 0; i < 1024; i++) { ++ av_bprintf(buf, " %.5ff,", ctx->pqdelin_lut[i]); ++ } ++ av_bprintf(buf, "};\n"); ++ } ++} ++ + static void get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, + double rgb2rgb[3][3]) { + double rgb2xyz[3][3], xyz2rgb[3][3]; +@@ -111,23 +297,17 @@ static void get_rgb2rgb_matrix(enum AVCo + ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb); + } + +-#define OPENCL_SOURCE_NB 3 +-// Average light level for SDR signals. 
This is equal to a signal level of 0.5 +-// under a typical presentation gamma of about 2.0. +-static const float sdr_avg = 0.25f; +- + static int tonemap_opencl_init(AVFilterContext *avctx) + { + TonemapOpenCLContext *ctx = avctx->priv; ++ AVBPrint header; ++ const char *opencl_sources[OPENCL_SOURCE_NB]; + int rgb2rgb_passthrough = 1; + double rgb2rgb[3][3], rgb2yuv[3][3], yuv2rgb[3][3]; + const struct LumaCoefficients *luma_src, *luma_dst; + cl_int cle; ++ int lut_pq = ctx->tonemap == TONEMAP_BT2390 && ctx->trc_in != AVCOL_TRC_SMPTE2084; + int err; +- AVBPrint header; +- const char *opencl_sources[OPENCL_SOURCE_NB]; +- +- av_bprint_init(&header, 1024, AV_BPRINT_SIZE_AUTOMATIC); + + switch(ctx->tonemap) { + case TONEMAP_GAMMA: +@@ -147,22 +327,25 @@ static int tonemap_opencl_init(AVFilterC + if (isnan(ctx->param)) + ctx->param = 1.0f; + ++ ctx->ref_white = ctx->tonemap == TONEMAP_BT2390 ? REF_WHITE_BT2390 ++ : REF_WHITE_DEFAULT; ++ + // SDR peak is 1.0f + ctx->target_peak = 1.0f; +- av_log(ctx, AV_LOG_DEBUG, "tone mapping transfer from %s to %s\n", ++ ++ av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n", + av_color_transfer_name(ctx->trc_in), + av_color_transfer_name(ctx->trc_out)); +- av_log(ctx, AV_LOG_DEBUG, "mapping colorspace from %s to %s\n", ++ av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n", + av_color_space_name(ctx->colorspace_in), + av_color_space_name(ctx->colorspace_out)); +- av_log(ctx, AV_LOG_DEBUG, "mapping primaries from %s to %s\n", ++ av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n", + av_color_primaries_name(ctx->primaries_in), + av_color_primaries_name(ctx->primaries_out)); +- av_log(ctx, AV_LOG_DEBUG, "mapping range from %s to %s\n", ++ av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n", + av_color_range_name(ctx->range_in), + av_color_range_name(ctx->range_out)); +- // checking valid value just because of limited implementaion +- // please remove when more functionalities are implemented ++ + av_assert0(ctx->trc_out == AVCOL_TRC_BT709 || + ctx->trc_out == AVCOL_TRC_BT2020_10); + av_assert0(ctx->trc_in == AVCOL_TRC_SMPTE2084|| +@@ -172,22 +355,30 @@ static int tonemap_opencl_init(AVFilterC + av_assert0(ctx->primaries_in == AVCOL_PRI_BT2020 || + ctx->primaries_in == AVCOL_PRI_BT709); + +- av_bprintf(&header, "__constant const float tone_param = %.4ff;\n", ++ av_bprint_init(&header, 2048, AV_BPRINT_SIZE_UNLIMITED); ++ ++ av_bprintf(&header, "__constant float tone_param = %.4ff;\n", + ctx->param); +- av_bprintf(&header, "__constant const float desat_param = %.4ff;\n", ++ av_bprintf(&header, "__constant float desat_param = %.4ff;\n", + ctx->desat_param); +- av_bprintf(&header, "__constant const float target_peak = %.4ff;\n", ++ av_bprintf(&header, "__constant float target_peak = %.4ff;\n", + ctx->target_peak); +- av_bprintf(&header, "__constant const float sdr_avg = %.4ff;\n", sdr_avg); +- av_bprintf(&header, "__constant const float scene_threshold = %.4ff;\n", ++ av_bprintf(&header, "__constant float scene_threshold = %.4ff;\n", + ctx->scene_threshold); ++ + av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]); +- av_bprintf(&header, "#define DETECTION_FRAMES %d\n", DETECTION_FRAMES); ++ ++ if (ctx->in_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_IN\n"); ++ ++ if (ctx->out_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_OUT\n"); + + if (ctx->primaries_out != ctx->primaries_in) { + get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb); + rgb2rgb_passthrough = 0; 
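+ // (The 3x3 rgb2rgb gamut matrix computed here is emitted into the
+ // generated kernel header via ff_opencl_print_const_matrix_3x3() below
+ // and applied per pixel by the lrgb2lrgb() helper that
+ // map_to_dst_space_from_yuv() calls. A minimal sketch of that step,
+ // with approximate names, assuming m is the emitted row-major matrix:
+ //   c = (float3)(m[0]*c.x + m[1]*c.y + m[2]*c.z,
+ //                m[3]*c.x + m[4]*c.y + m[5]*c.z,
+ //                m[6]*c.x + m[7]*c.y + m[8]*c.z);
+ // When the primaries already match, the RGB2RGB_PASSTHROUGH define
+ // makes lrgb2lrgb() a no-op instead.)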
+ } ++ + if (ctx->range_in == AVCOL_RANGE_JPEG) + av_bprintf(&header, "#define FULL_RANGE_IN\n"); + +@@ -201,11 +392,10 @@ static int tonemap_opencl_init(AVFilterC + else + ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); + +- + luma_src = ff_get_luma_coefficients(ctx->colorspace_in); + if (!luma_src) { + err = AVERROR(EINVAL); +- av_log(avctx, AV_LOG_ERROR, "unsupported input colorspace %d (%s)\n", ++ av_log(avctx, AV_LOG_ERROR, "Unsupported input colorspace %d (%s)\n", + ctx->colorspace_in, av_color_space_name(ctx->colorspace_in)); + goto fail; + } +@@ -213,7 +403,7 @@ static int tonemap_opencl_init(AVFilterC + luma_dst = ff_get_luma_coefficients(ctx->colorspace_out); + if (!luma_dst) { + err = AVERROR(EINVAL); +- av_log(avctx, AV_LOG_ERROR, "unsupported output colorspace %d (%s)\n", ++ av_log(avctx, AV_LOG_ERROR, "Unsupported output colorspace %d (%s)\n", + ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); + goto fail; + } +@@ -225,20 +415,27 @@ static int tonemap_opencl_init(AVFilterC + ff_matrix_invert_3x3(rgb2yuv, yuv2rgb); + ff_opencl_print_const_matrix_3x3(&header, "rgb_matrix", yuv2rgb); + +- av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n", ++ av_bprintf(&header, "__constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n", + luma_src->cr, luma_src->cg, luma_src->cb); +- av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n", ++ av_bprintf(&header, "__constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n", + luma_dst->cr, luma_dst->cg, luma_dst->cb); + +- av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]); +- av_bprintf(&header, "#define delinearize %s\n", +- delinearize_funcs[ctx->trc_out]); +- +- if (ctx->trc_in == AVCOL_TRC_ARIB_STD_B67) +- av_bprintf(&header, "#define ootf_impl ootf_hlg\n"); +- +- if (ctx->trc_out == AVCOL_TRC_ARIB_STD_B67) +- av_bprintf(&header, "#define inverse_ootf_impl inverse_ootf_hlg\n"); ++ if (ctx->lut_trc) { ++ if (!ctx->lin_lut || !ctx->delin_lut) { ++ err = compute_trc_luts(avctx); ++ if (err < 0) ++ goto fail; ++ } ++ print_opencl_const_trc_luts(avctx, &header); ++ if (lut_pq) ++ av_bprintf(&header, "#define TRC_LUT_PQ\n"); ++ av_bprintf(&header, "#define TRC_LUT\n"); ++ av_bprintf(&header, "#define linearize %s\n", "linearize_lut"); ++ av_bprintf(&header, "#define delinearize %s\n", "delinearize_lut"); ++ } else { ++ av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]); ++ av_bprintf(&header, "#define delinearize %s\n", delinearize_funcs[ctx->trc_out]); ++ } + + av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); + opencl_sources[0] = header.str; +@@ -259,43 +456,78 @@ static int tonemap_opencl_init(AVFilterC + ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle); + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle); + +- ctx->util_mem = +- clCreateBuffer(ctx->ocf.hwctx->context, 0, +- (2 * DETECTION_FRAMES + 7) * sizeof(unsigned), +- NULL, &cle); +- CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create util buffer: %d.\n", cle); +- + ctx->initialised = 1; + return 0; + + fail: + av_bprint_finalize(&header, NULL); +- if (ctx->util_mem) +- clReleaseMemObject(ctx->util_mem); + if (ctx->command_queue) + clReleaseCommandQueue(ctx->command_queue); + if (ctx->kernel) + clReleaseKernel(ctx->kernel); ++ if (ctx->lin_lut) ++ av_freep(&ctx->lin_lut); ++ if (ctx->delin_lut) ++ av_freep(&ctx->delin_lut); ++ if (ctx->pqlin_lut) ++ av_freep(&ctx->pqlin_lut); ++ if (ctx->pqdelin_lut) ++ 
av_freep(&ctx->pqdelin_lut); + return err; + } + ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ + static int tonemap_opencl_config_output(AVFilterLink *outlink) + { +- AVFilterContext *avctx = outlink->src; +- TonemapOpenCLContext *s = avctx->priv; ++ AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = avctx->inputs[0]; ++ TonemapOpenCLContext *ctx = avctx->priv; ++ AVHWFramesContext *in_frames_ctx; ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ const AVPixFmtDescriptor *in_desc; ++ const AVPixFmtDescriptor *out_desc; + int ret; +- if (s->format == AV_PIX_FMT_NONE) +- av_log(avctx, AV_LOG_WARNING, "format not set, use default format NV12\n"); +- else { +- if (s->format != AV_PIX_FMT_P010 && +- s->format != AV_PIX_FMT_NV12) { +- av_log(avctx, AV_LOG_ERROR, "unsupported output format," +- "only p010/nv12 supported now\n"); ++ ++ if (!inlink->hw_frames_ctx) + return AVERROR(EINVAL); +- } +- } ++ in_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ in_format = in_frames_ctx->sw_format; ++ out_format = (ctx->format == AV_PIX_FMT_NONE) ? in_format : ctx->format; ++ in_desc = av_pix_fmt_desc_get(in_format); ++ out_desc = av_pix_fmt_desc_get(out_format); ++ ++ if (!format_is_supported(in_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(in_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!format_is_supported(out_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (in_desc->comp[0].depth != 10 && in_desc->comp[0].depth != 16) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", ++ in_desc->comp[0].depth); ++ return AVERROR(ENOSYS); ++ } ++ ++ ctx->in_fmt = in_format; ++ ctx->out_fmt = out_format; ++ ctx->in_desc = in_desc; ++ ctx->out_desc = out_desc; ++ ctx->in_planes = av_pix_fmt_count_planes(in_format); ++ ctx->out_planes = av_pix_fmt_count_planes(out_format); ++ ctx->ocf.output_format = out_format; + +- s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? 
AV_PIX_FMT_NV12 : s->format; + ret = ff_opencl_filter_config_output(outlink); + if (ret < 0) + return ret; +@@ -310,13 +542,36 @@ static int launch_kernel(AVFilterContext + size_t global_work[2]; + size_t local_work[2]; + cl_int cle; ++ int idx_arg; ++ ++ if (!output->data[0] || !input->data[0] || !output->data[1] || !input->data[1]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->out_planes > 2 && !output->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->in_planes > 2 && !input->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } + + CL_SET_KERNEL_ARG(kernel, 0, cl_mem, &output->data[0]); + CL_SET_KERNEL_ARG(kernel, 1, cl_mem, &input->data[0]); + CL_SET_KERNEL_ARG(kernel, 2, cl_mem, &output->data[1]); + CL_SET_KERNEL_ARG(kernel, 3, cl_mem, &input->data[1]); +- CL_SET_KERNEL_ARG(kernel, 4, cl_mem, &ctx->util_mem); +- CL_SET_KERNEL_ARG(kernel, 5, cl_float, &peak); ++ ++ idx_arg = 4; ++ if (ctx->out_planes > 2) ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &output->data[2]); ++ ++ if (ctx->in_planes > 2) ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &input->data[2]); ++ ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_float, &peak); + + local_work[0] = 16; + local_work[1] = 16; +@@ -343,10 +598,6 @@ static int tonemap_opencl_filter_frame(A + AVFrame *output = NULL; + cl_int cle; + int err; +- double peak = ctx->peak; +- +- AVHWFramesContext *input_frames_ctx = +- (AVHWFramesContext*)input->hw_frames_ctx->data; + + av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input->format), +@@ -365,8 +616,10 @@ static int tonemap_opencl_filter_frame(A + if (err < 0) + goto fail; + +- if (!peak) +- peak = ff_determine_signal_peak(input); ++ if (!ctx->peak) { ++ ctx->peak = ff_determine_signal_peak(input); ++ av_log(ctx, AV_LOG_DEBUG, "Computed signal peak: %f\n", ctx->peak); ++ } + + if (ctx->trc != -1) + output->color_trc = ctx->trc; +@@ -390,13 +643,8 @@ static int tonemap_opencl_filter_frame(A + if (!ctx->initialised) { + if (!(input->color_trc == AVCOL_TRC_SMPTE2084 || + input->color_trc == AVCOL_TRC_ARIB_STD_B67)) { +- av_log(ctx, AV_LOG_ERROR, "unsupported transfer function characteristic.\n"); +- err = AVERROR(ENOSYS); +- goto fail; +- } +- +- if (input_frames_ctx->sw_format != AV_PIX_FMT_P010) { +- av_log(ctx, AV_LOG_ERROR, "unsupported format in tonemap_opencl.\n"); ++ av_log(ctx, AV_LOG_ERROR, "Unsupported transfer function characteristic: %s\n", ++ av_color_transfer_name(input->color_trc)); + err = AVERROR(ENOSYS); + goto fail; + } +@@ -406,15 +654,9 @@ static int tonemap_opencl_filter_frame(A + goto fail; + } + +- switch(input_frames_ctx->sw_format) { +- case AV_PIX_FMT_P010: +- err = launch_kernel(avctx, ctx->kernel, output, input, peak); +- if (err < 0) goto fail; +- break; +- default: +- err = AVERROR(ENOSYS); ++ err = launch_kernel(avctx, ctx->kernel, output, input, ctx->peak); ++ if (err < 0) + goto fail; +- } + + cle = clFinish(ctx->command_queue); + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle); +@@ -423,31 +665,9 @@ static int tonemap_opencl_filter_frame(A + + ff_update_hdr_metadata(output, ctx->target_peak); + +- av_log(ctx, AV_LOG_DEBUG, "Tone-mapping output: %s, %ux%u (%"PRId64").\n", ++ av_log(ctx, AV_LOG_DEBUG, "Tonemapping output: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(output->format), + output->width, output->height, output->pts); +-#ifndef NDEBUG +- { +- uint32_t *ptr, *max_total_p, *avg_total_p, *frame_number_p; +- float peak_detected, avg_detected; +- unsigned 
map_size = (2 * DETECTION_FRAMES + 7) * sizeof(unsigned); +- ptr = (void *)clEnqueueMapBuffer(ctx->command_queue, ctx->util_mem, +- CL_TRUE, CL_MAP_READ, 0, map_size, +- 0, NULL, NULL, &cle); +- // For the layout of the util buffer, refer tonemap.cl +- if (ptr) { +- max_total_p = ptr + 2 * (DETECTION_FRAMES + 1) + 1; +- avg_total_p = max_total_p + 1; +- frame_number_p = avg_total_p + 2; +- peak_detected = (float)*max_total_p / (REFERENCE_WHITE * (*frame_number_p)); +- avg_detected = (float)*avg_total_p / (REFERENCE_WHITE * (*frame_number_p)); +- av_log(ctx, AV_LOG_DEBUG, "peak %f, avg %f will be used for next frame\n", +- peak_detected, avg_detected); +- clEnqueueUnmapMemObject(ctx->command_queue, ctx->util_mem, ptr, 0, +- NULL, NULL); +- } +- } +-#endif + + return ff_filter_frame(outlink, output); + +@@ -463,8 +683,6 @@ static av_cold void tonemap_opencl_unini + TonemapOpenCLContext *ctx = avctx->priv; + cl_int cle; + +- if (ctx->util_mem) +- clReleaseMemObject(ctx->util_mem); + if (ctx->kernel) { + cle = clReleaseKernel(ctx->kernel); + if (cle != CL_SUCCESS) +@@ -479,43 +697,54 @@ static av_cold void tonemap_opencl_unini + "command queue: %d.\n", cle); + } + ++ if (ctx->lin_lut) ++ av_freep(&ctx->lin_lut); ++ if (ctx->delin_lut) ++ av_freep(&ctx->delin_lut); ++ if (ctx->pqlin_lut) ++ av_freep(&ctx->pqlin_lut); ++ if (ctx->pqdelin_lut) ++ av_freep(&ctx->pqdelin_lut); ++ + ff_opencl_filter_uninit(avctx); + } + + #define OFFSET(x) offsetof(TonemapOpenCLContext, x) + #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + static const AVOption tonemap_opencl_options[] = { +- { "tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" }, +- { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, "tonemap" }, +- { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, "tonemap" }, +- { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, "tonemap" }, +- { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, "tonemap" }, +- { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" }, +- { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, "tonemap" }, +- { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, "tonemap" }, +- { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, +- { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, +- { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, "transfer" }, +- { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, "transfer" }, +- { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" }, +- { "m", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" }, +- { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, "matrix" }, +- { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, "matrix" }, +- { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" }, +- { "p", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" }, +- { "bt709", 0, 0, 
AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, "primaries" }, +- { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, "primaries" }, +- { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" }, +- { "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" }, +- { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, +- { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, +- { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, +- { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, +- { "format", "output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" }, +- { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, +- { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, +- { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, +- { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, ++ { "tonemap", "Tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, { .i64 = TONEMAP_NONE }, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" }, ++ { "none", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_NONE }, 0, 0, FLAGS, "tonemap" }, ++ { "linear", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_LINEAR }, 0, 0, FLAGS, "tonemap" }, ++ { "gamma", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_GAMMA }, 0, 0, FLAGS, "tonemap" }, ++ { "clip", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_CLIP }, 0, 0, FLAGS, "tonemap" }, ++ { "reinhard", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_REINHARD }, 0, 0, FLAGS, "tonemap" }, ++ { "hable", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_HABLE }, 0, 0, FLAGS, "tonemap" }, ++ { "mobius", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MOBIUS }, 0, 0, FLAGS, "tonemap" }, ++ { "bt2390", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_BT2390 }, 0, 0, FLAGS, "tonemap" }, ++ { "transfer", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, "transfer" }, ++ { "t", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, "transfer" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_BT709 }, 0, 0, FLAGS, "transfer" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_BT2020_10 }, 0, 0, FLAGS, "transfer" }, ++ { "matrix", "Set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, { .i64 = AVCOL_SPC_BT709 }, -1, INT_MAX, FLAGS, "matrix" }, ++ { "m", "Set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, { .i64 = AVCOL_SPC_BT709 }, -1, INT_MAX, FLAGS, "matrix" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_SPC_BT709 }, 0, 0, FLAGS, "matrix" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_SPC_BT2020_NCL }, 0, 0, FLAGS, "matrix" }, ++ { "primaries", "Set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, { .i64 = AVCOL_PRI_BT709 }, -1, INT_MAX, FLAGS, "primaries" }, ++ { "p", "Set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, { .i64 = AVCOL_PRI_BT709 }, -1, INT_MAX, FLAGS, "primaries" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_PRI_BT709 }, 0, 0, FLAGS, "primaries" }, ++ { "bt2020", 0, 0, 
AV_OPT_TYPE_CONST, { .i64 = AVCOL_PRI_BT2020 }, 0, 0, FLAGS, "primaries" }, ++ { "range", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_MPEG }, -1, INT_MAX, FLAGS, "range" }, ++ { "r", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_MPEG }, -1, INT_MAX, FLAGS, "range" }, ++ { "tv", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "pc", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "limited", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "full", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "format", "Output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" }, ++ { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, ++ { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, DBL_MIN, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, { .dbl = 0.2 }, 0, DBL_MAX, FLAGS }, ++ { "luttrc", "Enable LUT for de/linearize", OFFSET(lut_trc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, + { NULL } + }; + diff --git a/debian/patches/series b/debian/patches/series index a59a133267e..d44e268ac89 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -3,3 +3,4 @@ 0003-add-cuda-tonemap-impl.patch 0004-add-amf-refactor-and-hevc-10-bit-encoding.patch 0005-add-opencl-scaler-and-pixfmt-converter-impl.patch +0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch From b13cb92bf2a0e7b7240bdb2d2de2cfba2a0c8c2b Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:38:41 +0800 Subject: [PATCH 16/41] add pgs subtitle support and code refactor to opencl overlay --- ...-support-and-code-refactor-to-opencl.patch | 634 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 635 insertions(+) create mode 100644 debian/patches/0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch diff --git a/debian/patches/0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch b/debian/patches/0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch new file mode 100644 index 00000000000..bb348f93d63 --- /dev/null +++ b/debian/patches/0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch @@ -0,0 +1,634 @@ +Index: jellyfin-ffmpeg/libavfilter/opencl/overlay.cl +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/opencl/overlay.cl ++++ jellyfin-ffmpeg/libavfilter/opencl/overlay.cl +@@ -16,15 +16,24 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-__kernel void overlay_no_alpha(__write_only image2d_t dst, +- __read_only image2d_t main, +- __read_only image2d_t overlay, +- int x_position, +- int y_position) ++__constant sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_FILTER_NEAREST); ++ ++__kernel void overlay_pass(__write_only image2d_t dst, ++ __read_only image2d_t main) + { +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_FILTER_NEAREST); ++ int2 loc = (int2)(get_global_id(0), get_global_id(1)); + ++ float4 val = read_imagef(main, sampler, loc); ++ write_imagef(dst, loc, val); ++} ++ ++__kernel void overlay_noalpha(__write_only 
image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay, ++ int x_position, ++ int y_position) ++{ + int2 overlay_size = get_image_dim(overlay); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + +@@ -41,15 +50,15 @@ __kernel void overlay_no_alpha(__write_o + } + } + +-__kernel void overlay_internal_alpha(__write_only image2d_t dst, +- __read_only image2d_t main, +- __read_only image2d_t overlay, +- int x_position, +- int y_position) ++__kernel void overlay_alpha(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay, ++ __read_only image2d_t alpha, ++ int x_position, ++ int y_position, ++ int alpha_adj_x, ++ int alpha_adj_y) + { +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_FILTER_NEAREST); +- + int2 overlay_size = get_image_dim(overlay); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + +@@ -63,24 +72,50 @@ __kernel void overlay_internal_alpha(__w + int2 loc_overlay = (int2)(x_position, y_position); + float4 in_main = read_imagef(main, sampler, loc); + float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay); +- float4 val = in_overlay * in_overlay.w + in_main * (1.0f - in_overlay.w); ++ ++ int2 loc_alpha = (int2)(loc.x * alpha_adj_x, loc.y * alpha_adj_y) - loc_overlay; ++ float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); ++ ++ float4 val = in_overlay * in_alpha.x + in_main * (1.0f - in_alpha.x); + write_imagef(dst, loc, val); + } + } + +-__kernel void overlay_external_alpha(__write_only image2d_t dst, +- __read_only image2d_t main, +- __read_only image2d_t overlay, +- __read_only image2d_t alpha, +- int x_position, +- int y_position, +- int alpha_adj_x, +- int alpha_adj_y) ++__kernel void overlay_noalpha_uv(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay_u, ++ __read_only image2d_t overlay_v, ++ int x_position, ++ int y_position) + { +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_FILTER_NEAREST); ++ int2 overlay_size = get_image_dim(overlay_u); ++ int2 loc = (int2)(get_global_id(0), get_global_id(1)); + +- int2 overlay_size = get_image_dim(overlay); ++ if (loc.x < x_position || ++ loc.y < y_position || ++ loc.x >= overlay_size.x + x_position || ++ loc.y >= overlay_size.y + y_position) { ++ float4 val = read_imagef(main, sampler, loc); ++ write_imagef(dst, loc, val); ++ } else { ++ int2 loc_overlay = (int2)(x_position, y_position); ++ float4 val_u = read_imagef(overlay_u, sampler, loc - loc_overlay); ++ float4 val_v = read_imagef(overlay_v, sampler, loc - loc_overlay); ++ write_imagef(dst, loc, (float4)(val_u.x, val_v.x, 0.0f, 1.0f)); ++ } ++} ++ ++__kernel void overlay_alpha_uv(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay_u, ++ __read_only image2d_t overlay_v, ++ __read_only image2d_t alpha, ++ int x_position, ++ int y_position, ++ int alpha_adj_x, ++ int alpha_adj_y) ++{ ++ int2 overlay_size = get_image_dim(overlay_u); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + + if (loc.x < x_position || +@@ -90,13 +125,14 @@ __kernel void overlay_external_alpha(__w + float4 val = read_imagef(main, sampler, loc); + write_imagef(dst, loc, val); + } else { +- int2 loc_overlay = (int2)(x_position, y_position); +- float4 in_main = read_imagef(main, sampler, loc); +- float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay); ++ int2 loc_overlay = (int2)(x_position, y_position); ++ float4 in_main = read_imagef(main, sampler, loc); ++ float4 in_overlay_u = 
read_imagef(overlay_u, sampler, loc - loc_overlay); ++ float4 in_overlay_v = read_imagef(overlay_v, sampler, loc - loc_overlay); ++ float4 in_overlay = (float4)(in_overlay_u.x, in_overlay_v.x, 0.0f, 1.0f); + +- int2 loc_alpha = (int2)(loc.x * alpha_adj_x, +- loc.y * alpha_adj_y) - loc_overlay; +- float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); ++ int2 loc_alpha = (int2)(loc.x * alpha_adj_x, loc.y * alpha_adj_y) - loc_overlay; ++ float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); + + float4 val = in_overlay * in_alpha.x + in_main * (1.0f - in_alpha.x); + write_imagef(dst, loc, val); +Index: jellyfin-ffmpeg/libavfilter/vf_overlay_opencl.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_overlay_opencl.c ++++ jellyfin-ffmpeg/libavfilter/vf_overlay_opencl.c +@@ -28,72 +28,113 @@ + #include "opencl_source.h" + #include "video.h" + ++static const enum AVPixelFormat supported_main_formats[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_NONE, ++}; ++ ++static const enum AVPixelFormat supported_overlay_formats[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUVA420P, ++ AV_PIX_FMT_NONE, ++}; ++ + typedef struct OverlayOpenCLContext { + OpenCLFilterContext ocf; + ++ enum AVPixelFormat in_fmt_main, in_fmt_overlay; ++ const AVPixFmtDescriptor *in_desc_main, *in_desc_overlay; ++ int in_planes_main, in_planes_overlay; ++ + int initialised; + cl_kernel kernel; ++ cl_kernel kernel_pass; ++ cl_kernel kernel_uv; ++ const char *kernel_name; ++ const char *kernel_name_pass; ++ const char *kernel_name_uv; + cl_command_queue command_queue; + + FFFrameSync fs; + +- int nb_planes; + int x_subsample; + int y_subsample; +- int alpha_separate; ++ int alpha; + + int x_position; + int y_position; ++ ++ int opt_repeatlast; ++ int opt_shortest; ++ int opt_eof_action; + } OverlayOpenCLContext; + +-static int overlay_opencl_load(AVFilterContext *avctx, +- enum AVPixelFormat main_format, +- enum AVPixelFormat overlay_format) ++static int format_is_supported(const enum AVPixelFormat fmts[], enum AVPixelFormat fmt) ++{ ++ for (int i = 0; fmts[i] != AV_PIX_FMT_NONE; i++) ++ if (fmts[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static int formats_match(const enum AVPixelFormat fmt_main, const enum AVPixelFormat fmt_overlay) { ++ switch(fmt_main) { ++ case AV_PIX_FMT_NV12: ++ return fmt_overlay == AV_PIX_FMT_NV12 || ++ fmt_overlay == AV_PIX_FMT_YUV420P || ++ fmt_overlay == AV_PIX_FMT_YUVA420P; ++ case AV_PIX_FMT_YUV420P: ++ return fmt_overlay == AV_PIX_FMT_YUV420P || ++ fmt_overlay == AV_PIX_FMT_YUVA420P; ++ default: ++ return 0; ++ } ++} ++ ++static int overlay_opencl_load(AVFilterContext *avctx) + { + OverlayOpenCLContext *ctx = avctx->priv; + cl_int cle; +- const char *source = ff_opencl_source_overlay; +- const char *kernel; +- const AVPixFmtDescriptor *main_desc, *overlay_desc; +- int err, i, main_planes, overlay_planes; +- +- main_desc = av_pix_fmt_desc_get(main_format); +- overlay_desc = av_pix_fmt_desc_get(overlay_format); +- +- main_planes = overlay_planes = 0; +- for (i = 0; i < main_desc->nb_components; i++) +- main_planes = FFMAX(main_planes, +- main_desc->comp[i].plane + 1); +- for (i = 0; i < overlay_desc->nb_components; i++) +- overlay_planes = FFMAX(overlay_planes, +- overlay_desc->comp[i].plane + 1); +- +- ctx->nb_planes = main_planes; +- ctx->x_subsample = 1 << main_desc->log2_chroma_w; +- ctx->y_subsample = 1 << main_desc->log2_chroma_h; ++ int err; ++ ++ ctx->x_subsample = 1 << 
ctx->in_desc_main->log2_chroma_w; ++ ctx->y_subsample = 1 << ctx->in_desc_main->log2_chroma_h; + + if (ctx->x_position % ctx->x_subsample || + ctx->y_position % ctx->y_subsample) { +- av_log(avctx, AV_LOG_WARNING, "Warning: overlay position (%d, %d) " ++ av_log(avctx, AV_LOG_WARNING, "Overlay position (%d, %d) " + "does not match subsampling (%d, %d).\n", + ctx->x_position, ctx->y_position, + ctx->x_subsample, ctx->y_subsample); + } + +- if (main_planes == overlay_planes) { +- if (main_desc->nb_components == overlay_desc->nb_components) +- kernel = "overlay_no_alpha"; ++ switch(ctx->in_fmt_overlay) { ++ case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_YUV420P: ++ ctx->alpha = 0; ++ ctx->kernel_name = "overlay_noalpha"; ++ break; ++ case AV_PIX_FMT_YUVA420P: ++ ctx->alpha = 1; ++ ctx->kernel_name = "overlay_alpha"; ++ break; ++ default: ++ err = AVERROR_BUG; ++ goto fail; ++ } ++ ++ if (ctx->in_planes_main == 2 && ctx->in_planes_overlay > 2) { ++ if (ctx->alpha) ++ ctx->kernel_name_uv = "overlay_alpha_uv"; + else +- kernel = "overlay_internal_alpha"; +- ctx->alpha_separate = 0; +- } else { +- kernel = "overlay_external_alpha"; +- ctx->alpha_separate = 1; ++ ctx->kernel_name_uv = "overlay_noalpha_uv"; + } + +- av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel); ++ av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", ctx->kernel_name); + +- err = ff_opencl_filter_load_program(avctx, &source, 1); ++ err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_overlay, 1); + if (err < 0) + goto fail; + +@@ -103,10 +144,20 @@ static int overlay_opencl_load(AVFilterC + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " + "command queue %d.\n", cle); + +- ctx->kernel = clCreateKernel(ctx->ocf.program, kernel, &cle); ++ ctx->kernel = clCreateKernel(ctx->ocf.program, ctx->kernel_name, &cle); + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle); + ++ ctx->kernel_name_pass = "overlay_pass"; ++ ctx->kernel_pass = clCreateKernel(ctx->ocf.program, ctx->kernel_name_pass, &cle); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel_pass %d.\n", cle); ++ ++ if (ctx->kernel_name_uv) { ++ ctx->kernel_uv = clCreateKernel(ctx->ocf.program, ctx->kernel_name_uv, &cle); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel_uv %d.\n", cle); ++ } ++ + ctx->initialised = 1; ++ + return 0; + + fail: +@@ -114,21 +165,113 @@ fail: + clReleaseCommandQueue(ctx->command_queue); + if (ctx->kernel) + clReleaseKernel(ctx->kernel); ++ if (ctx->kernel_pass) ++ clReleaseKernel(ctx->kernel_pass); ++ if (ctx->kernel_uv) ++ clReleaseKernel(ctx->kernel_uv); ++ return err; ++} ++ ++static int launch_kernel(AVFilterContext *avctx, AVFrame *output, AVFrame *input_main, ++ AVFrame *input_overlay, int plane, int passthrough) { ++ OverlayOpenCLContext *ctx = avctx->priv; ++ cl_mem mem; ++ cl_int cle, x, y; ++ cl_kernel kernel; ++ size_t global_work[2]; ++ int idx_arg = 0; ++ int err; ++ ++ if (passthrough) ++ kernel = ctx->kernel_pass; ++ else if (plane == 1 && ctx->in_planes_main == 2 && ctx->in_planes_overlay > 2) ++ kernel = ctx->kernel_uv; ++ else ++ kernel = ctx->kernel; ++ ++ // dst ++ mem = (cl_mem)output->data[plane]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ ++ // main ++ mem = (cl_mem)input_main->data[plane]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ ++ if (!passthrough) { ++ // overlay ++ mem = (cl_mem)input_overlay->data[plane]; ++ if (!mem) { ++ err = 
AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ ++ // non-semi planar on top of the semi planar ++ if (plane == 1 && ctx->in_planes_main == 2 && ctx->in_planes_overlay > 2) { ++ mem = (cl_mem)input_overlay->data[plane + 1]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ } ++ ++ // alpha ++ if (ctx->alpha) { ++ mem = (cl_mem)input_overlay->data[ctx->in_planes_overlay - 1]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ } ++ ++ x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample); ++ y = ctx->y_position / (plane == 0 ? 1 : ctx->y_subsample); ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &x); ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &y); ++ ++ if (ctx->alpha) { ++ cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample; ++ cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample; ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &alpha_adj_x); ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &alpha_adj_y); ++ } ++ } ++ ++ err = ff_opencl_filter_work_size_from_image(avctx, global_work, ++ input_main, plane, 0); ++ if (err < 0) ++ goto fail; ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, kernel, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue overlay kernel " ++ "for plane %d: %d.\n", plane, cle); ++ return 0; ++ ++fail: + return err; + } + + static int overlay_opencl_blend(FFFrameSync *fs) + { +- AVFilterContext *avctx = fs->parent; +- AVFilterLink *outlink = avctx->outputs[0]; ++ AVFilterContext *avctx = fs->parent; ++ AVFilterLink *outlink = avctx->outputs[0]; + OverlayOpenCLContext *ctx = avctx->priv; + AVFrame *input_main, *input_overlay; + AVFrame *output; +- cl_mem mem; +- cl_int cle, x, y; +- size_t global_work[2]; +- int kernel_arg = 0; +- int err, plane; ++ cl_int cle; ++ int passthrough = 0; ++ int err, p; + + err = ff_framesync_get_frame(fs, 0, &input_main, 0); + if (err < 0) +@@ -137,14 +280,14 @@ static int overlay_opencl_blend(FFFrameS + if (err < 0) + return err; + +- if (!ctx->initialised) { +- AVHWFramesContext *main_fc = +- (AVHWFramesContext*)input_main->hw_frames_ctx->data; +- AVHWFramesContext *overlay_fc = +- (AVHWFramesContext*)input_overlay->hw_frames_ctx->data; ++ if (!input_main) ++ return AVERROR_BUG; ++ ++ if (!input_overlay) ++ passthrough = 1; + +- err = overlay_opencl_load(avctx, main_fc->sw_format, +- overlay_fc->sw_format); ++ if (!ctx->initialised) { ++ err = overlay_opencl_load(avctx); + if (err < 0) + return err; + } +@@ -155,54 +298,10 @@ static int overlay_opencl_blend(FFFrameS + goto fail; + } + +- for (plane = 0; plane < ctx->nb_planes; plane++) { +- kernel_arg = 0; +- +- mem = (cl_mem)output->data[plane]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- +- mem = (cl_mem)input_main->data[plane]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- +- mem = (cl_mem)input_overlay->data[plane]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- +- if (ctx->alpha_separate) { +- mem = (cl_mem)input_overlay->data[ctx->nb_planes]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- } +- +- x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample); +- y = ctx->y_position / (plane == 0 ? 
1 : ctx->y_subsample); +- +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &x); +- kernel_arg++; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &y); +- kernel_arg++; +- +- if (ctx->alpha_separate) { +- cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample; +- cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample; +- +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_x); +- kernel_arg++; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_y); +- kernel_arg++; +- } +- +- err = ff_opencl_filter_work_size_from_image(avctx, global_work, +- output, plane, 0); ++ for (p = 0; p < ctx->in_planes_main; p++) { ++ err = launch_kernel(avctx, output, input_main, input_overlay, p, passthrough); + if (err < 0) +- goto fail; +- +- cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, +- global_work, NULL, 0, NULL, NULL); +- CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue overlay kernel " +- "for plane %d: %d.\n", plane, cle); ++ return err; + } + + cle = clFinish(ctx->command_queue); +@@ -217,6 +316,9 @@ static int overlay_opencl_blend(FFFrameS + return ff_filter_frame(outlink, output); + + fail: ++ clFinish(ctx->command_queue); ++ av_frame_free(&input_main); ++ av_frame_free(&input_overlay); + av_frame_free(&output); + return err; + } +@@ -225,8 +327,49 @@ static int overlay_opencl_config_output( + { + AVFilterContext *avctx = outlink->src; + OverlayOpenCLContext *ctx = avctx->priv; ++ ++ AVFilterLink *inlink = avctx->inputs[0]; ++ AVHWFramesContext *frames_ctx_main = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ ++ AVFilterLink *inlink_overlay = avctx->inputs[1]; ++ AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data; ++ + int err; + ++ if (!frames_ctx_main) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on main input\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ ctx->in_fmt_main = frames_ctx_main->sw_format; ++ ctx->in_desc_main = av_pix_fmt_desc_get(frames_ctx_main->sw_format); ++ ctx->in_planes_main = av_pix_fmt_count_planes(frames_ctx_main->sw_format); ++ if (!format_is_supported(supported_main_formats, ctx->in_fmt_main)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported main input format: %s\n", ++ av_get_pix_fmt_name(ctx->in_fmt_main)); ++ return AVERROR(ENOSYS); ++ } ++ ++ if (!frames_ctx_overlay) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on overlay input\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ ctx->in_fmt_overlay = frames_ctx_overlay->sw_format; ++ ctx->in_desc_overlay = av_pix_fmt_desc_get(frames_ctx_overlay->sw_format); ++ ctx->in_planes_overlay = av_pix_fmt_count_planes(frames_ctx_overlay->sw_format); ++ if (!format_is_supported(supported_overlay_formats, ctx->in_fmt_overlay)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported overlay input format: %s\n", ++ av_get_pix_fmt_name(ctx->in_fmt_overlay)); ++ return AVERROR(ENOSYS); ++ } ++ ++ if (!formats_match(ctx->in_fmt_main, ctx->in_fmt_overlay)) { ++ av_log(ctx, AV_LOG_ERROR, "Can't overlay %s on %s \n", ++ av_get_pix_fmt_name(ctx->in_fmt_overlay), av_get_pix_fmt_name(ctx->in_fmt_main)); ++ return AVERROR(EINVAL); ++ } ++ + err = ff_opencl_filter_config_output(outlink); + if (err < 0) + return err; +@@ -235,6 +378,11 @@ static int overlay_opencl_config_output( + if (err < 0) + return err; + ++ ctx->fs.opt_repeatlast = ctx->opt_repeatlast; ++ ctx->fs.opt_shortest = ctx->opt_shortest; ++ ctx->fs.opt_eof_action = ctx->opt_eof_action; ++ ctx->fs.time_base = outlink->time_base = inlink->time_base; ++ + return 
ff_framesync_configure(&ctx->fs);
+ }
+
+@@ -266,6 +414,20 @@ static av_cold void overlay_opencl_unini
+ "kernel: %d.\n", cle);
+ }
+
++ if (ctx->kernel_pass) {
++ cle = clReleaseKernel(ctx->kernel_pass);
++ if (cle != CL_SUCCESS)
++ av_log(avctx, AV_LOG_ERROR, "Failed to release "
++ "kernel_pass: %d.\n", cle);
++ }
++
++ if (ctx->kernel_uv) {
++ cle = clReleaseKernel(ctx->kernel_uv);
++ if (cle != CL_SUCCESS)
++ av_log(avctx, AV_LOG_ERROR, "Failed to release "
++ "kernel_uv: %d.\n", cle);
++ }
++
+ if (ctx->command_queue) {
+ cle = clReleaseCommandQueue(ctx->command_queue);
+ if (cle != CL_SUCCESS)
+@@ -280,11 +442,20 @@ static av_cold void overlay_opencl_unini
+
+ #define OFFSET(x) offsetof(OverlayOpenCLContext, x)
+ #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
++
+ static const AVOption overlay_opencl_options[] = {
+ { "x", "Overlay x position",
+ OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
+ { "y", "Overlay y position",
+ OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
++ { "eof_action", "Action to take when encountering EOF from secondary input ",
++ OFFSET(opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT },
++ EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" },
++ { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
++ { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
++ { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, "eof_action" },
++ { "shortest", "force termination when the shortest input terminates", OFFSET(opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
++ { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
+ { NULL },
+ };
+
diff --git a/debian/patches/series b/debian/patches/series
index d44e268ac89..f875b00723e 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -4,3 +4,4 @@
 0004-add-amf-refactor-and-hevc-10-bit-encoding.patch
 0005-add-opencl-scaler-and-pixfmt-converter-impl.patch
 0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
+0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch

From 201acce0cb61c729bc63309a3e4b56a5bb6b4b61 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Wed, 10 Nov 2021 21:31:11 +0800
Subject: [PATCH 17/41] add d3d11-opencl interop for AMD

---
 ...008-add-d3d11-opencl-interop-for-AMD.patch | 387 ++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 388 insertions(+)
 create mode 100644 debian/patches/0008-add-d3d11-opencl-interop-for-AMD.patch

diff --git a/debian/patches/0008-add-d3d11-opencl-interop-for-AMD.patch b/debian/patches/0008-add-d3d11-opencl-interop-for-AMD.patch
new file mode 100644
index 00000000000..5ab172ac65f
--- /dev/null
+++ b/debian/patches/0008-add-d3d11-opencl-interop-for-AMD.patch
@@ -0,0 +1,387 @@
+Index: jellyfin-ffmpeg/libavutil/hwcontext_opencl.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavutil/hwcontext_opencl.c
++++ jellyfin-ffmpeg/libavutil/hwcontext_opencl.c
+@@ -64,6 +64,16 @@
+ #if HAVE_OPENCL_D3D11
+ #include <CL/cl_d3d11.h>
+ #include "hwcontext_d3d11va.h"
++
++// From cl_amd_planar_yuv; unfortunately no header is provided.
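++// A rough usage sketch of the interop path these typedefs enable
++// (hypothetical handles, error handling omitted): map the D3D11
++// NV12/P010 texture to a single CL image, then split out its planes:
++//   cl_mem img = clCreateFromD3D11Texture2DKHR(ctx, flags, tex, 0, &err);
++//   cl_mem y   = clGetPlaneFromImageAMD(ctx, img, 0, &err); // luma
++//   cl_mem uv  = clGetPlaneFromImageAMD(ctx, img, 1, &err); // chroma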
++typedef CL_API_ENTRY cl_mem(CL_API_CALL *clGetPlaneFromImageAMD_fn)( ++ cl_context context, cl_mem mem, cl_uint plane, ++ cl_int *errcode_ret); ++ ++typedef CL_API_ENTRY cl_mem(CL_API_CALL *clConvertImageAMD_fn)( ++ cl_context context, cl_mem image, const cl_image_format *image_format, ++ cl_int *errcode_ret); ++ + #endif + + #if HAVE_OPENCL_DRM_ARM +@@ -72,7 +82,6 @@ + #include "hwcontext_drm.h" + #endif + +- + typedef struct OpenCLDeviceContext { + // Default command queue to use for transfer/mapping operations on + // the device. If the user supplies one, this is a reference to it. +@@ -113,12 +122,19 @@ typedef struct OpenCLDeviceContext { + + #if HAVE_OPENCL_D3D11 + int d3d11_mapping_usable; ++ int d3d11_map_amd; ++ int d3d11_map_intel; ++ + clCreateFromD3D11Texture2DKHR_fn + clCreateFromD3D11Texture2DKHR; + clEnqueueAcquireD3D11ObjectsKHR_fn + clEnqueueAcquireD3D11ObjectsKHR; + clEnqueueReleaseD3D11ObjectsKHR_fn + clEnqueueReleaseD3D11ObjectsKHR; ++ clGetPlaneFromImageAMD_fn ++ clGetPlaneFromImageAMD; ++ clConvertImageAMD_fn ++ clConvertImageAMD; + #endif + + #if HAVE_OPENCL_DRM_ARM +@@ -142,7 +158,6 @@ typedef struct OpenCLFramesContext { + #endif + } OpenCLFramesContext; + +- + static void CL_CALLBACK opencl_error_callback(const char *errinfo, + const void *private_info, + size_t cb, +@@ -820,17 +835,25 @@ static int opencl_device_init(AVHWDevice + #if HAVE_OPENCL_D3D11 + { + const char *d3d11_ext = "cl_khr_d3d11_sharing"; +- const char *nv12_ext = "cl_intel_d3d11_nv12_media_sharing"; ++ const char *amd_ext = "cl_amd_planar_yuv"; ++ const char *intel_ext = "cl_intel_d3d11_nv12_media_sharing"; + int fail = 0; + + if (!opencl_check_extension(hwdev, d3d11_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is " + "required for D3D11 to OpenCL mapping.\n", d3d11_ext); + fail = 1; +- } else if (!opencl_check_extension(hwdev, nv12_ext)) { +- av_log(hwdev, AV_LOG_VERBOSE, "The %s extension may be " +- "required for D3D11 to OpenCL mapping.\n", nv12_ext); +- // Not fatal. 
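++ // Unlike the old advisory check above, the logic below makes one of
++ // the two vendor paths mandatory: cl_amd_planar_yuv (AMD) or
++ // cl_intel_d3d11_nv12_media_sharing (Intel); with neither present,
++ // D3D11 to OpenCL mapping is reported as not usable.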
++ } else { ++ if (opencl_check_extension(hwdev, amd_ext)) { ++ priv->d3d11_map_amd = 1; ++ } else if (opencl_check_extension(hwdev, intel_ext)) { ++ priv->d3d11_map_intel = 1; ++ } else { ++ av_log(hwdev, AV_LOG_VERBOSE, "One of the %s or %s " ++ "extensions are required for D3D11 to OpenCL " ++ "mapping.\n", amd_ext, intel_ext); ++ fail = 1; ++ } + } + + CL_FUNC(clCreateFromD3D11Texture2DKHR, +@@ -840,6 +863,13 @@ static int opencl_device_init(AVHWDevice + CL_FUNC(clEnqueueReleaseD3D11ObjectsKHR, + "D3D11 in OpenCL release"); + ++ if (priv->d3d11_map_amd) { ++ CL_FUNC(clGetPlaneFromImageAMD, ++ "D3D11 to OpenCL image planar mapping on AMD"); ++ CL_FUNC(clConvertImageAMD, ++ "D3D11 to OpenCL image data type converting on AMD"); ++ } ++ + if (fail) { + av_log(hwdev, AV_LOG_WARNING, "D3D11 to OpenCL mapping " + "not usable.\n"); +@@ -1242,7 +1272,7 @@ static int opencl_device_derive(AVHWDevi + CL_CONTEXT_VA_API_DISPLAY_INTEL, + (intptr_t)src_hwctx->display, + CL_CONTEXT_INTEROP_USER_SYNC, +- CL_FALSE, ++ CL_TRUE, + 0, + }; + OpenCLDeviceSelector selector = { +@@ -1281,11 +1311,13 @@ static int opencl_device_derive(AVHWDevi + device_handle, + &device, FALSE); + if (SUCCEEDED(hr)) { +- cl_context_properties props[5] = { ++ cl_context_properties props[7] = { + CL_CONTEXT_PLATFORM, + 0, + CL_CONTEXT_ADAPTER_D3D9EX_KHR, + (intptr_t)device, ++ CL_CONTEXT_INTEROP_USER_SYNC, ++ CL_TRUE, + 0, + }; + OpenCLDeviceSelector selector = { +@@ -1318,11 +1350,13 @@ static int opencl_device_derive(AVHWDevi + case AV_HWDEVICE_TYPE_D3D11VA: + { + AVD3D11VADeviceContext *src_hwctx = src_ctx->hwctx; +- cl_context_properties props[5] = { ++ cl_context_properties props[7] = { + CL_CONTEXT_PLATFORM, + 0, + CL_CONTEXT_D3D11_DEVICE_KHR, + (intptr_t)src_hwctx->device, ++ CL_CONTEXT_INTEROP_USER_SYNC, ++ CL_TRUE, + 0, + }; + OpenCLDeviceSelector selector = { +@@ -2004,7 +2038,8 @@ static int opencl_map_frame(AVHWFramesCo + goto fail; + } + +- dst->data[p] = map->address[p]; ++ dst->data[p] = map->address[p]; ++ dst->linesize[p] = row_pitch; + + av_log(hwfc, AV_LOG_DEBUG, "Map plane %d (%p -> %p).\n", + p, src->data[p], dst->data[p]); +@@ -2329,7 +2364,7 @@ static void opencl_unmap_from_dxva2(AVHW + { + AVOpenCLFrameDescriptor *desc = hwmap->priv; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; +- OpenCLFramesContext *frames_priv = dst_fc->device_ctx->internal->priv; ++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + cl_event event; + cl_int cle; + +@@ -2421,11 +2456,13 @@ static int opencl_frames_derive_from_dxv + cl_int cle; + int err, i, p, nb_planes; + +- if (src_fc->sw_format != AV_PIX_FMT_NV12) { +- av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported " ++ if (src_fc->sw_format != AV_PIX_FMT_NV12 && ++ src_fc->sw_format != AV_PIX_FMT_P010) { ++ av_log(dst_fc, AV_LOG_ERROR, "Only NV12 and P010 textures are supported " + "for DXVA2 to OpenCL mapping.\n"); + return AVERROR(EINVAL); + } ++ + nb_planes = 2; + + if (src_fc->initial_pool_size == 0) { +@@ -2493,7 +2530,7 @@ static void opencl_unmap_from_d3d11(AVHW + { + AVOpenCLFrameDescriptor *desc = hwmap->priv; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; +- OpenCLFramesContext *frames_priv = dst_fc->device_ctx->internal->priv; ++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + cl_event event; + cl_int cle; + +@@ -2501,7 +2538,7 @@ static void opencl_unmap_from_d3d11(AVHW + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle != CL_SUCCESS) { +- 
+@@ -2501,7 +2538,7 @@ static void opencl_unmap_from_d3d11(AVHW
+ frames_priv->command_queue, desc->nb_planes, desc->planes,
+ 0, NULL, &event);
+ if (cle != CL_SUCCESS) {
+- av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface "
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to release texture "
+ "handle: %d.\n", cle);
+ }
+
+@@ -2516,7 +2553,7 @@ static int opencl_map_from_d3d11(AVHWFra
+ AVOpenCLFrameDescriptor *desc;
+ cl_event event;
+ cl_int cle;
+- int err, index, i;
++ int err, index, i, nb_planes;
+
+ index = (intptr_t)src->data[1];
+ if (index >= frames_priv->nb_mapped_frames) {
+@@ -2530,20 +2567,36 @@ static int opencl_map_from_d3d11(AVHWFra
+
+ desc = &frames_priv->mapped_frames[index];
+
+- cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR(
+- frames_priv->command_queue, desc->nb_planes, desc->planes,
+- 0, NULL, &event);
+- if (cle != CL_SUCCESS) {
+- av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface "
+- "handle: %d.\n", cle);
+- return AVERROR(EIO);
++ nb_planes = device_priv->d3d11_map_amd ? (desc->nb_planes - 1)
++ : desc->nb_planes;
++
++ if (device_priv->d3d11_map_amd) {
++ cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR(
++ frames_priv->command_queue, 1, &desc->planes[nb_planes],
++ 0, NULL, &event);
++ if (cle != CL_SUCCESS) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire texture "
++ "handle: %d.\n", cle);
++ return AVERROR(EIO);
++ }
++ } else if (device_priv->d3d11_map_intel) {
++ cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR(
++ frames_priv->command_queue, nb_planes, desc->planes,
++ 0, NULL, &event);
++ if (cle != CL_SUCCESS) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire texture "
++ "handle: %d.\n", cle);
++ return AVERROR(EIO);
++ }
++ } else {
++ return AVERROR(ENOSYS);
+ }
+
+ err = opencl_wait_events(dst_fc, &event, 1);
+ if (err < 0)
+ goto fail;
+
+- for (i = 0; i < desc->nb_planes; i++)
++ for (i = 0; i < nb_planes; i++)
+ dst->data[i] = (uint8_t*)desc->planes[i];
+
+ err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
+@@ -2572,16 +2625,26 @@ static int opencl_frames_derive_from_d3d
+ AVD3D11VAFramesContext *src_hwctx = src_fc->hwctx;
+ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
+ OpenCLFramesContext *frames_priv = dst_fc->internal->priv;
++ cl_mem planeUI = NULL;
+ cl_mem_flags cl_flags;
+ cl_int cle;
+ int err, i, p, nb_planes;
+
+- if (src_fc->sw_format != AV_PIX_FMT_NV12) {
+- av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported "
+- "for D3D11 to OpenCL mapping.\n");
+- return AVERROR(EINVAL);
++ // both AMD and Intel support NV12 and P010,
++ // but Intel requires D3D11_RESOURCE_MISC_SHARED.
++ if (device_priv->d3d11_map_amd ||
++ device_priv->d3d11_map_intel) {
++ if (src_fc->sw_format != AV_PIX_FMT_NV12 &&
++ src_fc->sw_format != AV_PIX_FMT_P010) {
++ av_log(dst_fc, AV_LOG_ERROR, "Only NV12 and P010 textures are "
++ "supported with AMD and Intel for D3D11 to OpenCL mapping.\n");
++ return AVERROR(EINVAL);
++ }
++ } else {
++ return AVERROR(ENOSYS);
+ }
+- nb_planes = 2;
++
++ nb_planes = device_priv->d3d11_map_amd ?
3 : 2; + + if (src_fc->initial_pool_size == 0) { + av_log(dst_fc, AV_LOG_ERROR, "Only fixed-size pools are supported " +@@ -2604,27 +2667,94 @@ static int opencl_frames_derive_from_d3d + for (i = 0; i < frames_priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; + desc->nb_planes = nb_planes; +- for (p = 0; p < nb_planes; p++) { +- UINT subresource = 2 * i + p; +- +- desc->planes[p] = +- device_priv->clCreateFromD3D11Texture2DKHR( +- dst_dev->context, cl_flags, src_hwctx->texture, +- subresource, &cle); +- if (!desc->planes[p]) { +- av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " +- "image from plane %d of D3D texture " +- "index %d (subresource %u): %d.\n", +- p, i, (unsigned int)subresource, cle); ++ if (device_priv->d3d11_map_amd) { ++ // put the multiple-plane AMD shared image at the end. ++ desc->planes[nb_planes - 1] = device_priv->clCreateFromD3D11Texture2DKHR( ++ dst_dev->context, cl_flags, src_hwctx->texture, i, &cle); ++ if (!desc->planes[nb_planes - 1]) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL image " ++ "from D3D11 texture index %d: %d.\n", i, cle); + err = AVERROR(EIO); + goto fail; + } ++ ++ for (p = 0; p < nb_planes - 1; p++) { ++ cl_image_format image_fmt; ++ ++ // get plane from AMD in CL_UNSIGNED_INT8|16 type. ++ planeUI = device_priv->clGetPlaneFromImageAMD( ++ dst_dev->context, desc->planes[nb_planes - 1], p, &cle); ++ if (!planeUI) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL image " ++ "from plane %d of image created from D3D11 " ++ "texture index %d: %d.\n", p, i, cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ cle = clGetImageInfo( ++ planeUI, CL_IMAGE_FORMAT, sizeof(cl_image_format), &image_fmt, NULL); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to query image format of CL image " ++ "from plane %d of image created from D3D11 " ++ "texture index %d: %d.\n", p, i, cle); ++ err = AVERROR_UNKNOWN; ++ goto fail; ++ } ++ ++ switch (image_fmt.image_channel_data_type) { ++ case CL_UNSIGNED_INT8: ++ image_fmt.image_channel_data_type = CL_UNORM_INT8; break; ++ case CL_UNSIGNED_INT16: ++ image_fmt.image_channel_data_type = CL_UNORM_INT16; break; ++ default: ++ av_log(dst_fc, AV_LOG_ERROR, "The data type of CL image " ++ "from plane %d of image created from D3D11 texture index %d " ++ "isn't a CL_UNSIGNED_INT8|16 type.\n", p, i); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ // convert plane from CL_UNSIGNED_INT8|16 to CL_UNORM_INT8|16. 
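/*
 * Why this conversion step exists: clGetPlaneFromImageAMD hands each
 * NV12/P010 plane back as a CL_UNSIGNED_INT8/16 image, which a kernel can
 * only read with read_imageui(). The OpenCL filters in this tree sample
 * their inputs with read_imagef() and expect normalised floats, hence the
 * reinterpretation to CL_UNORM_INT8/16 before the planes are exposed. A
 * minimal kernel-side sketch of the difference (illustrative only, not
 * part of the patch):
 *
 *     __kernel void copy_plane(__read_only  image2d_t src,
 *                              __write_only image2d_t dst)
 *     {
 *         int2 pos = (int2)(get_global_id(0), get_global_id(1));
 *         // CL_UNORM_INT8/16 image: returns floats in [0, 1]
 *         float4 v = read_imagef(src, pos);
 *         // a CL_UNSIGNED_INT8/16 image would instead require:
 *         //     uint4 u = read_imageui(src, pos);
 *         write_imagef(dst, pos, v);
 *     }
 */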
++ desc->planes[p] = device_priv->clConvertImageAMD(
++ dst_dev->context, planeUI, &image_fmt, &cle);
++ if (!desc->planes[p]) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to convert data type of CL image "
++ "from plane %d of image created from D3D11 texture index %d "
++ "to CL_UNORM_INT8|16 type: %d.\n", p, i, cle);
++ err = AVERROR(EIO);
++ goto fail;
++ }
++
++ clReleaseMemObject(planeUI); planeUI = NULL;
++ }
++ } else if (device_priv->d3d11_map_intel) {
++ for (p = 0; p < nb_planes; p++) {
++ UINT subresource = 2 * i + p;
++
++ desc->planes[p] =
++ device_priv->clCreateFromD3D11Texture2DKHR(
++ dst_dev->context, cl_flags, src_hwctx->texture,
++ subresource, &cle);
++ if (!desc->planes[p]) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL "
++ "image from plane %d of D3D11 texture "
++ "index %d (subresource %u): %d.\n",
++ p, i, (unsigned int)subresource, cle);
++ err = AVERROR(EIO);
++ goto fail;
++ }
++ }
++ } else {
++ return AVERROR(ENOSYS);
+ }
+ }
+
+ return 0;
+
+ fail:
++ if (planeUI)
++ clReleaseMemObject(planeUI);
+ for (i = 0; i < frames_priv->nb_mapped_frames; i++) {
+ AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i];
+ for (p = 0; p < desc->nb_planes; p++) {
diff --git a/debian/patches/series b/debian/patches/series
index f875b00723e..807ae0cef18 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -5,3 +5,4 @@
 0005-add-opencl-scaler-and-pixfmt-converter-impl.patch
 0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
 0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch
+0008-add-d3d11-opencl-interop-for-AMD.patch

From 01ba5a279896e6b2a81741c7d29e9842d20e3617 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:39:20 +0800
Subject: [PATCH 18/41] add a hack for d3d11-opencl reverse mapping

---
 ...dd-a-hack-for-opencl-reverse-mapping.patch | 131 ++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 132 insertions(+)
 create mode 100644 debian/patches/0009-add-a-hack-for-opencl-reverse-mapping.patch

diff --git a/debian/patches/0009-add-a-hack-for-opencl-reverse-mapping.patch b/debian/patches/0009-add-a-hack-for-opencl-reverse-mapping.patch
new file mode 100644
index 00000000000..825614bfa97
--- /dev/null
+++ b/debian/patches/0009-add-a-hack-for-opencl-reverse-mapping.patch
@@ -0,0 +1,131 @@
+Index: jellyfin-ffmpeg/libavfilter/avfilter.h
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/avfilter.h
++++ jellyfin-ffmpeg/libavfilter/avfilter.h
+@@ -481,6 +481,7 @@ struct AVFilterLink {
+ int w; ///< agreed upon image width
+ int h; ///< agreed upon image height
+ AVRational sample_aspect_ratio; ///< agreed upon sample aspect ratio
++ int fixed_pool_size; ///< fixed size of the frame pool for reverse hw mapping
+ /* These parameters apply only to audio */
+ uint64_t channel_layout; ///< channel layout of current buffer (see libavutil/channel_layout.h)
+ int sample_rate; ///< samples per second
+Index: jellyfin-ffmpeg/libavfilter/opencl.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/opencl.c
++++ jellyfin-ffmpeg/libavfilter/opencl.c
+@@ -60,6 +60,7 @@ static int opencl_filter_set_device(AVFi
+ int ff_opencl_filter_config_input(AVFilterLink *inlink)
+ {
+ AVFilterContext *avctx = inlink->dst;
++ AVFilterLink *outlink = avctx->outputs[0];
+ OpenCLFilterContext *ctx = avctx->priv;
+ AVHWFramesContext *input_frames;
+ int err;
+@@ -90,12 +91,15 @@ int ff_opencl_filter_config_input(AVFilt
+ if
(!ctx->output_height) + ctx->output_height = inlink->h; + ++ outlink->fixed_pool_size = inlink->fixed_pool_size; ++ + return 0; + } + + int ff_opencl_filter_config_output(AVFilterLink *outlink) + { + AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = avctx->inputs[0]; + OpenCLFilterContext *ctx = avctx->priv; + AVBufferRef *output_frames_ref = NULL; + AVHWFramesContext *output_frames; +@@ -137,6 +141,7 @@ int ff_opencl_filter_config_output(AVFil + outlink->hw_frames_ctx = output_frames_ref; + outlink->w = ctx->output_width; + outlink->h = ctx->output_height; ++ outlink->fixed_pool_size = inlink->fixed_pool_size; + + return 0; + fail: +Index: jellyfin-ffmpeg/libavfilter/vf_hwmap.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_hwmap.c ++++ jellyfin-ffmpeg/libavfilter/vf_hwmap.c +@@ -22,6 +22,10 @@ + #include "libavutil/opt.h" + #include "libavutil/pixdesc.h" + ++#if HAVE_OPENCL_D3D11 ++#include "libavutil/hwcontext_d3d11va.h" ++#endif ++ + #include "avfilter.h" + #include "formats.h" + #include "internal.h" +@@ -122,6 +126,12 @@ static int hwmap_config_output(AVFilterL + goto fail; + } + ++ if (hwfc->initial_pool_size) { ++ outlink->fixed_pool_size = hwfc->initial_pool_size; ++ av_log(avctx, AV_LOG_DEBUG, "Saved the fixed_pool_size from " ++ "initial_pool_size: %d\n", outlink->fixed_pool_size); ++ } ++ + } else if (inlink->format == hwfc->format && + (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) && + ctx->reverse) { +@@ -144,8 +154,20 @@ static int hwmap_config_output(AVFilterL + frames->width = hwfc->width; + frames->height = hwfc->height; + +- if (avctx->extra_hw_frames >= 0) +- frames->initial_pool_size = 2 + avctx->extra_hw_frames; ++ if (inlink->fixed_pool_size) ++ frames->initial_pool_size = inlink->fixed_pool_size; ++ ++ if (frames->initial_pool_size == 0) { ++ // Dynamic allocation. 
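/*
 * What the fixed_pool_size plumbing above is for: a D3D11VA decoder
 * allocates one fixed-size texture array, and the OpenCL interop in this
 * tree can only pre-map pools whose initial_pool_size is known (dynamic
 * pools are rejected in opencl_frames_derive_from_d3d11). When the graph
 * ends in a reverse hwmap back to D3D11, the new pool must therefore be
 * fixed-size as well, so the decoder's pool size is carried across every
 * OpenCL filter link and restored here. One plausible shape of such a
 * pipeline, purely illustrative (exact filter arguments vary by setup):
 *
 *     ffmpeg -hwaccel d3d11va -hwaccel_output_format d3d11 -i in.mkv \
 *            -vf "hwmap=derive_device=opencl,tonemap_opencl=format=nv12, \
 *                 hwmap=derive_device=d3d11va:reverse=1" \
 *            -c:v h264_amf out.mkv
 */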
++ } else if (avctx->extra_hw_frames > 0) {
++ frames->initial_pool_size += avctx->extra_hw_frames;
++ }
++
++#if HAVE_OPENCL_D3D11
++ D3D11_TEXTURE2D_DESC texDesc = { .BindFlags = D3D11_BIND_DECODER, };
++ if (frames->format == AV_PIX_FMT_D3D11)
++ frames->user_opaque = &texDesc;
++#endif
+
+ err = av_hwframe_ctx_init(ctx->hwframes_ref);
+ if (err < 0) {
+Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.c
++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c
+@@ -190,7 +190,7 @@ static AVBufferRef *d3d11va_alloc_single
+ .ArraySize = 1,
+ .Usage = D3D11_USAGE_DEFAULT,
+ .BindFlags = hwctx->BindFlags,
+- .MiscFlags = hwctx->MiscFlags,
++ .MiscFlags = hwctx->MiscFlags | D3D11_RESOURCE_MISC_SHARED,
+ };
+
+ hr = ID3D11Device_CreateTexture2D(device_hwctx->device, &texDesc, NULL, &tex);
+@@ -254,9 +254,17 @@ static int d3d11va_frames_init(AVHWFrame
+ .ArraySize = ctx->initial_pool_size,
+ .Usage = D3D11_USAGE_DEFAULT,
+ .BindFlags = hwctx->BindFlags,
+- .MiscFlags = hwctx->MiscFlags,
++ .MiscFlags = hwctx->MiscFlags | D3D11_RESOURCE_MISC_SHARED,
+ };
+
++#if HAVE_OPENCL_D3D11
++ if (ctx->user_opaque) {
++ D3D11_TEXTURE2D_DESC *desc = ctx->user_opaque;
++ if (desc->BindFlags & D3D11_BIND_DECODER)
++ texDesc.BindFlags = D3D11_BIND_DECODER;
++ }
++#endif
++
+ if (hwctx->texture) {
+ D3D11_TEXTURE2D_DESC texDesc2;
+ ID3D11Texture2D_GetDesc(hwctx->texture, &texDesc2);
diff --git a/debian/patches/series b/debian/patches/series
index 807ae0cef18..67198916555 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -6,3 +6,4 @@
 0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
 0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch
 0008-add-d3d11-opencl-interop-for-AMD.patch
+0009-add-a-hack-for-opencl-reverse-mapping.patch

From c9c4edb88eea54bbafa5a8e59b4414d6cd2edb2e Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:39:34 +0800
Subject: [PATCH 19/41] add fixes for ffmpeg_hw

---
 .../0010-add-fixes-for-ffmpeg_hw.patch | 70 +++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 71 insertions(+)
 create mode 100644 debian/patches/0010-add-fixes-for-ffmpeg_hw.patch

diff --git a/debian/patches/0010-add-fixes-for-ffmpeg_hw.patch b/debian/patches/0010-add-fixes-for-ffmpeg_hw.patch
new file mode 100644
index 00000000000..6c1afef91ac
--- /dev/null
+++ b/debian/patches/0010-add-fixes-for-ffmpeg_hw.patch
@@ -0,0 +1,70 @@
+Index: jellyfin-ffmpeg/fftools/ffmpeg_hw.c
+===================================================================
+--- jellyfin-ffmpeg.orig/fftools/ffmpeg_hw.c
++++ jellyfin-ffmpeg/fftools/ffmpeg_hw.c
+@@ -93,6 +93,8 @@ static char *hw_device_default_name(enum
+
+ int hw_device_init_from_string(const char *arg, HWDevice **dev_out)
+ {
++ // "type=name"
++ // "type=name,key=value,key2=value2"
+ // "type=name:device,key=value,key2=value2"
+ // "type:device,key=value,key2=value2"
+ // -> av_hwdevice_ctx_create()
+@@ -124,7 +126,7 @@ int hw_device_init_from_string(const cha
+ }
+
+ if (*p == '=') {
+- k = strcspn(p + 1, ":@");
++ k = strcspn(p + 1, ":@,");
+
+ name = av_strndup(p + 1, k);
+ if (!name) {
+@@ -190,6 +192,18 @@ int hw_device_init_from_string(const cha
+ src->device_ref, 0);
+ if (err < 0)
+ goto fail;
++ } else if (*p == ',') {
++ err = av_dict_parse_string(&options, p + 1, "=", ",", 0);
++
++ if (err < 0) {
++ errmsg = "failed to parse options";
++ goto invalid;
++ }
++
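/*
 * What the new ',' branch accepts: a device string that names only the
 * type (plus an optional name) and creation options, leaving device
 * selection to av_hwdevice_ctx_create(). Illustrative command lines,
 * assuming the QSV creation options defined elsewhere in this series
 * (child_device, child_device_type):
 *
 *     -init_hw_device qsv=qs,child_device_type=d3d11va
 *     -init_hw_device qsv=qs,child_device=0
 *
 * compared with the pre-existing form that carries an explicit device
 * part after ':':
 *
 *     -init_hw_device qsv=qs:hw_any,child_device=0
 */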
++ err = av_hwdevice_ctx_create(&device_ref, type,
++ NULL, options, 0);
++ if (err < 0)
++ goto fail;
+ } else {
+ errmsg = "parse error";
+ goto invalid;
+@@ -527,15 +541,21 @@ int hw_device_setup_for_filter(FilterGra
+ HWDevice *dev;
+ int i;
+
+- // If the user has supplied exactly one hardware device then just
+- // give it straight to every filter for convenience. If more than
+- // one device is available then the user needs to pick one explcitly
+- // with the filter_hw_device option.
++ // Pick the last hardware device if the user doesn't pick the device for
++ // filters explicitly with the filter_hw_device option.
+ if (filter_hw_device)
+ dev = filter_hw_device;
+- else if (nb_hw_devices == 1)
+- dev = hw_devices[0];
+- else
++ else if (nb_hw_devices > 0) {
++ dev = hw_devices[nb_hw_devices - 1];
++
++ if (nb_hw_devices > 1)
++ av_log(NULL, AV_LOG_WARNING, "There are %d hardware devices. Device "
++ "%s of type %s is picked for filters by default. Set hardware "
++ "device explicitly with the filter_hw_device option if device "
++ "%s is not usable for filters.\n",
++ nb_hw_devices, dev->name,
++ av_hwdevice_get_type_name(dev->type), dev->name);
++ } else
+ dev = NULL;
+
+ if (dev) {
diff --git a/debian/patches/series b/debian/patches/series
index 67198916555..f1b2f9672ba 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -7,3 +7,4 @@
 0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch
 0008-add-d3d11-opencl-interop-for-AMD.patch
 0009-add-a-hack-for-opencl-reverse-mapping.patch
+0010-add-fixes-for-ffmpeg_hw.patch

From f7497b48527a5db0d1333ffc7f0c29f9b20600c9 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:39:55 +0800
Subject: [PATCH 20/41] add d3d11 support for QSV

---
 .../0011-add-d3d11-support-for-QSV.patch | 1118 +++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 1119 insertions(+)
 create mode 100644 debian/patches/0011-add-d3d11-support-for-QSV.patch

diff --git a/debian/patches/0011-add-d3d11-support-for-QSV.patch b/debian/patches/0011-add-d3d11-support-for-QSV.patch
new file mode 100644
index 00000000000..5caf8cd3194
--- /dev/null
+++ b/debian/patches/0011-add-d3d11-support-for-QSV.patch
@@ -0,0 +1,1118 @@
+Index: jellyfin-ffmpeg/libavcodec/qsv.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavcodec/qsv.c
++++ jellyfin-ffmpeg/libavcodec/qsv.c
+@@ -36,6 +36,8 @@
+ #include "avcodec.h"
+ #include "qsv_internal.h"
+
++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl))
++
+ #if QSV_VERSION_ATLEAST(1, 12)
+ #include "mfx/mfxvp8.h"
+ #endif
+@@ -243,7 +245,9 @@ int ff_qsv_find_surface_idx(QSVFramesCon
+ int i;
+ for (i = 0; i < ctx->nb_mids; i++) {
+ QSVMid *mid = &ctx->mids[i];
+- if (mid->handle == frame->surface.Data.MemId)
++ mfxHDLPair *pair = (mfxHDLPair*)frame->surface.Data.MemId;
++ if ((mid->handle_pair->first == pair->first) &&
++ (mid->handle_pair->second == pair->second))
+ return i;
+ }
+ return AVERROR_BUG;
+@@ -383,7 +387,11 @@ static int ff_qsv_set_display_handle(AVC
+ int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,
+ const char *load_plugins, int gpu_copy)
+ {
++#if CONFIG_D3D11VA
++ mfxIMPL impl = MFX_IMPL_AUTO_ANY | MFX_IMPL_VIA_D3D11;
++#else
+ mfxIMPL impl = MFX_IMPL_AUTO_ANY;
++#endif
+ mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };
+ mfxInitParam init_par = { MFX_IMPL_AUTO_ANY };
+
+@@ -472,7 +480,7 @@ static AVBufferRef *qsv_create_mids(AVBu
+
+ for (i = 0; i < nb_surfaces; i++) {
+ QSVMid *mid = &mids[i];
+-
mid->handle = frames_hwctx->surfaces[i].Data.MemId; ++ mid->handle_pair = (mfxHDLPair*)frames_hwctx->surfaces[i].Data.MemId; + mid->hw_frames_ref = hw_frames_ref1; + } + +@@ -649,7 +657,7 @@ static mfxStatus qsv_frame_lock(mfxHDL p + goto fail; + + qsv_mid->surf.Info = hw_frames_hwctx->surfaces[0].Info; +- qsv_mid->surf.Data.MemId = qsv_mid->handle; ++ qsv_mid->surf.Data.MemId = qsv_mid->handle_pair; + + /* map the data to the system memory */ + ret = av_hwframe_map(qsv_mid->locked_frame, qsv_mid->hw_frame, +@@ -682,7 +690,13 @@ static mfxStatus qsv_frame_unlock(mfxHDL + static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { + QSVMid *qsv_mid = (QSVMid*)mid; +- *hdl = qsv_mid->handle; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)qsv_mid->handle_pair; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +@@ -690,24 +704,19 @@ int ff_qsv_init_session_device(AVCodecCo + AVBufferRef *device_ref, const char *load_plugins, + int gpu_copy) + { +- static const mfxHandleType handle_types[] = { +- MFX_HANDLE_VA_DISPLAY, +- MFX_HANDLE_D3D9_DEVICE_MANAGER, +- MFX_HANDLE_D3D11_DEVICE, +- }; + AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)device_ref->data; + AVQSVDeviceContext *device_hwctx = device_ctx->hwctx; + mfxSession parent_session = device_hwctx->session; + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; + mfxHDL handle = NULL; ++ int hw_handle_supported = 0; + + mfxSession session; + mfxVersion ver; + mfxIMPL impl; + mfxHandleType handle_type; + mfxStatus err; +- +- int i, ret; ++ int ret; + + err = MFXQueryIMPL(parent_session, &impl); + if (err == MFX_ERR_NONE) +@@ -716,13 +725,23 @@ int ff_qsv_init_session_device(AVCodecCo + return ff_qsv_print_error(avctx, err, + "Error querying the session attributes"); + +- for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) { +- err = MFXVideoCORE_GetHandle(parent_session, handle_types[i], &handle); +- if (err == MFX_ERR_NONE) { +- handle_type = handle_types[i]; +- break; ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ hw_handle_supported = 1; ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ hw_handle_supported = 1; ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ hw_handle_supported = 1; ++ } ++ ++ if (hw_handle_supported) { ++ err = MFXVideoCORE_GetHandle(parent_session, handle_type, &handle); ++ if (err != MFX_ERR_NONE) { ++ return ff_qsv_print_error(avctx, err, ++ "Error getting handle session"); + } +- handle = NULL; + } + if (!handle) { + av_log(avctx, AV_LOG_VERBOSE, "No supported hw handle could be retrieved " +Index: jellyfin-ffmpeg/libavcodec/qsv_internal.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsv_internal.h ++++ jellyfin-ffmpeg/libavcodec/qsv_internal.h +@@ -62,7 +62,7 @@ + + typedef struct QSVMid { + AVBufferRef *hw_frames_ref; +- mfxHDL handle; ++ mfxHDLPair *handle_pair; + + AVFrame *locked_frame; + AVFrame *hw_frame; +Index: jellyfin-ffmpeg/libavfilter/qsvvpp.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/qsvvpp.c ++++ jellyfin-ffmpeg/libavfilter/qsvvpp.c +@@ -68,11 +68,7 @@ struct QSVVPPContext { + int nb_ext_buffers; + }; + +-static const mfxHandleType 
handle_types[] = { +- MFX_HANDLE_VA_DISPLAY, +- MFX_HANDLE_D3D9_DEVICE_MANAGER, +- MFX_HANDLE_D3D11_DEVICE, +-}; ++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) + + static const AVRational default_tb = { 1, 90000 }; + +@@ -233,7 +229,13 @@ static mfxStatus frame_unlock(mfxHDL pth + + static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { +- *hdl = mid; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)mid; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +@@ -555,7 +557,7 @@ static int init_vpp_session(AVFilterCont + + s->out_mem_mode = IS_OPAQUE_MEMORY(s->in_mem_mode) ? + MFX_MEMTYPE_OPAQUE_FRAME : +- MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; ++ MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET | MFX_MEMTYPE_FROM_VPPOUT; + + out_frames_ctx = (AVHWFramesContext *)out_frames_ref->data; + out_frames_hwctx = out_frames_ctx->hwctx; +@@ -601,14 +603,18 @@ static int init_vpp_session(AVFilterCont + return AVERROR_UNKNOWN; + } + +- for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) { +- ret = MFXVideoCORE_GetHandle(device_hwctx->session, handle_types[i], &handle); +- if (ret == MFX_ERR_NONE) { +- handle_type = handle_types[i]; +- break; +- } ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ } else { ++ av_log(avctx, AV_LOG_ERROR, "Error unsupported handle type\n"); ++ return AVERROR_UNKNOWN; + } + ++ ret = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); + if (ret < 0) + return ff_qsvvpp_print_error(avctx, ret, "Error getting the session handle"); + else if (ret > 0) { +Index: jellyfin-ffmpeg/libavfilter/vf_deinterlace_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_deinterlace_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_deinterlace_qsv.c +@@ -42,6 +42,8 @@ + #include "internal.h" + #include "video.h" + ++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) ++ + enum { + QSVDEINT_MORE_OUTPUT = 1, + QSVDEINT_MORE_INPUT, +@@ -157,16 +159,16 @@ static mfxStatus frame_unlock(mfxHDL pth + + static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { +- *hdl = mid; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)mid; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +-static const mfxHandleType handle_types[] = { +- MFX_HANDLE_VA_DISPLAY, +- MFX_HANDLE_D3D9_DEVICE_MANAGER, +- MFX_HANDLE_D3D11_DEVICE, +-}; +- + static int init_out_session(AVFilterContext *ctx) + { + +@@ -194,14 +196,18 @@ static int init_out_session(AVFilterCont + return AVERROR_UNKNOWN; + } + +- for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) { +- err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_types[i], &handle); +- if (err == MFX_ERR_NONE) { +- handle_type = handle_types[i]; +- break; +- } ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { ++ 
handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Error unsupported handle type\n"); ++ return AVERROR_UNKNOWN; + } + ++ err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); + if (err < 0) + return ff_qsvvpp_print_error(ctx, err, "Error getting the session handle"); + else if (err > 0) { +Index: jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +@@ -70,6 +70,7 @@ enum var_name { + }; + + #define QSV_HAVE_SCALING_CONFIG QSV_VERSION_ATLEAST(1, 19) ++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) + + typedef struct QSVScaleContext { + const AVClass *class; +@@ -206,7 +207,7 @@ static int init_out_pool(AVFilterContext + out_frames_ctx->sw_format = out_format; + out_frames_ctx->initial_pool_size = 4; + +- out_frames_hwctx->frame_type = in_frames_hwctx->frame_type; ++ out_frames_hwctx->frame_type = in_frames_hwctx->frame_type | MFX_MEMTYPE_FROM_VPPOUT; + + ret = ff_filter_init_hw_frames(ctx, outlink, 32); + if (ret < 0) +@@ -264,16 +265,16 @@ static mfxStatus frame_unlock(mfxHDL pth + + static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { +- *hdl = mid; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)mid; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +-static const mfxHandleType handle_types[] = { +- MFX_HANDLE_VA_DISPLAY, +- MFX_HANDLE_D3D9_DEVICE_MANAGER, +- MFX_HANDLE_D3D11_DEVICE, +-}; +- + static int init_out_session(AVFilterContext *ctx) + { + +@@ -305,14 +306,18 @@ static int init_out_session(AVFilterCont + return AVERROR_UNKNOWN; + } + +- for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) { +- err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_types[i], &handle); +- if (err == MFX_ERR_NONE) { +- handle_type = handle_types[i]; +- break; +- } ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Error unsupported handle type\n"); ++ return AVERROR_UNKNOWN; + } + ++ err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); + if (err < 0) + return ff_qsvvpp_print_error(ctx, err, "Error getting the session handle"); + else if (err > 0) { +Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +@@ -112,6 +112,8 @@ static void d3d11va_frames_uninit(AVHWFr + if (s->staging_texture) + ID3D11Texture2D_Release(s->staging_texture); + s->staging_texture = NULL; ++ ++ av_freep(&frames_hwctx->texture_infos); + } + + static int d3d11va_frames_get_constraints(AVHWDeviceContext *ctx, +@@ -152,15 +154,21 @@ static void free_texture(void *opaque, u + av_free(data); + } + +-static AVBufferRef *wrap_texture_buf(ID3D11Texture2D *tex, int index) ++static AVBufferRef *wrap_texture_buf(AVHWFramesContext *ctx, ID3D11Texture2D *tex, int index) + { + AVBufferRef *buf; +- AVD3D11FrameDescriptor *desc = av_mallocz(sizeof(*desc)); ++ 
AVD3D11FrameDescriptor *desc = av_mallocz(sizeof(*desc)); ++ D3D11VAFramesContext *s = ctx->internal->priv; ++ AVD3D11VAFramesContext *frames_hwctx = ctx->hwctx; + if (!desc) { + ID3D11Texture2D_Release(tex); + return NULL; + } + ++ frames_hwctx->texture_infos[s->nb_surfaces_used].texture = tex; ++ frames_hwctx->texture_infos[s->nb_surfaces_used].index = index; ++ s->nb_surfaces_used++; ++ + desc->texture = tex; + desc->index = index; + +@@ -199,7 +207,7 @@ static AVBufferRef *d3d11va_alloc_single + return NULL; + } + +- return wrap_texture_buf(tex, 0); ++ return wrap_texture_buf(ctx, tex, 0); + } + + static AVBufferRef *d3d11va_pool_alloc(void *opaque, buffer_size_t size) +@@ -220,7 +228,8 @@ static AVBufferRef *d3d11va_pool_alloc(v + } + + ID3D11Texture2D_AddRef(hwctx->texture); +- return wrap_texture_buf(hwctx->texture, s->nb_surfaces_used++); ++ ++ return wrap_texture_buf(ctx, hwctx->texture, s->nb_surfaces_used); + } + + static int d3d11va_frames_init(AVHWFramesContext *ctx) +@@ -267,7 +276,7 @@ static int d3d11va_frames_init(AVHWFrame + av_log(ctx, AV_LOG_ERROR, "User-provided texture has mismatching parameters\n"); + return AVERROR(EINVAL); + } +- } else if (texDesc.ArraySize > 0) { ++ } else if (!(texDesc.BindFlags & D3D11_BIND_RENDER_TARGET) && texDesc.ArraySize > 0) { + hr = ID3D11Device_CreateTexture2D(device_hwctx->device, &texDesc, NULL, &hwctx->texture); + if (FAILED(hr)) { + av_log(ctx, AV_LOG_ERROR, "Could not create the texture (%lx)\n", (long)hr); +@@ -275,6 +284,11 @@ static int d3d11va_frames_init(AVHWFrame + } + } + ++ hwctx->texture_infos = av_mallocz_array(ctx->initial_pool_size, sizeof(*hwctx->texture_infos)); ++ if (!hwctx->texture_infos) ++ return AVERROR(ENOMEM); ++ hwctx->nb_surfaces = ctx->initial_pool_size; ++ + ctx->internal->pool_internal = av_buffer_pool_init2(sizeof(AVD3D11FrameDescriptor), + ctx, d3d11va_pool_alloc, NULL); + if (!ctx->internal->pool_internal) +Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.h ++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.h +@@ -164,6 +164,17 @@ typedef struct AVD3D11VAFramesContext { + * This field is ignored/invalid if a user-allocated texture is provided. + */ + UINT MiscFlags; ++ ++ /** ++ * In case if texture structure member above is not NULL contains the same texture ++ * pointer for all elements and different indexes into the array texture. ++ * In case if texture structure member above is NULL, all elements contains ++ * pointers to separate non-array textures and 0 indexes. ++ * This field is ignored/invalid if a user-allocated texture is provided. 
++ */ ++ AVD3D11FrameDescriptor *texture_infos; ++ ++ int nb_surfaces; + } AVD3D11VAFramesContext; + + #endif /* AVUTIL_HWCONTEXT_D3D11VA_H */ +Index: jellyfin-ffmpeg/libavutil/hwcontext_opencl.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_opencl.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_opencl.c +@@ -2249,7 +2249,8 @@ static int opencl_map_from_qsv(AVHWFrame + #if CONFIG_LIBMFX + if (src->format == AV_PIX_FMT_QSV) { + mfxFrameSurface1 *mfx_surface = (mfxFrameSurface1*)src->data[3]; +- va_surface = *(VASurfaceID*)mfx_surface->Data.MemId; ++ mfxHDLPair *pair = (mfxHDLPair*)mfx_surface->Data.MemId; ++ va_surface = *(VASurfaceID*)pair->first; + } else + #endif + if (src->format == AV_PIX_FMT_VAAPI) { +Index: jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_qsv.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +@@ -27,9 +27,13 @@ + #include + #endif + ++#define COBJMACROS + #if CONFIG_VAAPI + #include "hwcontext_vaapi.h" + #endif ++#if CONFIG_D3D11VA ++#include "hwcontext_d3d11va.h" ++#endif + #if CONFIG_DXVA2 + #include "hwcontext_dxva2.h" + #endif +@@ -48,6 +52,8 @@ + (MFX_VERSION_MAJOR > (MAJOR) || \ + MFX_VERSION_MAJOR == (MAJOR) && MFX_VERSION_MINOR >= (MINOR)) + ++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) ++ + typedef struct QSVDevicePriv { + AVBufferRef *child_device_ctx; + } QSVDevicePriv; +@@ -74,6 +80,7 @@ typedef struct QSVFramesContext { + + AVBufferRef *child_frames_ref; + mfxFrameSurface1 *surfaces_internal; ++ mfxHDLPair *handle_pairs_internal; + int nb_surfaces_used; + + // used in the frame allocator for non-opaque surfaces +@@ -86,20 +93,6 @@ typedef struct QSVFramesContext { + } QSVFramesContext; + + static const struct { +- mfxHandleType handle_type; +- enum AVHWDeviceType device_type; +- enum AVPixelFormat pix_fmt; +-} supported_handle_types[] = { +-#if CONFIG_VAAPI +- { MFX_HANDLE_VA_DISPLAY, AV_HWDEVICE_TYPE_VAAPI, AV_PIX_FMT_VAAPI }, +-#endif +-#if CONFIG_DXVA2 +- { MFX_HANDLE_D3D9_DEVICE_MANAGER, AV_HWDEVICE_TYPE_DXVA2, AV_PIX_FMT_DXVA2_VLD }, +-#endif +- { 0 }, +-}; +- +-static const struct { + enum AVPixelFormat pix_fmt; + uint32_t fourcc; + } supported_pixel_formats[] = { +@@ -127,28 +120,32 @@ static uint32_t qsv_fourcc_from_pix_fmt( + return 0; + } + ++#if CONFIG_D3D11VA ++static uint32_t qsv_get_d3d11va_bind_flags(int mem_type) ++{ ++ uint32_t bind_flags = 0; ++ ++ if ((mem_type & MFX_MEMTYPE_VIDEO_MEMORY_ENCODER_TARGET) && (mem_type & MFX_MEMTYPE_INTERNAL_FRAME)) ++ bind_flags = D3D11_BIND_DECODER | D3D11_BIND_VIDEO_ENCODER; ++ else ++ bind_flags = D3D11_BIND_DECODER; ++ ++ if ((MFX_MEMTYPE_FROM_VPPOUT & mem_type) || (MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET & mem_type)) ++ bind_flags = D3D11_BIND_RENDER_TARGET; ++ ++ return bind_flags; ++} ++#endif ++ + static int qsv_device_init(AVHWDeviceContext *ctx) + { + AVQSVDeviceContext *hwctx = ctx->hwctx; + QSVDeviceContext *s = ctx->internal->priv; +- ++ int hw_handle_supported = 0; ++ mfxHandleType handle_type; ++ enum AVHWDeviceType device_type; ++ enum AVPixelFormat pix_fmt; + mfxStatus err; +- int i; +- +- for (i = 0; supported_handle_types[i].handle_type; i++) { +- err = MFXVideoCORE_GetHandle(hwctx->session, supported_handle_types[i].handle_type, +- &s->handle); +- if (err == MFX_ERR_NONE) { +- s->handle_type = supported_handle_types[i].handle_type; +- s->child_device_type = supported_handle_types[i].device_type; +- 
s->child_pix_fmt = supported_handle_types[i].pix_fmt; +- break; +- } +- } +- if (!s->handle) { +- av_log(ctx, AV_LOG_VERBOSE, "No supported hw handle could be retrieved " +- "from the session\n"); +- } + + err = MFXQueryIMPL(hwctx->session, &s->impl); + if (err == MFX_ERR_NONE) +@@ -158,6 +155,41 @@ static int qsv_device_init(AVHWDeviceCon + return AVERROR_UNKNOWN; + } + ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(s->impl)) { ++#if CONFIG_VAAPI ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ device_type = AV_HWDEVICE_TYPE_VAAPI; ++ pix_fmt = AV_PIX_FMT_VAAPI; ++ hw_handle_supported = 1; ++#endif ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(s->impl)) { ++#if CONFIG_D3D11VA ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ device_type = AV_HWDEVICE_TYPE_D3D11VA; ++ pix_fmt = AV_PIX_FMT_D3D11; ++ hw_handle_supported = 1; ++#endif ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(s->impl)) { ++#if CONFIG_DXVA2 ++ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ device_type = AV_HWDEVICE_TYPE_DXVA2; ++ pix_fmt = AV_PIX_FMT_DXVA2_VLD; ++ hw_handle_supported = 1; ++#endif ++ } ++ ++ if (hw_handle_supported) { ++ err = MFXVideoCORE_GetHandle(hwctx->session, handle_type, &s->handle); ++ if (err == MFX_ERR_NONE) { ++ s->handle_type = handle_type; ++ s->child_device_type = device_type; ++ s->child_pix_fmt = pix_fmt; ++ } ++ } ++ if (!s->handle) { ++ av_log(ctx, AV_LOG_VERBOSE, "No supported hw handle could be retrieved " ++ "from the session\n"); ++ } + return 0; + } + +@@ -187,6 +219,7 @@ static void qsv_frames_uninit(AVHWFrames + av_freep(&s->mem_ids); + av_freep(&s->surface_ptrs); + av_freep(&s->surfaces_internal); ++ av_freep(&s->handle_pairs_internal); + av_buffer_unref(&s->child_frames_ref); + } + +@@ -202,6 +235,8 @@ static AVBufferRef *qsv_pool_alloc(void + + if (s->nb_surfaces_used < hwctx->nb_surfaces) { + s->nb_surfaces_used++; ++ av_buffer_create((uint8_t*)(s->handle_pairs_internal + s->nb_surfaces_used - 1), ++ sizeof(*s->handle_pairs_internal), qsv_pool_release_dummy, NULL, 0); + return av_buffer_create((uint8_t*)(s->surfaces_internal + s->nb_surfaces_used - 1), + sizeof(*hwctx->surfaces), qsv_pool_release_dummy, NULL, 0); + } +@@ -241,6 +276,13 @@ static int qsv_init_child_ctx(AVHWFrames + child_device_hwctx->display = (VADisplay)device_priv->handle; + } + #endif ++#if CONFIG_D3D11VA ++ if (child_device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ AVD3D11VADeviceContext *child_device_hwctx = child_device_ctx->hwctx; ++ ID3D11Device_AddRef((ID3D11Device*)device_priv->handle); ++ child_device_hwctx->device = (ID3D11Device*)device_priv->handle; ++ } ++#endif + #if CONFIG_DXVA2 + if (child_device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) { + AVDXVA2DeviceContext *child_device_hwctx = child_device_ctx->hwctx; +@@ -267,6 +309,16 @@ static int qsv_init_child_ctx(AVHWFrames + child_frames_ctx->width = FFALIGN(ctx->width, 16); + child_frames_ctx->height = FFALIGN(ctx->height, 16); + ++#if CONFIG_D3D11VA ++ if (child_device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ AVD3D11VAFramesContext *child_frames_hwctx = child_frames_ctx->hwctx; ++ if (hwctx->frame_type == 0) ++ hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET; ++ if (hwctx->frame_type & MFX_MEMTYPE_SHARED_RESOURCE) ++ child_frames_hwctx->MiscFlags = D3D11_RESOURCE_MISC_SHARED; ++ child_frames_hwctx->BindFlags = qsv_get_d3d11va_bind_flags(hwctx->frame_type); ++ } ++#endif + #if CONFIG_DXVA2 + if (child_device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) { + AVDXVA2FramesContext *child_frames_hwctx = child_frames_ctx->hwctx; +@@ 
-286,16 +338,41 @@ static int qsv_init_child_ctx(AVHWFrames + #if CONFIG_VAAPI + if (child_device_ctx->type == AV_HWDEVICE_TYPE_VAAPI) { + AVVAAPIFramesContext *child_frames_hwctx = child_frames_ctx->hwctx; +- for (i = 0; i < ctx->initial_pool_size; i++) +- s->surfaces_internal[i].Data.MemId = child_frames_hwctx->surface_ids + i; ++ for (i = 0; i < ctx->initial_pool_size; i++) { ++ s->handle_pairs_internal[i].first = child_frames_hwctx->surface_ids + i; ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; ++ } + hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + } + #endif ++#if CONFIG_D3D11VA ++ if (child_device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ AVD3D11VAFramesContext *child_frames_hwctx = child_frames_ctx->hwctx; ++ for (i = 0; i < ctx->initial_pool_size; i++) { ++ s->handle_pairs_internal[i].first = (mfxMemId)child_frames_hwctx->texture_infos[i].texture; ++ if(child_frames_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ } else { ++ s->handle_pairs_internal[i].second = (mfxMemId)child_frames_hwctx->texture_infos[i].index; ++ } ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; ++ } ++ if (child_frames_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET; ++ } else { ++ hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; ++ } ++ } ++#endif + #if CONFIG_DXVA2 + if (child_device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) { + AVDXVA2FramesContext *child_frames_hwctx = child_frames_ctx->hwctx; +- for (i = 0; i < ctx->initial_pool_size; i++) +- s->surfaces_internal[i].Data.MemId = (mfxMemId)child_frames_hwctx->surfaces[i]; ++ for (i = 0; i < ctx->initial_pool_size; i++) { ++ s->handle_pairs_internal[i].first = (mfxMemId)child_frames_hwctx->surfaces[i]; ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; ++ } + if (child_frames_hwctx->surface_type == DXVA2_VideoProcessorRenderTarget) + hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET; + else +@@ -360,6 +437,11 @@ static int qsv_init_pool(AVHWFramesConte + return AVERROR(EINVAL); + } + ++ s->handle_pairs_internal = av_mallocz_array(ctx->initial_pool_size, ++ sizeof(*s->handle_pairs_internal)); ++ if (!s->handle_pairs_internal) ++ return AVERROR(ENOMEM); ++ + s->surfaces_internal = av_mallocz_array(ctx->initial_pool_size, + sizeof(*s->surfaces_internal)); + if (!s->surfaces_internal) +@@ -433,7 +515,13 @@ static mfxStatus frame_unlock(mfxHDL pth + + static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { +- *hdl = mid; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)mid; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +@@ -626,13 +714,26 @@ static int qsv_frames_derive_from(AVHWFr + sizeof(*dst_hwctx->surface_ids)); + if (!dst_hwctx->surface_ids) + return AVERROR(ENOMEM); +- for (i = 0; i < src_hwctx->nb_surfaces; i++) +- dst_hwctx->surface_ids[i] = +- *(VASurfaceID*)src_hwctx->surfaces[i].Data.MemId; ++ for (i = 0; i < src_hwctx->nb_surfaces; i++) { ++ mfxHDLPair *pair = (mfxHDLPair*)src_hwctx->surfaces[i].Data.MemId; ++ dst_hwctx->surface_ids[i] = *(VASurfaceID*)pair->first; ++ } + dst_hwctx->nb_surfaces = 
src_hwctx->nb_surfaces; + } + break; + #endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ { ++ AVD3D11VAFramesContext *dst_hwctx = dst_ctx->hwctx; ++ mfxHDLPair *pair = (mfxHDLPair*)src_hwctx->surfaces[i].Data.MemId; ++ dst_hwctx->texture = (ID3D11Texture2D*)pair->first; ++ if (src_hwctx->frame_type & MFX_MEMTYPE_SHARED_RESOURCE) ++ dst_hwctx->MiscFlags = D3D11_RESOURCE_MISC_SHARED; ++ dst_hwctx->BindFlags = qsv_get_d3d11va_bind_flags(src_hwctx->frame_type); ++ } ++ break; ++#endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: + { +@@ -641,9 +742,10 @@ static int qsv_frames_derive_from(AVHWFr + sizeof(*dst_hwctx->surfaces)); + if (!dst_hwctx->surfaces) + return AVERROR(ENOMEM); +- for (i = 0; i < src_hwctx->nb_surfaces; i++) +- dst_hwctx->surfaces[i] = +- (IDirect3DSurface9*)src_hwctx->surfaces[i].Data.MemId; ++ for (i = 0; i < src_hwctx->nb_surfaces; i++) { ++ mfxHDLPair *pair = (mfxHDLPair*)src_hwctx->surfaces[i].Data.MemId; ++ dst_hwctx->surfaces[i] = (IDirect3DSurface9*)pair->first; ++ } + dst_hwctx->nb_surfaces = src_hwctx->nb_surfaces; + if (src_hwctx->frame_type == MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET) + dst_hwctx->surface_type = DXVA2_VideoDecoderRenderTarget; +@@ -677,13 +779,31 @@ static int qsv_map_from(AVHWFramesContex + switch (child_frames_ctx->device_ctx->type) { + #if CONFIG_VAAPI + case AV_HWDEVICE_TYPE_VAAPI: +- child_data = (uint8_t*)(intptr_t)*(VASurfaceID*)surf->Data.MemId; ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ /* pair->first is *VASurfaceID while data[3] in vaapi frame is VASurfaceID, so ++ * we need this casting for vaapi. ++ * Add intptr_t to force cast from VASurfaceID(uint) type to pointer(long) type ++ * to avoid compile warning */ ++ child_data = (uint8_t*)(intptr_t)*(VASurfaceID*)pair->first; + break; ++ } ++#endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ child_data = pair->first; ++ break; ++ } + #endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: +- child_data = surf->Data.MemId; ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ child_data = pair->first; + break; ++ } + #endif + default: + return AVERROR(ENOSYS); +@@ -697,7 +817,14 @@ static int qsv_map_from(AVHWFramesContex + + dst->width = src->width; + dst->height = src->height; +- dst->data[3] = child_data; ++ ++ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ dst->data[0] = pair->first; ++ dst->data[1] = pair->second; ++ } else { ++ dst->data[3] = child_data; ++ } + + return 0; + } +@@ -720,7 +847,14 @@ static int qsv_map_from(AVHWFramesContex + dummy->format = child_frames_ctx->format; + dummy->width = src->width; + dummy->height = src->height; +- dummy->data[3] = child_data; ++ ++ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ dummy->data[0] = pair->first; ++ dummy->data[1] = pair->second; ++ } else { ++ dummy->data[3] = child_data; ++ } + + ret = av_hwframe_map(dst, dummy, flags); + +@@ -978,35 +1112,84 @@ static int qsv_frames_derive_to(AVHWFram + AVQSVFramesContext *dst_hwctx = dst_ctx->hwctx; + int i; + ++ if (src_ctx->initial_pool_size == 0) { ++ av_log(dst_ctx, AV_LOG_ERROR, "Only fixed-size pools can be " ++ "mapped to QSV frames.\n"); ++ return AVERROR(EINVAL); ++ } ++ + switch (src_ctx->device_ctx->type) { + #if CONFIG_VAAPI + case AV_HWDEVICE_TYPE_VAAPI: + { + AVVAAPIFramesContext *src_hwctx = 
src_ctx->hwctx; ++ s->handle_pairs_internal = av_mallocz_array(src_hwctx->nb_surfaces, ++ sizeof(*s->handle_pairs_internal)); ++ if (!s->handle_pairs_internal) ++ return AVERROR(ENOMEM); + s->surfaces_internal = av_mallocz_array(src_hwctx->nb_surfaces, + sizeof(*s->surfaces_internal)); + if (!s->surfaces_internal) + return AVERROR(ENOMEM); + for (i = 0; i < src_hwctx->nb_surfaces; i++) { + qsv_init_surface(dst_ctx, &s->surfaces_internal[i]); +- s->surfaces_internal[i].Data.MemId = src_hwctx->surface_ids + i; ++ s->handle_pairs_internal[i].first = src_hwctx->surface_ids + i; ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; + } + dst_hwctx->nb_surfaces = src_hwctx->nb_surfaces; + dst_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + } + break; + #endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ { ++ AVD3D11VAFramesContext *src_hwctx = src_ctx->hwctx; ++ s->handle_pairs_internal = av_mallocz_array(src_hwctx->nb_surfaces, ++ sizeof(*s->handle_pairs_internal)); ++ if (!s->handle_pairs_internal) ++ return AVERROR(ENOMEM); ++ s->surfaces_internal = av_mallocz_array(src_hwctx->nb_surfaces, ++ sizeof(*s->surfaces_internal)); ++ if (!s->surfaces_internal) ++ return AVERROR(ENOMEM); ++ for (i = 0; i < src_hwctx->nb_surfaces; i++) { ++ qsv_init_surface(dst_ctx, &s->surfaces_internal[i]); ++ s->handle_pairs_internal[i].first = (mfxMemId)src_hwctx->texture_infos[i].texture; ++ if (src_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ } else { ++ s->handle_pairs_internal[i].second = (mfxMemId)src_hwctx->texture_infos[i].index; ++ } ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; ++ } ++ dst_hwctx->nb_surfaces = src_hwctx->nb_surfaces; ++ if (src_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET; ++ } else { ++ dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; ++ } ++ } ++ break; ++#endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: + { + AVDXVA2FramesContext *src_hwctx = src_ctx->hwctx; ++ s->handle_pairs_internal = av_mallocz_array(src_hwctx->nb_surfaces, ++ sizeof(*s->handle_pairs_internal)); ++ if (!s->handle_pairs_internal) ++ return AVERROR(ENOMEM); + s->surfaces_internal = av_mallocz_array(src_hwctx->nb_surfaces, + sizeof(*s->surfaces_internal)); + if (!s->surfaces_internal) + return AVERROR(ENOMEM); + for (i = 0; i < src_hwctx->nb_surfaces; i++) { + qsv_init_surface(dst_ctx, &s->surfaces_internal[i]); +- s->surfaces_internal[i].Data.MemId = (mfxMemId)src_hwctx->surfaces[i]; ++ s->handle_pairs_internal[i].first = (mfxMemId)src_hwctx->surfaces[i]; ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; + } + dst_hwctx->nb_surfaces = src_hwctx->nb_surfaces; + if (src_hwctx->surface_type == DXVA2_VideoProcessorRenderTarget) +@@ -1029,21 +1212,44 @@ static int qsv_map_to(AVHWFramesContext + AVFrame *dst, const AVFrame *src, int flags) + { + AVQSVFramesContext *hwctx = dst_ctx->hwctx; +- int i, err; ++ int i, err, index = -1; + +- for (i = 0; i < hwctx->nb_surfaces; i++) { ++ for (i = 0; i < hwctx->nb_surfaces && index < 0; i++) { ++ switch(src->format) { + #if CONFIG_VAAPI +- if (*(VASurfaceID*)hwctx->surfaces[i].Data.MemId == +- (VASurfaceID)(uintptr_t)src->data[3]) +- break; ++ case AV_PIX_FMT_VAAPI: ++ { ++ 
mfxHDLPair *pair = (mfxHDLPair*)hwctx->surfaces[i].Data.MemId; ++ if (*(VASurfaceID*)pair->first == (VASurfaceID)src->data[3]) { ++ index = i; ++ break; ++ } ++ } ++#endif ++#if CONFIG_D3D11VA ++ case AV_PIX_FMT_D3D11: ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)hwctx->surfaces[i].Data.MemId; ++ if (pair->first == src->data[0] ++ && pair->second == src->data[1]) { ++ index = i; ++ break; ++ } ++ } + #endif + #if CONFIG_DXVA2 +- if ((IDirect3DSurface9*)hwctx->surfaces[i].Data.MemId == +- (IDirect3DSurface9*)(uintptr_t)src->data[3]) +- break; ++ case AV_PIX_FMT_DXVA2_VLD: ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)hwctx->surfaces[i].Data.MemId; ++ if (pair->first == src->data[3]) { ++ index = i; ++ break; ++ } ++ } + #endif ++ } + } +- if (i >= hwctx->nb_surfaces) { ++ if (index < 0) { + av_log(dst_ctx, AV_LOG_ERROR, "Trying to map from a surface which " + "is not in the mapped frames context.\n"); + return AVERROR(EINVAL); +@@ -1056,7 +1262,7 @@ static int qsv_map_to(AVHWFramesContext + + dst->width = src->width; + dst->height = src->height; +- dst->data[3] = (uint8_t*)&hwctx->surfaces[i]; ++ dst->data[3] = (uint8_t*)&hwctx->surfaces[index]; + + return 0; + } +@@ -1098,7 +1304,7 @@ static void qsv_device_free(AVHWDeviceCo + av_freep(&priv); + } + +-static mfxIMPL choose_implementation(const char *device) ++static mfxIMPL choose_implementation(const char *device, enum AVHWDeviceType child_device_type) + { + static const struct { + const char *name; +@@ -1127,6 +1333,13 @@ static mfxIMPL choose_implementation(con + impl = strtol(device, NULL, 0); + } + ++ if (impl != MFX_IMPL_SOFTWARE) { ++ if (child_device_type == AV_HWDEVICE_TYPE_D3D11VA) ++ impl |= MFX_IMPL_VIA_D3D11; ++ else if (child_device_type == AV_HWDEVICE_TYPE_DXVA2) ++ impl |= MFX_IMPL_VIA_D3D9; ++ } ++ + return impl; + } + +@@ -1153,6 +1366,15 @@ static int qsv_device_derive_from_child( + } + break; + #endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ { ++ AVD3D11VADeviceContext *child_device_hwctx = child_device_ctx->hwctx; ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ handle = (mfxHDL)child_device_hwctx->device; ++ } ++ break; ++#endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: + { +@@ -1216,7 +1438,9 @@ static int qsv_device_derive(AVHWDeviceC + AVHWDeviceContext *child_device_ctx, + AVDictionary *opts, int flags) + { +- return qsv_device_derive_from_child(ctx, MFX_IMPL_HARDWARE_ANY, ++ mfxIMPL impl; ++ impl = choose_implementation("hw_any", child_device_ctx->type); ++ return qsv_device_derive_from_child(ctx, impl, + child_device_ctx, flags); + } + +@@ -1239,25 +1463,57 @@ static int qsv_device_create(AVHWDeviceC + ctx->user_opaque = priv; + ctx->free = qsv_device_free; + +- e = av_dict_get(opts, "child_device", NULL, 0); +- +- child_device_opts = NULL; +- if (CONFIG_VAAPI) { ++ e = av_dict_get(opts, "child_device_type", NULL, 0); ++ if (e) { ++ child_device_type = av_hwdevice_find_type_by_name(e ? e->value : NULL); ++ if (child_device_type == AV_HWDEVICE_TYPE_NONE) { ++ av_log(ctx, AV_LOG_ERROR, "Unknown child device type " ++ "\"%s\".\n", e ? e->value : NULL); ++ return AVERROR(EINVAL); ++ } ++ } else if (CONFIG_VAAPI) { + child_device_type = AV_HWDEVICE_TYPE_VAAPI; +- // libmfx does not actually implement VAAPI properly, rather it +- // depends on the specific behaviour of a matching iHD driver when +- // used on recent Intel hardware. Set options to the VAAPI device +- // creation so that we should pick a usable setup by default if +- // possible, even when multiple devices and drivers are available. 
+- av_dict_set(&child_device_opts, "kernel_driver", "i915", 0); +- av_dict_set(&child_device_opts, "driver", "iHD", 0); +- } else if (CONFIG_DXVA2) ++ } else if (CONFIG_D3D11VA) { ++ child_device_type = AV_HWDEVICE_TYPE_D3D11VA; ++ } else if (CONFIG_DXVA2) { + child_device_type = AV_HWDEVICE_TYPE_DXVA2; +- else { ++ } else { + av_log(ctx, AV_LOG_ERROR, "No supported child device type is enabled\n"); + return AVERROR(ENOSYS); + } + ++ child_device_opts = NULL; ++ switch (child_device_type) { ++#if CONFIG_VAAPI ++ case AV_HWDEVICE_TYPE_VAAPI: ++ { ++ // libmfx does not actually implement VAAPI properly, rather it ++ // depends on the specific behaviour of a matching iHD driver when ++ // used on recent Intel hardware. Set options to the VAAPI device ++ // creation so that we should pick a usable setup by default if ++ // possible, even when multiple devices and drivers are available. ++ av_dict_set(&child_device_opts, "kernel_driver", "i915", 0); ++ av_dict_set(&child_device_opts, "driver", "iHD", 0); ++ } ++ break; ++#endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ break; ++#endif ++#if CONFIG_DXVA2 ++ case AV_HWDEVICE_TYPE_DXVA2: ++ break; ++#endif ++ default: ++ { ++ av_log(ctx, AV_LOG_ERROR, "No supported child device type is enabled\n"); ++ return AVERROR(ENOSYS); ++ } ++ break; ++ } ++ ++ e = av_dict_get(opts, "child_device", NULL, 0); + ret = av_hwdevice_ctx_create(&priv->child_device_ctx, child_device_type, + e ? e->value : NULL, child_device_opts, 0); + +@@ -1267,9 +1523,15 @@ static int qsv_device_create(AVHWDeviceC + + child_device = (AVHWDeviceContext*)priv->child_device_ctx->data; + +- impl = choose_implementation(device); ++ impl = choose_implementation(device, child_device_type); + +- return qsv_device_derive_from_child(ctx, impl, child_device, 0); ++ ret = qsv_device_derive_from_child(ctx, impl, child_device, 0); ++ if (ret == 0) { ++ ctx->internal->source_device = av_buffer_ref(priv->child_device_ctx); ++ if (!ctx->internal->source_device) ++ ret = AVERROR(ENOMEM); ++ } ++ return ret; + } + + const HWContextType ff_hwcontext_type_qsv = { diff --git a/debian/patches/series b/debian/patches/series index f1b2f9672ba..e214bc44f08 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -8,3 +8,4 @@ 0008-add-d3d11-opencl-interop-for-AMD.patch 0009-add-a-hack-for-opencl-reverse-mapping.patch 0010-add-fixes-for-ffmpeg_hw.patch +0011-add-d3d11-support-for-QSV.patch From 930f8da233592d2f8940a728327c2d9be3357280 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:40:14 +0800 Subject: [PATCH 21/41] add hw_device_ctx support for qsvdec --- ...add-hw_device_ctx-support-for-qsvdec.patch | 271 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 272 insertions(+) create mode 100644 debian/patches/0012-add-hw_device_ctx-support-for-qsvdec.patch diff --git a/debian/patches/0012-add-hw_device_ctx-support-for-qsvdec.patch b/debian/patches/0012-add-hw_device_ctx-support-for-qsvdec.patch new file mode 100644 index 00000000000..8b440fd002b --- /dev/null +++ b/debian/patches/0012-add-hw_device_ctx-support-for-qsvdec.patch @@ -0,0 +1,271 @@ +Index: jellyfin-ffmpeg/fftools/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/fftools/Makefile ++++ jellyfin-ffmpeg/fftools/Makefile +@@ -10,7 +10,6 @@ ALLAVPROGS = $(AVBASENAMES:%=%$(PROGSS + ALLAVPROGS_G = $(AVBASENAMES:%=%$(PROGSSUF)_g$(EXESUF)) + + OBJS-ffmpeg += fftools/ffmpeg_opt.o fftools/ffmpeg_filter.o fftools/ffmpeg_hw.o 
+-OBJS-ffmpeg-$(CONFIG_LIBMFX) += fftools/ffmpeg_qsv.o + ifndef CONFIG_VIDEOTOOLBOX + OBJS-ffmpeg-$(CONFIG_VDA) += fftools/ffmpeg_videotoolbox.o + endif +Index: jellyfin-ffmpeg/fftools/ffmpeg.h +=================================================================== +--- jellyfin-ffmpeg.orig/fftools/ffmpeg.h ++++ jellyfin-ffmpeg/fftools/ffmpeg.h +@@ -60,7 +60,6 @@ enum HWAccelID { + HWACCEL_AUTO, + HWACCEL_GENERIC, + HWACCEL_VIDEOTOOLBOX, +- HWACCEL_QSV, + }; + + typedef struct HWAccel { +Index: jellyfin-ffmpeg/fftools/ffmpeg_hw.c +=================================================================== +--- jellyfin-ffmpeg.orig/fftools/ffmpeg_hw.c ++++ jellyfin-ffmpeg/fftools/ffmpeg_hw.c +@@ -353,6 +353,18 @@ int hw_device_setup_for_decode(InputStre + } else if (ist->hwaccel_id == HWACCEL_GENERIC) { + type = ist->hwaccel_device_type; + dev = hw_device_get_by_type(type); ++ ++ // When "-qsv_device device" is used, an internal QSV device named ++ // as "__qsv_device" is created. Another QSV device is created too ++ // if "-init_hw_device qsv=name:device" is used. There are 2 QSV devices ++ // if both "-qsv_device device" and "-init_hw_device qsv=name:device" ++ // are used, hw_device_get_by_type(AV_HWDEVICE_TYPE_QSV) returns NULL. ++ // To keep back-compatibility with the removed ad-hoc libmfx setup code, ++ // call hw_device_get_by_name("__qsv_device") to select the internal QSV ++ // device. ++ if (!dev && type == AV_HWDEVICE_TYPE_QSV) ++ dev = hw_device_get_by_name("__qsv_device"); ++ + if (!dev) + err = hw_device_init_from_type(type, NULL, &dev); + } else { +Index: jellyfin-ffmpeg/fftools/ffmpeg_opt.c +=================================================================== +--- jellyfin-ffmpeg.orig/fftools/ffmpeg_opt.c ++++ jellyfin-ffmpeg/fftools/ffmpeg_opt.c +@@ -137,9 +137,6 @@ const HWAccel hwaccels[] = { + #if CONFIG_VIDEOTOOLBOX + { "videotoolbox", videotoolbox_init, HWACCEL_VIDEOTOOLBOX, AV_PIX_FMT_VIDEOTOOLBOX }, + #endif +-#if CONFIG_LIBMFX +- { "qsv", qsv_init, HWACCEL_QSV, AV_PIX_FMT_QSV }, +-#endif + { 0 }, + }; + HWDevice *filter_hw_device; +@@ -569,6 +566,23 @@ static int opt_vaapi_device(void *optctx + } + #endif + ++#if CONFIG_QSV ++static int opt_qsv_device(void *optctx, const char *opt, const char *arg) ++{ ++ const char *prefix = "qsv=__qsv_device:hw_any,child_device="; ++ int err; ++ char *tmp = av_asprintf("%s%s", prefix, arg); ++ ++ if (!tmp) ++ return AVERROR(ENOMEM); ++ ++ err = hw_device_init_from_string(tmp, NULL); ++ av_free(tmp); ++ ++ return err; ++} ++#endif ++ + static int opt_init_hw_device(void *optctx, const char *opt, const char *arg) + { + if (!strcmp(arg, "list")) { +@@ -893,6 +907,12 @@ static void add_input_streams(OptionsCon + "with old commandlines. This behaviour is DEPRECATED and will be removed " + "in the future. Please explicitly set \"-hwaccel_output_format cuda\".\n"); + ist->hwaccel_output_format = AV_PIX_FMT_CUDA; ++ } else if (!hwaccel_output_format && hwaccel && !strcmp(hwaccel, "qsv")) { ++ av_log(NULL, AV_LOG_WARNING, ++ "WARNING: defaulting hwaccel_output_format to qsv for compatibility " ++ "with old commandlines. This behaviour is DEPRECATED and will be removed " ++ "in the future. 
Please explicitly set \"-hwaccel_output_format qsv\".\n");
++ ist->hwaccel_output_format = AV_PIX_FMT_QSV;
+ } else if (hwaccel_output_format) {
+ ist->hwaccel_output_format = av_get_pix_fmt(hwaccel_output_format);
+ if (ist->hwaccel_output_format == AV_PIX_FMT_NONE) {
+@@ -3814,7 +3834,7 @@ const OptionDef options[] = {
+ #endif
+
+ #if CONFIG_QSV
+- { "qsv_device", HAS_ARG | OPT_STRING | OPT_EXPERT, { &qsv_device },
++ { "qsv_device", HAS_ARG | OPT_EXPERT, { .func_arg = opt_qsv_device },
+ "set QSV hardware device (DirectX adapter index, DRM path or X11 display name)", "device"},
+ #endif
+
+Index: jellyfin-ffmpeg/fftools/ffmpeg_qsv.c
+===================================================================
+--- jellyfin-ffmpeg.orig/fftools/ffmpeg_qsv.c
++++ /dev/null
+@@ -1,110 +0,0 @@
+-/*
+- * This file is part of FFmpeg.
+- *
+- * FFmpeg is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU Lesser General Public
+- * License as published by the Free Software Foundation; either
+- * version 2.1 of the License, or (at your option) any later version.
+- *
+- * FFmpeg is distributed in the hope that it will be useful,
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- * Lesser General Public License for more details.
+- *
+- * You should have received a copy of the GNU Lesser General Public
+- * License along with FFmpeg; if not, write to the Free Software
+- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+- */
+-
+-#include <mfx/mfxvideo.h>
+-#include <stdlib.h>
+-
+-#include "libavutil/dict.h"
+-#include "libavutil/hwcontext.h"
+-#include "libavutil/hwcontext_qsv.h"
+-#include "libavutil/mem.h"
+-#include "libavutil/opt.h"
+-#include "libavcodec/qsv.h"
+-
+-#include "ffmpeg.h"
+-
+-static AVBufferRef *hw_device_ctx;
+-char *qsv_device = NULL;
+-
+-static int qsv_get_buffer(AVCodecContext *s, AVFrame *frame, int flags)
+-{
+- InputStream *ist = s->opaque;
+-
+- return av_hwframe_get_buffer(ist->hw_frames_ctx, frame, 0);
+-}
+-
+-static void qsv_uninit(AVCodecContext *s)
+-{
+- InputStream *ist = s->opaque;
+- av_buffer_unref(&ist->hw_frames_ctx);
+-}
+-
+-static int qsv_device_init(InputStream *ist)
+-{
+- int err;
+- AVDictionary *dict = NULL;
+-
+- if (qsv_device) {
+- err = av_dict_set(&dict, "child_device", qsv_device, 0);
+- if (err < 0)
+- return err;
+- }
+-
+- err = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV,
+- ist->hwaccel_device, dict, 0);
+- if (err < 0) {
+- av_log(NULL, AV_LOG_ERROR, "Error creating a QSV device\n");
+- goto err_out;
+- }
+-
+-err_out:
+- if (dict)
+- av_dict_free(&dict);
+-
+- return err;
+-}
+-
+-int qsv_init(AVCodecContext *s)
+-{
+- InputStream *ist = s->opaque;
+- AVHWFramesContext *frames_ctx;
+- AVQSVFramesContext *frames_hwctx;
+- int ret;
+-
+- if (!hw_device_ctx) {
+- ret = qsv_device_init(ist);
+- if (ret < 0)
+- return ret;
+- }
+-
+- av_buffer_unref(&ist->hw_frames_ctx);
+- ist->hw_frames_ctx = av_hwframe_ctx_alloc(hw_device_ctx);
+- if (!ist->hw_frames_ctx)
+- return AVERROR(ENOMEM);
+-
+- frames_ctx = (AVHWFramesContext*)ist->hw_frames_ctx->data;
+- frames_hwctx = frames_ctx->hwctx;
+-
+- frames_ctx->width = FFALIGN(s->coded_width, 32);
+- frames_ctx->height = FFALIGN(s->coded_height, 32);
+- frames_ctx->format = AV_PIX_FMT_QSV;
+- frames_ctx->sw_format = s->sw_pix_fmt;
+- frames_ctx->initial_pool_size = 64 + s->extra_hw_frames;
+- frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
+-
+- ret = av_hwframe_ctx_init(ist->hw_frames_ctx);
+- if (ret < 0) {
+- av_log(NULL, AV_LOG_ERROR, "Error initializing a QSV frame pool\n");
+- return ret;
+- }
+-
+- ist->hwaccel_get_buffer = qsv_get_buffer;
+- ist->hwaccel_uninit = qsv_uninit;
+-
+- return 0;
+-}
+Index: jellyfin-ffmpeg/libavcodec/qsvdec.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavcodec/qsvdec.c
++++ jellyfin-ffmpeg/libavcodec/qsvdec.c
+@@ -89,7 +89,7 @@ static const AVCodecHWConfigInternal *co
+ .public = {
+ .pix_fmt = AV_PIX_FMT_QSV,
+ .methods = AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX |
+- AV_CODEC_HW_CONFIG_METHOD_AD_HOC,
++ AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX,
+ .device_type = AV_HWDEVICE_TYPE_QSV,
+ },
+ .hwaccel = NULL,
+@@ -238,6 +238,35 @@ static int qsv_decode_preinit(AVCodecCon
+ q->nb_ext_buffers = user_ctx->nb_ext_buffers;
+ }
+
++ if (avctx->hw_device_ctx && !avctx->hw_frames_ctx && ret == AV_PIX_FMT_QSV) {
++ AVHWFramesContext *hwframes_ctx;
++ AVQSVFramesContext *frames_hwctx;
++
++ avctx->hw_frames_ctx = av_hwframe_ctx_alloc(avctx->hw_device_ctx);
++
++ if (!avctx->hw_frames_ctx) {
++ av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
++ return AVERROR(ENOMEM);
++ }
++
++ hwframes_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
++ frames_hwctx = hwframes_ctx->hwctx;
++ hwframes_ctx->width = FFALIGN(avctx->coded_width, 32);
++ hwframes_ctx->height = FFALIGN(avctx->coded_height, 32);
++ hwframes_ctx->format = AV_PIX_FMT_QSV;
++ hwframes_ctx->sw_format = avctx->sw_pix_fmt;
++ hwframes_ctx->initial_pool_size = 64 + avctx->extra_hw_frames;
++ frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
++
++ ret = av_hwframe_ctx_init(avctx->hw_frames_ctx);
++
++ if (ret < 0) {
++ av_log(NULL, AV_LOG_ERROR, "Error initializing a QSV frame pool\n");
++ av_buffer_unref(&avctx->hw_frames_ctx);
++ return ret;
++ }
++ }
++
+ if (avctx->hw_frames_ctx) {
+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+ AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
diff --git a/debian/patches/series b/debian/patches/series
index e214bc44f08..54d9308e99c 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -9,3 +9,4 @@
 0009-add-a-hack-for-opencl-reverse-mapping.patch
 0010-add-fixes-for-ffmpeg_hw.patch
 0011-add-d3d11-support-for-QSV.patch
+0012-add-hw_device_ctx-support-for-qsvdec.patch

From 1accfc76cbc8a0cab83dd6c4e874e358e75f6ea2 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Fri, 17 Dec 2021 23:51:48 +0800
Subject: [PATCH 22/41] add qsv(d3d11)-opencl interop

---
 .../0013-add-qsv-d3d11-opencl-interop.patch | 321 ++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 322 insertions(+)
 create mode 100644 debian/patches/0013-add-qsv-d3d11-opencl-interop.patch

diff --git a/debian/patches/0013-add-qsv-d3d11-opencl-interop.patch b/debian/patches/0013-add-qsv-d3d11-opencl-interop.patch
new file mode 100644
index 00000000000..97552fbb374
--- /dev/null
+++ b/debian/patches/0013-add-qsv-d3d11-opencl-interop.patch
@@ -0,0 +1,321 @@
+Index: jellyfin-ffmpeg/libavutil/hwcontext_opencl.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavutil/hwcontext_opencl.c
++++ jellyfin-ffmpeg/libavutil/hwcontext_opencl.c
+@@ -62,6 +62,9 @@
+ #endif
+
+ #if HAVE_OPENCL_D3D11
++#if CONFIG_LIBMFX
++#include "hwcontext_qsv.h"
++#endif
+ #include <CL/cl_d3d11.h>
+ #include "hwcontext_d3d11va.h"
+
+@@ -122,6 +125,7 @@ typedef struct OpenCLDeviceContext {
+
int d3d11_mapping_usable; ++ int d3d11_qsv_mapping_usable; + int d3d11_map_amd; + int d3d11_map_intel; + +@@ -876,6 +880,11 @@ static int opencl_device_init(AVHWDevice + priv->d3d11_mapping_usable = 0; + } else { + priv->d3d11_mapping_usable = 1; ++ ++ if (priv->d3d11_map_intel) ++ priv->d3d11_qsv_mapping_usable = 1; ++ else ++ priv->d3d11_qsv_mapping_usable = 0; + } + } + #endif +@@ -1746,18 +1755,20 @@ static void opencl_frames_uninit(AVHWFra + + #if HAVE_OPENCL_DXVA2 || HAVE_OPENCL_D3D11 + int i, p; +- for (i = 0; i < priv->nb_mapped_frames; i++) { +- AVOpenCLFrameDescriptor *desc = &priv->mapped_frames[i]; +- for (p = 0; p < desc->nb_planes; p++) { +- cle = clReleaseMemObject(desc->planes[p]); +- if (cle != CL_SUCCESS) { +- av_log(hwfc, AV_LOG_ERROR, "Failed to release mapped " +- "frame object (frame %d plane %d): %d.\n", +- i, p, cle); ++ if (priv->nb_mapped_frames && priv->mapped_frames) { ++ for (i = 0; i < priv->nb_mapped_frames; i++) { ++ AVOpenCLFrameDescriptor *desc = &priv->mapped_frames[i]; ++ for (p = 0; p < desc->nb_planes; p++) { ++ cle = clReleaseMemObject(desc->planes[p]); ++ if (cle != CL_SUCCESS) { ++ av_log(hwfc, AV_LOG_ERROR, "Failed to release mapped " ++ "frame object (frame %d plane %d): %d.\n", ++ i, p, cle); ++ } + } + } ++ av_freep(&priv->mapped_frames); + } +- av_freep(&priv->mapped_frames); + #endif + + if (priv->command_queue) { +@@ -2526,6 +2537,225 @@ fail: + + #if HAVE_OPENCL_D3D11 + ++#if CONFIG_LIBMFX ++ ++static void opencl_unmap_from_d3d11_qsv(AVHWFramesContext *dst_fc, ++ HWMapDescriptor *hwmap) ++{ ++ AVOpenCLFrameDescriptor *desc = hwmap->priv; ++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; ++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv; ++ cl_event event; ++ cl_int cle; ++ int p; ++ ++ av_log(dst_fc, AV_LOG_DEBUG, "Unmap QSV surface from OpenCL.\n"); ++ ++ cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR( ++ frames_priv->command_queue, desc->nb_planes, desc->planes, ++ 0, NULL, &event); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to release texture " ++ "handle: %d.\n", cle); ++ } ++ ++ opencl_wait_events(dst_fc, &event, 1); ++ ++ if (!frames_priv->nb_mapped_frames && !frames_priv->mapped_frames) { ++ for (p = 0; p < desc->nb_planes; p++) { ++ cle = clReleaseMemObject(desc->planes[p]); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to release CL " ++ "image of plane %d of D3D11 texture: %d\n", ++ p, cle); ++ } ++ } ++ av_freep(&desc); ++ } ++} ++ ++static int opencl_map_from_d3d11_qsv(AVHWFramesContext *dst_fc, AVFrame *dst, ++ const AVFrame *src, int flags) ++{ ++ AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; ++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; ++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv; ++ mfxFrameSurface1 *mfx_surface = (mfxFrameSurface1*)src->data[3]; ++ mfxHDLPair *pair = (mfxHDLPair*)mfx_surface->Data.MemId; ++ ID3D11Texture2D *tex = (ID3D11Texture2D*)pair->first; ++ AVOpenCLFrameDescriptor *desc; ++ cl_mem_flags cl_flags; ++ cl_event event; ++ cl_int cle; ++ int err, p, index, decoder_target; ++ ++ cl_flags = opencl_mem_flags_for_mapping(flags); ++ if (!cl_flags) ++ return AVERROR(EINVAL); ++ ++ av_log(dst_fc, AV_LOG_DEBUG, "Map QSV surface %#x to OpenCL.\n", pair); ++ ++ index = (intptr_t)pair->second; ++ decoder_target = index >= 0 && index != MFX_INFINITE; ++ ++ if (decoder_target && index >= frames_priv->nb_mapped_frames) { ++ av_log(dst_fc, AV_LOG_ERROR, "Texture array index out 
of range for "
++ "mapping: %d >= %d.\n", index, frames_priv->nb_mapped_frames);
++ return AVERROR(EINVAL);
++ }
++
++ if (decoder_target) {
++ desc = &frames_priv->mapped_frames[index];
++ } else {
++ desc = av_mallocz(sizeof(*desc));
++ if (!desc)
++ return AVERROR(ENOMEM);
++
++ desc->nb_planes = 2;
++ for (p = 0; p < desc->nb_planes; p++) {
++ desc->planes[p] =
++ device_priv->clCreateFromD3D11Texture2DKHR(
++ dst_dev->context, cl_flags, tex,
++ p, &cle);
++ if (!desc->planes[p]) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL "
++ "image from plane %d of D3D11 texture: %d.\n",
++ p, cle);
++ err = AVERROR(EIO);
++ goto fail2;
++ }
++ }
++ }
++
++ cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR(
++ frames_priv->command_queue, desc->nb_planes, desc->planes,
++ 0, NULL, &event);
++ if (cle != CL_SUCCESS) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire texture "
++ "handle: %d.\n", cle);
++ err = AVERROR(EIO);
++ goto fail;
++ }
++
++ err = opencl_wait_events(dst_fc, &event, 1);
++ if (err < 0)
++ goto fail;
++
++ for (p = 0; p < desc->nb_planes; p++)
++ dst->data[p] = (uint8_t*)desc->planes[p];
++
++ err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
++ &opencl_unmap_from_d3d11_qsv, desc);
++ if (err < 0)
++ goto fail;
++
++ dst->width = src->width;
++ dst->height = src->height;
++
++ return 0;
++
++fail:
++ cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR(
++ frames_priv->command_queue, desc->nb_planes, desc->planes,
++ 0, NULL, &event);
++ if (cle == CL_SUCCESS)
++ opencl_wait_events(dst_fc, &event, 1);
++fail2:
++ if (!decoder_target) {
++ for (p = 0; p < desc->nb_planes; p++) {
++ if (desc->planes[p])
++ clReleaseMemObject(desc->planes[p]);
++ }
++ av_freep(&desc);
++ }
++ return err;
++}
++
++static int opencl_frames_derive_from_d3d11_qsv(AVHWFramesContext *dst_fc,
++ AVHWFramesContext *src_fc, int flags)
++{
++ AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx;
++ AVQSVFramesContext *src_hwctx = src_fc->hwctx;
++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv;
++ cl_mem_flags cl_flags;
++ cl_int cle;
++ int err, i, p, nb_planes = 2;
++
++ mfxHDLPair *pair = (mfxHDLPair*)src_hwctx->surfaces[0].Data.MemId;
++ ID3D11Texture2D *tex = (ID3D11Texture2D*)pair->first;
++
++ if (src_fc->sw_format != AV_PIX_FMT_NV12 &&
++ src_fc->sw_format != AV_PIX_FMT_P010) {
++ av_log(dst_fc, AV_LOG_ERROR, "Only NV12 and P010 textures are "
++ "supported for QSV with D3D11 to OpenCL mapping.\n");
++ return AVERROR(EINVAL);
++ }
++
++ if (src_fc->initial_pool_size == 0) {
++ av_log(dst_fc, AV_LOG_ERROR, "Only fixed-size pools are supported "
++ "for QSV with D3D11 to OpenCL mapping.\n");
++ return AVERROR(EINVAL);
++ }
++
++ if (!(src_hwctx->frame_type & MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET) ||
++ (src_hwctx->frame_type & MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) ||
++ (src_hwctx->frame_type & MFX_MEMTYPE_FROM_VPPOUT)) {
++ av_log(dst_fc, AV_LOG_DEBUG, "Non-DECODER_TARGET direct input for QSV "
++ "with D3D11 to OpenCL mapping.\n");
++ return 0;
++ }
++
++ cl_flags = opencl_mem_flags_for_mapping(flags);
++ if (!cl_flags)
++ return AVERROR(EINVAL);
++
++ frames_priv->nb_mapped_frames = src_fc->initial_pool_size;
++
++ frames_priv->mapped_frames =
++ av_mallocz_array(frames_priv->nb_mapped_frames,
++ sizeof(*frames_priv->mapped_frames));
++ if (!frames_priv->mapped_frames)
++ return AVERROR(ENOMEM);
++
++ for (i = 0; i < frames_priv->nb_mapped_frames; i++) {
++ AVOpenCLFrameDescriptor
*desc = &frames_priv->mapped_frames[i]; ++ desc->nb_planes = nb_planes; ++ ++ for (p = 0; p < nb_planes; p++) { ++ UINT subresource = 2 * i + p; ++ desc->planes[p] = ++ device_priv->clCreateFromD3D11Texture2DKHR( ++ dst_dev->context, cl_flags, tex, ++ subresource, &cle); ++ if (!desc->planes[p]) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " ++ "image from plane %d of D3D11 texture " ++ "index %d (subresource %u): %d.\n", ++ p, i, (unsigned int)subresource, cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ } ++ ++ return 0; ++ ++fail: ++ for (i = 0; i < frames_priv->nb_mapped_frames; i++) { ++ AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; ++ for (p = 0; p < desc->nb_planes; p++) { ++ if (desc->planes[p]) ++ clReleaseMemObject(desc->planes[p]); ++ } ++ } ++ av_freep(&frames_priv->mapped_frames); ++ frames_priv->nb_mapped_frames = 0; ++ return err; ++} ++ ++#endif ++ + static void opencl_unmap_from_d3d11(AVHWFramesContext *dst_fc, + HWMapDescriptor *hwmap) + { +@@ -2966,6 +3196,11 @@ static int opencl_map_to(AVHWFramesConte + return opencl_map_from_dxva2(hwfc, dst, src, flags); + #endif + #if HAVE_OPENCL_D3D11 ++#if CONFIG_LIBMFX ++ case AV_PIX_FMT_QSV: ++ if (priv->d3d11_qsv_mapping_usable) ++ return opencl_map_from_d3d11_qsv(hwfc, dst, src, flags); ++#endif + case AV_PIX_FMT_D3D11: + if (priv->d3d11_mapping_usable) + return opencl_map_from_d3d11(hwfc, dst, src, flags); +@@ -3016,6 +3251,18 @@ static int opencl_frames_derive_to(AVHWF + break; + #endif + #if HAVE_OPENCL_D3D11 ++#if CONFIG_LIBMFX ++ case AV_HWDEVICE_TYPE_QSV: ++ if (!priv->d3d11_qsv_mapping_usable) ++ return AVERROR(ENOSYS); ++ { ++ int err; ++ err = opencl_frames_derive_from_d3d11_qsv(dst_fc, src_fc, flags); ++ if (err < 0) ++ return err; ++ } ++ break; ++#endif + case AV_HWDEVICE_TYPE_D3D11VA: + if (!priv->d3d11_mapping_usable) + return AVERROR(ENOSYS); diff --git a/debian/patches/series b/debian/patches/series index 54d9308e99c..75de1254ced 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -10,3 +10,4 @@ 0010-add-fixes-for-ffmpeg_hw.patch 0011-add-d3d11-support-for-QSV.patch 0012-add-hw_device_ctx-support-for-qsvdec.patch +0013-add-qsv-d3d11-opencl-interop.patch From 29c27b2262bd95554cfebc2783bb2834aeae2453 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:40:52 +0800 Subject: [PATCH 23/41] add vendor opts to d3d11va and bgra fmt to d3d11/dxva2 --- ...to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch | 93 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 94 insertions(+) create mode 100644 debian/patches/0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch diff --git a/debian/patches/0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch b/debian/patches/0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch new file mode 100644 index 00000000000..f89c0a9c677 --- /dev/null +++ b/debian/patches/0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch @@ -0,0 +1,93 @@ +Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +@@ -83,11 +83,12 @@ static const struct { + DXGI_FORMAT d3d_format; + enum AVPixelFormat pix_fmt; + } supported_formats[] = { +- { DXGI_FORMAT_NV12, AV_PIX_FMT_NV12 }, +- { DXGI_FORMAT_P010, AV_PIX_FMT_P010 }, ++ { DXGI_FORMAT_NV12, AV_PIX_FMT_NV12 }, ++ { DXGI_FORMAT_P010, AV_PIX_FMT_P010 }, ++ { DXGI_FORMAT_B8G8R8A8_UNORM, 
AV_PIX_FMT_BGRA },
+ // Special opaque formats. The pix_fmt is merely a place holder, as the
+ // opaque format cannot be accessed directly.
+- { DXGI_FORMAT_420_OPAQUE, AV_PIX_FMT_YUV420P },
++ { DXGI_FORMAT_420_OPAQUE, AV_PIX_FMT_YUV420P },
+ };
+
+ static void d3d11va_default_lock(void *ctx)
+@@ -539,9 +540,12 @@ static int d3d11va_device_create(AVHWDev
+ AVD3D11VADeviceContext *device_hwctx = ctx->hwctx;
+
+ HRESULT hr;
++ AVDictionaryEntry *e;
+ IDXGIAdapter *pAdapter = NULL;
+ ID3D10Multithread *pMultithread;
+ UINT creationFlags = D3D11_CREATE_DEVICE_VIDEO_SUPPORT;
++ int adapter = -1;
++ long int vendor_id = -1;
+ int is_debug = !!av_dict_get(opts, "debug", NULL, 0);
+ int ret;
+
+@@ -561,13 +565,45 @@ static int d3d11va_device_create(AVHWDev
+ return AVERROR_UNKNOWN;
+ }
+
++ e = av_dict_get(opts, "vendor", NULL, 0);
++ if (e) {
++ vendor_id = strtol(e->value, NULL, 0);
++ }
++
+ if (device) {
++ adapter = atoi(device);
++ }
++
++ if (adapter >= 0 || vendor_id != -1) {
+ IDXGIFactory2 *pDXGIFactory;
+ hr = mCreateDXGIFactory(&IID_IDXGIFactory2, (void **)&pDXGIFactory);
+ if (SUCCEEDED(hr)) {
+- int adapter = atoi(device);
+- if (FAILED(IDXGIFactory2_EnumAdapters(pDXGIFactory, adapter, &pAdapter)))
++ if (adapter < 0) {
++ int adapter_cnt = 0;
++ while (IDXGIFactory2_EnumAdapters(pDXGIFactory, adapter_cnt++, &pAdapter) != DXGI_ERROR_NOT_FOUND) {
++ DXGI_ADAPTER_DESC adapter_desc;
++ hr = IDXGIAdapter2_GetDesc(pAdapter, &adapter_desc);
++ if (FAILED(hr)) {
++ av_log(ctx, AV_LOG_ERROR, "IDXGIAdapter2_GetDesc returned error with adapter id %d\n", adapter_cnt);
++ continue;
++ }
++
++ if (adapter_desc.VendorId == vendor_id) {
++ break;
++ }
++
++ if (pAdapter)
++ IDXGIAdapter_Release(pAdapter);
++ }
++ if (!pAdapter) {
++ av_log(ctx, AV_LOG_ERROR, "Failed to find d3d11va adapter by vendor id %ld\n", vendor_id);
++ IDXGIFactory2_Release(pDXGIFactory);
++ return AVERROR_UNKNOWN;
++ }
++ } else {
++ if (FAILED(IDXGIFactory2_EnumAdapters(pDXGIFactory, adapter, &pAdapter)))
+ pAdapter = NULL;
++ }
+ IDXGIFactory2_Release(pDXGIFactory);
+ }
+ }
+Index: jellyfin-ffmpeg/libavutil/hwcontext_dxva2.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavutil/hwcontext_dxva2.c
++++ jellyfin-ffmpeg/libavutil/hwcontext_dxva2.c
+@@ -83,6 +83,7 @@ static const struct {
+ { MKTAG('N', 'V', '1', '2'), AV_PIX_FMT_NV12 },
+ { MKTAG('P', '0', '1', '0'), AV_PIX_FMT_P010 },
+ { D3DFMT_P8, AV_PIX_FMT_PAL8 },
++ { D3DFMT_A8R8G8B8, AV_PIX_FMT_BGRA },
+ };
+
+ DEFINE_GUID(video_decoder_service, 0xfc51a551, 0xd5e7, 0x11d9, 0xaf, 0x55, 0x00, 0x05, 0x4e, 0x43, 0xff, 0x02);
diff --git a/debian/patches/series b/debian/patches/series
index 75de1254ced..f4e7004c478 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -11,3 +11,4 @@
 0011-add-d3d11-support-for-QSV.patch
 0012-add-hw_device_ctx-support-for-qsvdec.patch
 0013-add-qsv-d3d11-opencl-interop.patch
+0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch

From ff7e513554cfb2d687ee4747022fd96b9975fc1c Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:41:14 +0800
Subject: [PATCH 24/41] add a vaapi hwupload filter

---
 .../0015-add-a-vaapi-hwupload-filter.patch | 237 ++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 238 insertions(+)
 create mode 100644 debian/patches/0015-add-a-vaapi-hwupload-filter.patch

diff --git a/debian/patches/0015-add-a-vaapi-hwupload-filter.patch b/debian/patches/0015-add-a-vaapi-hwupload-filter.patch
new file mode 100644
index
00000000000..6963c69df8b --- /dev/null +++ b/debian/patches/0015-add-a-vaapi-hwupload-filter.patch @@ -0,0 +1,237 @@ +Index: jellyfin-ffmpeg/configure +=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ jellyfin-ffmpeg/configure +@@ -3579,6 +3579,7 @@ fspp_filter_deps="gpl" + headphone_filter_select="fft" + histeq_filter_deps="gpl" + hqdn3d_filter_deps="gpl" ++hwupload_vaapi_filter_deps="vaapi" + interlace_filter_deps="gpl" + kerndeint_filter_deps="gpl" + ladspa_filter_deps="ladspa libdl" +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ -297,6 +297,7 @@ OBJS-$(CONFIG_HUE_FILTER) + OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o + OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o + OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o ++OBJS-$(CONFIG_HWUPLOAD_VAAPI_FILTER) += vf_hwupload_vaapi.o + OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o + OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o + OBJS-$(CONFIG_IDENTITY_FILTER) += vf_identity.o +Index: jellyfin-ffmpeg/libavfilter/allfilters.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/allfilters.c ++++ jellyfin-ffmpeg/libavfilter/allfilters.c +@@ -282,6 +282,7 @@ extern AVFilter ff_vf_hwdownload; + extern AVFilter ff_vf_hwmap; + extern AVFilter ff_vf_hwupload; + extern AVFilter ff_vf_hwupload_cuda; ++extern AVFilter ff_vf_hwupload_vaapi; + extern AVFilter ff_vf_hysteresis; + extern AVFilter ff_vf_identity; + extern AVFilter ff_vf_idet; +Index: jellyfin-ffmpeg/libavfilter/vf_hwupload_vaapi.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vf_hwupload_vaapi.c +@@ -0,0 +1,196 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/buffer.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/log.h" ++#include "libavutil/opt.h" ++ ++#include "avfilter.h" ++#include "formats.h" ++#include "internal.h" ++#include "video.h" ++ ++typedef struct VaapiUploadContext { ++ const AVClass *class; ++ int device_idx; ++ ++ AVBufferRef *hwdevice; ++ AVBufferRef *hwframe; ++} VaapiUploadContext; ++ ++static av_cold int vaapiupload_init(AVFilterContext *ctx) ++{ ++ VaapiUploadContext *s = ctx->priv; ++ return av_hwdevice_ctx_create(&s->hwdevice, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0); ++} ++ ++static av_cold void vaapiupload_uninit(AVFilterContext *ctx) ++{ ++ VaapiUploadContext *s = ctx->priv; ++ ++ av_buffer_unref(&s->hwframe); ++ av_buffer_unref(&s->hwdevice); ++} ++ ++static int vaapiupload_query_formats(AVFilterContext *ctx) ++{ ++ int ret; ++ ++ static const enum AVPixelFormat input_pix_fmts[] = { ++ AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, ++ AV_PIX_FMT_UYVY422, AV_PIX_FMT_YUYV422, AV_PIX_FMT_Y210, ++ AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, ++ AV_PIX_FMT_GRAY8, AV_PIX_FMT_P010, AV_PIX_FMT_BGRA, ++ AV_PIX_FMT_BGR0, AV_PIX_FMT_RGBA, AV_PIX_FMT_RGB0, ++ AV_PIX_FMT_ABGR, AV_PIX_FMT_0BGR, AV_PIX_FMT_ARGB, ++ AV_PIX_FMT_0RGB, AV_PIX_FMT_NONE, ++ }; ++ static const enum AVPixelFormat output_pix_fmts[] = { ++ AV_PIX_FMT_VAAPI, AV_PIX_FMT_NONE, ++ }; ++ AVFilterFormats *in_fmts = ff_make_format_list(input_pix_fmts); ++ AVFilterFormats *out_fmts; ++ ++ ret = ff_formats_ref(in_fmts, &ctx->inputs[0]->outcfg.formats); ++ if (ret < 0) ++ return ret; ++ ++ out_fmts = ff_make_format_list(output_pix_fmts); ++ ++ ret = ff_formats_ref(out_fmts, &ctx->outputs[0]->incfg.formats); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static int vaapiupload_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ AVFilterLink *inlink = ctx->inputs[0]; ++ VaapiUploadContext *s = ctx->priv; ++ ++ AVHWFramesContext *hwframe_ctx; ++ int ret; ++ ++ av_buffer_unref(&s->hwframe); ++ s->hwframe = av_hwframe_ctx_alloc(s->hwdevice); ++ if (!s->hwframe) ++ return AVERROR(ENOMEM); ++ ++ hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; ++ hwframe_ctx->format = AV_PIX_FMT_VAAPI; ++ if (inlink->hw_frames_ctx) { ++ AVHWFramesContext *in_hwframe_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ hwframe_ctx->sw_format = in_hwframe_ctx->sw_format; ++ } else { ++ hwframe_ctx->sw_format = inlink->format; ++ } ++ hwframe_ctx->width = inlink->w; ++ hwframe_ctx->height = inlink->h; ++ ++ ret = av_hwframe_ctx_init(s->hwframe); ++ if (ret < 0) ++ return ret; ++ ++ outlink->hw_frames_ctx = av_buffer_ref(s->hwframe); ++ if (!outlink->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ ++ return 0; ++} ++ ++static int vaapiupload_filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterContext *ctx = link->dst; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ ++ AVFrame *out = NULL; ++ int ret; ++ ++ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!out) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ out->width = in->width; ++ out->height = in->height; ++ ++ ret = av_hwframe_transfer_data(out, in, 0); ++ if (ret < 0) { ++ av_log(ctx, AV_LOG_ERROR, "Error transferring data to the GPU\n"); ++ goto fail; ++ } ++ ++ ret = 
av_frame_copy_props(out, in); ++ if (ret < 0) ++ goto fail; ++ ++ av_frame_free(&in); ++ ++ return ff_filter_frame(ctx->outputs[0], out); ++fail: ++ av_frame_free(&in); ++ av_frame_free(&out); ++ return ret; ++} ++ ++static const AVClass vaapiupload_class = { ++ .class_name = "vaapiupload", ++ .item_name = av_default_item_name, ++ .option = NULL, ++ .version = LIBAVUTIL_VERSION_INT, ++}; ++ ++static const AVFilterPad vaapiupload_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = vaapiupload_filter_frame, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad vaapiupload_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = vaapiupload_config_output, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vf_hwupload_vaapi = { ++ .name = "hwupload_vaapi", ++ .description = NULL_IF_CONFIG_SMALL("Upload a system memory frame to a VAAPI device."), ++ ++ .init = vaapiupload_init, ++ .uninit = vaapiupload_uninit, ++ ++ .query_formats = vaapiupload_query_formats, ++ ++ .priv_size = sizeof(VaapiUploadContext), ++ .priv_class = &vaapiupload_class, ++ ++ .inputs = vaapiupload_inputs, ++ .outputs = vaapiupload_outputs, ++ ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/debian/patches/series b/debian/patches/series index f4e7004c478..51f33e36975 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -12,3 +12,4 @@ 0012-add-hw_device_ctx-support-for-qsvdec.patch 0013-add-qsv-d3d11-opencl-interop.patch 0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch +0015-add-a-vaapi-hwupload-filter.patch From 1505e5be63b520b2458bb6e60490c79824150fbb Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:41:29 +0800 Subject: [PATCH 25/41] add fixes for the broken vaapi tonemap --- ...d-fixes-for-the-broken-vaapi-tonemap.patch | 328 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 329 insertions(+) create mode 100644 debian/patches/0016-add-fixes-for-the-broken-vaapi-tonemap.patch diff --git a/debian/patches/0016-add-fixes-for-the-broken-vaapi-tonemap.patch b/debian/patches/0016-add-fixes-for-the-broken-vaapi-tonemap.patch new file mode 100644 index 00000000000..c0e65d6ec81 --- /dev/null +++ b/debian/patches/0016-add-fixes-for-the-broken-vaapi-tonemap.patch @@ -0,0 +1,328 @@ +Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_vaapi.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_tonemap_vaapi.c ++++ jellyfin-ffmpeg/libavfilter/vf_tonemap_vaapi.c +@@ -41,7 +41,13 @@ typedef struct HDRVAAPIContext { + enum AVColorTransferCharacteristic color_transfer; + enum AVColorSpace color_matrix; + ++ char *in_master_display; ++ char *in_content_light; ++ char *out_master_display; ++ char *out_content_light; ++ + VAHdrMetaDataHDR10 in_metadata; ++ VAHdrMetaDataHDR10 out_metadata; + + AVFrameSideData *src_display; + AVFrameSideData *src_light; +@@ -54,7 +60,7 @@ static int tonemap_vaapi_save_metadata(A + AVContentLightMetadata *light_meta; + + if (input_frame->color_trc != AVCOL_TRC_SMPTE2084) { +- av_log(avctx, AV_LOG_WARNING, "Only support HDR10 as input for vaapi tone-mapping\n"); ++ av_log(avctx, AV_LOG_DEBUG, "Only support HDR10 as input for vaapi tone-mapping\n"); + } + + ctx->src_display = av_frame_get_side_data(input_frame, +@@ -62,8 +68,7 @@ static int tonemap_vaapi_save_metadata(A + if (ctx->src_display) { + hdr_meta = (AVMasteringDisplayMetadata *)ctx->src_display->data; + if (!hdr_meta) { +- av_log(avctx, AV_LOG_ERROR, "No mastering 
display data\n"); +- return AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_DEBUG, "No mastering display data\n"); + } + + if (hdr_meta->has_luminance) { +@@ -120,8 +125,7 @@ static int tonemap_vaapi_save_metadata(A + ctx->in_metadata.white_point_y); + } + } else { +- av_log(avctx, AV_LOG_ERROR, "No mastering display data from input\n"); +- return AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_DEBUG, "No mastering display data from input\n"); + } + + ctx->src_light = av_frame_get_side_data(input_frame, +@@ -129,8 +133,7 @@ static int tonemap_vaapi_save_metadata(A + if (ctx->src_light) { + light_meta = (AVContentLightMetadata *)ctx->src_light->data; + if (!light_meta) { +- av_log(avctx, AV_LOG_ERROR, "No light metadata\n"); +- return AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_DEBUG, "No light metadata\n"); + } + + ctx->in_metadata.max_content_light_level = light_meta->MaxCLL; +@@ -148,6 +151,107 @@ static int tonemap_vaapi_save_metadata(A + return 0; + } + ++static int tonemap_vaapi_update_sidedata(AVFilterContext *avctx, AVFrame *output_frame) ++{ ++ HDRVAAPIContext *ctx = avctx->priv; ++ AVFrameSideData *metadata; ++ AVMasteringDisplayMetadata *hdr_meta; ++ AVFrameSideData *metadata_lt; ++ AVContentLightMetadata *hdr_meta_lt; ++ ++ int i; ++ const int mapping[3] = {1, 2, 0}; //green, blue, red ++ const int chroma_den = 50000; ++ const int luma_den = 10000; ++ ++ metadata = av_frame_get_side_data(output_frame, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ if (metadata) { ++ av_frame_remove_side_data(output_frame, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ metadata = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, ++ sizeof(AVMasteringDisplayMetadata)); ++ } else { ++ metadata = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, ++ sizeof(AVMasteringDisplayMetadata)); ++ } ++ ++ hdr_meta = (AVMasteringDisplayMetadata *)metadata->data; ++ ++ for (i = 0; i < 3; i++) { ++ const int j = mapping[i]; ++ hdr_meta->display_primaries[j][0].num = ctx->out_metadata.display_primaries_x[i]; ++ hdr_meta->display_primaries[j][0].den = chroma_den; ++ ++ hdr_meta->display_primaries[j][1].num = ctx->out_metadata.display_primaries_y[i]; ++ hdr_meta->display_primaries[j][1].den = chroma_den; ++ } ++ ++ hdr_meta->white_point[0].num = ctx->out_metadata.white_point_x; ++ hdr_meta->white_point[0].den = chroma_den; ++ ++ hdr_meta->white_point[1].num = ctx->out_metadata.white_point_y; ++ hdr_meta->white_point[1].den = chroma_den; ++ hdr_meta->has_primaries = 1; ++ ++ hdr_meta->max_luminance.num = ctx->out_metadata.max_display_mastering_luminance; ++ hdr_meta->max_luminance.den = luma_den; ++ ++ hdr_meta->min_luminance.num = ctx->out_metadata.min_display_mastering_luminance; ++ hdr_meta->min_luminance.den = luma_den; ++ hdr_meta->has_luminance = 1; ++ ++ av_log(avctx, AV_LOG_DEBUG, ++ "Mastering Display Metadata(out luminance):\n"); ++ av_log(avctx, AV_LOG_DEBUG, ++ "min_luminance=%u, max_luminance=%u\n", ++ ctx->out_metadata.min_display_mastering_luminance, ++ ctx->out_metadata.max_display_mastering_luminance); ++ ++ av_log(avctx, AV_LOG_DEBUG, ++ "Mastering Display Metadata(out primaries):\n"); ++ av_log(avctx, AV_LOG_DEBUG, ++ "G(%u,%u) B(%u,%u) R(%u,%u) WP(%u,%u)\n", ++ ctx->out_metadata.display_primaries_x[0], ++ ctx->out_metadata.display_primaries_y[0], ++ ctx->out_metadata.display_primaries_x[1], ++ ctx->out_metadata.display_primaries_y[1], ++ ctx->out_metadata.display_primaries_x[2], ++ ctx->out_metadata.display_primaries_y[2], ++ 
ctx->out_metadata.white_point_x, ++ ctx->out_metadata.white_point_y); ++ ++ metadata_lt = av_frame_get_side_data(output_frame, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ if (metadata_lt) { ++ av_frame_remove_side_data(output_frame, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ metadata_lt = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, ++ sizeof(AVContentLightMetadata)); ++ } else { ++ metadata_lt = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, ++ sizeof(AVContentLightMetadata)); ++ } ++ ++ hdr_meta_lt = (AVContentLightMetadata *)metadata_lt->data; ++ ++ hdr_meta_lt->MaxCLL = FFMIN(ctx->out_metadata.max_content_light_level, 65535); ++ hdr_meta_lt->MaxFALL = FFMIN(ctx->out_metadata.max_pic_average_light_level, 65535); ++ ++ av_log(avctx, AV_LOG_DEBUG, ++ "Mastering Content Light Level (out):\n"); ++ av_log(avctx, AV_LOG_DEBUG, ++ "MaxCLL(%u) MaxFALL(%u)\n", ++ ctx->out_metadata.max_content_light_level, ++ ctx->out_metadata.max_pic_average_light_level); ++ ++ return 0; ++} ++ + static int tonemap_vaapi_set_filter_params(AVFilterContext *avctx, AVFrame *input_frame) + { + VAAPIVPPContext *vpp_ctx = avctx->priv; +@@ -210,15 +314,26 @@ static int tonemap_vaapi_build_filter_pa + return AVERROR(EINVAL); + } + +- for (i = 0; i < num_query_caps; i++) { +- if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) +- break; +- } +- +- if (i >= num_query_caps) { +- av_log(avctx, AV_LOG_ERROR, +- "VAAPI driver doesn't support HDR to SDR\n"); +- return AVERROR(EINVAL); ++ if (ctx->color_transfer == AVCOL_TRC_SMPTE2084) { ++ for (i = 0; i < num_query_caps; i++) { ++ if (VA_TONE_MAPPING_HDR_TO_HDR & hdr_cap[i].caps_flag) ++ break; ++ } ++ if (i >= num_query_caps) { ++ av_log(avctx, AV_LOG_ERROR, ++ "VAAPI driver doesn't support HDR to HDR\n"); ++ return AVERROR(EINVAL); ++ } ++ } else { ++ for (i = 0; i < num_query_caps; i++) { ++ if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) ++ break; ++ } ++ if (i >= num_query_caps) { ++ av_log(avctx, AV_LOG_ERROR, ++ "VAAPI driver doesn't support HDR to SDR\n"); ++ return AVERROR(EINVAL); ++ } + } + + hdrtm_param.type = VAProcFilterHighDynamicRangeToneMapping; +@@ -243,6 +358,8 @@ static int tonemap_vaapi_filter_frame(AV + VAProcPipelineParameterBuffer params; + int err; + ++ VAHdrMetaData out_hdr_metadata; ++ + av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input_frame->format), + input_frame->width, input_frame->height, input_frame->pts); +@@ -252,9 +369,11 @@ static int tonemap_vaapi_filter_frame(AV + return AVERROR(EINVAL); + } + +- err = tonemap_vaapi_save_metadata(avctx, input_frame); +- if (err < 0) +- goto fail; ++ if (!ctx->in_master_display && !ctx->in_content_light) { ++ err = tonemap_vaapi_save_metadata(avctx, input_frame); ++ if (err < 0) ++ goto fail; ++ } + + err = tonemap_vaapi_set_filter_params(avctx, input_frame); + if (err < 0) +@@ -291,11 +410,26 @@ static int tonemap_vaapi_filter_frame(AV + if (ctx->color_matrix != AVCOL_SPC_UNSPECIFIED) + output_frame->colorspace = ctx->color_matrix; + ++ if (output_frame->color_trc == AVCOL_TRC_SMPTE2084) { ++ err = tonemap_vaapi_update_sidedata(avctx, output_frame); ++ if (err < 0) ++ goto fail; ++ ++ out_hdr_metadata.metadata_type = VAProcHighDynamicRangeMetadataHDR10; ++ out_hdr_metadata.metadata = &ctx->out_metadata; ++ out_hdr_metadata.metadata_size = sizeof(VAHdrMetaDataHDR10); ++ ++ params.output_hdr_metadata = &out_hdr_metadata; ++ } ++ + err = ff_vaapi_vpp_init_params(avctx, ¶ms, + input_frame, 
output_frame); + if (err < 0) + goto fail; + ++ params.filters = &vpp_ctx->filter_buffers[0]; ++ params.num_filters = vpp_ctx->nb_filter_buffers; ++ + err = ff_vaapi_vpp_render_picture(avctx, ¶ms, output_frame); + if (err < 0) + goto fail; +@@ -355,6 +489,60 @@ static av_cold int tonemap_vaapi_init(AV + STRING_OPTION(color_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); + STRING_OPTION(color_matrix, color_space, AVCOL_SPC_UNSPECIFIED); + ++#define READ_DISPLAY_OPTION(in_or_out) do { \ ++ if (10 != sscanf(ctx->in_or_out ## _master_display, \ ++ "G(%hu|%hu)B(%hu|%hu)R(%hu|%hu)WP(%hu|%hu)L(%u|%u)", \ ++ &ctx->in_or_out ## _metadata.display_primaries_x[0], \ ++ &ctx->in_or_out ## _metadata.display_primaries_y[0], \ ++ &ctx->in_or_out ## _metadata.display_primaries_x[1], \ ++ &ctx->in_or_out ## _metadata.display_primaries_y[1], \ ++ &ctx->in_or_out ## _metadata.display_primaries_x[2], \ ++ &ctx->in_or_out ## _metadata.display_primaries_y[2], \ ++ &ctx->in_or_out ## _metadata.white_point_x, \ ++ &ctx->in_or_out ## _metadata.white_point_y, \ ++ &ctx->in_or_out ## _metadata.min_display_mastering_luminance, \ ++ &ctx->in_or_out ## _metadata.max_display_mastering_luminance)) { \ ++ av_log(avctx, AV_LOG_ERROR, \ ++ "Option " #in_or_out "-mastering-display input invalid\n"); \ ++ return AVERROR(EINVAL); \ ++ } \ ++ } while (0) ++ ++#define READ_LIGHT_OPTION(in_or_out) do { \ ++ if (2 != sscanf(ctx->in_or_out ## _content_light, \ ++ "CLL(%hu)FALL(%hu)", \ ++ &ctx->in_or_out ## _metadata.max_content_light_level, \ ++ &ctx->in_or_out ## _metadata.max_pic_average_light_level)) { \ ++ av_log(avctx, AV_LOG_ERROR, \ ++ "Option " #in_or_out "-content-light input invalid\n"); \ ++ return AVERROR(EINVAL); \ ++ } \ ++ } while (0) ++ ++ if (ctx->in_master_display) { ++ READ_DISPLAY_OPTION(in); ++ } ++ ++ if (ctx->in_content_light) { ++ READ_LIGHT_OPTION(in); ++ } ++ ++ if (ctx->color_transfer == AVCOL_TRC_SMPTE2084) { ++ if (!ctx->out_master_display) { ++ av_log(avctx, AV_LOG_ERROR, ++ "H2H tone-mapping requires valid out-mastering-display metadata\n"); ++ return AVERROR(EINVAL); ++ } ++ READ_DISPLAY_OPTION(out); ++ ++ if (!ctx->out_content_light) { ++ av_log(avctx, AV_LOG_ERROR, ++ "H2H tone-mapping requires valid out-content-light metadata\n"); ++ return AVERROR(EINVAL); ++ } ++ READ_LIGHT_OPTION(out); ++ } ++ + return 0; + } + +@@ -380,10 +568,13 @@ static const AVOption tonemap_vaapi_opti + { "t", "Output color transfer characteristics set", + OFFSET(color_transfer_string), AV_OPT_TYPE_STRING, + { .str = NULL }, .flags = FLAGS, "transfer" }, ++ { "indisplay", "Set input mastering display", OFFSET(in_master_display), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, ++ { "inlight", "Set input content light", OFFSET(in_content_light), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, ++ { "outdisplay", "Set output mastering display for H2H", OFFSET(out_master_display), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, ++ { "outlight", "Set output content light for H2H", OFFSET(out_content_light), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, + { NULL } + }; + +- + AVFILTER_DEFINE_CLASS(tonemap_vaapi); + + static const AVFilterPad tonemap_vaapi_inputs[] = { diff --git a/debian/patches/series b/debian/patches/series index 51f33e36975..507ebf96943 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -13,3 +13,4 @@ 0013-add-qsv-d3d11-opencl-interop.patch 0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch 
 0015-add-a-vaapi-hwupload-filter.patch
+0016-add-fixes-for-the-broken-vaapi-tonemap.patch

From 4f64ad1d23b366aa0c3b682284f696e68b46f5b5 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:41:48 +0800
Subject: [PATCH 26/41] add fixes for webvttenc when using segement muxer

---
 ...-webvttenc-when-using-segement-muxer.patch | 25 +++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 26 insertions(+)
 create mode 100644 debian/patches/0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch

diff --git a/debian/patches/0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch b/debian/patches/0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch
new file mode 100644
index 00000000000..7cb65ee8552
--- /dev/null
+++ b/debian/patches/0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch
@@ -0,0 +1,25 @@
+Index: jellyfin-ffmpeg/libavformat/webvttenc.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavformat/webvttenc.c
++++ jellyfin-ffmpeg/libavformat/webvttenc.c
+@@ -49,8 +49,8 @@ static int webvtt_write_header(AVFormatC
+ AVCodecParameters *par = ctx->streams[0]->codecpar;
+ AVIOContext *pb = ctx->pb;
+
+- if (ctx->nb_streams != 1 || par->codec_id != AV_CODEC_ID_WEBVTT) {
+- av_log(ctx, AV_LOG_ERROR, "Exactly one WebVTT stream is needed.\n");
++ if (par->codec_id != AV_CODEC_ID_WEBVTT) {
++ av_log(ctx, AV_LOG_ERROR, "First stream must be WebVTT.\n");
+ return AVERROR(EINVAL);
+ }
+
+@@ -67,6 +67,9 @@ static int webvtt_write_packet(AVFormatC
+ buffer_size_t id_size, settings_size;
+ uint8_t *id, *settings;
+
++ if (pkt->stream_index != 0)
++ return 0;
++
+ avio_printf(pb, "\n");
+
+ id = av_packet_get_side_data(pkt, AV_PKT_DATA_WEBVTT_IDENTIFIER,
diff --git a/debian/patches/series b/debian/patches/series
index 507ebf96943..c0a2b490c41 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -14,3 +14,4 @@
 0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch
 0015-add-a-vaapi-hwupload-filter.patch
 0016-add-fixes-for-the-broken-vaapi-tonemap.patch
+0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch

From deb62788011fd15398fd6dc40ba4123a40a16b89 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:42:19 +0800
Subject: [PATCH 27/41] add fixes for nvdec exceed 32 surfaces error

---
 ...xes-for-nvdec-exceed-32-surfaces-error.patch | 17 +++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 18 insertions(+)
 create mode 100644 debian/patches/0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch

diff --git a/debian/patches/0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch b/debian/patches/0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch
new file mode 100644
index 00000000000..ed02508ae5f
--- /dev/null
+++ b/debian/patches/0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch
@@ -0,0 +1,17 @@
+Index: jellyfin-ffmpeg/libavcodec/nvdec.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavcodec/nvdec.c
++++ jellyfin-ffmpeg/libavcodec/nvdec.c
+@@ -303,8 +303,10 @@ static int nvdec_init_hwframes(AVCodecCo
+ frames_ctx = (AVHWFramesContext*)(*out_frames_ref)->data;
+
+ if (dummy) {
+- // Copied from ff_decode_get_hw_frames_ctx for compatibility
+- frames_ctx->initial_pool_size += 3;
++ // The function above guarantees only 1 work surface. We must guarantee 4 work
++ // surfaces (the absolute minimum), so add the missing count without exceeding
++ // the maximum recommended for nvdec.
++ frames_ctx->initial_pool_size = FFMIN(frames_ctx->initial_pool_size + 3, 32); + + frames_ctx->free = nvdec_free_dummy; + frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy); diff --git a/debian/patches/series b/debian/patches/series index c0a2b490c41..7552128249c 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -15,3 +15,4 @@ 0015-add-a-vaapi-hwupload-filter.patch 0016-add-fixes-for-the-broken-vaapi-tonemap.patch 0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch +0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch From 719c36d688bd71c54b57d82f2c8bb77851140f15 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:42:40 +0800 Subject: [PATCH 28/41] add miscellaneous fixes for QSV from upstream --- ...llaneous-fixes-for-QSV-from-upstream.patch | 631 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 632 insertions(+) create mode 100644 debian/patches/0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch diff --git a/debian/patches/0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch b/debian/patches/0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch new file mode 100644 index 00000000000..fb0c7c97d2b --- /dev/null +++ b/debian/patches/0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch @@ -0,0 +1,631 @@ +Index: jellyfin-ffmpeg/libavcodec/qsv_internal.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsv_internal.h ++++ jellyfin-ffmpeg/libavcodec/qsv_internal.h +@@ -52,6 +52,8 @@ + + #define QSV_MAX_ENC_PAYLOAD 2 // # of mfxEncodeCtrl payloads supported + ++#define QSV_PAYLOAD_SIZE 1024 ++ + #define QSV_VERSION_ATLEAST(MAJOR, MINOR) \ + (MFX_VERSION_MAJOR > (MAJOR) || \ + MFX_VERSION_MAJOR == (MAJOR) && MFX_VERSION_MINOR >= (MINOR)) +Index: jellyfin-ffmpeg/libavcodec/qsvdec.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsvdec.c ++++ jellyfin-ffmpeg/libavcodec/qsvdec.c +@@ -38,14 +38,27 @@ + #include "libavutil/pixfmt.h" + #include "libavutil/time.h" + #include "libavutil/imgutils.h" ++#include "libavutil/stereo3d.h" + + #include "avcodec.h" + #include "internal.h" + #include "decode.h" + #include "hwconfig.h" ++#include "get_bits.h" + #include "qsv.h" ++#include "h264_sei.h" + #include "qsv_internal.h" + ++static const AVRational mfx_tb = { 1, 90000 }; ++ ++#define PTS_TO_MFX_PTS(pts, pts_tb) ((pts) == AV_NOPTS_VALUE ? \ ++ MFX_TIMESTAMP_UNKNOWN : pts_tb.num ? \ ++ av_rescale_q(pts, pts_tb, mfx_tb) : pts) ++ ++#define MFX_PTS_TO_PTS(mfx_pts, pts_tb) ((mfx_pts) == MFX_TIMESTAMP_UNKNOWN ? \ ++ AV_NOPTS_VALUE : pts_tb.num ? 
\ ++ av_rescale_q(mfx_pts, mfx_tb, pts_tb) : mfx_pts) ++ + typedef struct QSVContext { + // the session used for decoding + mfxSession session; +@@ -63,14 +76,13 @@ typedef struct QSVContext { + + AVFifoBuffer *async_fifo; + int zero_consume_run; +- int buffered_count; + int reinit_flag; + + enum AVPixelFormat orig_pix_fmt; + uint32_t fourcc; + mfxFrameInfo frame_info; + AVBufferPool *pool; +- ++ int suggest_pool_size; + int initialized; + + // options set by the caller +@@ -80,8 +92,13 @@ typedef struct QSVContext { + + char *load_plugins; + ++ mfxPayload payload; ++ + mfxExtBuffer **ext_buffers; + int nb_ext_buffers; ++ ++ H264SEIContext sei; ++ H264ParamSets ps; + } QSVContext; + + static const AVCodecHWConfigInternal *const qsv_hw_configs[] = { +@@ -218,6 +235,8 @@ static int qsv_decode_preinit(AVCodecCon + pix_fmt, /* system memory format obtained from bitstream parser */ + AV_PIX_FMT_NONE }; + ++ av_buffer_unref(&q->frames_ctx.mids_buf); ++ av_buffer_unref(&q->frames_ctx.hw_frames_ctx); + ret = ff_get_format(avctx, pix_fmts); + if (ret < 0) { + q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE; +@@ -255,7 +274,7 @@ static int qsv_decode_preinit(AVCodecCon + hwframes_ctx->height = FFALIGN(avctx->coded_height, 32); + hwframes_ctx->format = AV_PIX_FMT_QSV; + hwframes_ctx->sw_format = avctx->sw_pix_fmt; +- hwframes_ctx->initial_pool_size = 64 + avctx->extra_hw_frames; ++ hwframes_ctx->initial_pool_size = q->suggest_pool_size + 16 + avctx->extra_hw_frames; + frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + + ret = av_hwframe_ctx_init(avctx->hw_frames_ctx); +@@ -330,14 +349,15 @@ static int qsv_decode_header(AVCodecCont + mfxVideoParam *param) + { + int ret; +- ++ mfxExtVideoSignalInfo video_signal_info = { 0 }; ++ mfxExtBuffer *header_ext_params[1] = { (mfxExtBuffer *)&video_signal_info }; + mfxBitstream bs = { 0 }; + + if (avpkt->size) { + bs.Data = avpkt->data; + bs.DataLength = avpkt->size; + bs.MaxLength = bs.DataLength; +- bs.TimeStamp = avpkt->pts; ++ bs.TimeStamp = PTS_TO_MFX_PTS(avpkt->pts, avctx->pkt_timebase); + if (avctx->field_order == AV_FIELD_PROGRESSIVE) + bs.DataFlag |= MFX_BITSTREAM_COMPLETE_FRAME; + } else +@@ -355,6 +375,12 @@ static int qsv_decode_header(AVCodecCont + return ret; + + param->mfx.CodecId = ret; ++ video_signal_info.Header.BufferId = MFX_EXTBUFF_VIDEO_SIGNAL_INFO; ++ video_signal_info.Header.BufferSz = sizeof(video_signal_info); ++ // The SDK doesn't support other ext buffers when calling MFXVideoDECODE_DecodeHeader, ++ // so do not append this buffer to the existent buffer array ++ param->ExtParam = header_ext_params; ++ param->NumExtParam = 1; + ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param); + if (MFX_ERR_MORE_DATA == ret) { + return AVERROR(EAGAIN); +@@ -363,6 +389,17 @@ static int qsv_decode_header(AVCodecCont + return ff_qsv_print_error(avctx, ret, + "Error decoding stream header"); + ++ avctx->color_range = video_signal_info.VideoFullRange ? 
AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
++
++ if (video_signal_info.ColourDescriptionPresent) {
++ avctx->color_primaries = video_signal_info.ColourPrimaries;
++ avctx->color_trc = video_signal_info.TransferCharacteristics;
++ avctx->colorspace = video_signal_info.MatrixCoefficients;
++ }
++
++ param->ExtParam = q->ext_buffers;
++ param->NumExtParam = q->nb_ext_buffers;
++
+ return 0;
+ }
+
+@@ -381,13 +418,13 @@ static int alloc_frame(AVCodecContext *a
+ if (frame->frame->format == AV_PIX_FMT_QSV) {
+ frame->surface = *(mfxFrameSurface1*)frame->frame->data[3];
+ } else {
+- frame->surface.Info = q->frame_info;
+-
+ frame->surface.Data.PitchLow = frame->frame->linesize[0];
+ frame->surface.Data.Y = frame->frame->data[0];
+ frame->surface.Data.UV = frame->frame->data[1];
+ }
+
++ frame->surface.Info = q->frame_info;
++
+ if (q->frames_ctx.mids) {
+ ret = ff_qsv_find_surface_idx(&q->frames_ctx, frame);
+ if (ret < 0)
+@@ -470,6 +507,147 @@ static QSVFrame *find_frame(QSVContext *
+ return NULL;
+ }
+
++static int h264_decode_fpa(H264SEIFramePacking *fpa, AVFrame *frame)
++{
++ if (!fpa || !frame) {
++ return AVERROR(EINVAL);
++ }
++
++ if (!fpa->arrangement_cancel_flag &&
++ fpa->arrangement_type <= 6 &&
++ fpa->content_interpretation_type > 0 &&
++ fpa->content_interpretation_type < 3) {
++ AVStereo3D *stereo = av_stereo3d_create_side_data(frame);
++ if (stereo) {
++ switch (fpa->arrangement_type) {
++ case 0:
++ stereo->type = AV_STEREO3D_CHECKERBOARD;
++ break;
++ case 1:
++ stereo->type = AV_STEREO3D_COLUMNS;
++ break;
++ case 2:
++ stereo->type = AV_STEREO3D_LINES;
++ break;
++ case 3:
++ if (fpa->quincunx_sampling_flag)
++ stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
++ else
++ stereo->type = AV_STEREO3D_SIDEBYSIDE;
++ break;
++ case 4:
++ stereo->type = AV_STEREO3D_TOPBOTTOM;
++ break;
++ case 5:
++ stereo->type = AV_STEREO3D_FRAMESEQUENCE;
++ if (fpa->current_frame_is_frame0_flag)
++ stereo->view = AV_STEREO3D_VIEW_LEFT;
++ else
++ stereo->view = AV_STEREO3D_VIEW_RIGHT;
++ break;
++ case 6:
++ stereo->type = AV_STEREO3D_2D;
++ break;
++ }
++
++ if (fpa->content_interpretation_type == 2)
++ stereo->flags = AV_STEREO3D_FLAG_INVERT;
++ }
++ }
++ return 0;
++}
++
++static int h264_parse_side_data(AVCodecContext *avctx, QSVContext *q, AVFrame *frame)
++{
++ GetBitContext gb_payload;
++ uint8_t *sei_buffer;
++ int sei_buffer_index;
++ int ret;
++
++ /* remove emulation prevention bytes */
++ sei_buffer = (uint8_t *)av_mallocz(q->payload.NumBit / 8);
++ if (!sei_buffer) {
++ av_freep(&sei_buffer);
++ return AVERROR(ENOMEM);
++ }
++ sei_buffer_index = 0;
++ for (int i = 0; i < q->payload.NumBit / 8; i++) {
++ if (q->payload.Data[i] == 3)
++ i++;
++ sei_buffer[sei_buffer_index] = q->payload.Data[i];
++ sei_buffer_index += 1;
++ }
++
++ ret = init_get_bits8(&gb_payload, sei_buffer, sei_buffer_index+1);
++ if (ret < 0) {
++ av_freep(&sei_buffer);
++ return ret;
++ }
++
++ ret = ff_h264_sei_decode(&q->sei, &gb_payload, &q->ps, avctx);
++ if (ret < 0) {
++ av_freep(&sei_buffer);
++ return ret;
++ }
++
++ switch (q->payload.Type) {
++ case SEI_TYPE_FRAME_PACKING_ARRANGEMENT:
++ ret = h264_decode_fpa(&q->sei.frame_packing, frame);
++ break;
++ default:
++ break;
++ }
++
++ av_freep(&sei_buffer);
++ return ret;
++}
++
++static int extract_frame_side_data(AVCodecContext *avctx, QSVContext *q, AVFrame *frame)
++{
++ mfxU64 ts;
++ mfxStatus sts;
++ int ret = 0;
++
++ if (q->payload.BufSize == 0) {
++ q->payload.Data = av_mallocz(QSV_PAYLOAD_SIZE);
++ if (!q->payload.Data) {
++ av_freep(&q->payload.Data);
++ return AVERROR(ENOMEM); ++ } ++ q->payload.BufSize = QSV_PAYLOAD_SIZE; ++ } ++ ++ sts = MFX_ERR_NONE; ++ while (sts == MFX_ERR_NONE) { ++ ++ sts = MFXVideoDECODE_GetPayload(q->session, &ts, &q->payload); ++ ++ if (sts == MFX_ERR_NOT_ENOUGH_BUFFER) { ++ av_log(avctx, AV_LOG_DEBUG, "Space for SEI is not enough. One SEI will be skipped\n"); ++ continue; ++ } else if (sts != MFX_ERR_NONE || q->payload.NumBit == 0) { ++ break; ++ } ++ ++ if (q->payload.Type != SEI_TYPE_FRAME_PACKING_ARRANGEMENT) ++ continue; ++ ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ ret = h264_parse_side_data(avctx, q, frame); ++ break; ++ default: ++ break; ++ } ++ ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_WARNING, "parse side data failed\n"); ++ break; ++ } ++ } ++ return ret; ++} ++ + static int qsv_decode(AVCodecContext *avctx, QSVContext *q, + AVFrame *frame, int *got_frame, + const AVPacket *avpkt) +@@ -485,7 +663,7 @@ static int qsv_decode(AVCodecContext *av + bs.Data = avpkt->data; + bs.DataLength = avpkt->size; + bs.MaxLength = bs.DataLength; +- bs.TimeStamp = avpkt->pts; ++ bs.TimeStamp = PTS_TO_MFX_PTS(avpkt->pts, avctx->pkt_timebase); + if (avctx->field_order == AV_FIELD_PROGRESSIVE) + bs.DataFlag |= MFX_BITSTREAM_COMPLETE_FRAME; + } +@@ -510,6 +688,13 @@ static int qsv_decode(AVCodecContext *av + + } while (ret == MFX_WRN_DEVICE_BUSY || ret == MFX_ERR_MORE_SURFACE); + ++ if (ret == MFX_ERR_INCOMPATIBLE_VIDEO_PARAM) { ++ q->reinit_flag = 1; ++ av_log(avctx, AV_LOG_DEBUG, "Video parameter change\n"); ++ av_freep(&sync); ++ return 0; ++ } ++ + if (ret != MFX_ERR_NONE && + ret != MFX_ERR_MORE_DATA && + ret != MFX_WRN_VIDEO_PARAM_CHANGED && +@@ -526,8 +711,6 @@ static int qsv_decode(AVCodecContext *av + ++q->zero_consume_run; + if (q->zero_consume_run > 1) + ff_qsv_print_warning(avctx, ret, "A decode call did not consume any data"); +- } else if (!*sync && bs.DataOffset) { +- ++q->buffered_count; + } else { + q->zero_consume_run = 0; + } +@@ -542,7 +725,7 @@ static int qsv_decode(AVCodecContext *av + return AVERROR_BUG; + } + +- out_frame->queued = 1; ++ out_frame->queued += 1; + av_fifo_generic_write(q->async_fifo, &out_frame, sizeof(out_frame), NULL); + av_fifo_generic_write(q->async_fifo, &sync, sizeof(sync), NULL); + } else { +@@ -555,7 +738,7 @@ static int qsv_decode(AVCodecContext *av + + av_fifo_generic_read(q->async_fifo, &out_frame, sizeof(out_frame), NULL); + av_fifo_generic_read(q->async_fifo, &sync, sizeof(sync), NULL); +- out_frame->queued = 0; ++ out_frame->queued -= 1; + + if (avctx->pix_fmt != AV_PIX_FMT_QSV) { + do { +@@ -573,12 +756,16 @@ static int qsv_decode(AVCodecContext *av + + outsurf = &out_frame->surface; + ++ ret = extract_frame_side_data(avctx, q, frame); ++ if (ret < 0) ++ av_log(avctx, AV_LOG_WARNING, "Extracting side from packet failed\n"); ++ + #if FF_API_PKT_PTS + FF_DISABLE_DEPRECATION_WARNINGS + frame->pkt_pts = outsurf->Data.TimeStamp; + FF_ENABLE_DEPRECATION_WARNINGS + #endif +- frame->pts = outsurf->Data.TimeStamp; ++ frame->pts = MFX_PTS_TO_PTS(outsurf->Data.TimeStamp, avctx->pkt_timebase); + + frame->repeat_pict = + outsurf->Info.PicStruct & MFX_PICSTRUCT_FRAME_TRIPLING ? 
4 : +@@ -635,6 +822,8 @@ static void qsv_decode_close_qsvcontext( + av_buffer_unref(&q->frames_ctx.hw_frames_ctx); + av_buffer_unref(&q->frames_ctx.mids_buf); + av_buffer_pool_uninit(&q->pool); ++ ++ av_freep(&q->payload.Data); + } + + static int qsv_process_data(AVCodecContext *avctx, QSVContext *q, +@@ -659,26 +848,37 @@ static int qsv_process_data(AVCodecConte + if (!avctx->coded_height) + avctx->coded_height = 720; + +- ret = qsv_decode_header(avctx, q, pkt, pix_fmt, ¶m); +- +- if (ret >= 0 && (q->orig_pix_fmt != ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) || +- avctx->coded_width != param.mfx.FrameInfo.Width || +- avctx->coded_height != param.mfx.FrameInfo.Height)) { ++ /* decode zero-size pkt to flush the buffered pkt before reinit */ ++ if (q->reinit_flag) { + AVPacket zero_pkt = {0}; ++ ret = qsv_decode(avctx, q, frame, got_frame, &zero_pkt); ++ if (ret < 0 || *got_frame) ++ return ret; ++ } ++ ++ if (q->reinit_flag || !q->session) { ++ mfxFrameAllocRequest request; ++ memset(&request, 0, sizeof(request)); + +- if (q->buffered_count) { +- q->reinit_flag = 1; +- /* decode zero-size pkt to flush the buffered pkt before reinit */ +- q->buffered_count--; +- return qsv_decode(avctx, q, frame, got_frame, &zero_pkt); +- } + q->reinit_flag = 0; ++ ret = qsv_decode_header(avctx, q, pkt, pix_fmt, ¶m); ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Error decoding header\n"); ++ goto reinit_fail; ++ } ++ param.IOPattern = q->iopattern; + + q->orig_pix_fmt = avctx->pix_fmt = pix_fmt = ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC); + + avctx->coded_width = param.mfx.FrameInfo.Width; + avctx->coded_height = param.mfx.FrameInfo.Height; + ++ ret = MFXVideoDECODE_QueryIOSurf(q->session, ¶m, &request); ++ if (ret < 0) ++ return ff_qsv_print_error(avctx, ret, "Error querying IO surface"); ++ ++ q->suggest_pool_size = request.NumFrameSuggested; ++ + ret = qsv_decode_preinit(avctx, q, pix_fmt, ¶m); + if (ret < 0) + goto reinit_fail; +@@ -782,6 +982,9 @@ static av_cold int qsv_decode_init(AVCod + goto fail; + } + ++ if (!avctx->pkt_timebase.num) ++ av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n"); ++ + return 0; + fail: + qsv_decode_close(avctx); +Index: jellyfin-ffmpeg/libavcodec/qsvenc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsvenc.c ++++ jellyfin-ffmpeg/libavcodec/qsvenc.c +@@ -448,7 +448,7 @@ static int init_video_param_jpeg(AVCodec + q->param.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420; + q->param.mfx.FrameInfo.BitDepthLuma = desc->comp[0].depth; + q->param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth; +- q->param.mfx.FrameInfo.Shift = desc->comp[0].depth > 8; ++ q->param.mfx.FrameInfo.Shift = desc->comp[0].shift > 0; + + q->param.mfx.FrameInfo.Width = FFALIGN(avctx->width, 16); + q->param.mfx.FrameInfo.Height = FFALIGN(avctx->height, 16); +@@ -510,7 +510,7 @@ static int init_video_param(AVCodecConte + } + } + +- if (q->low_power) { ++ if (q->low_power == 1) { + #if QSV_HAVE_VDENC + q->param.mfx.LowPower = MFX_CODINGOPTION_ON; + #else +@@ -519,7 +519,9 @@ static int init_video_param(AVCodecConte + q->low_power = 0; + q->param.mfx.LowPower = MFX_CODINGOPTION_OFF; + #endif +- } else ++ } else if (q->low_power == -1) ++ q->param.mfx.LowPower = MFX_CODINGOPTION_UNKNOWN; ++ else + q->param.mfx.LowPower = MFX_CODINGOPTION_OFF; + + q->param.mfx.CodecProfile = q->profile; +@@ -527,7 +529,7 @@ static int init_video_param(AVCodecConte + q->param.mfx.GopPicSize = FFMAX(0, avctx->gop_size); 
+ q->param.mfx.GopRefDist = FFMAX(-1, avctx->max_b_frames) + 1; + q->param.mfx.GopOptFlag = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? +- MFX_GOP_CLOSED : 0; ++ MFX_GOP_CLOSED : MFX_GOP_STRICT; + q->param.mfx.IdrInterval = q->idr_interval; + q->param.mfx.NumSlice = avctx->slices; + q->param.mfx.NumRefFrame = FFMAX(0, avctx->refs); +@@ -550,7 +552,7 @@ static int init_video_param(AVCodecConte + !desc->log2_chroma_w + !desc->log2_chroma_h; + q->param.mfx.FrameInfo.BitDepthLuma = desc->comp[0].depth; + q->param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth; +- q->param.mfx.FrameInfo.Shift = desc->comp[0].depth > 8; ++ q->param.mfx.FrameInfo.Shift = desc->comp[0].shift > 0; + + // If the minor version is greater than or equal to 19, + // then can use the same alignment settings as H.264 for HEVC +@@ -646,7 +648,7 @@ static int init_video_param(AVCodecConte + case MFX_RATECONTROL_LA_ICQ: + q->extco2.LookAheadDepth = q->look_ahead_depth; + case MFX_RATECONTROL_ICQ: +- q->param.mfx.ICQQuality = avctx->global_quality; ++ q->param.mfx.ICQQuality = av_clip(avctx->global_quality, 1, 51); + break; + #endif + #endif +@@ -804,6 +806,24 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + #endif + ++ q->extvsi.VideoFullRange = (avctx->color_range == AVCOL_RANGE_JPEG); ++ q->extvsi.ColourDescriptionPresent = 0; ++ ++ if (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED || ++ avctx->color_trc != AVCOL_TRC_UNSPECIFIED || ++ avctx->colorspace != AVCOL_SPC_UNSPECIFIED) { ++ q->extvsi.ColourDescriptionPresent = 1; ++ q->extvsi.ColourPrimaries = avctx->color_primaries; ++ q->extvsi.TransferCharacteristics = avctx->color_trc; ++ q->extvsi.MatrixCoefficients = avctx->colorspace; ++ } ++ ++ if (q->extvsi.VideoFullRange || q->extvsi.ColourDescriptionPresent) { ++ q->extvsi.Header.BufferId = MFX_EXTBUFF_VIDEO_SIGNAL_INFO; ++ q->extvsi.Header.BufferSz = sizeof(q->extvsi); ++ q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extvsi; ++ } ++ + if (!check_enc_param(avctx,q)) { + av_log(avctx, AV_LOG_ERROR, + "some encoding parameters are not supported by the QSV " +@@ -1250,6 +1270,8 @@ static void clear_unused_frames(QSVEncCo + while (cur) { + if (cur->used && !cur->surface.Data.Locked) { + free_encoder_ctrl_payloads(&cur->enc_ctrl); ++ //do not reuse enc_ctrl from previous frame ++ memset(&cur->enc_ctrl, 0, sizeof(cur->enc_ctrl)); + if (cur->frame->format == AV_PIX_FMT_QSV) { + av_frame_unref(cur->frame); + } +Index: jellyfin-ffmpeg/libavcodec/qsvenc.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsvenc.h ++++ jellyfin-ffmpeg/libavcodec/qsvenc.h +@@ -96,7 +96,7 @@ + { "adaptive_b", "Adaptive B-frame placement", OFFSET(qsv.adaptive_b), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ + { "b_strategy", "Strategy to choose between I/P/B-frames", OFFSET(qsv.b_strategy), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ + { "forced_idr", "Forcing I frames as IDR frames", OFFSET(qsv.forced_idr), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, \ +-{ "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = 0}, 0, 1, VE},\ ++{ "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = -1}, -1, 1, VE},\ + + extern const AVCodecHWConfigInternal *const ff_qsv_enc_hw_configs[]; + +@@ -139,7 +139,9 @@ typedef struct QSVEncContext { + mfxFrameSurface1 **opaque_surfaces; + AVBufferRef 
*opaque_alloc_buf; + +- mfxExtBuffer *extparam_internal[2 + QSV_HAVE_CO2 + QSV_HAVE_CO3 + (QSV_HAVE_MF * 2)]; ++ mfxExtVideoSignalInfo extvsi; ++ ++ mfxExtBuffer *extparam_internal[3 + QSV_HAVE_CO2 + QSV_HAVE_CO3 + (QSV_HAVE_MF * 2)]; + int nb_extparam_internal; + + mfxExtBuffer **extparam; +Index: jellyfin-ffmpeg/libavfilter/qsvvpp.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/qsvvpp.c ++++ jellyfin-ffmpeg/libavfilter/qsvvpp.c +@@ -488,9 +488,6 @@ static QSVFrame *query_frame(QSVVPPConte + if (!out_frame->frame) + return NULL; + +- out_frame->frame->width = outlink->w; +- out_frame->frame->height = outlink->h; +- + ret = map_frame_to_surface(out_frame->frame, + &out_frame->surface_internal); + if (ret < 0) +@@ -499,6 +496,8 @@ static QSVFrame *query_frame(QSVVPPConte + out_frame->surface = &out_frame->surface_internal; + } + ++ out_frame->frame->width = outlink->w; ++ out_frame->frame->height = outlink->h; + out_frame->surface->Info = s->vpp_param.vpp.Out; + + return out_frame; +Index: jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +@@ -275,7 +275,7 @@ static mfxStatus frame_get_hdl(mfxHDL pt + return MFX_ERR_NONE; + } + +-static int init_out_session(AVFilterContext *ctx) ++static int init_out_session(AVFilterContext *ctx, int in_width, int in_height) + { + + QSVScaleContext *s = ctx->priv; +@@ -392,8 +392,11 @@ static int init_out_session(AVFilterCont + sizeof(*s->mem_ids_in)); + if (!s->mem_ids_in) + return AVERROR(ENOMEM); +- for (i = 0; i < in_frames_hwctx->nb_surfaces; i++) ++ for (i = 0; i < in_frames_hwctx->nb_surfaces; i++) { + s->mem_ids_in[i] = in_frames_hwctx->surfaces[i].Data.MemId; ++ in_frames_hwctx->surfaces[i].Info.CropW = in_width; ++ in_frames_hwctx->surfaces[i].Info.CropH = in_height; ++ } + s->nb_mem_ids_in = in_frames_hwctx->nb_surfaces; + + s->mem_ids_out = av_mallocz_array(out_frames_hwctx->nb_surfaces, +@@ -465,7 +468,7 @@ static int init_scale_session(AVFilterCo + if (ret < 0) + return ret; + +- ret = init_out_session(ctx); ++ ret = init_out_session(ctx, in_width, in_height); + if (ret < 0) + return ret; + +Index: jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_qsv.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +@@ -404,7 +404,7 @@ static int qsv_init_surface(AVHWFramesCo + + surf->Info.BitDepthLuma = desc->comp[0].depth; + surf->Info.BitDepthChroma = desc->comp[0].depth; +- surf->Info.Shift = desc->comp[0].depth > 8; ++ surf->Info.Shift = desc->comp[0].shift > 0; + + if (desc->log2_chroma_w && desc->log2_chroma_h) + surf->Info.ChromaFormat = MFX_CHROMAFORMAT_YUV420; diff --git a/debian/patches/series b/debian/patches/series index 7552128249c..791665e7b4f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -16,3 +16,4 @@ 0016-add-fixes-for-the-broken-vaapi-tonemap.patch 0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch 0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch +0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch From cf26dfc77aa44ff706837a6a966e74a7a2c79eda Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:45:55 +0800 Subject: [PATCH 29/41] add miscellaneous fixes for NV from upstream --- ...ellaneous-fixes-for-NV-from-upstream.patch | 79 +++++++++++++++++++ 
debian/patches/series | 1 + 2 files changed, 80 insertions(+) create mode 100644 debian/patches/0020-add-miscellaneous-fixes-for-NV-from-upstream.patch diff --git a/debian/patches/0020-add-miscellaneous-fixes-for-NV-from-upstream.patch b/debian/patches/0020-add-miscellaneous-fixes-for-NV-from-upstream.patch new file mode 100644 index 00000000000..116e456aab3 --- /dev/null +++ b/debian/patches/0020-add-miscellaneous-fixes-for-NV-from-upstream.patch @@ -0,0 +1,79 @@ +Index: jellyfin-ffmpeg/libavcodec/cuviddec.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/cuviddec.c ++++ jellyfin-ffmpeg/libavcodec/cuviddec.c +@@ -336,7 +336,8 @@ static int CUDAAPI cuvid_handle_picture_ + + av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n"); + +- ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag; ++ if(picparams->intra_pic_flag) ++ ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag; + + ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams)); + if (ctx->internal_error < 0) +@@ -593,6 +594,8 @@ static int cuvid_output_frame(AVCodecCon + } + + frame->key_frame = ctx->key_frame[parsed_frame.dispinfo.picture_index]; ++ ctx->key_frame[parsed_frame.dispinfo.picture_index] = 0; ++ + frame->width = avctx->width; + frame->height = avctx->height; + if (avctx->pkt_timebase.num && avctx->pkt_timebase.den) +Index: jellyfin-ffmpeg/libavcodec/nvenc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/nvenc.c ++++ jellyfin-ffmpeg/libavcodec/nvenc.c +@@ -210,8 +210,14 @@ static void nvenc_map_preset(NvencContex + + static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level) + { +-#if NVENCAPI_CHECK_VERSION(11, 1) ++#if NVENCAPI_CHECK_VERSION(11, 2) + const char *minver = "(unknown)"; ++#elif NVENCAPI_CHECK_VERSION(11, 1) ++# if defined(_WIN32) || defined(__CYGWIN__) ++ const char *minver = "471.41"; ++# else ++ const char *minver = "470.57.02"; ++# endif + #elif NVENCAPI_CHECK_VERSION(11, 0) + # if defined(_WIN32) || defined(__CYGWIN__) + const char *minver = "456.71"; +@@ -1053,7 +1059,7 @@ static av_cold int nvenc_setup_h264_conf + || vui->videoFullRangeFlag != 0); + + h264->sliceMode = 3; +- h264->sliceModeData = 1; ++ h264->sliceModeData = avctx->slices > 0 ? avctx->slices : 1; + + h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0; + h264->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1; +@@ -1150,7 +1156,7 @@ static av_cold int nvenc_setup_hevc_conf + || vui->videoFullRangeFlag != 0); + + hevc->sliceMode = 3; +- hevc->sliceModeData = 1; ++ hevc->sliceModeData = avctx->slices > 0 ? avctx->slices : 1; + + hevc->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0; + hevc->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 
0 : 1; +Index: jellyfin-ffmpeg/libavfilter/vf_yadif_cuda.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_yadif_cuda.c ++++ jellyfin-ffmpeg/libavfilter/vf_yadif_cuda.c +@@ -297,10 +297,9 @@ static int config_output(AVFilterLink *l + goto exit; + } + +- link->time_base.num = ctx->inputs[0]->time_base.num; +- link->time_base.den = ctx->inputs[0]->time_base.den * 2; +- link->w = ctx->inputs[0]->w; +- link->h = ctx->inputs[0]->h; ++ link->time_base = av_mul_q(ctx->inputs[0]->time_base, (AVRational){1, 2}); ++ link->w = ctx->inputs[0]->w; ++ link->h = ctx->inputs[0]->h; + + if(y->mode & 1) + link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate, diff --git a/debian/patches/series b/debian/patches/series index 791665e7b4f..8c3e62c5e1e 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -17,3 +17,4 @@ 0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch 0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch 0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch +0020-add-miscellaneous-fixes-for-NV-from-upstream.patch From aeb8ea2dc001f6400c119d7920d5ee616f5edd94 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Wed, 10 Nov 2021 21:45:24 +0800 Subject: [PATCH 30/41] add qsv dec support for yuv444 8/10bit --- ...d-qsv-dec-support-for-yuv444-8-10bit.patch | 369 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 370 insertions(+) create mode 100644 debian/patches/0021-add-qsv-dec-support-for-yuv444-8-10bit.patch diff --git a/debian/patches/0021-add-qsv-dec-support-for-yuv444-8-10bit.patch b/debian/patches/0021-add-qsv-dec-support-for-yuv444-8-10bit.patch new file mode 100644 index 00000000000..4978f80d5e9 --- /dev/null +++ b/debian/patches/0021-add-qsv-dec-support-for-yuv444-8-10bit.patch @@ -0,0 +1,369 @@ +Index: jellyfin-ffmpeg/libavcodec/qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsv.c ++++ jellyfin-ffmpeg/libavcodec/qsv.c +@@ -201,10 +201,14 @@ enum AVPixelFormat ff_qsv_map_fourcc(uin + case MFX_FOURCC_NV12: return AV_PIX_FMT_NV12; + case MFX_FOURCC_P010: return AV_PIX_FMT_P010; + case MFX_FOURCC_P8: return AV_PIX_FMT_PAL8; +-#if CONFIG_VAAPI ++#if CONFIG_VAAPI || CONFIG_D3D11VA + case MFX_FOURCC_YUY2: return AV_PIX_FMT_YUYV422; ++#if QSV_VERSION_ATLEAST(1, 17) ++ case MFX_FOURCC_AYUV: return AV_PIX_FMT_0YUV; ++#endif + #if QSV_VERSION_ATLEAST(1, 27) + case MFX_FOURCC_Y210: return AV_PIX_FMT_Y210; ++ case MFX_FOURCC_Y410: return AV_PIX_FMT_Y410; + #endif + #endif + } +@@ -223,16 +227,26 @@ int ff_qsv_map_pixfmt(enum AVPixelFormat + case AV_PIX_FMT_P010: + *fourcc = MFX_FOURCC_P010; + return AV_PIX_FMT_P010; +-#if CONFIG_VAAPI ++#if CONFIG_VAAPI || CONFIG_D3D11VA + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUYV422: + *fourcc = MFX_FOURCC_YUY2; + return AV_PIX_FMT_YUYV422; ++#if QSV_VERSION_ATLEAST(1, 17) ++ case AV_PIX_FMT_0YUV: ++ case AV_PIX_FMT_YUV444P: ++ *fourcc = MFX_FOURCC_AYUV; ++ return AV_PIX_FMT_0YUV; ++#endif + #if QSV_VERSION_ATLEAST(1, 27) + case AV_PIX_FMT_YUV422P10: + case AV_PIX_FMT_Y210: + *fourcc = MFX_FOURCC_Y210; + return AV_PIX_FMT_Y210; ++ case AV_PIX_FMT_Y410: ++ case AV_PIX_FMT_YUV444P10: ++ *fourcc = MFX_FOURCC_Y410; ++ return AV_PIX_FMT_Y410; + #endif + #endif + default: +Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +@@ 
-85,6 +85,10 @@ static const struct { + } supported_formats[] = { + { DXGI_FORMAT_NV12, AV_PIX_FMT_NV12 }, + { DXGI_FORMAT_P010, AV_PIX_FMT_P010 }, ++ { DXGI_FORMAT_AYUV, AV_PIX_FMT_0YUV }, ++ { DXGI_FORMAT_YUY2, AV_PIX_FMT_YUYV422 }, ++ { DXGI_FORMAT_Y210, AV_PIX_FMT_Y210 }, ++ { DXGI_FORMAT_Y410, AV_PIX_FMT_Y410 }, + { DXGI_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA }, + // Special opaque formats. The pix_fmt is merely a place holder, as the + // opaque format cannot be accessed directly. +Index: jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_qsv.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +@@ -100,12 +100,18 @@ static const struct { + { AV_PIX_FMT_BGRA, MFX_FOURCC_RGB4 }, + { AV_PIX_FMT_P010, MFX_FOURCC_P010 }, + { AV_PIX_FMT_PAL8, MFX_FOURCC_P8 }, +-#if CONFIG_VAAPI ++#if CONFIG_VAAPI || CONFIG_D3D11VA + { AV_PIX_FMT_YUYV422, + MFX_FOURCC_YUY2 }, ++#if QSV_VERSION_ATLEAST(1, 17) ++ { AV_PIX_FMT_0YUV, ++ MFX_FOURCC_AYUV }, ++#endif + #if QSV_VERSION_ATLEAST(1, 27) + { AV_PIX_FMT_Y210, + MFX_FOURCC_Y210 }, ++ { AV_PIX_FMT_Y410, ++ MFX_FOURCC_Y410 }, + #endif + #endif + }; +@@ -919,7 +925,7 @@ static int map_frame_to_surface(const AV + surface->Data.R = frame->data[0] + 2; + surface->Data.A = frame->data[0] + 3; + break; +-#if CONFIG_VAAPI ++#if CONFIG_VAAPI || CONFIG_D3D11VA + case AV_PIX_FMT_YUYV422: + surface->Data.Y = frame->data[0]; + surface->Data.U = frame->data[0] + 1; +@@ -931,6 +937,15 @@ static int map_frame_to_surface(const AV + surface->Data.U16 = (mfxU16 *)frame->data[0] + 1; + surface->Data.V16 = (mfxU16 *)frame->data[0] + 3; + break; ++ case AV_PIX_FMT_0YUV: ++ surface->Data.V = frame->data[0]; ++ surface->Data.U = frame->data[0] + 1; ++ surface->Data.Y = frame->data[0] + 2; ++ surface->Data.A = frame->data[0] + 3; ++ break; ++ case AV_PIX_FMT_Y410: ++ surface->Data.U = frame->data[0]; ++ break; + #endif + default: + return MFX_ERR_UNSUPPORTED; +Index: jellyfin-ffmpeg/libavutil/pixdesc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/pixdesc.c ++++ jellyfin-ffmpeg/libavutil/pixdesc.c +@@ -228,6 +228,41 @@ static const AVPixFmtDescriptor av_pix_f + }, + .flags = AV_PIX_FMT_FLAG_BE, + }, ++ [AV_PIX_FMT_0YUV] = { ++ .name = "0yuv", ++ .nb_components = 3, ++ .log2_chroma_w = 0, ++ .log2_chroma_h = 0, ++ .comp = { ++ { 0, 4, 1, 0, 8 }, /* Y */ ++ { 0, 4, 2, 0, 8 }, /* U */ ++ { 0, 4, 3, 0, 8 }, /* V */ ++ }, ++ }, ++ [AV_PIX_FMT_Y410LE] = { ++ .name = "y410le", ++ .nb_components = 3, ++ .log2_chroma_w = 0, ++ .log2_chroma_h = 0, ++ .comp = { ++ { 0, 32, 10, 0, 10 }, /* Y */ ++ { 0, 32, 0, 0, 10 }, /* U */ ++ { 0, 32, 20, 0, 10 }, /* V */ ++ }, ++ .flags = AV_PIX_FMT_FLAG_ALPHA | AV_PIX_FMT_FLAG_BITSTREAM, ++ }, ++ [AV_PIX_FMT_Y410BE] = { ++ .name = "y410be", ++ .nb_components = 3, ++ .log2_chroma_w = 0, ++ .log2_chroma_h = 0, ++ .comp = { ++ { 0, 32, 10, 0, 10 }, /* Y */ ++ { 0, 32, 0, 0, 10 }, /* U */ ++ { 0, 32, 20, 0, 10 }, /* V */ ++ }, ++ .flags = AV_PIX_FMT_FLAG_ALPHA | AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_BE, ++ }, + [AV_PIX_FMT_RGB24] = { + .name = "rgb24", + .nb_components = 3, +Index: jellyfin-ffmpeg/libavutil/pixfmt.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/pixfmt.h ++++ jellyfin-ffmpeg/libavutil/pixfmt.h +@@ -358,6 +358,10 @@ enum AVPixelFormat { + AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high 
bits, big-endian
+     AV_PIX_FMT_Y210LE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian
+ 
++    AV_PIX_FMT_0YUV,      ///< packed YUV 4:4:4, 32bpp, X Y Cb Cr, X=unused/undefined
++    AV_PIX_FMT_Y410LE,    ///< packed YUV 4:4:4, 32bpp, Cr Y Cb A, little-endian
++    AV_PIX_FMT_Y410BE,    ///< packed YUV 4:4:4, 32bpp, Cr Y Cb A, big-endian
++
+     AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined
+     AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined
+     AV_PIX_FMT_NB         ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
+@@ -449,6 +453,7 @@ enum AVPixelFormat {
+ #define AV_PIX_FMT_P016    AV_PIX_FMT_NE(P016BE,  P016LE)
+ 
+ #define AV_PIX_FMT_Y210    AV_PIX_FMT_NE(Y210BE,  Y210LE)
++#define AV_PIX_FMT_Y410    AV_PIX_FMT_NE(Y410BE,  Y410LE)
+ #define AV_PIX_FMT_X2RGB10 AV_PIX_FMT_NE(X2RGB10BE, X2RGB10LE)
+ 
+ /**
+Index: jellyfin-ffmpeg/libswscale/input.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libswscale/input.c
++++ jellyfin-ffmpeg/libswscale/input.c
+@@ -573,6 +573,25 @@ static void y210le_Y_c(uint8_t *dst, con
+         AV_WN16(dst + i * 2, AV_RL16(src + i * 4) >> 6);
+ }
+ 
++static void XyuvToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
++                      uint32_t *unused)
++{
++    int i;
++    for (i = 0; i < width; i++)
++        dst[i] = src[4 * i + 2];
++}
++
++static void XyuvToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
++                       const uint8_t *src2, int width, uint32_t *unused)
++{
++    int i;
++    for (i = 0; i < width; i++) {
++        dstV[i] = src1[4 * i];
++        dstU[i] = src1[4 * i + 1];
++    }
++    av_assert1(src1 == src2);
++}
++
+ static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width,
+                        uint32_t *unused)
+ {
+@@ -1257,6 +1276,9 @@ av_cold void ff_sws_init_input_funcs(Sws
+     case AV_PIX_FMT_Y210LE:
+         c->chrToYV12 = y210le_UV_c;
+         break;
++    case AV_PIX_FMT_0YUV:
++        c->chrToYV12 = XyuvToUV_c;
++        break;
+     }
+     if (c->chrSrcHSubSample) {
+         switch (srcFormat) {
+@@ -1708,6 +1730,9 @@ av_cold void ff_sws_init_input_funcs(Sws
+     case AV_PIX_FMT_Y210LE:
+         c->lumToYV12 = y210le_Y_c;
+         break;
++    case AV_PIX_FMT_0YUV:
++        c->lumToYV12 = XyuvToY_c;
++        break;
+     case AV_PIX_FMT_X2RGB10LE:
+         c->lumToYV12 =rgb30leToY_c;
+         break;
+Index: jellyfin-ffmpeg/libswscale/output.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libswscale/output.c
++++ jellyfin-ffmpeg/libswscale/output.c
+@@ -2492,6 +2492,53 @@ yuv2ya8_X_c(SwsContext *c, const int16_t
+ }
+ 
+ static void
++yuv2Xyuv_X_c(SwsContext *c, const int16_t *lumFilter,
++             const int16_t **lumSrc, int lumFilterSize,
++             const int16_t *chrFilter, const int16_t **chrUSrc,
++             const int16_t **chrVSrc, int chrFilterSize,
++             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
++{
++    int hasAlpha = !!alpSrc;
++    int i;
++
++    for (i = 0; i < dstW; i++) {
++        int j;
++        int A = 1 << 18;
++        int Y = 1 << 18;
++        int U = 1 << 18;
++        int V = 1 << 18;
++
++        for (j = 0; j < lumFilterSize; j++) {
++            Y += lumSrc[j][i] * lumFilter[j];
++        }
++        for (j = 0; j < chrFilterSize; j++) {
++            U += chrUSrc[j][i] * chrFilter[j];
++            V += chrVSrc[j][i] * chrFilter[j];
++        }
++        if (hasAlpha)
++            for (j = 0; j < lumFilterSize; j++)
++                A += alpSrc[j][i] * lumFilter[j];
++        A >>= 19;
++        Y >>= 19;
++        U >>= 19;
++        V >>= 19;
++        A = hasAlpha ? A : 255;
++
++        if ((A | Y | U | V) & 0x100) {
++            A = av_clip_uint8(A);
++            Y = av_clip_uint8(Y);
++            U = av_clip_uint8(U);
++            V = av_clip_uint8(V);
++        }
++
++        dest[4*i]     = V;
++        dest[4*i + 1] = U;
++        dest[4*i + 2] = Y;
++        dest[4*i + 3] = A;
++    }
++}
++
++static void
+ yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter,
+                  const int16_t **_lumSrc, int lumFilterSize,
+                  const int16_t *chrFilter, const int16_t **_chrUSrc,
+@@ -3033,6 +3080,9 @@ av_cold void ff_sws_init_output_funcs(Sw
+         *yuv2packed2 = yuv2ya16be_2_c;
+         *yuv2packedX = yuv2ya16be_X_c;
+         break;
++    case AV_PIX_FMT_0YUV:
++        *yuv2packedX = yuv2Xyuv_X_c;
++        break;
+     case AV_PIX_FMT_AYUV64LE:
+         *yuv2packedX = yuv2ayuv64le_X_c;
+         break;
+Index: jellyfin-ffmpeg/libswscale/swscale_unscaled.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libswscale/swscale_unscaled.c
++++ jellyfin-ffmpeg/libswscale/swscale_unscaled.c
+@@ -403,6 +403,41 @@ static int yuyvToYuv422Wrapper(SwsContex
+     return srcSliceH;
+ }
+ 
++static void yuv444pTo0yuv(const uint8_t *src[], int srcStride[],
++                          uint8_t *dst, int dstStride, int srcSliceH, int width)
++{
++    int x, h, i;
++    for (h = 0; h < srcSliceH; h++) {
++        uint8_t *dest = dst + dstStride * h;
++
++        for (x = 0; x < width; x++) {
++            *dest++ = src[2][x];
++            *dest++ = src[1][x];
++            *dest++ = src[0][x];
++            *dest++ = 0xFF;
++        }
++
++        for (i = 0; i < 3; i++)
++            src[i] += srcStride[i];
++    }
++}
++
++
++static int yuv444pTo0yuvWrapper(SwsContext *c, const uint8_t *src[],
++                                int srcStride[], int srcSliceY, int srcSliceH,
++                                uint8_t *dstParam[], int dstStride[])
++{
++    uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY;
++
++    const uint8_t *source[] = { src[0], src[1], src[2] };
++    int stride[] = { srcStride[0], srcStride[1], srcStride[2] };
++
++    yuv444pTo0yuv(source, stride, dst, dstStride[0],
++                  srcSliceH, c->srcW);
++
++    return srcSliceH;
++}
++
+ static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t *src[],
+                                int srcStride[], int srcSliceY, int srcSliceH,
+                                uint8_t *dstParam[], int dstStride[])
+@@ -2170,6 +2205,11 @@ void ff_get_unscaled_swscale(SwsContext
+         c->swscale = yuv422pToUyvyWrapper;
+     }
+ 
++    if (srcFormat == AV_PIX_FMT_YUV444P) {
++        if (dstFormat == AV_PIX_FMT_0YUV)
++            c->swscale = yuv444pTo0yuvWrapper;
++    }
++
+     /* uint Y to float Y */
+     if (srcFormat == AV_PIX_FMT_GRAY8 && dstFormat == AV_PIX_FMT_GRAYF32){
+         c->swscale = uint_y_to_float_y_wrapper;
+Index: jellyfin-ffmpeg/libswscale/utils.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libswscale/utils.c
++++ jellyfin-ffmpeg/libswscale/utils.c
+@@ -271,6 +271,7 @@ static const FormatEntry format_entries[
+     [AV_PIX_FMT_NV24]        = { 1, 1 },
+     [AV_PIX_FMT_NV42]        = { 1, 1 },
+     [AV_PIX_FMT_Y210LE]      = { 1, 0 },
++    [AV_PIX_FMT_0YUV]        = { 1, 1 },
+     [AV_PIX_FMT_X2RGB10LE]   = { 1, 1 },
+ };
+ 
diff --git a/debian/patches/series b/debian/patches/series
index 8c3e62c5e1e..6806f4027a4 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -18,3 +18,4 @@
 0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch
 0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch
 0020-add-miscellaneous-fixes-for-NV-from-upstream.patch
+0021-add-qsv-dec-support-for-yuv444-8-10bit.patch

From e19e1e95b267d9b2aaf8f8d404490787f728821a Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:47:00 +0800
Subject: [PATCH 31/41] add fixes for warning on overlay filters

---
...dd-fixes-for-warning-on-overlay-filters.patch | 16 ++++++++++++++++ debian/patches/series | 1 + 2 files changed, 17 insertions(+) create mode 100644 debian/patches/0022-add-fixes-for-warning-on-overlay-filters.patch diff --git a/debian/patches/0022-add-fixes-for-warning-on-overlay-filters.patch b/debian/patches/0022-add-fixes-for-warning-on-overlay-filters.patch new file mode 100644 index 00000000000..4133e07a5e9 --- /dev/null +++ b/debian/patches/0022-add-fixes-for-warning-on-overlay-filters.patch @@ -0,0 +1,16 @@ +Index: jellyfin-ffmpeg/libavfilter/buffersrc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/buffersrc.c ++++ jellyfin-ffmpeg/libavfilter/buffersrc.c +@@ -69,9 +69,9 @@ typedef struct BufferSourceContext { + + #define CHECK_VIDEO_PARAM_CHANGE(s, c, width, height, format, pts)\ + if (c->w != width || c->h != height || c->pix_fmt != format) {\ +- av_log(s, AV_LOG_INFO, "filter context - w: %d h: %d fmt: %d, incoming frame - w: %d h: %d fmt: %d pts_time: %s\n",\ ++ av_log(s, AV_LOG_DEBUG, "filter context - w: %d h: %d fmt: %d, incoming frame - w: %d h: %d fmt: %d pts_time: %s\n",\ + c->w, c->h, c->pix_fmt, width, height, format, av_ts2timestr(pts, &s->outputs[0]->time_base));\ +- av_log(s, AV_LOG_WARNING, "Changing video frame properties on the fly is not supported by all filters.\n");\ ++ av_log(s, AV_LOG_DEBUG, "Changing video frame properties on the fly is not supported by all filters.\n");\ + } + + #define CHECK_AUDIO_PARAM_CHANGE(s, c, srate, ch_layout, ch_count, format, pts)\ diff --git a/debian/patches/series b/debian/patches/series index 6806f4027a4..971e851876f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -19,3 +19,4 @@ 0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch 0020-add-miscellaneous-fixes-for-NV-from-upstream.patch 0021-add-qsv-dec-support-for-yuv444-8-10bit.patch +0022-add-fixes-for-warning-on-overlay-filters.patch From 3f558dd3b2fc5c2ee7ae6f038dcc0205a326c365 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:47:19 +0800 Subject: [PATCH 32/41] add fixes for HEVC 10bit HDR decoding in bsf --- ...-for-HEVC-10-bit-HDR-decoding-in-bsf.patch | 28 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 29 insertions(+) create mode 100644 debian/patches/0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch diff --git a/debian/patches/0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch b/debian/patches/0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch new file mode 100644 index 00000000000..3e8f4a2b2bf --- /dev/null +++ b/debian/patches/0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch @@ -0,0 +1,28 @@ +Index: jellyfin-ffmpeg/libavcodec/hevc_mp4toannexb_bsf.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/hevc_mp4toannexb_bsf.c ++++ jellyfin-ffmpeg/libavcodec/hevc_mp4toannexb_bsf.c +@@ -121,7 +121,7 @@ static int hevc_mp4toannexb_filter(AVBSF + HEVCBSFContext *s = ctx->priv_data; + AVPacket *in; + GetByteContext gb; +- ++ int has_sps = 0, has_pps = 0; + int got_irap = 0; + int i, ret = 0; + +@@ -155,10 +155,13 @@ static int hevc_mp4toannexb_filter(AVBSF + } + + nalu_type = (bytestream2_peek_byte(&gb) >> 1) & 0x3f; ++ has_sps = (has_sps || nalu_type == HEVC_NAL_SPS); ++ has_pps = (has_pps || nalu_type == HEVC_NAL_PPS); + + /* prepend extradata to IRAP frames */ + is_irap = nalu_type >= 16 && nalu_type <= 23; +- add_extradata = is_irap && !got_irap; ++ /* ignore the extradata 
if IRAP frame has sps and pps */ ++ add_extradata = is_irap && !got_irap && !(has_sps && has_pps); + extra_size = add_extradata * ctx->par_out->extradata_size; + got_irap |= is_irap; + diff --git a/debian/patches/series b/debian/patches/series index 971e851876f..fc58f4b6987 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -20,3 +20,4 @@ 0020-add-miscellaneous-fixes-for-NV-from-upstream.patch 0021-add-qsv-dec-support-for-yuv444-8-10bit.patch 0022-add-fixes-for-warning-on-overlay-filters.patch +0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch From b6a47d815974b0e8714b201170765a7fb5096d34 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:47:37 +0800 Subject: [PATCH 33/41] add sub2video option to subtitles filter --- ...sub2video-option-to-subtitles-filter.patch | 111 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 112 insertions(+) create mode 100644 debian/patches/0024-add-sub2video-option-to-subtitles-filter.patch diff --git a/debian/patches/0024-add-sub2video-option-to-subtitles-filter.patch b/debian/patches/0024-add-sub2video-option-to-subtitles-filter.patch new file mode 100644 index 00000000000..87dfc2e4490 --- /dev/null +++ b/debian/patches/0024-add-sub2video-option-to-subtitles-filter.patch @@ -0,0 +1,111 @@ +Index: jellyfin-ffmpeg/libavfilter/vf_subtitles.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_subtitles.c ++++ jellyfin-ffmpeg/libavfilter/vf_subtitles.c +@@ -55,10 +55,13 @@ typedef struct AssContext { + char *force_style; + int stream_index; + int alpha; ++ int sub2video; ++ int last_image; + uint8_t rgba_map[4]; + int pix_step[4]; ///< steps per pixel for each plane of the main output + int original_w, original_h; + int shaping; ++ int64_t max_pts, max_ts_ms; + FFDrawContext draw; + } AssContext; + +@@ -70,7 +73,8 @@ typedef struct AssContext { + {"f", "set the filename of file to read", OFFSET(filename), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, \ + {"original_size", "set the size of the original video (used to scale fonts)", OFFSET(original_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, FLAGS }, \ + {"fontsdir", "set the directory containing the fonts to read", OFFSET(fontsdir), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, \ +- {"alpha", "enable processing of alpha channel", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, \ ++ {"alpha", "enable processing of alpha channel", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, \ ++ {"sub2video", "enable textual subtitle to video mode", OFFSET(sub2video), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, \ + + /* libass supports a log level ranging from 0 to 7 */ + static const int ass_libavfilter_log_level_map[] = { +@@ -151,6 +155,8 @@ static int config_input(AVFilterLink *in + if (ass->shaping != -1) + ass_set_shaper(ass->renderer, ass->shaping); + ++ ass->max_pts = ass->max_ts_ms / (av_q2d(inlink->time_base) * 1000); ++ + return 0; + } + +@@ -181,18 +187,41 @@ static int filter_frame(AVFilterLink *in + AVFilterLink *outlink = ctx->outputs[0]; + AssContext *ass = ctx->priv; + int detect_change = 0; +- double time_ms = picref->pts * av_q2d(inlink->time_base) * 1000; ++ int64_t time_ms = picref->pts * av_q2d(inlink->time_base) * 1000; + ASS_Image *image = ass_render_frame(ass->renderer, ass->track, + time_ms, &detect_change); + ++ if (ass->sub2video) { ++ if (!image && !ass->last_image && picref->pts <= ass->max_pts && outlink->current_pts != AV_NOPTS_VALUE) { ++ 
av_log(ctx, AV_LOG_DEBUG, "sub2video skip pts:%"PRId64"\n", picref->pts); ++ av_frame_free(&picref); ++ return 0; ++ } ++ ass->last_image = image != NULL; ++ } ++ + if (detect_change) +- av_log(ctx, AV_LOG_DEBUG, "Change happened at time ms:%f\n", time_ms); ++ av_log(ctx, AV_LOG_DEBUG, "Change happened at time ms:%"PRId64"\n", time_ms); + + overlay_ass_image(ass, picref, image); + + return ff_filter_frame(outlink, picref); + } + ++static void get_max_timestamp(AVFilterContext *ctx) ++{ ++ AssContext *ass = ctx->priv; ++ int i; ++ ++ ass->max_ts_ms = 0; ++ if (ass->track) { ++ for (i = 0; i < ass->track->n_events; i++) { ++ ASS_Event *event = ass->track->events + i; ++ ass->max_ts_ms = FFMAX(event->Start + event->Duration, ass->max_ts_ms); ++ } ++ } ++} ++ + static const AVFilterPad ass_inputs[] = { + { + .name = "default", +@@ -243,6 +272,9 @@ static av_cold int init_ass(AVFilterCont + ass->filename); + return AVERROR(EINVAL); + } ++ ++ get_max_timestamp(ctx); ++ + return 0; + } + +@@ -264,8 +296,8 @@ AVFilter ff_vf_ass = { + static const AVOption subtitles_options[] = { + COMMON_OPTIONS + {"charenc", "set input character encoding", OFFSET(charenc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, +- {"stream_index", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, +- {"si", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, ++ {"stream_index", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, ++ {"si", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, + {"force_style", "force subtitle style", OFFSET(force_style), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, + {NULL}, + }; +@@ -473,6 +505,8 @@ static av_cold int init_subtitles(AVFilt + avsubtitle_free(&sub); + } + ++ get_max_timestamp(ctx); ++ + end: + av_dict_free(&codec_opts); + avcodec_free_context(&dec_ctx); diff --git a/debian/patches/series b/debian/patches/series index fc58f4b6987..757669c5560 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -21,3 +21,4 @@ 0021-add-qsv-dec-support-for-yuv444-8-10bit.patch 0022-add-fixes-for-warning-on-overlay-filters.patch 0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch +0024-add-sub2video-option-to-subtitles-filter.patch From e966e53a735af39d894b6a7033a961412569d1c2 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:47:52 +0800 Subject: [PATCH 34/41] add alphasrc source video filter --- ...025-add-alphasrc-source-video-filter.patch | 194 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 195 insertions(+) create mode 100644 debian/patches/0025-add-alphasrc-source-video-filter.patch diff --git a/debian/patches/0025-add-alphasrc-source-video-filter.patch b/debian/patches/0025-add-alphasrc-source-video-filter.patch new file mode 100644 index 00000000000..c03869c09b7 --- /dev/null +++ b/debian/patches/0025-add-alphasrc-source-video-filter.patch @@ -0,0 +1,194 @@ +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ -495,6 +495,7 @@ OBJS-$(CONFIG_ZSCALE_FILTER) + + OBJS-$(CONFIG_ALLRGB_FILTER) += vsrc_testsrc.o + OBJS-$(CONFIG_ALLYUV_FILTER) += vsrc_testsrc.o ++OBJS-$(CONFIG_ALPHASRC_FILTER) += vsrc_alphasrc.o + OBJS-$(CONFIG_CELLAUTO_FILTER) += vsrc_cellauto.o + OBJS-$(CONFIG_COLOR_FILTER) += vsrc_testsrc.o + 
OBJS-$(CONFIG_COREIMAGESRC_FILTER) += vf_coreimage.o +Index: jellyfin-ffmpeg/libavfilter/allfilters.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/allfilters.c ++++ jellyfin-ffmpeg/libavfilter/allfilters.c +@@ -472,6 +472,7 @@ extern AVFilter ff_vf_zscale; + + extern AVFilter ff_vsrc_allrgb; + extern AVFilter ff_vsrc_allyuv; ++extern AVFilter ff_vsrc_alphasrc; + extern AVFilter ff_vsrc_cellauto; + extern AVFilter ff_vsrc_color; + extern AVFilter ff_vsrc_coreimagesrc; +Index: jellyfin-ffmpeg/libavfilter/vsrc_alphasrc.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vsrc_alphasrc.c +@@ -0,0 +1,165 @@ ++/* ++ * Copyright (c) 2021 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Provide a blank video input with alpha channel. ++ */ ++ ++#include "libavutil/avstring.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/opt.h" ++#include "libavutil/parseutils.h" ++#include "filters.h" ++#include "avfilter.h" ++#include "internal.h" ++#include "formats.h" ++#include "video.h" ++ ++typedef struct AlphaSrc { ++ const AVClass *class; ++ AVRational time_base, frame_rate; ++ int64_t pts; ++ int64_t duration, start; ++ int out_w, out_h; ++ int rgb, planar; ++} AlphaSrc; ++ ++static av_cold int alphasrc_init(AVFilterContext *ctx) ++{ ++ AlphaSrc *s = ctx->priv; ++ ++ s->time_base = av_inv_q(s->frame_rate); ++ s->pts = 0; ++ ++ if (s->start > 0) ++ s->pts += av_rescale_q(s->start, AV_TIME_BASE_Q, s->time_base); ++ ++ return 0; ++} ++ ++static int alphasrc_query_formats(AVFilterContext *ctx) ++{ ++ AVFilterLink *outlink = ctx->outputs[0]; ++ AVFilterFormats *formats; ++ int ret; ++ ++ if ((ret = ff_formats_pixdesc_filter(&formats, AV_PIX_FMT_FLAG_ALPHA, 0)) || ++ (ret = ff_formats_ref(formats, &outlink->incfg.formats))) ++ return ret; ++ ++ return 0; ++} ++ ++static int alphasrc_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ AlphaSrc *s = ctx->priv; ++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format); ++ ++ s->rgb = desc->flags & AV_PIX_FMT_FLAG_RGB; ++ s->planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR; ++ ++ if (!s->rgb && !s->planar) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (s->out_w <= 0 || s->out_h <= 0) { ++ av_log(ctx, AV_LOG_ERROR, "Invalid output video size.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ outlink->w = s->out_w; ++ outlink->h = s->out_h; ++ outlink->frame_rate = s->frame_rate; ++ outlink->time_base = s->time_base; ++ outlink->sample_aspect_ratio = (AVRational){1, 1}; ++ ++ return 0; ++} ++ ++static int alphasrc_request_frame(AVFilterLink *outlink) ++{ ++ 
AVFilterContext *ctx = outlink->src; ++ AlphaSrc *s = ctx->priv; ++ AVFrame *out; ++ int i; ++ ++ if (s->duration > 0 && ++ av_rescale_q(s->pts, s->time_base, AV_TIME_BASE_Q) >= s->duration) { ++ ff_outlink_set_status(outlink, AVERROR_EOF, s->pts); ++ return 0; ++ } ++ ++ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!out) ++ return AVERROR(ENOMEM); ++ ++ for (i = 0; i < AV_NUM_DATA_POINTERS; i++) { ++ if (out->buf[i]) { ++ if (s->rgb) ++ memset(out->buf[i]->data, 0, out->buf[i]->size); ++ else if (s->planar) ++ memset(out->buf[i]->data, (i == 1 || i == 2) ? 128 : 0, out->buf[i]->size); ++ } ++ } ++ ++ out->pts = s->pts++; ++ ++ return ff_filter_frame(outlink, out); ++} ++ ++#define OFFSET(x) offsetof(AlphaSrc, x) ++#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM ++static const AVOption alphasrc_options[] = { ++ {"duration", "set the duration of the video", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX, FLAGS}, ++ {"d", "set the duration of the video", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX, FLAGS}, ++ {"start", "set the start timestamp of the video", OFFSET(start), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX, FLAGS}, ++ {"rate", "set the frame rate of the video", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "15"}, 1, INT_MAX, FLAGS}, ++ {"r", "set the frame rate of the video", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "15"}, 1, INT_MAX, FLAGS}, ++ {"size", "set the size of the video", OFFSET(out_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, FLAGS}, ++ {"s", "set the size of the video", OFFSET(out_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, FLAGS}, ++ {NULL} ++}; ++ ++AVFILTER_DEFINE_CLASS(alphasrc); ++ ++static const AVFilterPad alphasrc_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = alphasrc_config_output, ++ .request_frame = alphasrc_request_frame, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vsrc_alphasrc = { ++ .name = "alphasrc", ++ .description = NULL_IF_CONFIG_SMALL("Provide a blank video input with alpha channel."), ++ .priv_size = sizeof(AlphaSrc), ++ .priv_class = &alphasrc_class, ++ .query_formats = alphasrc_query_formats, ++ .init = alphasrc_init, ++ .uninit = NULL, ++ .inputs = NULL, ++ .outputs = alphasrc_outputs, ++}; diff --git a/debian/patches/series b/debian/patches/series index 757669c5560..f3d006d95b6 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -22,3 +22,4 @@ 0022-add-fixes-for-warning-on-overlay-filters.patch 0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch 0024-add-sub2video-option-to-subtitles-filter.patch +0025-add-alphasrc-source-video-filter.patch From 31f04e80b6f31c80a57bfd8c8c08547a0865586f Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 8 Nov 2021 01:26:40 +0800 Subject: [PATCH 35/41] add fixes for armhf build with gcc 11 --- ...-add-fixes-for-armhf-build-with-gcc-11.patch | 17 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 18 insertions(+) create mode 100644 debian/patches/0026-add-fixes-for-armhf-build-with-gcc-11.patch diff --git a/debian/patches/0026-add-fixes-for-armhf-build-with-gcc-11.patch b/debian/patches/0026-add-fixes-for-armhf-build-with-gcc-11.patch new file mode 100644 index 00000000000..f823e7a19d8 --- /dev/null +++ b/debian/patches/0026-add-fixes-for-armhf-build-with-gcc-11.patch @@ -0,0 +1,17 @@ +Index: jellyfin-ffmpeg/configure +=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ 
jellyfin-ffmpeg/configure +@@ -4988,9 +4988,11 @@ elif enabled arm; then + fi + } + +- [ "$cpu" = generic ] && cpu=$(probe_arm_arch) + + case $cpu in ++ generic) ++ subarch=$(probe_arm_arch | sed 's/[^a-z0-9]//g') ++ ;; + armv*) + cpuflags="-march=$cpu" + subarch=$(echo $cpu | sed 's/[^a-z0-9]//g') diff --git a/debian/patches/series b/debian/patches/series index f3d006d95b6..5ca70657cc8 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -23,3 +23,4 @@ 0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch 0024-add-sub2video-option-to-subtitles-filter.patch 0025-add-alphasrc-source-video-filter.patch +0026-add-fixes-for-armhf-build-with-gcc-11.patch From 772d8499434d28a072fc635b4f689060866d7eaf Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 8 Nov 2021 16:36:36 +0800 Subject: [PATCH 36/41] add fixes to improve the performance of vaapi encode --- ...rove-the-performance-of-vaapi-encode.patch | 235 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 236 insertions(+) create mode 100644 debian/patches/0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch diff --git a/debian/patches/0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch b/debian/patches/0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch new file mode 100644 index 00000000000..53f86235586 --- /dev/null +++ b/debian/patches/0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch @@ -0,0 +1,235 @@ +Index: jellyfin-ffmpeg/libavcodec/vaapi_encode.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/vaapi_encode.c ++++ jellyfin-ffmpeg/libavcodec/vaapi_encode.c +@@ -134,7 +134,8 @@ static int vaapi_encode_make_misc_param_ + } + + static int vaapi_encode_wait(AVCodecContext *avctx, +- VAAPIEncodePicture *pic) ++ VAAPIEncodePicture *pic, ++ uint8_t wait) + { + VAAPIEncodeContext *ctx = avctx->priv_data; + VAStatus vas; +@@ -150,11 +151,43 @@ static int vaapi_encode_wait(AVCodecCont + "(input surface %#x).\n", pic->display_order, + pic->encode_order, pic->input_surface); + +- vas = vaSyncSurface(ctx->hwctx->display, pic->input_surface); +- if (vas != VA_STATUS_SUCCESS) { +- av_log(avctx, AV_LOG_ERROR, "Failed to sync to picture completion: " +- "%d (%s).\n", vas, vaErrorStr(vas)); ++#if VA_CHECK_VERSION(1, 9, 0) ++ // Try vaSyncBuffer. ++ vas = vaSyncBuffer(ctx->hwctx->display, ++ pic->output_buffer, ++ wait ? VA_TIMEOUT_INFINITE : 0); ++ if (vas == VA_STATUS_ERROR_TIMEDOUT) { ++ return AVERROR(EAGAIN); ++ } else if (vas != VA_STATUS_SUCCESS && vas != VA_STATUS_ERROR_UNIMPLEMENTED) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to sync to output buffer completion: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); + return AVERROR(EIO); ++ } else if (vas == VA_STATUS_ERROR_UNIMPLEMENTED) ++ // If vaSyncBuffer is not implemented, try old version API. 
++#endif ++ { ++ if (!wait) { ++ VASurfaceStatus surface_status; ++ vas = vaQuerySurfaceStatus(ctx->hwctx->display, ++ pic->input_surface, ++ &surface_status); ++ if (vas == VA_STATUS_SUCCESS && ++ surface_status != VASurfaceReady && ++ surface_status != VASurfaceSkipped) { ++ return AVERROR(EAGAIN); ++ } else if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to query surface status: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ return AVERROR(EIO); ++ } ++ } else { ++ vas = vaSyncSurface(ctx->hwctx->display, pic->input_surface); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to sync to picture completion: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ return AVERROR(EIO); ++ } ++ } + } + + // Input is definitely finished with now. +@@ -633,7 +666,7 @@ static int vaapi_encode_output(AVCodecCo + uint8_t *ptr; + int err; + +- err = vaapi_encode_wait(avctx, pic); ++ err = vaapi_encode_wait(avctx, pic, 1); + if (err < 0) + return err; + +@@ -695,7 +728,7 @@ fail: + static int vaapi_encode_discard(AVCodecContext *avctx, + VAAPIEncodePicture *pic) + { +- vaapi_encode_wait(avctx, pic); ++ vaapi_encode_wait(avctx, pic, 1); + + if (pic->output_buffer_ref) { + av_log(avctx, AV_LOG_DEBUG, "Discard output for pic " +@@ -951,8 +984,10 @@ static int vaapi_encode_pick_next(AVCode + if (!pic && ctx->end_of_stream) { + --b_counter; + pic = ctx->pic_end; +- if (pic->encode_issued) ++ if (pic->encode_complete) + return AVERROR_EOF; ++ else if (pic->encode_issued) ++ return AVERROR(EAGAIN); + } + + if (!pic) { +@@ -1123,7 +1158,8 @@ static int vaapi_encode_send_frame(AVCod + if (ctx->input_order == ctx->decode_delay) + ctx->dts_pts_diff = pic->pts - ctx->first_pts; + if (ctx->output_delay > 0) +- ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts; ++ ctx->ts_ring[ctx->input_order % ++ (3 * ctx->output_delay + ctx->async_depth)] = pic->pts; + + pic->display_order = ctx->input_order; + ++ctx->input_order; +@@ -1177,20 +1213,40 @@ int ff_vaapi_encode_receive_packet(AVCod + return AVERROR(EAGAIN); + } + +- pic = NULL; +- err = vaapi_encode_pick_next(avctx, &pic); +- if (err < 0) +- return err; +- av_assert0(pic); ++ while (av_fifo_size(ctx->encode_fifo) < ++ MAX_ASYNC_DEPTH * sizeof(VAAPIEncodePicture *)) { ++ pic = NULL; ++ err = vaapi_encode_pick_next(avctx, &pic); ++ if (err < 0) ++ break; ++ av_assert0(pic); + +- pic->encode_order = ctx->encode_order++; ++ pic->encode_order = ctx->encode_order + ++ (av_fifo_size(ctx->encode_fifo) / sizeof(VAAPIEncodePicture *)); + +- err = vaapi_encode_issue(avctx, pic); +- if (err < 0) { +- av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err); ++ err = vaapi_encode_issue(avctx, pic); ++ if (err < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err); ++ return err; ++ } ++ ++ av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL); ++ } ++ ++ if (!av_fifo_size(ctx->encode_fifo)) + return err; ++ ++ if (av_fifo_size(ctx->encode_fifo) < ctx->async_depth * sizeof(VAAPIEncodePicture *) && ++ !ctx->end_of_stream) { ++ av_fifo_generic_peek(ctx->encode_fifo, &pic, sizeof(pic), NULL); ++ err = vaapi_encode_wait(avctx, pic, 0); ++ if (err < 0) ++ return err; + } + ++ av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL); ++ ctx->encode_order = pic->encode_order + 1; ++ + err = vaapi_encode_output(avctx, pic, pkt); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Output failed: %d.\n", err); +@@ -1206,7 +1262,7 @@ int ff_vaapi_encode_receive_packet(AVCod + pkt->dts = 
ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff; + } else { + pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) % +- (3 * ctx->output_delay)]; ++ (3 * ctx->output_delay + ctx->async_depth)]; + } + av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts %"PRId64".\n", + pkt->pts, pkt->dts); +@@ -2520,6 +2576,11 @@ av_cold int ff_vaapi_encode_init(AVCodec + } + } + ++ ctx->encode_fifo = av_fifo_alloc(MAX_ASYNC_DEPTH * ++ sizeof(VAAPIEncodePicture *)); ++ if (!ctx->encode_fifo) ++ return AVERROR(ENOMEM); ++ + return 0; + + fail: +@@ -2552,6 +2613,7 @@ av_cold int ff_vaapi_encode_close(AVCode + + av_freep(&ctx->codec_sequence_params); + av_freep(&ctx->codec_picture_params); ++ av_fifo_freep(&ctx->encode_fifo); + + av_buffer_unref(&ctx->recon_frames_ref); + av_buffer_unref(&ctx->input_frames_ref); +Index: jellyfin-ffmpeg/libavcodec/vaapi_encode.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/vaapi_encode.h ++++ jellyfin-ffmpeg/libavcodec/vaapi_encode.h +@@ -29,6 +29,7 @@ + + #include "libavutil/hwcontext.h" + #include "libavutil/hwcontext_vaapi.h" ++#include "libavutil/fifo.h" + + #include "avcodec.h" + #include "hwconfig.h" +@@ -47,6 +48,7 @@ enum { + MAX_TILE_ROWS = 22, + // A.4.1: table A.6 allows at most 20 tile columns for any level. + MAX_TILE_COLS = 20, ++ MAX_ASYNC_DEPTH = 64, + }; + + extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[]; +@@ -297,7 +299,8 @@ typedef struct VAAPIEncodeContext { + // Timestamp handling. + int64_t first_pts; + int64_t dts_pts_diff; +- int64_t ts_ring[MAX_REORDER_DELAY * 3]; ++ int64_t ts_ring[MAX_REORDER_DELAY * 3 + ++ MAX_ASYNC_DEPTH]; + + // Slice structure. + int slice_block_rows; +@@ -345,6 +348,10 @@ typedef struct VAAPIEncodeContext { + int roi_warned; + + AVFrame *frame; ++ ++ AVFifoBuffer *encode_fifo; ++ ++ int async_depth; + } VAAPIEncodeContext; + + enum { +@@ -455,7 +462,11 @@ int ff_vaapi_encode_close(AVCodecContext + { "b_depth", \ + "Maximum B-frame reference depth", \ + OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \ +- { .i64 = 1 }, 1, INT_MAX, FLAGS } ++ { .i64 = 1 }, 1, INT_MAX, FLAGS }, \ ++ { "async_depth", "Maximum processing parallelism. 
" \ ++ "Increase this to improve single channel performance", \ ++ OFFSET(common.async_depth), AV_OPT_TYPE_INT, \ ++ { .i64 = 4 }, 0, MAX_ASYNC_DEPTH, FLAGS } + + #define VAAPI_ENCODE_RC_MODE(name, desc) \ + { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \ diff --git a/debian/patches/series b/debian/patches/series index 5ca70657cc8..ffda73fbb33 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -24,3 +24,4 @@ 0024-add-sub2video-option-to-subtitles-filter.patch 0025-add-alphasrc-source-video-filter.patch 0026-add-fixes-for-armhf-build-with-gcc-11.patch +0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch From a8d4a7992ba531e631db89af15f3a40626e10507 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 15 Nov 2021 02:44:08 +0800 Subject: [PATCH 37/41] add fixes for alignment issue when upload to qsv --- ...r-alignment-issue-when-upload-to-qsv.patch | 96 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 97 insertions(+) create mode 100644 debian/patches/0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch diff --git a/debian/patches/0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch b/debian/patches/0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch new file mode 100644 index 00000000000..fc38abd89bc --- /dev/null +++ b/debian/patches/0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch @@ -0,0 +1,96 @@ +Index: jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_qsv.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +@@ -47,6 +47,7 @@ + #include "pixfmt.h" + #include "pixdesc.h" + #include "time.h" ++#include "imgutils.h" + + #define QSV_VERSION_ATLEAST(MAJOR, MINOR) \ + (MFX_VERSION_MAJOR > (MAJOR) || \ +@@ -90,6 +91,7 @@ typedef struct QSVFramesContext { + + mfxExtOpaqueSurfaceAlloc opaque_alloc; + mfxExtBuffer *ext_buffers[1]; ++ AVFrame realigned_tmp_frame; + } QSVFramesContext; + + static const struct { +@@ -226,6 +228,7 @@ static void qsv_frames_uninit(AVHWFrames + av_freep(&s->surface_ptrs); + av_freep(&s->surfaces_internal); + av_freep(&s->handle_pairs_internal); ++ av_frame_unref(&s->realigned_tmp_frame); + av_buffer_unref(&s->child_frames_ref); + } + +@@ -1036,7 +1039,7 @@ static int qsv_transfer_data_to(AVHWFram + mfxStatus err; + int ret = 0; + /* make a copy if the input is not padded as libmfx requires */ +- AVFrame tmp_frame; ++ AVFrame *tmp_frame = &s->realigned_tmp_frame; + const AVFrame *src_frame; + int realigned = 0; + +@@ -1067,22 +1070,37 @@ static int qsv_transfer_data_to(AVHWFram + + if (src->height & 15 || src->linesize[0] & 15) { + realigned = 1; +- memset(&tmp_frame, 0, sizeof(tmp_frame)); +- tmp_frame.format = src->format; +- tmp_frame.width = FFALIGN(src->width, 16); +- tmp_frame.height = FFALIGN(src->height, 16); +- ret = av_frame_get_buffer(&tmp_frame, 0); +- if (ret < 0) +- return ret; +- +- ret = av_frame_copy(&tmp_frame, src); ++ if (tmp_frame->format != src->format || ++ tmp_frame->width != FFALIGN(src->width, 16) || ++ tmp_frame->height != FFALIGN(src->height, 16)) { ++ ptrdiff_t linesize[4] = {tmp_frame->linesize[0], ++ tmp_frame->linesize[1], ++ tmp_frame->linesize[2], ++ tmp_frame->linesize[3]}; ++ av_frame_unref(tmp_frame); ++ ++ tmp_frame->format = src->format; ++ tmp_frame->width = FFALIGN(src->width, 16); ++ tmp_frame->height = FFALIGN(src->height, 16); ++ ret = av_frame_get_buffer(tmp_frame, 0); ++ if (ret < 0) ++ return ret; ++ ++ ret = 
av_image_fill_black(tmp_frame->data, linesize, tmp_frame->format, ++ 0, tmp_frame->width, tmp_frame->height); ++ if (ret < 0) { ++ av_frame_unref(tmp_frame); ++ return ret; ++ } ++ } ++ ret = av_frame_copy(tmp_frame, src); + if (ret < 0) { +- av_frame_unref(&tmp_frame); ++ av_frame_unref(tmp_frame); + return ret; + } + } + +- src_frame = realigned ? &tmp_frame : src; ++ src_frame = realigned ? tmp_frame : src; + + if (!s->session_upload) { + if (s->child_frames_ref) +@@ -1114,9 +1132,6 @@ static int qsv_transfer_data_to(AVHWFram + return AVERROR_UNKNOWN; + } + +- if (realigned) +- av_frame_unref(&tmp_frame); +- + return 0; + } + diff --git a/debian/patches/series b/debian/patches/series index ffda73fbb33..8a9ecb2e8e2 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -25,3 +25,4 @@ 0025-add-alphasrc-source-video-filter.patch 0026-add-fixes-for-armhf-build-with-gcc-11.patch 0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch +0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch From cc8a2ff0852f3fff24c65449b032492b2d4a85f3 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 29 Nov 2021 01:44:53 +0800 Subject: [PATCH 38/41] add fixes for qsv overlay to allow external pgssubs --- ...sv-overlay-to-allow-external-pgssubs.patch | 92 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 93 insertions(+) create mode 100644 debian/patches/0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch diff --git a/debian/patches/0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch b/debian/patches/0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch new file mode 100644 index 00000000000..b5d682f29e4 --- /dev/null +++ b/debian/patches/0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch @@ -0,0 +1,92 @@ +Index: jellyfin-ffmpeg/libavfilter/vf_overlay_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_overlay_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_overlay_qsv.c +@@ -230,40 +230,48 @@ static int config_overlay_input(AVFilter + + static int process_frame(FFFrameSync *fs) + { +- AVFilterContext *ctx = fs->parent; +- QSVOverlayContext *s = fs->opaque; +- AVFrame *frame = NULL; +- int ret = 0, i; +- +- for (i = 0; i < ctx->nb_inputs; i++) { +- ret = ff_framesync_get_frame(fs, i, &frame, 0); +- if (ret == 0) +- ret = ff_qsvvpp_filter_frame(s->qsv, ctx->inputs[i], frame); +- if (ret < 0 && ret != AVERROR(EAGAIN)) +- break; +- } ++ AVFilterContext *ctx = fs->parent; ++ QSVOverlayContext *s = fs->opaque; ++ AVFilterLink *in0 = ctx->inputs[0]; ++ AVFilterLink *in1 = ctx->inputs[1]; ++ AVFrame *main = NULL; ++ AVFrame *overlay = NULL; ++ int ret = 0; ++ ++ ret = ff_framesync_get_frame(fs, 0, &main, 0); ++ if (ret < 0) ++ return ret; ++ ret = ff_framesync_get_frame(fs, 1, &overlay, 0); ++ if (ret < 0) ++ return ret; + ++ if (!main) ++ return AVERROR_BUG; ++ ++ /* composite main frame */ ++ ret = ff_qsvvpp_filter_frame(s->qsv, in0, main); ++ if (ret < 0 && ret != AVERROR(EAGAIN)) ++ return ret; ++ ++ /* composite overlay frame */ ++ /* or overwrite main frame again if the overlay frame isn't ready yet */ ++ ret = ff_qsvvpp_filter_frame(s->qsv, overlay ? in1 : in0, overlay ? 
overlay : main); + return ret; + } + + static int init_framesync(AVFilterContext *ctx) + { +- QSVOverlayContext *s = ctx->priv; +- int ret, i; ++ QSVOverlayContext *s = ctx->priv; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ int ret; + +- s->fs.on_event = process_frame; +- s->fs.opaque = s; +- ret = ff_framesync_init(&s->fs, ctx, ctx->nb_inputs); ++ ret = ff_framesync_init_dualinput(&s->fs, ctx); + if (ret < 0) + return ret; + +- for (i = 0; i < ctx->nb_inputs; i++) { +- FFFrameSyncIn *in = &s->fs.in[i]; +- in->before = EXT_STOP; +- in->after = EXT_INFINITY; +- in->sync = i ? 1 : 2; +- in->time_base = ctx->inputs[i]->time_base; +- } ++ s->fs.time_base = outlink->time_base; ++ s->fs.on_event = process_frame; ++ s->fs.opaque = s; + + return ff_framesync_configure(&s->fs); + } +@@ -281,14 +289,6 @@ static int config_output(AVFilterLink *o + (in0->format != AV_PIX_FMT_QSV && in1->format == AV_PIX_FMT_QSV)) { + av_log(ctx, AV_LOG_ERROR, "Mixing hardware and software pixel formats is not supported.\n"); + return AVERROR(EINVAL); +- } else if (in0->format == AV_PIX_FMT_QSV) { +- AVHWFramesContext *hw_frame0 = (AVHWFramesContext *)in0->hw_frames_ctx->data; +- AVHWFramesContext *hw_frame1 = (AVHWFramesContext *)in1->hw_frames_ctx->data; +- +- if (hw_frame0->device_ctx != hw_frame1->device_ctx) { +- av_log(ctx, AV_LOG_ERROR, "Inputs with different underlying QSV devices are forbidden.\n"); +- return AVERROR(EINVAL); +- } + } + + outlink->w = vpp->var_values[VAR_MW]; diff --git a/debian/patches/series b/debian/patches/series index 8a9ecb2e8e2..90db6b2ec01 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -26,3 +26,4 @@ 0026-add-fixes-for-armhf-build-with-gcc-11.patch 0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch 0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch +0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch From 0ffdc6e719dcb7d9cb044f2ad6d82a28e43840f4 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sat, 27 Nov 2021 17:43:28 +0800 Subject: [PATCH 39/41] add fixes for a vaapi-qsv mapping error --- ...-fixes-for-a-vaapi-qsv-mapping-error.patch | 61 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 62 insertions(+) create mode 100644 debian/patches/0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch diff --git a/debian/patches/0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch b/debian/patches/0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch new file mode 100644 index 00000000000..b24b23f118d --- /dev/null +++ b/debian/patches/0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch @@ -0,0 +1,61 @@ +Index: jellyfin-ffmpeg/libavcodec/qsvenc_hevc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsvenc_hevc.c ++++ jellyfin-ffmpeg/libavcodec/qsvenc_hevc.c +@@ -260,9 +260,9 @@ static const AVClass class = { + static const AVCodecDefault qsv_enc_defaults[] = { + { "b", "1M" }, + { "refs", "0" }, +- // same as the x264 default ++ // same as the x265 default + { "g", "248" }, +- { "bf", "8" }, ++ { "bf", "4" }, + { "qmin", "-1" }, + { "qmax", "-1" }, + { "trellis", "-1" }, +Index: jellyfin-ffmpeg/libavcodec/vaapi_decode.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/vaapi_decode.c ++++ jellyfin-ffmpeg/libavcodec/vaapi_decode.c +@@ -572,22 +572,24 @@ static int vaapi_decode_make_config(AVCo + if (err < 0) + goto fail; + +- frames->initial_pool_size = 1; +- // Add per-codec number of surfaces used for storing reference 
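
The guard being added in this hunk matters for surface sharing: a caller that creates the VAAPI frames context itself, for example to later map the decoder's surfaces to QSV, already knows the pool size it needs, and the decoder should keep it. A sketch of such a caller under that assumption (vaapi_device_ref and the pool size of 32 are illustrative):

    #include "libavcodec/avcodec.h"
    #include "libavutil/hwcontext.h"

    /* assumed to exist: a VAAPI device from av_hwdevice_ctx_create() */
    extern AVBufferRef *vaapi_device_ref;

    static enum AVPixelFormat get_format(AVCodecContext *avctx,
                                         const enum AVPixelFormat *fmts)
    {
        AVBufferRef *frames_ref = av_hwframe_ctx_alloc(vaapi_device_ref);
        AVHWFramesContext *frames;

        if (!frames_ref)
            return AV_PIX_FMT_NONE;
        frames = (AVHWFramesContext *)frames_ref->data;

        frames->format    = AV_PIX_FMT_VAAPI;
        frames->sw_format = AV_PIX_FMT_NV12;
        frames->width     = avctx->coded_width;
        frames->height    = avctx->coded_height;
        /* a fixed pool size the caller has computed (DPB plus headroom);
         * with the guard above the decoder keeps this value instead of
         * overwriting it with its own heuristic */
        frames->initial_pool_size = 32;

        if (av_hwframe_ctx_init(frames_ref) < 0) {
            av_buffer_unref(&frames_ref);
            return AV_PIX_FMT_NONE;
        }
        avctx->hw_frames_ctx = frames_ref;
        return AV_PIX_FMT_VAAPI;
    }
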
frames. +- switch (avctx->codec_id) { +- case AV_CODEC_ID_H264: +- case AV_CODEC_ID_HEVC: +- frames->initial_pool_size += 16; +- break; +- case AV_CODEC_ID_VP9: +- case AV_CODEC_ID_AV1: +- frames->initial_pool_size += 8; +- break; +- case AV_CODEC_ID_VP8: +- frames->initial_pool_size += 3; +- break; +- default: +- frames->initial_pool_size += 2; ++ if (!frames->initial_pool_size) { ++ frames->initial_pool_size = 1; ++ // Add per-codec number of surfaces used for storing reference frames. ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ case AV_CODEC_ID_HEVC: ++ frames->initial_pool_size += 16; ++ break; ++ case AV_CODEC_ID_VP9: ++ case AV_CODEC_ID_AV1: ++ frames->initial_pool_size += 8; ++ break; ++ case AV_CODEC_ID_VP8: ++ frames->initial_pool_size += 3; ++ break; ++ default: ++ frames->initial_pool_size += 2; ++ } + } + } + diff --git a/debian/patches/series b/debian/patches/series index 90db6b2ec01..1c6e70b155f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -27,3 +27,4 @@ 0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch 0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch 0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch +0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch From d9454b53cc75ac9405c18da5fb2fe7b6796cdb60 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 29 Nov 2021 01:36:37 +0800 Subject: [PATCH 40/41] add a vaapi overlay filter --- .../0031-add-a-vaapi-overlay-filter.patch | 525 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 526 insertions(+) create mode 100644 debian/patches/0031-add-a-vaapi-overlay-filter.patch diff --git a/debian/patches/0031-add-a-vaapi-overlay-filter.patch b/debian/patches/0031-add-a-vaapi-overlay-filter.patch new file mode 100644 index 00000000000..fd41b3f1737 --- /dev/null +++ b/debian/patches/0031-add-a-vaapi-overlay-filter.patch @@ -0,0 +1,525 @@ +Index: jellyfin-ffmpeg/configure +=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ jellyfin-ffmpeg/configure +@@ -3601,6 +3601,7 @@ openclsrc_filter_deps="opencl" + overlay_opencl_filter_deps="opencl" + overlay_qsv_filter_deps="libmfx" + overlay_qsv_filter_select="qsvvpp" ++overlay_vaapi_filter_deps="vaapi" + overlay_vulkan_filter_deps="vulkan libglslang" + owdenoise_filter_deps="gpl" + pad_opencl_filter_deps="opencl" +@@ -3662,6 +3663,7 @@ tonemap_vaapi_filter_deps="vaapi VAProcF + tonemap_opencl_filter_deps="opencl const_nan" + transpose_opencl_filter_deps="opencl" + transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" ++overlay_vaapi_filter_deps="vaapi VAProcPipelineCaps_blend_flags" + unsharp_opencl_filter_deps="opencl" + uspp_filter_deps="gpl avcodec" + vaguedenoiser_filter_deps="gpl" +@@ -6712,6 +6714,7 @@ if enabled vaapi; then + check_struct "va/va.h" "VADecPictureParameterBufferAV1" bit_depth_idx + check_type "va/va.h va/va_vpp.h" "VAProcFilterParameterBufferHDRToneMapping" + check_struct "va/va.h va/va_vpp.h" "VAProcPipelineCaps" rotation_flags ++ check_struct "va/va.h va/va_vpp.h" "VAProcPipelineCaps" blend_flags + check_type "va/va.h va/va_enc_hevc.h" "VAEncPictureParameterBufferHEVC" + check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG" + check_type "va/va.h va/va_enc_vp8.h" "VAEncPictureParameterBufferVP8" +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ 
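
The new configure dependency reduces to a compile probe against libva; what check_struct "va/va.h va/va_vpp.h" "VAProcPipelineCaps" blend_flags effectively verifies is sketched by:

    #include <va/va.h>
    #include <va/va_vpp.h>

    int main(void)
    {
        VAProcPipelineCaps caps = { 0 };
        (void)caps.blend_flags;   /* fails to compile on old libva */
        return 0;
    }

On a libva too old to know about blending the probe fails, overlay_vaapi_filter_deps is unmet, and the filter is quietly left out of the build instead of breaking it.
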
-353,6 +353,7 @@ OBJS-$(CONFIG_OVERLAY_CUDA_FILTER) + OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ + opencl/overlay.o framesync.o + OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o ++OBJS-$(CONFIG_OVERLAY_VAAPI_FILTER) += vf_overlay_vaapi.o framesync.o vaapi_vpp.o + OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o + OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o + OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o +Index: jellyfin-ffmpeg/libavfilter/allfilters.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/allfilters.c ++++ jellyfin-ffmpeg/libavfilter/allfilters.c +@@ -336,6 +336,7 @@ extern AVFilter ff_vf_oscilloscope; + extern AVFilter ff_vf_overlay; + extern AVFilter ff_vf_overlay_opencl; + extern AVFilter ff_vf_overlay_qsv; ++extern AVFilter ff_vf_overlay_vaapi; + extern AVFilter ff_vf_overlay_vulkan; + extern AVFilter ff_vf_overlay_cuda; + extern AVFilter ff_vf_owdenoise; +Index: jellyfin-ffmpeg/libavfilter/vf_overlay_vaapi.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vf_overlay_vaapi.c +@@ -0,0 +1,468 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++#include <string.h>
++
++#include "libavutil/avassert.h"
++#include "libavutil/mem.h"
++#include "libavutil/opt.h"
++#include "libavutil/pixdesc.h"
++
++#include "avfilter.h"
++#include "framesync.h"
++#include "formats.h"
++#include "internal.h"
++#include "vaapi_vpp.h"
++
++typedef struct OverlayVAAPIContext {
++    VAAPIVPPContext vpp_ctx; // must be the first field
++    FFFrameSync     fs;
++    int             global_alpha_flag;
++    int             premultiplied_alpha_flag;
++    int             pixel_alpha_enabled;
++    int             overlay_ox;
++    int             overlay_oy;
++    int             overlay_ow;
++    int             overlay_oh;
++    float           alpha;
++    int             opt_repeatlast;
++    int             opt_shortest;
++    int             opt_eof_action;
++} OverlayVAAPIContext;
++
++static int overlay_vaapi_query_formats(AVFilterContext *ctx)
++{
++    int ret;
++    enum {
++        MAIN    = 0,
++        OVERLAY = 1,
++    };
++
++    static const enum AVPixelFormat pix_fmts[] = {
++        AV_PIX_FMT_VAAPI,
++        AV_PIX_FMT_NONE
++    };
++
++    ret = ff_formats_ref(ff_make_format_list(pix_fmts), &ctx->inputs[MAIN]->outcfg.formats);
++    if (ret < 0)
++        return ret;
++
++    ret = ff_formats_ref(ff_make_format_list(pix_fmts), &ctx->inputs[OVERLAY]->outcfg.formats);
++    if (ret < 0)
++        return ret;
++
++    ret = ff_formats_ref(ff_make_format_list(pix_fmts), &ctx->outputs[0]->incfg.formats);
++    if (ret < 0)
++        return ret;
++
++    return 0;
++}
++
++static int overlay_vaapi_build_filter_params(AVFilterContext *avctx)
++{
++    VAAPIVPPContext *vpp_ctx = avctx->priv;
++    OverlayVAAPIContext *ctx = avctx->priv;
++    VAProcPipelineCaps pipeline_caps;
++    VAStatus vas;
++
++    memset(&pipeline_caps, 0, sizeof(pipeline_caps));
++    vas = vaQueryVideoProcPipelineCaps(vpp_ctx->hwctx->display,
++                                       vpp_ctx->va_context,
++                                       NULL, 0,
++                                       &pipeline_caps);
++    if (vas != VA_STATUS_SUCCESS) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to query pipeline "
++               "caps: %d (%s).\n", vas, vaErrorStr(vas));
++        return AVERROR(EIO);
++    }
++
++    if (!pipeline_caps.blend_flags) {
++        av_log(avctx, AV_LOG_ERROR, "VAAPI driver doesn't support overlay\n");
++        return AVERROR(EINVAL);
++    }
++
++    ctx->global_alpha_flag = pipeline_caps.blend_flags & VA_BLEND_GLOBAL_ALPHA;
++    if (!ctx->global_alpha_flag) {
++        av_log(avctx, AV_LOG_ERROR, "VAAPI driver doesn't support global alpha blending\n");
++        return AVERROR(EINVAL);
++    }
++
++    ctx->premultiplied_alpha_flag = pipeline_caps.blend_flags & VA_BLEND_PREMULTIPLIED_ALPHA;
++    if (!ctx->premultiplied_alpha_flag) {
++        av_log(avctx, AV_LOG_WARNING, "VAAPI driver doesn't support premultiplied alpha blending, "
++               "the alpha plane of the overlay frames will be ignored\n");
++    }
++
++    return 0;
++}
++
++
++static int overlay_vaapi_render_picture(AVFilterContext *avctx,
++                                        VAProcPipelineParameterBuffer *params,
++                                        VAProcPipelineParameterBuffer *subpic_params,
++                                        AVFrame *output_frame,
++                                        int passthrough)
++{
++    VAAPIVPPContext *ctx = avctx->priv;
++    VASurfaceID output_surface;
++    VABufferID params_id;
++    VABufferID subpic_params_id;
++    VAStatus vas;
++    int err = 0;
++
++    output_surface = (VASurfaceID)(uintptr_t)output_frame->data[3];
++
++    vas = vaBeginPicture(ctx->hwctx->display,
++                         ctx->va_context, output_surface);
++    if (vas != VA_STATUS_SUCCESS) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to attach new picture: "
++               "%d (%s).\n", vas, vaErrorStr(vas));
++        err = AVERROR(EIO);
++        goto fail;
++    }
++
++    vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
++ 
VAProcPipelineParameterBufferType, ++ sizeof(*params), 1, params, ¶ms_id); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_begin; ++ } ++ av_log(avctx, AV_LOG_DEBUG, "Pipeline parameter buffer is %#x.\n", ++ params_id); ++ ++ if (!passthrough) { ++ vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context, ++ VAProcPipelineParameterBufferType, ++ sizeof(*subpic_params), 1, subpic_params, &subpic_params_id); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_begin; ++ } ++ av_log(avctx, AV_LOG_DEBUG, "Pipeline subpic parameter buffer is %#x.\n", ++ subpic_params_id); ++ } ++ ++ vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context, ++ ¶ms_id, 1); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to render parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_begin; ++ } ++ ++ if (!passthrough) { ++ vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context, ++ &subpic_params_id, 1); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to render subpic parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_begin; ++ } ++ } ++ ++ vas = vaEndPicture(ctx->hwctx->display, ctx->va_context); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to start picture processing: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_render; ++ } ++ ++ if (CONFIG_VAAPI_1 || ctx->hwctx->driver_quirks & ++ AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS) { ++ vas = vaDestroyBuffer(ctx->hwctx->display, params_id); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to free parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ // And ignore. ++ } ++ } ++ ++ return 0; ++ ++ // We want to make sure that if vaBeginPicture has been called, we also ++ // call vaRenderPicture and vaEndPicture. These calls may well fail or ++ // do something else nasty, but once we're in this failure case there ++ // isn't much else we can do. 
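
The subpicture parameter buffer submitted above is the one that later carries the blend description; the relevant libva structure in isolation (values illustrative):

    #include <va/va.h>
    #include <va/va_vpp.h>

    /* sketch: build the blend description attached to the overlay's
     * pipeline parameter buffer (subpic_params.blend_state = &blend) */
    static VABlendState make_blend_state(float global_alpha, int pixel_alpha)
    {
        VABlendState blend = { 0 };

        blend.flags = VA_BLEND_GLOBAL_ALPHA;    /* uniform transparency */
        if (pixel_alpha)                        /* only when the driver */
            blend.flags |= VA_BLEND_PREMULTIPLIED_ALPHA; /* caps say so */
        blend.global_alpha = global_alpha;      /* 0.0 .. 1.0           */

        return blend;
    }
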
++fail_after_begin: ++ vaRenderPicture(ctx->hwctx->display, ctx->va_context, ¶ms_id, 1); ++fail_after_render: ++ vaEndPicture(ctx->hwctx->display, ctx->va_context); ++fail: ++ return err; ++} ++ ++static int overlay_vaapi_blend(FFFrameSync *fs) ++{ ++ AVFilterContext *avctx = fs->parent; ++ AVFilterLink *outlink = avctx->outputs[0]; ++ OverlayVAAPIContext *ctx = avctx->priv; ++ VAAPIVPPContext *vpp_ctx = avctx->priv; ++ AVFrame *input_main, *input_overlay; ++ AVFrame *output; ++ VAProcPipelineParameterBuffer params, subpic_params; ++ VABlendState blend_state; ++ VARectangle overlay_region, output_region; ++ int err, passthrough = 0; ++ ++ err = ff_framesync_get_frame(fs, 0, &input_main, 0); ++ if (err < 0) ++ return err; ++ err = ff_framesync_get_frame(fs, 1, &input_overlay, 0); ++ if (err < 0) ++ return err; ++ ++ if (!input_main) ++ return AVERROR_BUG; ++ ++ if (!input_overlay) ++ passthrough = 1; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Filter main: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(input_main->format), ++ input_main->width, input_main->height, input_main->pts); ++ ++ if (input_overlay) { ++ av_log(avctx, AV_LOG_DEBUG, "Filter overlay: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(input_overlay->format), ++ input_overlay->width, input_overlay->height, input_overlay->pts); ++ } ++ ++ if (vpp_ctx->va_context == VA_INVALID_ID) ++ return AVERROR(EINVAL); ++ ++ output = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!output) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ err = av_frame_copy_props(output, input_main); ++ if (err < 0) ++ goto fail; ++ ++ err = ff_vaapi_vpp_init_params(avctx, ¶ms, ++ input_main, output); ++ if (err < 0) ++ goto fail; ++ ++ output_region = (VARectangle) { ++ .x = 0, ++ .y = 0, ++ .width = output->width, ++ .height = output->height, ++ }; ++ ++ params.filters = &vpp_ctx->filter_buffers[0]; ++ params.num_filters = vpp_ctx->nb_filter_buffers; ++ ++ params.output_region = &output_region; ++ params.output_background_color = VAAPI_VPP_BACKGROUND_BLACK; ++ ++ if (!passthrough) { ++ overlay_region = (VARectangle) { ++ .x = ctx->overlay_ox, ++ .y = ctx->overlay_oy, ++ .width = ctx->overlay_ow ? ctx->overlay_ow : input_overlay->width, ++ .height = ctx->overlay_oh ? 
ctx->overlay_oh : input_overlay->height,
++        };
++
++        if (overlay_region.x + overlay_region.width > input_main->width ||
++            overlay_region.y + overlay_region.height > input_main->height) {
++            av_log(ctx, AV_LOG_WARNING,
++                   "The overlay image exceeds the bounds of the main image; "
++                   "the overlay will be cropped to fit the main image.\n");
++        }
++
++        memcpy(&subpic_params, &params, sizeof(subpic_params));
++
++        blend_state.flags = VA_BLEND_GLOBAL_ALPHA;
++        if (ctx->pixel_alpha_enabled)
++            blend_state.flags |= VA_BLEND_PREMULTIPLIED_ALPHA;
++
++        blend_state.global_alpha = ctx->alpha;
++        subpic_params.blend_state = &blend_state;
++
++        subpic_params.surface = (VASurfaceID)(uintptr_t)input_overlay->data[3];
++        subpic_params.output_region = &overlay_region;
++    }
++
++    err = overlay_vaapi_render_picture(avctx, &params, &subpic_params, output, passthrough);
++    if (err < 0)
++        goto fail;
++
++    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
++           av_get_pix_fmt_name(output->format),
++           output->width, output->height, output->pts);
++
++    return ff_filter_frame(outlink, output);
++
++fail:
++    av_frame_free(&output);
++    return err;
++}
++
++static int overlay_vaapi_init_framesync(AVFilterContext *avctx)
++{
++    OverlayVAAPIContext *ctx = avctx->priv;
++    AVFilterLink *outlink = avctx->outputs[0];
++    int ret;
++
++    ret = ff_framesync_init_dualinput(&ctx->fs, avctx);
++    if (ret < 0)
++        return ret;
++
++    ctx->fs.opt_repeatlast = ctx->opt_repeatlast;
++    ctx->fs.opt_shortest   = ctx->opt_shortest;
++    ctx->fs.opt_eof_action = ctx->opt_eof_action;
++    ctx->fs.time_base      = outlink->time_base;
++    ctx->fs.on_event       = overlay_vaapi_blend;
++    ctx->fs.opaque         = ctx;
++
++    return ff_framesync_configure(&ctx->fs);
++}
++
++static int overlay_vaapi_config_output(AVFilterLink *outlink)
++{
++    AVFilterContext *avctx   = outlink->src;
++    OverlayVAAPIContext *ctx = avctx->priv;
++    VAAPIVPPContext *vpp_ctx = avctx->priv;
++    AVFilterLink *inlink0 = avctx->inputs[0];
++    AVFilterLink *inlink1 = avctx->inputs[1];
++    AVHWFramesContext *frames_ctx1 =
++        (AVHWFramesContext*)inlink1->hw_frames_ctx->data;
++    const AVPixFmtDescriptor *desc;
++    int err;
++
++    outlink->time_base = inlink0->time_base;
++    vpp_ctx->output_width = inlink0->w;
++    vpp_ctx->output_height = inlink0->h;
++
++    err = overlay_vaapi_init_framesync(avctx);
++    if (err < 0)
++        return err;
++
++    err = ff_vaapi_vpp_config_output(outlink);
++    if (err < 0)
++        return err;
++
++    desc = av_pix_fmt_desc_get(frames_ctx1->sw_format);
++    if (!desc)
++        return AVERROR(EINVAL);
++
++    ctx->pixel_alpha_enabled = (desc->flags & AV_PIX_FMT_FLAG_ALPHA)
++                               && ctx->premultiplied_alpha_flag;
++
++    return 0;
++}
++
++static av_cold int overlay_vaapi_init(AVFilterContext *avctx)
++{
++    VAAPIVPPContext *vpp_ctx = avctx->priv;
++
++    ff_vaapi_vpp_ctx_init(avctx);
++    vpp_ctx->build_filter_params = overlay_vaapi_build_filter_params;
++    vpp_ctx->pipeline_uninit     = ff_vaapi_vpp_pipeline_uninit;
++    vpp_ctx->output_format       = AV_PIX_FMT_NONE;
++
++    return 0;
++}
++
++static int overlay_vaapi_activate(AVFilterContext *avctx)
++{
++    OverlayVAAPIContext *ctx = avctx->priv;
++
++    return ff_framesync_activate(&ctx->fs);
++}
++
++static av_cold void overlay_vaapi_uninit(AVFilterContext *avctx)
++{
++    OverlayVAAPIContext *ctx = avctx->priv;
++
++    ff_framesync_uninit(&ctx->fs);
++}
++
++#define OFFSET(x) offsetof(OverlayVAAPIContext, x)
++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
++static const AVOption overlay_vaapi_options[] = {
++    { "x", "Overlay x position",
++ 
OFFSET(overlay_ox), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "y", "Overlay y position", ++ OFFSET(overlay_oy), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "w", "Overlay width", ++ OFFSET(overlay_ow), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "h", "Overlay height", ++ OFFSET(overlay_oh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "alpha", "Overlay global alpha", ++ OFFSET(alpha), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 1.0, .flags = FLAGS }, ++ { "eof_action", "Action to take when encountering EOF from secondary input ", ++ OFFSET(opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT }, ++ EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" }, ++ { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" }, ++ { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" }, ++ { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, "eof_action" }, ++ { "shortest", "force termination when the shortest input terminates", OFFSET(opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++ { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, ++ { NULL }, ++}; ++ ++AVFILTER_DEFINE_CLASS(overlay_vaapi); ++ ++static const AVFilterPad overlay_vaapi_inputs[] = { ++ { ++ .name = "main", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .get_video_buffer = ff_default_get_video_buffer, ++ .config_props = &ff_vaapi_vpp_config_input, ++ }, ++ { ++ .name = "overlay", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .get_video_buffer = ff_default_get_video_buffer, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad overlay_vaapi_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = &overlay_vaapi_config_output, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vf_overlay_vaapi = { ++ .name = "overlay_vaapi", ++ .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"), ++ .priv_size = sizeof(OverlayVAAPIContext), ++ .priv_class = &overlay_vaapi_class, ++ .init = &overlay_vaapi_init, ++ .uninit = &overlay_vaapi_uninit, ++ .query_formats = &overlay_vaapi_query_formats, ++ .activate = &overlay_vaapi_activate, ++ .inputs = overlay_vaapi_inputs, ++ .outputs = overlay_vaapi_outputs, ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/debian/patches/series b/debian/patches/series index 1c6e70b155f..ec652d1469b 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -28,3 +28,4 @@ 0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch 0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch 0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch +0031-add-a-vaapi-overlay-filter.patch From 205cbdd109d9a32d6d501f2d53b3986574b481a3 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sat, 18 Dec 2021 00:00:04 +0800 Subject: [PATCH 41/41] add async support for qsv vpp --- .../0032-add-async-support-for-qsv-vpp.patch | 2784 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 2785 insertions(+) create mode 100644 debian/patches/0032-add-async-support-for-qsv-vpp.patch diff --git a/debian/patches/0032-add-async-support-for-qsv-vpp.patch b/debian/patches/0032-add-async-support-for-qsv-vpp.patch new file mode 100644 index 00000000000..bd14e399eee --- /dev/null +++ 
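
With the filter wired into configure, the Makefile and allfilters.c as shown above, a VAAPI-enabled build can use it directly; an illustrative invocation (paths and option values hypothetical):

    ffmpeg -init_hw_device vaapi=va:/dev/dri/renderD128 -filter_hw_device va \
           -hwaccel vaapi -hwaccel_output_format vaapi -i main.mp4 -i logo.png \
           -filter_complex "[1:v]format=bgra,hwupload[ovl];[0:v][ovl]overlay_vaapi=x=40:y=40:alpha=0.8" \
           -c:v h264_vaapi out.mp4

The main input stays on the GPU end to end while the RGBA overlay is uploaded to the same device; eof_action, shortest and repeatlast are forwarded to the framesync layer, so end-of-stream behaviour matches the software overlay filter.
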
b/debian/patches/0032-add-async-support-for-qsv-vpp.patch @@ -0,0 +1,2784 @@ +Index: jellyfin-ffmpeg/libavcodec/qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsv.c ++++ jellyfin-ffmpeg/libavcodec/qsv.c +@@ -191,7 +191,7 @@ int ff_qsv_print_warning(void *log_ctx, + const char *desc; + int ret; + ret = ff_qsv_map_error(err, &desc); +- av_log(log_ctx, AV_LOG_WARNING, "%s: %s (%d)\n", warning_string, desc, err); ++ av_log(log_ctx, AV_LOG_VERBOSE, "%s: %s (%d)\n", warning_string, desc, err); + return ret; + } + +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ -231,7 +231,7 @@ OBJS-$(CONFIG_DECONVOLVE_FILTER) + OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o + OBJS-$(CONFIG_DEFLATE_FILTER) += vf_neighbor.o + OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o +-OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_deinterlace_qsv.o ++OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_vpp_qsv.o + OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o + OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o + OBJS-$(CONFIG_DELOGO_FILTER) += vf_delogo.o +@@ -397,7 +397,7 @@ OBJS-$(CONFIG_SCALE_FILTER) + OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o scale_eval.o + OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o + OBJS-$(CONFIG_SCALE_OPENCL_FILTER) += vf_scale_opencl.o opencl.o opencl/scale.o scale_eval.o +-OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o ++OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_vpp_qsv.o + OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o + OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o + OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale_eval.o +Index: jellyfin-ffmpeg/libavfilter/qsvvpp.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/qsvvpp.c ++++ jellyfin-ffmpeg/libavfilter/qsvvpp.c +@@ -36,38 +36,6 @@ + MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET)) + #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME) + #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY) +- +-typedef struct QSVFrame { +- AVFrame *frame; +- mfxFrameSurface1 *surface; +- mfxFrameSurface1 surface_internal; /* for system memory */ +- struct QSVFrame *next; +-} QSVFrame; +- +-/* abstract struct for all QSV filters */ +-struct QSVVPPContext { +- mfxSession session; +- int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback */ +- enum AVPixelFormat out_sw_format; /* Real output format */ +- mfxVideoParam vpp_param; +- mfxFrameInfo *frame_infos; /* frame info for each input */ +- +- /* members related to the input/output surface */ +- int in_mem_mode; +- int out_mem_mode; +- QSVFrame *in_frame_list; +- QSVFrame *out_frame_list; +- int nb_surface_ptrs_in; +- int nb_surface_ptrs_out; +- mfxFrameSurface1 **surface_ptrs_in; +- mfxFrameSurface1 **surface_ptrs_out; +- +- /* MFXVPP extern parameters */ +- mfxExtOpaqueSurfaceAlloc opaque_alloc; +- mfxExtBuffer **ext_buffers; +- int nb_ext_buffers; +-}; +- + #define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) + + static const AVRational default_tb = { 1, 90000 }; +@@ -172,7 +140,7 @@ int ff_qsvvpp_print_warning(void *log_ct + const char *desc; + int ret; + ret = qsv_map_error(err, &desc); +- av_log(log_ctx, AV_LOG_WARNING, "%s: %s (%d)\n", warning_string, desc, err); ++ 
av_log(log_ctx, AV_LOG_VERBOSE, "%s: %s (%d)\n", warning_string, desc, err); + return ret; + } + +@@ -329,6 +297,14 @@ static int fill_frameinfo_by_link(mfxFra + frameinfo->CropH = link->h; + frameinfo->FrameRateExtN = link->frame_rate.num; + frameinfo->FrameRateExtD = link->frame_rate.den; ++ ++ /* Apparently VPP in the SDK requires the frame rate to be set to some value, otherwise ++ * init will fail */ ++ if (frameinfo->FrameRateExtD == 0 || frameinfo->FrameRateExtN == 0) { ++ frameinfo->FrameRateExtN = 25; ++ frameinfo->FrameRateExtD = 1; ++ } ++ + frameinfo->AspectRatioW = link->sample_aspect_ratio.num ? link->sample_aspect_ratio.num : 1; + frameinfo->AspectRatioH = link->sample_aspect_ratio.den ? link->sample_aspect_ratio.den : 1; + +@@ -338,9 +314,11 @@ static int fill_frameinfo_by_link(mfxFra + static void clear_unused_frames(QSVFrame *list) + { + while (list) { +- if (list->surface && !list->surface->Data.Locked) { +- list->surface = NULL; ++ /* list->queued==1 means the frame is not cached in VPP ++ * process any more, it can be released to pool. */ ++ if ((list->queued == 1) && !list->surface.Data.Locked) { + av_frame_free(&list->frame); ++ list->queued = 0; + } + list = list->next; + } +@@ -363,8 +341,10 @@ static QSVFrame *get_free_frame(QSVFrame + QSVFrame *out = *list; + + for (; out; out = out->next) { +- if (!out->surface) ++ if (!out->queued) { ++ out->queued = 1; + break; ++ } + } + + if (!out) { +@@ -373,8 +353,9 @@ static QSVFrame *get_free_frame(QSVFrame + av_log(NULL, AV_LOG_ERROR, "Can't alloc new output frame.\n"); + return NULL; + } +- out->next = *list; +- *list = out; ++ out->queued = 1; ++ out->next = *list; ++ *list = out; + } + + return out; +@@ -404,7 +385,7 @@ static QSVFrame *submit_frame(QSVVPPCont + return NULL; + } + qsv_frame->frame = av_frame_clone(picref); +- qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3]; ++ qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame->data[3]; + } else { + /* make a copy if the input is not padded as libmfx requires */ + if (picref->height & 31 || picref->linesize[0] & 31) { +@@ -427,27 +408,26 @@ static QSVFrame *submit_frame(QSVVPPCont + qsv_frame->frame = av_frame_clone(picref); + + if (map_frame_to_surface(qsv_frame->frame, +- &qsv_frame->surface_internal) < 0) { ++ &qsv_frame->surface) < 0) { + av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n"); + return NULL; + } +- qsv_frame->surface = &qsv_frame->surface_internal; + } + +- qsv_frame->surface->Info = s->frame_infos[FF_INLINK_IDX(inlink)]; +- qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts, ++ qsv_frame->surface.Info = s->frame_infos[FF_INLINK_IDX(inlink)]; ++ qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts, + inlink->time_base, default_tb); + +- qsv_frame->surface->Info.PicStruct = ++ qsv_frame->surface.Info.PicStruct = + !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE : + (qsv_frame->frame->top_field_first ? 
MFX_PICSTRUCT_FIELD_TFF : + MFX_PICSTRUCT_FIELD_BFF); + if (qsv_frame->frame->repeat_pict == 1) +- qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; ++ qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; + else if (qsv_frame->frame->repeat_pict == 2) +- qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; ++ qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; + else if (qsv_frame->frame->repeat_pict == 4) +- qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; ++ qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; + + return qsv_frame; + } +@@ -478,7 +458,7 @@ static QSVFrame *query_frame(QSVVPPConte + return NULL; + } + +- out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3]; ++ out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame->data[3]; + } else { + /* Get a frame with aligned dimensions. + * Libmfx need system memory being 128x64 aligned */ +@@ -489,16 +469,14 @@ static QSVFrame *query_frame(QSVVPPConte + return NULL; + + ret = map_frame_to_surface(out_frame->frame, +- &out_frame->surface_internal); ++ &out_frame->surface); + if (ret < 0) + return NULL; +- +- out_frame->surface = &out_frame->surface_internal; + } + + out_frame->frame->width = outlink->w; + out_frame->frame->height = outlink->h; +- out_frame->surface->Info = s->vpp_param.vpp.Out; ++ out_frame->surface.Info = s->vpp_param.vpp.Out; + + return out_frame; + } +@@ -671,16 +649,23 @@ static int init_vpp_session(AVFilterCont + return 0; + } + +-int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param) ++static unsigned int qsv_fifo_item_size(void) ++{ ++ return sizeof(mfxSyncPoint) + sizeof(QSVFrame*); ++} ++ ++static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo) ++{ ++ return av_fifo_size(fifo)/qsv_fifo_item_size(); ++} ++ ++int ff_qsvvpp_init(AVFilterContext *avctx, QSVVPPParam *param) + { + int i; + int ret; +- QSVVPPContext *s; +- +- s = av_mallocz(sizeof(*s)); +- if (!s) +- return AVERROR(ENOMEM); ++ QSVVPPContext *s = avctx->priv; + ++ s->last_in_pts = AV_NOPTS_VALUE; + s->filter_frame = param->filter_frame; + if (!s->filter_frame) + s->filter_frame = ff_filter_frame; +@@ -743,7 +728,16 @@ int ff_qsvvpp_create(AVFilterContext *av + s->vpp_param.ExtParam = param->ext_buf; + } + +- s->vpp_param.AsyncDepth = 1; ++ s->got_frame = 0; ++ ++ /** keep fifo size at least 1. Even when async_depth is 0, fifo is used. 
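
Each element queued in the new fifo is really a pair, the in-flight QSVFrame pointer followed by its mfxSyncPoint, which is why the item size is the sum of the two and why reads must mirror the write order. The retire step in isolation (a sketch against the 4.4 AVFifoBuffer API; retire_one is a hypothetical name):

    #include "libavutil/fifo.h"
    #include "qsvvpp.h"   /* QSVFrame, mfxSession, mfxSyncPoint */

    /* sketch: pop and synchronize the oldest (frame, sync) pair once more
     * than async_depth pairs are queued; returns NULL when nothing is due */
    static QSVFrame *retire_one(AVFifoBuffer *fifo, mfxSession session,
                                int async_depth)
    {
        QSVFrame    *f;
        mfxSyncPoint sync;
        mfxStatus    err;

        if (av_fifo_size(fifo) / (int)(sizeof(f) + sizeof(sync)) <= async_depth)
            return NULL;                 /* keep the pipeline filled */

        /* reads must mirror the write order: frame first, then sync */
        av_fifo_generic_read(fifo, &f,    sizeof(f),    NULL);
        av_fifo_generic_read(fifo, &sync, sizeof(sync), NULL);

        do {                             /* block until the VPP is done */
            err = MFXVideoCORE_SyncOperation(session, sync, 1000);
        } while (err == MFX_WRN_IN_EXECUTION);

        return err < 0 ? NULL : f;
    }
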
*/ ++ s->async_fifo = av_fifo_alloc((s->async_depth + 1) * qsv_fifo_item_size()); ++ if (!s->async_fifo) { ++ ret = AVERROR(ENOMEM); ++ goto failed; ++ } ++ ++ s->vpp_param.AsyncDepth = s->async_depth; + + if (IS_SYSTEM_MEMORY(s->in_mem_mode)) + s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY; +@@ -770,27 +764,26 @@ int ff_qsvvpp_create(AVFilterContext *av + } else if (ret > 0) + ff_qsvvpp_print_warning(avctx, ret, "Warning When creating qsvvpp"); + +- *vpp = s; + return 0; + + failed: +- ff_qsvvpp_free(&s); ++ ff_qsvvpp_close(avctx); + + return ret; + } + +-int ff_qsvvpp_free(QSVVPPContext **vpp) ++int ff_qsvvpp_close(AVFilterContext *avctx) + { +- QSVVPPContext *s = *vpp; +- +- if (!s) +- return 0; ++ QSVVPPContext *s = avctx->priv; + + if (s->session) { + MFXVideoVPP_Close(s->session); + MFXClose(s->session); ++ s->session = NULL; + } + ++ s->last_in_pts = AV_NOPTS_VALUE; ++ + /* release all the resources */ + clear_frame_list(&s->in_frame_list); + clear_frame_list(&s->out_frame_list); +@@ -798,7 +791,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp) + av_freep(&s->surface_ptrs_out); + av_freep(&s->ext_buffers); + av_freep(&s->frame_infos); +- av_freep(vpp); ++ av_fifo_free(s->async_fifo); + + return 0; + } +@@ -808,8 +801,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = ctx->outputs[0]; + mfxSyncPoint sync; +- QSVFrame *in_frame, *out_frame; +- int ret, filter_ret; ++ QSVFrame *in_frame, *out_frame, *tmp; ++ int ret, ret1, filter_ret; ++ int64_t dpts = 0; ++ ++ while (s->eof && qsv_fifo_size(s->async_fifo)) { ++ av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL); ++ av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL); ++ if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) ++ av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); ++ ++ filter_ret = s->filter_frame(outlink, tmp->frame); ++ if (filter_ret < 0) { ++ av_frame_free(&tmp->frame); ++ ret = filter_ret; ++ break; ++ } ++ tmp->queued--; ++ s->got_frame = 1; ++ tmp->frame = NULL; ++ }; ++ ++ if (!picref) ++ return 0; + + in_frame = submit_frame(s, inlink, picref); + if (!in_frame) { +@@ -826,8 +840,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext + } + + do { +- ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface, +- out_frame->surface, NULL, &sync); ++ ret = MFXVideoVPP_RunFrameVPPAsync(s->session, &in_frame->surface, ++ &out_frame->surface, NULL, &sync); + if (ret == MFX_WRN_DEVICE_BUSY) + av_usleep(500); + } while (ret == MFX_WRN_DEVICE_BUSY); +@@ -839,20 +853,63 @@ int ff_qsvvpp_filter_frame(QSVVPPContext + break; + } + +- if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) +- av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); ++ if (in_frame->frame->color_primaries != -1) ++ out_frame->frame->color_primaries = in_frame->frame->color_primaries; ++ if (in_frame->frame->color_trc != -1) ++ out_frame->frame->color_trc = in_frame->frame->color_trc; ++ if (in_frame->frame->colorspace != -1) ++ out_frame->frame->colorspace = in_frame->frame->colorspace; ++ if (in_frame->frame->color_range != -1) ++ out_frame->frame->color_range = in_frame->frame->color_range; + +- out_frame->frame->pts = av_rescale_q(out_frame->surface->Data.TimeStamp, +- default_tb, outlink->time_base); ++ ret = av_frame_copy_side_data(out_frame->frame, in_frame->frame, 0); ++ if (ret < 0) ++ return ret; + +- filter_ret = s->filter_frame(outlink, out_frame->frame); +- if (filter_ret < 0) { +- av_frame_free(&out_frame->frame); +- ret = filter_ret; +- break; ++ /* 
TODO: calculate the PTS for other cases */ ++ if (s->deinterlace_enabled && ++ s->last_in_pts != AV_NOPTS_VALUE && ++ ret == MFX_ERR_MORE_SURFACE && ++ out_frame->surface.Data.TimeStamp == MFX_TIMESTAMP_UNKNOWN) ++ dpts = (in_frame->frame->pts - s->last_in_pts) / 2; ++ else ++ dpts = 0; ++ ++ out_frame->frame->pts = av_rescale_q(in_frame->frame->pts - dpts, ++ inlink->time_base, ++ outlink->time_base); ++ ++ out_frame->queued++; ++ av_fifo_generic_write(s->async_fifo, &out_frame, sizeof(out_frame), NULL); ++ av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), NULL); ++ ++ if (qsv_fifo_size(s->async_fifo) > s->async_depth) { ++ av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL); ++ av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL); ++ ++ do { ++ ret1 = MFXVideoCORE_SyncOperation(s->session, sync, 1000); ++ } while (ret1 == MFX_WRN_IN_EXECUTION); ++ ++ if (ret1 < 0) { ++ ret = ret1; ++ break; ++ } ++ ++ filter_ret = s->filter_frame(outlink, tmp->frame); ++ if (filter_ret < 0) { ++ av_frame_free(&tmp->frame); ++ ret = filter_ret; ++ break; ++ } ++ ++ tmp->queued--; ++ s->got_frame = 1; ++ tmp->frame = NULL; + } +- out_frame->frame = NULL; +- } while(ret == MFX_ERR_MORE_SURFACE); ++ } while (ret == MFX_ERR_MORE_SURFACE); ++ ++ s->last_in_pts = in_frame->frame->pts; + + return ret; + } +Index: jellyfin-ffmpeg/libavfilter/qsvvpp.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/qsvvpp.h ++++ jellyfin-ffmpeg/libavfilter/qsvvpp.h +@@ -27,6 +27,7 @@ + #include + + #include "avfilter.h" ++#include "libavutil/fifo.h" + + #define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst->input_pads)) + #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads)) +@@ -39,7 +40,46 @@ + ((MFX_VERSION.Major > (MAJOR)) || \ + (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR))) + +-typedef struct QSVVPPContext QSVVPPContext; ++typedef struct QSVFrame { ++ AVFrame *frame; ++ mfxFrameSurface1 surface; ++ struct QSVFrame *next; ++ int queued; ++} QSVFrame; ++ ++typedef struct QSVVPPContext { ++ const AVClass *class; ++ ++ mfxSession session; ++ int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); /**< callback */ ++ enum AVPixelFormat out_sw_format; /**< Real output format */ ++ mfxVideoParam vpp_param; ++ mfxFrameInfo *frame_infos; /**< frame info for each input */ ++ ++ /** members related to the input/output surface */ ++ int in_mem_mode; ++ int out_mem_mode; ++ QSVFrame *in_frame_list; ++ QSVFrame *out_frame_list; ++ int nb_surface_ptrs_in; ++ int nb_surface_ptrs_out; ++ mfxFrameSurface1 **surface_ptrs_in; ++ mfxFrameSurface1 **surface_ptrs_out; ++ ++ /** MFXVPP extern parameters */ ++ mfxExtOpaqueSurfaceAlloc opaque_alloc; ++ mfxExtBuffer **ext_buffers; ++ int nb_ext_buffers; ++ ++ int got_frame; ++ int async_depth; ++ int eof; ++ int deinterlace_enabled; ++ /** order with frame_out, sync */ ++ AVFifoBuffer *async_fifo; ++ ++ int64_t last_in_pts; ++} QSVVPPContext; + + typedef struct QSVVPPCrop { + int in_idx; ///< Input index +@@ -63,10 +103,10 @@ typedef struct QSVVPPParam { + } QSVVPPParam; + + /* create and initialize the QSV session */ +-int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param); ++int ff_qsvvpp_init(AVFilterContext *avctx, QSVVPPParam *param); + + /* release the resources (eg.surfaces) */ +-int ff_qsvvpp_free(QSVVPPContext **vpp); ++int ff_qsvvpp_close(AVFilterContext *avctx); + + /* vpp filter frame and call the cb if needed */ + 
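
The dpts computation above covers double-rate deinterlacing, where the SDK emits the second field-frame with an unknown timestamp: that frame belongs halfway between two inputs. As a standalone helper (sketch):

    #include "libavutil/mathematics.h"

    /* pts of the extra field-frame emitted between two input frames
     * when deinterlacing doubles the frame rate */
    static int64_t field_frame_pts(int64_t cur_in_pts, int64_t last_in_pts,
                                   AVRational in_tb, AVRational out_tb)
    {
        int64_t dpts = (cur_in_pts - last_in_pts) / 2;
        return av_rescale_q(cur_in_pts - dpts, in_tb, out_tb);
    }

With a constant input cadence this places the extra frame exactly between the two source timestamps; as the TODO notes, other rate conversions still fall back to dpts = 0.
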
int ff_qsvvpp_filter_frame(QSVVPPContext *vpp, AVFilterLink *inlink, AVFrame *frame); +Index: jellyfin-ffmpeg/libavfilter/vf_deinterlace_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_deinterlace_qsv.c ++++ /dev/null +@@ -1,625 +0,0 @@ +-/* +- * This file is part of FFmpeg. +- * +- * FFmpeg is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. +- * +- * FFmpeg is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with FFmpeg; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- */ +- +-/** +- * @file +- * deinterlace video filter - QSV +- */ +- +-#include +- +-#include +-#include +- +-#include "libavutil/avstring.h" +-#include "libavutil/common.h" +-#include "libavutil/hwcontext.h" +-#include "libavutil/hwcontext_qsv.h" +-#include "libavutil/internal.h" +-#include "libavutil/mathematics.h" +-#include "libavutil/opt.h" +-#include "libavutil/pixdesc.h" +-#include "libavutil/time.h" +-#include "libavfilter/qsvvpp.h" +- +-#include "avfilter.h" +-#include "formats.h" +-#include "internal.h" +-#include "video.h" +- +-#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) +- +-enum { +- QSVDEINT_MORE_OUTPUT = 1, +- QSVDEINT_MORE_INPUT, +-}; +- +-typedef struct QSVFrame { +- AVFrame *frame; +- mfxFrameSurface1 surface; +- int used; +- +- struct QSVFrame *next; +-} QSVFrame; +- +-typedef struct QSVDeintContext { +- const AVClass *class; +- +- AVBufferRef *hw_frames_ctx; +- /* a clone of the main session, used internally for deinterlacing */ +- mfxSession session; +- +- mfxMemId *mem_ids; +- int nb_mem_ids; +- +- mfxFrameSurface1 **surface_ptrs; +- int nb_surface_ptrs; +- +- mfxExtOpaqueSurfaceAlloc opaque_alloc; +- mfxExtVPPDeinterlacing deint_conf; +- mfxExtBuffer *ext_buffers[2]; +- int num_ext_buffers; +- +- QSVFrame *work_frames; +- +- int64_t last_pts; +- +- int eof; +- +- /* option for Deinterlacing algorithm to be used */ +- int mode; +-} QSVDeintContext; +- +-static av_cold void qsvdeint_uninit(AVFilterContext *ctx) +-{ +- QSVDeintContext *s = ctx->priv; +- QSVFrame *cur; +- +- if (s->session) { +- MFXClose(s->session); +- s->session = NULL; +- } +- av_buffer_unref(&s->hw_frames_ctx); +- +- cur = s->work_frames; +- while (cur) { +- s->work_frames = cur->next; +- av_frame_free(&cur->frame); +- av_freep(&cur); +- cur = s->work_frames; +- } +- +- av_freep(&s->mem_ids); +- s->nb_mem_ids = 0; +- +- av_freep(&s->surface_ptrs); +- s->nb_surface_ptrs = 0; +-} +- +-static int qsvdeint_query_formats(AVFilterContext *ctx) +-{ +- static const enum AVPixelFormat pixel_formats[] = { +- AV_PIX_FMT_QSV, AV_PIX_FMT_NONE, +- }; +- AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); +- int ret; +- +- if ((ret = ff_set_common_formats(ctx, pix_fmts)) < 0) +- return ret; +- +- return 0; +-} +- +-static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req, +- mfxFrameAllocResponse *resp) +-{ +- AVFilterContext *ctx = pthis; +- QSVDeintContext *s = ctx->priv; +- +- if (!(req->Type & 
MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) || +- !(req->Type & (MFX_MEMTYPE_FROM_VPPIN | MFX_MEMTYPE_FROM_VPPOUT)) || +- !(req->Type & MFX_MEMTYPE_EXTERNAL_FRAME)) +- return MFX_ERR_UNSUPPORTED; +- +- resp->mids = s->mem_ids; +- resp->NumFrameActual = s->nb_mem_ids; +- +- return MFX_ERR_NONE; +-} +- +-static mfxStatus frame_free(mfxHDL pthis, mfxFrameAllocResponse *resp) +-{ +- return MFX_ERR_NONE; +-} +- +-static mfxStatus frame_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) +-{ +- return MFX_ERR_UNSUPPORTED; +-} +- +-static mfxStatus frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) +-{ +- return MFX_ERR_UNSUPPORTED; +-} +- +-static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) +-{ +- mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; +- mfxHDLPair *pair_src = (mfxHDLPair*)mid; +- +- pair_dst->first = pair_src->first; +- +- if (pair_src->second != (mfxMemId)MFX_INFINITE) +- pair_dst->second = pair_src->second; +- return MFX_ERR_NONE; +-} +- +-static int init_out_session(AVFilterContext *ctx) +-{ +- +- QSVDeintContext *s = ctx->priv; +- AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data; +- AVQSVFramesContext *hw_frames_hwctx = hw_frames_ctx->hwctx; +- AVQSVDeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; +- +- int opaque = !!(hw_frames_hwctx->frame_type & MFX_MEMTYPE_OPAQUE_FRAME); +- +- mfxHDL handle = NULL; +- mfxHandleType handle_type; +- mfxVersion ver; +- mfxIMPL impl; +- mfxVideoParam par; +- mfxStatus err; +- int i; +- +- /* extract the properties of the "master" session given to us */ +- err = MFXQueryIMPL(device_hwctx->session, &impl); +- if (err == MFX_ERR_NONE) +- err = MFXQueryVersion(device_hwctx->session, &ver); +- if (err != MFX_ERR_NONE) { +- av_log(ctx, AV_LOG_ERROR, "Error querying the session attributes\n"); +- return AVERROR_UNKNOWN; +- } +- +- if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_VA_DISPLAY; +- } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D11_DEVICE; +- } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; +- } else { +- av_log(ctx, AV_LOG_ERROR, "Error unsupported handle type\n"); +- return AVERROR_UNKNOWN; +- } +- +- err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, "Error getting the session handle"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, "Warning in getting the session handle"); +- return AVERROR_UNKNOWN; +- } +- +- /* create a "slave" session with those same properties, to be used for +- * actual deinterlacing */ +- err = MFXInit(impl, &ver, &s->session); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, "Error initializing a session for deinterlacing"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, "Warning in session initialization"); +- return AVERROR_UNKNOWN; +- } +- +- if (handle) { +- err = MFXVideoCORE_SetHandle(s->session, handle_type, handle); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- } +- +- if (QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) { +- err = MFXJoinSession(device_hwctx->session, s->session); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- } +- +- memset(&par, 0, sizeof(par)); +- +- s->deint_conf.Header.BufferId = MFX_EXTBUFF_VPP_DEINTERLACING; +- s->deint_conf.Header.BufferSz = sizeof(s->deint_conf); +- s->deint_conf.Mode = s->mode; +- +- s->ext_buffers[s->num_ext_buffers++] = (mfxExtBuffer 
*)&s->deint_conf; +- +- if (opaque) { +- s->surface_ptrs = av_mallocz_array(hw_frames_hwctx->nb_surfaces, +- sizeof(*s->surface_ptrs)); +- if (!s->surface_ptrs) +- return AVERROR(ENOMEM); +- for (i = 0; i < hw_frames_hwctx->nb_surfaces; i++) +- s->surface_ptrs[i] = hw_frames_hwctx->surfaces + i; +- s->nb_surface_ptrs = hw_frames_hwctx->nb_surfaces; +- +- s->opaque_alloc.In.Surfaces = s->surface_ptrs; +- s->opaque_alloc.In.NumSurface = s->nb_surface_ptrs; +- s->opaque_alloc.In.Type = hw_frames_hwctx->frame_type; +- +- s->opaque_alloc.Out = s->opaque_alloc.In; +- +- s->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; +- s->opaque_alloc.Header.BufferSz = sizeof(s->opaque_alloc); +- +- s->ext_buffers[s->num_ext_buffers++] = (mfxExtBuffer *)&s->opaque_alloc; +- +- par.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY | MFX_IOPATTERN_OUT_OPAQUE_MEMORY; +- } else { +- mfxFrameAllocator frame_allocator = { +- .pthis = ctx, +- .Alloc = frame_alloc, +- .Lock = frame_lock, +- .Unlock = frame_unlock, +- .GetHDL = frame_get_hdl, +- .Free = frame_free, +- }; +- +- s->mem_ids = av_mallocz_array(hw_frames_hwctx->nb_surfaces, +- sizeof(*s->mem_ids)); +- if (!s->mem_ids) +- return AVERROR(ENOMEM); +- for (i = 0; i < hw_frames_hwctx->nb_surfaces; i++) +- s->mem_ids[i] = hw_frames_hwctx->surfaces[i].Data.MemId; +- s->nb_mem_ids = hw_frames_hwctx->nb_surfaces; +- +- err = MFXVideoCORE_SetFrameAllocator(s->session, &frame_allocator); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- +- par.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; +- } +- +- par.ExtParam = s->ext_buffers; +- par.NumExtParam = s->num_ext_buffers; +- +- par.AsyncDepth = 1; // TODO async +- +- par.vpp.In = hw_frames_hwctx->surfaces[0].Info; +- +- par.vpp.In.CropW = ctx->inputs[0]->w; +- par.vpp.In.CropH = ctx->inputs[0]->h; +- +- if (ctx->inputs[0]->frame_rate.num) { +- par.vpp.In.FrameRateExtN = ctx->inputs[0]->frame_rate.num; +- par.vpp.In.FrameRateExtD = ctx->inputs[0]->frame_rate.den; +- } else { +- par.vpp.In.FrameRateExtN = ctx->inputs[0]->time_base.num; +- par.vpp.In.FrameRateExtD = ctx->inputs[0]->time_base.den; +- } +- +- par.vpp.Out = par.vpp.In; +- +- if (ctx->outputs[0]->frame_rate.num) { +- par.vpp.Out.FrameRateExtN = ctx->outputs[0]->frame_rate.num; +- par.vpp.Out.FrameRateExtD = ctx->outputs[0]->frame_rate.den; +- } else { +- par.vpp.Out.FrameRateExtN = ctx->outputs[0]->time_base.num; +- par.vpp.Out.FrameRateExtD = ctx->outputs[0]->time_base.den; +- } +- +- /* Print input memory mode */ +- ff_qsvvpp_print_iopattern(ctx, par.IOPattern & 0x0F, "VPP"); +- /* Print output memory mode */ +- ff_qsvvpp_print_iopattern(ctx, par.IOPattern & 0xF0, "VPP"); +- err = MFXVideoVPP_Init(s->session, &par); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, +- "Error opening the VPP for deinterlacing"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, +- "Warning in VPP initialization"); +- return AVERROR_UNKNOWN; +- } +- +- return 0; +-} +- +-static int qsvdeint_config_props(AVFilterLink *outlink) +-{ +- AVFilterContext *ctx = outlink->src; +- AVFilterLink *inlink = ctx->inputs[0]; +- QSVDeintContext *s = ctx->priv; +- int ret; +- +- qsvdeint_uninit(ctx); +- +- s->last_pts = AV_NOPTS_VALUE; +- outlink->frame_rate = av_mul_q(inlink->frame_rate, +- (AVRational){ 2, 1 }); +- outlink->time_base = av_mul_q(inlink->time_base, +- (AVRational){ 1, 2 }); +- +- /* check that we have a hw context */ +- if (!inlink->hw_frames_ctx) { +- av_log(ctx, AV_LOG_ERROR, "No hw context provided on 
input\n"); +- return AVERROR(EINVAL); +- } +- +- s->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); +- if (!s->hw_frames_ctx) +- return AVERROR(ENOMEM); +- +- av_buffer_unref(&outlink->hw_frames_ctx); +- outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); +- if (!outlink->hw_frames_ctx) { +- qsvdeint_uninit(ctx); +- return AVERROR(ENOMEM); +- } +- +- ret = init_out_session(ctx); +- if (ret < 0) +- return ret; +- +- +- return 0; +-} +- +-static void clear_unused_frames(QSVDeintContext *s) +-{ +- QSVFrame *cur = s->work_frames; +- while (cur) { +- if (!cur->surface.Data.Locked) { +- av_frame_free(&cur->frame); +- cur->used = 0; +- } +- cur = cur->next; +- } +-} +- +-static int get_free_frame(QSVDeintContext *s, QSVFrame **f) +-{ +- QSVFrame *frame, **last; +- +- clear_unused_frames(s); +- +- frame = s->work_frames; +- last = &s->work_frames; +- while (frame) { +- if (!frame->used) { +- *f = frame; +- return 0; +- } +- +- last = &frame->next; +- frame = frame->next; +- } +- +- frame = av_mallocz(sizeof(*frame)); +- if (!frame) +- return AVERROR(ENOMEM); +- *last = frame; +- *f = frame; +- +- return 0; +-} +- +-static int submit_frame(AVFilterContext *ctx, AVFrame *frame, +- mfxFrameSurface1 **surface) +-{ +- QSVDeintContext *s = ctx->priv; +- QSVFrame *qf; +- int ret; +- +- ret = get_free_frame(s, &qf); +- if (ret < 0) +- return ret; +- +- qf->frame = frame; +- +- qf->surface = *(mfxFrameSurface1*)qf->frame->data[3]; +- +- qf->surface.Data.Locked = 0; +- qf->surface.Info.CropW = qf->frame->width; +- qf->surface.Info.CropH = qf->frame->height; +- +- qf->surface.Info.PicStruct = !qf->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE : +- (qf->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF : +- MFX_PICSTRUCT_FIELD_BFF); +- if (qf->frame->repeat_pict == 1) { +- qf->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; +- qf->surface.Info.PicStruct |= qf->frame->top_field_first ? 
MFX_PICSTRUCT_FIELD_TFF : +- MFX_PICSTRUCT_FIELD_BFF; +- } else if (qf->frame->repeat_pict == 2) +- qf->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; +- else if (qf->frame->repeat_pict == 4) +- qf->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; +- +- if (ctx->inputs[0]->frame_rate.num) { +- qf->surface.Info.FrameRateExtN = ctx->inputs[0]->frame_rate.num; +- qf->surface.Info.FrameRateExtD = ctx->inputs[0]->frame_rate.den; +- } else { +- qf->surface.Info.FrameRateExtN = ctx->inputs[0]->time_base.num; +- qf->surface.Info.FrameRateExtD = ctx->inputs[0]->time_base.den; +- } +- +- qf->surface.Data.TimeStamp = av_rescale_q(qf->frame->pts, +- ctx->inputs[0]->time_base, +- (AVRational){1, 90000}); +- +- *surface = &qf->surface; +- qf->used = 1; +- +- return 0; +-} +- +-static int process_frame(AVFilterContext *ctx, const AVFrame *in, +- mfxFrameSurface1 *surf_in) +-{ +- QSVDeintContext *s = ctx->priv; +- AVFilterLink *inlink = ctx->inputs[0]; +- AVFilterLink *outlink = ctx->outputs[0]; +- +- AVFrame *out; +- mfxFrameSurface1 *surf_out; +- mfxSyncPoint sync = NULL; +- mfxStatus err; +- int ret, again = 0; +- +- out = ff_get_video_buffer(outlink, outlink->w, outlink->h); +- if (!out) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } +- +- surf_out = (mfxFrameSurface1*)out->data[3]; +- surf_out->Info.CropW = outlink->w; +- surf_out->Info.CropH = outlink->h; +- surf_out->Info.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; +- +- do { +- err = MFXVideoVPP_RunFrameVPPAsync(s->session, surf_in, surf_out, +- NULL, &sync); +- if (err == MFX_WRN_DEVICE_BUSY) +- av_usleep(1); +- } while (err == MFX_WRN_DEVICE_BUSY); +- +- if (err == MFX_ERR_MORE_DATA) { +- av_frame_free(&out); +- return QSVDEINT_MORE_INPUT; +- } +- +- if (err < 0 && err != MFX_ERR_MORE_SURFACE) { +- ret = ff_qsvvpp_print_error(ctx, err, "Error during deinterlacing"); +- goto fail; +- } +- +- if (!sync) { +- av_log(ctx, AV_LOG_ERROR, "No sync during deinterlacing\n"); +- ret = AVERROR_UNKNOWN; +- goto fail; +- } +- if (err == MFX_ERR_MORE_SURFACE) +- again = 1; +- +- do { +- err = MFXVideoCORE_SyncOperation(s->session, sync, 1000); +- } while (err == MFX_WRN_IN_EXECUTION); +- if (err < 0) { +- ret = ff_qsvvpp_print_error(ctx, err, "Error synchronizing the operation"); +- goto fail; +- } +- +- ret = av_frame_copy_props(out, in); +- if (ret < 0) +- goto fail; +- +- out->width = outlink->w; +- out->height = outlink->h; +- out->interlaced_frame = 0; +- +- out->pts = av_rescale_q(out->pts, inlink->time_base, outlink->time_base); +- if (out->pts == s->last_pts) +- out->pts++; +- s->last_pts = out->pts; +- +- ret = ff_filter_frame(outlink, out); +- if (ret < 0) +- return ret; +- +- return again ? 
QSVDEINT_MORE_OUTPUT : 0; +-fail: +- av_frame_free(&out); +- return ret; +-} +- +-static int qsvdeint_filter_frame(AVFilterLink *link, AVFrame *in) +-{ +- AVFilterContext *ctx = link->dst; +- +- mfxFrameSurface1 *surf_in; +- int ret; +- +- ret = submit_frame(ctx, in, &surf_in); +- if (ret < 0) { +- av_frame_free(&in); +- return ret; +- } +- +- do { +- ret = process_frame(ctx, in, surf_in); +- if (ret < 0) +- return ret; +- } while (ret == QSVDEINT_MORE_OUTPUT); +- +- return 0; +-} +- +-static int qsvdeint_request_frame(AVFilterLink *outlink) +-{ +- AVFilterContext *ctx = outlink->src; +- +- return ff_request_frame(ctx->inputs[0]); +-} +- +-#define OFFSET(x) offsetof(QSVDeintContext, x) +-#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM +-static const AVOption options[] = { +- { "mode", "set deinterlace mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, +- { "bob", "bob algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_BOB}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, +- { "advanced", "Motion adaptive algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, +- { NULL }, +-}; +- +-static const AVClass qsvdeint_class = { +- .class_name = "deinterlace_qsv", +- .item_name = av_default_item_name, +- .option = options, +- .version = LIBAVUTIL_VERSION_INT, +-}; +- +-static const AVFilterPad qsvdeint_inputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .filter_frame = qsvdeint_filter_frame, +- }, +- { NULL } +-}; +- +-static const AVFilterPad qsvdeint_outputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .config_props = qsvdeint_config_props, +- .request_frame = qsvdeint_request_frame, +- }, +- { NULL } +-}; +- +-AVFilter ff_vf_deinterlace_qsv = { +- .name = "deinterlace_qsv", +- .description = NULL_IF_CONFIG_SMALL("QuickSync video deinterlacing"), +- +- .uninit = qsvdeint_uninit, +- .query_formats = qsvdeint_query_formats, +- +- .priv_size = sizeof(QSVDeintContext), +- .priv_class = &qsvdeint_class, +- +- .inputs = qsvdeint_inputs, +- .outputs = qsvdeint_outputs, +- +- .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +-}; +Index: jellyfin-ffmpeg/libavfilter/vf_overlay_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_overlay_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_overlay_qsv.c +@@ -58,10 +58,9 @@ enum var_name { + }; + + typedef struct QSVOverlayContext { +- const AVClass *class; ++ QSVVPPContext qsv; + + FFFrameSync fs; +- QSVVPPContext *qsv; + QSVVPPParam qsv_param; + mfxExtVPPComposite comp_conf; + double var_values[VAR_VARS_NB]; +@@ -231,7 +230,7 @@ static int config_overlay_input(AVFilter + static int process_frame(FFFrameSync *fs) + { + AVFilterContext *ctx = fs->parent; +- QSVOverlayContext *s = fs->opaque; ++ QSVVPPContext *qsv = fs->opaque; + AVFilterLink *in0 = ctx->inputs[0]; + AVFilterLink *in1 = ctx->inputs[1]; + AVFrame *main = NULL; +@@ -249,14 +248,17 @@ static int process_frame(FFFrameSync *fs + return AVERROR_BUG; + + /* composite main frame */ +- ret = ff_qsvvpp_filter_frame(s->qsv, in0, main); ++ ret = ff_qsvvpp_filter_frame(qsv, in0, main); + if (ret < 0 && ret != AVERROR(EAGAIN)) + return ret; + ++ /* remove all side data of the overlay frame*/ ++ if (overlay) ++ av_frame_remove_all_side_data(overlay); ++ + /* composite overlay frame */ + /* or 
overwrite main frame again if the overlay frame isn't ready yet */ +- ret = ff_qsvvpp_filter_frame(s->qsv, overlay ? in1 : in0, overlay ? overlay : main); +- return ret; ++ return ff_qsvvpp_filter_frame(qsv, overlay ? in1 : in0, overlay ? overlay : main); + } + + static int init_framesync(AVFilterContext *ctx) +@@ -300,7 +302,7 @@ static int config_output(AVFilterLink *o + if (ret < 0) + return ret; + +- return ff_qsvvpp_create(ctx, &vpp->qsv, &vpp->qsv_param); ++ return ff_qsvvpp_init(ctx, &vpp->qsv_param); + } + + /* +@@ -349,7 +351,7 @@ static av_cold void overlay_qsv_uninit(A + { + QSVOverlayContext *vpp = ctx->priv; + +- ff_qsvvpp_free(&vpp->qsv); ++ ff_qsvvpp_close(ctx); + ff_framesync_uninit(&vpp->fs); + av_freep(&vpp->comp_conf.InputStream); + av_freep(&vpp->qsv_param.ext_buf); +Index: jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_qsv.c ++++ /dev/null +@@ -1,693 +0,0 @@ +-/* +- * This file is part of FFmpeg. +- * +- * FFmpeg is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. +- * +- * FFmpeg is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with FFmpeg; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- */ +- +-/** +- * @file +- * scale video filter - QSV +- */ +- +-#include +- +-#include +-#include +- +-#include "libavutil/avstring.h" +-#include "libavutil/common.h" +-#include "libavutil/eval.h" +-#include "libavutil/hwcontext.h" +-#include "libavutil/hwcontext_qsv.h" +-#include "libavutil/internal.h" +-#include "libavutil/mathematics.h" +-#include "libavutil/opt.h" +-#include "libavutil/pixdesc.h" +-#include "libavutil/time.h" +-#include "libavfilter/qsvvpp.h" +- +-#include "avfilter.h" +-#include "formats.h" +-#include "internal.h" +-#include "video.h" +- +-static const char *const var_names[] = { +- "PI", +- "PHI", +- "E", +- "in_w", "iw", +- "in_h", "ih", +- "out_w", "ow", +- "out_h", "oh", +- "a", "dar", +- "sar", +- NULL +-}; +- +-enum var_name { +- VAR_PI, +- VAR_PHI, +- VAR_E, +- VAR_IN_W, VAR_IW, +- VAR_IN_H, VAR_IH, +- VAR_OUT_W, VAR_OW, +- VAR_OUT_H, VAR_OH, +- VAR_A, VAR_DAR, +- VAR_SAR, +- VARS_NB +-}; +- +-#define QSV_HAVE_SCALING_CONFIG QSV_VERSION_ATLEAST(1, 19) +-#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) +- +-typedef struct QSVScaleContext { +- const AVClass *class; +- +- /* a clone of the main session, used internally for scaling */ +- mfxSession session; +- +- mfxMemId *mem_ids_in; +- int nb_mem_ids_in; +- +- mfxMemId *mem_ids_out; +- int nb_mem_ids_out; +- +- mfxFrameSurface1 **surface_ptrs_in; +- int nb_surface_ptrs_in; +- +- mfxFrameSurface1 **surface_ptrs_out; +- int nb_surface_ptrs_out; +- +- mfxExtOpaqueSurfaceAlloc opaque_alloc; +- +-#if QSV_HAVE_SCALING_CONFIG +- mfxExtVPPScaling scale_conf; +-#endif +- int mode; +- +- mfxExtBuffer *ext_buffers[1 + QSV_HAVE_SCALING_CONFIG]; +- int num_ext_buf; +- +- int shift_width, shift_height; +- +- /** +- * New dimensions. 
Special values are: +- * 0 = original width/height +- * -1 = keep original aspect +- */ +- int w, h; +- +- /** +- * Output sw format. AV_PIX_FMT_NONE for no conversion. +- */ +- enum AVPixelFormat format; +- +- char *w_expr; ///< width expression string +- char *h_expr; ///< height expression string +- char *format_str; +-} QSVScaleContext; +- +-static av_cold int qsvscale_init(AVFilterContext *ctx) +-{ +- QSVScaleContext *s = ctx->priv; +- +- if (!strcmp(s->format_str, "same")) { +- s->format = AV_PIX_FMT_NONE; +- } else { +- s->format = av_get_pix_fmt(s->format_str); +- if (s->format == AV_PIX_FMT_NONE) { +- av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); +- return AVERROR(EINVAL); +- } +- } +- +- return 0; +-} +- +-static av_cold void qsvscale_uninit(AVFilterContext *ctx) +-{ +- QSVScaleContext *s = ctx->priv; +- +- if (s->session) { +- MFXClose(s->session); +- s->session = NULL; +- } +- +- av_freep(&s->mem_ids_in); +- av_freep(&s->mem_ids_out); +- s->nb_mem_ids_in = 0; +- s->nb_mem_ids_out = 0; +- +- av_freep(&s->surface_ptrs_in); +- av_freep(&s->surface_ptrs_out); +- s->nb_surface_ptrs_in = 0; +- s->nb_surface_ptrs_out = 0; +-} +- +-static int qsvscale_query_formats(AVFilterContext *ctx) +-{ +- static const enum AVPixelFormat pixel_formats[] = { +- AV_PIX_FMT_QSV, AV_PIX_FMT_NONE, +- }; +- AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); +- int ret; +- +- if ((ret = ff_set_common_formats(ctx, pix_fmts)) < 0) +- return ret; +- +- return 0; +-} +- +-static int init_out_pool(AVFilterContext *ctx, +- int out_width, int out_height) +-{ +- QSVScaleContext *s = ctx->priv; +- AVFilterLink *outlink = ctx->outputs[0]; +- +- AVHWFramesContext *in_frames_ctx; +- AVHWFramesContext *out_frames_ctx; +- AVQSVFramesContext *in_frames_hwctx; +- AVQSVFramesContext *out_frames_hwctx; +- enum AVPixelFormat in_format; +- enum AVPixelFormat out_format; +- int i, ret; +- +- /* check that we have a hw context */ +- if (!ctx->inputs[0]->hw_frames_ctx) { +- av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); +- return AVERROR(EINVAL); +- } +- in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; +- in_frames_hwctx = in_frames_ctx->hwctx; +- +- in_format = in_frames_ctx->sw_format; +- out_format = (s->format == AV_PIX_FMT_NONE) ? 
in_format : s->format; +- +- outlink->hw_frames_ctx = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); +- if (!outlink->hw_frames_ctx) +- return AVERROR(ENOMEM); +- out_frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data; +- out_frames_hwctx = out_frames_ctx->hwctx; +- +- out_frames_ctx->format = AV_PIX_FMT_QSV; +- out_frames_ctx->width = FFALIGN(out_width, 16); +- out_frames_ctx->height = FFALIGN(out_height, 16); +- out_frames_ctx->sw_format = out_format; +- out_frames_ctx->initial_pool_size = 4; +- +- out_frames_hwctx->frame_type = in_frames_hwctx->frame_type | MFX_MEMTYPE_FROM_VPPOUT; +- +- ret = ff_filter_init_hw_frames(ctx, outlink, 32); +- if (ret < 0) +- return ret; +- +- ret = av_hwframe_ctx_init(outlink->hw_frames_ctx); +- if (ret < 0) +- return ret; +- +- for (i = 0; i < out_frames_hwctx->nb_surfaces; i++) { +- mfxFrameInfo *info = &out_frames_hwctx->surfaces[i].Info; +- info->CropW = out_width; +- info->CropH = out_height; +- } +- +- return 0; +-} +- +-static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req, +- mfxFrameAllocResponse *resp) +-{ +- AVFilterContext *ctx = pthis; +- QSVScaleContext *s = ctx->priv; +- +- if (!(req->Type & MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) || +- !(req->Type & (MFX_MEMTYPE_FROM_VPPIN | MFX_MEMTYPE_FROM_VPPOUT)) || +- !(req->Type & MFX_MEMTYPE_EXTERNAL_FRAME)) +- return MFX_ERR_UNSUPPORTED; +- +- if (req->Type & MFX_MEMTYPE_FROM_VPPIN) { +- resp->mids = s->mem_ids_in; +- resp->NumFrameActual = s->nb_mem_ids_in; +- } else { +- resp->mids = s->mem_ids_out; +- resp->NumFrameActual = s->nb_mem_ids_out; +- } +- +- return MFX_ERR_NONE; +-} +- +-static mfxStatus frame_free(mfxHDL pthis, mfxFrameAllocResponse *resp) +-{ +- return MFX_ERR_NONE; +-} +- +-static mfxStatus frame_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) +-{ +- return MFX_ERR_UNSUPPORTED; +-} +- +-static mfxStatus frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) +-{ +- return MFX_ERR_UNSUPPORTED; +-} +- +-static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) +-{ +- mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; +- mfxHDLPair *pair_src = (mfxHDLPair*)mid; +- +- pair_dst->first = pair_src->first; +- +- if (pair_src->second != (mfxMemId)MFX_INFINITE) +- pair_dst->second = pair_src->second; +- return MFX_ERR_NONE; +-} +- +-static int init_out_session(AVFilterContext *ctx, int in_width, int in_height) +-{ +- +- QSVScaleContext *s = ctx->priv; +- AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; +- AVHWFramesContext *out_frames_ctx = (AVHWFramesContext*)ctx->outputs[0]->hw_frames_ctx->data; +- AVQSVFramesContext *in_frames_hwctx = in_frames_ctx->hwctx; +- AVQSVFramesContext *out_frames_hwctx = out_frames_ctx->hwctx; +- AVQSVDeviceContext *device_hwctx = in_frames_ctx->device_ctx->hwctx; +- +- int opaque = !!(in_frames_hwctx->frame_type & MFX_MEMTYPE_OPAQUE_FRAME); +- +- mfxHDL handle = NULL; +- mfxHandleType handle_type; +- mfxVersion ver; +- mfxIMPL impl; +- mfxVideoParam par; +- mfxStatus err; +- int i; +- +- s->num_ext_buf = 0; +- +- /* extract the properties of the "master" session given to us */ +- err = MFXQueryIMPL(device_hwctx->session, &impl); +- if (err == MFX_ERR_NONE) +- err = MFXQueryVersion(device_hwctx->session, &ver); +- if (err != MFX_ERR_NONE) { +- av_log(ctx, AV_LOG_ERROR, "Error querying the session attributes\n"); +- return AVERROR_UNKNOWN; +- } +- +- if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_VA_DISPLAY; +- } else if (MFX_IMPL_VIA_D3D11 == 
MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D11_DEVICE; +- } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; +- } else { +- av_log(ctx, AV_LOG_ERROR, "Error unsupported handle type\n"); +- return AVERROR_UNKNOWN; +- } +- +- err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, "Error getting the session handle"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, "Warning in getting the session handle"); +- return AVERROR_UNKNOWN; +- } +- +- /* create a "slave" session with those same properties, to be used for +- * actual scaling */ +- err = MFXInit(impl, &ver, &s->session); +- if (err != MFX_ERR_NONE) { +- av_log(ctx, AV_LOG_ERROR, "Error initializing a session for scaling\n"); +- return AVERROR_UNKNOWN; +- } +- +- if (handle) { +- err = MFXVideoCORE_SetHandle(s->session, handle_type, handle); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- } +- +- if (QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) { +- err = MFXJoinSession(device_hwctx->session, s->session); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- } +- +- memset(&par, 0, sizeof(par)); +- +- if (opaque) { +- s->surface_ptrs_in = av_mallocz_array(in_frames_hwctx->nb_surfaces, +- sizeof(*s->surface_ptrs_in)); +- if (!s->surface_ptrs_in) +- return AVERROR(ENOMEM); +- for (i = 0; i < in_frames_hwctx->nb_surfaces; i++) +- s->surface_ptrs_in[i] = in_frames_hwctx->surfaces + i; +- s->nb_surface_ptrs_in = in_frames_hwctx->nb_surfaces; +- +- s->surface_ptrs_out = av_mallocz_array(out_frames_hwctx->nb_surfaces, +- sizeof(*s->surface_ptrs_out)); +- if (!s->surface_ptrs_out) +- return AVERROR(ENOMEM); +- for (i = 0; i < out_frames_hwctx->nb_surfaces; i++) +- s->surface_ptrs_out[i] = out_frames_hwctx->surfaces + i; +- s->nb_surface_ptrs_out = out_frames_hwctx->nb_surfaces; +- +- s->opaque_alloc.In.Surfaces = s->surface_ptrs_in; +- s->opaque_alloc.In.NumSurface = s->nb_surface_ptrs_in; +- s->opaque_alloc.In.Type = in_frames_hwctx->frame_type; +- +- s->opaque_alloc.Out.Surfaces = s->surface_ptrs_out; +- s->opaque_alloc.Out.NumSurface = s->nb_surface_ptrs_out; +- s->opaque_alloc.Out.Type = out_frames_hwctx->frame_type; +- +- s->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; +- s->opaque_alloc.Header.BufferSz = sizeof(s->opaque_alloc); +- +- s->ext_buffers[s->num_ext_buf++] = (mfxExtBuffer*)&s->opaque_alloc; +- +- par.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY | MFX_IOPATTERN_OUT_OPAQUE_MEMORY; +- } else { +- mfxFrameAllocator frame_allocator = { +- .pthis = ctx, +- .Alloc = frame_alloc, +- .Lock = frame_lock, +- .Unlock = frame_unlock, +- .GetHDL = frame_get_hdl, +- .Free = frame_free, +- }; +- +- s->mem_ids_in = av_mallocz_array(in_frames_hwctx->nb_surfaces, +- sizeof(*s->mem_ids_in)); +- if (!s->mem_ids_in) +- return AVERROR(ENOMEM); +- for (i = 0; i < in_frames_hwctx->nb_surfaces; i++) { +- s->mem_ids_in[i] = in_frames_hwctx->surfaces[i].Data.MemId; +- in_frames_hwctx->surfaces[i].Info.CropW = in_width; +- in_frames_hwctx->surfaces[i].Info.CropH = in_height; +- } +- s->nb_mem_ids_in = in_frames_hwctx->nb_surfaces; +- +- s->mem_ids_out = av_mallocz_array(out_frames_hwctx->nb_surfaces, +- sizeof(*s->mem_ids_out)); +- if (!s->mem_ids_out) +- return AVERROR(ENOMEM); +- for (i = 0; i < out_frames_hwctx->nb_surfaces; i++) +- s->mem_ids_out[i] = out_frames_hwctx->surfaces[i].Data.MemId; +- s->nb_mem_ids_out = out_frames_hwctx->nb_surfaces; +- +- err = 
MFXVideoCORE_SetFrameAllocator(s->session, &frame_allocator); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- +- par.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; +- } +- +-#if QSV_HAVE_SCALING_CONFIG +- memset(&s->scale_conf, 0, sizeof(mfxExtVPPScaling)); +- s->scale_conf.Header.BufferId = MFX_EXTBUFF_VPP_SCALING; +- s->scale_conf.Header.BufferSz = sizeof(mfxExtVPPScaling); +- s->scale_conf.ScalingMode = s->mode; +- s->ext_buffers[s->num_ext_buf++] = (mfxExtBuffer*)&s->scale_conf; +- av_log(ctx, AV_LOG_VERBOSE, "Scaling mode: %d\n", s->mode); +-#endif +- +- par.ExtParam = s->ext_buffers; +- par.NumExtParam = s->num_ext_buf; +- +- par.AsyncDepth = 1; // TODO async +- +- par.vpp.In = in_frames_hwctx->surfaces[0].Info; +- par.vpp.Out = out_frames_hwctx->surfaces[0].Info; +- +- /* Apparently VPP requires the frame rate to be set to some value, otherwise +- * init will fail (probably for the framerate conversion filter). Since we +- * are only doing scaling here, we just invent an arbitrary +- * value */ +- par.vpp.In.FrameRateExtN = 25; +- par.vpp.In.FrameRateExtD = 1; +- par.vpp.Out.FrameRateExtN = 25; +- par.vpp.Out.FrameRateExtD = 1; +- +- /* Print input memory mode */ +- ff_qsvvpp_print_iopattern(ctx, par.IOPattern & 0x0F, "VPP"); +- /* Print output memory mode */ +- ff_qsvvpp_print_iopattern(ctx, par.IOPattern & 0xF0, "VPP"); +- err = MFXVideoVPP_Init(s->session, &par); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, +- "Error opening the VPP for scaling"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, +- "Warning in VPP initialization"); +- return AVERROR_UNKNOWN; +- } +- +- return 0; +-} +- +-static int init_scale_session(AVFilterContext *ctx, int in_width, int in_height, +- int out_width, int out_height) +-{ +- int ret; +- +- qsvscale_uninit(ctx); +- +- ret = init_out_pool(ctx, out_width, out_height); +- if (ret < 0) +- return ret; +- +- ret = init_out_session(ctx, in_width, in_height); +- if (ret < 0) +- return ret; +- +- return 0; +-} +- +-static int qsvscale_config_props(AVFilterLink *outlink) +-{ +- AVFilterContext *ctx = outlink->src; +- AVFilterLink *inlink = outlink->src->inputs[0]; +- QSVScaleContext *s = ctx->priv; +- int64_t w, h; +- double var_values[VARS_NB], res; +- char *expr; +- int ret; +- +- var_values[VAR_PI] = M_PI; +- var_values[VAR_PHI] = M_PHI; +- var_values[VAR_E] = M_E; +- var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w; +- var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h; +- var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN; +- var_values[VAR_OUT_H] = var_values[VAR_OH] = NAN; +- var_values[VAR_A] = (double) inlink->w / inlink->h; +- var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? 
+- (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1; +- var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR]; +- +- /* evaluate width and height */ +- av_expr_parse_and_eval(&res, (expr = s->w_expr), +- var_names, var_values, +- NULL, NULL, NULL, NULL, NULL, 0, ctx); +- s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res; +- if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr), +- var_names, var_values, +- NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) +- goto fail; +- s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res; +- /* evaluate again the width, as it may depend on the output height */ +- if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr), +- var_names, var_values, +- NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) +- goto fail; +- s->w = res; +- +- w = s->w; +- h = s->h; +- +- /* sanity check params */ +- if (w < -1 || h < -1) { +- av_log(ctx, AV_LOG_ERROR, "Size values less than -1 are not acceptable.\n"); +- return AVERROR(EINVAL); +- } +- if (w == -1 && h == -1) +- s->w = s->h = 0; +- +- if (!(w = s->w)) +- w = inlink->w; +- if (!(h = s->h)) +- h = inlink->h; +- if (w == -1) +- w = av_rescale(h, inlink->w, inlink->h); +- if (h == -1) +- h = av_rescale(w, inlink->h, inlink->w); +- +- if (w > INT_MAX || h > INT_MAX || +- (h * inlink->w) > INT_MAX || +- (w * inlink->h) > INT_MAX) +- av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); +- +- outlink->w = w; +- outlink->h = h; +- +- ret = init_scale_session(ctx, inlink->w, inlink->h, w, h); +- if (ret < 0) +- return ret; +- +- av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n", +- inlink->w, inlink->h, outlink->w, outlink->h); +- +- if (inlink->sample_aspect_ratio.num) +- outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w, +- outlink->w*inlink->h}, +- inlink->sample_aspect_ratio); +- else +- outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; +- +- return 0; +- +-fail: +- av_log(ctx, AV_LOG_ERROR, +- "Error when evaluating the expression '%s'\n", expr); +- return ret; +-} +- +-static int qsvscale_filter_frame(AVFilterLink *link, AVFrame *in) +-{ +- AVFilterContext *ctx = link->dst; +- QSVScaleContext *s = ctx->priv; +- AVFilterLink *outlink = ctx->outputs[0]; +- +- mfxSyncPoint sync = NULL; +- mfxStatus err; +- +- AVFrame *out = NULL; +- int ret = 0; +- +- out = ff_get_video_buffer(outlink, outlink->w, outlink->h); +- if (!out) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } +- +- do { +- err = MFXVideoVPP_RunFrameVPPAsync(s->session, +- (mfxFrameSurface1*)in->data[3], +- (mfxFrameSurface1*)out->data[3], +- NULL, &sync); +- if (err == MFX_WRN_DEVICE_BUSY) +- av_usleep(1); +- } while (err == MFX_WRN_DEVICE_BUSY); +- +- if (err < 0) { +- ret = ff_qsvvpp_print_error(ctx, err, "Error during scaling"); +- goto fail; +- } +- +- if (!sync) { +- av_log(ctx, AV_LOG_ERROR, "No sync during scaling\n"); +- ret = AVERROR_UNKNOWN; +- goto fail; +- } +- +- do { +- err = MFXVideoCORE_SyncOperation(s->session, sync, 1000); +- } while (err == MFX_WRN_IN_EXECUTION); +- if (err < 0) { +- ret = ff_qsvvpp_print_error(ctx, err, "Error synchronizing the operation"); +- goto fail; +- } +- +- ret = av_frame_copy_props(out, in); +- if (ret < 0) +- goto fail; +- +- out->width = outlink->w; +- out->height = outlink->h; +- +- av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den, +- (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w, +- (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, +- INT_MAX); +- +- 
av_frame_free(&in); +- return ff_filter_frame(outlink, out); +-fail: +- av_frame_free(&in); +- av_frame_free(&out); +- return ret; +-} +- +-#define OFFSET(x) offsetof(QSVScaleContext, x) +-#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM +-static const AVOption options[] = { +- { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, +- { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, +- { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, +- +-#if QSV_HAVE_SCALING_CONFIG +- { "mode", "set scaling mode", OFFSET(mode), AV_OPT_TYPE_INT, { .i64 = MFX_SCALING_MODE_DEFAULT}, MFX_SCALING_MODE_DEFAULT, MFX_SCALING_MODE_QUALITY, FLAGS, "mode"}, +- { "low_power", "low power mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_LOWPOWER}, INT_MIN, INT_MAX, FLAGS, "mode"}, +- { "hq", "high quality mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_QUALITY}, INT_MIN, INT_MAX, FLAGS, "mode"}, +-#else +- { "mode", "(not supported)", OFFSET(mode), AV_OPT_TYPE_INT, { .i64 = 0}, 0, INT_MAX, FLAGS, "mode"}, +- { "low_power", "", 0, AV_OPT_TYPE_CONST, { .i64 = 1}, 0, 0, FLAGS, "mode"}, +- { "hq", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2}, 0, 0, FLAGS, "mode"}, +-#endif +- +- { NULL }, +-}; +- +-static const AVClass qsvscale_class = { +- .class_name = "scale_qsv", +- .item_name = av_default_item_name, +- .option = options, +- .version = LIBAVUTIL_VERSION_INT, +-}; +- +-static const AVFilterPad qsvscale_inputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .filter_frame = qsvscale_filter_frame, +- }, +- { NULL } +-}; +- +-static const AVFilterPad qsvscale_outputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .config_props = qsvscale_config_props, +- }, +- { NULL } +-}; +- +-AVFilter ff_vf_scale_qsv = { +- .name = "scale_qsv", +- .description = NULL_IF_CONFIG_SMALL("QuickSync video scaling and format conversion"), +- +- .init = qsvscale_init, +- .uninit = qsvscale_uninit, +- .query_formats = qsvscale_query_formats, +- +- .priv_size = sizeof(QSVScaleContext), +- .priv_class = &qsvscale_class, +- +- .inputs = qsvscale_inputs, +- .outputs = qsvscale_outputs, +- +- .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +-}; +Index: jellyfin-ffmpeg/libavfilter/vf_vpp_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_vpp_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_vpp_qsv.c +@@ -32,6 +32,7 @@ + #include "formats.h" + #include "internal.h" + #include "avfilter.h" ++#include "filters.h" + #include "libavcodec/avcodec.h" + #include "libavformat/avformat.h" + +@@ -42,14 +43,13 @@ + #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM) + + /* number of video enhancement filters */ +-#define ENH_FILTERS_COUNT (7) +-#define QSV_HAVE_ROTATION QSV_VERSION_ATLEAST(1, 17) +-#define QSV_HAVE_MIRRORING QSV_VERSION_ATLEAST(1, 19) ++#define ENH_FILTERS_COUNT (8) ++#define QSV_HAVE_ROTATION QSV_VERSION_ATLEAST(1, 17) ++#define QSV_HAVE_MIRRORING QSV_VERSION_ATLEAST(1, 19) ++#define QSV_HAVE_SCALING_CONFIG QSV_VERSION_ATLEAST(1, 19) + + typedef struct VPPContext{ +- const AVClass *class; +- +- QSVVPPContext *qsv; ++ QSVVPPContext qsv; + + /* Video Enhancement Algorithms */ + mfxExtVPPDeinterlacing deinterlace_conf; +@@ -59,7 +59,15 @@ typedef struct VPPContext{ + mfxExtVPPProcAmp procamp_conf; + mfxExtVPPRotation rotation_conf; + mfxExtVPPMirroring 
mirroring_conf; ++#ifdef QSV_HAVE_SCALING_CONFIG ++ mfxExtVPPScaling scale_conf; ++#endif + ++ /** ++ * New dimensions. Special values are: ++ * 0 = original width/height ++ * -1 = keep original aspect ++ */ + int out_width; + int out_height; + /** +@@ -83,8 +91,10 @@ typedef struct VPPContext{ + int rotate; /* rotate angle : [0, 90, 180, 270] */ + int hflip; /* flip mode : 0 = off, 1 = HORIZONTAL flip */ + ++ int scale_mode; /* scale mode : 0 = auto, 1 = low power, 2 = high quality */ ++ + /* param for the procamp */ +- int procamp; /* enable procamp */ ++ int procamp; /* enable procamp */ + float hue; + float saturation; + float contrast; +@@ -93,44 +103,10 @@ typedef struct VPPContext{ + char *cx, *cy, *cw, *ch; + char *ow, *oh; + char *output_format_str; +-} VPPContext; +- +-static const AVOption options[] = { +- { "deinterlace", "deinterlace mode: 0=off, 1=bob, 2=advanced", OFFSET(deinterlace), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MFX_DEINTERLACING_ADVANCED, .flags = FLAGS, "deinterlace" }, +- { "bob", "Bob deinterlace mode.", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_DEINTERLACING_BOB }, .flags = FLAGS, "deinterlace" }, +- { "advanced", "Advanced deinterlace mode. ", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_DEINTERLACING_ADVANCED }, .flags = FLAGS, "deinterlace" }, +- +- { "denoise", "denoise level [0, 100]", OFFSET(denoise), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, .flags = FLAGS }, +- { "detail", "enhancement level [0, 100]", OFFSET(detail), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, .flags = FLAGS }, +- { "framerate", "output framerate", OFFSET(framerate), AV_OPT_TYPE_RATIONAL, { .dbl = 0.0 },0, DBL_MAX, .flags = FLAGS }, +- { "procamp", "Enable ProcAmp", OFFSET(procamp), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = FLAGS}, +- { "hue", "ProcAmp hue", OFFSET(hue), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, -180.0, 180.0, .flags = FLAGS}, +- { "saturation", "ProcAmp saturation", OFFSET(saturation), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS}, +- { "contrast", "ProcAmp contrast", OFFSET(contrast), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS}, +- { "brightness", "ProcAmp brightness", OFFSET(brightness), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, -100.0, 100.0, .flags = FLAGS}, + +- { "transpose", "set transpose direction", OFFSET(transpose), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 6, FLAGS, "transpose"}, +- { "cclock_hflip", "rotate counter-clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "transpose" }, +- { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "transpose" }, +- { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "transpose" }, +- { "clock_hflip", "rotate clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "transpose" }, +- { "reversal", "rotate by half-turn", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, .flags=FLAGS, .unit = "transpose" }, +- { "hflip", "flip horizontally", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, .flags=FLAGS, .unit = "transpose" }, +- { "vflip", "flip vertically", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, .flags=FLAGS, .unit = "transpose" }, +- +- { "cw", "set the width crop area expression", OFFSET(cw), AV_OPT_TYPE_STRING, { .str = "iw" }, 0, 0, FLAGS }, +- { "ch", "set the height crop area expression", OFFSET(ch), AV_OPT_TYPE_STRING, { .str = "ih" }, 0, 0, FLAGS }, +- { "cx", "set the x crop area 
expression", OFFSET(cx), AV_OPT_TYPE_STRING, { .str = "(in_w-out_w)/2" }, 0, 0, FLAGS }, +- { "cy", "set the y crop area expression", OFFSET(cy), AV_OPT_TYPE_STRING, { .str = "(in_h-out_h)/2" }, 0, 0, FLAGS }, +- +- { "w", "Output video width", OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS }, +- { "width", "Output video width", OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS }, +- { "h", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, +- { "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, +- { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, +- +- { NULL } +-}; ++ int has_passthrough; /* apply pass through mode if possible */ ++ int field_rate; /* deinterlace mode */ ++} VPPContext; + + static const char *const var_names[] = { + "iw", "in_w", +@@ -141,32 +117,41 @@ static const char *const var_names[] = { + "ch", + "cx", + "cy", ++ "a", "dar", ++ "sar", + NULL + }; + + enum var_name { +- VAR_iW, VAR_IN_W, +- VAR_iH, VAR_IN_H, +- VAR_oW, VAR_OUT_W, VAR_W, +- VAR_oH, VAR_OUT_H, VAR_H, ++ VAR_IW, VAR_IN_W, ++ VAR_IH, VAR_IN_H, ++ VAR_OW, VAR_OUT_W, VAR_W, ++ VAR_OH, VAR_OUT_H, VAR_H, + CW, + CH, + CX, + CY, ++ VAR_A, VAR_DAR, ++ VAR_SAR, + VAR_VARS_NB + }; + + static int eval_expr(AVFilterContext *ctx) + { + #define PASS_EXPR(e, s) {\ +- ret = av_expr_parse(&e, s, var_names, NULL, NULL, NULL, NULL, 0, ctx); \ +- if (ret < 0) {\ +- av_log(ctx, AV_LOG_ERROR, "Error when passing '%s'.\n", s);\ +- goto release;\ ++ if (s) {\ ++ ret = av_expr_parse(&e, s, var_names, NULL, NULL, NULL, NULL, 0, ctx); \ ++ if (ret < 0) { \ ++ av_log(ctx, AV_LOG_ERROR, "Error when passing '%s'.\n", s); \ ++ goto release; \ ++ } \ + }\ + } +-#define CALC_EXPR(e, v, i) {\ +- i = v = av_expr_eval(e, var_values, NULL); \ ++#define CALC_EXPR(e, v, i, d) {\ ++ if (e)\ ++ i = v = av_expr_eval(e, var_values, NULL); \ ++ else\ ++ i = v = d;\ + } + VPPContext *vpp = ctx->priv; + double var_values[VAR_VARS_NB] = { NAN }; +@@ -184,39 +169,43 @@ static int eval_expr(AVFilterContext *ct + PASS_EXPR(cx_expr, vpp->cx); + PASS_EXPR(cy_expr, vpp->cy); + +- var_values[VAR_iW] = ++ var_values[VAR_IW] = + var_values[VAR_IN_W] = ctx->inputs[0]->w; + +- var_values[VAR_iH] = ++ var_values[VAR_IH] = + var_values[VAR_IN_H] = ctx->inputs[0]->h; + ++ var_values[VAR_A] = (double)var_values[VAR_IN_W] / var_values[VAR_IN_H]; ++ var_values[VAR_SAR] = ctx->inputs[0]->sample_aspect_ratio.num ? 
++ (double)ctx->inputs[0]->sample_aspect_ratio.num / ctx->inputs[0]->sample_aspect_ratio.den : 1; ++ var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR]; ++ + /* crop params */ +- CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w); +- CALC_EXPR(ch_expr, var_values[CH], vpp->crop_h); ++ CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w, var_values[VAR_IW]); ++ CALC_EXPR(ch_expr, var_values[CH], vpp->crop_h, var_values[VAR_IH]); + + /* calc again in case cw is relative to ch */ +- CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w); ++ CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w, var_values[VAR_IW]); + + CALC_EXPR(w_expr, +- var_values[VAR_OUT_W] = var_values[VAR_oW] = var_values[VAR_W], +- vpp->out_width); ++ var_values[VAR_OUT_W] = var_values[VAR_OW] = var_values[VAR_W], ++ vpp->out_width, var_values[CW]); + CALC_EXPR(h_expr, +- var_values[VAR_OUT_H] = var_values[VAR_oH] = var_values[VAR_H], +- vpp->out_height); ++ var_values[VAR_OUT_H] = var_values[VAR_OH] = var_values[VAR_H], ++ vpp->out_height, var_values[CH]); + + /* calc again in case ow is relative to oh */ + CALC_EXPR(w_expr, +- var_values[VAR_OUT_W] = var_values[VAR_oW] = var_values[VAR_W], +- vpp->out_width); ++ var_values[VAR_OUT_W] = var_values[VAR_OW] = var_values[VAR_W], ++ vpp->out_width, var_values[CW]); + +- +- CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x); +- CALC_EXPR(cy_expr, var_values[CY], vpp->crop_y); ++ CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x, (var_values[VAR_IW] - var_values[VAR_OW]) / 2); ++ CALC_EXPR(cy_expr, var_values[CY], vpp->crop_y, (var_values[VAR_IH] - var_values[VAR_OH]) / 2); + + /* calc again in case cx is relative to cy */ +- CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x); ++ CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x, (var_values[VAR_IW] - var_values[VAR_OW]) / 2); + +- if ((vpp->crop_w != var_values[VAR_iW]) || (vpp->crop_h != var_values[VAR_iH])) ++ if ((vpp->crop_w != var_values[VAR_IW]) || (vpp->crop_h != var_values[VAR_IH])) + vpp->use_crop = 1; + + release: +@@ -232,11 +221,25 @@ release: + return ret; + } + ++static av_cold int vpp_preinit(AVFilterContext *ctx) ++{ ++ VPPContext *vpp = ctx->priv; ++ /* For AV_OPT_TYPE_STRING options, NULL is handled in other way so ++ * we needn't set default value here ++ */ ++ vpp->saturation = 1.0; ++ vpp->contrast = 1.0; ++ vpp->transpose = -1; ++ vpp->field_rate = 1; ++ ++ return 0; ++} ++ + static av_cold int vpp_init(AVFilterContext *ctx) + { + VPPContext *vpp = ctx->priv; + +- if (!strcmp(vpp->output_format_str, "same")) { ++ if (!vpp->output_format_str || !strcmp(vpp->output_format_str, "same")) { + vpp->out_format = AV_PIX_FMT_NONE; + } else { + vpp->out_format = av_get_pix_fmt(vpp->output_format_str); +@@ -254,11 +257,15 @@ static int config_input(AVFilterLink *in + AVFilterContext *ctx = inlink->dst; + VPPContext *vpp = ctx->priv; + int ret; ++ int64_t ow, oh; + + if (vpp->framerate.den == 0 || vpp->framerate.num == 0) + vpp->framerate = inlink->frame_rate; + +- if (av_cmp_q(vpp->framerate, inlink->frame_rate)) ++ if (vpp->field_rate == 2) ++ vpp->framerate = av_mul_q(inlink->frame_rate, ++ (AVRational){ 2, 1 }); ++ else if (av_cmp_q(vpp->framerate, inlink->frame_rate)) + vpp->use_frc = 1; + + ret = eval_expr(ctx); +@@ -267,11 +274,38 @@ static int config_input(AVFilterLink *in + return ret; + } + +- if (vpp->out_height == 0 || vpp->out_width == 0) { +- vpp->out_width = inlink->w; +- vpp->out_height = inlink->h; ++ ow = vpp->out_width; ++ oh = vpp->out_height; ++ ++ /* sanity check params */ ++ if (ow < -1 || oh < -1) { ++ 
av_log(ctx, AV_LOG_ERROR, "Size values less than -1 are not acceptable.\n"); ++ return AVERROR(EINVAL); + } + ++ if (ow == -1 && oh == -1) ++ vpp->out_width = vpp->out_height = 0; ++ ++ if (!(ow = vpp->out_width)) ++ ow = inlink->w; ++ ++ if (!(oh = vpp->out_height)) ++ oh = inlink->h; ++ ++ if (ow == -1) ++ ow = av_rescale(oh, inlink->w, inlink->h); ++ ++ if (oh == -1) ++ oh = av_rescale(ow, inlink->h, inlink->w); ++ ++ if (ow > INT_MAX || oh > INT_MAX || ++ (oh * inlink->w) > INT_MAX || ++ (ow * inlink->h) > INT_MAX) ++ av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); ++ ++ vpp->out_width = ow; ++ vpp->out_height = oh; ++ + if (vpp->use_crop) { + vpp->crop_x = FFMAX(vpp->crop_x, 0); + vpp->crop_y = FFMAX(vpp->crop_y, 0); +@@ -298,7 +332,7 @@ static int config_output(AVFilterLink *o + outlink->w = vpp->out_width; + outlink->h = vpp->out_height; + outlink->frame_rate = vpp->framerate; +- outlink->time_base = av_inv_q(vpp->framerate); ++ outlink->time_base = inlink->time_base; + + param.filter_frame = NULL; + param.num_ext_buf = 0; +@@ -327,53 +361,46 @@ static int config_output(AVFilterLink *o + param.crop = &crop; + } + +- if (vpp->deinterlace) { +- memset(&vpp->deinterlace_conf, 0, sizeof(mfxExtVPPDeinterlacing)); +- vpp->deinterlace_conf.Header.BufferId = MFX_EXTBUFF_VPP_DEINTERLACING; +- vpp->deinterlace_conf.Header.BufferSz = sizeof(mfxExtVPPDeinterlacing); +- vpp->deinterlace_conf.Mode = vpp->deinterlace == 1 ? +- MFX_DEINTERLACING_BOB : MFX_DEINTERLACING_ADVANCED; ++#define INIT_MFX_EXTBUF(extbuf, id) do { \ ++ memset(&vpp->extbuf, 0, sizeof(vpp->extbuf)); \ ++ vpp->extbuf.Header.BufferId = id; \ ++ vpp->extbuf.Header.BufferSz = sizeof(vpp->extbuf); \ ++ param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->extbuf; \ ++ } while (0) ++ ++#define SET_MFX_PARAM_FIELD(extbuf, field, value) do { \ ++ vpp->extbuf.field = value; \ ++ } while (0) + +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->deinterlace_conf; ++ vpp->qsv.deinterlace_enabled = !!vpp->deinterlace; ++ ++ if (vpp->deinterlace) { ++ INIT_MFX_EXTBUF(deinterlace_conf, MFX_EXTBUFF_VPP_DEINTERLACING); ++ SET_MFX_PARAM_FIELD(deinterlace_conf, Mode, (vpp->deinterlace == 1 ? 
++ MFX_DEINTERLACING_BOB : MFX_DEINTERLACING_ADVANCED)); + } + + if (vpp->use_frc) { +- memset(&vpp->frc_conf, 0, sizeof(mfxExtVPPFrameRateConversion)); +- vpp->frc_conf.Header.BufferId = MFX_EXTBUFF_VPP_FRAME_RATE_CONVERSION; +- vpp->frc_conf.Header.BufferSz = sizeof(mfxExtVPPFrameRateConversion); +- vpp->frc_conf.Algorithm = MFX_FRCALGM_DISTRIBUTED_TIMESTAMP; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->frc_conf; ++ INIT_MFX_EXTBUF(frc_conf, MFX_EXTBUFF_VPP_FRAME_RATE_CONVERSION); ++ SET_MFX_PARAM_FIELD(frc_conf, Algorithm, MFX_FRCALGM_DISTRIBUTED_TIMESTAMP); + } + + if (vpp->denoise) { +- memset(&vpp->denoise_conf, 0, sizeof(mfxExtVPPDenoise)); +- vpp->denoise_conf.Header.BufferId = MFX_EXTBUFF_VPP_DENOISE; +- vpp->denoise_conf.Header.BufferSz = sizeof(mfxExtVPPDenoise); +- vpp->denoise_conf.DenoiseFactor = vpp->denoise; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->denoise_conf; ++ INIT_MFX_EXTBUF(denoise_conf, MFX_EXTBUFF_VPP_DENOISE); ++ SET_MFX_PARAM_FIELD(denoise_conf, DenoiseFactor, vpp->denoise); + } + + if (vpp->detail) { +- memset(&vpp->detail_conf, 0, sizeof(mfxExtVPPDetail)); +- vpp->detail_conf.Header.BufferId = MFX_EXTBUFF_VPP_DETAIL; +- vpp->detail_conf.Header.BufferSz = sizeof(mfxExtVPPDetail); +- vpp->detail_conf.DetailFactor = vpp->detail; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->detail_conf; ++ INIT_MFX_EXTBUF(detail_conf, MFX_EXTBUFF_VPP_DETAIL); ++ SET_MFX_PARAM_FIELD(detail_conf, DetailFactor, vpp->detail); + } + + if (vpp->procamp) { +- memset(&vpp->procamp_conf, 0, sizeof(mfxExtVPPProcAmp)); +- vpp->procamp_conf.Header.BufferId = MFX_EXTBUFF_VPP_PROCAMP; +- vpp->procamp_conf.Header.BufferSz = sizeof(mfxExtVPPProcAmp); +- vpp->procamp_conf.Hue = vpp->hue; +- vpp->procamp_conf.Saturation = vpp->saturation; +- vpp->procamp_conf.Contrast = vpp->contrast; +- vpp->procamp_conf.Brightness = vpp->brightness; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->procamp_conf; ++ INIT_MFX_EXTBUF(procamp_conf, MFX_EXTBUFF_VPP_PROCAMP); ++ SET_MFX_PARAM_FIELD(procamp_conf, Hue, vpp->hue); ++ SET_MFX_PARAM_FIELD(procamp_conf, Saturation, vpp->saturation); ++ SET_MFX_PARAM_FIELD(procamp_conf, Contrast, vpp->contrast); ++ SET_MFX_PARAM_FIELD(procamp_conf, Brightness, vpp->brightness); + } + + if (vpp->transpose >= 0) { +@@ -420,18 +447,14 @@ static int config_output(AVFilterLink *o + + if (vpp->rotate) { + #ifdef QSV_HAVE_ROTATION +- memset(&vpp->rotation_conf, 0, sizeof(mfxExtVPPRotation)); +- vpp->rotation_conf.Header.BufferId = MFX_EXTBUFF_VPP_ROTATION; +- vpp->rotation_conf.Header.BufferSz = sizeof(mfxExtVPPRotation); +- vpp->rotation_conf.Angle = vpp->rotate; ++ INIT_MFX_EXTBUF(rotation_conf, MFX_EXTBUFF_VPP_ROTATION); ++ SET_MFX_PARAM_FIELD(rotation_conf, Angle, vpp->rotate); + + if (MFX_ANGLE_90 == vpp->rotate || MFX_ANGLE_270 == vpp->rotate) { + FFSWAP(int, vpp->out_width, vpp->out_height); + FFSWAP(int, outlink->w, outlink->h); + av_log(ctx, AV_LOG_DEBUG, "Swap width and height for clock/cclock rotation.\n"); + } +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->rotation_conf; + #else + av_log(ctx, AV_LOG_WARNING, "The QSV VPP rotate option is " + "not supported with this MSDK version.\n"); +@@ -441,12 +464,8 @@ static int config_output(AVFilterLink *o + + if (vpp->hflip) { + #ifdef QSV_HAVE_MIRRORING +- memset(&vpp->mirroring_conf, 0, sizeof(mfxExtVPPMirroring)); +- vpp->mirroring_conf.Header.BufferId = MFX_EXTBUFF_VPP_MIRRORING; +- vpp->mirroring_conf.Header.BufferSz = 
sizeof(mfxExtVPPMirroring); +- vpp->mirroring_conf.Type = vpp->hflip; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->mirroring_conf; ++ INIT_MFX_EXTBUF(mirroring_conf, MFX_EXTBUFF_VPP_MIRRORING); ++ SET_MFX_PARAM_FIELD(mirroring_conf, Type, vpp->hflip); + #else + av_log(ctx, AV_LOG_WARNING, "The QSV VPP hflip option is " + "not supported with this MSDK version.\n"); +@@ -454,11 +473,26 @@ static int config_output(AVFilterLink *o + #endif + } + ++ if (inlink->w != outlink->w || inlink->h != outlink->h) { ++#ifdef QSV_HAVE_SCALING_CONFIG ++ INIT_MFX_EXTBUF(scale_conf, MFX_EXTBUFF_VPP_SCALING); ++ SET_MFX_PARAM_FIELD(scale_conf, ScalingMode, vpp->scale_mode); ++#else ++ av_log(ctx, AV_LOG_WARNING, "The QSV VPP Scale option is " ++ "not supported with this MSDK version.\n"); ++#endif ++ } ++ ++#undef INIT_MFX_EXTBUF ++#undef SET_MFX_PARAM_FIELD ++ + if (vpp->use_frc || vpp->use_crop || vpp->deinterlace || vpp->denoise || + vpp->detail || vpp->procamp || vpp->rotate || vpp->hflip || +- inlink->w != outlink->w || inlink->h != outlink->h || in_format != vpp->out_format) +- return ff_qsvvpp_create(ctx, &vpp->qsv, ¶m); ++ inlink->w != outlink->w || inlink->h != outlink->h || in_format != vpp->out_format || ++ !vpp->has_passthrough) ++ return ff_qsvvpp_init(ctx, ¶m); + else { ++ /* No MFX session is created in this case */ + av_log(ctx, AV_LOG_VERBOSE, "qsv vpp pass through mode.\n"); + if (inlink->hw_frames_ctx) + outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); +@@ -467,26 +501,161 @@ static int config_output(AVFilterLink *o + return 0; + } + +-static int filter_frame(AVFilterLink *inlink, AVFrame *picref) ++static int activate(AVFilterContext *ctx) + { +- int ret = 0; +- AVFilterContext *ctx = inlink->dst; +- VPPContext *vpp = inlink->dst->priv; +- AVFilterLink *outlink = ctx->outputs[0]; +- +- if (vpp->qsv) { +- ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref); +- av_frame_free(&picref); ++ AVFilterLink *inlink = ctx->inputs[0]; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ QSVVPPContext *qsv = ctx->priv; ++ AVFrame *in = NULL; ++ int ret, status = 0; ++ int64_t pts = AV_NOPTS_VALUE; ++ ++ FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); ++ ++ if (!qsv->eof) { ++ ret = ff_inlink_consume_frame(inlink, &in); ++ if (ret < 0) ++ return ret; ++ ++ if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { ++ if (status == AVERROR_EOF) { ++ qsv->eof = 1; ++ } ++ } ++ } ++ ++ if (qsv->session) { ++ if (in || qsv->eof) { ++ ret = ff_qsvvpp_filter_frame(qsv, inlink, in); ++ av_frame_free(&in); ++ ++ if (qsv->eof) { ++ ff_outlink_set_status(outlink, status, pts); ++ return 0; ++ } ++ ++ if (qsv->got_frame) { ++ qsv->got_frame = 0; ++ return ret; ++ } ++ } + } else { +- if (picref->pts != AV_NOPTS_VALUE) +- picref->pts = av_rescale_q(picref->pts, inlink->time_base, outlink->time_base); +- ret = ff_filter_frame(outlink, picref); ++ /* No MFX session is created in pass-through mode */ ++ if (in) { ++ if (in->pts != AV_NOPTS_VALUE) ++ in->pts = av_rescale_q(in->pts, inlink->time_base, outlink->time_base); ++ ++ ret = ff_filter_frame(outlink, in); ++ return ret; ++ } + } + +- return ret; ++ if (qsv->eof) { ++ ff_outlink_set_status(outlink, status, pts); ++ return 0; ++ } else { ++ FF_FILTER_FORWARD_WANTED(outlink, inlink); ++ } ++ ++ return FFERROR_NOT_READY; + } + +-static int query_formats(AVFilterContext *ctx) ++static av_cold void vpp_uninit(AVFilterContext *ctx) ++{ ++ ff_qsvvpp_close(ctx); ++} ++ ++static const AVFilterPad vpp_inputs[] = { ++ { ++ .name = "default", 
++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = config_input, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad vpp_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = config_output, ++ }, ++ { NULL } ++}; ++ ++#define DEFINE_QSV_FILTER(x, sn, ln) \ ++static const AVClass x##_class = { \ ++ .class_name = #sn "_qsv", \ ++ .item_name = av_default_item_name, \ ++ .option = x##_options, \ ++ .version = LIBAVUTIL_VERSION_INT, \ ++}; \ ++const AVFilter ff_vf_##sn##_qsv = { \ ++ .name = #sn "_qsv", \ ++ .description = NULL_IF_CONFIG_SMALL("Quick Sync Video " #ln), \ ++ .preinit = x##_preinit, \ ++ .init = vpp_init, \ ++ .uninit = vpp_uninit, \ ++ .priv_size = sizeof(VPPContext), \ ++ .priv_class = &x##_class, \ ++ .inputs = vpp_inputs, \ ++ .outputs = vpp_outputs, \ ++ .query_formats = x##_query_formats, \ ++ .activate = activate, \ ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, \ ++}; ++ ++static const AVOption vpp_options[] = { ++ { "deinterlace", "deinterlace mode: 0=off, 1=bob, 2=advanced", OFFSET(deinterlace), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MFX_DEINTERLACING_ADVANCED, .flags = FLAGS, "deinterlace" }, ++ { "bob", "Bob deinterlace mode.", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_DEINTERLACING_BOB }, .flags = FLAGS, "deinterlace" }, ++ { "advanced", "Advanced deinterlace mode. ", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_DEINTERLACING_ADVANCED }, .flags = FLAGS, "deinterlace" }, ++ ++ { "denoise", "denoise level [0, 100]", OFFSET(denoise), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, .flags = FLAGS }, ++ { "detail", "enhancement level [0, 100]", OFFSET(detail), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, .flags = FLAGS }, ++ { "framerate", "output framerate", OFFSET(framerate), AV_OPT_TYPE_RATIONAL, { .dbl = 0.0 },0, DBL_MAX, .flags = FLAGS }, ++ { "procamp", "Enable ProcAmp", OFFSET(procamp), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = FLAGS}, ++ { "hue", "ProcAmp hue", OFFSET(hue), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, -180.0, 180.0, .flags = FLAGS}, ++ { "saturation", "ProcAmp saturation", OFFSET(saturation), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS}, ++ { "contrast", "ProcAmp contrast", OFFSET(contrast), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS}, ++ { "brightness", "ProcAmp brightness", OFFSET(brightness), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, -100.0, 100.0, .flags = FLAGS}, ++ ++ { "transpose", "set transpose direction", OFFSET(transpose), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 6, FLAGS, "transpose"}, ++ { "cclock_hflip", "rotate counter-clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "transpose" }, ++ { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "transpose" }, ++ { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "transpose" }, ++ { "clock_hflip", "rotate clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "transpose" }, ++ { "reversal", "rotate by half-turn", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, .flags=FLAGS, .unit = "transpose" }, ++ { "hflip", "flip horizontally", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, .flags=FLAGS, .unit = "transpose" }, ++ { "vflip", "flip vertically", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, .flags=FLAGS, .unit = "transpose" }, ++ ++ { "cw", "set the width crop area expression", OFFSET(cw), AV_OPT_TYPE_STRING, { .str = "iw" }, 0, 0, 
FLAGS }, ++ { "ch", "set the height crop area expression", OFFSET(ch), AV_OPT_TYPE_STRING, { .str = "ih" }, 0, 0, FLAGS }, ++ { "cx", "set the x crop area expression", OFFSET(cx), AV_OPT_TYPE_STRING, { .str = "(in_w-out_w)/2" }, 0, 0, FLAGS }, ++ { "cy", "set the y crop area expression", OFFSET(cy), AV_OPT_TYPE_STRING, { .str = "(in_h-out_h)/2" }, 0, 0, FLAGS }, ++ ++ { "w", "Output video width(0=input video width, -1=keep input video aspect)", OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS }, ++ { "width", "Output video width(0=input video width, -1=keep input video aspect)", OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS }, ++ { "h", "Output video height(0=input video height, -1=keep input video aspect)", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, ++ { "height", "Output video height(0=input video height, -1=keep input video aspect)", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, ++ { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, ++ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = 4 }, 0, INT_MAX, .flags = FLAGS }, ++#if QSV_HAVE_SCALING_CONFIG ++ { "scale_mode", "scale mode", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = MFX_SCALING_MODE_DEFAULT }, MFX_SCALING_MODE_DEFAULT, MFX_SCALING_MODE_QUALITY, .flags = FLAGS, "scale mode" }, ++ { "auto", "auto mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_DEFAULT}, INT_MIN, INT_MAX, FLAGS, "scale mode"}, ++ { "low_power", "low power mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_LOWPOWER}, INT_MIN, INT_MAX, FLAGS, "scale mode"}, ++ { "hq", "high quality mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_QUALITY}, INT_MIN, INT_MAX, FLAGS, "scale mode"}, ++#else ++ { "scale_mode", "(not supported)", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS, "scale mode" }, ++ { "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = 0}, 0, 0, FLAGS, "scale mode"}, ++ { "low_power", "", 0, AV_OPT_TYPE_CONST, { .i64 = 1}, 0, 0, FLAGS, "scale mode"}, ++ { "hq", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2}, 0, 0, FLAGS, "scale mode"}, ++#endif ++ { "passthrough", "Apply pass through mode if possible.", OFFSET(has_passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, .flags = FLAGS }, ++ ++ { NULL } ++}; ++ ++static int vpp_query_formats(AVFilterContext *ctx) + { + int ret; + static const enum AVPixelFormat in_pix_fmts[] = { +@@ -512,48 +681,92 @@ static int query_formats(AVFilterContext + &ctx->outputs[0]->incfg.formats); + } + +-static av_cold void vpp_uninit(AVFilterContext *ctx) ++DEFINE_QSV_FILTER(vpp, vpp, VPP); ++ ++static int default_query_formats(AVFilterContext *ctx) + { +- VPPContext *vpp = ctx->priv; ++ static const enum AVPixelFormat pixel_formats[] = { ++ AV_PIX_FMT_QSV, AV_PIX_FMT_NONE, ++ }; ++ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); ++ int ret; ++ ++ if ((ret = ff_set_common_formats(ctx, pix_fmts)) < 0) ++ return ret; + +- ff_qsvvpp_free(&vpp->qsv); ++ return 0; + } + +-static const AVClass vpp_class = { +- .class_name = "vpp_qsv", +- .item_name = av_default_item_name, +- .option = options, +- .version = LIBAVUTIL_VERSION_INT, +-}; ++static av_cold int default_preinit(AVFilterContext *ctx) ++{ ++ VPPContext *vpp = ctx->priv; + +-static const AVFilterPad vpp_inputs[] = { +- { +- .name = "default", +- .type = 
AVMEDIA_TYPE_VIDEO, +- .config_props = config_input, +- .filter_frame = filter_frame, +- }, +- { NULL } +-}; ++ vpp_preinit(ctx); ++ vpp->has_passthrough = 0; + +-static const AVFilterPad vpp_outputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .config_props = config_output, +- }, +- { NULL } ++ return 0; ++} ++ ++static const AVOption qsvscale_options[] = { ++ { "w", "Output video width(0=input video width, -1=keep input video aspect)", OFFSET(ow), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "h", "Output video height(0=input video height, -1=keep input video aspect)", OFFSET(oh), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "cw", "set the width crop area expression", OFFSET(cw), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "ch", "set the height crop area expression", OFFSET(ch), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "cx", "set the x crop area expression", OFFSET(cx), AV_OPT_TYPE_STRING, { .str = "(iw-ow)/2" }, .flags = FLAGS }, ++ { "cy", "set the y crop area expression", OFFSET(cy), AV_OPT_TYPE_STRING, { .str = "(ih-oh)/2" }, .flags = FLAGS }, ++ { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, ++ ++#if QSV_HAVE_SCALING_CONFIG ++ { "mode", "set scaling mode", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = MFX_SCALING_MODE_DEFAULT}, MFX_SCALING_MODE_DEFAULT, MFX_SCALING_MODE_QUALITY, FLAGS, "mode"}, ++ { "low_power", "low power mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_LOWPOWER}, INT_MIN, INT_MAX, FLAGS, "mode"}, ++ { "hq", "high quality mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_QUALITY}, INT_MIN, INT_MAX, FLAGS, "mode"}, ++#else ++ { "mode", "(not supported)", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = 0}, 0, INT_MAX, FLAGS, "mode"}, ++ { "low_power", "", 0, AV_OPT_TYPE_CONST, { .i64 = 1}, 0, 0, FLAGS, "mode"}, ++ { "hq", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2}, 0, 0, FLAGS, "mode"}, ++#endif ++ ++ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = 4 }, 0, INT_MAX, .flags = FLAGS }, ++ ++ { NULL }, + }; + +-AVFilter ff_vf_vpp_qsv = { +- .name = "vpp_qsv", +- .description = NULL_IF_CONFIG_SMALL("Quick Sync Video VPP."), +- .priv_size = sizeof(VPPContext), +- .query_formats = query_formats, +- .init = vpp_init, +- .uninit = vpp_uninit, +- .inputs = vpp_inputs, +- .outputs = vpp_outputs, +- .priv_class = &vpp_class, +- .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++static int qsvscale_query_formats(AVFilterContext *ctx) ++{ ++ return default_query_formats(ctx); ++} ++ ++static av_cold int qsvscale_preinit(AVFilterContext *ctx) ++{ ++ return default_preinit(ctx); ++} ++ ++DEFINE_QSV_FILTER(qsvscale, scale, scaling and format conversion); ++ ++static const AVOption qsvdeint_options[] = { ++ { "mode", "set deinterlace mode", OFFSET(deinterlace), AV_OPT_TYPE_INT, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, ++ { "bob", "bob algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_BOB}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, ++ { "advanced", "Motion adaptive algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, ++ ++ { "rate", "Generate output at frame rate or field rate", ++ OFFSET(field_rate), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 2, FLAGS, 
"rate" }, ++ { "frame", "Output at frame rate (one frame of output for each field-pair)", ++ 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "rate" }, ++ { "field", "Output at field rate (one frame of output for each field)", ++ 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, "rate" }, ++ ++ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = 4 }, 0, INT_MAX, .flags = FLAGS }, ++ { NULL }, + }; ++ ++static int qsvdeint_query_formats(AVFilterContext *ctx) ++{ ++ return default_query_formats(ctx); ++} ++ ++static av_cold int qsvdeint_preinit(AVFilterContext *ctx) ++{ ++ return default_preinit(ctx); ++} ++ ++DEFINE_QSV_FILTER(qsvdeint, deinterlace, deinterlacing); +Index: jellyfin-ffmpeg/libavutil/frame.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/frame.c ++++ jellyfin-ffmpeg/libavutil/frame.c +@@ -347,6 +347,36 @@ int av_frame_get_buffer(AVFrame *frame, + return AVERROR(EINVAL); + } + ++int av_frame_copy_side_data(AVFrame* dst, const AVFrame* src, int flags) ++{ ++ for (unsigned i = 0; i < src->nb_side_data; i++) { ++ const AVFrameSideData *sd_src = src->side_data[i]; ++ AVFrameSideData *sd_dst; ++ if ( sd_src->type == AV_FRAME_DATA_PANSCAN ++ && (src->width != dst->width || src->height != dst->height)) ++ continue; ++ if (flags & AV_FRAME_COPY_PROPS_FORCECOPY) { ++ sd_dst = av_frame_new_side_data(dst, sd_src->type, ++ sd_src->size); ++ if (!sd_dst) { ++ wipe_side_data(dst); ++ return AVERROR(ENOMEM); ++ } ++ memcpy(sd_dst->data, sd_src->data, sd_src->size); ++ } else { ++ AVBufferRef *ref = av_buffer_ref(sd_src->buf); ++ sd_dst = av_frame_new_side_data_from_buf(dst, sd_src->type, ref); ++ if (!sd_dst) { ++ av_buffer_unref(&ref); ++ wipe_side_data(dst); ++ return AVERROR(ENOMEM); ++ } ++ } ++ av_dict_copy(&sd_dst->metadata, sd_src->metadata, 0); ++ } ++ return 0; ++} ++ + static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy) + { + int ret, i; +@@ -395,31 +425,9 @@ FF_DISABLE_DEPRECATION_WARNINGS + FF_ENABLE_DEPRECATION_WARNINGS + #endif + +- for (i = 0; i < src->nb_side_data; i++) { +- const AVFrameSideData *sd_src = src->side_data[i]; +- AVFrameSideData *sd_dst; +- if ( sd_src->type == AV_FRAME_DATA_PANSCAN +- && (src->width != dst->width || src->height != dst->height)) +- continue; +- if (force_copy) { +- sd_dst = av_frame_new_side_data(dst, sd_src->type, +- sd_src->size); +- if (!sd_dst) { +- wipe_side_data(dst); +- return AVERROR(ENOMEM); +- } +- memcpy(sd_dst->data, sd_src->data, sd_src->size); +- } else { +- AVBufferRef *ref = av_buffer_ref(sd_src->buf); +- sd_dst = av_frame_new_side_data_from_buf(dst, sd_src->type, ref); +- if (!sd_dst) { +- av_buffer_unref(&ref); +- wipe_side_data(dst); +- return AVERROR(ENOMEM); +- } +- } +- av_dict_copy(&sd_dst->metadata, sd_src->metadata, 0); +- } ++ if (ret = av_frame_copy_side_data(dst, src, ++ force_copy ? 
AV_FRAME_COPY_PROPS_FORCECOPY : 0) < 0) ++ return ret; + + #if FF_API_FRAME_QP + FF_DISABLE_DEPRECATION_WARNINGS +@@ -823,6 +831,17 @@ void av_frame_remove_side_data(AVFrame * + } + } + ++void av_frame_remove_all_side_data(AVFrame *frame) ++{ ++ int i; ++ ++ for (i = frame->nb_side_data - 1; i >= 0; i--) { ++ free_side_data(&frame->side_data[i]); ++ frame->side_data[i] = frame->side_data[frame->nb_side_data - 1]; ++ frame->nb_side_data--; ++ } ++} ++ + const char *av_frame_side_data_name(enum AVFrameSideDataType type) + { + switch(type) { +Index: jellyfin-ffmpeg/libavutil/frame.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/frame.h ++++ jellyfin-ffmpeg/libavutil/frame.h +@@ -897,6 +897,22 @@ int av_frame_copy(AVFrame *dst, const AV + int av_frame_copy_props(AVFrame *dst, const AVFrame *src); + + /** ++ * Copy actual data buffers instead of references. ++ */ ++#define AV_FRAME_COPY_PROPS_FORCECOPY 1 ++ ++/** ++ * Copy only side-data from src to dst. ++ * ++ * @param dst a frame to which the side data should be copied. ++ * @param src a frame from which to copy the side data. ++ * @param flags flags of type AV_FRAME_COPY_PROPS_*, controlling copy behavior. ++ * ++ * @return >= 0 on success, a negative AVERROR on error. ++ */ ++int av_frame_copy_side_data(AVFrame* dst, const AVFrame* src, int flags); ++ ++/** + * Get the buffer reference a given data plane is stored in. + * + * @param plane index of the data plane of interest in frame->extended_data. +@@ -951,6 +967,10 @@ AVFrameSideData *av_frame_get_side_data( + */ + void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type); + ++/** ++ * Remove and free all side data in this frame. ++ */ ++void av_frame_remove_all_side_data(AVFrame *frame); + + /** + * Flags for frame cropping. diff --git a/debian/patches/series b/debian/patches/series index ec652d1469b..3fc8da2647e 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -29,3 +29,4 @@ 0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch 0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch 0031-add-a-vaapi-overlay-filter.patch +0032-add-async-support-for-qsv-vpp.patch
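
Note on the new libavutil surface introduced above: the frame.c and frame.h hunks split the side-data copy loop out of frame_copy_props() into a public av_frame_copy_side_data(), with AV_FRAME_COPY_PROPS_FORCECOPY selecting deep copies of the payload buffers over new references, and add av_frame_remove_all_side_data(), which the reworked vf_overlay_qsv uses to strip side data from the overlay input before compositing. The following is a minimal sketch of how the two entry points behave, assuming a libavutil built with this patch applied (neither symbol exists in unpatched FFmpeg 4.4); the display-matrix payload is an arbitrary choice for illustration.

/*
 * Minimal sketch of the two AVFrame side-data helpers added by the
 * libavutil hunks above. Assumes a libavutil built with this patch;
 * neither av_frame_copy_side_data() nor av_frame_remove_all_side_data()
 * exists in unpatched FFmpeg 4.4.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <libavutil/frame.h>

int main(void)
{
    AVFrame *src = av_frame_alloc();
    AVFrame *dst = av_frame_alloc();
    AVFrameSideData *sd;
    int ret = AVERROR(ENOMEM);

    if (!src || !dst)
        goto end;

    /* Attach one side-data entry to the source frame. */
    sd = av_frame_new_side_data(src, AV_FRAME_DATA_DISPLAYMATRIX,
                                9 * sizeof(int32_t));
    if (!sd)
        goto end;
    memset(sd->data, 0, sd->size);

    /* Deep-copy: FORCECOPY duplicates the payload buffers instead of
     * taking new buffer references, mirroring what frame_copy_props()
     * now does internally when its force_copy flag is set. */
    ret = av_frame_copy_side_data(dst, src, AV_FRAME_COPY_PROPS_FORCECOPY);
    if (ret < 0)
        goto end;
    printf("dst carries %d side-data entries after the copy\n", dst->nb_side_data);

    /* Strip everything again, as vf_overlay_qsv now does for the overlay
     * input frame before compositing. */
    av_frame_remove_all_side_data(dst);
    printf("dst carries %d side-data entries after removal\n", dst->nb_side_data);
    ret = 0;

end:
    av_frame_free(&src);
    av_frame_free(&dst);
    return ret < 0;
}

Against a patched tree this builds with something like gcc demo.c -lavutil. Exposing the copy loop as a public helper lets a filter duplicate or drop side data without touching the rest of the frame properties, which is exactly the capability the overlay change needs.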