From ba3ed26408485bcbccaa206148d7cee3a6bc57d4 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 17:56:04 +0800 Subject: [PATCH 01/41] New upstream version 4.4.1 --- Changelog | 191 ++++++++++++++++++++++++- RELEASE | 2 +- RELEASE_NOTES | 2 +- VERSION | 2 +- configure | 4 +- doc/Doxyfile | 2 +- doc/writing_filters.txt | 2 +- ffbuild/common.mak | 2 +- fftools/ffmpeg.c | 19 ++- libavcodec/aaccoder.c | 8 +- libavcodec/aacdec_template.c | 7 +- libavcodec/aacenc.c | 5 +- libavcodec/aacpsy.c | 5 +- libavcodec/aarch64/hevcdsp_idct_neon.S | 11 +- libavcodec/alsdec.c | 9 +- libavcodec/apedec.c | 10 +- libavcodec/argo.c | 7 +- libavcodec/av1_metadata_bsf.c | 16 ++- libavcodec/cbs_h265_syntax_template.c | 2 +- libavcodec/clearvideo.c | 4 +- libavcodec/cpia.c | 1 + libavcodec/crystalhd.c | 1 + libavcodec/cuviddec.c | 1 + libavcodec/decode.c | 73 +++++----- libavcodec/dnxhddec.c | 22 ++- libavcodec/dpx.c | 5 +- libavcodec/dxva2_av1.c | 2 +- libavcodec/exr.c | 19 ++- libavcodec/faxcompr.c | 18 ++- libavcodec/flac_parser.c | 3 +- libavcodec/flicvideo.c | 2 + libavcodec/frame_thread_encoder.c | 11 +- libavcodec/frame_thread_encoder.h | 4 + libavcodec/h263.c | 12 ++ libavcodec/h263.h | 1 + libavcodec/h263data.c | 14 -- libavcodec/h263data.h | 1 - libavcodec/h264_slice.c | 3 + libavcodec/h264dec.c | 2 +- libavcodec/hevc_sei.c | 2 +- libavcodec/iff.c | 3 +- libavcodec/internal.h | 5 + libavcodec/j2kenc.c | 2 +- libavcodec/jpeg2000dec.c | 2 + libavcodec/jpeglsdec.c | 2 +- libavcodec/libdav1d.c | 20 +++ libavcodec/lpc.c | 2 +- libavcodec/lpc.h | 7 +- libavcodec/mjpegbdec.c | 10 +- libavcodec/mjpegdec.c | 3 + libavcodec/mpeg12dec.c | 8 +- libavcodec/mpegvideo_enc.c | 7 +- libavcodec/mxpegdec.c | 3 + libavcodec/nellymoserenc.c | 6 +- libavcodec/nvenc.c | 142 +++++++++--------- libavcodec/nvenc.h | 2 +- libavcodec/nvenc_hevc.c | 2 +- libavcodec/pngdec.c | 6 +- libavcodec/rv10.c | 8 +- libavcodec/sbrdsp_fixed.c | 2 +- libavcodec/snow.h | 1 + libavcodec/snowdec.c | 17 ++- libavcodec/svq1enc.c | 7 +- libavcodec/ttadata.c | 3 +- libavcodec/ttadsp.c | 6 +- libavcodec/ttmlenc.c | 2 +- libavcodec/utils.c | 49 +++++-- libavcodec/vaapi_av1.c | 2 +- libavcodec/vc1.c | 5 + libavcodec/vc1dec.c | 10 +- libavcodec/vc2enc.c | 2 + libavcodec/videotoolboxenc.c | 4 + libavcodec/webp.c | 3 + libavcodec/wma.c | 4 +- libavcodec/wmadec.c | 11 +- libavcodec/wmaprodec.c | 13 +- libavcodec/xpmdec.c | 14 +- libavfilter/af_drmeter.c | 5 + libavfilter/f_metadata.c | 3 - libavfilter/vf_ciescope.c | 3 +- libavfilter/vf_dctdnoiz.c | 3 + libavfilter/vf_fftdnoiz.c | 2 +- libavfilter/vf_mestimate.c | 3 + libavfilter/vf_overlay_cuda.c | 12 +- libavfilter/vf_scale.c | 12 ++ libavfilter/vf_scale_npp.c | 5 +- libavfilter/vf_vmafmotion.c | 3 + libavfilter/vf_yadif.c | 32 +++-- libavformat/aaxdec.c | 15 +- libavformat/adtsenc.c | 6 +- libavformat/aiffdec.c | 3 + libavformat/asfdec_f.c | 2 +- libavformat/asfdec_o.c | 21 +-- libavformat/avidec.c | 11 +- libavformat/avio.c | 7 +- libavformat/bfi.c | 2 + libavformat/cafdec.c | 2 +- libavformat/dsfdec.c | 2 +- libavformat/dsicin.c | 7 +- libavformat/dxa.c | 2 +- libavformat/fifo.c | 2 +- libavformat/ftp.c | 2 + libavformat/hlsenc.c | 11 +- libavformat/id3v2.c | 6 +- libavformat/iff.c | 2 +- libavformat/jacosubdec.c | 3 + libavformat/matroskadec.c | 22 +-- libavformat/matroskaenc.c | 1 + libavformat/mccdec.c | 3 +- libavformat/moflex.c | 3 - libavformat/mov.c | 32 ++++- libavformat/movenc.c | 21 ++- libavformat/mpc8.c | 13 +- libavformat/mpegts.c | 5 +- libavformat/msf.c | 2 + 
libavformat/mvdec.c | 5 +- libavformat/mvi.c | 14 +- libavformat/mxfdec.c | 8 +- libavformat/nutdec.c | 5 + libavformat/pp_bnk.c | 2 +- libavformat/qcp.c | 3 +- libavformat/realtextdec.c | 3 +- libavformat/rmdec.c | 16 ++- libavformat/rpl.c | 10 +- libavformat/sbgdec.c | 10 ++ libavformat/subtitles.c | 2 +- libavformat/tta.c | 2 + libavformat/utils.c | 12 +- libavformat/wavdec.c | 4 +- libavformat/wtvdec.c | 6 +- libavutil/cpu.c | 6 + libavutil/mathematics.h | 1 + libswscale/alphablend.c | 32 +++-- libswscale/slice.c | 5 +- tests/ref/fate/ts-opus-demux | 2 +- tools/cws2fws.c | 6 + 136 files changed, 950 insertions(+), 397 deletions(-) diff --git a/Changelog b/Changelog index a96e350e096..a6508cd8ac0 100644 --- a/Changelog +++ b/Changelog @@ -1,7 +1,196 @@ Entries are sorted chronologically from oldest to youngest within each release, releases are sorted from youngest to oldest. -version : +version 4.4.1: +- avcodec/flac_parser: Consider AV_INPUT_BUFFER_PADDING_SIZE +- avcodec/ttadsp: Fix integer overflows in tta_filter_process_c() +- avutil/mathematics: Document av_rescale_rnd() behavior on non int64 results +- avcodec/utils: Ensure 8x8 alignment for ARGO in avcodec_align_dimensions2() +- avformat/matroskadec: Reset state also on failure in matroska_reset_status() +- avformat/wavdec: Check smv_block_size +- avformat/rmdec: Check for multiple audio_stream_info +- avcodec/apedec: Use 64bit to avoid overflow +- avcodec/apedec: Fix undefined integer overflow in long_filter_ehigh_3830() +- oavformat/avidec: Check offset in odml +- avformat/mpegts: use actually read packet size in mpegts_resync special case +- fftools/ffmpeg: Fix crash when flushing non-fully setup output stream +- avfilter/scale_npp: fix non-aligned output frame dimensions +- Revert "avformat/hlsenc: compute video_keyframe_size after write keyframe" +- Changelog: update +- swscale/alphablend: Fix slice handling +- avcodec/apedec: Fix integer overflow in filter_fast_3320() +- avformat/mov: Fix last mfra check +- avcodec/mxpegdec: Check for AVDISCARD_ALL +- avcodec/flicvideo: Check remaining bytes in FLI*COPY +- avcodec/utils: ARGO writes 4x4 blocks without regard to the image dimensions +- avcodec/cbs_h265_syntax_template: Limit sps_num_palette_predictor_initializer_minus1 to 127 +- avcodec/snowdec: Maintain avmv buffer +- avcodec/mpeg12dec: Do not put mpeg_f_code into an invalid state on error return +- avcodec/mpegvideo_enc: Limit bitrate tolerance to the representable +- avcodec/apedec: Fix integer overflow in intermediate +- avformat/mvdec: Do not set invalid sample rate +- avformat/sbgdec: Check for t0 overflow in expand_tseq() +- avformat/rmdec: Use 64bit for intermediate for DEINT_ID_INT4 +- avformat/sbgdec: Check opt_duration and start for overflow +- avcodec/exr: Fix undefined integer multiplication +- avformat/mov: Check for duplicate clli +- avformat/utils: Ignore negative duration in codec_info_duration computation +- avformat/jacosubdec: Check for min in t overflow in get_shift() +- avformat/mxfdec: check channel number in mxf_get_d10_aes3_packet() +- (origin/release/4.4) avcodec/wmadec: handle run_level_decode error +- avcodec/wma: Return specific error code +- avcodec/dxva2_av1: fix superres_denom parameter +- avcodec/libdav1d: fix compilation after recent libdav1d API changes +- Changelog: update +- avcodec/utils: don't return negative values in av_get_audio_frame_duration() +- avcodec/jpeg2000dec: Check that atom header is within bytsetream +- avcodec/apedec: Fix 2 integer overflows in filter_3800() +- 
avcodec/xpmdec: Move allocations down after more error checks +- avcodec/argo: Move U, fix shift +- avformat/mov: Check dts for overflow in mov_read_trun() +- avformat/avidec: Use 64bit for frame number in odml index parsing +- avcodec/mjpegbdec: Skip SOS on AVDISCARD_ALL as does mjpeg +- avcodec/mjpegdec: Check for bits left in mjpeg_decode_scan_progressive_ac() +- avformat/adtsenc: return value check for init_get_bits in adts_decode_extradata +- avcodec/webp: Check available space in loop in decode_entropy_coded_image() +- avcodec/h264dec: use picture parameters in ff_print_debug_info2() +- avcodec/vc1dec: ff_print_debug_info() does not support WMV3 field_mode +- avcodec/frame_thread_encoder: Free AVCodecContext structure on error during init +- avcodec/faxcompr: Check for end of input in cmode == 1 in decode_group3_2d_line() +- avcodec/vc1dec: Disable error concealment for *IMAGE +- avcodec/sbrdsp_fixed: Fix negation overflow in sbr_neg_odd_64_c() +- avcodec/argo: Check for even dimensions +- avformat/wtvdec: Check for EOF before seeking back in parse_media_type() +- avformat/mpc8: Check first keyframe position for overflow +- avcodec/exr: Check ac_count +- avformat/wavdec: Use 64bit in new_pos computation +- avformat/sbgdec: Check for overflow in timestamp preparation +- avformat/dsicin: Check packet size for overflow +- avformat/dsfdec: Change order of operations in bitrate computation +- avformat/bfi: check nframes +- avformat/avidec: fix position overflow in avi_load_index() +- avformat/asfdec_f: Check sizeX against padding +- avformat/aiffdec: Check for size overflow in header parsing +- avcodec/aaccoder: Add minimal bias in search_for_ms() +- avformat/mov: Fix incorrect overflow detection in mov_read_sidx() +- avformat/mov: Avoid undefined overflow in time_offset calculation +- avfilter/af_drmeter: Check that there is data +- avfilter/vf_fftdnoiz: Use lrintf() in export_row8() +- avfilter/vf_mestimate: Check b_count +- avformat/mov: do not ignore errors in mov_metadata_hmmt() +- avformat/mxfdec: Check size for shrinking +- avcodec/dnxhddec: check and propagate function return value +- swscale/slice: Fix wrong return on error +- avcodec/aacdec_template: Avoid some invalid values to be set by decode_audio_specific_config_gb() +- swscale/slice: Check slice for allocation failure +- avformat/matroskadec: Fix handling of huge default durations +- avcodec/lpc: check for zero err in normalization in compute_lpc_coefs() +- avcodec/j2kenc: Check for av_strtok() failure +- avformat/ftp: Check for av_strtok() failure +- tools/cws2fws: Check read() for failure +- avcodec/cpia: Fix missing src_size update +- avcodec/exr: Better size checks +- avcodec/clearvideo: Check tile_size to be not too large +- avcodec/utils: Use 64bit for intermediate in AV_CODEC_ID_ADPCM_THP* duration calculation +- avformat/aaxdec: Check avio_seek() in header reading +- avcodec/hevc_sei: Use get_bits_long() for time_offset_value +- avformat/rmdec: Check old_format len for overflow +- avformat/realtextdec: Check the pts difference before using it for the duration computation +- avformat/qcp: Avoid negative nb_rates +- avformat/pp_bnk: Use 64bit in bitrate computation +- avformat/nutdec: Check tmp_size +- avformat/msf: Check that channels doesnt overflow during extradata construction +- avformat/subtitles: Check pts difference before use +- avformat/mpc8: Check for position overflow in mpc8_handle_chunk() +- avformat/mccdec: Fix overflows in num/den +- avformat/iff: Use 64bit in duration computation +- avformat/dxa: 
Check fps to be within the supported range more precissely +- avcodec/iff: Only write palette to plane 1 if its PAL8 +- avformat/tta: Check for EOF in index reading loop +- avfilter/vf_scale: set the RGB matrix coefficients in case of RGB +- avfilter/vf_scale: reset color matrix in case of identity & non-RGB +- ffmpeg: fix order between field order autodetection and override +- avcodec/h264_slice: clear old slice POC values on parsing failure +- avfilter/f_metadata: do not return the frame early if there is no metadata +- ffbuild: Avoid using the --preprocessor argument to windres +- avcodec/crystalhd: signal that the decoder sets all output frame properties +- avcodec/cuviddec: signal that the decoder sets all output frame properties +- avcodec/decode: reindent after the previous commit +- avcodec/decode: add an internal codec flag to signal a decoder sets all output frame properties +- avcodec/decode: fetch packets from the pkt_props FIFO on every frame returned +- Update missed irc links +- avformat/rpl: The associative law doesnt hold for signed integers in C +- avcodec/faxcompr: Check available bits in decode_uncompressed() +- avcodec/faxcompr: Check if bits are available before reading in cmode == 9 || cmode == 10 +- avformat/utils: Avoid overflow in codec_info_duration computation for subtitles +- avformat/utils: check dts/duration to be representable before using them +- avcodec/utils: do "calc from frame_bytes, channels, and block_align" in 64bit +- avcodec/ttadata: Add sentinel at the end of ff_tta_shift_1 +- avformat/mov: Check for duplicate mdcv +- avfilter/vf_dctdnoiz: Check threads +- avfilter/vf_ciescope: Fix undefined behavior in rgb_to_xy() with black +- avcodec/dpx: fix off by 1 in bits_per_color check +- avformat/rpl: Check for EOF and zero framesize +- avcodec/vc2enc: Check for non negative slice bounds +- avformat/rpl: Use 64bit in bitrate computation and check it +- avcodec/mpegvideo_enc: Reset stuffing bits if they are not supported +- avcodec/svq1enc: Do not print debug RD value before it has been computed +- avcodec/aacpsy: Check bandwidth +- avcodec/aacenc: Do not divide by lambda_count if it is 0 +- avcodec/aacenc: Use FLT_EPSILON for lambda minimum +- avfilter/vf_yadif: Fix handing of tiny images +- avfilter/vf_vmafmotion: Check dimensions +- avformat/movenc: Check pal_size before use +- avcodec/lpc: Avoid floating point division by 0 +- avcodec/aacpsy: Avoid floating point division by 0 of norm_fac +- avcodec/aacenc: Avoid 0 lambda +- avcodec/exr: More strictly check dc_count +- avcodec/exr: x/ymax cannot be INT_MAX +- avformat/avio: Check av_opt_copy() for failure +- avformat/moflex: Remove unneeded format variable +- avformat/fifo: check for flushed packets and timeshift +- avcodec/clearvideo: Check for 0 tile_shift +- avcodec/vc1: Check remaining bits in ff_vc1_parse_frame_header() +- avformat/mov: Ignore duplicate CoLL +- avformat/mov: Limit nb_chapter_tracks to input size +- avformat/utils: Use 64bit earlier in r_frame_rate check +- avcodec/alsdec: Fix decoding error with mono audio files +- avformat/mvdec: Check sample rate in parse_audio_var() +- avcodec/faxcompr: Check for end of bitstream in decode_group3_1d_line() and decode_group3_2d_line() +- avcodec/utils: treat PAL8 for jpegs similar to other colorspaces +- avcodec/jpeglsdec: Set alpha plane in PAL8 so image is not 100% transparent +- avformat/asfdec_o: Use ff_get_extradata() +- avformat/id3v2: Check end for overflow in id3v2_parse() +- avformat/mxfdec: Fix file position addition +- 
avformat/wtvdec: Improve size overflow checks in parse_chunks() +- avcodec/faxcompr: Check remaining bits on error in decode_group3_1d_line() +- avformat/mov: check for pts overflow in mov_read_sidx() +- avcodec/utils: Check ima wav duration for overflow +- avcodec/rv10: Execute whole size check earlier for rv20 +- avformat/cafdec: Check channels +- avcodec/exr: increase vlc depth +- avcodec/dpx: Check bits_per_color earlier +- avformat/mvi: Check audio_data_size to be non negative +- avcodec/nvenc: disable s12m timestamps by default +- aarch64: hevc_idct: Fix overflows in idct_dc +- avcodec/vaapi_av1: pass full buffer size for each tile +- avcodec/videotoolboxenc: #define TARGET_CPU_ARM64 to 0 if not provided by the SDK +- lavc/pngdec: fix updating reference frames for APNG_DISPOSE_OP_BACKGROUND +- ffmpeg: return no chosen output if an uninitialized stream is unavailable +- avcodec/h263, h263data: Move ff_h263_init_rl_inter to h263.c +- configure: Add missing mpegvideo dependency for IPU decoder +- avcodec/ttmlenc: Don't confuse capabilities and caps_internal +- avformat/mpegts: add missing sample_rate value to Opus extradata +- avformat/movenc: fix writing dOps atoms +- avcodec/av1_metadata: don't store the inserted TD OBU in stack +- avcodec/nellymoserenc: Fix segfault when using unsupported channels/rate +- avutil/cpu: Use HW_NCPUONLINE to detect # of online CPUs with OpenBSD +- avcodec/nvenc: fix lossless tuning logic +- avfilter/overlay_cuda: check av_buffer_ref result +- avfilter/overlay_cuda: hold explicit reference to hw_device_ctx +- avformat/matroskaenc: Fix leak when writing attachment without filename + +version 4.4: - AudioToolbox output device - MacCaption demuxer - PGX decoder diff --git a/RELEASE b/RELEASE index 515be8f918d..cca25a93cd0 100644 --- a/RELEASE +++ b/RELEASE @@ -1 +1 @@ -4.4 +4.4.1 diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 67339dca853..312ec6f2191 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -11,5 +11,5 @@ We hope you will like this release as much as we enjoyed working on it, and as usual, if you have any questions about it, or any FFmpeg related topic, - feel free to join us on the #ffmpeg IRC channel (on irc.freenode.net) or ask + feel free to join us on the #ffmpeg IRC channel (on irc.libera.chat) or ask on the mailing-lists. diff --git a/VERSION b/VERSION index 515be8f918d..cca25a93cd0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.4 +4.4.1 diff --git a/configure b/configure index d7a3f507e83..4ba72bf84b6 100755 --- a/configure +++ b/configure @@ -536,7 +536,7 @@ die(){ If you think configure made a mistake, make sure you are using the latest version from Git. If the latest version fails, report the problem to the -ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.freenode.net. +ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.libera.chat. 
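A number of the entries above apply the same fix pattern ("Use 64bit to avoid overflow", "Use 64bit for intermediate for DEINT_ID_INT4", "do "calc from frame_bytes, channels, and block_align" in 64bit"): widen one operand before the arithmetic so every intermediate is computed in 64 bits. The following stand-alone sketch is not from the patch; the function names and values are invented to show the idiom.

    #include <stdint.h>
    #include <stdio.h>

    /* int * int is evaluated in 32 bits and can overflow (undefined
     * behaviour) even when the result is assigned to an int64_t. */
    static int64_t bitrate_bad(int block_size, int rate)
    {
        return block_size * rate * 8;      /* 32-bit intermediate, may overflow */
    }

    /* Widening one operand first keeps the whole computation in 64 bits,
     * which is what the fixes above do. */
    static int64_t bitrate_good(int block_size, int rate)
    {
        return 8LL * block_size * rate;    /* 64-bit intermediate */
    }

    int main(void)
    {
        /* 1 MiB blocks at 48000 blocks/s overflow a 32-bit intermediate. */
        printf("%lld vs %lld\n",
               (long long)bitrate_bad(1 << 20, 48000),
               (long long)bitrate_good(1 << 20, 48000));
        return 0;
    }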
EOF if disabled logging; then cat <<EOF diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c --- a/fftools/ffmpeg.c +++ b/fftools/ffmpeg.c @@ ... @@ static void flush_encoders(void) AVPacket *pkt = ost->pkt; int pkt_size; + if (!pkt) + break; + switch (enc->codec_type) { case AVMEDIA_TYPE_AUDIO: desc = "audio"; @@ -3463,12 +3466,7 @@ static int init_output_stream_encode(OutputStream *ost, AVFrame *frame) enc_ctx->bits_per_raw_sample = frame_bits_per_raw_sample; } - if (ost->top_field_first == 0) { - enc_ctx->field_order = AV_FIELD_BB; - } else if (ost->top_field_first == 1) { - enc_ctx->field_order = AV_FIELD_TT; - } - + // Field order: autodetection if (frame) { if (enc_ctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME) && ost->top_field_first >= 0) @@ -3483,6 +3481,13 @@ static int init_output_stream_encode(OutputStream *ost, AVFrame *frame) enc_ctx->field_order = AV_FIELD_PROGRESSIVE; } + // Field order: override + if (ost->top_field_first == 0) { + enc_ctx->field_order = AV_FIELD_BB; + } else if (ost->top_field_first == 1) { + enc_ctx->field_order = AV_FIELD_TT; + } + if (ost->forced_keyframes) { if (!strncmp(ost->forced_keyframes, "expr:", 5)) { ret = av_expr_parse(&ost->forced_keyframes_pexpr, ost->forced_keyframes+5, @@ -3950,7 +3955,7 @@ static OutputStream *choose_output(void) ost->st->index, ost->st->id, ost->initialized, ost->inputs_done, ost->finished); if (!ost->initialized && !ost->inputs_done) - return ost; + return ost->unavailable ? NULL : ost; if (!ost->finished && opts < opts_min) { opts_min = opts; diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index baa82489b19..11b0559e1cd 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -843,25 +843,25 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe) sce0->ics.swb_sizes[g], sce0->sf_idx[w*16+g], sce0->band_type[w*16+g], - lambda / band0->threshold, INFINITY, &b1, NULL, 0); + lambda / (band0->threshold + FLT_MIN), INFINITY, &b1, NULL, 0); dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], R34, sce1->ics.swb_sizes[g], sce1->sf_idx[w*16+g], sce1->band_type[w*16+g], - lambda / band1->threshold, INFINITY, &b2, NULL, 0); + lambda / (band1->threshold + FLT_MIN), INFINITY, &b2, NULL, 0); dist2 += quantize_band_cost(s, M, M34, sce0->ics.swb_sizes[g], mididx, midcb, - lambda / minthr, INFINITY, &b3, NULL, 0); + lambda / (minthr + FLT_MIN), INFINITY, &b3, NULL, 0); dist2 += quantize_band_cost(s, S, S34, sce1->ics.swb_sizes[g], sididx, sidcb, - mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0); + mslambda / (minthr * bmax + FLT_MIN), INFINITY, &b4, NULL, 0); B0 += b1+b2; B1 += b3+b4; dist1 -= b1+b2; diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c index 98f77a3ad74..3d7f3257db9 100644 --- a/libavcodec/aacdec_template.c +++ b/libavcodec/aacdec_template.c @@ -1076,14 +1076,18 @@ static int decode_audio_specific_config_gb(AACContext *ac, { int i, ret; GetBitContext gbc = *gb; + MPEG4AudioConfig m4ac_bak = *m4ac; - if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) + if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) { + *m4ac = m4ac_bak; return AVERROR_INVALIDDATA; + } if (m4ac->sampling_index > 12) { av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index); + *m4ac = m4ac_bak; return AVERROR_INVALIDDATA; } if (m4ac->object_type == AOT_ER_AAC_LD && @@ -1091,6 +1095,7 @@ static int decode_audio_specific_config_gb(AACContext *ac, av_log(avctx, AV_LOG_ERROR, "invalid low delay sampling rate index %d\n", m4ac->sampling_index); + *m4ac = m4ac_bak; return AVERROR_INVALIDDATA; } diff --git a/libavcodec/aacenc.c
b/libavcodec/aacenc.c index 070a2e706ab..be5e8e21084 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -28,6 +28,7 @@ * TODOs: * add sane pulse detection ***********************************/ +#include <float.h> #include "libavutil/libm.h" #include "libavutil/float_dsp.h" @@ -852,7 +853,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, /* Not so fast though */ ratio = sqrtf(ratio); } - s->lambda = FFMIN(s->lambda * ratio, 65536.f); + s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f); /* Keep iterating if we must reduce and lambda is in the sky */ if (ratio > 0.9f && ratio < 1.1f) { @@ -897,7 +898,7 @@ static av_cold int aac_encode_end(AVCodecContext *avctx) { AACEncContext *s = avctx->priv_data; - av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count); + av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN); ff_mdct_end(&s->mdct1024); ff_mdct_end(&s->mdct128); diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c index fca692cb153..76458783cea 100644 --- a/libavcodec/aacpsy.c +++ b/libavcodec/aacpsy.c @@ -308,6 +308,9 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) { const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx); const float num_bark = calc_bark((float)bandwidth); + if (bandwidth <= 0) + return AVERROR(EINVAL); + ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext)); if (!ctx->model_priv_data) return AVERROR(ENOMEM); @@ -794,7 +797,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, if (pe < 1.15f * desired_pe) { /* 6.6.1.3.6 "Final threshold modification by linearization" */ - norm_fac = 1.0f / norm_fac; + norm_fac = norm_fac ? 1.0f / norm_fac : 0; for (w = 0; w < wi->num_windows*16; w += 16) { for (g = 0; g < num_bands; g++) { AacPsyBand *band = &pch->band[w+g]; diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S index 28c11e632c7..0869431294d 100644 --- a/libavcodec/aarch64/hevcdsp_idct_neon.S +++ b/libavcodec/aarch64/hevcdsp_idct_neon.S @@ -573,14 +573,13 @@ idct_16x16 10 // void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs) .macro idct_dc size, bitdepth function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1 - movi v1.8h, #((1 << (14 - \bitdepth))+1) ld1r {v4.8h}, [x0] - add v4.8h, v4.8h, v1.8h - sshr v0.8h, v4.8h, #(15 - \bitdepth) - sshr v1.8h, v4.8h, #(15 - \bitdepth) + srshr v4.8h, v4.8h, #1 + srshr v0.8h, v4.8h, #(14 - \bitdepth) + srshr v1.8h, v4.8h, #(14 - \bitdepth) .if \size > 4 - sshr v2.8h, v4.8h, #(15 - \bitdepth) - sshr v3.8h, v4.8h, #(15 - \bitdepth) + srshr v2.8h, v4.8h, #(14 - \bitdepth) + srshr v3.8h, v4.8h, #(14 - \bitdepth) .if \size > 16 /* dc 32x32 */ mov x2, #4 1: diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c index a8c3433fa86..e736905a76d 100644 --- a/libavcodec/alsdec.c +++ b/libavcodec/alsdec.c @@ -1632,7 +1632,7 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame) AVCodecContext *avctx = ctx->avctx; GetBitContext *gb = &ctx->gb; unsigned int div_blocks[32]; ///< block sizes.
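The aacenc.c and aacpsy.c hunks above share one theme: floating-point divisions guarded against zero (lambda clamped to at least FLT_EPSILON, Qavg reported as NAN for a zero lambda_count, norm_fac inverted only when non-zero). A stand-alone sketch of the idiom; the function names here are invented for illustration:

    #include <float.h>
    #include <math.h>
    #include <stdio.h>

    /* Keep a rate-control multiplier strictly positive and bounded,
     * mirroring the av_clipf(..., FLT_EPSILON, 65536.f) change above. */
    static float clamp_lambda(float lambda, float ratio)
    {
        float v = lambda * ratio;          /* ratio may legitimately be 0 */
        if (v < FLT_EPSILON)
            v = FLT_EPSILON;               /* never let lambda collapse to 0 */
        return v > 65536.0f ? 65536.0f : v;
    }

    /* Dividing by a zero count was the bug; report NAN instead. */
    static float report_qavg(float lambda_sum, int lambda_count)
    {
        return lambda_count ? lambda_sum / lambda_count : NAN;
    }

    int main(void)
    {
        printf("%g %g\n", clamp_lambda(1.0f, 0.0f), report_qavg(0.0f, 0));
        return 0;
    }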
- unsigned int c; + int c; unsigned int js_blocks[2]; uint32_t bs_info = 0; int ret; @@ -1810,14 +1810,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr, else ctx->cur_frame_length = sconf->frame_length; - ctx->highest_decoded_channel = 0; + ctx->highest_decoded_channel = -1; // decode the frame data if ((invalid_frame = read_frame_data(ctx, ra_frame)) < 0) av_log(ctx->avctx, AV_LOG_WARNING, "Reading frame data failed. Skipping RA unit.\n"); - if (ctx->highest_decoded_channel == 0) + if (ctx->highest_decoded_channel == -1) { + av_log(ctx->avctx, AV_LOG_WARNING, + "No channel data decoded.\n"); return AVERROR_INVALIDDATA; + } ctx->frame_id++; diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c index e0c6b6bb8b8..4684e40a466 100644 --- a/libavcodec/apedec.c +++ b/libavcodec/apedec.c @@ -879,7 +879,7 @@ static av_always_inline int filter_fast_3320(APEPredictor *p, } predictionA = p->buf[delayA] * 2U - p->buf[delayA - 1]; - p->lastA[filter] = decoded + ((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9); + p->lastA[filter] = decoded + (unsigned)((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9); if ((decoded ^ predictionA) > 0) p->coeffsA[filter][0]++; @@ -909,8 +909,8 @@ static av_always_inline int filter_3800(APEPredictor *p, return predictionA; } d2 = p->buf[delayA]; - d1 = (p->buf[delayA] - p->buf[delayA - 1]) * 2U; - d0 = p->buf[delayA] + ((p->buf[delayA - 2] - p->buf[delayA - 1]) * 8U); + d1 = (p->buf[delayA] - (unsigned)p->buf[delayA - 1]) * 2; + d0 = p->buf[delayA] + ((p->buf[delayA - 2] - (unsigned)p->buf[delayA - 1]) * 8); d3 = p->buf[delayB] * 2U - p->buf[delayB - 1]; d4 = p->buf[delayB]; @@ -979,7 +979,7 @@ static void long_filter_ehigh_3830(int32_t *buffer, int length) for (j = 7; j > 0; j--) delay[j] = delay[j - 1]; delay[0] = buffer[i]; - buffer[i] -= dotprod >> 9; + buffer[i] -= (unsigned)(dotprod >> 9); } } @@ -1337,7 +1337,7 @@ static void do_apply_filter(APEContext *ctx, int version, APEFilter *f, absres = FFABSU(res); if (absres) *f->adaptcoeffs = APESIGN(res) * - (8 << ((absres > f->avg * 3) + (absres > f->avg * 4 / 3))); + (8 << ((absres > f->avg * 3LL) + (absres > (f->avg + f->avg / 3)))); /* equivalent to the following code if (absres <= f->avg * 4 / 3) *f->adaptcoeffs = APESIGN(res) * 8; diff --git a/libavcodec/argo.c b/libavcodec/argo.c index 7358d102e32..f633ec2691f 100644 --- a/libavcodec/argo.c +++ b/libavcodec/argo.c @@ -59,7 +59,7 @@ static int decode_pal8(AVCodecContext *avctx, uint32_t *pal) return AVERROR_INVALIDDATA; for (int i = 0; i < count; i++) - pal[start + i] = (0xFF << 24U) | bytestream2_get_be24u(gb); + pal[start + i] = (0xFFU << 24) | bytestream2_get_be24u(gb); return 0; } @@ -685,6 +685,11 @@ static av_cold int decode_init(AVCodecContext *avctx) return AVERROR_PATCHWELCOME; } + if (avctx->width % 2 || avctx->height % 2) { + avpriv_request_sample(s, "Odd dimensions\n"); + return AVERROR_PATCHWELCOME; + } + s->frame = av_frame_alloc(); if (!s->frame) return AVERROR(ENOMEM); diff --git a/libavcodec/av1_metadata_bsf.c b/libavcodec/av1_metadata_bsf.c index 328db5c0da2..1fb85d88b7e 100644 --- a/libavcodec/av1_metadata_bsf.c +++ b/libavcodec/av1_metadata_bsf.c @@ -28,6 +28,7 @@ typedef struct AV1MetadataContext { CBSBSFContext common; int td; + AV1RawOBU td_obu; int color_primaries; int transfer_characteristics; @@ -107,12 +108,11 @@ static int av1_metadata_update_fragment(AVBSFContext *bsf, AVPacket *pkt, CodedBitstreamFragment *frag) { AV1MetadataContext *ctx = bsf->priv_data; - AV1RawOBU td, *obu; int err, i; 
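The argo.c palette hunk above changes (0xFF << 24U) to (0xFFU << 24). A minimal stand-alone illustration of why the position of the U suffix matters:

    #include <stdint.h>
    #include <stdio.h>

    /* 0xFF is a signed int, so 0xFF << 24 shifts a 1 into the sign bit of
     * a 32-bit int (undefined behaviour). Making the shifted value
     * unsigned, 0xFFU << 24, is well defined; a U on the shift count
     * alone, as in 0xFF << 24U, changes nothing. */
    int main(void)
    {
        uint32_t pal_entry = (0xFFU << 24) | 0x123456u; /* opaque ARGB */
        printf("0x%08X\n", pal_entry);
        return 0;
    }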
for (i = 0; i < frag->nb_units; i++) { if (frag->units[i].type == AV1_OBU_SEQUENCE_HEADER) { - obu = frag->units[i].content; + AV1RawOBU *obu = frag->units[i].content; err = av1_metadata_update_sequence_header(bsf, &obu->obu.sequence_header); if (err < 0) return err; @@ -124,12 +124,8 @@ static int av1_metadata_update_fragment(AVBSFContext *bsf, AVPacket *pkt, if (ctx->td == BSF_ELEMENT_REMOVE) ff_cbs_delete_unit(frag, 0); } else if (pkt && ctx->td == BSF_ELEMENT_INSERT) { - td = (AV1RawOBU) { - .header.obu_type = AV1_OBU_TEMPORAL_DELIMITER, - }; - err = ff_cbs_insert_unit_content(frag, 0, AV1_OBU_TEMPORAL_DELIMITER, - &td, NULL); + &ctx->td_obu, NULL); if (err < 0) { av_log(bsf, AV_LOG_ERROR, "Failed to insert Temporal Delimiter.\n"); return err; @@ -155,6 +151,12 @@ static const CBSBSFType av1_metadata_type = { static int av1_metadata_init(AVBSFContext *bsf) { + AV1MetadataContext *ctx = bsf->priv_data; + + ctx->td_obu = (AV1RawOBU) { + .header.obu_type = AV1_OBU_TEMPORAL_DELIMITER, + }; + return ff_cbs_bsf_generic_init(bsf, &av1_metadata_type); } diff --git a/libavcodec/cbs_h265_syntax_template.c b/libavcodec/cbs_h265_syntax_template.c index 5d216aad365..921daea68c7 100644 --- a/libavcodec/cbs_h265_syntax_template.c +++ b/libavcodec/cbs_h265_syntax_template.c @@ -728,7 +728,7 @@ static int FUNC(sps_scc_extension)(CodedBitstreamContext *ctx, RWContext *rw, flag(sps_palette_predictor_initializer_present_flag); if (current->sps_palette_predictor_initializer_present_flag) { - ue(sps_num_palette_predictor_initializer_minus1, 0, 128); + ue(sps_num_palette_predictor_initializer_minus1, 0, 127); for (comp = 0; comp < (current->chroma_format_idc ? 3 : 1); comp++) { int bit_depth = comp == 0 ? current->bit_depth_luma_minus8 + 8 : current->bit_depth_chroma_minus8 + 8; diff --git a/libavcodec/clearvideo.c b/libavcodec/clearvideo.c index 79ba88857cb..a56e09069bf 100644 --- a/libavcodec/clearvideo.c +++ b/libavcodec/clearvideo.c @@ -722,8 +722,8 @@ static av_cold int clv_decode_init(AVCodecContext *avctx) } c->tile_shift = av_log2(c->tile_size); - if (1U << c->tile_shift != c->tile_size) { - av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2.\n", c->tile_size); + if (1U << c->tile_shift != c->tile_size || c->tile_shift < 1 || c->tile_shift > 30) { + av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2 > 1 and < 2^31\n", c->tile_size); return AVERROR_INVALIDDATA; } diff --git a/libavcodec/cpia.c b/libavcodec/cpia.c index 5f12a99a83a..435834d6170 100644 --- a/libavcodec/cpia.c +++ b/libavcodec/cpia.c @@ -111,6 +111,7 @@ static int cpia_decode_frame(AVCodecContext *avctx, // Read line length, two byte little endian linelength = AV_RL16(src); src += 2; + src_size -= 2; if (src_size < linelength) { frame->decode_error_flags = FF_DECODE_ERROR_INVALID_BITSTREAM; diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c index 228803183af..886488602ed 100644 --- a/libavcodec/crystalhd.c +++ b/libavcodec/crystalhd.c @@ -785,6 +785,7 @@ static int crystalhd_receive_frame(AVCodecContext *avctx, AVFrame *frame) .flush = flush, \ .bsfs = bsf_name, \ .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \ + .caps_internal = FF_CODEC_CAP_SETS_FRAME_PROPS, \ .pix_fmts = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE}, \ .wrapper_name = "crystalhd", \ }; diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index ec57afdefe7..5c135d17144 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -1150,6 +1150,7 @@ static const 
AVCodecHWConfigInternal *const cuvid_hw_configs[] = { .flush = cuvid_flush, \ .bsfs = bsf_name, \ .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \ + .caps_internal = FF_CODEC_CAP_SETS_FRAME_PROPS, \ .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \ AV_PIX_FMT_NV12, \ AV_PIX_FMT_P010, \ diff --git a/libavcodec/decode.c b/libavcodec/decode.c index 5a00aeedaef..936e5d63da8 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -233,9 +233,11 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt) if (ret < 0) return ret; - ret = extract_packet_props(avctx->internal, pkt); - if (ret < 0) - goto finish; + if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) { + ret = extract_packet_props(avctx->internal, pkt); + if (ret < 0) + goto finish; + } ret = apply_param_change(avctx, pkt); if (ret < 0) @@ -502,11 +504,13 @@ FF_ENABLE_DEPRECATION_WARNINGS pkt->data += consumed; pkt->size -= consumed; - avci->last_pkt_props->size -= consumed; // See extract_packet_props() comment. pkt->pts = AV_NOPTS_VALUE; pkt->dts = AV_NOPTS_VALUE; - avci->last_pkt_props->pts = AV_NOPTS_VALUE; - avci->last_pkt_props->dts = AV_NOPTS_VALUE; + if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) { + avci->last_pkt_props->size -= consumed; // See extract_packet_props() comment. + avci->last_pkt_props->pts = AV_NOPTS_VALUE; + avci->last_pkt_props->dts = AV_NOPTS_VALUE; + } } if (got_frame) @@ -548,6 +552,11 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame) if (ret == AVERROR_EOF) avci->draining_done = 1; + if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS) && + IS_EMPTY(avci->last_pkt_props) && av_fifo_size(avci->pkt_props) >= sizeof(*avci->last_pkt_props)) + av_fifo_generic_read(avci->pkt_props, + avci->last_pkt_props, sizeof(*avci->last_pkt_props), NULL); + if (!ret) { frame->best_effort_timestamp = guess_correct_pts(avctx, frame->pts, @@ -1738,39 +1747,37 @@ int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame) { AV_PKT_DATA_S12M_TIMECODE, AV_FRAME_DATA_S12M_TIMECODE }, }; - if (IS_EMPTY(pkt) && av_fifo_size(avctx->internal->pkt_props) >= sizeof(*pkt)) - av_fifo_generic_read(avctx->internal->pkt_props, - pkt, sizeof(*pkt), NULL); - - frame->pts = pkt->pts; + if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) { + frame->pts = pkt->pts; #if FF_API_PKT_PTS FF_DISABLE_DEPRECATION_WARNINGS - frame->pkt_pts = pkt->pts; + frame->pkt_pts = pkt->pts; FF_ENABLE_DEPRECATION_WARNINGS #endif - frame->pkt_pos = pkt->pos; - frame->pkt_duration = pkt->duration; - frame->pkt_size = pkt->size; - - for (int i = 0; i < FF_ARRAY_ELEMS(sd); i++) { - buffer_size_t size; - uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size); - if (packet_sd) { - AVFrameSideData *frame_sd = av_frame_new_side_data(frame, - sd[i].frame, - size); - if (!frame_sd) - return AVERROR(ENOMEM); - - memcpy(frame_sd->data, packet_sd, size); + frame->pkt_pos = pkt->pos; + frame->pkt_duration = pkt->duration; + frame->pkt_size = pkt->size; + + for (int i = 0; i < FF_ARRAY_ELEMS(sd); i++) { + buffer_size_t size; + uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size); + if (packet_sd) { + AVFrameSideData *frame_sd = av_frame_new_side_data(frame, + sd[i].frame, + size); + if (!frame_sd) + return AVERROR(ENOMEM); + + memcpy(frame_sd->data, packet_sd, size); + } } - } - add_metadata_from_side_data(pkt, frame); + add_metadata_from_side_data(pkt, frame); - if (pkt->flags & 
AV_PKT_FLAG_DISCARD) { - frame->flags |= AV_FRAME_FLAG_DISCARD; - } else { - frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD); + if (pkt->flags & AV_PKT_FLAG_DISCARD) { + frame->flags |= AV_FRAME_FLAG_DISCARD; + } else { + frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD); + } } frame->reordered_opaque = avctx->reordered_opaque; diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c index c78d55aee51..9b475a6979f 100644 --- a/libavcodec/dnxhddec.c +++ b/libavcodec/dnxhddec.c @@ -112,6 +112,7 @@ static av_cold int dnxhd_decode_init(AVCodecContext *avctx) static int dnxhd_init_vlc(DNXHDContext *ctx, uint32_t cid, int bitdepth) { + int ret; if (cid != ctx->cid) { const CIDEntry *cid_table = ff_dnxhd_get_cid_table(cid); @@ -132,19 +133,26 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, uint32_t cid, int bitdepth) ff_free_vlc(&ctx->dc_vlc); ff_free_vlc(&ctx->run_vlc); - init_vlc(&ctx->ac_vlc, DNXHD_VLC_BITS, 257, + if ((ret = init_vlc(&ctx->ac_vlc, DNXHD_VLC_BITS, 257, ctx->cid_table->ac_bits, 1, 1, - ctx->cid_table->ac_codes, 2, 2, 0); - init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, bitdepth > 8 ? 14 : 12, + ctx->cid_table->ac_codes, 2, 2, 0)) < 0) + goto out; + if ((ret = init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, bitdepth > 8 ? 14 : 12, ctx->cid_table->dc_bits, 1, 1, - ctx->cid_table->dc_codes, 1, 1, 0); - init_vlc(&ctx->run_vlc, DNXHD_VLC_BITS, 62, + ctx->cid_table->dc_codes, 1, 1, 0)) < 0) + goto out; + if ((ret = init_vlc(&ctx->run_vlc, DNXHD_VLC_BITS, 62, ctx->cid_table->run_bits, 1, 1, - ctx->cid_table->run_codes, 2, 2, 0); + ctx->cid_table->run_codes, 2, 2, 0)) < 0) + goto out; ctx->cid = cid; } - return 0; + ret = 0; +out: + if (ret < 0) + av_log(ctx->avctx, AV_LOG_ERROR, "init_vlc failed\n"); + return ret; } static int dnxhd_get_profile(int cid) diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c index 5372e3d5861..915d94077e6 100644 --- a/libavcodec/dpx.c +++ b/libavcodec/dpx.c @@ -242,6 +242,9 @@ static int decode_frame(AVCodecContext *avctx, return AVERROR_PATCHWELCOME; } + if (bits_per_color > 31) + return AVERROR_INVALIDDATA; + buf += 820; avctx->sample_aspect_ratio.num = read32(&buf, endian); avctx->sample_aspect_ratio.den = read32(&buf, endian); @@ -316,7 +319,7 @@ static int decode_frame(AVCodecContext *avctx, minCV = av_int2float(i); maxCV = av_int2float(j); if (bits_per_color >= 1 && - minCV == 0.0f && maxCV == ((1<<bits_per_color) - 1)) { + minCV == 0.0f && maxCV == ((1U<<bits_per_color) - 1)) { avctx->color_range = AVCOL_RANGE_JPEG; } else if (bits_per_color >= 8 && minCV == (1 <<(bits_per_color - 4)) && diff --git a/libavcodec/dxva2_av1.c b/libavcodec/dxva2_av1.c index aa14e473dfa..c30b57799c2 100644 --- a/libavcodec/dxva2_av1.c +++ b/libavcodec/dxva2_av1.c @@ -73,7 +73,7 @@ static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *c pp->max_height = seq->max_frame_height_minus_1 + 1; pp->CurrPicTextureIndex = ff_dxva2_get_surface_index(avctx, ctx, h->cur_frame.tf.f); - pp->superres_denom = frame_header->use_superres ? frame_header->coded_denom : AV1_SUPERRES_NUM; + pp->superres_denom = frame_header->use_superres ?
frame_header->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM; pp->bitdepth = get_bit_depth_from_seq(seq); pp->seq_profile = seq->seq_profile; diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 65e5203c317..49ba7fd6de9 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -418,7 +418,7 @@ static int huf_decode(VLC *vlc, GetByteContext *gb, int nbits, int run_sym, init_get_bits(&gbit, gb->buffer, nbits); while (get_bits_left(&gbit) > 0 && oe < no) { - uint16_t x = get_vlc2(&gbit, vlc->table, 12, 2); + uint16_t x = get_vlc2(&gbit, vlc->table, 12, 3); if (x == run_sym) { int run = get_bits(&gbit, 8); @@ -1014,7 +1014,9 @@ static int dwa_uncompress(EXRContext *s, const uint8_t *src, int compressed_size dc_count = AV_RL64(src + 72); ac_compression = AV_RL64(src + 80); - if (compressed_size < 88LL + lo_size + ac_size + dc_size + rle_csize) + if ( compressed_size < (uint64_t)(lo_size | ac_size | dc_size | rle_csize) || compressed_size < 88LL + lo_size + ac_size + dc_size + rle_csize + || ac_count > (uint64_t)INT_MAX/2 + ) return AVERROR_INVALIDDATA; bytestream2_init(&gb, src + 88, compressed_size - 88); @@ -1031,12 +1033,14 @@ static int dwa_uncompress(EXRContext *s, const uint8_t *src, int compressed_size } if (ac_size > 0) { - unsigned long dest_len = ac_count * 2LL; + unsigned long dest_len; GetByteContext agb = gb; if (ac_count > 3LL * td->xsize * s->scan_lines_per_block) return AVERROR_INVALIDDATA; + dest_len = ac_count * 2LL; + av_fast_padded_malloc(&td->ac_data, &td->ac_size, dest_len); if (!td->ac_data) return AVERROR(ENOMEM); @@ -1059,13 +1063,15 @@ static int dwa_uncompress(EXRContext *s, const uint8_t *src, int compressed_size bytestream2_skip(&gb, ac_size); } - if (dc_size > 0) { - unsigned long dest_len = dc_count * 2LL; + { + unsigned long dest_len; GetByteContext agb = gb; - if (dc_count > (6LL * td->xsize * td->ysize + 63) / 64) + if (dc_count != dc_w * dc_h * 3) return AVERROR_INVALIDDATA; + dest_len = dc_count * 2LL; + av_fast_padded_malloc(&td->dc_data, &td->dc_size, FFALIGN(dest_len, 64) * 2); if (!td->dc_data) return AVERROR(ENOMEM); @@ -1795,6 +1801,7 @@ static int decode_header(EXRContext *s, AVFrame *frame) ymax = bytestream2_get_le32(gb); if (xmin > xmax || ymin > ymax || + ymax == INT_MAX || xmax == INT_MAX || (unsigned)xmax - xmin >= INT_MAX || (unsigned)ymax - ymin >= INT_MAX) { ret = AVERROR_INVALIDDATA; diff --git a/libavcodec/faxcompr.c b/libavcodec/faxcompr.c index 3dd64cf7306..b283831dae7 100644 --- a/libavcodec/faxcompr.c +++ b/libavcodec/faxcompr.c @@ -144,6 +144,8 @@ static int decode_uncompressed(AVCodecContext *avctx, GetBitContext *gb, return AVERROR_INVALIDDATA; } cwi = 10 - av_log2(cwi); + if (get_bits_left(gb) < cwi + 1) + return AVERROR_INVALIDDATA; skip_bits(gb, cwi + 1); if (cwi > 5) { newmode = get_bits1(gb); @@ -209,6 +211,8 @@ static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb, unsigned int run = 0; unsigned int t; for (;;) { + if (get_bits_left(gb) <= 0) + return AVERROR_INVALIDDATA; t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2); run += t; if (t < 64) { @@ -227,7 +231,7 @@ static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb, run = 0; mode = !mode; } else if ((int)t == -1) { - if (show_bits(gb, 12) == 15) { + if (get_bits_left(gb) > 12 && show_bits(gb, 12) == 15) { int ret; skip_bits(gb, 12); ret = decode_uncompressed(avctx, gb, &pix_left, &runs, runend, &mode); @@ -254,7 +258,10 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb, unsigned int offs = 0, run = 0; 
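The faxcompr.c hunks in this region all add the same guard: test the number of bits left before every read, so truncated input reaches an error return instead of overreading the buffer. A stand-alone sketch with a toy reader; the type and helpers below are invented for illustration (FFmpeg itself uses GetBitContext and get_bits_left()):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct ToyBitReader {
        const uint8_t *buf;
        size_t size_bits;
        size_t pos;
    } ToyBitReader;

    static long toy_bits_left(const ToyBitReader *r)
    {
        return (long)r->size_bits - (long)r->pos;
    }

    /* Check exhaustion before reading; the caller maps the -1 to an
     * AVERROR_INVALIDDATA-style failure, as the hunks here do. */
    static int toy_get_bit(ToyBitReader *r)
    {
        if (toy_bits_left(r) <= 0)
            return -1;
        int b = (r->buf[r->pos >> 3] >> (7 - (r->pos & 7))) & 1;
        r->pos++;
        return b;
    }

    int main(void)
    {
        const uint8_t data[1] = { 0xA0 };
        ToyBitReader r = { data, 8, 0 };
        int bit, n = 0;
        while ((bit = toy_get_bit(&r)) >= 0)
            n++;                 /* stops cleanly after exactly 8 bits */
        return n == 8 ? 0 : 1;
    }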
while (offs < width) { - int cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1); + int cmode; + if (get_bits_left(gb) <= 0) + return AVERROR_INVALIDDATA; + cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1); if (cmode == -1) { av_log(avctx, AV_LOG_ERROR, "Incorrect mode VLC\n"); return AVERROR_INVALIDDATA; @@ -276,6 +283,8 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb, for (k = 0; k < 2; k++) { run = 0; for (;;) { + if (get_bits_left(gb) <= 0) + return AVERROR_INVALIDDATA; t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2); if (t == -1) { av_log(avctx, AV_LOG_ERROR, "Incorrect code\n"); @@ -299,7 +308,10 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb, mode = !mode; } } else if (cmode == 9 || cmode == 10) { - int xxx = get_bits(gb, 3); + int xxx; + if (get_bits_left(gb) < 3) + return AVERROR_INVALIDDATA; + xxx = get_bits(gb, 3); if (cmode == 9 && xxx == 7) { int ret; int pix_left = width - offs; diff --git a/libavcodec/flac_parser.c b/libavcodec/flac_parser.c index 3424583c495..b13b3b646ab 100644 --- a/libavcodec/flac_parser.c +++ b/libavcodec/flac_parser.c @@ -55,6 +55,7 @@ /** largest possible size of flac header */ #define MAX_FRAME_HEADER_SIZE 16 +#define MAX_FRAME_VERIFY_SIZE (MAX_FRAME_HEADER_SIZE) typedef struct FLACHeaderMarker { int offset; /**< byte offset from start of FLACParseContext->buffer */ @@ -170,7 +171,7 @@ static int find_headers_search_validate(FLACParseContext *fpc, int offset) uint8_t *header_buf; int size = 0; header_buf = flac_fifo_read_wrap(fpc, offset, - MAX_FRAME_HEADER_SIZE, + MAX_FRAME_VERIFY_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, &fpc->wrap_buf, &fpc->wrap_buf_allocated_size); if (frame_header_is_valid(fpc->avctx, header_buf, &fi)) { diff --git a/libavcodec/flicvideo.c b/libavcodec/flicvideo.c index 276c2ff2a62..67fbaa72490 100644 --- a/libavcodec/flicvideo.c +++ b/libavcodec/flicvideo.c @@ -735,6 +735,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx, bytestream2_skip(&g2, chunk_size - 6); } else { + if (bytestream2_get_bytes_left(&g2) < 2 * s->avctx->width * s->avctx->height ) + return AVERROR_INVALIDDATA; for (y_ptr = 0; y_ptr < s->frame->linesize[0] * s->avctx->height; y_ptr += s->frame->linesize[0]) { diff --git a/libavcodec/frame_thread_encoder.c b/libavcodec/frame_thread_encoder.c index 778317d60bb..0d52f066e58 100644 --- a/libavcodec/frame_thread_encoder.c +++ b/libavcodec/frame_thread_encoder.c @@ -124,7 +124,7 @@ static void * attribute_align_arg worker(void *v){ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options){ int i=0; ThreadContext *c; - + AVCodecContext *thread_avctx = NULL; if( !(avctx->thread_type & FF_THREAD_FRAME) || !(avctx->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)) @@ -205,16 +205,17 @@ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options){ AVDictionary *tmp = NULL; int ret; void *tmpv; - AVCodecContext *thread_avctx = avcodec_alloc_context3(avctx->codec); + thread_avctx = avcodec_alloc_context3(avctx->codec); if(!thread_avctx) goto fail; tmpv = thread_avctx->priv_data; *thread_avctx = *avctx; + thread_avctx->priv_data = tmpv; + thread_avctx->internal = NULL; + thread_avctx->hw_frames_ctx = NULL; ret = av_opt_copy(thread_avctx, avctx); if (ret < 0) goto fail; - thread_avctx->priv_data = tmpv; - thread_avctx->internal = NULL; if (avctx->codec->priv_class) { int ret = av_opt_copy(thread_avctx->priv_data, avctx->priv_data); if (ret < 0) @@ -243,6 +244,8 @@ int ff_frame_thread_encoder_init(AVCodecContext 
*avctx, AVDictionary *options){ return 0; fail: + avcodec_close(thread_avctx); + av_freep(&thread_avctx); avctx->thread_count = i; av_log(avctx, AV_LOG_ERROR, "ff_frame_thread_encoder_init failed\n"); ff_frame_thread_encoder_free(avctx); diff --git a/libavcodec/frame_thread_encoder.h b/libavcodec/frame_thread_encoder.h index c400d6b32c8..9733fcdc2de 100644 --- a/libavcodec/frame_thread_encoder.h +++ b/libavcodec/frame_thread_encoder.h @@ -23,6 +23,10 @@ #include "avcodec.h" +/** + * Initialize frame thread encoder. + * @note hardware encoders are not supported + */ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options); void ff_frame_thread_encoder_free(AVCodecContext *avctx); int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, diff --git a/libavcodec/h263.c b/libavcodec/h263.c index bc5c0d599ff..4a03c710a60 100644 --- a/libavcodec/h263.c +++ b/libavcodec/h263.c @@ -29,6 +29,7 @@ #include <limits.h> +#include "libavutil/thread.h" #include "avcodec.h" #include "mpegvideo.h" #include "h263.h" @@ -38,6 +39,17 @@ #include "flv.h" #include "mpeg4video.h" +static av_cold void h263_init_rl_inter(void) +{ + static uint8_t h263_rl_inter_table[2][2 * MAX_RUN + MAX_LEVEL + 3]; + ff_rl_init(&ff_h263_rl_inter, h263_rl_inter_table); +} + +av_cold void ff_h263_init_rl_inter(void) +{ + static AVOnce init_static_once = AV_ONCE_INIT; + ff_thread_once(&init_static_once, h263_init_rl_inter); +} void ff_h263_update_motion_val(MpegEncContext * s){ const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; diff --git a/libavcodec/h263.h b/libavcodec/h263.h index 998f7d0d59a..491f2e0aac4 100644 --- a/libavcodec/h263.h +++ b/libavcodec/h263.h @@ -66,6 +66,7 @@ int16_t *ff_h263_pred_motion(MpegEncContext * s, int block, int dir, int *px, int *py); void ff_h263_encode_init(MpegEncContext *s); void ff_h263_decode_init_vlc(void); +void ff_h263_init_rl_inter(void); int ff_h263_decode_picture_header(MpegEncContext *s); int ff_h263_decode_gob_header(MpegEncContext *s); void ff_h263_update_motion_val(MpegEncContext * s); diff --git a/libavcodec/h263data.c b/libavcodec/h263data.c index 604a0425e1a..20d0436fda5 100644 --- a/libavcodec/h263data.c +++ b/libavcodec/h263data.c @@ -25,8 +25,6 @@ #include <stdint.h> -#include "libavutil/thread.h" - #include "h263data.h" #include "mpegvideo.h" @@ -290,15 +288,3 @@ const AVRational ff_h263_pixel_aspect[16] = { { 0, 1 }, { 0, 1 }, }; -static av_cold void h263_init_rl_inter(void) -{ - static uint8_t h263_rl_inter_table[2][2 * MAX_RUN + MAX_LEVEL + 3]; - ff_rl_init(&ff_h263_rl_inter, h263_rl_inter_table); -} - -av_cold void ff_h263_init_rl_inter(void) -{ - static AVOnce init_static_once = AV_ONCE_INIT; - ff_thread_once(&init_static_once, h263_init_rl_inter); -} diff --git a/libavcodec/h263data.h b/libavcodec/h263data.h index 144704d12b1..06554bdf0d4 100644 --- a/libavcodec/h263data.h +++ b/libavcodec/h263data.h @@ -61,7 +61,6 @@ extern const int8_t ff_inter_run[102]; extern RLTable ff_h263_rl_inter; extern RLTable ff_rl_intra_aic; -void ff_h263_init_rl_inter(void); extern const uint16_t ff_h263_format[8][2]; diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 62f7a61aed9..7c69016338b 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -1831,6 +1831,8 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl, if (nal->type == H264_NAL_IDR_SLICE) get_ue_golomb_long(&sl->gb); /* idr_pic_id */ + sl->poc_lsb = 0; + sl->delta_poc_bottom = 0; if (sps->poc_type == 0) { sl->poc_lsb = get_bits(&sl->gb,
sps->log2_max_poc_lsb); @@ -1838,6 +1840,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl, sl->delta_poc_bottom = get_se_golomb(&sl->gb); } + sl->delta_poc[0] = sl->delta_poc[1] = 0; if (sps->poc_type == 1 && !sps->delta_pic_order_always_zero_flag) { sl->delta_poc[0] = get_se_golomb(&sl->gb); diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 47b9abbc5c3..485f47d36ec 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -914,7 +914,7 @@ static int finalize_frame(H264Context *h, AVFrame *dst, H264Picture *out, int *g out->qscale_table, out->motion_val, NULL, - h->mb_width, h->mb_height, h->mb_stride, 1); + out->mb_width, out->mb_height, out->mb_stride, 1); } } diff --git a/libavcodec/hevc_sei.c b/libavcodec/hevc_sei.c index c881c4338c2..e6ae777852b 100644 --- a/libavcodec/hevc_sei.c +++ b/libavcodec/hevc_sei.c @@ -386,7 +386,7 @@ static int decode_nal_sei_timecode(HEVCSEITimeCode *s, GetBitContext *gb) s->time_offset_length[i] = get_bits(gb, 5); if (s->time_offset_length[i] > 0) { - s->time_offset_value[i] = get_bits(gb, s->time_offset_length[i]); + s->time_offset_value[i] = get_bits_long(gb, s->time_offset_length[i]); } } } diff --git a/libavcodec/iff.c b/libavcodec/iff.c index 79f6215c770..76d3696bb33 100644 --- a/libavcodec/iff.c +++ b/libavcodec/iff.c @@ -1848,7 +1848,8 @@ static int decode_frame(AVCodecContext *avctx, buf += s->planesize; } } - memcpy(frame->data[1], s->pal, 256 * 4); + if (avctx->pix_fmt == AV_PIX_FMT_PAL8) + memcpy(frame->data[1], s->pal, 256 * 4); } else if (s->ham) { int i, count = 1 << s->ham; diff --git a/libavcodec/internal.h b/libavcodec/internal.h index b57b9968166..d889c1883e7 100644 --- a/libavcodec/internal.h +++ b/libavcodec/internal.h @@ -78,6 +78,11 @@ * Codec handles avctx->thread_count == 0 (auto) internally. */ #define FF_CODEC_CAP_AUTO_THREADS (1 << 7) +/** + * Codec handles output frame properties internally instead of letting the + * internal logic derive them from AVCodecInternal.last_pkt_props. + */ +#define FF_CODEC_CAP_SETS_FRAME_PROPS (1 << 8) /** * AVCodec.codec_tags termination value diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c index e3c5a32188c..212b9601c45 100644 --- a/libavcodec/j2kenc.c +++ b/libavcodec/j2kenc.c @@ -1679,7 +1679,7 @@ static int parse_layer_rates(Jpeg2000EncoderContext *s) } token = av_strtok(s->lr_str, ",", &saveptr); - if (rate = strtol(token, NULL, 10)) { + if (token && (rate = strtol(token, NULL, 10))) { s->layer_rates[0] = rate <= 1 ? 0:rate; nlayers++; } else { diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 63edbcda096..0d7ade5ce88 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -2361,6 +2361,8 @@ static int jp2_find_codestream(Jpeg2000DecoderContext *s) atom_size >= 16) { uint32_t atom2_size, atom2, atom2_end; do { + if (bytestream2_get_bytes_left(&s->g) < 8) + break; atom2_size = bytestream2_get_be32u(&s->g); atom2 = bytestream2_get_be32u(&s->g); atom2_end = bytestream2_tell(&s->g) + atom2_size - 8; diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c index 69980eaa497..c4ffa81f7d5 100644 --- a/libavcodec/jpeglsdec.c +++ b/libavcodec/jpeglsdec.c @@ -122,7 +122,7 @@ int ff_jpegls_decode_lse(MJpegDecodeContext *s) s->avctx->pix_fmt = AV_PIX_FMT_PAL8; for (i=s->palette_index; i<=maxtab; i++) { uint8_t k = i << shift; - pal[k] = 0; + pal[k] = wt < 4 ? 
0xFF000000 : 0; for (j=0; j<wt; j++) { pal[k] |= get_bits(&s->gb, 8) << (8*(wt-j-1)); } diff --git a/libavcodec/libdav1d.c b/libavcodec/libdav1d.c index 3c2a68b7e06..a9c983eacab 100644 --- a/libavcodec/libdav1d.c +++ b/libavcodec/libdav1d.c @@ -33,6 +33,9 @@ #include "decode.h" #include "internal.h" +#define FF_DAV1D_VERSION_AT_LEAST(x,y) \ + (DAV1D_API_VERSION_MAJOR > (x) || DAV1D_API_VERSION_MAJOR == (x) && DAV1D_API_VERSION_MINOR >= (y)) + typedef struct Libdav1dContext { AVClass *class; Dav1dContext *c; @@ -145,6 +148,15 @@ static av_cold int libdav1d_init(AVCodecContext *c) if (dav1d->operating_point >= 0) s.operating_point = dav1d->operating_point; +#if FF_DAV1D_VERSION_AT_LEAST(6,0) + if (dav1d->frame_threads || dav1d->tile_threads) + s.n_threads = FFMAX(dav1d->frame_threads, dav1d->tile_threads); + else + s.n_threads = FFMIN(threads, DAV1D_MAX_THREADS); + s.max_frame_delay = (c->flags & AV_CODEC_FLAG_LOW_DELAY) ? 1 : s.n_threads; + av_log(c, AV_LOG_DEBUG, "Using %d threads, %d max_frame_delay\n", + s.n_threads, s.max_frame_delay); +#else s.n_tile_threads = dav1d->tile_threads ? dav1d->tile_threads : FFMIN(floor(sqrt(threads)), DAV1D_MAX_TILE_THREADS); @@ -153,6 +165,7 @@ static av_cold int libdav1d_init(AVCodecContext *c) : FFMIN(ceil(threads / s.n_tile_threads), DAV1D_MAX_FRAME_THREADS); av_log(c, AV_LOG_DEBUG, "Using %d frame threads, %d tile threads\n", s.n_frame_threads, s.n_tile_threads); +#endif res = dav1d_open(&dav1d->c, &s); if (res < 0) @@ -456,6 +469,13 @@ static av_cold int libdav1d_close(AVCodecContext *c) return 0; } +#ifndef DAV1D_MAX_FRAME_THREADS +#define DAV1D_MAX_FRAME_THREADS DAV1D_MAX_THREADS +#endif +#ifndef DAV1D_MAX_TILE_THREADS +#define DAV1D_MAX_TILE_THREADS DAV1D_MAX_THREADS +#endif + #define OFFSET(x) offsetof(Libdav1dContext, x) #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM static const AVOption libdav1d_options[] = { diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c index 1d1d04fd801..3ed61563ee7 100644 --- a/libavcodec/lpc.c +++ b/libavcodec/lpc.c @@ -189,7 +189,7 @@ double ff_lpc_calc_ref_coefs_f(LPCContext *s, const float *samples, int len, compute_ref_coefs(autoc, order, ref, error); for (i = 0; i < order; i++) avg_err = (avg_err + error[i])/2.0f; - return signal/avg_err; + return avg_err ? signal/avg_err : NAN; } /** diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h index 88ca247f87b..e1b41bfd9ba 100644 --- a/libavcodec/lpc.h +++ b/libavcodec/lpc.h @@ -143,7 +143,7 @@ static inline void compute_ref_coefs(const LPC_TYPE *autoc, int max_order, gen0[i] = gen1[i] = autoc[i + 1]; err = autoc[0]; - ref[0] = -gen1[0] / err; + ref[0] = -gen1[0] / ((USE_FIXED || err) ? err : 1); err += gen1[0] * ref[0]; if (error) error[0] = err; @@ -152,7 +152,7 @@ static inline void compute_ref_coefs(const LPC_TYPE *autoc, int max_order, gen1[j] = gen1[j + 1] + ref[i - 1] * gen0[j]; gen0[j] = gen1[j + 1] * ref[i - 1] + gen0[j]; } - ref[i] = -gen1[0] / err; + ref[i] = -gen1[0] / ((USE_FIXED || err) ?
err : 1); err += gen1[0] * ref[i]; if (error) error[i] = err; @@ -186,7 +186,8 @@ static inline int AAC_RENAME(compute_lpc_coefs)(const LPC_TYPE *autoc, int max_o for(j=0; j<i; j++) r -= lpc_last[j] * autoc[i-j-1]; - r /= err; + if (err) + r /= err; err *= FIXR(1.0) - (r * r); } diff --git a/libavcodec/mjpegbdec.c b/libavcodec/mjpegbdec.c --- a/libavcodec/mjpegbdec.c +++ b/libavcodec/mjpegbdec.c @@ ... @@ s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16)); s->start_code = SOS; - ret = ff_mjpeg_decode_sos(s, NULL, 0, NULL); - if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE)) - return ret; + if (avctx->skip_frame == AVDISCARD_ALL) { + skip_bits(&s->gb, get_bits_left(&s->gb)); + } else { + ret = ff_mjpeg_decode_sos(s, NULL, 0, NULL); + if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE)) + return ret; + } } if (s->interlaced) { diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c index 2df6caa4405..afb117cfc61 100644 --- a/libavcodec/mjpegdec.c +++ b/libavcodec/mjpegdec.c @@ -1573,6 +1573,9 @@ static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss, else ret = decode_block_progressive(s, *block, last_nnz, s->ac_index[0], quant_matrix, ss, se, Al, &EOBRUN); + + if (ret >= 0 && get_bits_left(&s->gb) < 0) + ret = AVERROR_INVALIDDATA; if (ret < 0) { av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x); diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c index 94221da2c15..09bf01247d2 100644 --- a/libavcodec/mpeg12dec.c +++ b/libavcodec/mpeg12dec.c @@ -1538,6 +1538,10 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1) s->mpeg_f_code[0][1] = get_bits(&s->gb, 4); s->mpeg_f_code[1][0] = get_bits(&s->gb, 4); s->mpeg_f_code[1][1] = get_bits(&s->gb, 4); + s->mpeg_f_code[0][0] += !s->mpeg_f_code[0][0]; + s->mpeg_f_code[0][1] += !s->mpeg_f_code[0][1]; + s->mpeg_f_code[1][0] += !s->mpeg_f_code[1][0]; + s->mpeg_f_code[1][1] += !s->mpeg_f_code[1][1]; if (!s->pict_type && s1->mpeg_enc_ctx_allocated) { av_log(s->avctx, AV_LOG_ERROR, "Missing picture start code, guessing missing values\n"); @@ -1551,10 +1555,6 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1) s->current_picture.f->pict_type = s->pict_type; s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; } - s->mpeg_f_code[0][0] += !s->mpeg_f_code[0][0]; - s->mpeg_f_code[0][1] += !s->mpeg_f_code[0][1]; - s->mpeg_f_code[1][0] += !s->mpeg_f_code[1][0]; - s->mpeg_f_code[1][1] += !s->mpeg_f_code[1][1]; s->intra_dc_precision = get_bits(&s->gb, 2); s->picture_structure = get_bits(&s->gb, 2); diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index eb13d57d2b2..cee3fdb36bb 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -509,9 +509,13 @@ FF_ENABLE_DEPRECATION_WARNINGS if (!s->fixed_qscale && avctx->bit_rate * av_q2d(avctx->time_base) > avctx->bit_rate_tolerance) { + double nbt = avctx->bit_rate * av_q2d(avctx->time_base) * 5; av_log(avctx, AV_LOG_WARNING, "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate); - avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base); + if (nbt <= INT_MAX) { + avctx->bit_rate_tolerance = nbt; + } else + avctx->bit_rate_tolerance = INT_MAX; } if (avctx->rc_max_rate && @@ -2016,6 +2020,7 @@ FF_ENABLE_DEPRECATION_WARNINGS break; default: av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n"); + s->stuffing_bits = 0; } flush_put_bits(&s->pb); s->frame_bits = put_bits_count(&s->pb); diff --git a/libavcodec/mxpegdec.c b/libavcodec/mxpegdec.c index 763ce5871df..a068baf7586 100644 --- a/libavcodec/mxpegdec.c +++ b/libavcodec/mxpegdec.c @@ -193,6 +193,9 @@ static int mxpeg_decode_frame(AVCodecContext *avctx, int start_code; int
ret; + if (avctx->skip_frame == AVDISCARD_ALL) + return AVERROR_PATCHWELCOME; + buf_ptr = buf; buf_end = buf + buf_size; jpg->got_picture = 0; diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c index 99ede2f42b4..8670431dcc4 100644 --- a/libavcodec/nellymoserenc.c +++ b/libavcodec/nellymoserenc.c @@ -138,10 +138,8 @@ static av_cold int encode_end(AVCodecContext *avctx) ff_mdct_end(&s->mdct_ctx); - if (s->avctx->trellis) { - av_freep(&s->opt); - av_freep(&s->path); - } + av_freep(&s->opt); + av_freep(&s->path); ff_af_queue_close(&s->afq); av_freep(&s->fdsp); diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index dddee8cac1e..b09ddbe0fa0 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -144,6 +144,70 @@ static int nvenc_print_error(AVCodecContext *avctx, NVENCSTATUS err, return ret; } +typedef struct GUIDTuple { + const GUID guid; + int flags; +} GUIDTuple; + +#define PRESET_ALIAS(alias, name, ...) \ + [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ } + +#define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__) + +static void nvenc_map_preset(NvencContext *ctx) +{ + GUIDTuple presets[] = { +#ifdef NVENC_HAVE_NEW_PRESETS + PRESET(P1), + PRESET(P2), + PRESET(P3), + PRESET(P4), + PRESET(P5), + PRESET(P6), + PRESET(P7), + PRESET_ALIAS(SLOW, P7, NVENC_TWO_PASSES), + PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS), + PRESET_ALIAS(FAST, P1, NVENC_ONE_PASS), + // Compat aliases + PRESET_ALIAS(DEFAULT, P4, NVENC_DEPRECATED_PRESET), + PRESET_ALIAS(HP, P1, NVENC_DEPRECATED_PRESET), + PRESET_ALIAS(HQ, P7, NVENC_DEPRECATED_PRESET), + PRESET_ALIAS(BD, P5, NVENC_DEPRECATED_PRESET), + PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), + PRESET_ALIAS(LOW_LATENCY_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), + PRESET_ALIAS(LOW_LATENCY_HQ, P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), + PRESET_ALIAS(LOSSLESS_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), + PRESET_ALIAS(LOSSLESS_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), +#else + PRESET(DEFAULT), + PRESET(HP), + PRESET(HQ), + PRESET(BD), + PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES), + PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS), + PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS), + PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY), + PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY), + PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY), + PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS), + PRESET(LOSSLESS_HP, NVENC_LOSSLESS), +#endif + }; + + GUIDTuple *t = &presets[ctx->preset]; + + ctx->init_encode_params.presetGUID = t->guid; + ctx->flags = t->flags; + +#ifdef NVENC_HAVE_NEW_PRESETS + if (ctx->tuning_info == NV_ENC_TUNING_INFO_LOSSLESS) + ctx->flags |= NVENC_LOSSLESS; +#endif +} + +#undef PRESET +#undef PRESET_ALIAS + static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level) { #if NVENCAPI_CHECK_VERSION(11, 1) @@ -358,7 +422,7 @@ static int nvenc_check_capabilities(AVCodecContext *avctx) } ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE); - if (ctx->preset >= PRESET_LOSSLESS_DEFAULT && ret <= 0) { + if (ctx->flags & NVENC_LOSSLESS && ret <= 0) { av_log(avctx, AV_LOG_WARNING, "Lossless encoding not supported\n"); return AVERROR(ENOSYS); } @@ -548,6 +612,11 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx) return AVERROR_BUG; } + nvenc_map_preset(ctx); + + if (ctx->flags & NVENC_DEPRECATED_PRESET) + av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. 
Use p1 to p7 + -tune or fast/medium/slow.\n"); + if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) { AVHWFramesContext *frames_ctx; AVHWDeviceContext *hwdev_ctx; @@ -638,65 +707,6 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx) return 0; } -typedef struct GUIDTuple { - const GUID guid; - int flags; -} GUIDTuple; - -#define PRESET_ALIAS(alias, name, ...) \ - [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ } - -#define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__) - -static void nvenc_map_preset(NvencContext *ctx) -{ - GUIDTuple presets[] = { -#ifdef NVENC_HAVE_NEW_PRESETS - PRESET(P1), - PRESET(P2), - PRESET(P3), - PRESET(P4), - PRESET(P5), - PRESET(P6), - PRESET(P7), - PRESET_ALIAS(SLOW, P7, NVENC_TWO_PASSES), - PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS), - PRESET_ALIAS(FAST, P1, NVENC_ONE_PASS), - // Compat aliases - PRESET_ALIAS(DEFAULT, P4, NVENC_DEPRECATED_PRESET), - PRESET_ALIAS(HP, P1, NVENC_DEPRECATED_PRESET), - PRESET_ALIAS(HQ, P7, NVENC_DEPRECATED_PRESET), - PRESET_ALIAS(BD, P5, NVENC_DEPRECATED_PRESET), - PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), - PRESET_ALIAS(LOW_LATENCY_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), - PRESET_ALIAS(LOW_LATENCY_HQ, P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), - PRESET_ALIAS(LOSSLESS_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), - PRESET_ALIAS(LOSSLESS_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), -#else - PRESET(DEFAULT), - PRESET(HP), - PRESET(HQ), - PRESET(BD), - PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES), - PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS), - PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS), - PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY), - PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY), - PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY), - PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS), - PRESET(LOSSLESS_HP, NVENC_LOSSLESS), -#endif - }; - - GUIDTuple *t = &presets[ctx->preset]; - - ctx->init_encode_params.presetGUID = t->guid; - ctx->flags = t->flags; -} - -#undef PRESET -#undef PRESET_ALIAS - static av_cold void set_constqp(AVCodecContext *avctx) { NvencContext *ctx = avctx->priv_data; @@ -1254,18 +1264,15 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) ctx->init_encode_params.encodeConfig = &ctx->encode_config; - nvenc_map_preset(ctx); - - if (ctx->flags & NVENC_DEPRECATED_PRESET) - av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. 
Use p1 to p7 + -tune or fast/medium/slow.\n"); - preset_config.version = NV_ENC_PRESET_CONFIG_VER; preset_config.presetCfg.version = NV_ENC_CONFIG_VER; #ifdef NVENC_HAVE_NEW_PRESETS ctx->init_encode_params.tuningInfo = ctx->tuning_info; - if (ctx->flags & NVENC_LOWLATENCY) + if (ctx->flags & NVENC_LOSSLESS) + ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOSSLESS; + else if (ctx->flags & NVENC_LOWLATENCY) ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY; nv_status = p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder, @@ -1307,9 +1314,6 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) * */ if (ctx->rc_lookahead == 0 && ctx->encode_config.rcParams.enableLookahead) ctx->rc_lookahead = ctx->encode_config.rcParams.lookaheadDepth; - - if (ctx->init_encode_params.tuningInfo == NV_ENC_TUNING_INFO_LOSSLESS) - ctx->flags |= NVENC_LOSSLESS; #endif if (ctx->weighted_pred == 1) diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h index fefc5f7f0ba..314c270e748 100644 --- a/libavcodec/nvenc.h +++ b/libavcodec/nvenc.h @@ -103,7 +103,7 @@ enum { PRESET_LOW_LATENCY_DEFAULT , PRESET_LOW_LATENCY_HQ , PRESET_LOW_LATENCY_HP, - PRESET_LOSSLESS_DEFAULT, // lossless presets must be the last ones + PRESET_LOSSLESS_DEFAULT, PRESET_LOSSLESS_HP, #ifdef NVENC_HAVE_NEW_PRESETS PRESET_P1, diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c index 441e7871d2b..82fbb23bf73 100644 --- a/libavcodec/nvenc_hevc.c +++ b/libavcodec/nvenc_hevc.c @@ -148,7 +148,7 @@ static const AVOption options[] = { { "middle", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, VE, "b_ref_mode" }, #endif { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, - { "s12m_tc", "Use timecode (if available)", OFFSET(s12m_tc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, + { "s12m_tc", "Use timecode (if available)", OFFSET(s12m_tc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, { "dpb_size", "Specifies the DPB size used for encoding (0 means automatic)", OFFSET(dpb_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, #ifdef NVENC_HAVE_MULTIPASS diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c index f3b212d5086..6aa3c1b436b 100644 --- a/libavcodec/pngdec.c +++ b/libavcodec/pngdec.c @@ -1644,7 +1644,7 @@ static int decode_frame_apng(AVCodecContext *avctx, if (!(avctx->active_thread_type & FF_THREAD_FRAME)) { if (s->dispose_op == APNG_DISPOSE_OP_PREVIOUS) { ff_thread_release_buffer(avctx, &s->picture); - } else if (s->dispose_op == APNG_DISPOSE_OP_NONE) { + } else { ff_thread_release_buffer(avctx, &s->last_picture); FFSWAP(ThreadFrame, s->picture, s->last_picture); } @@ -1693,8 +1693,8 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src) pdst->hdr_state |= psrc->hdr_state; } - src_frame = psrc->dispose_op == APNG_DISPOSE_OP_NONE ? - &psrc->picture : &psrc->last_picture; + src_frame = psrc->dispose_op == APNG_DISPOSE_OP_PREVIOUS ? 
+ &psrc->last_picture : &psrc->picture; ff_thread_release_buffer(dst, &pdst->last_picture); if (src_frame && src_frame->f->data[0]) { diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c index bd70689caba..9c3a48c2518 100644 --- a/libavcodec/rv10.c +++ b/libavcodec/rv10.c @@ -154,7 +154,7 @@ static int rv10_decode_picture_header(MpegEncContext *s) return mb_count; } -static int rv20_decode_picture_header(RVDecContext *rv) +static int rv20_decode_picture_header(RVDecContext *rv, int whole_size) { MpegEncContext *s = &rv->m; int seq, mb_pos, i, ret; @@ -232,6 +232,10 @@ static int rv20_decode_picture_header(RVDecContext *rv) "attempting to change resolution to %dx%d\n", new_w, new_h); if (av_image_check_size(new_w, new_h, 0, s->avctx) < 0) return AVERROR_INVALIDDATA; + + if (whole_size < (new_w + 15)/16 * ((new_h + 15)/16) / 8) + return AVERROR_INVALIDDATA; + ff_mpv_common_end(s); // attempt to keep aspect during typical resolution switches @@ -447,7 +451,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, const uint8_t *buf, if (s->codec_id == AV_CODEC_ID_RV10) mb_count = rv10_decode_picture_header(s); else - mb_count = rv20_decode_picture_header(rv); + mb_count = rv20_decode_picture_header(rv, whole_size); if (mb_count < 0) { if (mb_count != ERROR_SKIP_FRAME) av_log(s->avctx, AV_LOG_ERROR, "HEADER ERROR\n"); diff --git a/libavcodec/sbrdsp_fixed.c b/libavcodec/sbrdsp_fixed.c index 91fa664c087..43fcc90ae57 100644 --- a/libavcodec/sbrdsp_fixed.c +++ b/libavcodec/sbrdsp_fixed.c @@ -87,7 +87,7 @@ static void sbr_neg_odd_64_c(int *x) { int i; for (i = 1; i < 64; i += 2) - x[i] = -x[i]; + x[i] = -(unsigned)x[i]; } static void sbr_qmf_pre_shuffle_c(int *z) diff --git a/libavcodec/snow.h b/libavcodec/snow.h index 41a3bef4dec..d705188bfdd 100644 --- a/libavcodec/snow.h +++ b/libavcodec/snow.h @@ -186,6 +186,7 @@ typedef struct SnowContext{ uint8_t *emu_edge_buffer; AVMotionVector *avmv; + unsigned avmv_size; int avmv_index; uint64_t encoding_error[AV_NUM_DATA_POINTERS]; diff --git a/libavcodec/snowdec.c b/libavcodec/snowdec.c index 68afe0df26d..177c2fa56dc 100644 --- a/libavcodec/snowdec.c +++ b/libavcodec/snowdec.c @@ -493,9 +493,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, s->spatial_decomposition_count ); - av_assert0(!s->avmv); if (s->avctx->export_side_data & AV_CODEC_EXPORT_DATA_MVS) { - s->avmv = av_malloc_array(s->b_width * s->b_height, sizeof(AVMotionVector) << (s->block_max_depth*2)); + size_t size; + res = av_size_mult(s->b_width * s->b_height, sizeof(AVMotionVector) << (s->block_max_depth*2), &size); + if (res) + return res; + av_fast_malloc(&s->avmv, &s->avmv_size, size); + if (!s->avmv) + return AVERROR(ENOMEM); + } else { + s->avmv_size = 0; + av_freep(&s->avmv); } s->avmv_index = 0; @@ -624,8 +632,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, memcpy(sd->data, s->avmv, s->avmv_index * sizeof(AVMotionVector)); } - av_freep(&s->avmv); - if (res < 0) return res; @@ -645,6 +651,9 @@ static av_cold int decode_end(AVCodecContext *avctx) ff_snow_common_end(s); + s->avmv_size = 0; + av_freep(&s->avmv); + return 0; } diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c index 4fac0c26e5c..1b5da03245a 100644 --- a/libavcodec/svq1enc.c +++ b/libavcodec/svq1enc.c @@ -487,9 +487,10 @@ static av_cold int svq1_encode_end(AVCodecContext *avctx) SVQ1EncContext *const s = avctx->priv_data; int i; - av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", - s->rd_total / (double)(avctx->width * avctx->height * - avctx->frame_number)); + if 
(avctx->frame_number) + av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", + s->rd_total / (double)(avctx->width * avctx->height * + avctx->frame_number)); s->m.mb_type = NULL; ff_mpv_common_end(&s->m); diff --git a/libavcodec/ttadata.c b/libavcodec/ttadata.c index bf793a4cc85..aa9f418a7d4 100644 --- a/libavcodec/ttadata.c +++ b/libavcodec/ttadata.c @@ -30,7 +30,8 @@ const uint32_t ff_tta_shift_1[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, - 0x80000000, 0x80000000, 0x80000000, 0x80000000 + 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0xFFFFFFFF }; const uint32_t * const ff_tta_shift_16 = ff_tta_shift_1 + 4; diff --git a/libavcodec/ttadsp.c b/libavcodec/ttadsp.c index 1d1443aee05..99dd66a0c2a 100644 --- a/libavcodec/ttadsp.c +++ b/libavcodec/ttadsp.c @@ -47,9 +47,9 @@ static void tta_filter_process_c(int32_t *qmi, int32_t *dx, int32_t *dl, *error = *in; *in += (round >> shift); - dl[4] = -dl[5]; dl[5] = -dl[6]; - dl[6] = *in - dl[7]; dl[7] = *in; - dl[5] += dl[6]; dl[4] += dl[5]; + dl[4] = -(unsigned)dl[5]; dl[5] = -(unsigned)dl[6]; + dl[6] = *in -(unsigned)dl[7]; dl[7] = *in; + dl[5] += (unsigned)dl[6]; dl[4] += (unsigned)dl[5]; } av_cold void ff_ttadsp_init(TTADSPContext *c) diff --git a/libavcodec/ttmlenc.c b/libavcodec/ttmlenc.c index 3972b4368c4..695651bb945 100644 --- a/libavcodec/ttmlenc.c +++ b/libavcodec/ttmlenc.c @@ -206,5 +206,5 @@ AVCodec ff_ttml_encoder = { .init = ttml_encode_init, .encode_sub = ttml_encode_frame, .close = ttml_encode_close, - .capabilities = FF_CODEC_CAP_INIT_CLEANUP, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, }; diff --git a/libavcodec/utils.c b/libavcodec/utils.c index 896b99dc3f2..825094d2f31 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -268,10 +268,21 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, h_align = 4; } if (s->codec_id == AV_CODEC_ID_JV || + s->codec_id == AV_CODEC_ID_ARGO || s->codec_id == AV_CODEC_ID_INTERPLAY_VIDEO) { w_align = 8; h_align = 8; } + if (s->codec_id == AV_CODEC_ID_MJPEG || + s->codec_id == AV_CODEC_ID_MJPEGB || + s->codec_id == AV_CODEC_ID_LJPEG || + s->codec_id == AV_CODEC_ID_SMVJPEG || + s->codec_id == AV_CODEC_ID_AMV || + s->codec_id == AV_CODEC_ID_SP5X || + s->codec_id == AV_CODEC_ID_JPEGLS) { + w_align = 8; + h_align = 2*8; + } break; case AV_PIX_FMT_BGR24: if ((s->codec_id == AV_CODEC_ID_MSZH) || @@ -286,6 +297,12 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, h_align = 4; } break; + case AV_PIX_FMT_BGR0: + if (s->codec_id == AV_CODEC_ID_ARGO) { + w_align = 8; + h_align = 8; + } + break; default: break; } @@ -739,7 +756,7 @@ static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba, case AV_CODEC_ID_ADPCM_THP: case AV_CODEC_ID_ADPCM_THP_LE: if (extradata) - return frame_bytes * 14 / (8 * ch); + return frame_bytes * 14LL / (8 * ch); break; case AV_CODEC_ID_ADPCM_XA: return (frame_bytes / 128) * 224 / ch; @@ -773,21 +790,33 @@ static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba, if (ba > 0) { /* calc from frame_bytes, channels, and block_align */ int blocks = frame_bytes / ba; + int64_t tmp = 0; switch (id) { case AV_CODEC_ID_ADPCM_IMA_WAV: if (bps < 2 || bps > 5) return 0; - return blocks * (1 + (ba - 4 * ch) / (bps * ch) * 8); + tmp = blocks * (1LL + (ba - 4 * ch) / (bps * ch) * 8); + break; case AV_CODEC_ID_ADPCM_IMA_DK3: - return blocks * (((ba - 16) * 2 / 3 * 4) / ch); + tmp = blocks * (((ba - 16LL) 
* 2 / 3 * 4) / ch); + break; case AV_CODEC_ID_ADPCM_IMA_DK4: - return blocks * (1 + (ba - 4 * ch) * 2 / ch); + tmp = blocks * (1 + (ba - 4LL * ch) * 2 / ch); + break; case AV_CODEC_ID_ADPCM_IMA_RAD: - return blocks * ((ba - 4 * ch) * 2 / ch); + tmp = blocks * ((ba - 4LL * ch) * 2 / ch); + break; case AV_CODEC_ID_ADPCM_MS: - return blocks * (2 + (ba - 7 * ch) * 2LL / ch); + tmp = blocks * (2 + (ba - 7LL * ch) * 2LL / ch); + break; case AV_CODEC_ID_ADPCM_MTAF: - return blocks * (ba - 16) * 2 / ch; + tmp = blocks * (ba - 16LL) * 2 / ch; + break; + } + if (tmp) { + if (tmp != (int)tmp) + return 0; + return tmp; } } @@ -825,20 +854,22 @@ static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba, int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes) { - return get_audio_frame_duration(avctx->codec_id, avctx->sample_rate, + int duration = get_audio_frame_duration(avctx->codec_id, avctx->sample_rate, avctx->channels, avctx->block_align, avctx->codec_tag, avctx->bits_per_coded_sample, avctx->bit_rate, avctx->extradata, avctx->frame_size, frame_bytes); + return FFMAX(0, duration); } int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes) { - return get_audio_frame_duration(par->codec_id, par->sample_rate, + int duration = get_audio_frame_duration(par->codec_id, par->sample_rate, par->channels, par->block_align, par->codec_tag, par->bits_per_coded_sample, par->bit_rate, par->extradata, par->frame_size, frame_bytes); + return FFMAX(0, duration); } #if !HAVE_THREADS diff --git a/libavcodec/vaapi_av1.c b/libavcodec/vaapi_av1.c index 1809b485aa1..16b7e35747c 100644 --- a/libavcodec/vaapi_av1.c +++ b/libavcodec/vaapi_av1.c @@ -292,7 +292,7 @@ static int vaapi_av1_decode_slice(AVCodecContext *avctx, err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &slice_param, sizeof(VASliceParameterBufferAV1), buffer, - s->tile_group_info[i].tile_size); + size); if (err) { ff_vaapi_decode_cancel(avctx, pic); return err; diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c index cd9975d8cfd..ab5e3008e0b 100644 --- a/libavcodec/vc1.c +++ b/libavcodec/vc1.c @@ -672,6 +672,8 @@ int ff_vc1_parse_frame_header(VC1Context *v, GetBitContext* gb) if (v->s.pict_type == AV_PICTURE_TYPE_P) v->rnd ^= 1; + if (get_bits_left(gb) < 5) + return AVERROR_INVALIDDATA; /* Quantizer stuff */ pqindex = get_bits(gb, 5); if (!pqindex) @@ -764,6 +766,9 @@ int ff_vc1_parse_frame_header(VC1Context *v, GetBitContext* gb) av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: " "Imode: %i, Invert: %i\n", status>>1, status&1); + if (get_bits_left(gb) < 4) + return AVERROR_INVALIDDATA; + /* Hopefully this is correct for P-frames */ v->s.mv_table_index = get_bits(gb, 2); //but using ff_vc1_ tables v->cbptab = get_bits(gb, 2); diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index ea93e115884..d4ceb60791b 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -1124,7 +1124,9 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ret = AVERROR_INVALIDDATA; goto err; } - if (!v->field_mode) + if ( !v->field_mode + && avctx->codec_id != AV_CODEC_ID_WMV3IMAGE + && avctx->codec_id != AV_CODEC_ID_VC1IMAGE) ff_er_frame_end(&s->er); } @@ -1152,12 +1154,14 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay) { if ((ret = av_frame_ref(pict, s->current_picture_ptr->f)) < 0) goto err; - ff_print_debug_info(s, s->current_picture_ptr, pict); + if (!v->field_mode) + ff_print_debug_info(s, s->current_picture_ptr, pict); 
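+ /* Field pictures interleave two fields whose per-macroblock debug state is not laid out like a full frame's; skipping ff_print_debug_info() in field mode presumably avoids misindexed reads of those tables. */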
*got_frame = 1; } else if (s->last_picture_ptr) { if ((ret = av_frame_ref(pict, s->last_picture_ptr->f)) < 0) goto err; - ff_print_debug_info(s, s->last_picture_ptr, pict); + if (!v->field_mode) + ff_print_debug_info(s, s->last_picture_ptr, pict); *got_frame = 1; } } diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c index 7bd2e4c2aba..295cc21dfae 100644 --- a/libavcodec/vc2enc.c +++ b/libavcodec/vc2enc.c @@ -982,6 +982,8 @@ static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, } s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); + if (s->slice_min_bytes < 0) + return AVERROR(EINVAL); ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced); if (ret) diff --git a/libavcodec/videotoolboxenc.c b/libavcodec/videotoolboxenc.c index 9b7ee6720c4..8a02d3d1a02 100644 --- a/libavcodec/videotoolboxenc.c +++ b/libavcodec/videotoolboxenc.c @@ -49,6 +49,10 @@ enum { kCVPixelFormatType_420YpCbCr10BiPlanarFullRange = 'xf20' }; enum { kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange = 'x420' }; #endif +#ifndef TARGET_CPU_ARM64 +# define TARGET_CPU_ARM64 0 +#endif + typedef OSStatus (*getParameterSetAtIndex)(CMFormatDescriptionRef videoDesc, size_t parameterSetIndex, const uint8_t **parameterSetPointerOut, diff --git a/libavcodec/webp.c b/libavcodec/webp.c index 5a7aebc5872..06a4cc04a5d 100644 --- a/libavcodec/webp.c +++ b/libavcodec/webp.c @@ -627,6 +627,9 @@ static int decode_entropy_coded_image(WebPContext *s, enum ImageRole role, while (y < img->frame->height) { int v; + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + hg = get_huffman_group(s, img, x, y); v = huff_reader_get_symbol(&hg[HUFF_IDX_GREEN], &s->gb); if (v < NUM_LITERAL_CODES) { diff --git a/libavcodec/wma.c b/libavcodec/wma.c index cfa5fa33550..a979a112bda 100644 --- a/libavcodec/wma.c +++ b/libavcodec/wma.c @@ -457,7 +457,7 @@ int ff_wma_run_level_decode(AVCodecContext *avctx, GetBitContext *gb, if (get_bits1(gb)) { av_log(avctx, AV_LOG_ERROR, "broken escape sequence\n"); - return -1; + return AVERROR_INVALIDDATA; } else offset += get_bits(gb, frame_len_bits) + 4; } else @@ -475,7 +475,7 @@ int ff_wma_run_level_decode(AVCodecContext *avctx, GetBitContext *gb, offset, num_coefs ); - return -1; + return AVERROR_INVALIDDATA; } return 0; diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c index 9c79556bb5a..8710414936c 100644 --- a/libavcodec/wmadec.c +++ b/libavcodec/wmadec.c @@ -590,15 +590,18 @@ static int wma_decode_block(WMACodecContext *s) if (s->channel_coded[ch]) { int tindex; WMACoef *ptr = &s->coefs1[ch][0]; + int ret; /* special VLC tables are used for ms stereo because * there is potentially less energy there */ tindex = (ch == 1 && s->ms_stereo); memset(ptr, 0, s->block_len * sizeof(WMACoef)); - ff_wma_run_level_decode(s->avctx, &s->gb, &s->coef_vlc[tindex], - s->level_table[tindex], s->run_table[tindex], - 0, ptr, 0, nb_coefs[ch], - s->block_len, s->frame_len_bits, coef_nb_bits); + ret = ff_wma_run_level_decode(s->avctx, &s->gb, &s->coef_vlc[tindex], + s->level_table[tindex], s->run_table[tindex], + 0, ptr, 0, nb_coefs[ch], + s->block_len, s->frame_len_bits, coef_nb_bits); + if (ret < 0) + return ret; } if (s->version == 1 && s->avctx->channels >= 2) align_get_bits(&s->gb); diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c index 8024ce16115..7c3044b0b01 100644 --- a/libavcodec/wmaprodec.c +++ b/libavcodec/wmaprodec.c @@ -985,13 +985,16 @@ static int decode_coeffs(WMAProDecodeCtx *s, int c) /** decode run level coded 
coefficients */ if (cur_coeff < s->subframe_len) { + int ret; + memset(&ci->coeffs[cur_coeff], 0, sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff)); - if (ff_wma_run_level_decode(s->avctx, &s->gb, vlc, - level, run, 1, ci->coeffs, - cur_coeff, s->subframe_len, - s->subframe_len, s->esc_len, 0)) - return AVERROR_INVALIDDATA; + ret = ff_wma_run_level_decode(s->avctx, &s->gb, vlc, + level, run, 1, ci->coeffs, + cur_coeff, s->subframe_len, + s->subframe_len, s->esc_len, 0); + if (ret < 0) + return ret; } return 0; diff --git a/libavcodec/xpmdec.c b/libavcodec/xpmdec.c index 922dfc0f672..993873c5958 100644 --- a/libavcodec/xpmdec.c +++ b/libavcodec/xpmdec.c @@ -341,9 +341,6 @@ static int xpm_decode_frame(AVCodecContext *avctx, void *data, if ((ret = ff_set_dimensions(avctx, width, height)) < 0) return ret; - if ((ret = ff_get_buffer(avctx, p, 0)) < 0) - return ret; - if (cpp <= 0 || cpp >= 5) { av_log(avctx, AV_LOG_ERROR, "unsupported/invalid number of chars per pixel: %d\n", cpp); return AVERROR_INVALIDDATA; @@ -360,14 +357,17 @@ static int xpm_decode_frame(AVCodecContext *avctx, void *data, size *= 4; - av_fast_padded_malloc(&x->pixels, &x->pixels_size, size); - if (!x->pixels) - return AVERROR(ENOMEM); - ptr += mod_strcspn(ptr, ",") + 1; if (end - ptr < 1) return AVERROR_INVALIDDATA; + if ((ret = ff_get_buffer(avctx, p, 0)) < 0) + return ret; + + av_fast_padded_malloc(&x->pixels, &x->pixels_size, size); + if (!x->pixels) + return AVERROR(ENOMEM); + for (i = 0; i < ncolors; i++) { const uint8_t *index; int len; diff --git a/libavfilter/af_drmeter.c b/libavfilter/af_drmeter.c index ecccb651863..425c25ae87e 100644 --- a/libavfilter/af_drmeter.c +++ b/libavfilter/af_drmeter.c @@ -167,6 +167,11 @@ static void print_stats(AVFilterContext *ctx) float chdr, secondpeak, rmssum = 0; int i, j, first = 0; + if (!p->nb_samples) { + av_log(ctx, AV_LOG_INFO, "No data, dynamic range not meassurable\n"); + return; + } + finish_block(p); for (i = 0; i <= 10000; i++) { diff --git a/libavfilter/f_metadata.c b/libavfilter/f_metadata.c index 598257b15b8..3332d91a3e7 100644 --- a/libavfilter/f_metadata.c +++ b/libavfilter/f_metadata.c @@ -304,9 +304,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) AVDictionary **metadata = &frame->metadata; AVDictionaryEntry *e; - if (!*metadata && s->mode != METADATA_ADD) - return ff_filter_frame(outlink, frame); - e = av_dict_get(*metadata, !s->key ? "" : s->key, NULL, !s->key ? 
AV_DICT_IGNORE_SUFFIX: 0); diff --git a/libavfilter/vf_ciescope.c b/libavfilter/vf_ciescope.c index 719e66ad0f0..2ddec3d06be 100644 --- a/libavfilter/vf_ciescope.c +++ b/libavfilter/vf_ciescope.c @@ -849,7 +849,8 @@ rgb_to_xy(double rc, *z = m[2][0] * rc + m[2][1] * gc + m[2][2] * bc; sum = *x + *y + *z; - + if (sum == 0) + sum = 1; *x = *x / sum; *y = *y / sum; } diff --git a/libavfilter/vf_dctdnoiz.c b/libavfilter/vf_dctdnoiz.c index a89f2631c82..2019a5b456e 100644 --- a/libavfilter/vf_dctdnoiz.c +++ b/libavfilter/vf_dctdnoiz.c @@ -564,6 +564,9 @@ static int config_input(AVFilterLink *inlink) inlink->h - s->pr_height); max_slice_h = s->pr_height / ((s->bsize - 1) * 2); + if (max_slice_h == 0) + return AVERROR(EINVAL); + s->nb_threads = FFMIN3(MAX_THREADS, ff_filter_get_nb_threads(ctx), max_slice_h); av_log(ctx, AV_LOG_DEBUG, "threads: [max=%d hmax=%d user=%d] => %d\n", MAX_THREADS, max_slice_h, ff_filter_get_nb_threads(ctx), s->nb_threads); diff --git a/libavfilter/vf_fftdnoiz.c b/libavfilter/vf_fftdnoiz.c index 856d716be53..eea1887e407 100644 --- a/libavfilter/vf_fftdnoiz.c +++ b/libavfilter/vf_fftdnoiz.c @@ -161,7 +161,7 @@ static void export_row8(FFTComplex *src, uint8_t *dst, int rw, float scale, int int j; for (j = 0; j < rw; j++) - dst[j] = av_clip_uint8(src[j].re * scale + 0.5f); + dst[j] = av_clip_uint8(lrintf(src[j].re * scale)); } static void import_row16(FFTComplex *dst, uint8_t *srcp, int rw) diff --git a/libavfilter/vf_mestimate.c b/libavfilter/vf_mestimate.c index 7ecfe7da603..9a2865a0cba 100644 --- a/libavfilter/vf_mestimate.c +++ b/libavfilter/vf_mestimate.c @@ -100,6 +100,9 @@ static int config_input(AVFilterLink *inlink) s->b_height = inlink->h >> s->log2_mb_size; s->b_count = s->b_width * s->b_height; + if (s->b_count == 0) + return AVERROR(EINVAL); + for (i = 0; i < 3; i++) { s->mv_table[i] = av_mallocz_array(s->b_count, sizeof(*s->mv_table[0])); if (!s->mv_table[i]) diff --git a/libavfilter/vf_overlay_cuda.c b/libavfilter/vf_overlay_cuda.c index f6ee43e9295..34241c8e1b5 100644 --- a/libavfilter/vf_overlay_cuda.c +++ b/libavfilter/vf_overlay_cuda.c @@ -63,6 +63,7 @@ typedef struct OverlayCUDAContext { enum AVPixelFormat in_format_overlay; enum AVPixelFormat in_format_main; + AVBufferRef *hw_device_ctx; AVCUDADeviceContext *hwctx; CUcontext cu_ctx; @@ -256,6 +257,9 @@ static av_cold void overlay_cuda_uninit(AVFilterContext *avctx) CHECK_CU(cu->cuModuleUnload(ctx->cu_module)); CHECK_CU(cu->cuCtxPopCurrent(&dummy)); } + + av_buffer_unref(&ctx->hw_device_ctx); + ctx->hwctx = NULL; } /** @@ -341,13 +345,19 @@ static int overlay_cuda_config_output(AVFilterLink *outlink) // initialize - ctx->hwctx = frames_ctx->device_ctx->hwctx; + ctx->hw_device_ctx = av_buffer_ref(frames_ctx->device_ref); + if (!ctx->hw_device_ctx) + return AVERROR(ENOMEM); + ctx->hwctx = ((AVHWDeviceContext*)ctx->hw_device_ctx->data)->hwctx; + cuda_ctx = ctx->hwctx->cuda_ctx; ctx->fs.time_base = inlink->time_base; ctx->cu_stream = ctx->hwctx->stream; outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); + if (!outlink->hw_frames_ctx) + return AVERROR(ENOMEM); // load functions diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c index 5ad9334d02c..3ca6ba2368d 100644 --- a/libavfilter/vf_scale.c +++ b/libavfilter/vf_scale.c @@ -739,6 +739,18 @@ static int scale_frame(AVFilterLink *link, AVFrame *in, AVFrame **frame_out) out->width = outlink->w; out->height = outlink->h; + // Sanity checks: + // 1. If the output is RGB, set the matrix coefficients to RGB. + // 2. 
If the output is not RGB and we've got the RGB/XYZ (identity) + // matrix configured, unset the matrix. + // In theory these should be in swscale itself as the AVFrame + // based API gets in, so that not every swscale API user has + // to go through duplicating such sanity checks. + if (av_pix_fmt_desc_get(out->format)->flags & AV_PIX_FMT_FLAG_RGB) + out->colorspace = AVCOL_SPC_RGB; + else if (out->colorspace == AVCOL_SPC_RGB) + out->colorspace = AVCOL_SPC_UNSPECIFIED; + if (scale->output_is_pal) avpriv_set_systematic_pal2((uint32_t*)out->data[1], outlink->format == AV_PIX_FMT_PAL8 ? AV_PIX_FMT_BGR8 : outlink->format); diff --git a/libavfilter/vf_scale_npp.c b/libavfilter/vf_scale_npp.c index 502ecfda946..34debc3135a 100644 --- a/libavfilter/vf_scale_npp.c +++ b/libavfilter/vf_scale_npp.c @@ -481,13 +481,16 @@ static int nppscale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in) src = s->stages[i].frame; last_stage = i; } - if (last_stage < 0) return AVERROR_BUG; + ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); if (ret < 0) return ret; + s->tmp_frame->width = src->width; + s->tmp_frame->height = src->height; + av_frame_move_ref(out, src); av_frame_move_ref(src, s->tmp_frame); diff --git a/libavfilter/vf_vmafmotion.c b/libavfilter/vf_vmafmotion.c index 88d0b350959..0730147e7d8 100644 --- a/libavfilter/vf_vmafmotion.c +++ b/libavfilter/vf_vmafmotion.c @@ -238,6 +238,9 @@ int ff_vmafmotion_init(VMAFMotionData *s, int i; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); + if (w < 3 || h < 3) + return AVERROR(EINVAL); + s->width = w; s->height = h; s->stride = FFALIGN(w * sizeof(uint16_t), 32); diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c index 43dea67addc..06fd24ecfaf 100644 --- a/libavfilter/vf_yadif.c +++ b/libavfilter/vf_yadif.c @@ -123,20 +123,22 @@ static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1, uint8_t *next2 = parity ? cur : next; const int edge = MAX_ALIGN - 1; + int offset = FFMAX(w - edge, 3); /* Only edge pixels need to be processed here. A constant value of false * for is_not_edge should let the compiler ignore the whole branch. */ - FILTER(0, 3, 0) + FILTER(0, FFMIN(3, w), 0) - dst = (uint8_t*)dst1 + w - edge; - prev = (uint8_t*)prev1 + w - edge; - cur = (uint8_t*)cur1 + w - edge; - next = (uint8_t*)next1 + w - edge; + dst = (uint8_t*)dst1 + offset; + prev = (uint8_t*)prev1 + offset; + cur = (uint8_t*)cur1 + offset; + next = (uint8_t*)next1 + offset; prev2 = (uint8_t*)(parity ? prev : cur); next2 = (uint8_t*)(parity ? cur : next); - FILTER(w - edge, w - 3, 1) - FILTER(w - 3, w, 0) + FILTER(offset, w - 3, 1) + offset = FFMAX(offset, w - 3); + FILTER(offset, w, 0) } @@ -170,21 +172,23 @@ static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1, uint16_t *next2 = parity ? cur : next; const int edge = MAX_ALIGN / 2 - 1; + int offset = FFMAX(w - edge, 3); mrefs /= 2; prefs /= 2; - FILTER(0, 3, 0) + FILTER(0, FFMIN(3, w), 0) - dst = (uint16_t*)dst1 + w - edge; - prev = (uint16_t*)prev1 + w - edge; - cur = (uint16_t*)cur1 + w - edge; - next = (uint16_t*)next1 + w - edge; + dst = (uint16_t*)dst1 + offset; + prev = (uint16_t*)prev1 + offset; + cur = (uint16_t*)cur1 + offset; + next = (uint16_t*)next1 + offset; prev2 = (uint16_t*)(parity ? prev : cur); next2 = (uint16_t*)(parity ? 
cur : next); - FILTER(w - edge, w - 3, 1) - FILTER(w - 3, w, 0) + FILTER(offset, w - 3, 1) + offset = FFMAX(offset, w - 3); + FILTER(offset, w, 0) } static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) diff --git a/libavformat/aaxdec.c b/libavformat/aaxdec.c index c6d2d1c8d11..e69e5615ee4 100644 --- a/libavformat/aaxdec.c +++ b/libavformat/aaxdec.c @@ -117,6 +117,7 @@ static int aax_read_header(AVFormatContext *s) int64_t column_offset = 0; int ret, extradata_size; char *codec; + int64_t ret64; avio_skip(pb, 4); a->table_size = avio_rb32(pb) + 8LL; @@ -218,7 +219,10 @@ static int aax_read_header(AVFormatContext *s) } } - avio_seek(pb, a->strings_offset, SEEK_SET); + ret = ret64 = avio_seek(pb, a->strings_offset, SEEK_SET); + if (ret64 < 0) + goto fail; + ret = avio_read(pb, a->string_table, a->strings_size); if (ret != a->strings_size) { if (ret < 0) @@ -249,7 +253,10 @@ static int aax_read_header(AVFormatContext *s) goto fail; } - avio_seek(pb, data_offset, SEEK_SET); + ret = ret64 = avio_seek(pb, data_offset, SEEK_SET); + if (ret64 < 0) + goto fail; + if (type == COLUMN_TYPE_VLDATA) { int64_t start, size; @@ -281,8 +288,8 @@ static int aax_read_header(AVFormatContext *s) codec = a->string_table + a->name_offset; if (!strcmp(codec, "AAX")) { par->codec_id = AV_CODEC_ID_ADPCM_ADX; - avio_seek(pb, a->segments[0].start, SEEK_SET); - if (avio_rb16(pb) != 0x8000) { + ret64 = avio_seek(pb, a->segments[0].start, SEEK_SET); + if (ret64 < 0 || avio_rb16(pb) != 0x8000) { ret = AVERROR_INVALIDDATA; goto fail; } diff --git a/libavformat/adtsenc.c b/libavformat/adtsenc.c index 3595cb3bb23..c35a12a6283 100644 --- a/libavformat/adtsenc.c +++ b/libavformat/adtsenc.c @@ -51,9 +51,11 @@ static int adts_decode_extradata(AVFormatContext *s, ADTSContext *adts, const ui GetBitContext gb; PutBitContext pb; MPEG4AudioConfig m4ac; - int off; + int off, ret; - init_get_bits(&gb, buf, size * 8); + ret = init_get_bits8(&gb, buf, size); + if (ret < 0) + return ret; off = avpriv_mpeg4audio_get_config2(&m4ac, buf, size, 1, s); if (off < 0) return off; diff --git a/libavformat/aiffdec.c b/libavformat/aiffdec.c index dcaf1560b60..8b85fea809c 100644 --- a/libavformat/aiffdec.c +++ b/libavformat/aiffdec.c @@ -100,6 +100,9 @@ static int get_aiff_header(AVFormatContext *s, int size, int sample_rate; unsigned int num_frames; + if (size == INT_MAX) + return AVERROR_INVALIDDATA; + if (size & 1) size++; par->codec_type = AVMEDIA_TYPE_AUDIO; diff --git a/libavformat/asfdec_f.c b/libavformat/asfdec_f.c index f784e629963..c0265af20d2 100644 --- a/libavformat/asfdec_f.c +++ b/libavformat/asfdec_f.c @@ -522,7 +522,7 @@ static int asf_read_stream_properties(AVFormatContext *s, int64_t size) tag1 = avio_rl32(pb); avio_skip(pb, 20); if (sizeX > 40) { - if (size < sizeX - 40) + if (size < sizeX - 40 || sizeX - 40 > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) return AVERROR_INVALIDDATA; st->codecpar->extradata_size = ffio_limit(pb, sizeX - 40); st->codecpar->extradata = av_mallocz(st->codecpar->extradata_size + diff --git a/libavformat/asfdec_o.c b/libavformat/asfdec_o.c index 7c085651801..f98ffc76faa 100644 --- a/libavformat/asfdec_o.c +++ b/libavformat/asfdec_o.c @@ -685,7 +685,7 @@ static int asf_read_properties(AVFormatContext *s, const GUIDParseTable *g) return 0; } -static int parse_video_info(AVIOContext *pb, AVStream *st) +static int parse_video_info(AVFormatContext *avfmt, AVIOContext *pb, AVStream *st) { uint16_t size_asf; // ASF-specific Format Data size uint32_t size_bmp; // BMP_HEADER-specific Format 
Data size @@ -700,19 +700,10 @@ static int parse_video_info(AVIOContext *pb, AVStream *st) st->codecpar->codec_id = ff_codec_get_id(ff_codec_bmp_tags, tag); size_bmp = FFMAX(size_asf, size_bmp); - if (size_bmp > BMP_HEADER_SIZE && - size_bmp < INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) { - int ret; - st->codecpar->extradata_size = size_bmp - BMP_HEADER_SIZE; - if (!(st->codecpar->extradata = av_malloc(st->codecpar->extradata_size + - AV_INPUT_BUFFER_PADDING_SIZE))) { - st->codecpar->extradata_size = 0; - return AVERROR(ENOMEM); - } - memset(st->codecpar->extradata + st->codecpar->extradata_size , 0, - AV_INPUT_BUFFER_PADDING_SIZE); - if ((ret = avio_read(pb, st->codecpar->extradata, - st->codecpar->extradata_size)) < 0) + if (size_bmp > BMP_HEADER_SIZE) { + int ret = ff_get_extradata(avfmt, st->codecpar, pb, size_bmp - BMP_HEADER_SIZE); + + if (ret < 0) return ret; } return 0; @@ -795,7 +786,7 @@ static int asf_read_stream_properties(AVFormatContext *s, const GUIDParseTable * break; case AVMEDIA_TYPE_VIDEO: asf_st->type = AVMEDIA_TYPE_VIDEO; - if ((ret = parse_video_info(pb, st)) < 0) + if ((ret = parse_video_info(s, pb, st)) < 0) return ret; break; default: diff --git a/libavformat/avidec.c b/libavformat/avidec.c index 59929afd498..542161e3600 100644 --- a/libavformat/avidec.c +++ b/libavformat/avidec.c @@ -165,7 +165,7 @@ static int get_riff(AVFormatContext *s, AVIOContext *pb) return 0; } -static int read_odml_index(AVFormatContext *s, int frame_num) +static int read_odml_index(AVFormatContext *s, int64_t frame_num) { AVIContext *avi = s->priv_data; AVIOContext *pb = s->pb; @@ -185,7 +185,7 @@ static int read_odml_index(AVFormatContext *s, int frame_num) av_log(s, AV_LOG_TRACE, "longs_per_entry:%d index_type:%d entries_in_use:%d " - "chunk_id:%X base:%16"PRIX64" frame_num:%d\n", + "chunk_id:%X base:%16"PRIX64" frame_num:%"PRId64"\n", longs_per_entry, index_type, entries_in_use, @@ -245,7 +245,7 @@ static int read_odml_index(AVFormatContext *s, int frame_num) avio_rl32(pb); /* size */ duration = avio_rl32(pb); - if (avio_feof(pb)) + if (avio_feof(pb) || offset > INT64_MAX - 8) return AVERROR_INVALIDDATA; pos = avio_tell(pb); @@ -1783,7 +1783,10 @@ static int avi_load_index(AVFormatContext *s) size = avio_rl32(pb); if (avio_feof(pb)) break; - next = avio_tell(pb) + size + (size & 1); + next = avio_tell(pb); + if (next < 0 || next > INT64_MAX - size - (size & 1)) + break; + next += size + (size & 1LL); if (tag == MKTAG('i', 'd', 'x', '1') && avi_read_idx1(s, size) >= 0) { diff --git a/libavformat/avio.c b/libavformat/avio.c index 8011482e769..cd9b5d9e7f9 100644 --- a/libavformat/avio.c +++ b/libavformat/avio.c @@ -316,8 +316,11 @@ int ffurl_open_whitelist(URLContext **puc, const char *filename, int flags, int ret = ffurl_alloc(puc, filename, flags, int_cb); if (ret < 0) return ret; - if (parent) - av_opt_copy(*puc, parent); + if (parent) { + ret = av_opt_copy(*puc, parent); + if (ret < 0) + goto fail; + } if (options && (ret = av_opt_set_dict(*puc, options)) < 0) goto fail; diff --git a/libavformat/bfi.c b/libavformat/bfi.c index 2dab986f3af..f9e0bb2e30d 100644 --- a/libavformat/bfi.c +++ b/libavformat/bfi.c @@ -73,6 +73,8 @@ static int bfi_read_header(AVFormatContext * s) return AVERROR_INVALIDDATA; bfi->nframes = avio_rl32(pb); + if (bfi->nframes < 0) + return AVERROR_INVALIDDATA; avio_rl32(pb); avio_rl32(pb); avio_rl32(pb); diff --git a/libavformat/cafdec.c b/libavformat/cafdec.c index 76bc4a46552..7f09a279779 100644 --- a/libavformat/cafdec.c +++ b/libavformat/cafdec.c @@ -79,7 +79,7 @@ 
static int read_desc_chunk(AVFormatContext *s) st->codecpar->channels = avio_rb32(pb); st->codecpar->bits_per_coded_sample = avio_rb32(pb); - if (caf->bytes_per_packet < 0 || caf->frames_per_packet < 0) + if (caf->bytes_per_packet < 0 || caf->frames_per_packet < 0 || st->codecpar->channels < 0) return AVERROR_INVALIDDATA; /* calculate bit rate for constant size packets */ diff --git a/libavformat/dsfdec.c b/libavformat/dsfdec.c index 1df163e114c..71dbf2f1125 100644 --- a/libavformat/dsfdec.c +++ b/libavformat/dsfdec.c @@ -129,7 +129,7 @@ static int dsf_read_header(AVFormatContext *s) return AVERROR_INVALIDDATA; } st->codecpar->block_align *= st->codecpar->channels; - st->codecpar->bit_rate = st->codecpar->channels * st->codecpar->sample_rate * 8LL; + st->codecpar->bit_rate = st->codecpar->channels * 8LL * st->codecpar->sample_rate; avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); avio_skip(pb, 4); diff --git a/libavformat/dsicin.c b/libavformat/dsicin.c index b18f43b9a0e..5a1f2565952 100644 --- a/libavformat/dsicin.c +++ b/libavformat/dsicin.c @@ -166,7 +166,8 @@ static int cin_read_packet(AVFormatContext *s, AVPacket *pkt) CinDemuxContext *cin = s->priv_data; AVIOContext *pb = s->pb; CinFrameHeader *hdr = &cin->frame_header; - int rc, palette_type, pkt_size; + int rc, palette_type; + int64_t pkt_size; int ret; if (cin->audio_buffer_size == 0) { @@ -182,7 +183,9 @@ static int cin_read_packet(AVFormatContext *s, AVPacket *pkt) } /* palette and video packet */ - pkt_size = (palette_type + 3) * hdr->pal_colors_count + hdr->video_frame_size; + pkt_size = (palette_type + 3LL) * hdr->pal_colors_count + hdr->video_frame_size; + if (pkt_size + 4 > INT_MAX) + return AVERROR_INVALIDDATA; pkt_size = ffio_limit(pb, pkt_size); diff --git a/libavformat/dxa.c b/libavformat/dxa.c index 909c5ba2baf..cd9c4898515 100644 --- a/libavformat/dxa.c +++ b/libavformat/dxa.c @@ -79,7 +79,7 @@ static int dxa_read_header(AVFormatContext *s) if(fps > 0){ den = 1000; num = fps; - }else if (fps < 0){ + }else if (fps < 0 && fps > INT_MIN){ den = 100000; num = -fps; }else{ diff --git a/libavformat/fifo.c b/libavformat/fifo.c index 17748e94ce5..a1dca1bc160 100644 --- a/libavformat/fifo.c +++ b/libavformat/fifo.c @@ -593,7 +593,7 @@ static int fifo_write_packet(AVFormatContext *avf, AVPacket *pkt) goto fail; } - if (fifo->timeshift && pkt->dts != AV_NOPTS_VALUE) + if (fifo->timeshift && pkt && pkt->dts != AV_NOPTS_VALUE) atomic_fetch_add_explicit(&fifo->queue_duration, next_duration(avf, pkt, &fifo->last_sent_dts), memory_order_relaxed); return ret; diff --git a/libavformat/ftp.c b/libavformat/ftp.c index caeea429209..69caa7670ce 100644 --- a/libavformat/ftp.c +++ b/libavformat/ftp.c @@ -972,6 +972,8 @@ static int ftp_parse_entry_mlsd(char *mlsd, AVIODirEntry *next) continue; } fact = av_strtok(fact, "=", &value); + if (!fact) + continue; if (!av_strcasecmp(fact, "type")) { if (!av_strcasecmp(value, "cdir") || !av_strcasecmp(value, "pdir")) return 1; diff --git a/libavformat/hlsenc.c b/libavformat/hlsenc.c index 7d97ce17891..e222b70ffa1 100644 --- a/libavformat/hlsenc.c +++ b/libavformat/hlsenc.c @@ -2672,14 +2672,13 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt) vs->packets_written++; if (oc->pb) { - int64_t keyframe_pre_pos = avio_tell(oc->pb); ret = ff_write_chained(oc, stream_index, pkt, s, 0); - if ((st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) && - (pkt->flags & AV_PKT_FLAG_KEY) && !keyframe_pre_pos) { - av_write_frame(oc, NULL); /* Flush any buffered data */ - vs->video_keyframe_size = 
avio_tell(oc->pb) - keyframe_pre_pos; + vs->video_keyframe_size += pkt->size; + if ((st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) && (pkt->flags & AV_PKT_FLAG_KEY)) { + vs->video_keyframe_size = avio_tell(oc->pb); + } else { + vs->video_keyframe_pos = avio_tell(vs->out); } - vs->video_keyframe_pos = vs->start_pos; if (hls->ignore_io_errors) ret = 0; } diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c index f33b7ba93a5..1377cef4b89 100644 --- a/libavformat/id3v2.c +++ b/libavformat/id3v2.c @@ -816,7 +816,7 @@ static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata, int isv34, unsync; unsigned tlen; char tag[5]; - int64_t next, end = avio_tell(pb) + len; + int64_t next, end = avio_tell(pb); int taghdrlen; const char *reason = NULL; AVIOContext pb_local; @@ -828,6 +828,10 @@ static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata, av_unused int uncompressed_buffer_size = 0; const char *comm_frame; + if (end > INT64_MAX - len - 10) + return; + end += len; + av_log(s, AV_LOG_DEBUG, "id3v2 ver:%d flags:%02X len:%d\n", version, flags, len); switch (version) { diff --git a/libavformat/iff.c b/libavformat/iff.c index b07b6c8b187..c15302d3c56 100644 --- a/libavformat/iff.c +++ b/libavformat/iff.c @@ -385,7 +385,7 @@ static int read_dst_frame(AVFormatContext *s, AVPacket *pkt) avio_skip(pb, 1); pkt->flags |= AV_PKT_FLAG_KEY; pkt->stream_index = 0; - pkt->duration = 588 * s->streams[0]->codecpar->sample_rate / 44100; + pkt->duration = 588LL * s->streams[0]->codecpar->sample_rate / 44100; pkt->pos = chunk_pos; chunk_pos = avio_tell(pb); diff --git a/libavformat/jacosubdec.c b/libavformat/jacosubdec.c index 9c6640eef83..2ccbf4c9dec 100644 --- a/libavformat/jacosubdec.c +++ b/libavformat/jacosubdec.c @@ -141,6 +141,9 @@ static int get_shift(int timeres, const char *buf) int n = sscanf(buf, "%d"SSEP"%d"SSEP"%d"SSEP"%d", &a, &b, &c, &d); #undef SSEP + if (a == INT_MIN) + return 0; + if (*buf == '-' || a < 0) { sign = -1; a = FFABS(a); diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index 65756ae06d4..fb1849f9c31 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -802,20 +802,22 @@ static int matroska_read_close(AVFormatContext *s); static int matroska_reset_status(MatroskaDemuxContext *matroska, uint32_t id, int64_t position) { + int64_t err = 0; if (position >= 0) { - int64_t err = avio_seek(matroska->ctx->pb, position, SEEK_SET); - if (err < 0) - return err; - } + err = avio_seek(matroska->ctx->pb, position, SEEK_SET); + if (err > 0) + err = 0; + } else + position = avio_tell(matroska->ctx->pb); matroska->current_id = id; matroska->num_levels = 1; matroska->unknown_count = 0; - matroska->resync_pos = avio_tell(matroska->ctx->pb); + matroska->resync_pos = position; if (id) matroska->resync_pos -= (av_log2(id) + 7) / 8; - return 0; + return err; } static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos) @@ -1871,6 +1873,7 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, uint32_t saved_id = matroska->current_id; int64_t before_pos = avio_tell(matroska->ctx->pb); int ret = 0; + int ret2; /* seek */ if (avio_seek(matroska->ctx->pb, pos, SEEK_SET) == pos) { @@ -1895,7 +1898,9 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, } /* Seek back - notice that in all instances where this is used * it is safe to set the level to 1. 
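* (Everything a SeekHead can reference - Info, Tracks, Chapters, Cues, Attachments, Tags - is an immediate child of the Segment, so after seeking back the parser is always directly below the Segment again.)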
*/ - matroska_reset_status(matroska, saved_id, before_pos); + ret2 = matroska_reset_status(matroska, saved_id, before_pos); + if (ret >= 0) + ret = ret2; return ret; } @@ -2807,8 +2812,9 @@ static int matroska_parse_tracks(AVFormatContext *s) st->need_parsing = AVSTREAM_PARSE_HEADERS; if (track->default_duration) { + int div = track->default_duration <= INT64_MAX ? 1 : 2; av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den, - 1000000000, track->default_duration, 30000); + 1000000000 / div, track->default_duration / div, 30000); #if FF_API_R_FRAME_RATE if ( st->avg_frame_rate.num < st->avg_frame_rate.den * 1000LL && st->avg_frame_rate.num > st->avg_frame_rate.den * 5LL) diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index bbf231f2a46..b4284a87785 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -1768,6 +1768,7 @@ static int mkv_write_attachments(AVFormatContext *s) put_ebml_string(dyn_cp, MATROSKA_ID_FILEDESC, t->value); if (!(t = av_dict_get(st->metadata, "filename", NULL, 0))) { av_log(s, AV_LOG_ERROR, "Attachment stream %d has no filename tag.\n", i); + ffio_free_dyn_buf(&dyn_cp); return AVERROR(EINVAL); } put_ebml_string(dyn_cp, MATROSKA_ID_FILENAME, t->value); diff --git a/libavformat/mccdec.c b/libavformat/mccdec.c index 2a0b7905a01..627471a1fe3 100644 --- a/libavformat/mccdec.c +++ b/libavformat/mccdec.c @@ -127,8 +127,7 @@ static int mcc_read_header(AVFormatContext *s) num = strtol(rate_str, &df, 10); den = 1; if (df && !av_strncasecmp(df, "DF", 2)) { - num *= 1000; - den = 1001; + av_reduce(&num, &den, num * 1000LL, 1001, INT_MAX); } } diff --git a/libavformat/moflex.c b/libavformat/moflex.c index 41335ada789..0706f88e641 100644 --- a/libavformat/moflex.c +++ b/libavformat/moflex.c @@ -172,7 +172,6 @@ static int moflex_read_sync(AVFormatContext *s) unsigned type, ssize, codec_id = 0; unsigned codec_type, width = 0, height = 0, sample_rate = 0, channels = 0; int stream_index = -1; - int format; AVRational fps; read_var_byte(s, &type); @@ -213,7 +212,6 @@ static int moflex_read_sync(AVFormatContext *s) fps.den = avio_rb16(pb); width = avio_rb16(pb); height = avio_rb16(pb); - format = AV_PIX_FMT_YUV420P; avio_skip(pb, type == 3 ? 
3 : 2); break; case 4: @@ -235,7 +233,6 @@ static int moflex_read_sync(AVFormatContext *s) st->codecpar->height = height; st->codecpar->sample_rate= sample_rate; st->codecpar->channels = channels; - st->codecpar->format = format; st->priv_data = av_packet_alloc(); if (!st->priv_data) return AVERROR(ENOMEM); diff --git a/libavformat/mov.c b/libavformat/mov.c index 38a70589be2..4af796ee31c 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -294,6 +294,8 @@ static int mov_metadata_hmmt(MOVContext *c, AVIOContext *pb, unsigned len) int moment_time = avio_rb32(pb); avpriv_new_chapter(c->fc, i, av_make_q(1, 1000), moment_time, AV_NOPTS_VALUE, NULL); } + if (avio_feof(pb)) + return AVERROR_INVALIDDATA; return 0; } @@ -3835,7 +3837,11 @@ static void mov_build_index(MOVContext *mov, AVStream *st) if ((empty_duration || start_time) && mov->time_scale > 0) { if (empty_duration) empty_duration = av_rescale(empty_duration, sc->time_scale, mov->time_scale); - sc->time_offset = start_time - empty_duration; + + if (av_sat_sub64(start_time, empty_duration) != start_time - (uint64_t)empty_duration) + av_log(mov->fc, AV_LOG_WARNING, "start_time - empty_duration is not representable\n"); + + sc->time_offset = start_time - (uint64_t)empty_duration; sc->min_corrected_pts = start_time; if (!mov->advanced_editlist) current_dts = -sc->time_offset; @@ -4700,6 +4706,8 @@ static int mov_read_chap(MOVContext *c, AVIOContext *pb, MOVAtom atom) for (i = 0; i < num && !pb->eof_reached; i++) c->chapter_tracks[i] = avio_rb32(pb); + c->nb_chapter_tracks = i; + return 0; } @@ -4984,6 +4992,8 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom) "size %u, distance %d, keyframe %d\n", st->index, index_entry_pos, offset, dts, sample_size, distance, keyframe); distance++; + if (av_sat_add64(dts, sample_duration) != dts + (uint64_t)sample_duration) + return AVERROR_INVALIDDATA; dts += sample_duration; offset += sample_size; sc->data_size += sample_size; @@ -5124,7 +5134,9 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom) if (frag_stream_info) frag_stream_info->sidx_pts = timestamp; - if (av_sat_add64(offset, size) != offset + size) + if (av_sat_add64(offset, size) != offset + (uint64_t)size || + av_sat_add64(pts, duration) != pts + (uint64_t)duration + ) return AVERROR_INVALIDDATA; offset += size; pts += duration; @@ -5136,7 +5148,7 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom) // See if the remaining bytes are just an mfra which we can ignore. 
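// (The check below is rewritten as offset == stream_size - c->mfra_size instead of offset + c->mfra_size == stream_size, apparently so that an mfra_size read from an untrusted file cannot overflow the addition.)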
is_complete = offset == stream_size; - if (!is_complete && (pb->seekable & AVIO_SEEKABLE_NORMAL)) { + if (!is_complete && (pb->seekable & AVIO_SEEKABLE_NORMAL) && stream_size > 0 ) { int64_t ret; int64_t original_pos = avio_tell(pb); if (!c->have_read_mfra_size) { @@ -5147,7 +5159,7 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom) if ((ret = avio_seek(pb, original_pos, SEEK_SET)) < 0) return ret; } - if (offset + c->mfra_size == stream_size) + if (offset == stream_size - c->mfra_size) is_complete = 1; } @@ -5464,7 +5476,7 @@ static int mov_read_mdcv(MOVContext *c, AVIOContext *pb, MOVAtom atom) sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data; - if (atom.size < 24) { + if (atom.size < 24 || sc->mastering) { av_log(c->fc, AV_LOG_ERROR, "Invalid Mastering Display Color Volume box\n"); return AVERROR_INVALIDDATA; } @@ -5512,6 +5524,11 @@ static int mov_read_coll(MOVContext *c, AVIOContext *pb, MOVAtom atom) } avio_skip(pb, 3); /* flags */ + if (sc->coll){ + av_log(c->fc, AV_LOG_WARNING, "Ignoring duplicate COLL\n"); + return 0; + } + sc->coll = av_content_light_metadata_alloc(&sc->coll_size); if (!sc->coll) return AVERROR(ENOMEM); @@ -5536,6 +5553,11 @@ static int mov_read_clli(MOVContext *c, AVIOContext *pb, MOVAtom atom) return AVERROR_INVALIDDATA; } + if (sc->coll){ + av_log(c->fc, AV_LOG_WARNING, "Ignoring duplicate CLLI/COLL\n"); + return 0; + } + sc->coll = av_content_light_metadata_alloc(&sc->coll_size); if (!sc->coll) return AVERROR(ENOMEM); diff --git a/libavformat/movenc.c b/libavformat/movenc.c index bade57dcea9..2cd5773dc5e 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -797,6 +797,7 @@ static int mov_write_dfla_tag(AVIOContext *pb, MOVTrack *track) static int mov_write_dops_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track) { int64_t pos = avio_tell(pb); + int channels, channel_map; avio_wb32(pb, 0); ffio_wfourcc(pb, "dOps"); avio_w8(pb, 0); /* Version */ @@ -807,12 +808,22 @@ static int mov_write_dops_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra /* extradata contains an Ogg OpusHead, other than byte-ordering and OpusHead's preceding magic/version, OpusSpecificBox is currently identical. */ - avio_w8(pb, AV_RB8(track->par->extradata + 9)); /* OutputChannelCount */ + channels = AV_RB8(track->par->extradata + 9); + channel_map = AV_RB8(track->par->extradata + 18); + + avio_w8(pb, channels); /* OutputChannelCount */ avio_wb16(pb, AV_RL16(track->par->extradata + 10)); /* PreSkip */ avio_wb32(pb, AV_RL32(track->par->extradata + 12)); /* InputSampleRate */ avio_wb16(pb, AV_RL16(track->par->extradata + 16)); /* OutputGain */ + avio_w8(pb, channel_map); /* ChannelMappingFamily */ /* Write the rest of the header out without byte-swapping. 
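Per RFC 7845, an OpusHead with a nonzero ChannelMappingFamily ends with a ChannelMappingTable (StreamCount, CoupledCount and one mapping byte per output channel), which is why 2 + channels bytes are copied from extradata offset 19 below.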
*/ - avio_write(pb, track->par->extradata + 18, track->par->extradata_size - 18); + if (channel_map) { + if (track->par->extradata_size < 21 + channels) { + av_log(s, AV_LOG_ERROR, "invalid extradata size\n"); + return AVERROR_INVALIDDATA; + } + avio_write(pb, track->par->extradata + 19, 2 + channels); /* ChannelMappingTable */ + } return update_size(pb, pos); } @@ -2166,11 +2177,13 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex avio_wb16(pb, 0x18); /* Reserved */ if (track->mode == MODE_MOV && track->par->format == AV_PIX_FMT_PAL8) { - int pal_size = 1 << track->par->bits_per_coded_sample; - int i; + int pal_size, i; avio_wb16(pb, 0); /* Color table ID */ avio_wb32(pb, 0); /* Color table seed */ avio_wb16(pb, 0x8000); /* Color table flags */ + if (track->par->bits_per_coded_sample < 0 || track->par->bits_per_coded_sample > 8) + return AVERROR(EINVAL); + pal_size = 1 << track->par->bits_per_coded_sample; avio_wb16(pb, pal_size - 1); /* Color table size (zero-relative) */ for (i = 0; i < pal_size; i++) { uint32_t rgb = track->palette[i]; diff --git a/libavformat/mpc8.c b/libavformat/mpc8.c index 88c55e3d22f..c3d7e115a75 100644 --- a/libavformat/mpc8.c +++ b/libavformat/mpc8.c @@ -177,7 +177,13 @@ static void mpc8_parse_seektable(AVFormatContext *s, int64_t off) } seekd = get_bits(&gb, 4); for(i = 0; i < 2; i++){ - pos = gb_get_v(&gb) + c->header_pos; + pos = gb_get_v(&gb); + if (av_sat_add64(pos, c->header_pos) != pos + (uint64_t)c->header_pos) { + av_free(buf); + return; + } + + pos += c->header_pos; ppos[1 - i] = pos; av_add_index_entry(s->streams[0], pos, i, 0, 0, AVINDEX_KEYFRAME); } @@ -205,8 +211,11 @@ static void mpc8_handle_chunk(AVFormatContext *s, int tag, int64_t chunk_pos, in switch(tag){ case TAG_SEEKTBLOFF: - pos = avio_tell(pb) + size; + pos = avio_tell(pb); off = ffio_read_varlen(pb); + if (pos > INT64_MAX - size || off < 0 || off > INT64_MAX - chunk_pos) + return; + pos += size; mpc8_parse_seektable(s, chunk_pos + off); avio_seek(pb, pos, SEEK_SET); break; diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index 6e0d9d74963..a3033134f71 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -2026,6 +2026,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type return AVERROR_INVALIDDATA; if (channel_config_code <= 0x8) { st->codecpar->extradata[9] = channels = channel_config_code ? channel_config_code : 2; + AV_WL32(&st->codecpar->extradata[12], 48000); st->codecpar->extradata[18] = channel_config_code ? (channels > 2) : /* Dual Mono */ 255; st->codecpar->extradata[19] = opus_stream_cnt[channel_config_code]; st->codecpar->extradata[20] = opus_coupled_stream_cnt[channel_config_code]; @@ -2861,8 +2862,8 @@ static int mpegts_resync(AVFormatContext *s, int seekback, const uint8_t *curren int64_t back = FFMIN(seekback, pos); //Special case for files like 01c56b0dc1.ts - if (current_packet[0] == 0x80 && current_packet[12] == 0x47) { - avio_seek(pb, 12 - back, SEEK_CUR); + if (current_packet[0] == 0x80 && current_packet[12] == 0x47 && pos >= TS_PACKET_SIZE) { + avio_seek(pb, 12 - TS_PACKET_SIZE, SEEK_CUR); return 0; } diff --git a/libavformat/msf.c b/libavformat/msf.c index 155f488e440..1eaed54357c 100644 --- a/libavformat/msf.c +++ b/libavformat/msf.c @@ -70,6 +70,8 @@ static int msf_read_header(AVFormatContext *s) case 4: case 5: case 6: st->codecpar->block_align = (codec == 4 ? 96 : codec == 5 ? 
152 : 192) * st->codecpar->channels; + if (st->codecpar->channels > UINT16_MAX / 2048) + return AVERROR_INVALIDDATA; ret = ff_alloc_extradata(st->codecpar, 14); if (ret < 0) return ret; diff --git a/libavformat/mvdec.c b/libavformat/mvdec.c index 045c66ac3c1..ab7bc5a328a 100644 --- a/libavformat/mvdec.c +++ b/libavformat/mvdec.c @@ -156,7 +156,10 @@ static int parse_audio_var(AVFormatContext *avctx, AVStream *st, } else if (!strcmp(name, "NUM_CHANNELS")) { return set_channels(avctx, st, var_read_int(pb, size)); } else if (!strcmp(name, "SAMPLE_RATE")) { - st->codecpar->sample_rate = var_read_int(pb, size); + int sample_rate = var_read_int(pb, size); + if (sample_rate <= 0) + return AVERROR_INVALIDDATA; + st->codecpar->sample_rate = sample_rate; avpriv_set_pts_info(st, 33, 1, st->codecpar->sample_rate); } else if (!strcmp(name, "SAMPLE_WIDTH")) { uint64_t bpc = var_read_int(pb, size) * (uint64_t)8; diff --git a/libavformat/mvi.c b/libavformat/mvi.c index cfdbe5d273b..d005001f5a4 100644 --- a/libavformat/mvi.c +++ b/libavformat/mvi.c @@ -32,7 +32,6 @@ typedef struct MviDemuxContext { unsigned int (*get_int)(AVIOContext *); - uint32_t audio_data_size; uint64_t audio_size_counter; uint64_t audio_frame_size; int audio_size_left; @@ -46,6 +45,7 @@ static int read_header(AVFormatContext *s) AVStream *ast, *vst; unsigned int version, frames_count, msecs_per_frame, player_version; int ret; + int audio_data_size; ast = avformat_new_stream(s, NULL); if (!ast) @@ -67,13 +67,13 @@ static int read_header(AVFormatContext *s) vst->codecpar->height = avio_rl16(pb); avio_r8(pb); ast->codecpar->sample_rate = avio_rl16(pb); - mvi->audio_data_size = avio_rl32(pb); + audio_data_size = avio_rl32(pb); avio_r8(pb); player_version = avio_rl32(pb); avio_rl16(pb); avio_r8(pb); - if (frames_count == 0 || mvi->audio_data_size == 0) + if (frames_count == 0 || audio_data_size <= 0) return AVERROR_INVALIDDATA; if (version != 7 || player_version > 213) { @@ -96,16 +96,16 @@ static int read_header(AVFormatContext *s) mvi->get_int = (vst->codecpar->width * (int64_t)vst->codecpar->height < (1 << 16)) ? avio_rl16 : avio_rl24; - mvi->audio_frame_size = ((uint64_t)mvi->audio_data_size << MVI_FRAC_BITS) / frames_count; + mvi->audio_frame_size = ((uint64_t)audio_data_size << MVI_FRAC_BITS) / frames_count; if (mvi->audio_frame_size <= 1 << MVI_FRAC_BITS - 1) { av_log(s, AV_LOG_ERROR, - "Invalid audio_data_size (%"PRIu32") or frames_count (%u)\n", - mvi->audio_data_size, frames_count); + "Invalid audio_data_size (%d) or frames_count (%u)\n", + audio_data_size, frames_count); return AVERROR_INVALIDDATA; } mvi->audio_size_counter = (ast->codecpar->sample_rate * 830 / mvi->audio_frame_size - 1) * mvi->audio_frame_size; - mvi->audio_size_left = mvi->audio_data_size; + mvi->audio_size_left = audio_data_size; return 0; } diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c index 1f372affcb8..50174fcd5fe 100644 --- a/libavformat/mxfdec.c +++ b/libavformat/mxfdec.c @@ -565,6 +565,10 @@ static int mxf_get_d10_aes3_packet(AVIOContext *pb, AVStream *st, AVPacket *pkt, data_ptr = pkt->data; end_ptr = pkt->data + length; buf_ptr = pkt->data + 4; /* skip SMPTE 331M header */ + + if (st->codecpar->channels > 8) + return AVERROR_INVALIDDATA; + for (; end_ptr - buf_ptr >= st->codecpar->channels * 4; ) { for (i = 0; i < st->codecpar->channels; i++) { uint32_t sample = bytestream_get_le32(&buf_ptr); @@ -624,7 +628,7 @@ static int mxf_decrypt_triplet(AVFormatContext *s, AVPacket *pkt, KLVPacket *klv return AVERROR_INVALIDDATA; // enc. 
code size = klv_decode_ber_length(pb); - if (size < 32 || size - 32 < orig_size) + if (size < 32 || size - 32 < orig_size || (int)orig_size != orig_size) return AVERROR_INVALIDDATA; avio_read(pb, ivec, 16); avio_read(pb, tmpbuf, 16); @@ -2903,7 +2907,7 @@ static int mxf_read_local_tags(MXFContext *mxf, KLVPacket *klv, MXFMetadataReadF meta = NULL; ctx = mxf; } - while (avio_tell(pb) + 4 < klv_end && !avio_feof(pb)) { + while (avio_tell(pb) + 4ULL < klv_end && !avio_feof(pb)) { int ret; int tag = avio_rb16(pb); int size = avio_rb16(pb); /* KLV specified by 0x53 */ diff --git a/libavformat/nutdec.c b/libavformat/nutdec.c index fbecf71328f..58a74612a4a 100644 --- a/libavformat/nutdec.c +++ b/libavformat/nutdec.c @@ -286,6 +286,11 @@ static int decode_main_header(NUTContext *nut) ret = AVERROR_INVALIDDATA; goto fail; } + if (tmp_size < 0 || tmp_size > INT_MAX - count) { + av_log(s, AV_LOG_ERROR, "illegal size\n"); + ret = AVERROR_INVALIDDATA; + goto fail; + } for (j = 0; j < count; j++, i++) { if (i == 'N') { diff --git a/libavformat/pp_bnk.c b/libavformat/pp_bnk.c index 07eeca3cd5b..5ffe733b18a 100644 --- a/libavformat/pp_bnk.c +++ b/libavformat/pp_bnk.c @@ -223,7 +223,7 @@ static int pp_bnk_read_header(AVFormatContext *s) par->bits_per_coded_sample = 4; par->bits_per_raw_sample = 16; par->block_align = 1; - par->bit_rate = par->sample_rate * par->bits_per_coded_sample * par->channels; + par->bit_rate = par->sample_rate * (int64_t)par->bits_per_coded_sample * par->channels; avpriv_set_pts_info(st, 64, 1, par->sample_rate); st->start_time = 0; diff --git a/libavformat/qcp.c b/libavformat/qcp.c index 168030dc161..4478875f2d8 100644 --- a/libavformat/qcp.c +++ b/libavformat/qcp.c @@ -93,7 +93,8 @@ static int qcp_read_header(AVFormatContext *s) QCPContext *c = s->priv_data; AVStream *st = avformat_new_stream(s, NULL); uint8_t buf[16]; - int i, nb_rates; + int i; + unsigned nb_rates; if (!st) return AVERROR(ENOMEM); diff --git a/libavformat/realtextdec.c b/libavformat/realtextdec.c index f534774420a..368a741240b 100644 --- a/libavformat/realtextdec.c +++ b/libavformat/realtextdec.c @@ -111,10 +111,11 @@ static int realtext_read_header(AVFormatContext *s) if (!merge) { const char *begin = ff_smil_get_attr_ptr(buf.str, "begin"); const char *end = ff_smil_get_attr_ptr(buf.str, "end"); + int64_t endi = end ? read_ts(end) : 0; sub->pos = pos; sub->pts = begin ? read_ts(begin) : 0; - sub->duration = end ? (read_ts(end) - sub->pts) : duration; + sub->duration = (end && endi > sub->pts && endi - (uint64_t)sub->pts <= INT64_MAX) ? 
endi - sub->pts : duration; } } av_bprint_clear(&buf); diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c index fc3bff48590..97378703d10 100644 --- a/libavformat/rmdec.c +++ b/libavformat/rmdec.c @@ -128,6 +128,10 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb, uint32_t version; int ret; + // Duplicate tags + if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) + return AVERROR_INVALIDDATA; + /* ra type header */ version = avio_rb16(pb); /* version */ if (version == 3) { @@ -269,9 +273,9 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb, case DEINT_ID_INT4: if (ast->coded_framesize > ast->audio_framesize || sub_packet_h <= 1 || - ast->coded_framesize * sub_packet_h > (2 + (sub_packet_h & 1)) * ast->audio_framesize) + ast->coded_framesize * (uint64_t)sub_packet_h > (2 + (sub_packet_h & 1)) * ast->audio_framesize) return AVERROR_INVALIDDATA; - if (ast->coded_framesize * sub_packet_h != 2*ast->audio_framesize) { + if (ast->coded_framesize * (uint64_t)sub_packet_h != 2*ast->audio_framesize) { avpriv_request_sample(s, "mismatching interleaver parameters"); return AVERROR_INVALIDDATA; } @@ -1012,8 +1016,8 @@ static int rm_read_packet(AVFormatContext *s, AVPacket *pkt) { RMDemuxContext *rm = s->priv_data; AVStream *st = NULL; // init to silence compiler warning - int i, len, res, seq = 1; - int64_t timestamp, pos; + int i, res, seq = 1; + int64_t timestamp, pos, len; int flags; for (;;) { @@ -1032,7 +1036,9 @@ static int rm_read_packet(AVFormatContext *s, AVPacket *pkt) ast = st->priv_data; timestamp = AV_NOPTS_VALUE; len = !ast->audio_framesize ? RAW_PACKET_SIZE : - ast->coded_framesize * ast->sub_packet_h / 2; + ast->coded_framesize * (int64_t)ast->sub_packet_h / 2; + if (len > INT_MAX) + return AVERROR_INVALIDDATA; flags = (seq++ == 1) ? 2 : 0; pos = avio_tell(s->pb); } else { diff --git a/libavformat/rpl.c b/libavformat/rpl.c index 0f00c03a52e..ad3659e9368 100644 --- a/libavformat/rpl.c +++ b/libavformat/rpl.c @@ -103,7 +103,7 @@ static AVRational read_fps(const char* line, int* error) // Truncate any numerator too large to fit into an int64_t if (num > (INT64_MAX - 9) / 10 || den > INT64_MAX / 10) break; - num = 10 * num + *line - '0'; + num = 10 * num + (*line - '0'); den *= 10; } if (!num) @@ -207,8 +207,10 @@ static int rpl_read_header(AVFormatContext *s) ast->codecpar->bits_per_coded_sample = 4; ast->codecpar->bit_rate = ast->codecpar->sample_rate * - ast->codecpar->bits_per_coded_sample * - ast->codecpar->channels; + (int64_t)ast->codecpar->channels; + if (ast->codecpar->bit_rate > INT64_MAX / ast->codecpar->bits_per_coded_sample) + return AVERROR_INVALIDDATA; + ast->codecpar->bit_rate *= ast->codecpar->bits_per_coded_sample; ast->codecpar->codec_id = AV_CODEC_ID_NONE; switch (audio_format) { @@ -334,7 +336,7 @@ static int rpl_read_packet(AVFormatContext *s, AVPacket *pkt) avio_skip(pb, 4); /* flags */ frame_size = avio_rl32(pb); - if (avio_seek(pb, -8, SEEK_CUR) < 0) + if (avio_feof(pb) || avio_seek(pb, -8, SEEK_CUR) < 0 || !frame_size) return AVERROR(EIO); ret = av_get_packet(pb, pkt, frame_size); diff --git a/libavformat/sbgdec.c b/libavformat/sbgdec.c index 83016d0c134..36cfff20fc4 100644 --- a/libavformat/sbgdec.c +++ b/libavformat/sbgdec.c @@ -935,6 +935,9 @@ static int expand_timestamps(void *log, struct sbg_script *s) } if (s->start_ts == AV_NOPTS_VALUE) s->start_ts = (s->opt_start_at_first && s->tseq) ? 
s->tseq[0].ts.t : now; + if (s->start_ts > INT64_MAX - s->opt_duration) + return AVERROR_INVALIDDATA; + s->end_ts = s->opt_duration ? s->start_ts + s->opt_duration : AV_NOPTS_VALUE; /* may be overridden later by -E option */ cur_ts = now; @@ -961,6 +964,9 @@ static int expand_tseq(void *log, struct sbg_script *s, int *nb_ev_max, tseq->name_len, tseq->name); return AVERROR(EINVAL); } + if (t0 + (uint64_t)tseq->ts.t != av_sat_add64(t0, tseq->ts.t)) + return AVERROR(EINVAL); + t0 += tseq->ts.t; for (i = 0; i < s->nb_def; i++) { if (s->def[i].name_len == tseq->name_len && @@ -1291,6 +1297,10 @@ static int generate_intervals(void *log, struct sbg_script *s, int sample_rate, ev1 = &s->events[i]; ev2 = &s->events[(i + 1) % s->nb_events]; ev1->ts_int = ev1->ts; + + if (!ev1->fade.slide && ev1 >= ev2 && ev2->ts > INT64_MAX - period) + return AVERROR_INVALIDDATA; + ev1->ts_trans = ev1->fade.slide ? ev1->ts : ev2->ts + (ev1 < ev2 ? 0 : period); } diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c index 05c07cd8524..6368ec74f9e 100644 --- a/libavformat/subtitles.c +++ b/libavformat/subtitles.c @@ -206,7 +206,7 @@ void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q) q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos : cmp_pkt_sub_pos_ts); for (i = 0; i < q->nb_subs; i++) - if (q->subs[i]->duration < 0 && i < q->nb_subs - 1) + if (q->subs[i]->duration < 0 && i < q->nb_subs - 1 && q->subs[i + 1]->pts - (uint64_t)q->subs[i]->pts <= INT64_MAX) q->subs[i]->duration = q->subs[i + 1]->pts - q->subs[i]->pts; if (!q->keep_duplicates) diff --git a/libavformat/tta.c b/libavformat/tta.c index 07faa82eb38..6aa72b5d1d5 100644 --- a/libavformat/tta.c +++ b/libavformat/tta.c @@ -119,6 +119,8 @@ static int tta_read_header(AVFormatContext *s) for (i = 0; i < c->totalframes; i++) { uint32_t size = avio_rl32(s->pb); int r; + if (avio_feof(s->pb)) + return AVERROR_INVALIDDATA; if ((r = av_add_index_entry(st, framepos, i * (int64_t)c->frame_size, size, 0, AVINDEX_KEYFRAME)) < 0) return r; diff --git a/libavformat/utils.c b/libavformat/utils.c index 1384b567714..75e5350a277 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -1208,7 +1208,9 @@ static void update_initial_durations(AVFormatContext *s, AVStream *st, (pktl->pkt.dts == AV_NOPTS_VALUE || pktl->pkt.dts == st->first_dts || pktl->pkt.dts == RELATIVE_TS_BASE) && - !pktl->pkt.duration) { + !pktl->pkt.duration && + av_sat_add64(cur_dts, duration) == cur_dts + (uint64_t)duration + ) { pktl->pkt.dts = cur_dts; if (!st->internal->avctx->has_b_frames) pktl->pkt.pts = cur_dts; @@ -3912,8 +3914,10 @@ FF_ENABLE_DEPRECATION_WARNINGS av_packet_unref(pkt1); break; } - if (pkt->duration) { - if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time) { + if (pkt->duration > 0) { + if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time + && (uint64_t)pkt->pts - st->start_time < INT64_MAX + ) { st->internal->info->codec_info_duration = FFMIN(pkt->pts - st->start_time, st->internal->info->codec_info_duration + pkt->duration); } else st->internal->info->codec_info_duration += pkt->duration; @@ -4059,7 +4063,7 @@ FF_ENABLE_DEPRECATION_WARNINGS if (!st->r_frame_rate.num) { if ( avctx->time_base.den * (int64_t) st->time_base.num - <= avctx->time_base.num * avctx->ticks_per_frame * (uint64_t) st->time_base.den) { + <= avctx->time_base.num * (uint64_t)avctx->ticks_per_frame * 
st->time_base.den) { av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, avctx->time_base.den, (int64_t)avctx->time_base.num * avctx->ticks_per_frame, INT_MAX); } else { diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c index 8214ab8498d..b11c6091bc3 100644 --- a/libavformat/wavdec.c +++ b/libavformat/wavdec.c @@ -498,6 +498,8 @@ static int wav_read_header(AVFormatContext *s) wav->smv_data_ofs = avio_tell(pb) + (size - 5) * 3; avio_rl24(pb); wav->smv_block_size = avio_rl24(pb); + if (!wav->smv_block_size) + return AVERROR_INVALIDDATA; avpriv_set_pts_info(vst, 32, 1, avio_rl24(pb)); vst->duration = avio_rl24(pb); avio_rl24(pb); @@ -718,7 +720,7 @@ static int wav_read_packet(AVFormatContext *s, AVPacket *pkt) if (wav->smv_last_stream) { uint64_t old_pos = avio_tell(s->pb); uint64_t new_pos = wav->smv_data_ofs + - wav->smv_block * wav->smv_block_size; + wav->smv_block * (int64_t)wav->smv_block_size; if (avio_seek(s->pb, new_pos, SEEK_SET) < 0) { ret = AVERROR_EOF; goto smv_out; diff --git a/libavformat/wtvdec.c b/libavformat/wtvdec.c index 4b3b7fb407b..1d5ba03befa 100644 --- a/libavformat/wtvdec.c +++ b/libavformat/wtvdec.c @@ -660,6 +660,8 @@ static AVStream * parse_media_type(AVFormatContext *s, AVStream *st, int sid, avio_skip(pb, size - 32); ff_get_guid(pb, &actual_subtype); ff_get_guid(pb, &actual_formattype); + if (avio_feof(pb)) + return NULL; avio_seek(pb, -size, SEEK_CUR); st = parse_media_type(s, st, sid, mediatype, actual_subtype, actual_formattype, size - 32); @@ -817,7 +819,7 @@ static int parse_chunks(AVFormatContext *s, int mode, int64_t seekts, int *len_p avio_skip(pb, 12); ff_get_guid(pb, &formattype); size = avio_rl32(pb); - if (size < 0 || size > INT_MAX - 92) + if (size < 0 || size > INT_MAX - 92 - consumed) return AVERROR_INVALIDDATA; parse_media_type(s, 0, sid, mediatype, subtype, formattype, size); consumed += 92 + size; @@ -833,7 +835,7 @@ static int parse_chunks(AVFormatContext *s, int mode, int64_t seekts, int *len_p avio_skip(pb, 12); ff_get_guid(pb, &formattype); size = avio_rl32(pb); - if (size < 0 || size > INT_MAX - 76) + if (size < 0 || size > INT_MAX - 76 - consumed) return AVERROR_INVALIDDATA; parse_media_type(s, s->streams[stream_index], sid, mediatype, subtype, formattype, size); consumed += 76 + size; diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 52f6b9a3bf6..6bd0f07a623 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -291,6 +291,12 @@ int av_cpu_count(void) DWORD_PTR proc_aff, sys_aff; if (GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)) nb_cpus = av_popcount64(proc_aff); +#elif HAVE_SYSCTL && defined(HW_NCPUONLINE) + int mib[2] = { CTL_HW, HW_NCPUONLINE }; + size_t len = sizeof(nb_cpus); + + if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) + nb_cpus = 0; #elif HAVE_SYSCTL && defined(HW_NCPU) int mib[2] = { CTL_HW, HW_NCPU }; size_t len = sizeof(nb_cpus); diff --git a/libavutil/mathematics.h b/libavutil/mathematics.h index 54901800ba6..64d4137a602 100644 --- a/libavutil/mathematics.h +++ b/libavutil/mathematics.h @@ -134,6 +134,7 @@ int64_t av_rescale(int64_t a, int64_t b, int64_t c) av_const; * * The operation is mathematically equivalent to `a * b / c`, but writing that * directly can overflow, and does not support different rounding methods. + * If the result is not representable then INT64_MIN is returned. 
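 *
 * A minimal usage sketch (hypothetical values; not part of this header):
 * @code
 * // Convert a 48 kHz sample count to a 90 kHz MPEG time base, rounding
 * // to nearest; per the note above, INT64_MIN signals an unrepresentable
 * // result and must be checked by the caller.
 * int64_t ts90k = av_rescale_rnd(nsamples, 90000, 48000, AV_ROUND_NEAR_INF);
 * @endcode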
* * @see av_rescale(), av_rescale_q(), av_rescale_q_rnd() */ diff --git a/libswscale/alphablend.c b/libswscale/alphablend.c index b5686599c00..b5967c889bc 100644 --- a/libswscale/alphablend.c +++ b/libswscale/alphablend.c @@ -26,7 +26,7 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); int nb_components = desc->nb_components; - int plane, x, y; + int plane, x, ysrc; int plane_count = isGray(c->srcFormat) ? 1 : 3; int sixteen_bits = desc->comp[0].depth >= 9; unsigned off = 1<<(desc->comp[0].depth - 1); @@ -50,14 +50,15 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], int w = plane ? c->chrSrcW : c->srcW; int x_subsample = plane ? desc->log2_chroma_w: 0; int y_subsample = plane ? desc->log2_chroma_h: 0; - for (y = srcSliceY >> y_subsample; y < AV_CEIL_RSHIFT(srcSliceH, y_subsample); y++) { + for (ysrc = 0; ysrc < AV_CEIL_RSHIFT(srcSliceH, y_subsample); ysrc++) { + int y = ysrc + (srcSliceY >> y_subsample); if (x_subsample || y_subsample) { int alpha; unsigned u; if (sixteen_bits) { ptrdiff_t alpha_step = srcStride[plane_count] >> 1; - const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * y); - const uint16_t *a = (const uint16_t *)(src[plane_count] + (srcStride[plane_count] * y << y_subsample)); + const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * ysrc); + const uint16_t *a = (const uint16_t *)(src[plane_count] + (srcStride[plane_count] * ysrc << y_subsample)); uint16_t *d = ( uint16_t *)(dst[plane ] + dstStride[plane ] * y); if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) { for (x = 0; x < w; x++) { @@ -82,8 +83,8 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } } else { ptrdiff_t alpha_step = srcStride[plane_count]; - const uint8_t *s = src[plane ] + srcStride[plane] * y; - const uint8_t *a = src[plane_count] + (srcStride[plane_count] * y << y_subsample); + const uint8_t *s = src[plane ] + srcStride[plane] * ysrc; + const uint8_t *a = src[plane_count] + (srcStride[plane_count] * ysrc << y_subsample); uint8_t *d = dst[plane ] + dstStride[plane] * y; for (x = 0; x < w; x++) { if (y_subsample) { @@ -97,8 +98,8 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } } else { if (sixteen_bits) { - const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * y); - const uint16_t *a = (const uint16_t *)(src[plane_count] + srcStride[plane_count] * y); + const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * ysrc); + const uint16_t *a = (const uint16_t *)(src[plane_count] + srcStride[plane_count] * ysrc); uint16_t *d = ( uint16_t *)(dst[plane ] + dstStride[plane ] * y); if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) { for (x = 0; x < w; x++) { @@ -113,8 +114,8 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } } } else { - const uint8_t *s = src[plane ] + srcStride[plane] * y; - const uint8_t *a = src[plane_count] + srcStride[plane_count] * y; + const uint8_t *s = src[plane ] + srcStride[plane] * ysrc; + const uint8_t *a = src[plane_count] + srcStride[plane_count] * ysrc; uint8_t *d = dst[plane ] + dstStride[plane] * y; for (x = 0; x < w; x++) { unsigned u = s[x]*a[x] + target_table[((x^y)>>5)&1][plane]*(255-a[x]) + 128; @@ -127,10 +128,11 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } else { int alpha_pos = desc->comp[plane_count].offset; int w = c->srcW; - for (y = srcSliceY; y < srcSliceH; y++) { + for (ysrc = 0; ysrc < srcSliceH; ysrc++) { + int y = ysrc + 
srcSliceY; if (sixteen_bits) { - const uint16_t *s = (const uint16_t *)(src[0] + srcStride[0] * y + 2*!alpha_pos); - const uint16_t *a = (const uint16_t *)(src[0] + srcStride[0] * y + alpha_pos); + const uint16_t *s = (const uint16_t *)(src[0] + srcStride[0] * ysrc + 2*!alpha_pos); + const uint16_t *a = (const uint16_t *)(src[0] + srcStride[0] * ysrc + alpha_pos); uint16_t *d = ( uint16_t *)(dst[0] + dstStride[0] * y); if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) { for (x = 0; x < w; x++) { @@ -151,8 +153,8 @@ int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], } } } else { - const uint8_t *s = src[0] + srcStride[0] * y + !alpha_pos; - const uint8_t *a = src[0] + srcStride[0] * y + alpha_pos; + const uint8_t *s = src[0] + srcStride[0] * ysrc + !alpha_pos; + const uint8_t *a = src[0] + srcStride[0] * ysrc + alpha_pos; uint8_t *d = dst[0] + dstStride[0] * y; for (x = 0; x < w; x++) { for (plane = 0; plane < plane_count; plane++) { diff --git a/libswscale/slice.c b/libswscale/slice.c index d96db133648..b185b4aa189 100644 --- a/libswscale/slice.c +++ b/libswscale/slice.c @@ -288,7 +288,10 @@ int ff_init_filters(SwsContext * c) if (!c->desc) return AVERROR(ENOMEM); c->slice = av_mallocz_array(sizeof(SwsSlice), c->numSlice); - + if (!c->slice) { + res = AVERROR(ENOMEM); + goto cleanup; + } res = alloc_slice(&c->slice[0], c->srcFormat, c->srcH, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0); if (res < 0) goto cleanup; diff --git a/tests/ref/fate/ts-opus-demux b/tests/ref/fate/ts-opus-demux index 3c5edffb2cc..37534350a1f 100644 --- a/tests/ref/fate/ts-opus-demux +++ b/tests/ref/fate/ts-opus-demux @@ -1,4 +1,4 @@ -#extradata 0: 30, 0x53be0347 +#extradata 0: 30, 0x69290482 #tb 0: 1/90000 #media_type 0: audio #codec_id 0: opus diff --git a/tools/cws2fws.c b/tools/cws2fws.c index 7046b699571..9ce321fe208 100644 --- a/tools/cws2fws.c +++ b/tools/cws2fws.c @@ -89,6 +89,12 @@ int main(int argc, char *argv[]) for (i = 0; i < comp_len - 8;) { int ret, len = read(fd_in, &buf_in, 1024); + if (len == -1) { + printf("read failure\n"); + inflateEnd(&zstream); + goto out; + } + dbgprintf("read %d bytes\n", len); last_out = zstream.total_out; From de51e04012d2c3714a8c3a3d8dbbd7cfe31312c6 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:11:05 +0800 Subject: [PATCH 02/41] Bump version to 4.4.1-1 --- build.yaml | 2 +- debian/changelog | 6 ++++++ debian/control | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/build.yaml b/build.yaml index 270ce120d58..ab308e87317 100644 --- a/build.yaml +++ b/build.yaml @@ -1,7 +1,7 @@ --- # We just wrap `build` so this is really it name: "jellyfin-ffmpeg" -version: "4.4-1" +version: "4.4.1-1" packages: - stretch-amd64 - stretch-armhf diff --git a/debian/changelog b/debian/changelog index 75790999d87..93b3fd2d66c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +jellyfin-ffmpeg (4.4.1-1) unstable; urgency=medium + + * New upstream version 4.4.1 + + -- nyanmisaka Sun, 7 Nov 2021 15:09:03 +0800 + jellyfin-ffmpeg (4.4-1) unstable; urgency=medium * New upstream version 4.4 diff --git a/debian/control b/debian/control index c4c66d37954..9602939eb4b 100644 --- a/debian/control +++ b/debian/control @@ -5,7 +5,7 @@ Maintainer: Jellyfin Packaging Team Uploaders: Jellyfin Packaging Team Rules-Requires-Root: no Homepage: https://ffmpeg.org/ -Standards-Version: 4.4 +Standards-Version: 4.4.1 Vcs-Git: https://github.com/jellyfin/jellyfin-ffmpeg.git Vcs-Browser: https://github.com/jellyfin/jellyfin-ffmpeg Build-Depends: 
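Several hunks in the 4.4.1 merge above (mpc8.c, sbgdec.c, utils.c) rely on the
same overflow-detection idiom: compute the sum once with av_sat_add64(), which
clamps to INT64_MAX/INT64_MIN, and once as a wrapping uint64_t addition; the
two results differ exactly when the signed addition would overflow. A minimal
sketch of the idiom, with hypothetical names (not a function from the tree):

    #include <stdint.h>
    #include "libavutil/common.h"   /* av_sat_add64() */

    /* Return 1 if a + b fits in int64_t, 0 if it would overflow.
     * The saturated sum equals the wrapped sum only when no
     * clamping occurred, i.e. when the true sum is representable. */
    static int add64_in_range(int64_t a, int64_t b)
    {
        return av_sat_add64(a, b) == a + (uint64_t)b;
    }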
From a2562e2941ec4a8343ef838cd248dfa0400f1b82 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Tue, 14 Sep 2021 18:28:12 +0800 Subject: [PATCH 03/41] only disable x86 asm in dav1d if nasm is outdated --- debian/rules | 8 +------- docker-build.sh | 9 +++++---- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/debian/rules b/debian/rules index 62c2fdcfb9a..ad7a87b435b 100755 --- a/debian/rules +++ b/debian/rules @@ -35,6 +35,7 @@ CONFIG := --prefix=${TARGET_DIR} \ --enable-libopus \ --enable-libtheora \ --enable-libvorbis \ + --enable-libdav1d \ --enable-libwebp \ --enable-libvpx \ --enable-libx264 \ @@ -44,7 +45,6 @@ CONFIG := --prefix=${TARGET_DIR} \ CONFIG_ARM_COMMON := --toolchain=hardened \ --enable-cross-compile \ - --enable-libdav1d \ --enable-omx \ --enable-omx-rpi \ @@ -70,14 +70,8 @@ CONFIG_x86 := --arch=amd64 \ --enable-nvdec \ --enable-ffnvcodec \ -CONFIG_DAV1D := --enable-libdav1d \ - HOST_ARCH := $(shell arch) BUILD_ARCH := ${DEB_HOST_MULTIARCH} - -ifeq ($(ENABLE_X86_DAV1D),true) - CONFIG_x86 += $(CONFIG_DAV1D) -endif ifeq ($(BUILD_ARCH),x86_64-linux-gnu) # Native amd64 build CONFIG += $(CONFIG_x86) diff --git a/docker-build.sh b/docker-build.sh index f9ad5012a0d..60040b952ed 100755 --- a/docker-build.sh +++ b/docker-build.sh @@ -42,17 +42,18 @@ prepare_extra_common() { nasmminver="2.13.02" nasmavx512ver="2.14.0" if [ "$(printf '%s\n' "$nasmminver" "$nasmver" | sort -V | head -n1)" = "$nasmminver" ]; then - export ENABLE_X86_DAV1D=true + x86asm=true if [ "$(printf '%s\n' "$nasmavx512ver" "$nasmver" | sort -V | head -n1)" = "$nasmavx512ver" ]; then avx512=true else avx512=false fi else - export ENABLE_X86_DAV1D=false + x86asm=false + avx512=false fi - if [ "${ENABLE_X86_DAV1D}" = "true" ] && [ "${ARCH}" = "amd64" ]; then - meson -Denable_asm=true \ + if [ "${ARCH}" = "amd64" ]; then + meson -Denable_asm=$x86asm \ -Denable_avx512=$avx512 \ -Denable_tests=false \ -Ddefault_library=shared \ From d1819de544ae4bb567956453489350c4cba493c9 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sat, 27 Nov 2021 17:55:54 +0800 Subject: [PATCH 04/41] update deps for linux build --- debian/rules | 11 +++++------ docker-build.sh | 16 +++++++++------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/debian/rules b/debian/rules index ad7a87b435b..2cf0e991b4c 100755 --- a/debian/rules +++ b/debian/rules @@ -22,15 +22,14 @@ CONFIG := --prefix=${TARGET_DIR} \ --enable-gpl \ --enable-version3 \ --enable-static \ - --enable-libfontconfig \ - --enable-fontconfig \ --enable-gmp \ --enable-gnutls \ - --enable-libass \ - --enable-libbluray \ --enable-libdrm \ + --enable-libass \ --enable-libfreetype \ --enable-libfribidi \ + --enable-libfontconfig \ + --enable-libbluray \ --enable-libmp3lame \ --enable-libopus \ --enable-libtheora \ @@ -63,12 +62,12 @@ CONFIG_x86 := --arch=amd64 \ --enable-amf \ --enable-libmfx \ --enable-vdpau \ + --enable-ffnvcodec \ --enable-cuda \ --enable-cuda-llvm \ --enable-cuvid \ - --enable-nvenc \ --enable-nvdec \ - --enable-ffnvcodec \ + --enable-nvenc \ HOST_ARCH := $(shell arch) BUILD_ARCH := ${DEB_HOST_MULTIARCH} diff --git a/docker-build.sh b/docker-build.sh index 60040b952ed..c1aaa270160 100755 --- a/docker-build.sh +++ b/docker-build.sh @@ -34,7 +34,7 @@ prepare_extra_common() { # Download and install dav1d pushd ${SOURCE_DIR} - git clone -b 0.9.1 --depth=1 https://code.videolan.org/videolan/dav1d.git + git clone -b 0.9.2 --depth=1 https://code.videolan.org/videolan/dav1d.git pushd dav1d mkdir build pushd build @@ -103,7 +103,7 @@ 
prepare_extra_amd64() { # Download and install libva pushd ${SOURCE_DIR} - git clone -b v2.12-branch --depth=1 https://github.com/intel/libva + git clone --depth=1 https://github.com/intel/libva pushd libva sed -i 's|getenv("LIBVA_DRIVERS_PATH")|"/usr/lib/jellyfin-ffmpeg/lib/dri:/usr/lib/x86_64-linux-gnu/dri:/usr/lib/dri:/usr/local/lib/dri"|g' va/va.c sed -i 's|getenv("LIBVA_DRIVER_NAME")|NULL|g' va/va.c @@ -117,7 +117,7 @@ prepare_extra_amd64() { # Download and install intel-vaapi-driver pushd ${SOURCE_DIR} - git clone -b v2.4-branch --depth=1 https://github.com/intel/intel-vaapi-driver + git clone --depth=1 https://github.com/intel/intel-vaapi-driver pushd intel-vaapi-driver ./autogen.sh ./configure LIBVA_DRIVERS_PATH=${TARGET_DIR}/lib/dri @@ -130,7 +130,7 @@ prepare_extra_amd64() { # Download and install gmmlib pushd ${SOURCE_DIR} - git clone -b intel-gmmlib-21.2.1 --depth=1 https://github.com/intel/gmmlib + git clone -b intel-gmmlib-21.3.5 --depth=1 https://github.com/intel/gmmlib pushd gmmlib mkdir build && pushd build cmake -DCMAKE_INSTALL_PREFIX=${TARGET_DIR} .. @@ -143,11 +143,13 @@ prepare_extra_amd64() { # Download and install MediaSDK pushd ${SOURCE_DIR} - git clone -b intel-mediasdk-21.2.3 --depth=1 https://github.com/Intel-Media-SDK/MediaSDK + git clone -b intel-mediasdk-21.4.3 --depth=1 https://github.com/Intel-Media-SDK/MediaSDK pushd MediaSDK sed -i 's|MFX_PLUGINS_CONF_DIR "/plugins.cfg"|"/usr/lib/jellyfin-ffmpeg/lib/mfx/plugins.cfg"|g' api/mfx_dispatch/linux/mfxloader.cpp mkdir build && pushd build - cmake -DCMAKE_INSTALL_PREFIX=${TARGET_DIR} .. + cmake -DCMAKE_INSTALL_PREFIX=${TARGET_DIR} \ + -DBUILD_SAMPLES=OFF \ + .. make -j$(nproc) && make install && make install DESTDIR=${SOURCE_DIR}/intel echo "intel${TARGET_DIR}/lib/libmfx* usr/lib/jellyfin-ffmpeg/lib" >> ${SOURCE_DIR}/debian/jellyfin-ffmpeg.install echo "intel${TARGET_DIR}/lib/mfx/*.so usr/lib/jellyfin-ffmpeg/lib/mfx" >> ${SOURCE_DIR}/debian/jellyfin-ffmpeg.install @@ -161,7 +163,7 @@ prepare_extra_amd64() { # Full Feature Build: ENABLE_KERNELS=ON(Default) ENABLE_NONFREE_KERNELS=ON(Default) # Free Kernel Build: ENABLE_KERNELS=ON ENABLE_NONFREE_KERNELS=OFF #pushd ${SOURCE_DIR} - #git clone -b intel-media-21.2.3 --depth=1 https://github.com/intel/media-driver + #git clone -b intel-media-21.4.3 --depth=1 https://github.com/intel/media-driver #pushd media-driver #mkdir build && pushd build #cmake -DCMAKE_INSTALL_PREFIX=${TARGET_DIR} \ From 86b92d1804e1afc585309bd16516f4aeb59531fb Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Thu, 14 Oct 2021 17:29:50 +0800 Subject: [PATCH 05/41] cleanup for adding windows patches --- debian/patches/0001_fix-segment-muxer.patch | 37 - ...for-uploading-normal-frames-to-VAAPI.patch | 241 --- ...-for-the-broken-tonemap_vaapi-filter.patch | 259 --- .../0004-cuda-format-converter-impl.patch | 1438 ------------- debian/patches/0005-cuda-tonemap-impl.patch | 1824 ----------------- ...for-peak-detection-in-opencl-tonemap.patch | 755 ------- .../patches/0007-fix-for-fmp4-in-hlsenc.patch | 24 - ...fix-nvdec-exceeded-32-surfaces-error.patch | 17 - .../0009-fix-for-nvenc-from-upstream.patch | 1716 ---------------- debian/patches/series | 9 - 10 files changed, 6320 deletions(-) delete mode 100644 debian/patches/0001_fix-segment-muxer.patch delete mode 100644 debian/patches/0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch delete mode 100644 debian/patches/0003-fix-for-the-broken-tonemap_vaapi-filter.patch delete mode 100644 debian/patches/0004-cuda-format-converter-impl.patch 
delete mode 100644 debian/patches/0005-cuda-tonemap-impl.patch delete mode 100644 debian/patches/0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch delete mode 100644 debian/patches/0007-fix-for-fmp4-in-hlsenc.patch delete mode 100644 debian/patches/0008-fix-nvdec-exceeded-32-surfaces-error.patch delete mode 100644 debian/patches/0009-fix-for-nvenc-from-upstream.patch delete mode 100644 debian/patches/series diff --git a/debian/patches/0001_fix-segment-muxer.patch b/debian/patches/0001_fix-segment-muxer.patch deleted file mode 100644 index d3b57793d91..00000000000 --- a/debian/patches/0001_fix-segment-muxer.patch +++ /dev/null @@ -1,37 +0,0 @@ -Index: jellyfin-ffmpeg/libavformat/segment.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/segment.c -+++ jellyfin-ffmpeg/libavformat/segment.c -@@ -87,6 +87,7 @@ typedef struct SegmentContext { - int64_t last_val; ///< remember last time for wrap around detection - int cut_pending; - int header_written; ///< whether we've already called avformat_write_header -+ int64_t start_pts; ///< pts of the very first packet processed, used to compute correct segment length - - char *entry_prefix; ///< prefix to add to list entry filenames - int list_type; ///< set the list type -@@ -712,6 +713,7 @@ static int seg_init(AVFormatContext *s) - if ((ret = parse_frames(s, &seg->frames, &seg->nb_frames, seg->frames_str)) < 0) - return ret; - } else { -+ seg->start_pts = -1; - if (seg->use_clocktime) { - if (seg->time <= 0) { - av_log(s, AV_LOG_ERROR, "Invalid negative segment_time with segment_atclocktime option set\n"); -@@ -889,7 +891,15 @@ calc_times: - seg->cut_pending = 1; - seg->last_val = wrapped_val; - } else { -- end_pts = seg->time * (seg->segment_count + 1); -+ if (seg->start_pts != -1) { -+ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); -+ } else if (pkt->stream_index == seg->reference_stream_index && pkt->pts != AV_NOPTS_VALUE) { -+ // this is the first packet of the reference stream we see, initialize start point -+ seg->start_pts = av_rescale_q(pkt->pts, st->time_base, AV_TIME_BASE_Q); -+ seg->cur_entry.start_time = (double)pkt->pts * av_q2d(st->time_base); -+ seg->cur_entry.start_pts = seg->start_pts; -+ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); -+ } - } - } - diff --git a/debian/patches/0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch b/debian/patches/0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch deleted file mode 100644 index 63eea729c55..00000000000 --- a/debian/patches/0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch +++ /dev/null @@ -1,241 +0,0 @@ -#From c1fb9225a1b8e26875cb9b4e2b3ae2f4d68c5630 Mon Sep 17 00:00:00 2001 -#From: nyanmisaka -#Date: Sun, 24 Jan 2021 19:58:04 +0800 -#Subject: [PATCH] lavfi: add a filter for uploading normal frames to VAAPI -Index: jellyfin-ffmpeg/configure -=================================================================== ---- jellyfin-ffmpeg.orig/configure -+++ jellyfin-ffmpeg/configure -@@ -3577,6 +3577,7 @@ fspp_filter_deps="gpl" - headphone_filter_select="fft" - histeq_filter_deps="gpl" - hqdn3d_filter_deps="gpl" -+hwupload_vaapi_filter_deps="vaapi" - interlace_filter_deps="gpl" - kerndeint_filter_deps="gpl" - ladspa_filter_deps="ladspa libdl" -Index: jellyfin-ffmpeg/libavfilter/Makefile -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/Makefile -+++ jellyfin-ffmpeg/libavfilter/Makefile 
-@@ -297,6 +297,7 @@ OBJS-$(CONFIG_HUE_FILTER) - OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o - OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o - OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o -+OBJS-$(CONFIG_HWUPLOAD_VAAPI_FILTER) += vf_hwupload_vaapi.o - OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o - OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o - OBJS-$(CONFIG_IDENTITY_FILTER) += vf_identity.o -Index: jellyfin-ffmpeg/libavfilter/allfilters.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/allfilters.c -+++ jellyfin-ffmpeg/libavfilter/allfilters.c -@@ -282,6 +282,7 @@ extern AVFilter ff_vf_hwdownload; - extern AVFilter ff_vf_hwmap; - extern AVFilter ff_vf_hwupload; - extern AVFilter ff_vf_hwupload_cuda; -+extern AVFilter ff_vf_hwupload_vaapi; - extern AVFilter ff_vf_hysteresis; - extern AVFilter ff_vf_identity; - extern AVFilter ff_vf_idet; -Index: jellyfin-ffmpeg/libavfilter/vf_hwupload_vaapi.c -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/vf_hwupload_vaapi.c -@@ -0,0 +1,196 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavutil/buffer.h" -+#include "libavutil/hwcontext.h" -+#include "libavutil/log.h" -+#include "libavutil/opt.h" -+ -+#include "avfilter.h" -+#include "formats.h" -+#include "internal.h" -+#include "video.h" -+ -+typedef struct VaapiUploadContext { -+ const AVClass *class; -+ int device_idx; -+ -+ AVBufferRef *hwdevice; -+ AVBufferRef *hwframe; -+} VaapiUploadContext; -+ -+static av_cold int vaapiupload_init(AVFilterContext *ctx) -+{ -+ VaapiUploadContext *s = ctx->priv; -+ return av_hwdevice_ctx_create(&s->hwdevice, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0); -+} -+ -+static av_cold void vaapiupload_uninit(AVFilterContext *ctx) -+{ -+ VaapiUploadContext *s = ctx->priv; -+ -+ av_buffer_unref(&s->hwframe); -+ av_buffer_unref(&s->hwdevice); -+} -+ -+static int vaapiupload_query_formats(AVFilterContext *ctx) -+{ -+ int ret; -+ -+ static const enum AVPixelFormat input_pix_fmts[] = { -+ AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, -+ AV_PIX_FMT_UYVY422, AV_PIX_FMT_YUYV422, AV_PIX_FMT_Y210, -+ AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, -+ AV_PIX_FMT_GRAY8, AV_PIX_FMT_P010, AV_PIX_FMT_BGRA, -+ AV_PIX_FMT_BGR0, AV_PIX_FMT_RGBA, AV_PIX_FMT_RGB0, -+ AV_PIX_FMT_ABGR, AV_PIX_FMT_0BGR, AV_PIX_FMT_ARGB, -+ AV_PIX_FMT_0RGB, AV_PIX_FMT_NONE, -+ }; -+ static const enum AVPixelFormat output_pix_fmts[] = { -+ AV_PIX_FMT_VAAPI, AV_PIX_FMT_NONE, -+ }; -+ AVFilterFormats *in_fmts = ff_make_format_list(input_pix_fmts); -+ AVFilterFormats *out_fmts; -+ -+ ret = ff_formats_ref(in_fmts, &ctx->inputs[0]->outcfg.formats); -+ if (ret < 0) -+ return ret; -+ -+ 
out_fmts = ff_make_format_list(output_pix_fmts); -+ -+ ret = ff_formats_ref(out_fmts, &ctx->outputs[0]->incfg.formats); -+ if (ret < 0) -+ return ret; -+ -+ return 0; -+} -+ -+static int vaapiupload_config_output(AVFilterLink *outlink) -+{ -+ AVFilterContext *ctx = outlink->src; -+ AVFilterLink *inlink = ctx->inputs[0]; -+ VaapiUploadContext *s = ctx->priv; -+ -+ AVHWFramesContext *hwframe_ctx; -+ int ret; -+ -+ av_buffer_unref(&s->hwframe); -+ s->hwframe = av_hwframe_ctx_alloc(s->hwdevice); -+ if (!s->hwframe) -+ return AVERROR(ENOMEM); -+ -+ hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; -+ hwframe_ctx->format = AV_PIX_FMT_VAAPI; -+ if (inlink->hw_frames_ctx) { -+ AVHWFramesContext *in_hwframe_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; -+ hwframe_ctx->sw_format = in_hwframe_ctx->sw_format; -+ } else { -+ hwframe_ctx->sw_format = inlink->format; -+ } -+ hwframe_ctx->width = inlink->w; -+ hwframe_ctx->height = inlink->h; -+ -+ ret = av_hwframe_ctx_init(s->hwframe); -+ if (ret < 0) -+ return ret; -+ -+ outlink->hw_frames_ctx = av_buffer_ref(s->hwframe); -+ if (!outlink->hw_frames_ctx) -+ return AVERROR(ENOMEM); -+ -+ return 0; -+} -+ -+static int vaapiupload_filter_frame(AVFilterLink *link, AVFrame *in) -+{ -+ AVFilterContext *ctx = link->dst; -+ AVFilterLink *outlink = ctx->outputs[0]; -+ -+ AVFrame *out = NULL; -+ int ret; -+ -+ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); -+ if (!out) { -+ ret = AVERROR(ENOMEM); -+ goto fail; -+ } -+ -+ out->width = in->width; -+ out->height = in->height; -+ -+ ret = av_hwframe_transfer_data(out, in, 0); -+ if (ret < 0) { -+ av_log(ctx, AV_LOG_ERROR, "Error transferring data to the GPU\n"); -+ goto fail; -+ } -+ -+ ret = av_frame_copy_props(out, in); -+ if (ret < 0) -+ goto fail; -+ -+ av_frame_free(&in); -+ -+ return ff_filter_frame(ctx->outputs[0], out); -+fail: -+ av_frame_free(&in); -+ av_frame_free(&out); -+ return ret; -+} -+ -+static const AVClass vaapiupload_class = { -+ .class_name = "vaapiupload", -+ .item_name = av_default_item_name, -+ .option = NULL, -+ .version = LIBAVUTIL_VERSION_INT, -+}; -+ -+static const AVFilterPad vaapiupload_inputs[] = { -+ { -+ .name = "default", -+ .type = AVMEDIA_TYPE_VIDEO, -+ .filter_frame = vaapiupload_filter_frame, -+ }, -+ { NULL } -+}; -+ -+static const AVFilterPad vaapiupload_outputs[] = { -+ { -+ .name = "default", -+ .type = AVMEDIA_TYPE_VIDEO, -+ .config_props = vaapiupload_config_output, -+ }, -+ { NULL } -+}; -+ -+AVFilter ff_vf_hwupload_vaapi = { -+ .name = "hwupload_vaapi", -+ .description = NULL_IF_CONFIG_SMALL("Upload a system memory frame to a VAAPI device."), -+ -+ .init = vaapiupload_init, -+ .uninit = vaapiupload_uninit, -+ -+ .query_formats = vaapiupload_query_formats, -+ -+ .priv_size = sizeof(VaapiUploadContext), -+ .priv_class = &vaapiupload_class, -+ -+ .inputs = vaapiupload_inputs, -+ .outputs = vaapiupload_outputs, -+ -+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -+}; diff --git a/debian/patches/0003-fix-for-the-broken-tonemap_vaapi-filter.patch b/debian/patches/0003-fix-for-the-broken-tonemap_vaapi-filter.patch deleted file mode 100644 index b5be1c3afb5..00000000000 --- a/debian/patches/0003-fix-for-the-broken-tonemap_vaapi-filter.patch +++ /dev/null @@ -1,259 +0,0 @@ -# Fix for the broken tonemap_vaapi filter -# avfilter/tonemap_vaapi: pass filter parameters to VA parameter buffer -# avfilter: Add H2H support in tonemap_vaapi -Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_vaapi.c -=================================================================== 
---- jellyfin-ffmpeg.orig/libavfilter/vf_tonemap_vaapi.c -+++ jellyfin-ffmpeg/libavfilter/vf_tonemap_vaapi.c -@@ -41,7 +41,11 @@ typedef struct HDRVAAPIContext { - enum AVColorTransferCharacteristic color_transfer; - enum AVColorSpace color_matrix; - -+ char *master_display; -+ char *content_light; -+ - VAHdrMetaDataHDR10 in_metadata; -+ VAHdrMetaDataHDR10 out_metadata; - - AVFrameSideData *src_display; - AVFrameSideData *src_light; -@@ -148,6 +152,107 @@ static int tonemap_vaapi_save_metadata(A - return 0; - } - -+static int tonemap_vaapi_update_sidedata(AVFilterContext *avctx, AVFrame *output_frame) -+{ -+ HDRVAAPIContext *ctx = avctx->priv; -+ AVFrameSideData *metadata; -+ AVMasteringDisplayMetadata *hdr_meta; -+ AVFrameSideData *metadata_lt; -+ AVContentLightMetadata *hdr_meta_lt; -+ -+ int i; -+ const int mapping[3] = {1, 2, 0}; //green, blue, red -+ const int chroma_den = 50000; -+ const int luma_den = 10000; -+ -+ metadata = av_frame_get_side_data(output_frame, -+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); -+ if (metadata) { -+ av_frame_remove_side_data(output_frame, -+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); -+ metadata = av_frame_new_side_data(output_frame, -+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, -+ sizeof(AVMasteringDisplayMetadata)); -+ } else { -+ metadata = av_frame_new_side_data(output_frame, -+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, -+ sizeof(AVMasteringDisplayMetadata)); -+ } -+ -+ hdr_meta = (AVMasteringDisplayMetadata *)metadata->data; -+ -+ for (i = 0; i < 3; i++) { -+ const int j = mapping[i]; -+ hdr_meta->display_primaries[j][0].num = ctx->out_metadata.display_primaries_x[i]; -+ hdr_meta->display_primaries[j][0].den = chroma_den; -+ -+ hdr_meta->display_primaries[j][1].num = ctx->out_metadata.display_primaries_y[i]; -+ hdr_meta->display_primaries[j][1].den = chroma_den; -+ } -+ -+ hdr_meta->white_point[0].num = ctx->out_metadata.white_point_x; -+ hdr_meta->white_point[0].den = chroma_den; -+ -+ hdr_meta->white_point[1].num = ctx->out_metadata.white_point_y; -+ hdr_meta->white_point[1].den = chroma_den; -+ hdr_meta->has_primaries = 1; -+ -+ hdr_meta->max_luminance.num = ctx->out_metadata.max_display_mastering_luminance; -+ hdr_meta->max_luminance.den = luma_den; -+ -+ hdr_meta->min_luminance.num = ctx->out_metadata.min_display_mastering_luminance; -+ hdr_meta->min_luminance.den = luma_den; -+ hdr_meta->has_luminance = 1; -+ -+ av_log(avctx, AV_LOG_DEBUG, -+ "Mastering Display Metadata(out luminance):\n"); -+ av_log(avctx, AV_LOG_DEBUG, -+ "min_luminance=%u, max_luminance=%u\n", -+ ctx->out_metadata.min_display_mastering_luminance, -+ ctx->out_metadata.max_display_mastering_luminance); -+ -+ av_log(avctx, AV_LOG_DEBUG, -+ "Mastering Display Metadata(out primaries):\n"); -+ av_log(avctx, AV_LOG_DEBUG, -+ "G(%u,%u) B(%u,%u) R(%u,%u) WP(%u,%u)\n", -+ ctx->out_metadata.display_primaries_x[0], -+ ctx->out_metadata.display_primaries_y[0], -+ ctx->out_metadata.display_primaries_x[1], -+ ctx->out_metadata.display_primaries_y[1], -+ ctx->out_metadata.display_primaries_x[2], -+ ctx->out_metadata.display_primaries_y[2], -+ ctx->out_metadata.white_point_x, -+ ctx->out_metadata.white_point_y); -+ -+ metadata_lt = av_frame_get_side_data(output_frame, -+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); -+ if (metadata_lt) { -+ av_frame_remove_side_data(output_frame, -+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); -+ metadata_lt = av_frame_new_side_data(output_frame, -+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, -+ sizeof(AVContentLightMetadata)); -+ } else { -+ metadata_lt = 
av_frame_new_side_data(output_frame, -+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, -+ sizeof(AVContentLightMetadata)); -+ } -+ -+ hdr_meta_lt = (AVContentLightMetadata *)metadata_lt->data; -+ -+ hdr_meta_lt->MaxCLL = FFMIN(ctx->out_metadata.max_content_light_level, 65535); -+ hdr_meta_lt->MaxFALL = FFMIN(ctx->out_metadata.max_pic_average_light_level, 65535); -+ -+ av_log(avctx, AV_LOG_DEBUG, -+ "Mastering Content Light Level (out):\n"); -+ av_log(avctx, AV_LOG_DEBUG, -+ "MaxCLL(%u) MaxFALL(%u)\n", -+ ctx->out_metadata.max_content_light_level, -+ ctx->out_metadata.max_pic_average_light_level); -+ -+ return 0; -+} -+ - static int tonemap_vaapi_set_filter_params(AVFilterContext *avctx, AVFrame *input_frame) - { - VAAPIVPPContext *vpp_ctx = avctx->priv; -@@ -210,15 +315,26 @@ static int tonemap_vaapi_build_filter_pa - return AVERROR(EINVAL); - } - -- for (i = 0; i < num_query_caps; i++) { -- if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) -- break; -- } -- -- if (i >= num_query_caps) { -- av_log(avctx, AV_LOG_ERROR, -- "VAAPI driver doesn't support HDR to SDR\n"); -- return AVERROR(EINVAL); -+ if (ctx->color_transfer == AVCOL_TRC_SMPTE2084) { -+ for (i = 0; i < num_query_caps; i++) { -+ if (VA_TONE_MAPPING_HDR_TO_HDR & hdr_cap[i].caps_flag) -+ break; -+ } -+ if (i >= num_query_caps) { -+ av_log(avctx, AV_LOG_ERROR, -+ "VAAPI driver doesn't support HDR to HDR\n"); -+ return AVERROR(EINVAL); -+ } -+ } else { -+ for (i = 0; i < num_query_caps; i++) { -+ if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) -+ break; -+ } -+ if (i >= num_query_caps) { -+ av_log(avctx, AV_LOG_ERROR, -+ "VAAPI driver doesn't support HDR to SDR\n"); -+ return AVERROR(EINVAL); -+ } - } - - hdrtm_param.type = VAProcFilterHighDynamicRangeToneMapping; -@@ -243,6 +359,8 @@ static int tonemap_vaapi_filter_frame(AV - VAProcPipelineParameterBuffer params; - int err; - -+ VAHdrMetaData out_hdr_metadata; -+ - av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", - av_get_pix_fmt_name(input_frame->format), - input_frame->width, input_frame->height, input_frame->pts); -@@ -291,11 +409,26 @@ static int tonemap_vaapi_filter_frame(AV - if (ctx->color_matrix != AVCOL_SPC_UNSPECIFIED) - output_frame->colorspace = ctx->color_matrix; - -+ if (output_frame->color_trc == AVCOL_TRC_SMPTE2084) { -+ err = tonemap_vaapi_update_sidedata(avctx, output_frame); -+ if (err < 0) -+ goto fail; -+ -+ out_hdr_metadata.metadata_type = VAProcHighDynamicRangeMetadataHDR10; -+ out_hdr_metadata.metadata = &ctx->out_metadata; -+ out_hdr_metadata.metadata_size = sizeof(VAHdrMetaDataHDR10); -+ -+ params.output_hdr_metadata = &out_hdr_metadata; -+ } -+ - err = ff_vaapi_vpp_init_params(avctx, ¶ms, - input_frame, output_frame); - if (err < 0) - goto fail; - -+ params.filters = &vpp_ctx->filter_buffers[0]; -+ params.num_filters = vpp_ctx->nb_filter_buffers; -+ - err = ff_vaapi_vpp_render_picture(avctx, ¶ms, output_frame); - if (err < 0) - goto fail; -@@ -355,6 +488,46 @@ static av_cold int tonemap_vaapi_init(AV - STRING_OPTION(color_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); - STRING_OPTION(color_matrix, color_space, AVCOL_SPC_UNSPECIFIED); - -+ if (ctx->color_transfer == AVCOL_TRC_SMPTE2084) { -+ if (!ctx->master_display) { -+ av_log(avctx, AV_LOG_ERROR, -+ "Option mastering-display input invalid\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ if (10 != sscanf(ctx->master_display, -+ "G(%hu|%hu)B(%hu|%hu)R(%hu|%hu)WP(%hu|%hu)L(%u|%u)", -+ &ctx->out_metadata.display_primaries_x[0], -+ &ctx->out_metadata.display_primaries_y[0], -+ 
&ctx->out_metadata.display_primaries_x[1], -+ &ctx->out_metadata.display_primaries_y[1], -+ &ctx->out_metadata.display_primaries_x[2], -+ &ctx->out_metadata.display_primaries_y[2], -+ &ctx->out_metadata.white_point_x, -+ &ctx->out_metadata.white_point_y, -+ &ctx->out_metadata.min_display_mastering_luminance, -+ &ctx->out_metadata.max_display_mastering_luminance)) { -+ av_log(avctx, AV_LOG_ERROR, -+ "Option mastering-display input invalid\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ if (!ctx->content_light) { -+ av_log(avctx, AV_LOG_ERROR, -+ "Option content-light input invalid\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ if (2 != sscanf(ctx->content_light, -+ "CLL(%hu)FALL(%hu)", -+ &ctx->out_metadata.max_content_light_level, -+ &ctx->out_metadata.max_pic_average_light_level)) { -+ av_log(avctx, AV_LOG_ERROR, -+ "Option content-light input invalid\n"); -+ return AVERROR(EINVAL); -+ } -+ } -+ - return 0; - } - -@@ -380,10 +553,11 @@ static const AVOption tonemap_vaapi_opti - { "t", "Output color transfer characteristics set", - OFFSET(color_transfer_string), AV_OPT_TYPE_STRING, - { .str = NULL }, .flags = FLAGS, "transfer" }, -+ { "display", "set master display", OFFSET(master_display), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, FLAGS }, -+ { "light", "set content light", OFFSET(content_light), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, FLAGS }, - { NULL } - }; - -- - AVFILTER_DEFINE_CLASS(tonemap_vaapi); - - static const AVFilterPad tonemap_vaapi_inputs[] = { diff --git a/debian/patches/0004-cuda-format-converter-impl.patch b/debian/patches/0004-cuda-format-converter-impl.patch deleted file mode 100644 index a01c782d548..00000000000 --- a/debian/patches/0004-cuda-format-converter-impl.patch +++ /dev/null @@ -1,1438 +0,0 @@ -Index: jellyfin-ffmpeg/compat/cuda/cuda_runtime.h -=================================================================== ---- jellyfin-ffmpeg.orig/compat/cuda/cuda_runtime.h -+++ jellyfin-ffmpeg/compat/cuda/cuda_runtime.h -@@ -49,16 +49,6 @@ typedef struct __device_builtin__ __alig - unsigned short x, y; - } ushort2; - --typedef struct __device_builtin__ __align__(8) float2 --{ -- float x, y; --} float2; -- --typedef struct __device_builtin__ __align__(8) int2 --{ -- int x, y; --} int2; -- - typedef struct __device_builtin__ uint3 - { - unsigned int x, y, z; -@@ -66,6 +56,11 @@ typedef struct __device_builtin__ uint3 - - typedef struct uint3 dim3; - -+typedef struct __device_builtin__ __align__(8) int2 -+{ -+ int x, y; -+} int2; -+ - typedef struct __device_builtin__ __align__(4) uchar4 - { - unsigned char x, y, z, w; -@@ -81,11 +76,6 @@ typedef struct __device_builtin__ __alig - int x, y, z, w; - } int4; - --typedef struct __device_builtin__ __align__(16) float4 --{ -- float x, y, z, w; --} float4; -- - // Accessors for special registers - #define GETCOMP(reg, comp) \ - asm("mov.u32 %0, %%" #reg "." 
#comp ";" : "=r"(tmp)); \ -@@ -110,31 +100,24 @@ GET(getThreadIdx, tid) - #define threadIdx (getThreadIdx()) - - // Basic initializers (simple macros rather than inline functions) --#define make_int2(a, b) ((int2){.x = a, .y = b}) - #define make_uchar2(a, b) ((uchar2){.x = a, .y = b}) - #define make_ushort2(a, b) ((ushort2){.x = a, .y = b}) --#define make_float2(a, b) ((float2){.x = a, .y = b}) --#define make_int4(a, b, c, d) ((int4){.x = a, .y = b, .z = c, .w = d}) - #define make_uchar4(a, b, c, d) ((uchar4){.x = a, .y = b, .z = c, .w = d}) - #define make_ushort4(a, b, c, d) ((ushort4){.x = a, .y = b, .z = c, .w = d}) --#define make_float4(a, b, c, d) ((float4){.x = a, .y = b, .z = c, .w = d}) - - // Conversions from the tex instruction's 4-register output to various types - #define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);} - - TEX2D(unsigned char, a & 0xFF) - TEX2D(unsigned short, a & 0xFFFF) --TEX2D(float, a) --TEX2D(uchar2, make_uchar2(a & 0xFF, b & 0xFF)) --TEX2D(ushort2, make_ushort2(a & 0xFFFF, b & 0xFFFF)) --TEX2D(float2, make_float2(a, b)) --TEX2D(uchar4, make_uchar4(a & 0xFF, b & 0xFF, c & 0xFF, d & 0xFF)) --TEX2D(ushort4, make_ushort4(a & 0xFFFF, b & 0xFFFF, c & 0xFFFF, d & 0xFFFF)) --TEX2D(float4, make_float4(a, b, c, d)) -+TEX2D(uchar2, make_uchar2((unsigned char)a, (unsigned char)b)) -+TEX2D(ushort2, make_ushort2((unsigned short)a, (unsigned short)b)) -+TEX2D(uchar4, make_uchar4((unsigned char)a, (unsigned char)b, (unsigned char)c, (unsigned char)d)) -+TEX2D(ushort4, make_ushort4((unsigned short)a, (unsigned short)b, (unsigned short)c, (unsigned short)d)) - - // Template calling tex instruction and converting the output to the selected type --template --inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) -+template -+static inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) - { - T ret; - unsigned ret1, ret2, ret3, ret4; -@@ -145,44 +128,4 @@ inline __device__ T tex2D(cudaTextureObj - return ret; - } - --template<> --inline __device__ float4 tex2D(cudaTextureObject_t texObject, float x, float y) --{ -- float4 ret; -- asm("tex.2d.v4.f32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" : -- "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : -- "l"(texObject), "f"(x), "f"(y)); -- return ret; --} -- --template<> --inline __device__ float tex2D(cudaTextureObject_t texObject, float x, float y) --{ -- return tex2D(texObject, x, y).x; --} -- --template<> --inline __device__ float2 tex2D(cudaTextureObject_t texObject, float x, float y) --{ -- float4 ret = tex2D(texObject, x, y); -- return make_float2(ret.x, ret.y); --} -- --// Math helper functions --static inline __device__ float floorf(float a) { return __builtin_floorf(a); } --static inline __device__ float floor(float a) { return __builtin_floorf(a); } --static inline __device__ double floor(double a) { return __builtin_floor(a); } --static inline __device__ float ceilf(float a) { return __builtin_ceilf(a); } --static inline __device__ float ceil(float a) { return __builtin_ceilf(a); } --static inline __device__ double ceil(double a) { return __builtin_ceil(a); } --static inline __device__ float truncf(float a) { return __builtin_truncf(a); } --static inline __device__ float trunc(float a) { return __builtin_truncf(a); } --static inline __device__ double trunc(double a) { return __builtin_trunc(a); } --static inline __device__ float fabsf(float a) { return __builtin_fabsf(a); } --static inline __device__ 
float fabs(float a) { return __builtin_fabsf(a); } --static inline __device__ double fabs(double a) { return __builtin_fabs(a); } -- --static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); } --static inline __device__ float __cosf(float a) { return __nvvm_cos_approx_f(a); } -- - #endif /* COMPAT_CUDA_CUDA_RUNTIME_H */ -Index: jellyfin-ffmpeg/configure -=================================================================== ---- jellyfin-ffmpeg.orig/configure -+++ jellyfin-ffmpeg/configure -@@ -6250,7 +6250,7 @@ fi - if enabled cuda_nvcc; then - nvccflags="$nvccflags -ptx" - else -- nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -include ${source_link}/compat/cuda/cuda_runtime.h" -+ nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -std=c++14 -include ${source_link}/compat/cuda/cuda_runtime.h" - check_nvcc cuda_llvm - fi - -Index: jellyfin-ffmpeg/libavfilter/Makefile -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/Makefile -+++ jellyfin-ffmpeg/libavfilter/Makefile -@@ -392,8 +392,7 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) - OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o - OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o - OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o --OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \ -- vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o -+OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o scale_eval.o - OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o - OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o - OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o -Index: jellyfin-ffmpeg/libavfilter/cuda/vector_helpers.cuh -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/cuda/vector_helpers.cuh -+++ /dev/null -@@ -1,112 +0,0 @@ --/* -- * This file is part of FFmpeg. -- * -- * Permission is hereby granted, free of charge, to any person obtaining a -- * copy of this software and associated documentation files (the "Software"), -- * to deal in the Software without restriction, including without limitation -- * the rights to use, copy, modify, merge, publish, distribute, sublicense, -- * and/or sell copies of the Software, and to permit persons to whom the -- * Software is furnished to do so, subject to the following conditions: -- * -- * The above copyright notice and this permission notice shall be included in -- * all copies or substantial portions of the Software. -- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -- * DEALINGS IN THE SOFTWARE. 
-- */
-
--#ifndef AVFILTER_CUDA_VECTORHELPERS_H
--#define AVFILTER_CUDA_VECTORHELPERS_H
--
--typedef unsigned char uchar;
--typedef unsigned short ushort;
--
--template<typename T> struct vector_helper { };
--template<> struct vector_helper<uchar>   { typedef float  ftype; typedef int  itype; };
--template<> struct vector_helper<uchar2>  { typedef float2 ftype; typedef int2 itype; };
--template<> struct vector_helper<uchar4>  { typedef float4 ftype; typedef int4 itype; };
--template<> struct vector_helper<ushort>  { typedef float  ftype; typedef int  itype; };
--template<> struct vector_helper<ushort2> { typedef float2 ftype; typedef int2 itype; };
--template<> struct vector_helper<ushort4> { typedef float4 ftype; typedef int4 itype; };
--template<> struct vector_helper<int>     { typedef float  ftype; typedef int  itype; };
--template<> struct vector_helper<int2>    { typedef float2 ftype; typedef int2 itype; };
--template<> struct vector_helper<int4>    { typedef float4 ftype; typedef int4 itype; };
--
--#define floatT typename vector_helper<T>::ftype
--#define intT typename vector_helper<T>::itype
--
--template<typename T, typename V> inline __device__ V to_floatN(const T &a) { return (V)a; }
--template<typename T, typename V> inline __device__ T from_floatN(const V &a) { return (T)a; }
--
--#define OPERATORS2(T) \
--    template<typename V> inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.x + b.x, a.y + b.y); } \
--    template<typename V> inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.x - b.x, a.y - b.y); } \
--    template<typename V> inline __device__ T operator*(const T &a, V b) { return make_ ## T (a.x * b, a.y * b); } \
--    template<typename V> inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, a.y / b); } \
--    template<typename V> inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b, a.y >> b); } \
--    template<typename V> inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x << b, a.y << b); } \
--    template<typename V> inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; return a; } \
--    template<typename V> inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; } \
--    template<typename V> inline __device__ void vec_set_scalar(T &a, V b) { a.x = b; a.y = b; } \
--    template<> inline __device__ float2 to_floatN<T, float2>(const T &a) { return make_float2(a.x, a.y); } \
--    template<> inline __device__ T from_floatN<T, float2>(const float2 &a) { return make_ ## T(a.x, a.y); }
--#define OPERATORS4(T) \
--    template<typename V> inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); } \
--    template<typename V> inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); } \
--    template<typename V> inline __device__ T operator*(const T &a, V b) { return make_ ## T (a.x * b, a.y * b, a.z * b, a.w * b); } \
--    template<typename V> inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, a.y / b, a.z / b, a.w / b); } \
--    template<typename V> inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b, a.y >> b, a.z >> b, a.w >> b); } \
--    template<typename V> inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x << b, a.y << b, a.z << b, a.w << b); } \
--    template<typename V> inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; } \
--    template<typename V> inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; a.z = b.z; a.w = b.w; } \
--    template<typename V> inline __device__ void vec_set_scalar(T &a, V b) { a.x = b; a.y = b; a.z = b; a.w = b; } \
--    template<> inline __device__ float4 to_floatN<T, float4>(const T &a) { return make_float4(a.x, a.y, a.z, a.w); } \
--
template<> inline __device__ T from_floatN(const float4 &a) { return make_ ## T(a.x, a.y, a.z, a.w); } -- --OPERATORS2(int2) --OPERATORS2(uchar2) --OPERATORS2(ushort2) --OPERATORS2(float2) --OPERATORS4(int4) --OPERATORS4(uchar4) --OPERATORS4(ushort4) --OPERATORS4(float4) -- --template inline __device__ void vec_set(int &a, V b) { a = b; } --template inline __device__ void vec_set(float &a, V b) { a = b; } --template inline __device__ void vec_set(uchar &a, V b) { a = b; } --template inline __device__ void vec_set(ushort &a, V b) { a = b; } --template inline __device__ void vec_set_scalar(int &a, V b) { a = b; } --template inline __device__ void vec_set_scalar(float &a, V b) { a = b; } --template inline __device__ void vec_set_scalar(uchar &a, V b) { a = b; } --template inline __device__ void vec_set_scalar(ushort &a, V b) { a = b; } -- --template --inline __device__ T lerp_scalar(T v0, T v1, float t) { -- return t*v1 + (1.0f - t)*v0; --} -- --template<> --inline __device__ float2 lerp_scalar(float2 v0, float2 v1, float t) { -- return make_float2( -- lerp_scalar(v0.x, v1.x, t), -- lerp_scalar(v0.y, v1.y, t) -- ); --} -- --template<> --inline __device__ float4 lerp_scalar(float4 v0, float4 v1, float t) { -- return make_float4( -- lerp_scalar(v0.x, v1.x, t), -- lerp_scalar(v0.y, v1.y, t), -- lerp_scalar(v0.z, v1.z, t), -- lerp_scalar(v0.w, v1.w, t) -- ); --} -- --#endif -Index: jellyfin-ffmpeg/libavfilter/dither_matrix.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/dither_matrix.h -@@ -0,0 +1,74 @@ -+/* -+ * Dither matrix data -+ * -+ * This file is placed in the public domain. -+ */ -+ -+#include -+static const int ff_fruit_dither_size = 64; -+static const uint16_t ff_fruit_dither_matrix[] = { -+ 332, 2776, 1933, 42, 2598, 1796, 1000, 2978, 1677, 3452, 2164, 1564, 2644, 358, 2012, 3471, 1147, 3071, 596, 1943, 3146, 1191, 2469, 919, 3664, 2359, 441, 2691, 1179, 3027, 1408, 298, 3892, 1825, 182, 2178, 3028, 317, 2412, 858, 3097, 2205, 1145, 2880, 990, 2697, 728, 1969, 2312, 1393, 3232, 1204, 3752, 1529, 448, 3955, 2076, 833, 3856, 1, 3445, 2105, 955, 1761, -+ 4060, 1053, 3038, 1445, 3302, 430, 3702, 2119, 625, 2523, 12, 3003, 959, 3814, 2388, 829, 4059, 2236, 1417, 3447, 198, 4020, 1891, 3368, 76, 1460, 2963, 1680, 3721, 535, 2275, 2916, 1226, 2348, 3580, 823, 1897, 4032, 1245, 2728, 194, 3285, 1941, 399, 3639, 1593, 3775, 1038, 3012, 162, 2687, 2029, 559, 2983, 1809, 2378, 325, 2861, 1331, 2533, 1171, 2701, 3328, 153, -+ 2214, 3412, 501, 3934, 892, 1918, 2686, 1199, 3090, 1351, 3779, 1776, 3371, 1457, 217, 2844, 1726, 311, 2896, 1021, 2604, 1546, 569, 2758, 1818, 3967, 727, 3305, 963, 1866, 3591, 853, 3215, 496, 2651, 1453, 2808, 704, 2247, 3395, 1779, 937, 4014, 2288, 1286, 3110, 331, 3309, 1839, 3866, 932, 3566, 2499, 1005, 3346, 1192, 3712, 1743, 3399, 757, 3765, 391, 1871, 2928, -+ 1411, 820, 2531, 1622, 2192, 3478, 215, 4079, 384, 3298, 742, 2332, 436, 2685, 1963, 3273, 680, 3571, 2033, 3795, 806, 3550, 2319, 1225, 3191, 1027, 2506, 237, 2196, 3126, 28, 2148, 1772, 3959, 1009, 3507, 85, 3742, 1539, 453, 3647, 2154, 573, 2786, 156, 2020, 2569, 1425, 538, 2375, 1723, 300, 1854, 4065, 110, 2741, 678, 3138, 213, 1979, 2330, 1530, 3542, 720, -+ 3811, 1955, 3240, 126, 2909, 760, 2482, 1493, 2301, 1719, 2788, 1180, 3998, 923, 3649, 1094, 1862, 2579, 1272, 30, 3109, 1987, 255, 3816, 471, 1977, 3519, 1557, 3882, 1086, 2754, 3776, 1304, 241, 2262, 1863, 3163, 1111, 2982, 2026, 1056, 2948, 1439, 3323, 1737, 3595, 860, 3980, 
2895, 1193, 3365, 2779, 852, 2175, 3069, 1641, 2309, 1237, 2630, 4036, 965, 3052, 1096, 2487, -+ 289, 2863, 1190, 3633, 1330, 3834, 1075, 3429, 602, 3850, 174, 3242, 1657, 3080, 98, 2357, 3899, 374, 3224, 1619, 2431, 1125, 3019, 1367, 2395, 3102, 794, 2850, 426, 2451, 1687, 548, 3070, 2596, 3335, 565, 1630, 2459, 288, 3937, 2504, 51, 3740, 725, 2475, 433, 2706, 2075, 19, 3694, 633, 1982, 3149, 1345, 729, 3843, 411, 3654, 1681, 564, 2721, 104, 3875, 2110, -+ 3504, 1730, 537, 2398, 2058, 443, 2636, 1802, 2925, 953, 2568, 2005, 583, 2108, 2802, 1441, 889, 2726, 1949, 3984, 486, 3407, 624, 2698, 1648, 97, 4055, 1340, 1994, 3667, 913, 3453, 1964, 815, 1400, 4072, 2767, 873, 3457, 684, 1576, 3119, 1884, 1222, 3898, 1535, 3482, 982, 1817, 2520, 1487, 3927, 181, 3493, 2396, 1438, 2871, 985, 2070, 3498, 1370, 3279, 1655, 586, -+ 1280, 2625, 3912, 939, 2999, 1649, 3162, 64, 2130, 3606, 1428, 3469, 1256, 3824, 409, 3562, 1780, 3433, 768, 1206, 2843, 2086, 3869, 926, 3663, 2099, 1047, 2623, 3217, 148, 1810, 2498, 314, 3790, 2126, 129, 2037, 3088, 1356, 2314, 3301, 946, 2716, 2163, 250, 3091, 575, 2227, 3204, 359, 2970, 1110, 2594, 1867, 532, 3338, 43, 2492, 3172, 282, 2394, 842, 2852, 2019, -+ 3111, 31, 1813, 3256, 243, 3687, 851, 4002, 1301, 480, 3004, 264, 2493, 1586, 2947, 714, 2538, 165, 2193, 3597, 1514, 130, 1792, 3132, 348, 2875, 3434, 491, 1251, 2120, 3979, 1093, 2905, 979, 2561, 3625, 1230, 373, 3836, 1953, 197, 4028, 498, 3383, 1713, 2400, 1402, 4083, 1150, 3589, 2106, 592, 3650, 1252, 4012, 1975, 3047, 1574, 690, 3909, 1745, 3616, 349, 3976, -+ 894, 3438, 2251, 1160, 2582, 1467, 2342, 1747, 2837, 2242, 1076, 3946, 866, 3385, 1108, 1985, 4076, 1303, 2915, 438, 2589, 3312, 1085, 2367, 1916, 1403, 759, 2340, 3582, 2835, 637, 3264, 1997, 3506, 515, 1544, 3258, 2612, 1017, 2903, 1322, 1791, 3014, 1154, 3826, 871, 2858, 192, 2739, 719, 1701, 3177, 2222, 345, 2737, 770, 1181, 3774, 2264, 1060, 2667, 1262, 2318, 1584, -+ 2654, 1420, 499, 4045, 688, 3379, 319, 3490, 735, 3699, 1642, 2050, 2683, 40, 2204, 3096, 343, 3261, 1672, 3747, 900, 1981, 4025, 644, 3505, 2575, 3903, 1724, 232, 1517, 2590, 1405, 58, 1663, 3051, 2381, 751, 1756, 3646, 647, 3464, 2363, 826, 2563, 92, 3137, 1928, 3524, 1590, 2159, 3851, 84, 1423, 3082, 1684, 2417, 3417, 369, 1808, 3022, 118, 3254, 661, 3555, -+ 229, 3756, 2917, 1627, 2773, 1235, 2949, 1002, 2470, 140, 3274, 594, 3533, 1678, 3799, 933, 1812, 2457, 673, 2306, 1474, 3055, 292, 2820, 1293, 2, 1106, 3237, 2013, 3823, 439, 3611, 2093, 4015, 1264, 258, 3914, 2294, 16, 2692, 1983, 295, 3723, 1893, 3442, 1306, 619, 2371, 392, 2989, 1071, 2484, 3512, 666, 3796, 195, 1522, 2775, 3586, 875, 4086, 1921, 2883, 1163, -+ 2456, 1935, 1032, 2305, 111, 3845, 2036, 1555, 3964, 2115, 1289, 2911, 977, 2372, 465, 2616, 3618, 1211, 3933, 72, 3404, 772, 2424, 1662, 3755, 2252, 3025, 518, 2524, 855, 2953, 1102, 2811, 795, 2602, 3414, 2060, 891, 3357, 1132, 3965, 1495, 2806, 551, 1633, 2629, 3659, 1172, 3958, 1446, 3319, 798, 1787, 2768, 1271, 3184, 2210, 587, 1338, 2478, 1550, 483, 2198, 3388, -+ 832, 3219, 386, 3674, 1733, 2479, 578, 3214, 416, 3044, 1861, 291, 4033, 1449, 3477, 1940, 205, 2976, 885, 2704, 1886, 3820, 1070, 3288, 700, 1369, 1855, 4087, 1194, 3463, 1764, 2329, 297, 3296, 1742, 539, 1398, 3107, 1696, 2387, 711, 3151, 1223, 2219, 4038, 328, 2095, 2919, 845, 2595, 263, 2235, 4043, 398, 2129, 903, 3924, 1885, 3317, 249, 2732, 3685, 1383, 55, -+ 3926, 1481, 2977, 1177, 3292, 893, 3600, 1418, 2670, 927, 3637, 2477, 1127, 2665, 765, 2879, 1283, 2162, 3333, 1359, 482, 2180, 2942, 
219, 2548, 3594, 308, 2759, 2185, 106, 3174, 699, 3738, 1139, 2277, 3833, 2804, 223, 3707, 415, 3518, 1913, 150, 2995, 864, 3222, 1575, 35, 3428, 1694, 3581, 992, 2907, 1494, 3676, 2702, 11, 3103, 849, 3825, 1848, 947, 3125, 1901, -+ 2494, 600, 2017, 2658, 293, 2207, 2851, 9, 1799, 3186, 510, 2077, 3318, 102, 3196, 1571, 3982, 357, 1781, 3717, 2774, 1176, 1834, 4001, 908, 2044, 3195, 986, 1675, 3696, 1339, 2638, 1617, 3026, 96, 1984, 940, 2446, 1563, 2876, 930, 2532, 3426, 1516, 2407, 1134, 3801, 1931, 2307, 521, 2744, 1883, 138, 3356, 597, 1758, 2343, 1389, 2831, 2145, 693, 3485, 2286, 520, -+ 3031, 1601, 3710, 956, 4067, 1849, 1097, 3760, 2406, 1215, 3861, 1596, 846, 3804, 1890, 516, 2295, 3062, 747, 2337, 169, 3178, 603, 2269, 3104, 1537, 627, 3537, 2415, 786, 2988, 379, 4040, 839, 2554, 3561, 1363, 4005, 745, 2128, 3817, 1348, 617, 3910, 397, 3050, 598, 2787, 1037, 3932, 1277, 3741, 2091, 1073, 2503, 3530, 934, 4024, 352, 1610, 2969, 158, 1347, 3992, -+ 1155, 3348, 142, 2259, 3087, 500, 3380, 2096, 730, 2795, 210, 3005, 2335, 1173, 2627, 3703, 1049, 2038, 3514, 983, 3868, 1653, 3552, 1249, 74, 3891, 2586, 1452, 238, 3936, 1738, 2168, 1209, 3384, 1847, 593, 3233, 355, 3036, 1753, 63, 3250, 2150, 1732, 2613, 1259, 3629, 1483, 3120, 280, 2382, 663, 2641, 3176, 1637, 265, 3013, 1239, 2420, 3416, 1088, 3769, 1938, 2555, -+ 347, 2049, 2809, 1443, 761, 2628, 1693, 277, 3975, 1496, 3523, 1961, 547, 3106, 287, 1492, 3340, 24, 1547, 2887, 2147, 370, 2535, 1880, 2962, 2121, 479, 3077, 1922, 2618, 966, 3253, 2439, 327, 2847, 1515, 2226, 2713, 962, 3632, 2465, 1039, 2854, 245, 3179, 2080, 124, 2525, 792, 3486, 1744, 3289, 1404, 476, 3782, 2186, 715, 3579, 1971, 626, 2784, 2201, 741, 3248, -+ 1640, 3551, 1024, 3840, 1827, 3622, 1072, 3238, 2279, 835, 2562, 1295, 4085, 1816, 3535, 2495, 724, 2664, 4042, 570, 1281, 3422, 1013, 3787, 783, 1343, 3722, 1123, 3427, 599, 3634, 27, 1572, 3827, 1014, 3665, 139, 1868, 3197, 1390, 640, 4090, 1459, 3714, 902, 3421, 1676, 4004, 2246, 1128, 2720, 60, 4069, 2002, 1035, 2818, 1352, 2552, 88, 3847, 1801, 309, 3627, 1104, -+ 2894, 686, 2509, 403, 2926, 95, 1991, 2868, 469, 2052, 3370, 66, 2399, 679, 1248, 2081, 3181, 1100, 2224, 1870, 3001, 1506, 2760, 260, 3294, 2707, 2027, 146, 2355, 1323, 2783, 1956, 2946, 676, 2000, 3053, 1325, 3893, 424, 2084, 2998, 1797, 466, 2380, 1875, 507, 2939, 1054, 396, 3700, 1502, 3007, 904, 2418, 3443, 200, 3954, 1588, 3283, 1065, 3084, 1471, 2714, 2270, -+ 34, 4029, 1869, 3446, 1207, 2442, 3919, 879, 1788, 3726, 1115, 3129, 1466, 2742, 3897, 214, 1750, 3763, 405, 3459, 123, 3916, 812, 2317, 1685, 533, 3389, 1589, 3908, 1844, 376, 3990, 1158, 3437, 2377, 458, 2645, 896, 2336, 3396, 193, 2581, 3539, 1083, 2756, 3871, 1426, 2135, 3166, 1824, 566, 2104, 3559, 413, 1814, 2922, 2122, 797, 2750, 2258, 670, 3944, 869, 1729, -+ 3183, 1409, 916, 2220, 3100, 568, 1427, 3351, 2601, 336, 2803, 778, 3656, 432, 2170, 3330, 944, 2302, 2856, 970, 2416, 1766, 3249, 1218, 4066, 2438, 993, 2635, 697, 3021, 2152, 830, 2608, 234, 1421, 4051, 1705, 3577, 1507, 791, 3807, 958, 2051, 3148, 4, 2256, 712, 3529, 179, 2536, 3880, 989, 2655, 1302, 3267, 562, 1210, 3517, 303, 1950, 3393, 168, 2125, 3749, -+ 502, 2745, 3648, 256, 1620, 3684, 2240, 173, 1238, 3999, 2216, 1704, 2039, 2886, 1213, 1889, 2980, 665, 1585, 3715, 1333, 2662, 446, 3011, 41, 1906, 3772, 306, 3509, 1241, 3339, 1645, 3692, 2209, 3121, 607, 2833, 47, 3185, 2507, 1291, 2859, 400, 1580, 3675, 1328, 2676, 1658, 2857, 1183, 1965, 3190, 149, 3972, 1573, 2221, 3832, 1706, 3037, 1290, 2534, 1647, 2923, 
1161, -+ 2452, 1942, 809, 3321, 2139, 1022, 2842, 1887, 3074, 1528, 536, 3475, 188, 3837, 752, 3573, 79, 4018, 2141, 285, 3354, 738, 3680, 2173, 1473, 2927, 1144, 2816, 877, 2368, 91, 2727, 635, 1253, 1888, 3513, 974, 2280, 1990, 488, 3326, 1948, 4007, 2153, 660, 3314, 302, 4039, 784, 3626, 445, 2327, 1674, 2513, 827, 3099, 26, 2464, 749, 4095, 912, 3704, 556, 3495, -+ 225, 3962, 1567, 2550, 713, 4053, 371, 3436, 796, 2541, 3169, 1174, 2428, 1414, 2609, 1628, 2753, 1263, 3206, 1951, 1129, 2885, 1740, 911, 3376, 672, 3603, 1616, 2015, 3873, 1477, 3543, 2087, 3846, 171, 2549, 1612, 3730, 1157, 3923, 1650, 151, 1136, 3033, 2466, 996, 2931, 1450, 2391, 1763, 3362, 883, 3678, 333, 3496, 1422, 2730, 1084, 3225, 375, 2780, 1480, 2354, 1882, -+ 2961, 1028, 2814, 80, 3154, 1720, 2463, 1444, 3733, 23, 1666, 3917, 611, 3398, 388, 3235, 696, 2344, 425, 2580, 3839, 154, 2266, 3957, 381, 2056, 2476, 196, 3180, 527, 3009, 1044, 383, 2347, 2973, 781, 3227, 342, 2766, 654, 2967, 2101, 3546, 457, 1534, 3732, 1917, 616, 3134, 52, 2174, 2981, 1169, 2813, 2116, 541, 3781, 2156, 1448, 3578, 2199, 87, 3359, 1313, -+ 618, 3424, 1397, 3818, 2098, 1095, 2943, 961, 2316, 1959, 2975, 1026, 2724, 1768, 2155, 1153, 3945, 1751, 3612, 814, 1551, 3304, 1178, 2764, 1643, 3136, 1081, 3981, 2213, 1310, 2587, 1826, 3369, 1533, 1092, 3953, 1296, 2433, 1833, 3598, 1023, 2543, 1309, 3187, 2043, 125, 2519, 3528, 1324, 3757, 1511, 492, 4057, 1910, 994, 3200, 1777, 212, 2960, 702, 1811, 3122, 943, 3925, -+ 2611, 2046, 477, 2397, 650, 3541, 235, 3864, 632, 3342, 423, 2234, 3592, 109, 3758, 2933, 239, 2790, 1051, 2985, 2113, 630, 2515, 257, 3778, 843, 2708, 1549, 653, 3521, 233, 4058, 748, 3086, 2231, 484, 2062, 3075, 115, 1501, 3287, 334, 3885, 856, 2763, 3971, 1195, 414, 2822, 681, 2700, 1830, 2422, 112, 3556, 2267, 901, 3983, 2373, 1217, 3770, 454, 2481, 1845, -+ 266, 3662, 1654, 3208, 1287, 1934, 3060, 1807, 2648, 1395, 4074, 828, 1531, 2640, 1350, 872, 2454, 1512, 3499, 8, 4092, 1762, 3540, 1434, 2146, 3286, 71, 3683, 1902, 2421, 1385, 2719, 1972, 13, 3660, 1688, 3450, 1001, 4077, 2328, 907, 2913, 1690, 2353, 545, 1485, 2991, 2134, 1728, 3948, 938, 3353, 1298, 2891, 1592, 387, 3029, 1896, 524, 3378, 2092, 1560, 2972, 1089, -+ 3150, 882, 2672, 183, 3693, 2572, 506, 1109, 3218, 164, 2467, 1930, 3175, 706, 3480, 2111, 3831, 584, 2211, 2679, 1292, 2819, 442, 2935, 601, 1790, 2496, 1041, 2906, 435, 3230, 880, 3780, 2512, 909, 2770, 301, 2657, 1914, 508, 3754, 2079, 39, 3698, 1879, 3455, 800, 3322, 159, 2225, 3046, 304, 3621, 615, 3870, 2567, 1077, 3651, 1484, 2749, 190, 4008, 606, 3564, -+ 2203, 1410, 4046, 1988, 837, 1468, 3978, 2260, 1638, 3764, 929, 3547, 322, 1819, 2855, 394, 1416, 3006, 1702, 785, 3262, 960, 2249, 3624, 1224, 4023, 790, 3432, 1254, 3939, 1621, 2283, 531, 1519, 3394, 1258, 3819, 756, 3244, 1609, 2583, 1294, 3141, 1052, 2674, 261, 2427, 1553, 3653, 1165, 1962, 2606, 1040, 2320, 1937, 1284, 3160, 56, 2237, 920, 3194, 1372, 2401, 1952, -+ 0, 2901, 689, 2338, 3352, 2840, 73, 3405, 739, 2054, 2959, 1377, 2202, 3941, 1067, 2021, 3306, 167, 3905, 2042, 307, 3792, 1860, 144, 2615, 1978, 3064, 278, 1945, 2383, 119, 3544, 1874, 3131, 203, 2187, 1767, 2462, 1229, 3585, 364, 2798, 683, 3503, 1412, 4049, 921, 2762, 447, 3144, 733, 3797, 1489, 2986, 231, 3474, 780, 2793, 3935, 1670, 2526, 367, 3315, 841, -+ 3855, 1734, 3489, 305, 1240, 1836, 2195, 1011, 3078, 390, 2556, 642, 3209, 44, 2743, 3679, 917, 2695, 1133, 3400, 2436, 1098, 3056, 1415, 3277, 487, 1568, 2643, 3670, 925, 3101, 1034, 2747, 1167, 4030, 2848, 580, 3094, 99, 
2287, 1031, 3989, 1968, 2379, 514, 2053, 3092, 1234, 3894, 1623, 2472, 14, 3259, 822, 3968, 2248, 1401, 2040, 449, 3269, 766, 3706, 1603, 2944, -+ 1250, 2650, 1015, 2537, 3943, 579, 3630, 2593, 1274, 3467, 1583, 3829, 1162, 2471, 1543, 553, 2365, 1613, 3072, 677, 1748, 2752, 528, 3929, 906, 2321, 3810, 1288, 628, 2829, 2094, 3852, 401, 2392, 773, 1591, 3468, 1424, 3881, 1789, 3331, 1520, 172, 3220, 1636, 3596, 89, 1924, 2284, 1003, 3435, 1739, 2124, 2778, 1806, 517, 2621, 3403, 1205, 1829, 2869, 1062, 2293, 244, -+ 3608, 431, 3246, 1379, 2123, 2937, 1532, 296, 3922, 1865, 177, 2143, 2974, 427, 4011, 1912, 3576, 326, 2109, 4031, 75, 3655, 2206, 1541, 2048, 3401, 33, 2161, 3364, 1476, 254, 1691, 3337, 1375, 3605, 2055, 362, 2738, 716, 2634, 450, 2212, 3777, 1187, 2870, 863, 2485, 3363, 337, 3008, 634, 4063, 452, 1268, 3313, 1112, 3727, 253, 2228, 3798, 103, 1974, 4075, 1406, -+ 2430, 2061, 776, 3720, 152, 1114, 3293, 2325, 840, 2660, 3272, 988, 1725, 3366, 1305, 3020, 968, 3228, 799, 2544, 1840, 1016, 3170, 259, 2873, 777, 1838, 3142, 455, 3985, 2010, 2993, 646, 2689, 45, 2547, 3745, 1137, 3212, 1317, 3023, 825, 2566, 410, 2127, 3931, 1538, 775, 3812, 1785, 2757, 1368, 2542, 3584, 127, 3041, 2011, 1548, 2940, 723, 2490, 3307, 530, 2789, -+ 948, 3123, 1579, 2777, 2035, 3848, 542, 1946, 3057, 1433, 558, 3731, 2600, 755, 2326, 108, 2785, 1380, 3750, 1276, 3451, 2063, 737, 3567, 1247, 4062, 2671, 884, 2411, 1126, 2588, 952, 3794, 1858, 3161, 924, 1804, 2333, 218, 3974, 1679, 3456, 1394, 3572, 1757, 211, 3205, 2659, 1242, 2346, 176, 3140, 850, 2188, 1716, 2460, 560, 4019, 997, 3522, 1735, 1105, 3048, 1661, -+ 3773, 61, 3548, 609, 1273, 2553, 1682, 3609, 25, 4056, 1898, 2233, 224, 3604, 1107, 3802, 1673, 2410, 246, 2849, 459, 3066, 2578, 1625, 2425, 368, 1463, 3734, 1671, 3510, 107, 3257, 1341, 460, 2239, 3947, 550, 3441, 1993, 971, 2408, 20, 2765, 1061, 2952, 2292, 1371, 434, 3642, 972, 3460, 1659, 3736, 330, 3904, 888, 3276, 1362, 2735, 356, 2281, 3901, 227, 2171, -+ 805, 2528, 1774, 2183, 3165, 267, 2860, 1008, 2461, 1285, 2834, 935, 3081, 1509, 2694, 2073, 671, 3168, 1939, 1141, 3969, 1429, 133, 3815, 973, 3231, 2085, 201, 2918, 698, 2781, 1644, 2311, 3387, 1057, 1561, 2821, 1216, 2599, 3355, 701, 3791, 1892, 481, 4091, 651, 3310, 2191, 1700, 2899, 707, 2448, 1312, 2668, 1454, 2792, 2107, 36, 2030, 3234, 1465, 865, 2597, 3484, -+ 1518, 4000, 429, 3415, 870, 3949, 1503, 3492, 721, 3372, 361, 3789, 1831, 529, 3487, 320, 4081, 1012, 3607, 2250, 629, 2169, 3347, 1976, 2997, 722, 3620, 2351, 1200, 3889, 2144, 810, 4082, 236, 2023, 3515, 117, 3709, 444, 1835, 2845, 1299, 3198, 2140, 1510, 2546, 887, 3857, 49, 2009, 3994, 404, 3035, 1048, 3410, 461, 3668, 1618, 3828, 590, 2807, 3613, 1354, 620, -+ 3157, 1152, 2867, 1297, 2711, 2265, 490, 2131, 2955, 1712, 2350, 1214, 3203, 2179, 1069, 2577, 1857, 2924, 17, 2699, 1692, 2950, 1197, 485, 1582, 2740, 1407, 544, 3278, 1784, 382, 2941, 1208, 2510, 3063, 844, 2722, 2215, 1595, 4026, 226, 2370, 743, 3681, 145, 3465, 2068, 2782, 1392, 3153, 1006, 2255, 3638, 105, 1821, 2517, 813, 2920, 1189, 2409, 1967, 122, 3058, 2067, -+ 2403, 252, 1980, 3686, 116, 1652, 3746, 1337, 204, 3970, 669, 2715, 82, 3895, 1635, 3409, 763, 1526, 3502, 967, 3719, 268, 3900, 2393, 3558, 62, 3991, 1973, 2540, 987, 3716, 1909, 3327, 554, 1615, 3761, 1366, 595, 3300, 876, 3116, 1138, 2893, 1342, 2678, 1754, 378, 1079, 3563, 525, 2733, 1853, 1143, 2112, 4073, 1228, 3360, 1904, 377, 3391, 1042, 4050, 1721, 478, -+ 3860, 1624, 3270, 936, 2088, 2929, 610, 3145, 2444, 1486, 3534, 2007, 2996, 
808, 2402, 199, 2828, 2100, 505, 3127, 1435, 2558, 2003, 613, 1852, 2633, 1117, 3164, 271, 3476, 2271, 3, 1469, 3853, 2362, 340, 2898, 1878, 2570, 1462, 2166, 3872, 335, 3367, 639, 3988, 2964, 1908, 2374, 1577, 3835, 281, 3481, 2839, 582, 3018, 187, 2276, 3928, 1597, 2652, 831, 2557, 3425, -+ 1119, 2836, 561, 2514, 4088, 1064, 3458, 1957, 886, 2841, 341, 1101, 1683, 3236, 1365, 3617, 1033, 3921, 2447, 1846, 750, 3402, 1087, 3065, 1308, 3439, 705, 1752, 2872, 1374, 848, 3207, 2637, 1030, 2065, 3462, 910, 3911, 57, 3623, 534, 1894, 2559, 1611, 2297, 1413, 746, 3783, 175, 3211, 905, 2527, 754, 1464, 2369, 1665, 3751, 1396, 652, 3040, 272, 3266, 1307, 2102, -+ 15, 2274, 3526, 1432, 350, 1795, 2565, 48, 3883, 1269, 3411, 2136, 4027, 310, 2626, 645, 3113, 1311, 290, 3784, 2680, 113, 2268, 4068, 339, 1947, 2468, 3830, 526, 2078, 3950, 1711, 636, 3049, 166, 1731, 3108, 1184, 1999, 2669, 1349, 3500, 890, 3711, 78, 3290, 2502, 1201, 2853, 2184, 1357, 3303, 1926, 3884, 313, 2675, 789, 3171, 2437, 1265, 3718, 1920, 608, 2951, -+ 3766, 1569, 857, 2897, 2118, 3645, 762, 3182, 1629, 2360, 703, 2900, 954, 1876, 3759, 1488, 2289, 1911, 2956, 981, 1664, 3644, 918, 1562, 2904, 3661, 147, 1475, 3311, 2649, 230, 2245, 3636, 1386, 4022, 2603, 509, 2376, 3345, 847, 3059, 208, 2866, 1186, 2646, 1703, 366, 3392, 1823, 540, 4017, 6, 2987, 991, 3375, 1159, 3601, 2066, 68, 2772, 767, 2322, 3587, 1399, -+ 519, 2181, 3879, 161, 3229, 1149, 2032, 2656, 417, 3767, 2018, 134, 3549, 2223, 552, 3284, 59, 3987, 589, 3444, 2339, 422, 2805, 2151, 563, 1196, 3199, 2345, 874, 1300, 3531, 1142, 2746, 372, 2182, 821, 3744, 1631, 269, 4070, 1587, 2405, 1782, 3887, 674, 3560, 2071, 942, 3669, 1513, 2571, 2089, 691, 2453, 1877, 2874, 468, 1717, 3525, 1430, 3960, 1626, 207, 2734, -+ 1832, 3000, 1203, 2585, 1749, 463, 3986, 1355, 3349, 975, 3054, 1604, 2607, 1182, 2938, 1698, 2574, 1227, 2794, 2069, 1146, 3260, 1316, 3876, 1794, 2681, 1996, 473, 3993, 1936, 3032, 567, 2001, 3397, 1478, 3252, 1148, 2954, 1929, 2272, 623, 3167, 420, 2158, 2930, 1121, 3085, 2441, 221, 2892, 1082, 3610, 1602, 3803, 157, 1523, 4044, 2285, 1043, 2912, 456, 2516, 3454, 978, -+ 4037, 315, 3361, 622, 3691, 2194, 2984, 189, 1815, 2530, 497, 3271, 740, 3859, 270, 3671, 834, 3511, 1771, 186, 3743, 718, 3015, 29, 3373, 819, 3762, 1578, 2755, 77, 2414, 1634, 3915, 881, 2826, 94, 2560, 467, 3520, 1260, 3724, 1360, 3473, 1498, 163, 4009, 555, 1851, 3800, 787, 3251, 299, 3039, 1232, 3268, 2229, 662, 3002, 242, 1899, 3295, 1198, 1989, 3159, -+ 709, 2443, 2041, 1525, 2666, 1275, 859, 2090, 3557, 1116, 4054, 1437, 2404, 1944, 2810, 1068, 2352, 385, 3156, 2450, 1472, 2639, 1710, 2366, 1140, 2884, 262, 3239, 964, 3690, 807, 3135, 251, 2384, 1699, 3865, 1318, 3098, 779, 2486, 18, 2631, 914, 2799, 1919, 2364, 1566, 3334, 1255, 2304, 1656, 2688, 2047, 574, 2653, 1091, 3408, 1332, 3844, 2445, 631, 3737, 93, 2290, -+ 1646, 3672, 1046, 3508, 70, 3886, 2419, 3213, 346, 2815, 2138, 5, 3430, 614, 1479, 3374, 1651, 4064, 1353, 771, 3888, 344, 3545, 667, 4035, 1859, 2497, 1320, 2300, 1765, 2632, 1436, 3658, 1118, 3350, 731, 2315, 1843, 3956, 1010, 3316, 2082, 3918, 472, 3640, 744, 2690, 65, 2823, 428, 4078, 854, 3570, 1458, 3952, 354, 2014, 2624, 836, 1497, 3042, 1709, 2771, 1103, -+ 2990, 228, 2862, 811, 3189, 1736, 512, 1482, 3809, 710, 1773, 3034, 1346, 3951, 2291, 141, 2890, 585, 2696, 2022, 2910, 1055, 2197, 3139, 1382, 489, 3635, 621, 3461, 220, 4080, 546, 1881, 2958, 412, 1998, 3588, 184, 1455, 2761, 1605, 717, 1778, 3188, 1170, 3030, 1391, 3862, 1905, 3423, 1063, 2505, 101, 
1932, 2429, 1267, 3701, 37, 3488, 2083, 294, 4013, 803, 3470, -+ 1387, 3907, 1793, 2324, 1244, 2800, 2006, 3068, 980, 2385, 3628, 1050, 2729, 380, 1798, 3739, 928, 2232, 3413, 81, 1669, 3329, 1524, 216, 2703, 2097, 2936, 1639, 2045, 2712, 1344, 2167, 3494, 1019, 2458, 3158, 1168, 2592, 3448, 440, 3788, 2945, 202, 2474, 1995, 321, 3501, 1018, 577, 2176, 3083, 1373, 2865, 3344, 513, 3093, 1600, 2832, 1099, 3143, 2303, 1025, 2622, 363, -+ 2263, 668, 2717, 395, 4071, 659, 3673, 137, 2682, 1660, 275, 3210, 824, 2423, 3308, 1270, 3095, 1856, 1221, 3652, 2529, 576, 3961, 1895, 3735, 1124, 90, 3890, 941, 3255, 817, 2881, 32, 2731, 1715, 312, 4047, 649, 1986, 2358, 1246, 1927, 3574, 838, 4048, 2341, 1722, 2610, 3247, 1556, 276, 3677, 643, 1164, 3806, 2261, 774, 1903, 3913, 641, 1461, 3282, 1769, 3657, -+ 1614, 3325, 1175, 3133, 1565, 2483, 1113, 3297, 1319, 4003, 2241, 1236, 3838, 1606, 655, 2751, 248, 3920, 464, 2149, 950, 3045, 1261, 2825, 736, 2432, 3358, 1442, 2511, 323, 3725, 1527, 3940, 862, 3602, 2257, 1581, 2797, 998, 3147, 284, 2705, 1045, 2889, 1504, 648, 3073, 185, 1243, 3902, 2310, 1746, 2117, 2710, 1521, 180, 3516, 2647, 318, 2489, 3682, 143, 2908, 732, -+ 2508, 22, 3822, 1954, 222, 3483, 2137, 474, 2902, 588, 3114, 2034, 114, 2882, 3666, 1822, 1059, 2323, 3216, 1329, 3805, 155, 2298, 402, 3128, 1540, 2177, 656, 3061, 1800, 2313, 523, 1915, 3089, 1376, 694, 3281, 83, 3688, 1727, 3966, 1451, 3265, 50, 2064, 3697, 1335, 3449, 2028, 758, 2791, 949, 3996, 351, 3241, 2172, 1805, 995, 3079, 1131, 1872, 2389, 1220, 3973, -+ 1336, 2838, 931, 2361, 2992, 788, 1714, 3878, 2208, 1842, 976, 3532, 1358, 2103, 475, 2230, 3491, 1558, 734, 2817, 1741, 3431, 1384, 3705, 899, 3565, 274, 3997, 1166, 3538, 922, 3390, 2614, 209, 2390, 3877, 1188, 3010, 2157, 801, 2500, 522, 2218, 3849, 2539, 470, 2723, 897, 2473, 3614, 21, 3386, 1447, 3016, 895, 3874, 504, 3619, 1542, 4089, 692, 3440, 2057, 493, -+ 3245, 1759, 3615, 543, 1491, 3768, 2576, 1266, 46, 3643, 2545, 353, 2661, 4094, 1135, 2932, 38, 2564, 4010, 286, 2386, 802, 2684, 1770, 2522, 1321, 2914, 1689, 2673, 121, 2888, 1755, 1212, 3466, 1992, 462, 2605, 1864, 360, 3299, 1219, 3527, 1667, 685, 1185, 3223, 1966, 4006, 418, 1594, 3112, 2253, 591, 1900, 2449, 1364, 2864, 1970, 67, 2254, 2827, 329, 2971, 1058, -+ 2296, 283, 2617, 1873, 3152, 1120, 389, 3263, 2725, 1552, 726, 3226, 1686, 604, 3280, 1500, 3695, 999, 2025, 3192, 1078, 2979, 451, 4061, 10, 3221, 753, 2331, 984, 3842, 2160, 407, 4021, 878, 2966, 1545, 3536, 945, 3813, 1599, 2748, 178, 3118, 2008, 3631, 1608, 136, 1419, 2934, 2189, 682, 1326, 3729, 2736, 131, 3420, 818, 2521, 3320, 898, 1668, 3841, 1431, 3713, -+ 1536, 3406, 1007, 4034, 128, 2244, 3575, 2016, 951, 3930, 2299, 1020, 3748, 2435, 1925, 324, 2200, 3043, 571, 1775, 3785, 1456, 3275, 1151, 2165, 1803, 3793, 316, 3291, 1923, 1090, 3076, 2426, 1841, 612, 3201, 160, 2278, 2830, 549, 2132, 4093, 867, 2591, 365, 2769, 2114, 3553, 1004, 3821, 1828, 3336, 338, 1607, 4041, 1202, 3067, 419, 1327, 3583, 2190, 793, 2677, 100, -+ 2480, 764, 2921, 2133, 1282, 2801, 1388, 657, 3173, 279, 1820, 2965, 132, 1231, 2812, 3938, 861, 1440, 3590, 2663, 120, 2282, 675, 2620, 3554, 969, 2488, 1598, 2824, 495, 3641, 1505, 54, 3771, 2243, 1381, 3977, 1960, 1279, 3569, 1036, 1786, 2994, 1378, 3906, 1074, 3343, 572, 2518, 206, 2877, 868, 2413, 3155, 581, 2308, 1697, 3808, 2024, 2718, 240, 3202, 1233, 3497, -+ 1718, 3863, 406, 1632, 3381, 511, 3963, 2440, 1470, 2619, 3341, 1315, 2217, 3593, 557, 1760, 3324, 2501, 408, 1314, 3479, 1708, 3896, 1554, 247, 3017, 638, 
3418, 1334, 2455, 816, 3377, 2142, 1029, 2878, 437, 2551, 687, 2957, 7, 2491, 3332, 273, 2334, 782, 2968, 1783, 1278, 3124, 2074, 1490, 3942, 2004, 1066, 1907, 3568, 191, 2796, 605, 1122, 3995, 1850, 2273, 695, -+ 3130, 1156, 2356, 3728, 915, 3105, 2059, 170, 3753, 1080, 503, 4016, 804, 3115, 1361, 2693, 86, 1837, 3854, 2349, 769, 2846, 393, 3117, 2072, 1257, 3867, 2031, 135, 4084, 1958, 2709, 708, 3243, 1570, 3708, 1130, 3419, 1695, 3858, 1508, 658, 3786, 1707, 3472, 69, 2434, 4052, 421, 3599, 664, 2573, 53, 3382, 2642, 957, 3193, 1499, 2238, 3024, 1559, 494, 3689, 2584, -+}; -Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.c -+++ jellyfin-ffmpeg/libavfilter/vf_scale_cuda.c -@@ -1,5 +1,8 @@ - /* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. -+* Copyright (c) 2019 rcombs -+* -+* This file is part of FFmpeg. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), -@@ -20,10 +23,10 @@ - * DEALINGS IN THE SOFTWARE. - */ - --#include - #include - #include - -+#include "libavutil/avassert.h" - #include "libavutil/avstring.h" - #include "libavutil/common.h" - #include "libavutil/hwcontext.h" -@@ -34,13 +37,12 @@ - #include "libavutil/pixdesc.h" - - #include "avfilter.h" -+#include "dither_matrix.h" - #include "formats.h" - #include "internal.h" - #include "scale_eval.h" - #include "video.h" - --#include "vf_scale_cuda.h" -- - static const enum AVPixelFormat supported_formats[] = { - AV_PIX_FMT_YUV420P, - AV_PIX_FMT_NV12, -@@ -48,8 +50,6 @@ static const enum AVPixelFormat supporte - AV_PIX_FMT_P010, - AV_PIX_FMT_P016, - AV_PIX_FMT_YUV444P16, -- AV_PIX_FMT_0RGB32, -- AV_PIX_FMT_0BGR32, - }; - - #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) -@@ -58,17 +58,6 @@ static const enum AVPixelFormat supporte - - #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) - --enum { -- INTERP_ALGO_DEFAULT, -- -- INTERP_ALGO_NEAREST, -- INTERP_ALGO_BILINEAR, -- INTERP_ALGO_BICUBIC, -- INTERP_ALGO_LANCZOS, -- -- INTERP_ALGO_COUNT --}; -- - typedef struct CUDAScaleContext { - const AVClass *class; - -@@ -87,6 +76,7 @@ typedef struct CUDAScaleContext { - * Output sw format. AV_PIX_FMT_NONE for no conversion. 
- */ - enum AVPixelFormat format; -+ char *format_str; - - char *w_expr; ///< width expression string - char *h_expr; ///< height expression string -@@ -96,30 +86,56 @@ typedef struct CUDAScaleContext { - - CUcontext cu_ctx; - CUmodule cu_module; -- CUfunction cu_func_uchar; -- CUfunction cu_func_uchar2; -- CUfunction cu_func_uchar4; -- CUfunction cu_func_ushort; -- CUfunction cu_func_ushort2; -- CUfunction cu_func_ushort4; -+ -+#define VARIANT(NAME) \ -+ CUfunction cu_func_ ## NAME; -+#define VARIANTSET(NAME) \ -+ VARIANT(NAME) \ -+ VARIANT(NAME ## _c) \ -+ VARIANT(NAME ## _p2) \ -+ VARIANT(NAME ## _2) \ -+ VARIANT(NAME ## _2_u) \ -+ VARIANT(NAME ## _2_v) \ -+ VARIANT(NAME ## _4) -+ -+ VARIANTSET(8_8) -+ VARIANTSET(16_16) -+ VARIANTSET(8_16) -+ VARIANTSET(16_8) -+#undef VARIANTSET -+#undef VARIANT -+ -+ CUfunction cu_func_luma; -+ CUfunction cu_func_chroma_u; -+ CUfunction cu_func_chroma_v; -+ - CUstream cu_stream; - - CUdeviceptr srcBuffer; - CUdeviceptr dstBuffer; - int tex_alignment; - -- int interp_algo; -- int interp_use_linear; -- int interp_as_integer; -+ const AVPixFmtDescriptor *in_desc, *out_desc; -+ int in_planes, out_planes; - -- float param; -+ CUdeviceptr ditherBuffer; -+ CUtexObject ditherTex; - } CUDAScaleContext; - - static av_cold int cudascale_init(AVFilterContext *ctx) - { - CUDAScaleContext *s = ctx->priv; - -- s->format = AV_PIX_FMT_NONE; -+ if (!strcmp(s->format_str, "same")) { -+ s->format = AV_PIX_FMT_NONE; -+ } else { -+ s->format = av_get_pix_fmt(s->format_str); -+ if (s->format == AV_PIX_FMT_NONE) { -+ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); -+ return AVERROR(EINVAL); -+ } -+ } -+ - s->frame = av_frame_alloc(); - if (!s->frame) - return AVERROR(ENOMEM); -@@ -135,13 +151,22 @@ static av_cold void cudascale_uninit(AVF - { - CUDAScaleContext *s = ctx->priv; - -- if (s->hwctx && s->cu_module) { -+ if (s->hwctx) { - CudaFunctions *cu = s->hwctx->internal->cuda_dl; -- CUcontext dummy; -+ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; -+ -+ CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); -+ -+ if (s->ditherTex) { -+ CHECK_CU(cu->cuTexObjectDestroy(s->ditherTex)); -+ s->ditherTex = 0; -+ } -+ -+ if (s->ditherBuffer) { -+ CHECK_CU(cu->cuMemFree(s->ditherBuffer)); -+ s->ditherBuffer = 0; -+ } - -- CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); -- CHECK_CU(cu->cuModuleUnload(s->cu_module)); -- s->cu_module = NULL; - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - } - -@@ -262,6 +287,63 @@ static av_cold int init_processing_chain - return 0; - } - -+static av_cold int cudascale_setup_dither(AVFilterContext *ctx) -+{ -+ CUDAScaleContext *s = ctx->priv; -+ AVFilterLink *inlink = ctx->inputs[0]; -+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; -+ AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; -+ CudaFunctions *cu = device_hwctx->internal->cuda_dl; -+ CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; -+ int ret = 0; -+ -+ CUDA_MEMCPY2D cpy = { -+ .srcMemoryType = CU_MEMORYTYPE_HOST, -+ .dstMemoryType = CU_MEMORYTYPE_DEVICE, -+ .srcHost = ff_fruit_dither_matrix, -+ .dstDevice = 0, -+ .srcPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), -+ .dstPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), -+ .WidthInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), -+ .Height = ff_fruit_dither_size, -+ }; -+ -+ CUDA_TEXTURE_DESC tex_desc = { -+ .filterMode = CU_TR_FILTER_MODE_POINT, -+ .flags = CU_TRSF_READ_AS_INTEGER, -+ }; -+ -+ CUDA_RESOURCE_DESC res_desc 
= { -+ .resType = CU_RESOURCE_TYPE_PITCH2D, -+ .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT16, -+ .res.pitch2D.numChannels = 1, -+ .res.pitch2D.width = ff_fruit_dither_size, -+ .res.pitch2D.height = ff_fruit_dither_size, -+ .res.pitch2D.pitchInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), -+ .res.pitch2D.devPtr = 0, -+ }; -+ -+ av_assert0(sizeof(ff_fruit_dither_matrix) == sizeof(ff_fruit_dither_matrix[0]) * ff_fruit_dither_size * ff_fruit_dither_size); -+ -+ if ((ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx))) < 0) -+ return ret; -+ -+ if ((ret = CHECK_CU(cu->cuMemAlloc(&s->ditherBuffer, sizeof(ff_fruit_dither_matrix)))) < 0) -+ goto fail; -+ -+ res_desc.res.pitch2D.devPtr = cpy.dstDevice = s->ditherBuffer; -+ -+ if ((ret = CHECK_CU(cu->cuMemcpy2D(&cpy))) < 0) -+ goto fail; -+ -+ if ((ret = CHECK_CU(cu->cuTexObjectCreate(&s->ditherTex, &res_desc, &tex_desc, NULL))) < 0) -+ goto fail; -+ -+fail: -+ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); -+ return ret; -+} -+ - static av_cold int cudascale_config_props(AVFilterLink *outlink) - { - AVFilterContext *ctx = outlink->src; -@@ -271,46 +353,11 @@ static av_cold int cudascale_config_prop - AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; - CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; - CudaFunctions *cu = device_hwctx->internal->cuda_dl; -- char buf[64]; - int w, h; -+ int i; - int ret; - -- char *scaler_ptx; -- const char *function_infix = ""; -- - extern char vf_scale_cuda_ptx[]; -- extern char vf_scale_cuda_bicubic_ptx[]; -- -- switch(s->interp_algo) { -- case INTERP_ALGO_NEAREST: -- scaler_ptx = vf_scale_cuda_ptx; -- function_infix = "_Nearest"; -- s->interp_use_linear = 0; -- s->interp_as_integer = 1; -- break; -- case INTERP_ALGO_BILINEAR: -- scaler_ptx = vf_scale_cuda_ptx; -- function_infix = "_Bilinear"; -- s->interp_use_linear = 1; -- s->interp_as_integer = 1; -- break; -- case INTERP_ALGO_DEFAULT: -- case INTERP_ALGO_BICUBIC: -- scaler_ptx = vf_scale_cuda_bicubic_ptx; -- function_infix = "_Bicubic"; -- s->interp_use_linear = 0; -- s->interp_as_integer = 0; -- break; -- case INTERP_ALGO_LANCZOS: -- scaler_ptx = vf_scale_cuda_bicubic_ptx; -- function_infix = "_Lanczos"; -- s->interp_use_linear = 0; -- s->interp_as_integer = 0; -- break; -- default: -- av_log(ctx, AV_LOG_ERROR, "Unknown interpolation algorithm\n"); -- return AVERROR_BUG; -- } - - s->hwctx = device_hwctx; - s->cu_stream = s->hwctx->stream; -@@ -319,40 +366,30 @@ static av_cold int cudascale_config_prop - if (ret < 0) - goto fail; - -- ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, scaler_ptx)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_uchar", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_uchar2", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_uchar4", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_ushort", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, sizeof(buf), "Subsample%s_ushort2", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, buf)); -- if (ret < 0) -- goto fail; -- -- snprintf(buf, 
sizeof(buf), "Subsample%s_ushort4", function_infix); -- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, buf)); -+ ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx)); - if (ret < 0) - goto fail; - -+#define VARIANT(NAME) \ -+ CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ ## NAME, s->cu_module, "Subsample_Bilinear_" #NAME)); \ -+ if (ret < 0) \ -+ goto fail; -+ -+#define VARIANTSET(NAME) \ -+ VARIANT(NAME) \ -+ VARIANT(NAME ## _c) \ -+ VARIANT(NAME ## _2) \ -+ VARIANT(NAME ## _p2) \ -+ VARIANT(NAME ## _2_u) \ -+ VARIANT(NAME ## _2_v) \ -+ VARIANT(NAME ## _4) -+ -+ VARIANTSET(8_8) -+ VARIANTSET(16_16) -+ VARIANTSET(8_16) -+ VARIANTSET(16_8) -+#undef VARIANTSET -+#undef VARIANT - - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - -@@ -376,6 +413,53 @@ static av_cold int cudascale_config_prop - if (ret < 0) - return ret; - -+ s->in_desc = av_pix_fmt_desc_get(s->in_fmt); -+ s->out_desc = av_pix_fmt_desc_get(s->out_fmt); -+ -+ for (i = 0; i < s->in_desc->nb_components; i++) -+ s->in_planes = FFMAX(s->in_planes, s->in_desc ->comp[i].plane + 1); -+ -+ for (i = 0; i < s->in_desc->nb_components; i++) -+ s->out_planes = FFMAX(s->out_planes, s->out_desc->comp[i].plane + 1); -+ -+#define VARIANT(INDEPTH, OUTDEPTH, SUFFIX) s->cu_func_ ## INDEPTH ## _ ## OUTDEPTH ## SUFFIX -+#define BITS(n) ((n + 7) & ~7) -+#define VARIANTSET(INDEPTH, OUTDEPTH) \ -+ else if (BITS(s->in_desc->comp[0].depth) == INDEPTH && \ -+ BITS(s->out_desc->comp[0].depth) == OUTDEPTH) { \ -+ s->cu_func_luma = VARIANT(INDEPTH, OUTDEPTH,); \ -+ if (s->in_planes == 3 && s->out_planes == 3) { \ -+ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _c); \ -+ } else if (s->in_planes == 3 && s->out_planes == 2) { \ -+ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _p2); \ -+ } else if (s->in_planes == 2 && s->out_planes == 2) { \ -+ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2); \ -+ } else if (s->in_planes == 2 && s->out_planes == 3) { \ -+ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2_u); \ -+ s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _2_v); \ -+ } else { \ -+ ret = AVERROR_BUG; \ -+ goto fail; \ -+ } \ -+ } -+ -+ if (0) {} -+ VARIANTSET(8, 8) -+ VARIANTSET(16, 16) -+ VARIANTSET(8, 16) -+ VARIANTSET(16, 8) -+ else { -+ ret = AVERROR_BUG; -+ goto fail; -+ } -+#undef VARIANTSET -+#undef VARIANT -+ -+ if (s->in_desc->comp[0].depth > s->out_desc->comp[0].depth) { -+ if ((ret = cudascale_setup_dither(ctx)) < 0) -+ goto fail; -+ } -+ - av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d%s\n", - inlink->w, inlink->h, outlink->w, outlink->h, s->passthrough ? " (passthrough)" : ""); - -@@ -396,21 +480,18 @@ fail: - static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, - uint8_t *src_dptr, int src_width, int src_height, int src_pitch, - uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, -- int pixel_size, int bit_depth) -+ int pixel_size) - { - CUDAScaleContext *s = ctx->priv; - CudaFunctions *cu = s->hwctx->internal->cuda_dl; - CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; - CUtexObject tex = 0; -- void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, -- &src_width, &src_height, &bit_depth, &s->param }; -+ void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height, &s->ditherTex }; - int ret; - - CUDA_TEXTURE_DESC tex_desc = { -- .filterMode = s->interp_use_linear ? 
-- CU_TR_FILTER_MODE_LINEAR : -- CU_TR_FILTER_MODE_POINT, -- .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0, -+ .filterMode = CU_TR_FILTER_MODE_LINEAR, -+ .flags = CU_TRSF_READ_AS_INTEGER, - }; - - CUDA_RESOURCE_DESC res_desc = { -@@ -425,10 +506,6 @@ static int call_resize_kernel(AVFilterCo - .res.pitch2D.devPtr = (CUdeviceptr)src_dptr, - }; - -- // Handling of channels is done via vector-types in cuda, so their size is implicitly part of the pitch -- // Same for pixel_size, which is represented via datatypes on the cuda side of things. -- dst_pitch /= channels * pixel_size; -- - ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL)); - if (ret < 0) - goto exit; -@@ -447,91 +524,37 @@ exit: - static int scalecuda_resize(AVFilterContext *ctx, - AVFrame *out, AVFrame *in) - { -- AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data; - CUDAScaleContext *s = ctx->priv; - -- switch (in_frames_ctx->sw_format) { -- case AV_PIX_FMT_YUV420P: -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[1], in->width / 2, in->height / 2, in->linesize[1], -- out->data[1], out->width / 2, out->height / 2, out->linesize[1], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[2], in->width / 2, in->height / 2, in->linesize[2], -- out->data[2], out->width / 2, out->height / 2, out->linesize[2], -- 1, 8); -- break; -- case AV_PIX_FMT_YUV444P: -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[1], in->width, in->height, in->linesize[1], -- out->data[1], out->width, out->height, out->linesize[1], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[2], in->width, in->height, in->linesize[2], -- out->data[2], out->width, out->height, out->linesize[2], -- 1, 8); -- break; -- case AV_PIX_FMT_YUV444P16: -- call_resize_kernel(ctx, s->cu_func_ushort, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 2, 16); -- call_resize_kernel(ctx, s->cu_func_ushort, 1, -- in->data[1], in->width, in->height, in->linesize[1], -- out->data[1], out->width, out->height, out->linesize[1], -- 2, 16); -- call_resize_kernel(ctx, s->cu_func_ushort, 1, -- in->data[2], in->width, in->height, in->linesize[2], -- out->data[2], out->width, out->height, out->linesize[2], -- 2, 16); -- break; -- case AV_PIX_FMT_NV12: -- call_resize_kernel(ctx, s->cu_func_uchar, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 1, 8); -- call_resize_kernel(ctx, s->cu_func_uchar2, 2, -- in->data[1], in->width / 2, in->height / 2, in->linesize[1], -- out->data[1], out->width / 2, out->height / 2, out->linesize[1], -- 1, 8); -- break; -- case AV_PIX_FMT_P010LE: -- call_resize_kernel(ctx, s->cu_func_ushort, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 2, 10); -- call_resize_kernel(ctx, s->cu_func_ushort2, 2, -- in->data[1], in->width / 2, in->height / 2, in->linesize[1], -- out->data[1], out->width / 2, out->height / 2, out->linesize[1], -- 2, 10); -- break; -- case AV_PIX_FMT_P016LE: -- call_resize_kernel(ctx, 
s->cu_func_ushort, 1, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 2, 16); -- call_resize_kernel(ctx, s->cu_func_ushort2, 2, -- in->data[1], in->width / 2, in->height / 2, in->linesize[1], -- out->data[1], out->width / 2, out->height / 2, out->linesize[1], -- 2, 16); -- break; -- case AV_PIX_FMT_0RGB32: -- case AV_PIX_FMT_0BGR32: -- call_resize_kernel(ctx, s->cu_func_uchar4, 4, -- in->data[0], in->width, in->height, in->linesize[0], -- out->data[0], out->width, out->height, out->linesize[0], -- 1, 8); -- break; -- default: -- return AVERROR_BUG; -+#define DEPTH_BYTES(depth) (((depth) + 7) / 8) -+ -+ call_resize_kernel(ctx, s->cu_func_luma, 1, -+ in->data[0], in->width, in->height, in->linesize[0], -+ out->data[0], out->width, out->height, out->linesize[0], -+ DEPTH_BYTES(s->in_desc->comp[0].depth)); -+ -+ call_resize_kernel(ctx, s->cu_func_chroma_u, s->in_planes == 2 ? 2 : 1, -+ in->data[1], -+ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), -+ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), -+ in->linesize[1], -+ out->data[1], -+ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), -+ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), -+ out->linesize[1], -+ DEPTH_BYTES(s->in_desc->comp[1].depth)); -+ -+ if (s->cu_func_chroma_v) { -+ call_resize_kernel(ctx, s->cu_func_chroma_v, s->in_planes == 2 ? 2 : 1, -+ in->data[s->in_desc->comp[2].plane], -+ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), -+ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), -+ in->linesize[s->in_desc->comp[2].plane], -+ out->data[s->out_desc->comp[2].plane] + s->out_desc->comp[2].offset, -+ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), -+ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), -+ out->linesize[s->out_desc->comp[2].plane], -+ DEPTH_BYTES(s->in_desc->comp[2].depth)); - } - - return 0; -@@ -621,20 +644,15 @@ static AVFrame *cudascale_get_video_buff - #define OFFSET(x) offsetof(CUDAScaleContext, x) - #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) - static const AVOption options[] = { -- { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, -- { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, -- { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" }, -- { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" }, -- { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" }, -- { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" }, -- { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" }, -+ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, -+ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, -+ { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, - { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, -- { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = 
SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS }, -- { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" }, -- { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, -- { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, -- { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, -- { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS }, -+ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, -+ { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, -+ { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, -+ { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, -+ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, - { NULL }, - }; - -Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.cu -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.cu -+++ jellyfin-ffmpeg/libavfilter/vf_scale_cuda.cu -@@ -20,35 +20,115 @@ - * DEALINGS IN THE SOFTWARE. - */ - --#include "cuda/vector_helpers.cuh" -+typedef unsigned char uchar; -+typedef unsigned short ushort; - --template --__device__ inline void Subsample_Nearest(cudaTextureObject_t tex, -- T *dst, -- int dst_width, int dst_height, int dst_pitch, -- int src_width, int src_height, -- int bit_depth) -+#define SHIFTDOWN(val) (dstbase)(val >> abs(2 + shift)) -+#define SHIFTUP(val) (dstbase)(val << abs(-shift - 2)) -+ -+template struct add_conv_shift1_d - { -- int xo = blockIdx.x * blockDim.x + threadIdx.x; -- int yo = blockIdx.y * blockDim.y + threadIdx.y; -+ typedef DST dstbase; - -- if (yo < dst_height && xo < dst_width) -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) - { -- float hscale = (float)src_width / (float)dst_width; -- float vscale = (float)src_height / (float)dst_height; -- float xi = (xo + 0.5f) * hscale; -- float yi = (yo + 0.5f) * vscale; -+ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + ((1 + d) >> (sizeof(SRC) * 8 - dither + 3)); - -- dst[yo*dst_pitch+xo] = tex2D(tex, xi, yi); -+ if (shift > -2) -+ return SHIFTDOWN(ret); -+ else -+ return SHIFTUP(ret); - } --} -+}; -+ -+template struct add_conv_shift1 -+{ -+ typedef DST dstbase; - --template --__device__ inline void Subsample_Bilinear(cudaTextureObject_t tex, -- T *dst, -- int dst_width, int dst_height, int dst_pitch, -- int src_width, int src_height, -- int bit_depth) -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + 2; -+ -+ if (shift > -2) -+ return SHIFTDOWN(ret); -+ else -+ return SHIFTUP(ret); -+ } -+}; -+ -+template struct add_conv_shift2 -+{ -+ typedef decltype(DST::x) dstbase; -+ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ unsigned retx 
= (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; -+ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; -+ -+ if (shift > -2) -+ return { SHIFTDOWN(retx), SHIFTDOWN(rety) }; -+ else -+ return { SHIFTUP(retx), SHIFTUP(rety) }; -+ } -+}; -+ -+template struct add_conv_shift2_x -+{ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ return add_conv_shift1()(i1.x, i2.x, i3.x, i4.x, d); -+ } -+}; -+ -+template struct add_conv_shift2_y -+{ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ return add_conv_shift1()(i1.y, i2.y, i3.y, i4.y, d); -+ } -+}; -+ -+template struct add_conv_shift3 -+{ -+ typedef decltype(DST::x) dstbase; -+ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; -+ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; -+ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; -+ -+ if (shift > -2) -+ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz) }; -+ else -+ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz) }; -+ } -+}; -+ -+template struct add_conv_shift4 -+{ -+ typedef decltype(DST::x) dstbase; -+ -+ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) -+ { -+ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; -+ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; -+ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; -+ unsigned retw = (unsigned)i1.w + (unsigned)i2.w + (unsigned)i3.w + (unsigned)i4.w + 2; -+ -+ if (shift > -2) -+ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz), SHIFTDOWN(retw) }; -+ else -+ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz), SHIFTUP(retw) }; -+ } -+}; -+ -+template class conv, int pitch, int shift, int dither> -+__inline__ __device__ void Subsample_Bilinear(cudaTextureObject_t tex, -+ DST *dst, -+ int dst_width, int dst_height, int dst_pitch, -+ int src_width, int src_height, -+ cudaTextureObject_t ditherTex) - { - int xo = blockIdx.x * blockDim.x + threadIdx.x; - int yo = blockIdx.y * blockDim.y + threadIdx.y; -@@ -66,58 +146,48 @@ __device__ inline void Subsample_Bilinea - float dx = wh / (0.5f + wh); - float dy = wv / (0.5f + wv); - -- intT r = { 0 }; -- vec_set_scalar(r, 2); -- r += tex2D(tex, xi - dx, yi - dy); -- r += tex2D(tex, xi + dx, yi - dy); -- r += tex2D(tex, xi - dx, yi + dy); -- r += tex2D(tex, xi + dx, yi + dy); -- vec_set(dst[yo*dst_pitch+xo], r >> 2); -+ SRC i0 = tex2D(tex, xi-dx, yi-dy); -+ SRC i1 = tex2D(tex, xi+dx, yi-dy); -+ SRC i2 = tex2D(tex, xi-dx, yi+dy); -+ SRC i3 = tex2D(tex, xi+dx, yi+dy); -+ -+ ushort ditherVal = dither ? 
tex2D(ditherTex, xo, yo) : 0; -+ -+ dst[yo*(dst_pitch / sizeof(DST))+xo*pitch] = conv()(i0, i1, i2, i3, ditherVal); - } - } - - extern "C" { - --#define NEAREST_KERNEL(T) \ -- __global__ void Subsample_Nearest_ ## T(cudaTextureObject_t src_tex, \ -- T *dst, \ -- int dst_width, int dst_height, int dst_pitch, \ -- int src_width, int src_height, \ -- int bit_depth) \ -- { \ -- Subsample_Nearest(src_tex, dst, \ -- dst_width, dst_height, dst_pitch, \ -- src_width, src_height, \ -- bit_depth); \ -- } -- --NEAREST_KERNEL(uchar) --NEAREST_KERNEL(uchar2) --NEAREST_KERNEL(uchar4) -- --NEAREST_KERNEL(ushort) --NEAREST_KERNEL(ushort2) --NEAREST_KERNEL(ushort4) -- --#define BILINEAR_KERNEL(T) \ -- __global__ void Subsample_Bilinear_ ## T(cudaTextureObject_t src_tex, \ -- T *dst, \ -- int dst_width, int dst_height, int dst_pitch, \ -- int src_width, int src_height, \ -- int bit_depth) \ -- { \ -- Subsample_Bilinear(src_tex, dst, \ -- dst_width, dst_height, dst_pitch, \ -- src_width, src_height, \ -- bit_depth); \ -- } -- --BILINEAR_KERNEL(uchar) --BILINEAR_KERNEL(uchar2) --BILINEAR_KERNEL(uchar4) -- --BILINEAR_KERNEL(ushort) --BILINEAR_KERNEL(ushort2) --BILINEAR_KERNEL(ushort4) -+#define VARIANT(SRC, DST, CONV, SHIFT, PITCH, DITHER, NAME) \ -+__global__ void Subsample_Bilinear_ ## NAME(cudaTextureObject_t tex, \ -+ DST *dst, \ -+ int dst_width, int dst_height, int dst_pitch, \ -+ int src_width, int src_height, \ -+ cudaTextureObject_t ditherTex) \ -+{ \ -+ Subsample_Bilinear(tex, dst, dst_width, dst_height, dst_pitch, \ -+ src_width, src_height, ditherTex); \ -+} -+ -+#define VARIANTSET2(SRC, DST, SHIFT, NAME) \ -+ VARIANT(SRC, DST, add_conv_shift1_d, SHIFT, 1, (sizeof(DST) < sizeof(SRC)) ? sizeof(DST) : 0, NAME) \ -+ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 1, 0, NAME ## _c) \ -+ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 2, 0, NAME ## _p2) \ -+ VARIANT(SRC ## 2, DST ## 2, add_conv_shift2, SHIFT, 1, 0, NAME ## _2) \ -+ VARIANT(SRC ## 2, DST, add_conv_shift2_x, SHIFT, 1, 0, NAME ## _2_u) \ -+ VARIANT(SRC ## 2, DST, add_conv_shift2_y, SHIFT, 1, 0, NAME ## _2_v) \ -+ VARIANT(SRC ## 4, DST ## 4, add_conv_shift4, SHIFT, 1, 0, NAME ## _4) -+ -+#define VARIANTSET(SRC, DST, SRCSIZE, DSTSIZE) \ -+ VARIANTSET2(SRC, DST, (SRCSIZE - DSTSIZE), SRCSIZE ## _ ## DSTSIZE) -+ -+// Straight no-conversion -+VARIANTSET(uchar, uchar, 8, 8) -+VARIANTSET(ushort, ushort, 16, 16) -+ -+// Conversion between 8- and 16-bit -+VARIANTSET(uchar, ushort, 8, 16) -+VARIANTSET(ushort, uchar, 16, 8) - - } -Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.h -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.h -+++ /dev/null -@@ -1,28 +0,0 @@ --/* -- * This file is part of FFmpeg. -- * -- * Permission is hereby granted, free of charge, to any person obtaining a -- * copy of this software and associated documentation files (the "Software"), -- * to deal in the Software without restriction, including without limitation -- * the rights to use, copy, modify, merge, publish, distribute, sublicense, -- * and/or sell copies of the Software, and to permit persons to whom the -- * Software is furnished to do so, subject to the following conditions: -- * -- * The above copyright notice and this permission notice shall be included in -- * all copies or substantial portions of the Software. 
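Note: the VARIANTSET() tables above encode each kernel's input depth, output depth and plane handling directly in its name (luma uses the bare name, chroma one of the suffixes), and cudascale_config_props() earlier in this patch resolves them with cuModuleGetFunction(). A host-side sketch of that naming scheme; bits() and pick_chroma_kernel_name() are hypothetical helpers standing in for the patch's macros:

    #include <stdio.h>

    /* Depths are rounded up to whole bytes, as the BITS() macro does. */
    static int bits(int depth) { return (depth + 7) & ~7; }

    static int pick_chroma_kernel_name(char *buf, size_t size,
                                       int in_depth, int out_depth,
                                       int in_planes, int out_planes)
    {
        const char *suffix;

        if      (in_planes == 3 && out_planes == 3) suffix = "_c";   /* planar -> planar */
        else if (in_planes == 3 && out_planes == 2) suffix = "_p2";  /* planar -> semi-planar */
        else if (in_planes == 2 && out_planes == 2) suffix = "_2";   /* semi-planar both sides */
        else if (in_planes == 2 && out_planes == 3) suffix = "_2_u"; /* paired with a _2_v pass */
        else return -1;                                              /* unsupported layout */

        snprintf(buf, size, "Subsample_Bilinear_%d_%d%s",
                 bits(in_depth), bits(out_depth), suffix);
        return 0;
    }

    int main(void)
    {
        char name[64];
        /* e.g. P016 (16 bit, 2 planes) -> NV12 (8 bit, 2 planes) */
        if (!pick_chroma_kernel_name(name, sizeof(name), 16, 8, 2, 2))
            printf("%s\n", name); /* Subsample_Bilinear_16_8_2 */
        return 0;
    }

Keying the lookup on rounded depth plus plane count, instead of on pixel format as the deleted code did, is what lets one kernel table serve yuv420p, nv12, p010, p016 and the 16-bit planar formats.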
-- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -- * DEALINGS IN THE SOFTWARE. -- */ -- --#ifndef AVFILTER_SCALE_CUDA_H --#define AVFILTER_SCALE_CUDA_H -- --#define SCALE_CUDA_PARAM_DEFAULT 999999.0f -- --#endif -Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda_bicubic.cu -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda_bicubic.cu -+++ /dev/null -@@ -1,224 +0,0 @@ --/* -- * This file is part of FFmpeg. -- * -- * Permission is hereby granted, free of charge, to any person obtaining a -- * copy of this software and associated documentation files (the "Software"), -- * to deal in the Software without restriction, including without limitation -- * the rights to use, copy, modify, merge, publish, distribute, sublicense, -- * and/or sell copies of the Software, and to permit persons to whom the -- * Software is furnished to do so, subject to the following conditions: -- * -- * The above copyright notice and this permission notice shall be included in -- * all copies or substantial portions of the Software. -- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -- * DEALINGS IN THE SOFTWARE. -- */ -- --#include "cuda/vector_helpers.cuh" --#include "vf_scale_cuda.h" -- --typedef float4 (*coeffs_function_t)(float, float); -- --__device__ inline float4 lanczos_coeffs(float x, float param) --{ -- const float pi = 3.141592654f; -- -- float4 res = make_float4( -- pi * (x + 1), -- pi * x, -- pi * (x - 1), -- pi * (x - 2)); -- -- res.x = res.x == 0.0f ? 1.0f : -- __sinf(res.x) * __sinf(res.x / 2.0f) / (res.x * res.x / 2.0f); -- res.y = res.y == 0.0f ? 1.0f : -- __sinf(res.y) * __sinf(res.y / 2.0f) / (res.y * res.y / 2.0f); -- res.z = res.z == 0.0f ? 1.0f : -- __sinf(res.z) * __sinf(res.z / 2.0f) / (res.z * res.z / 2.0f); -- res.w = res.w == 0.0f ? 1.0f : -- __sinf(res.w) * __sinf(res.w / 2.0f) / (res.w * res.w / 2.0f); -- -- return res / (res.x + res.y + res.z + res.w); --} -- --__device__ inline float4 bicubic_coeffs(float x, float param) --{ -- const float A = param == SCALE_CUDA_PARAM_DEFAULT ? 
0.0f : -param; -- -- float4 res; -- res.x = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A; -- res.y = ((A + 2) * x - (A + 3)) * x * x + 1; -- res.z = ((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1; -- res.w = 1.0f - res.x - res.y - res.z; -- -- return res; --} -- --__device__ inline void derived_fast_coeffs(float4 coeffs, float x, float *h0, float *h1, float *s) --{ -- float g0 = coeffs.x + coeffs.y; -- float g1 = coeffs.z + coeffs.w; -- -- *h0 = coeffs.y / g0 - 0.5f; -- *h1 = coeffs.w / g1 + 1.5f; -- *s = g0 / (g0 + g1); --} -- --template --__device__ inline V apply_coeffs(float4 coeffs, V c0, V c1, V c2, V c3) --{ -- V res = c0 * coeffs.x; -- res += c1 * coeffs.y; -- res += c2 * coeffs.z; -- res += c3 * coeffs.w; -- -- return res; --} -- --template --__device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function, -- cudaTextureObject_t src_tex, -- T *dst, -- int dst_width, int dst_height, int dst_pitch, -- int src_width, int src_height, -- int bit_depth, float param) --{ -- int xo = blockIdx.x * blockDim.x + threadIdx.x; -- int yo = blockIdx.y * blockDim.y + threadIdx.y; -- -- if (yo < dst_height && xo < dst_width) -- { -- float hscale = (float)src_width / (float)dst_width; -- float vscale = (float)src_height / (float)dst_height; -- float xi = (xo + 0.5f) * hscale - 0.5f; -- float yi = (yo + 0.5f) * vscale - 0.5f; -- float px = floor(xi); -- float py = floor(yi); -- float fx = xi - px; -- float fy = yi - py; -- -- float factor = bit_depth > 8 ? 0xFFFF : 0xFF; -- -- float4 coeffsX = coeffs_function(fx, param); -- float4 coeffsY = coeffs_function(fy, param); -- --#define PIX(x, y) tex2D(src_tex, (x), (y)) -- -- dst[yo * dst_pitch + xo] = from_floatN( -- apply_coeffs(coeffsY, -- apply_coeffs(coeffsX, PIX(px - 1, py - 1), PIX(px, py - 1), PIX(px + 1, py - 1), PIX(px + 2, py - 1)), -- apply_coeffs(coeffsX, PIX(px - 1, py ), PIX(px, py ), PIX(px + 1, py ), PIX(px + 2, py )), -- apply_coeffs(coeffsX, PIX(px - 1, py + 1), PIX(px, py + 1), PIX(px + 1, py + 1), PIX(px + 2, py + 1)), -- apply_coeffs(coeffsX, PIX(px - 1, py + 2), PIX(px, py + 2), PIX(px + 1, py + 2), PIX(px + 2, py + 2)) -- ) * factor -- ); -- --#undef PIX -- } --} -- --/* This does not yield correct results. Most likely because of low internal precision in tex2D linear interpolation */ --template --__device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function, -- cudaTextureObject_t src_tex, -- T *dst, -- int dst_width, int dst_height, int dst_pitch, -- int src_width, int src_height, -- int bit_depth, float param) --{ -- int xo = blockIdx.x * blockDim.x + threadIdx.x; -- int yo = blockIdx.y * blockDim.y + threadIdx.y; -- -- if (yo < dst_height && xo < dst_width) -- { -- float hscale = (float)src_width / (float)dst_width; -- float vscale = (float)src_height / (float)dst_height; -- float xi = (xo + 0.5f) * hscale - 0.5f; -- float yi = (yo + 0.5f) * vscale - 0.5f; -- float px = floor(xi); -- float py = floor(yi); -- float fx = xi - px; -- float fy = yi - py; -- -- float factor = bit_depth > 8 ? 
0xFFFF : 0xFF; -- -- float4 coeffsX = coeffs_function(fx, param); -- float4 coeffsY = coeffs_function(fy, param); -- -- float h0x, h1x, sx; -- float h0y, h1y, sy; -- derived_fast_coeffs(coeffsX, fx, &h0x, &h1x, &sx); -- derived_fast_coeffs(coeffsY, fy, &h0y, &h1y, &sy); -- --#define PIX(x, y) tex2D(src_tex, (x), (y)) -- -- floatT pix[4] = { -- PIX(px + h0x, py + h0y), -- PIX(px + h1x, py + h0y), -- PIX(px + h0x, py + h1y), -- PIX(px + h1x, py + h1y) -- }; -- --#undef PIX -- -- dst[yo * dst_pitch + xo] = from_floatN( -- lerp_scalar( -- lerp_scalar(pix[3], pix[2], sx), -- lerp_scalar(pix[1], pix[0], sx), -- sy) * factor -- ); -- } --} -- --extern "C" { -- --#define BICUBIC_KERNEL(T) \ -- __global__ void Subsample_Bicubic_ ## T(cudaTextureObject_t src_tex, \ -- T *dst, \ -- int dst_width, int dst_height, int dst_pitch, \ -- int src_width, int src_height, \ -- int bit_depth, float param) \ -- { \ -- Subsample_Bicubic(&bicubic_coeffs, src_tex, dst, \ -- dst_width, dst_height, dst_pitch, \ -- src_width, src_height, \ -- bit_depth, param); \ -- } -- --BICUBIC_KERNEL(uchar) --BICUBIC_KERNEL(uchar2) --BICUBIC_KERNEL(uchar4) -- --BICUBIC_KERNEL(ushort) --BICUBIC_KERNEL(ushort2) --BICUBIC_KERNEL(ushort4) -- -- --#define LANCZOS_KERNEL(T) \ -- __global__ void Subsample_Lanczos_ ## T(cudaTextureObject_t src_tex, \ -- T *dst, \ -- int dst_width, int dst_height, int dst_pitch, \ -- int src_width, int src_height, \ -- int bit_depth, float param) \ -- { \ -- Subsample_Bicubic(&lanczos_coeffs, src_tex, dst, \ -- dst_width, dst_height, dst_pitch, \ -- src_width, src_height, \ -- bit_depth, param); \ -- } -- --LANCZOS_KERNEL(uchar) --LANCZOS_KERNEL(uchar2) --LANCZOS_KERNEL(uchar4) -- --LANCZOS_KERNEL(ushort) --LANCZOS_KERNEL(ushort2) --LANCZOS_KERNEL(ushort4) -- --} diff --git a/debian/patches/0005-cuda-tonemap-impl.patch b/debian/patches/0005-cuda-tonemap-impl.patch deleted file mode 100644 index 4a4c483206b..00000000000 --- a/debian/patches/0005-cuda-tonemap-impl.patch +++ /dev/null @@ -1,1824 +0,0 @@ -Index: jellyfin-ffmpeg/compat/cuda/cuda_runtime.h -=================================================================== ---- jellyfin-ffmpeg.orig/compat/cuda/cuda_runtime.h -+++ jellyfin-ffmpeg/compat/cuda/cuda_runtime.h -@@ -33,55 +33,69 @@ - #define max(a, b) ((a) > (b) ? (a) : (b)) - #define min(a, b) ((a) < (b) ? (a) : (b)) - #define abs(x) ((x) < 0 ? 
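bicubic_coeffs above is the Keys cubic family with A = -param (A = 0 when the default sentinel is passed); the fourth weight is derived so the four taps always sum to 1. A host-side C sketch, with hypothetical names:

#include <stdio.h>

/* Keys cubic 4-tap weights for sampling phase x in [0,1); A is the
 * sharpness parameter (A = -0.5 gives Catmull-Rom). Mirrors
 * bicubic_coeffs above. */
static void keys_weights(float x, float A, float w[4])
{
    w[0] = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A;
    w[1] = ((A + 2) * x - (A + 3)) * x * x + 1;
    w[2] = ((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1;
    w[3] = 1.0f - w[0] - w[1] - w[2]; /* enforce a partition of unity */
}

int main(void)
{
    float w[4];
    keys_weights(0.5f, -0.5f, w);
    /* prints -0.0625 0.5625 0.5625 -0.0625 */
    printf("%g %g %g %g\n", w[0], w[1], w[2], w[3]);
    return 0;
}

lanczos_coeffs, by contrast, has to normalize explicitly by the weight sum, since the windowed-sinc taps do not sum to 1 by construction.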
-(x) : (x)) -+#define clamp(a, b, c) min(max((a), (b)), (c)) - - #define atomicAdd(a, b) (__atomic_fetch_add(a, b, __ATOMIC_SEQ_CST)) - - // Basic typedefs - typedef __device_builtin__ unsigned long long cudaTextureObject_t; - --typedef struct __device_builtin__ __align__(2) uchar2 --{ -- unsigned char x, y; --} uchar2; -- --typedef struct __device_builtin__ __align__(4) ushort2 --{ -- unsigned short x, y; --} ushort2; -- --typedef struct __device_builtin__ uint3 --{ -- unsigned int x, y, z; --} uint3; -- --typedef struct uint3 dim3; -- --typedef struct __device_builtin__ __align__(8) int2 --{ -- int x, y; --} int2; -+#define MAKE_VECTORS(type, base) \ -+typedef struct __device_builtin__ type##1 { \ -+ base x; \ -+} type##1; \ -+static __inline__ __device__ type##1 make_##type##1(base x) { \ -+ type##1 ret; \ -+ ret.x = x; \ -+ return ret; \ -+} \ -+typedef struct __device_builtin__ __align__(sizeof(base) * 2) type##2 { \ -+ base x, y; \ -+} type##2; \ -+static __inline__ __device__ type##2 make_##type##2(base x, base y) { \ -+ type##2 ret; \ -+ ret.x = x; \ -+ ret.y = y; \ -+ return ret; \ -+} \ -+typedef struct __device_builtin__ type##3 { \ -+ base x, y, z; \ -+} type##3; \ -+static __inline__ __device__ type##3 make_##type##3(base x, base y, base z) { \ -+ type##3 ret; \ -+ ret.x = x; \ -+ ret.y = y; \ -+ ret.z = z; \ -+ return ret; \ -+} \ -+typedef struct __device_builtin__ __align__(sizeof(base) * 4) type##4 { \ -+ base x, y, z, w; \ -+} type##4; \ -+static __inline__ __device__ type##4 make_##type##4(base x, base y, base z, base w) { \ -+ type##4 ret; \ -+ ret.x = x; \ -+ ret.y = y; \ -+ ret.z = z; \ -+ ret.w = w; \ -+ return ret; \ -+} - --typedef struct __device_builtin__ __align__(4) uchar4 --{ -- unsigned char x, y, z, w; --} uchar4; -+#define MAKE_TYPE - --typedef struct __device_builtin__ __align__(8) ushort4 --{ -- unsigned short x, y, z, w; --} ushort4; -+MAKE_VECTORS(uchar, unsigned char) -+MAKE_VECTORS(ushort, unsigned short) -+MAKE_VECTORS(int, int) -+MAKE_VECTORS(uint, unsigned int) -+MAKE_VECTORS(float, float) - --typedef struct __device_builtin__ __align__(16) int4 --{ -- int x, y, z, w; --} int4; -+typedef struct __device_builtin__ uint3 dim3; - - // Accessors for special registers - #define GETCOMP(reg, comp) \ - asm("mov.u32 %0, %%" #reg "." 
#comp ";" : "=r"(tmp)); \ - ret.comp = tmp; - --#define GET(name, reg) static inline __device__ uint3 name() {\ -+#define GET(name, reg) static __inline__ __device__ uint3 name() {\ - uint3 ret; \ - unsigned tmp; \ - GETCOMP(reg, x) \ -@@ -99,14 +113,8 @@ GET(getThreadIdx, tid) - #define blockDim (getBlockDim()) - #define threadIdx (getThreadIdx()) - --// Basic initializers (simple macros rather than inline functions) --#define make_uchar2(a, b) ((uchar2){.x = a, .y = b}) --#define make_ushort2(a, b) ((ushort2){.x = a, .y = b}) --#define make_uchar4(a, b, c, d) ((uchar4){.x = a, .y = b, .z = c, .w = d}) --#define make_ushort4(a, b, c, d) ((ushort4){.x = a, .y = b, .z = c, .w = d}) -- - // Conversions from the tex instruction's 4-register output to various types --#define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);} -+#define TEX2D(type, ret) static __inline__ __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);} - - TEX2D(unsigned char, a & 0xFF) - TEX2D(unsigned short, a & 0xFFFF) -@@ -117,15 +125,47 @@ TEX2D(ushort4, make_ushort4((unsigned sh - - // Template calling tex instruction and converting the output to the selected type - template --static inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) -+static __inline__ __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) -+{ -+ T ret; -+ unsigned ret1, ret2, ret3, ret4; -+ asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" : -+ "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) : -+ "l"(texObject), "f"(x), "f"(y)); -+ conv(&ret, ret1, ret2, ret3, ret4); -+ return ret; -+} -+ -+static __inline__ __device__ float __exp2f(float x) -+{ -+ float ret; -+ asm("ex2.approx.f32 %0, %1;" : "=f"(ret) : "f"(x)); -+ return ret; -+} -+ -+#define __expf(x) (__exp2f((x) * 1.4427f)) -+ -+static __inline__ __device__ float __log2f(float x) - { -- T ret; -- unsigned ret1, ret2, ret3, ret4; -- asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" : -- "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) : -- "l"(texObject), "f"(x), "f"(y)); -- conv(&ret, ret1, ret2, ret3, ret4); -- return ret; -+ float ret; -+ asm("lg2.approx.f32 %0, %1;" : "=f"(ret) : "f"(x)); -+ return ret; - } - --#endif /* COMPAT_CUDA_CUDA_RUNTIME_H */ -+#define __logf(x) (__log2f((x)) * 0.693147f) -+#define __log10f(x) (__log2f((x)) * 0.30103f) -+ -+static __inline__ __device__ float __powf(float x, float y) -+{ -+ return __exp2f(y * __log2f(x)); -+} -+ -+static __inline__ __device__ float __sqrtf(float x) -+{ -+ float ret; -+ asm("sqrtf.approx.f32 %0, %1;" : "=f"(ret) : "f"(x)); -+ return ret; -+} -+ -+#endif -+ -Index: jellyfin-ffmpeg/configure -=================================================================== ---- jellyfin-ffmpeg.orig/configure -+++ jellyfin-ffmpeg/configure -@@ -3057,6 +3057,8 @@ scale_cuda_filter_deps="ffnvcodec" - scale_cuda_filter_deps_any="cuda_nvcc cuda_llvm" - thumbnail_cuda_filter_deps="ffnvcodec" - thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" -+tonemap_cuda_filter_deps="ffnvcodec const_nan" -+tonemap_cuda_filter_deps_any="cuda_nvcc cuda_llvm" - transpose_npp_filter_deps="ffnvcodec libnpp" - overlay_cuda_filter_deps="ffnvcodec" - overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" -Index: jellyfin-ffmpeg/ffbuild/common.mak -=================================================================== ---- jellyfin-ffmpeg.orig/ffbuild/common.mak -+++ jellyfin-ffmpeg/ffbuild/common.mak -@@ -38,6 +38,7 
@@ OBJCCFLAGS = $(CPPFLAGS) $(CFLAGS) $(OB - ASFLAGS := $(CPPFLAGS) $(ASFLAGS) - CXXFLAGS := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS) - X86ASMFLAGS += $(IFLAGS:%=%/) -I$( 0.0f) { -+ float xpow = __powf(x, 1.0f / ST2084_M2); -+ float num = max(xpow - ST2084_C1, 0.0f); -+ float den = max(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); -+ x = __powf(num / den, 1.0f / ST2084_M1); -+ return x * ST2084_MAX_LUMINANCE / REFERENCE_WHITE; -+ } else { -+ return 0.0f; -+ } -+} -+ -+// delinearizer for PQ/ST2084 -+static __inline__ __device__ float inverse_eotf_st2084(float x) { -+ if (x > 0.0f) { -+ x *= REFERENCE_WHITE / ST2084_MAX_LUMINANCE; -+ float xpow = __powf(x, ST2084_M1); -+#if 0 -+ // Original formulation from SMPTE ST 2084:2014 publication. -+ float num = ST2084_C1 + ST2084_C2 * xpow; -+ float den = 1.0f + ST2084_C3 * xpow; -+ return __powf(num / den, ST2084_M2); -+#else -+ // More stable arrangement that avoids some cancellation error. -+ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; -+ float den = 1.0f + ST2084_C3 * xpow; -+ return __powf(1.0f + num / den, ST2084_M2); -+#endif -+ } else { -+ return 0.0f; -+ } -+} -+ -+static __inline__ __device__ float ootf_1_2(float x) { -+ return x < 0.0f ? x : __powf(x, 1.2f); -+} -+ -+static __inline__ __device__ float inverse_ootf_1_2(float x) { -+ return x < 0.0f ? x : __powf(x, 1.0f / 1.2f); -+} -+ -+static __inline__ __device__ float oetf_arib_b67(float x) { -+ x = max(x, 0.0f); -+ return x <= (1.0f / 12.0f) -+ ? __sqrtf(3.0f * x) -+ : (ARIB_B67_A * __logf(12.0f * x - ARIB_B67_B) + ARIB_B67_C); -+} -+ -+static __inline__ __device__ float inverse_oetf_arib_b67(float x) { -+ x = max(x, 0.0f); -+ return x <= 0.5f -+ ? (x * x) * (1.0f / 3.0f) -+ : (__expf((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); -+} -+ -+// linearizer for HLG/ARIB-B67 -+static __inline__ __device__ float eotf_arib_b67(float x) { -+ return ootf_1_2(inverse_oetf_arib_b67(x)); -+} -+ -+// delinearizer for HLG/ARIB-B67 -+static __inline__ __device__ float inverse_eotf_arib_b67(float x) { -+ return oetf_arib_b67(inverse_ootf_1_2(x)); -+} -+ -+static __inline__ __device__ float inverse_eotf_bt1886(float x) { -+ return x < 0.0f ? 0.0f : __powf(x, 1.0f / 2.4f); -+} -+ -+static __inline__ __device__ float oetf_bt709(float x) { -+ x = max(0.0f, x); -+ return x < BT709_BETA -+ ? (x * 4.5f) -+ : (BT709_ALPHA * __powf(x, 0.45f) - (BT709_ALPHA - 1.0f)); -+} -+ -+static __inline__ __device__ float inverse_oetf_bt709(float x) { -+ return x < (4.5f * BT709_BETA) -+ ? 
(x / 4.5f) -+ : (__powf((x + (BT709_ALPHA - 1.0f)) / BT709_ALPHA, 1.0f / 0.45f)); -+} -+ -+static __inline__ __device__ float linearize(float x) -+{ -+ if (trc_src == AVCOL_TRC_SMPTE2084) -+ return eotf_st2084(x); -+ else if (trc_src == AVCOL_TRC_ARIB_STD_B67) -+ return eotf_arib_b67(x); -+ else -+ return x; -+} -+ -+static __inline__ __device__ float delinearize(float x) -+{ -+ if (trc_dst == AVCOL_TRC_BT709 || trc_dst == AVCOL_TRC_BT2020_10) -+ return inverse_eotf_bt1886(x); -+ else -+ return x; -+} -+ -+static __inline__ __device__ float3 yuv2rgb(float y, float u, float v) { -+ if (range_src == AVCOL_RANGE_JPEG) { -+ u -= 0.5f; v -= 0.5f; -+ } else { -+ y = (y * 255.0f - 16.0f) / 219.0f; -+ u = (u * 255.0f - 128.0f) / 224.0f; -+ v = (v * 255.0f - 128.0f) / 224.0f; -+ } -+ float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; -+ float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; -+ float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; -+ return make_float3(r, g, b); -+} -+ -+static __inline__ __device__ float3 yuv2lrgb(float3 yuv) { -+ float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z); -+ return make_float3(linearize(rgb.x), -+ linearize(rgb.y), -+ linearize(rgb.z)); -+} -+ -+static __inline__ __device__ float3 rgb2yuv(float r, float g, float b) { -+ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; -+ float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; -+ float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; -+ if (range_dst == AVCOL_RANGE_JPEG) { -+ u += 0.5f; v += 0.5f; -+ } else { -+ y = (219.0f * y + 16.0f) / 255.0f; -+ u = (224.0f * u + 128.0f) / 255.0f; -+ v = (224.0f * v + 128.0f) / 255.0f; -+ } -+ return make_float3(y, u, v); -+} -+ -+static __inline__ __device__ float rgb2y(float r, float g, float b) { -+ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; -+ if (range_dst != AVCOL_RANGE_JPEG) -+ y = (219.0f * y + 16.0f) / 255.0f; -+ return y; -+} -+ -+static __inline__ __device__ float3 lrgb2yuv(float3 c) { -+ float r = delinearize(c.x); -+ float g = delinearize(c.y); -+ float b = delinearize(c.z); -+ return rgb2yuv(r, g, b); -+} -+ -+static __inline__ __device__ float3 lrgb2lrgb(float3 c) { -+ if (rgb2rgb_passthrough) { -+ return c; -+ } else { -+ float r = c.x, g = c.y, b = c.z; -+ float rr = rgb2rgb_matrix[0] * r + rgb2rgb_matrix[1] * g + rgb2rgb_matrix[2] * b; -+ float gg = rgb2rgb_matrix[3] * r + rgb2rgb_matrix[4] * g + rgb2rgb_matrix[5] * b; -+ float bb = rgb2rgb_matrix[6] * r + rgb2rgb_matrix[7] * g + rgb2rgb_matrix[8] * b; -+ return make_float3(rr, gg, bb); -+ } -+} -+ -+#endif /* AVFILTER_CUDA_COLORSPACE_COMMON_H */ -Index: jellyfin-ffmpeg/libavfilter/cuda/host_util.c -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/host_util.c -@@ -0,0 +1,35 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
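The ST 2084 linearizer above reduces to a small scalar function. A host-side C sketch, using the PQ constants that appear in the OpenCL colorspace_common.cl later in this series (the CUDA header's own constants were presumably identical, which is an assumption here), with 1e-6f standing in for FLOAT_EPS; the device __powf is the ex2/lg2 approximation from the compat header:

#include <math.h>
#include <stdio.h>

#define ST2084_M1 0.1593017578125f
#define ST2084_M2 78.84375f
#define ST2084_C1 0.8359375f
#define ST2084_C2 18.8515625f
#define ST2084_C3 18.6875f
#define ST2084_MAX_LUMINANCE 10000.0f
#define REFERENCE_WHITE 203.0f

/* PQ signal -> linear light, scaled so 1.0 == REFERENCE_WHITE nits. */
static float eotf_st2084(float x)
{
    if (x <= 0.0f)
        return 0.0f;
    float xpow = powf(x, 1.0f / ST2084_M2);
    float num  = fmaxf(xpow - ST2084_C1, 0.0f);
    float den  = fmaxf(ST2084_C2 - ST2084_C3 * xpow, 1e-6f);
    return powf(num / den, 1.0f / ST2084_M1)
           * ST2084_MAX_LUMINANCE / REFERENCE_WHITE;
}

int main(void)
{
    /* PQ code value ~0.58 is roughly 203 nits, i.e. ~1.0 after scaling. */
    printf("%f\n", eotf_st2084(0.58f));
    return 0;
}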
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavfilter/colorspace.h" -+#include "host_util.h" -+ -+int ff_make_cuda_frame(FFCUDAFrame *dst, const AVFrame *src) -+{ -+ int i = 0; -+ for (i = 0; i < 4; i++) { -+ dst->data[i] = src->data[i]; -+ dst->linesize[i] = src->linesize[i]; -+ } -+ -+ dst->width = src->width; -+ dst->height = src->height; -+ -+ return 0; -+} -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/host_util.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/host_util.h -@@ -0,0 +1,29 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_HOST_UTIL_H -+#define AVFILTER_CUDA_HOST_UTIL_H -+ -+#include "libavutil/frame.h" -+ -+#include "shared.h" -+ -+int ff_make_cuda_frame(FFCUDAFrame *dst, const AVFrame *src); -+ -+#endif /* AVFILTER_CUDA_HOST_UTIL_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/pixfmt.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/pixfmt.h -@@ -0,0 +1,209 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
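The range handling in yuv2rgb/rgb2yuv above, isolated as a host-side C sketch; samples arrive already normalized to [0,1], so the classic limited-range 219/224 offsets are applied in 255ths:

#include <stdio.h>

/* Normalize a limited-range (MPEG/TV) YCbCr triple the way yuv2rgb above
 * does before applying the 3x3 matrix: Y to [0,1] from [16/255, 235/255],
 * chroma to [-0.5, 0.5] from [16/255, 240/255]. */
static void normalize_limited(float *y, float *u, float *v)
{
    *y = (*y * 255.0f - 16.0f) / 219.0f;
    *u = (*u * 255.0f - 128.0f) / 224.0f;
    *v = (*v * 255.0f - 128.0f) / 224.0f;
}

int main(void)
{
    float y = 235.0f / 255.0f, u = 128.0f / 255.0f, v = 240.0f / 255.0f;
    normalize_limited(&y, &u, &v);
    printf("y=%f u=%f v=%f\n", y, u, v); /* y=1, u=0, v=0.5 */
    return 0;
}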
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_PIXFMT_H -+#define AVFILTER_CUDA_PIXFMT_H -+ -+#include "shared.h" -+ -+extern const enum AVPixelFormat fmt_src, fmt_dst; -+extern const int depth_src, depth_dst; -+ -+// Single-sample read function -+template -+static __inline__ __device__ T read_sample(const FFCUDAFrame& frame, int x, int y) -+{ -+ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); -+ return ptr[x]; -+} -+ -+// Per-format read functions -+static __inline__ __device__ ushort3 read_p016(const FFCUDAFrame& frame, int x, int y) -+{ -+ return make_ushort3(read_sample(frame, x, y), -+ read_sample(frame, (x & ~1), y / 2), -+ read_sample(frame, (x & ~1) + 1, y / 2)); -+} -+ -+static __inline__ __device__ ushort3 read_p010(const FFCUDAFrame& frame, int x, int y) -+{ -+ ushort3 val = read_p016(frame, x, y); -+ return make_ushort3(val.x >> 6, -+ val.y >> 6, -+ val.z >> 6); -+} -+ -+static __inline__ __device__ ushort3 read_yuv420p16(const FFCUDAFrame& frame, int x, int y) -+{ -+ return make_ushort3(read_sample(frame, x, y), -+ read_sample(frame, x / 2, y / 2), -+ read_sample(frame, x / 2, y / 2)); -+} -+ -+static __inline__ __device__ ushort3 read_yuv420p10(const FFCUDAFrame& frame, int x, int y) -+{ -+ ushort3 val = read_yuv420p16(frame, x, y); -+ return make_ushort3(val.x >> 6, -+ val.y >> 6, -+ val.z >> 6); -+} -+ -+// Generic read functions -+static __inline__ __device__ ushort3 read_px(const FFCUDAFrame& frame, int x, int y) -+{ -+ if (fmt_src == AV_PIX_FMT_P016) -+ return read_p016(frame, x, y); -+ else if (fmt_src == AV_PIX_FMT_P010) -+ return read_p010(frame, x, y); -+ else -+ return make_ushort3(0, 0, 0); -+} -+ -+static __inline__ __device__ float sample_to_float(unsigned short i) -+{ -+ return (float)i / ((1 << depth_src) - 1); -+} -+ -+static __inline__ __device__ float3 pixel_to_float3(ushort3 flt) -+{ -+ return make_float3(sample_to_float(flt.x), -+ sample_to_float(flt.y), -+ sample_to_float(flt.z)); -+} -+ -+static __inline__ __device__ float3 read_px_flt(const FFCUDAFrame& frame, int x, int y) -+{ -+ return pixel_to_float3(read_px(frame, x, y)); -+} -+ -+// Single-sample write function -+template -+static __inline__ __device__ void write_sample(const FFCUDAFrame& frame, int x, int y, T sample) -+{ -+ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); -+ ptr[x] = sample; -+} -+ -+// Per-format write functions -+static __inline__ __device__ void write_nv12_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned char)a.x); -+ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); -+ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); -+ -+ write_sample<1>(frame, (x & ~1), y / 2, (unsigned char)chroma.y); -+ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned char)chroma.z); -+} -+ -+static __inline__ __device__ void write_yuv420p_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned char)a.x); -+ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); -+ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); -+ -+ write_sample<1>(frame, x / 2, y / 2, 
(unsigned char)chroma.y); -+ write_sample<2>(frame, x / 2, y / 2, (unsigned char)chroma.z); -+} -+ -+static __inline__ __device__ void write_p016_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned short)a.x); -+ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); -+ write_sample<0>(frame, x, y + 1, (unsigned short)c.x); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); -+ -+ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)chroma.y); -+ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)chroma.z); -+} -+ -+static __inline__ __device__ void write_p010_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); -+ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); -+ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); -+ -+ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)(chroma.y << 6)); -+ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)(chroma.z << 6)); -+} -+ -+static __inline__ __device__ void write_yuv420p16_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned short)a.x); -+ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); -+ write_sample<0>(frame, x, y + 1, (unsigned short)c.x); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); -+ -+ write_sample<1>(frame, x / 2, y / 2, (unsigned short)chroma.y); -+ write_sample<2>(frame, x / 2, y / 2, (unsigned short)chroma.z); -+} -+ -+static __inline__ __device__ void write_yuv420p10_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); -+ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); -+ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); -+ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); -+ -+ write_sample<1>(frame, x / 2, y / 2, (unsigned short)(chroma.y << 6)); -+ write_sample<2>(frame, x / 2, y / 2, (unsigned short)(chroma.z << 6)); -+} -+ -+// Generic write functions -+static __inline__ __device__ void write_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) -+{ -+ if (fmt_dst == AV_PIX_FMT_YUV420P) -+ write_yuv420p_2x2(frame, x, y, a, b, c, d, chroma); -+ else if (fmt_dst == AV_PIX_FMT_NV12) -+ write_nv12_2x2(frame, x, y, a, b, c, d, chroma); -+ else if (fmt_dst == AV_PIX_FMT_P016) -+ write_p016_2x2(frame, x, y, a, b, c, d, chroma); -+ else if (fmt_dst == AV_PIX_FMT_P010) -+ write_p010_2x2(frame, x, y, a, b, c, d, chroma); -+} -+ -+static __inline__ __device__ unsigned short sample_to_ushort(float flt) -+{ -+ return (unsigned short)(flt * ((1 << depth_dst) - 1)); -+} -+ -+static __inline__ __device__ ushort3 pixel_to_ushort3(float3 flt) -+{ -+ return make_ushort3(sample_to_ushort(flt.x), -+ sample_to_ushort(flt.y), -+ sample_to_ushort(flt.z)); -+} -+ -+static __inline__ __device__ void write_2x2_flt(const FFCUDAFrame& frame, int x, int y, float3 a, float3 b, float3 c, float3 d) -+{ -+ float3 chroma = get_chroma_sample(a, b, c, d); -+ -+ ushort3 ia = pixel_to_ushort3(a); -+ ushort3 ib = pixel_to_ushort3(b); -+ ushort3 ic = pixel_to_ushort3(c); -+ ushort3 id = pixel_to_ushort3(d); 
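The >> 6 in read_p010 and the << 6 in write_p010_2x2 above come from P010 storing 10-bit samples in the high bits of a 16-bit container; a minimal C illustration:

#include <stdio.h>

/* P010 keeps the 10 significant bits in bits 15..6 of each 16-bit word,
 * so reads shift right by 6 and writes shift left by 6. */
static unsigned short p010_to_10bit(unsigned short raw)  { return raw >> 6; }
static unsigned short p010_from_10bit(unsigned short v)  { return v << 6; }

int main(void)
{
    unsigned short raw = 0xFFC0;             /* maximum 10-bit sample */
    printf("%u\n", p010_to_10bit(raw));      /* 1023 */
    printf("0x%X\n", p010_from_10bit(1023)); /* 0xFFC0 */
    return 0;
}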
-+ -+ ushort3 ichroma = pixel_to_ushort3(chroma); -+ -+ write_2x2(frame, x, y, ia, ib, ic, id, ichroma); -+} -+ -+#endif /* AVFILTER_CUDA_PIXFMT_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/shared.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/shared.h -@@ -0,0 +1,32 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_SHARED_H -+#define AVFILTER_CUDA_SHARED_H -+ -+typedef struct FFCUDAFrame { -+ unsigned char *data[4]; -+ int linesize[4]; -+ -+ int width, height; -+ -+ float peak; -+} FFCUDAFrame; -+ -+#endif /* AVFILTER_CUDA_SHARED_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/tonemap.cu -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/tonemap.cu -@@ -0,0 +1,201 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "colorspace_common.h" -+#include "pixfmt.h" -+#include "tonemap.h" -+#include "util.h" -+ -+extern const enum TonemapAlgorithm tonemap_func; -+extern const float tone_param; -+extern const float desat_param; -+ -+#define mix(x, y, a) ((x) + ((y) - (x)) * (a)) -+ -+static __inline__ __device__ -+float hable_f(float in) { -+ float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; -+ return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; -+} -+ -+static __inline__ __device__ -+float direct(float s, float peak) { -+ return s; -+} -+ -+static __inline__ __device__ -+float linear(float s, float peak) { -+ return s * tone_param / peak; -+} -+ -+static __inline__ __device__ -+float gamma(float s, float peak) { -+ float p = s > 0.05f ? s / peak : 0.05f / peak; -+ float v = __powf(p, 1.0f / tone_param); -+ return s > 0.05f ? 
v : (s * v / 0.05f); -+} -+ -+static __inline__ __device__ -+float clip(float s, float peak) { -+ return clamp(s * tone_param, 0.0f, 1.0f); -+} -+ -+static __inline__ __device__ -+float reinhard(float s, float peak) { -+ return s / (s + tone_param) * (peak + tone_param) / peak; -+} -+ -+static __inline__ __device__ -+float hable(float s, float peak) { -+ return hable_f(s) / hable_f(peak); -+} -+ -+static __inline__ __device__ -+float mobius(float s, float peak) { -+ float j = tone_param; -+ float a, b; -+ -+ if (s <= j) -+ return s; -+ -+ a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); -+ b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, FLOAT_EPS); -+ -+ return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b); -+} -+ -+static __inline__ __device__ -+float bt2390(float s, float peak, float dst_peak) { -+ float peak_pq = inverse_eotf_st2084(peak); -+ float scale = 1.0f / peak_pq; -+ -+ float s_pq = inverse_eotf_st2084(s) * scale; -+ float maxLum = inverse_eotf_st2084(dst_peak) * scale; -+ -+ float ks = 1.5f * maxLum - 0.5f; -+ float tb = (s_pq - ks) / (1.0f - ks); -+ float tb2 = tb * tb; -+ float tb3 = tb2 * tb; -+ float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + -+ (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + -+ (-2.0f * tb3 + 3.0f * tb2) * maxLum; -+ float sig = (s_pq < ks) ? s_pq : pb; -+ -+ return eotf_st2084(sig * peak_pq); -+} -+ -+static __inline__ __device__ -+float map(float s, float peak, float dst_peak) -+{ -+ switch (tonemap_func) { -+ case TONEMAP_NONE: -+ default: -+ return direct(s, peak); -+ case TONEMAP_LINEAR: -+ return linear(s, peak); -+ case TONEMAP_GAMMA: -+ return gamma(s, peak); -+ case TONEMAP_CLIP: -+ return clip(s, peak); -+ case TONEMAP_REINHARD: -+ return reinhard(s, peak); -+ case TONEMAP_HABLE: -+ return hable(s, peak); -+ case TONEMAP_MOBIUS: -+ return mobius(s, peak); -+ case TONEMAP_BT2390: -+ return bt2390(s, peak, dst_peak); -+ } -+} -+ -+static __inline__ __device__ -+float3 map_one_pixel_rgb(float3 rgb, const FFCUDAFrame& src, const FFCUDAFrame& dst) { -+ float sig = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS); -+ float peak = src.peak; -+ float dst_peak = dst.peak; -+ -+ // Rescale the variables in order to bring it into a representation where -+ // 1.0 represents the dst_peak. This is because all of the tone mapping -+ // algorithms are defined in such a way that they map to the range [0.0, 1.0]. 
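The hermite spline in bt2390 above, factored into a scalar C sketch that operates on PQ-normalized values (1.0 = source peak in PQ space); inputs below the knee start ks pass through unchanged:

#include <stdio.h>

/* BT.2390 knee: compress [ks, 1] into [ks, max_lum] with a cubic hermite,
 * where max_lum is the target peak in the same PQ-normalized scale. */
static float bt2390_knee(float s_pq, float max_lum)
{
    float ks = 1.5f * max_lum - 0.5f;   /* knee start */
    if (s_pq < ks)
        return s_pq;                    /* below the knee: identity */
    float t  = (s_pq - ks) / (1.0f - ks);
    float t2 = t * t, t3 = t2 * t;
    return (2.0f * t3 - 3.0f * t2 + 1.0f) * ks +
           (t3 - 2.0f * t2 + t) * (1.0f - ks) +
           (-2.0f * t3 + 3.0f * t2) * max_lum;
}

int main(void)
{
    /* The source peak maps exactly onto the target peak... */
    printf("%f\n", bt2390_knee(1.0f, 0.75f)); /* 0.750000 */
    /* ...while values below the knee are untouched. */
    printf("%f\n", bt2390_knee(0.3f, 0.75f)); /* 0.300000 */
    return 0;
}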
-+ if (dst.peak > 1.0f) { -+ sig *= 1.0f / dst.peak; -+ peak *= 1.0f / dst.peak; -+ } -+ -+ float sig_old = sig; -+ -+ // Desaturate the color using a coefficient dependent on the signal level -+ if (desat_param > 0.0f) { -+ float luma = get_luma_dst(rgb, luma_dst); -+ float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS); -+ coeff = __powf(coeff, 10.0f / desat_param); -+ rgb = mix(rgb, make_float3(luma, luma, luma), make_float3(coeff, coeff, coeff)); -+ } -+ -+ sig = map(sig, peak, dst_peak); -+ -+ sig = min(sig, 1.0f); -+ rgb = rgb * (sig / sig_old); -+ return rgb; -+} -+ -+// Map from source space YUV to destination space RGB -+static __inline__ __device__ -+float3 map_to_dst_space_from_yuv(float3 yuv, float peak) { -+ float3 c = yuv2lrgb(yuv); -+ c = lrgb2lrgb(c); -+ return c; -+} -+ -+extern "C" { -+ -+__global__ void tonemap(FFCUDAFrame src, FFCUDAFrame dst) -+{ -+ int xi = blockIdx.x * blockDim.x + threadIdx.x; -+ int yi = blockIdx.y * blockDim.y + threadIdx.y; -+ // each work item process four pixels -+ int x = 2 * xi; -+ int y = 2 * yi; -+ -+ if (y + 1 < src.height && x + 1 < src.width) -+ { -+ float3 yuv0 = read_px_flt(src, x, y); -+ float3 yuv1 = read_px_flt(src, x + 1, y); -+ float3 yuv2 = read_px_flt(src, x, y + 1); -+ float3 yuv3 = read_px_flt(src, x + 1, y + 1); -+ -+ float3 c0 = map_to_dst_space_from_yuv(yuv0, src.peak); -+ float3 c1 = map_to_dst_space_from_yuv(yuv1, src.peak); -+ float3 c2 = map_to_dst_space_from_yuv(yuv2, src.peak); -+ float3 c3 = map_to_dst_space_from_yuv(yuv3, src.peak); -+ -+ c0 = map_one_pixel_rgb(c0, src, dst); -+ c1 = map_one_pixel_rgb(c1, src, dst); -+ c2 = map_one_pixel_rgb(c2, src, dst); -+ c3 = map_one_pixel_rgb(c3, src, dst); -+ -+ yuv0 = lrgb2yuv(c0); -+ yuv1 = lrgb2yuv(c1); -+ yuv2 = lrgb2yuv(c2); -+ yuv3 = lrgb2yuv(c3); -+ -+ write_2x2_flt(dst, x, y, yuv0, yuv1, yuv2, yuv3); -+ } -+} -+ -+} -Index: jellyfin-ffmpeg/libavfilter/cuda/tonemap.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/tonemap.h -@@ -0,0 +1,35 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_TONEMAP_H -+#define AVFILTER_CUDA_TONEMAP_H -+ -+enum TonemapAlgorithm { -+ TONEMAP_NONE, -+ TONEMAP_LINEAR, -+ TONEMAP_GAMMA, -+ TONEMAP_CLIP, -+ TONEMAP_REINHARD, -+ TONEMAP_HABLE, -+ TONEMAP_MOBIUS, -+ TONEMAP_BT2390, -+ TONEMAP_MAX, -+}; -+ -+#endif /* AVFILTER_CUDA_TONEMAP_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/cuda/util.h -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/cuda/util.h -@@ -0,0 +1,55 @@ -+/* -+ * This file is part of FFmpeg. 
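The desaturation step in map_one_pixel_rgb above pulls each channel toward luma once the signal level passes roughly 0.18; a scalar C sketch of one channel, with 1e-6f standing in for FLOAT_EPS (the 0.18 reading as "around diffuse white" is an interpretation, not stated in the source):

#include <math.h>
#include <stdio.h>

#define FLT_EPS 1e-6f

static float mixf(float x, float y, float a) { return x + (y - x) * a; }

/* Highlight desaturation: the further sig is past 0.18, the closer the
 * coefficient gets to 1 and the harder the channel is blended toward
 * luma; desat_param (0.5 by default) controls the falloff exponent. */
static float desat_channel(float c, float luma, float sig, float desat_param)
{
    float coeff = fmaxf(sig - 0.18f, FLT_EPS) / fmaxf(sig, FLT_EPS);
    coeff = powf(coeff, 10.0f / desat_param);
    return mixf(c, luma, coeff);
}

int main(void)
{
    /* A bright saturated value moves noticeably toward its luma. */
    printf("%f\n", desat_channel(4.0f, 1.0f, 4.0f, 0.5f)); /* ~2.81 */
    return 0;
}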
-+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef AVFILTER_CUDA_UTIL_H -+#define AVFILTER_CUDA_UTIL_H -+ -+static inline __device__ float3 operator+(const float3 &a, const float3 &b) { -+ return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -+} -+ -+static inline __device__ float3 operator+(const float3 &a, float b) { -+ return make_float3(a.x + b, a.y + b, a.z + b); -+} -+ -+static inline __device__ float3 operator-(const float3 &a, const float3 &b) { -+ return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -+} -+ -+static inline __device__ float3 operator-(const float3 &a, float b) { -+ return make_float3(a.x - b, a.y - b, a.z - b); -+} -+ -+static inline __device__ float3 operator*(const float3 &a, const float3 &b) { -+ return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -+} -+ -+static inline __device__ float3 operator*(const float3 &a, float b) { -+ return make_float3(a.x * b, a.y * b, a.z * b); -+} -+ -+static inline __device__ float3 operator/(const float3 &a, const float3 &b) { -+ return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -+} -+ -+static inline __device__ float3 operator/(const float3 &a, float b) { -+ return make_float3(a.x / b, a.y / b, a.z / b); -+} -+ -+#endif /* AVFILTER_CUDA_UTIL_H */ -+ -Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_cuda.c -=================================================================== ---- /dev/null -+++ jellyfin-ffmpeg/libavfilter/vf_tonemap_cuda.c -@@ -0,0 +1,712 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include -+#include -+#include -+ -+#include "libavutil/avassert.h" -+#include "libavutil/avstring.h" -+#include "libavutil/bprint.h" -+#include "libavutil/common.h" -+#include "libavutil/hwcontext.h" -+#include "libavutil/hwcontext_cuda_internal.h" -+#include "libavutil/cuda_check.h" -+#include "libavutil/internal.h" -+#include "libavutil/opt.h" -+#include "libavutil/pixdesc.h" -+ -+#include "avfilter.h" -+#include "colorspace.h" -+#include "cuda/host_util.h" -+#include "cuda/shared.h" -+#include "cuda/tonemap.h" -+#include "formats.h" -+#include "internal.h" -+#include "scale_eval.h" -+#include "video.h" -+ -+static const enum AVPixelFormat supported_formats[] = { -+ AV_PIX_FMT_YUV420P, -+ AV_PIX_FMT_NV12, -+ AV_PIX_FMT_P010, -+ AV_PIX_FMT_P016 -+}; -+ -+#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) -+#define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1)) -+#define NUM_BUFFERS 2 -+#define BLOCKX 32 -+#define BLOCKY 16 -+ -+#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) -+ -+typedef struct TonemapCUDAContext { -+ const AVClass *class; -+ -+ AVCUDADeviceContext *hwctx; -+ -+ enum AVPixelFormat in_fmt, out_fmt; -+ -+ enum AVColorTransferCharacteristic trc, in_trc, out_trc; -+ enum AVColorSpace spc, in_spc, out_spc; -+ enum AVColorPrimaries pri, in_pri, out_pri; -+ enum AVColorRange range, in_range, out_range; -+ enum AVChromaLocation in_chroma_loc, out_chroma_loc; -+ -+ AVBufferRef *frames_ctx; -+ AVFrame *frame; -+ -+ AVFrame *tmp_frame; -+ -+ /** -+ * Output sw format. AV_PIX_FMT_NONE for no conversion. 
-+ */ -+ enum AVPixelFormat format; -+ char *format_str; -+ -+ CUcontext cu_ctx; -+ CUmodule cu_module; -+ -+ CUfunction cu_func; -+ -+ CUdeviceptr srcBuffer; -+ CUdeviceptr dstBuffer; -+ -+ enum TonemapAlgorithm tonemap; -+ double param; -+ double desat_param; -+ double peak; -+ double dst_peak; -+ double scene_threshold; -+ -+ const AVPixFmtDescriptor *in_desc, *out_desc; -+} TonemapCUDAContext; -+ -+static av_cold int init(AVFilterContext *ctx) -+{ -+ TonemapCUDAContext *s = ctx->priv; -+ -+ if (!strcmp(s->format_str, "same")) { -+ s->format = AV_PIX_FMT_NONE; -+ } else { -+ s->format = av_get_pix_fmt(s->format_str); -+ if (s->format == AV_PIX_FMT_NONE) { -+ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); -+ return AVERROR(EINVAL); -+ } -+ } -+ -+ s->frame = av_frame_alloc(); -+ if (!s->frame) -+ return AVERROR(ENOMEM); -+ -+ s->tmp_frame = av_frame_alloc(); -+ if (!s->tmp_frame) -+ return AVERROR(ENOMEM); -+ -+ return 0; -+} -+ -+static av_cold void uninit(AVFilterContext *ctx) -+{ -+ TonemapCUDAContext *s = ctx->priv; -+ -+ if (s->hwctx) { -+ CudaFunctions *cu = s->hwctx->internal->cuda_dl; -+ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; -+ -+ CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); -+ -+ if (s->cu_module) { -+ CHECK_CU(cu->cuModuleUnload(s->cu_module)); -+ s->cu_func = NULL; -+ s->cu_module = NULL; -+ } -+ -+ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); -+ } -+ -+ av_frame_free(&s->frame); -+ av_buffer_unref(&s->frames_ctx); -+ av_frame_free(&s->tmp_frame); -+} -+ -+static int query_formats(AVFilterContext *ctx) -+{ -+ static const enum AVPixelFormat pixel_formats[] = { -+ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, -+ }; -+ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); -+ -+ return ff_set_common_formats(ctx, pix_fmts); -+} -+ -+static av_cold int init_stage(TonemapCUDAContext *s, AVBufferRef *device_ctx, -+ AVFilterLink *outlink) -+{ -+ AVBufferRef *out_ref = NULL; -+ AVHWFramesContext *out_ctx; -+ int ret; -+ -+ out_ref = av_hwframe_ctx_alloc(device_ctx); -+ if (!out_ref) -+ return AVERROR(ENOMEM); -+ out_ctx = (AVHWFramesContext*)out_ref->data; -+ -+ out_ctx->format = AV_PIX_FMT_CUDA; -+ out_ctx->sw_format = s->out_fmt; -+ out_ctx->width = FFALIGN(outlink->w, 32); -+ out_ctx->height = FFALIGN(outlink->h, 32); -+ -+ ret = av_hwframe_ctx_init(out_ref); -+ if (ret < 0) -+ goto fail; -+ -+ av_frame_unref(s->frame); -+ ret = av_hwframe_get_buffer(out_ref, s->frame, 0); -+ if (ret < 0) -+ goto fail; -+ -+ s->frame->width = outlink->w; -+ s->frame->height = outlink->h; -+ -+ av_buffer_unref(&s->frames_ctx); -+ s->frames_ctx = out_ref; -+ -+ return 0; -+fail: -+ av_buffer_unref(&out_ref); -+ return ret; -+} -+ -+static int format_is_supported(enum AVPixelFormat fmt) -+{ -+ int i; -+ -+ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) -+ if (supported_formats[i] == fmt) -+ return 1; -+ return 0; -+} -+ -+static av_cold int init_processing_chain(AVFilterContext *ctx, AVFilterLink *outlink) -+{ -+ TonemapCUDAContext *s = ctx->priv; -+ -+ AVHWFramesContext *in_frames_ctx; -+ -+ enum AVPixelFormat in_format; -+ enum AVPixelFormat out_format; -+ const AVPixFmtDescriptor *in_desc; -+ const AVPixFmtDescriptor *out_desc; -+ int ret; -+ -+ /* check that we have a hw context */ -+ if (!ctx->inputs[0]->hw_frames_ctx) { -+ av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); -+ return AVERROR(EINVAL); -+ } -+ in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; -+ in_format = in_frames_ctx->sw_format; -+ out_format = (s->format 
== AV_PIX_FMT_NONE) ? in_format : s->format; -+ in_desc = av_pix_fmt_desc_get(in_format); -+ out_desc = av_pix_fmt_desc_get(out_format); -+ -+ if (!format_is_supported(in_format)) { -+ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", -+ av_get_pix_fmt_name(in_format)); -+ return AVERROR(ENOSYS); -+ } -+ if (!format_is_supported(out_format)) { -+ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", -+ av_get_pix_fmt_name(out_format)); -+ return AVERROR(ENOSYS); -+ } -+ if (!(in_desc->comp[0].depth == 10 || -+ in_desc->comp[0].depth == 16)) { -+ av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", -+ in_desc->comp[0].depth); -+ return AVERROR(ENOSYS); -+ } -+ -+ s->in_fmt = in_format; -+ s->out_fmt = out_format; -+ s->in_desc = in_desc; -+ s->out_desc = out_desc; -+ -+ ret = init_stage(s, in_frames_ctx->device_ref, outlink); -+ if (ret < 0) -+ return ret; -+ -+ ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx); -+ if (!ctx->outputs[0]->hw_frames_ctx) -+ return AVERROR(ENOMEM); -+ -+ return 0; -+} -+ -+static const struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = { -+ [AVCOL_PRI_BT709] = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 }, -+ [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 }, -+}; -+ -+static const struct WhitepointCoefficients whitepoint_table[AVCOL_PRI_NB] = { -+ [AVCOL_PRI_BT709] = { 0.3127, 0.3290 }, -+ [AVCOL_PRI_BT2020] = { 0.3127, 0.3290 }, -+}; -+ -+static int get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, -+ double rgb2rgb[3][3]) { -+ double rgb2xyz[3][3], xyz2rgb[3][3]; -+ -+ ff_fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], rgb2xyz); -+ ff_matrix_invert_3x3(rgb2xyz, xyz2rgb); -+ ff_fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], rgb2xyz); -+ ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb); -+ -+ return 0; -+} -+ -+static av_cold int compile(AVFilterLink *inlink) -+{ -+ int ret = 0; -+ AVFilterContext *ctx = inlink->dst; -+ TonemapCUDAContext *s = ctx->priv; -+ CudaFunctions *cu = s->hwctx->internal->cuda_dl; -+ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; -+ AVBPrint constants; -+ CUlinkState link_state; -+ void *cubin; -+ size_t cubin_size; -+ double rgb_matrix[3][3], yuv_matrix[3][3], rgb2rgb_matrix[3][3]; -+ const struct LumaCoefficients *in_coeffs, *out_coeffs; -+ enum AVColorTransferCharacteristic in_trc = s->in_trc, out_trc = s->out_trc; -+ enum AVColorSpace in_spc = s->in_spc, out_spc = s->out_spc; -+ enum AVColorPrimaries in_pri = s->in_pri, out_pri = s->out_pri; -+ enum AVColorRange in_range = s->in_range, out_range = s->out_range; -+ char info_log[4096], error_log[4096]; -+ CUjit_option options[] = {CU_JIT_INFO_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; -+ void *option_values[] = {&info_log, &error_log, (void*)(intptr_t)sizeof(info_log), (void*)(intptr_t)sizeof(error_log)}; -+ -+ extern char tonemap_ptx[]; -+ -+ switch(s->tonemap) { -+ case TONEMAP_GAMMA: -+ if (isnan(s->param)) -+ s->param = 1.8f; -+ break; -+ case TONEMAP_REINHARD: -+ if (!isnan(s->param)) -+ s->param = (1.0f - s->param) / s->param; -+ break; -+ case TONEMAP_MOBIUS: -+ if (isnan(s->param)) -+ s->param = 0.3f; -+ break; -+ } -+ -+ if (isnan(s->param)) -+ s->param = 1.0f; -+ -+ s->dst_peak = 1.0f; -+ -+ if (in_trc == AVCOL_TRC_UNSPECIFIED) -+ in_trc = AVCOL_TRC_SMPTE2084; -+ if (out_trc == AVCOL_TRC_UNSPECIFIED) -+ out_trc = AVCOL_TRC_BT709; -+ -+ if (in_spc == AVCOL_SPC_UNSPECIFIED) -+ in_spc = 
AVCOL_SPC_BT2020_NCL;
-+    if (out_spc == AVCOL_SPC_UNSPECIFIED)
-+        out_spc = AVCOL_SPC_BT709;
-+
-+    if (in_pri == AVCOL_PRI_UNSPECIFIED)
-+        in_pri = AVCOL_PRI_BT2020;
-+    if (out_pri == AVCOL_PRI_UNSPECIFIED)
-+        out_pri = AVCOL_PRI_BT709;
-+
-+    if (in_range == AVCOL_RANGE_UNSPECIFIED)
-+        in_range = AVCOL_RANGE_MPEG;
-+    if (out_range == AVCOL_RANGE_UNSPECIFIED)
-+        out_range = AVCOL_RANGE_MPEG;
-+
-+    av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n",
-+           av_color_transfer_name(in_trc),
-+           av_color_transfer_name(out_trc));
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n",
-+           av_color_space_name(in_spc),
-+           av_color_space_name(out_spc));
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n",
-+           av_color_primaries_name(in_pri),
-+           av_color_primaries_name(out_pri));
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n",
-+           av_color_range_name(in_range),
-+           av_color_range_name(out_range));
-+
-+    if (!(in_coeffs = ff_get_luma_coefficients(in_spc)))
-+        return AVERROR(EINVAL);
-+
-+    ff_fill_rgb2yuv_table(in_coeffs, yuv_matrix);
-+    ff_matrix_invert_3x3(yuv_matrix, rgb_matrix);
-+
-+    if (!(out_coeffs = ff_get_luma_coefficients(out_spc)))
-+        return AVERROR(EINVAL);
-+
-+    ff_fill_rgb2yuv_table(out_coeffs, yuv_matrix);
-+
-+    if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0)
-+        return ret;
-+
-+    av_bprint_init(&constants, 2048, AV_BPRINT_SIZE_UNLIMITED);
-+
-+    av_bprintf(&constants, ".version 3.2\n");
-+    av_bprintf(&constants, ".target sm_30\n");
-+    av_bprintf(&constants, ".address_size %zu\n", sizeof(void*) * 8);
-+
-+#define CONSTANT_A(decl, align, ...) \
-+    av_bprintf(&constants, ".visible .const .align " #align " " decl ";\n", __VA_ARGS__)
-+#define CONSTANT(decl, ...) CONSTANT_A(decl, 4, __VA_ARGS__)
-+#define CONSTANT_M(a, b) \
-+    CONSTANT(".f32 " a "[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f}", \
-+             b[0][0], b[0][1], b[0][2], \
-+             b[1][0], b[1][1], b[1][2], \
-+             b[2][0], b[2][1], b[2][2])
-+#define CONSTANT_C(a, b) \
-+    CONSTANT(".f32 " a "[] = {%f, %f, %f}", \
-+             b->cr, b->cg, b->cb)
-+
-+    CONSTANT(".u32 depth_src = %i", (int)s->in_desc->comp[0].depth);
-+    CONSTANT(".u32 depth_dst = %i", (int)s->out_desc->comp[0].depth);
-+    CONSTANT(".u32 fmt_src = %i", (int)s->in_fmt);
-+    CONSTANT(".u32 fmt_dst = %i", (int)s->out_fmt);
-+    CONSTANT(".u32 range_src = %i", (int)in_range);
-+    CONSTANT(".u32 range_dst = %i", (int)out_range);
-+    CONSTANT(".u32 trc_src = %i", (int)in_trc);
-+    CONSTANT(".u32 trc_dst = %i", (int)out_trc);
-+    CONSTANT(".u32 chroma_loc_src = %i", (int)s->in_chroma_loc);
-+    CONSTANT(".u32 chroma_loc_dst = %i", (int)s->out_chroma_loc);
-+    CONSTANT(".u32 tonemap_func = %i", (int)s->tonemap);
-+    CONSTANT(".f32 tone_param = %f", s->param);
-+    CONSTANT(".f32 desat_param = %f", s->desat_param);
-+    CONSTANT_M("rgb_matrix", rgb_matrix);
-+    CONSTANT_M("yuv_matrix", yuv_matrix);
-+    CONSTANT_A(".u8 rgb2rgb_passthrough = %i", 1, in_pri == out_pri);
-+    CONSTANT_M("rgb2rgb_matrix", rgb2rgb_matrix);
-+    CONSTANT_C("luma_src", in_coeffs);
-+    CONSTANT_C("luma_dst", out_coeffs);
-+
-+    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
-+    if (ret < 0)
-+        return ret;
-+
-+    if (s->cu_module) {
-+        ret = CHECK_CU(cu->cuModuleUnload(s->cu_module));
-+        if (ret < 0)
-+            goto fail;
-+
-+        s->cu_func = NULL;
-+        s->cu_module = NULL;
-+    }
-+
-+    ret = CHECK_CU(cu->cuLinkCreate(sizeof(options) / sizeof(options[0]), options, option_values, &link_state));
-+    if (ret < 0)
-+        goto fail;
-+
-+    ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, constants.str,
-+                                     constants.len, "constants", 0, NULL, NULL));
-+    if (ret < 0)
-+        goto fail2;
-+
-+    ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, tonemap_ptx,
-+                                     strlen(tonemap_ptx), "tonemap.ptx", 0, NULL, NULL));
-+    if (ret < 0)
-+        goto fail2;
-+
-+    ret = CHECK_CU(cu->cuLinkComplete(link_state, &cubin, &cubin_size));
-+    if (ret < 0)
-+        goto fail2;
-+
-+    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, cubin));
-+    if (ret < 0)
-+        goto fail2;
-+
-+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func, s->cu_module, "tonemap"));
-+    if (ret < 0)
-+        goto fail2;
-+
-+fail2:
-+    CHECK_CU(cu->cuLinkDestroy(link_state));
-+
-+fail:
-+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
-+
-+    av_bprint_finalize(&constants, NULL);
-+
-+    if ((intptr_t)option_values[2] > 0)
-+        av_log(ctx, AV_LOG_INFO, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[2], info_log);
-+
-+    if ((intptr_t)option_values[3] > 0)
-+        av_log(ctx, AV_LOG_ERROR, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[3], error_log);
-+
-+    return ret;
-+}
-+
-+static av_cold int config_props(AVFilterLink *outlink)
-+{
-+    AVFilterContext *ctx = outlink->src;
-+    AVFilterLink *inlink = outlink->src->inputs[0];
-+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
-+    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
-+    TonemapCUDAContext *s = ctx->priv;
-+    int ret;
-+
-+    s->hwctx = device_hwctx;
-+
-+    outlink->w = inlink->w;
-+    outlink->h = inlink->h;
-+
-+    ret = init_processing_chain(ctx, outlink);
-+    if (ret < 0)
-+        return ret;
-+
-+    outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
-+
-+    return 0;
-+}
-+
-+static int run_kernel(AVFilterContext *ctx,
-+                      AVFrame *out, AVFrame *in)
-+{
-+    TonemapCUDAContext *s = ctx->priv;
-+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
-+    FFCUDAFrame src, dst;
-+    void *args_uchar[] = { &src, &dst };
-+    int ret;
-+
-+    ret = ff_make_cuda_frame(&src, in);
-+    if (ret < 0)
-+        goto fail;
-+
-+    ret = ff_make_cuda_frame(&dst, out);
-+    if (ret < 0)
-+        goto fail;
-+
-+    src.peak = s->peak;
-+    if (!src.peak) {
-+        src.peak = ff_determine_signal_peak(in);
-+        av_log(s, AV_LOG_DEBUG, "Computed signal peak: %f\n", src.peak);
-+    }
-+
-+    dst.peak = s->dst_peak;
-+
-+    ret = CHECK_CU(cu->cuLaunchKernel(s->cu_func,
-+                                      DIV_UP(src.width / 2, BLOCKX), DIV_UP(src.height / 2, BLOCKY), 1,
-+                                      BLOCKX, BLOCKY, 1, 0, s->hwctx->stream, args_uchar, NULL));
-+
-+fail:
-+    return ret;
-+}
-+
-+static int do_tonemap(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
-+{
-+    TonemapCUDAContext *s = ctx->priv;
-+    AVFrame *src = in;
-+    int ret;
-+
-+    ret = run_kernel(ctx, s->frame, src);
-+    if (ret < 0)
-+        return ret;
-+
-+    src = s->frame;
-+    ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
-+    if (ret < 0)
-+        return ret;
-+
-+    av_frame_move_ref(out, s->frame);
-+    av_frame_move_ref(s->frame, s->tmp_frame);
-+
-+    s->frame->width = in->width;
-+    s->frame->height = in->height;
-+
-+    ret = av_frame_copy_props(out, in);
-+    if (ret < 0)
-+        return ret;
-+
-+    if (s->out_trc != out->color_trc ||
-+        s->out_spc != out->colorspace ||
-+        s->out_pri != out->color_primaries ||
-+        s->out_range != out->color_range ||
-+        s->out_chroma_loc != out->chroma_location) {
-+        out->color_trc = s->out_trc;
-+        out->colorspace = s->out_spc;
-+        out->color_primaries = s->out_pri;
-+        out->color_range = s->out_range;
-+        out->chroma_location = s->out_chroma_loc;
-+    }
-+
-+    return 0;
-+}
-+
-+static int filter_frame(AVFilterLink *link, AVFrame *in)
-+{
-+    AVFilterContext *ctx = link->dst;
-+    TonemapCUDAContext *s = ctx->priv;
-+    AVFilterLink *outlink = ctx->outputs[0];
-+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
-+
-+    AVFrame *out = NULL;
-+    CUcontext dummy;
-+    int ret = 0;
-+
-+    out = av_frame_alloc();
-+    if (!out) {
-+        ret = AVERROR(ENOMEM);
-+        goto fail;
-+    }
-+
-+    if (!(in->color_trc == AVCOL_TRC_SMPTE2084 ||
-+          in->color_trc == AVCOL_TRC_ARIB_STD_B67)) {
-+        av_log(ctx, AV_LOG_ERROR, "Unsupported input transfer characteristic: %s\n",
-+               av_color_transfer_name(in->color_trc));
-+        ret = AVERROR(EINVAL);
-+        goto fail;
-+    }
-+
-+    if (!s->cu_func ||
-+        s->in_trc != in->color_trc ||
-+        s->in_spc != in->colorspace ||
-+        s->in_pri != in->color_primaries ||
-+        s->in_range != in->color_range ||
-+        s->in_chroma_loc != in->chroma_location) {
-+        s->in_trc = in->color_trc;
-+        s->in_spc = in->colorspace;
-+        s->in_pri = in->color_primaries;
-+        s->in_range = in->color_range;
-+        s->in_chroma_loc = in->chroma_location;
-+
-+        s->out_trc = s->trc;
-+        s->out_spc = s->spc;
-+        s->out_pri = s->pri;
-+        s->out_range = s->range;
-+        s->out_chroma_loc = s->in_chroma_loc;
-+
-+        if ((ret = compile(link)) < 0)
-+            goto fail;
-+    }
-+
-+    ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
-+    if (ret < 0)
-+        goto fail;
-+
-+    ret = do_tonemap(ctx, out, in);
-+
-+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
-+    if (ret < 0)
-+        goto fail;
-+
-+    av_frame_free(&in);
-+
-+    ff_update_hdr_metadata(out, s->dst_peak);
-+
-+    return ff_filter_frame(outlink, out);
-+fail:
-+    av_frame_free(&in);
-+    av_frame_free(&out);
-+    return ret;
-+}
-+
-+#define OFFSET(x) offsetof(TonemapCUDAContext, x)
-+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
-+static const AVOption options[] = {
-+    { "tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" },
-+    { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, "tonemap" },
-+    { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, "tonemap" },
-+    { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, "tonemap" },
-+    { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, "tonemap" },
-+    { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" },
-+    { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, "tonemap" },
-+    { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, "tonemap" },
-+    { "bt2390", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_BT2390}, 0, 0, FLAGS, "tonemap" },
-+    { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
-+    { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
-+    { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, "transfer" },
-+    { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, "transfer" },
-+    { "matrix", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, "matrix" },
-+    { "m", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, "matrix" },
-+    { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, "matrix" },
-+    { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, "matrix" },
-+    { "primaries", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, "primaries" },
-+    { "p", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, "primaries" },
-+    { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, "primaries" },
-+    { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, "primaries" },
-+    { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_MPEG}, -1, INT_MAX, FLAGS, "range" },
-+    { "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_MPEG}, -1, INT_MAX, FLAGS, "range" },
-+    { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },
-+    { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" },
-+    { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },
-+    { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" },
-+    { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
-+    { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS },
-+    { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS },
-+    { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS },
-+    { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS },
-+    { NULL },
-+};
-+
-+static const AVClass tonemap_cuda_class = {
-+    .class_name = "tonemap_cuda",
-+    .item_name = av_default_item_name,
-+    .option = options,
-+    .version = LIBAVUTIL_VERSION_INT,
-+};
-+
-+static const AVFilterPad inputs[] = {
-+    {
-+        .name = "default",
-+        .type = AVMEDIA_TYPE_VIDEO,
-+        .filter_frame = filter_frame,
-+    },
-+    { NULL }
-+};
-+
-+static const AVFilterPad outputs[] = {
-+    {
-+        .name = "default",
-+        .type = AVMEDIA_TYPE_VIDEO,
-+        .config_props = config_props,
-+    },
-+    { NULL }
-+};
-+
-+AVFilter ff_vf_tonemap_cuda = {
-+    .name = "tonemap_cuda",
-+    .description = NULL_IF_CONFIG_SMALL("GPU accelerated HDR to SDR tonemapping"),
-+
-+    .init = init,
-+    .uninit = uninit,
-+    .query_formats = query_formats,
-+
-+    .priv_size = sizeof(TonemapCUDAContext),
-+    .priv_class = &tonemap_cuda_class,
-+
-+    .inputs = inputs,
-+    .outputs = outputs,
-+
-+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
-+};
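The compile() function above embeds the filter's runtime parameters by
printing them as PTX ".const" declarations and linking that generated
module together with the precompiled tonemap kernel, so the GPU code reads
them as constants without recompiling the CUDA source. A minimal
standalone sketch of the string-building half of that technique, assuming
only libavutil's AVBPrint API (the parameter name and value here are
illustrative, not taken from the patch):

    #include <stdio.h>
    #include "libavutil/bprint.h"

    int main(void)
    {
        AVBPrint constants;
        double tone_param = 1.0;

        av_bprint_init(&constants, 0, AV_BPRINT_SIZE_UNLIMITED);
        /* PTX module preamble, as emitted by compile() above */
        av_bprintf(&constants, ".version 3.2\n.target sm_30\n");
        /* one tuning parameter, serialized as a module-level constant */
        av_bprintf(&constants, ".visible .const .align 4 .f32 tone_param = %f;\n",
                   tone_param);
        printf("%s", constants.str);
        av_bprint_finalize(&constants, NULL);
        return 0;
    }

The resulting string is what cuLinkAddData() receives as CU_JIT_INPUT_PTX
in the code above.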
diff --git a/debian/patches/0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch b/debian/patches/0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch
deleted file mode 100644
index d88e553a90d..00000000000
--- a/debian/patches/0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch
+++ /dev/null
@@ -1,755 +0,0 @@
-Index: jellyfin-ffmpeg/libavfilter/opencl/colorspace_common.cl
-===================================================================
---- jellyfin-ffmpeg.orig/libavfilter/opencl/colorspace_common.cl
-+++ jellyfin-ffmpeg/libavfilter/opencl/colorspace_common.cl
-@@ -16,8 +16,23 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
-
-+#define BT709_ALPHA 1.09929682680944f
-+#define BT709_BETA 0.018053968510807f
-+
- #define ST2084_MAX_LUMINANCE 10000.0f
--#define REFERENCE_WHITE 100.0f
-+#define REFERENCE_WHITE 203.0f
-+
-+#define ST2084_M1 0.1593017578125f
-+#define ST2084_M2 78.84375f
-+#define ST2084_C1 0.8359375f
-+#define ST2084_C2 18.8515625f
-+#define ST2084_C3 18.6875f
-+
-+#define ARIB_B67_A 0.17883277f
-+#define ARIB_B67_B 0.28466892f
-+#define ARIB_B67_C 0.55991073f
-+
-+#define FLOAT_EPS 1.175494351e-38f
-
- #if chroma_loc == 1
- #define chroma_sample(a,b,c,d) (((a) + (c)) * 0.5f)
-@@ -33,12 +48,6 @@
- #define chroma_sample(a,b,c,d) (((a) + (b) + (c) + (d)) * 0.25f)
- #endif
-
--constant const float ST2084_M1 = 0.1593017578125f;
--constant const float ST2084_M2 = 78.84375f;
--constant const float ST2084_C1 = 0.8359375f;
--constant const float ST2084_C2 = 18.8515625f;
--constant const float ST2084_C3 = 18.6875f;
--
- float get_luma_dst(float3 c) {
-     return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z;
- }
-@@ -51,61 +60,87 @@ float3 get_chroma_sample(float3 a, float
-     return chroma_sample(a, b, c, d);
- }
-
-+// linearizer for PQ/ST2084
- float eotf_st2084(float x) {
--    float p = powr(x, 1.0f / ST2084_M2);
--    float a = max(p -ST2084_C1, 0.0f);
--    float b = max(ST2084_C2 - ST2084_C3 * p, 1e-6f);
--    float c = powr(a / b, 1.0f / ST2084_M1);
--    return x > 0.0f ? c * ST2084_MAX_LUMINANCE / REFERENCE_WHITE : 0.0f;
--}
--
--__constant const float HLG_A = 0.17883277f;
--__constant const float HLG_B = 0.28466892f;
--__constant const float HLG_C = 0.55991073f;
--
--// linearizer for HLG
--float inverse_oetf_hlg(float x) {
--    float a = 4.0f * x * x;
--    float b = exp((x - HLG_C) / HLG_A) + HLG_B;
--    return x < 0.5f ? a : b;
--}
--
--// delinearizer for HLG
--float oetf_hlg(float x) {
--    float a = 0.5f * sqrt(x);
--    float b = HLG_A * log(x - HLG_B) + HLG_C;
--    return x <= 1.0f ? a : b;
--}
--
--float3 ootf_hlg(float3 c, float peak) {
--    float luma = get_luma_src(c);
--    float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f);
--    gamma = max(1.0f, gamma);
--    float factor = peak * powr(luma, gamma - 1.0f) / powr(12.0f, gamma);
--    return c * factor;
--}
--
--float3 inverse_ootf_hlg(float3 c, float peak) {
--    float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f);
--    c *= powr(12.0f, gamma) / peak;
--    c /= powr(get_luma_dst(c), (gamma - 1.0f) / gamma);
--    return c;
-+    if (x > 0.0f) {
-+        float xpow = powr(x, 1.0f / ST2084_M2);
-+        float num = max(xpow - ST2084_C1, 0.0f);
-+        float den = max(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS);
-+        x = powr(num / den, 1.0f / ST2084_M1);
-+        return x * ST2084_MAX_LUMINANCE / REFERENCE_WHITE;
-+    } else {
-+        return 0.0f;
-+    }
-+}
-+
-+// delinearizer for PQ/ST2084
-+float inverse_eotf_st2084(float x) {
-+    if (x > 0.0f) {
-+        x *= REFERENCE_WHITE / ST2084_MAX_LUMINANCE;
-+        float xpow = powr(x, ST2084_M1);
-+#if 0
-+        // Original formulation from SMPTE ST 2084:2014 publication.
-+        float num = ST2084_C1 + ST2084_C2 * xpow;
-+        float den = 1.0f + ST2084_C3 * xpow;
-+        return powr(num / den, ST2084_M2);
-+#else
-+        // More stable arrangement that avoids some cancellation error.
-+        float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow;
-+        float den = 1.0f + ST2084_C3 * xpow;
-+        return powr(1.0f + num / den, ST2084_M2);
-+#endif
-+    } else {
-+        return 0.0f;
-+    }
-+}
-+
-+float ootf_1_2(float x) {
-+    return x < 0.0f ? x : powr(x, 1.2f);
-+}
-+
-+float inverse_ootf_1_2(float x) {
-+    return x < 0.0f ? x : powr(x, 1.0f / 1.2f);
-+}
-+
-+float oetf_arib_b67(float x) {
-+    x = max(x, 0.0f);
-+    return x <= (1.0f / 12.0f)
-+           ? sqrt(3.0f * x)
-+           : (ARIB_B67_A * log(12.0f * x - ARIB_B67_B) + ARIB_B67_C);
-+}
-+
-+float inverse_oetf_arib_b67(float x) {
-+    x = max(x, 0.0f);
-+    return x <= 0.5f
-+           ? (x * x) * (1.0f / 3.0f)
-+           : (exp((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f);
- }
-
--float inverse_eotf_bt1886(float c) {
--    return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f);
-+// linearizer for HLG/ARIB-B67
-+float eotf_arib_b67(float x) {
-+    return ootf_1_2(inverse_oetf_arib_b67(x));
- }
-
--float oetf_bt709(float c) {
--    c = c < 0.0f ? 0.0f : c;
--    float r1 = 4.5f * c;
--    float r2 = 1.099f * powr(c, 0.45f) - 0.099f;
--    return c < 0.018f ? r1 : r2;
--}
--float inverse_oetf_bt709(float c) {
--    float r1 = c / 4.5f;
--    float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f);
--    return c < 0.081f ? r1 : r2;
-+// delinearizer for HLG/ARIB-B67
-+float inverse_eotf_arib_b67(float x) {
-+    return oetf_arib_b67(inverse_ootf_1_2(x));
-+}
-+
-+float inverse_eotf_bt1886(float x) {
-+    return x < 0.0f ? 0.0f : powr(x, 1.0f / 2.4f);
-+}
-+
-+float oetf_bt709(float x) {
-+    x = max(0.0f, x);
-+    return x < BT709_BETA
-+           ? (x * 4.5f)
-+           : (BT709_ALPHA * powr(x, 0.45f) - (BT709_ALPHA - 1.0f));
-+}
-+
-+float inverse_oetf_bt709(float x) {
-+    return x < (4.5f * BT709_BETA)
-+           ? (x / 4.5f)
-+           : (powr((x + (BT709_ALPHA - 1.0f)) / BT709_ALPHA, 1.0f / 0.45f));
- }
-
- float3 yuv2rgb(float y, float u, float v) {
-@@ -187,19 +222,3 @@ float3 lrgb2lrgb(float3 c) {
-     return (float3)(rr, gg, bb);
- #endif
- }
--
--float3 ootf(float3 c, float peak) {
--#ifdef ootf_impl
--    return ootf_impl(c, peak);
--#else
--    return c;
--#endif
--}
--
--float3 inverse_ootf(float3 c, float peak) {
--#ifdef inverse_ootf_impl
--    return inverse_ootf_impl(c, peak);
--#else
--    return c;
--#endif
--}
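The PQ pair above is the core of this hunk: eotf_st2084() maps a
PQ-encoded signal to linear light normalized so that 1.0 equals
REFERENCE_WHITE (now 203 nits rather than 100), and inverse_eotf_st2084()
is its exact inverse. A self-contained C translation of the same
arithmetic, with powr/max replaced by their libm equivalents and the
constants copied from the hunk:

    #include <math.h>
    #include <stdio.h>

    #define ST2084_M1 0.1593017578125f
    #define ST2084_M2 78.84375f
    #define ST2084_C1 0.8359375f
    #define ST2084_C2 18.8515625f
    #define ST2084_C3 18.6875f
    #define ST2084_MAX_LUMINANCE 10000.0f
    #define REFERENCE_WHITE 203.0f
    #define FLOAT_EPS 1.175494351e-38f

    /* PQ signal -> linear light (1.0 == reference white) */
    static float eotf_st2084(float x)
    {
        float xpow, num, den;
        if (x <= 0.0f)
            return 0.0f;
        xpow = powf(x, 1.0f / ST2084_M2);
        num  = fmaxf(xpow - ST2084_C1, 0.0f);
        den  = fmaxf(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS);
        return powf(num / den, 1.0f / ST2084_M1) *
               ST2084_MAX_LUMINANCE / REFERENCE_WHITE;
    }

    /* linear light -> PQ signal, inverse of the above */
    static float inverse_eotf_st2084(float x)
    {
        float xpow;
        if (x <= 0.0f)
            return 0.0f;
        x *= REFERENCE_WHITE / ST2084_MAX_LUMINANCE;
        xpow = powf(x, ST2084_M1);
        return powf((ST2084_C1 + ST2084_C2 * xpow) /
                    (1.0f + ST2084_C3 * xpow), ST2084_M2);
    }

    int main(void)
    {
        /* code value 1.0 is 10000 nits, i.e. about 49.26x reference white */
        printf("%f\n", eotf_st2084(1.0f));
        /* round trip should return approximately 0.58 */
        printf("%f\n", inverse_eotf_st2084(eotf_st2084(0.58f)));
        return 0;
    }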
-Index: jellyfin-ffmpeg/libavfilter/opencl/tonemap.cl
-===================================================================
---- jellyfin-ffmpeg.orig/libavfilter/opencl/tonemap.cl
-+++ jellyfin-ffmpeg/libavfilter/opencl/tonemap.cl
-@@ -16,54 +16,50 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
-
--#define REFERENCE_WHITE 100.0f
-+#define FLOAT_EPS 1.175494351e-38f
-+
- extern float3 lrgb2yuv(float3);
- extern float lrgb2y(float3);
- extern float3 yuv2lrgb(float3);
- extern float3 lrgb2lrgb(float3);
- extern float get_luma_src(float3);
- extern float get_luma_dst(float3);
--extern float3 ootf(float3 c, float peak);
--extern float3 inverse_ootf(float3 c, float peak);
-+extern float eotf_st2084(float);
-+extern float inverse_eotf_st2084(float);
- extern float3 get_chroma_sample(float3, float3, float3, float3);
-
--struct detection_result {
--    float peak;
--    float average;
--};
--
- float hable_f(float in) {
-     float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
-     return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
- }
-
--float direct(float s, float peak) {
-+float direct(float s, float peak, float target_peak) {
-     return s;
- }
-
--float linear(float s, float peak) {
-+float linear(float s, float peak, float target_peak) {
-     return s * tone_param / peak;
- }
-
--float gamma(float s, float peak) {
--    float p = s > 0.05f ? s /peak : 0.05f / peak;
-+float gamma(float s, float peak, float target_peak) {
-+    float p = s > 0.05f ? s / peak : 0.05f / peak;
-     float v = powr(p, 1.0f / tone_param);
--    return s > 0.05f ? v : (s * v /0.05f);
-+    return s > 0.05f ? v : (s * v / 0.05f);
- }
-
--float clip(float s, float peak) {
-+float clip(float s, float peak, float target_peak) {
-     return clamp(s * tone_param, 0.0f, 1.0f);
- }
-
--float reinhard(float s, float peak) {
-+float reinhard(float s, float peak, float target_peak) {
-     return s / (s + tone_param) * (peak + tone_param) / peak;
- }
-
--float hable(float s, float peak) {
--    return hable_f(s)/hable_f(peak);
-+float hable(float s, float peak, float target_peak) {
-+    return hable_f(s) / hable_f(peak);
- }
-
--float mobius(float s, float peak) {
-+float mobius(float s, float peak, float target_peak) {
-     float j = tone_param;
-     float a, b;
-
-@@ -71,102 +67,32 @@ float mobius(float s, float peak) {
-         return s;
-
-     a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
--    b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f);
-+    b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, FLOAT_EPS);
-
-     return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b);
- }
-
--// detect peak/average signal of a frame, the algorithm was ported from:
--// libplacebo (https://github.com/haasn/libplacebo)
--struct detection_result
--detect_peak_avg(global uint *util_buf, __local uint *sum_wg,
--                float signal, float peak) {
--// layout of the util buffer
--//
--// Name:              : Size (units of 4-bytes)
--// average buffer     : detection_frames + 1
--// peak buffer        : detection_frames + 1
--// workgroup counter  : 1
--// total of peak      : 1
--// total of average   : 1
--// frame index        : 1
--// frame number       : 1
--    global uint *avg_buf = util_buf;
--    global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1;
--    global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1;
--    global uint *max_total_p = counter_wg_p + 1;
--    global uint *avg_total_p = max_total_p + 1;
--    global uint *frame_idx_p = avg_total_p + 1;
--    global uint *scene_frame_num_p = frame_idx_p + 1;
--
--    uint frame_idx = *frame_idx_p;
--    uint scene_frame_num = *scene_frame_num_p;
--
--    size_t lidx = get_local_id(0);
--    size_t lidy = get_local_id(1);
--    size_t lsizex = get_local_size(0);
--    size_t lsizey = get_local_size(1);
--    uint num_wg = get_num_groups(0) * get_num_groups(1);
--    size_t group_idx = get_group_id(0);
--    size_t group_idy = get_group_id(1);
--    struct detection_result r = {peak, sdr_avg};
--    if (lidx == 0 && lidy == 0)
--        *sum_wg = 0;
--    barrier(CLK_LOCAL_MEM_FENCE);
--
--    // update workgroup sum
--    atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE));
--    barrier(CLK_LOCAL_MEM_FENCE);
--
--    // update frame peak/avg using work-group-average.
--    if (lidx == 0 && lidy == 0) {
--        uint avg_wg = *sum_wg / (lsizex * lsizey);
--        atomic_max(&peak_buf[frame_idx], avg_wg);
--        atomic_add(&avg_buf[frame_idx], avg_wg);
--    }
--
--    if (scene_frame_num > 0) {
--        float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num);
--        float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num);
--        r.peak = max(1.0f, peak);
--        r.average = max(0.25f, avg);
--    }
-+float bt2390(float s, float peak, float target_peak) {
-+    float peak_pq = inverse_eotf_st2084(peak);
-+    float scale = 1.0f / peak_pq;
-+
-+    float s_pq = inverse_eotf_st2084(s) * scale;
-+    float maxLum = inverse_eotf_st2084(target_peak) * scale;
-+
-+    float ks = 1.5f * maxLum - 0.5f;
-+    float tb = (s_pq - ks) / (1.0f - ks);
-+    float tb2 = tb * tb;
-+    float tb3 = tb2 * tb;
-+    float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks +
-+               (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) +
-+               (-2.0f * tb3 + 3.0f * tb2) * maxLum;
-+    float sig = (s_pq < ks) ? s_pq : pb;
-
--    if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) {
--        *counter_wg_p = 0;
--        avg_buf[frame_idx] /= num_wg;
--
--        if (scene_threshold > 0.0f) {
--            uint cur_max = peak_buf[frame_idx];
--            uint cur_avg = avg_buf[frame_idx];
--            int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p;
--
--            if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) {
--                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
--                    avg_buf[i] = 0;
--                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
--                    peak_buf[i] = 0;
--                *avg_total_p = *max_total_p = 0;
--                *scene_frame_num_p = 0;
--                avg_buf[frame_idx] = cur_avg;
--                peak_buf[frame_idx] = cur_max;
--            }
--        }
--        uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1);
--        // add current frame, subtract next frame
--        *max_total_p += peak_buf[frame_idx] - peak_buf[next];
--        *avg_total_p += avg_buf[frame_idx] - avg_buf[next];
--        // reset next frame
--        peak_buf[next] = avg_buf[next] = 0;
--        *frame_idx_p = next;
--        *scene_frame_num_p = min(*scene_frame_num_p + 1,
--                                 (uint)DETECTION_FRAMES);
--    }
--    return r;
-+    return eotf_st2084(sig * peak_pq);
- }
-
--float3 map_one_pixel_rgb(float3 rgb, float peak, float average) {
--    float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f);
-+float3 map_one_pixel_rgb(float3 rgb, float peak) {
-+    float sig = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS);
-
-     // Rescale the variables in order to bring it into a representation where
-     // 1.0 represents the dst_peak. This is because all of the tone mapping
-@@ -178,30 +104,24 @@ float3 map_one_pixel_rgb(float3 rgb, flo
-
-     float sig_old = sig;
-
--    // Scale the signal to compensate for differences in the average brightness
--    float slope = min(1.0f, sdr_avg / average);
--    sig *= slope;
--    peak *= slope;
--
-     // Desaturate the color using a coefficient dependent on the signal level
-     if (desat_param > 0.0f) {
-         float luma = get_luma_dst(rgb);
--        float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f);
--        coeff = native_powr(coeff, 10.0f / desat_param);
-+        float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS);
-+        coeff = powr(coeff, 10.0f / desat_param);
-         rgb = mix(rgb, (float3)luma, (float3)coeff);
--        sig = mix(sig, luma * slope, coeff);
-     }
-
--    sig = TONE_FUNC(sig, peak);
-+    sig = TONE_FUNC(sig, peak, target_peak);
-
-     sig = min(sig, 1.0f);
--    rgb *= (sig/sig_old);
-+    rgb *= (sig / sig_old);
-     return rgb;
- }
--// map from source space YUV to destination space RGB
-+
-+// Map from source space YUV to destination space RGB
- float3 map_to_dst_space_from_yuv(float3 yuv, float peak) {
-     float3 c = yuv2lrgb(yuv);
--    c = ootf(c, peak);
-     c = lrgb2lrgb(c);
-     return c;
- }
-@@ -210,7 +130,6 @@ __kernel void tonemap(__write_only image
-                       __read_only  image2d_t src1,
-                       __write_only image2d_t dst2,
-                       __read_only  image2d_t src2,
--                      global uint *util_buf,
-                       float peak
-                       )
- {
-@@ -241,23 +160,17 @@ __kernel void tonemap(__write_only image
-     float sig3 = max(c3.x, max(c3.y, c3.z));
-     float sig = max(sig0, max(sig1, max(sig2, sig3)));
-
--    struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak);
--
-     float3 c0_old = c0, c1_old = c1, c2_old = c2;
--    c0 = map_one_pixel_rgb(c0, r.peak, r.average);
--    c1 = map_one_pixel_rgb(c1, r.peak, r.average);
--    c2 = map_one_pixel_rgb(c2, r.peak, r.average);
--    c3 = map_one_pixel_rgb(c3, r.peak, r.average);
--
--    c0 = inverse_ootf(c0, target_peak);
--    c1 = inverse_ootf(c1, target_peak);
--    c2 = inverse_ootf(c2, target_peak);
--    c3 = inverse_ootf(c3, target_peak);
-+    c0 = map_one_pixel_rgb(c0, peak);
-+    c1 = map_one_pixel_rgb(c1, peak);
-+    c2 = map_one_pixel_rgb(c2, peak);
-+    c3 = map_one_pixel_rgb(c3, peak);
-
-     y0 = lrgb2y(c0);
-     y1 = lrgb2y(c1);
-     y2 = lrgb2y(c2);
-     y3 = lrgb2y(c3);
-+
-     float3 chroma_c = get_chroma_sample(c0, c1, c2, c3);
-     float3 chroma = lrgb2yuv(chroma_c);
-
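bt2390() above does all of its work in the PQ domain: both the signal and
the target peak are converted with inverse_eotf_st2084(), normalized so
the source peak is 1.0, and run through the BT.2390 Hermite-spline knee
before converting back. The spline itself is plain arithmetic; a
standalone C version of just that step, with the same coefficients as the
kernel and sample values chosen only for illustration:

    #include <stdio.h>

    /* s_pq:   PQ-encoded signal, normalized so the source peak is 1.0
     * maxLum: normalized PQ value of the target peak */
    static float bt2390_spline(float s_pq, float maxLum)
    {
        float ks = 1.5f * maxLum - 0.5f; /* knee start */
        float tb = (s_pq - ks) / (1.0f - ks);
        float tb2 = tb * tb;
        float tb3 = tb2 * tb;
        float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks +
                   (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) +
                   (-2.0f * tb3 + 3.0f * tb2) * maxLum;
        return s_pq < ks ? s_pq : pb;
    }

    int main(void)
    {
        /* below the knee the signal passes through unchanged ... */
        printf("%f\n", bt2390_spline(0.50f, 0.75f)); /* -> 0.500000 */
        /* ... while the source peak lands exactly on the target peak */
        printf("%f\n", bt2390_spline(1.00f, 0.75f)); /* -> 0.750000 */
        return 0;
    }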
-Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_opencl.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavfilter/vf_tonemap_opencl.c
-+++ jellyfin-ffmpeg/libavfilter/vf_tonemap_opencl.c
-@@ -15,6 +15,7 @@
-  * License along with FFmpeg; if not, write to the Free Software
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
-+
- #include <float.h>
-
- #include "libavutil/avassert.h"
-@@ -31,13 +32,6 @@
- #include "video.h"
- #include "colorspace.h"
-
--// TODO:
--//      - separate peak-detection from tone-mapping kernel to solve
--//        one-frame-delay issue.
--//      - more format support
--
--#define DETECTION_FRAMES 63
--
- enum TonemapAlgorithm {
-     TONEMAP_NONE,
-     TONEMAP_LINEAR,
-@@ -46,6 +40,7 @@ enum TonemapAlgorithm {
-     TONEMAP_REINHARD,
-     TONEMAP_HABLE,
-     TONEMAP_MOBIUS,
-+    TONEMAP_BT2390,
-     TONEMAP_MAX,
- };
-
-@@ -68,12 +63,11 @@ typedef struct TonemapOpenCLContext {
-     int initialised;
-     cl_kernel kernel;
-     cl_command_queue command_queue;
--    cl_mem util_mem;
- } TonemapOpenCLContext;
-
- static const char *const linearize_funcs[AVCOL_TRC_NB] = {
--    [AVCOL_TRC_SMPTE2084] = "eotf_st2084",
--    [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg",
-+    [AVCOL_TRC_SMPTE2084] = "eotf_st2084",
-+    [AVCOL_TRC_ARIB_STD_B67] = "eotf_arib_b67",
- };
-
- static const char *const delinearize_funcs[AVCOL_TRC_NB] = {
-@@ -99,6 +93,7 @@ static const char *const tonemap_func[TO
-     [TONEMAP_REINHARD] = "reinhard",
-     [TONEMAP_HABLE] = "hable",
-     [TONEMAP_MOBIUS] = "mobius",
-+    [TONEMAP_BT2390] = "bt2390",
- };
-
- static void get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out,
-@@ -112,9 +107,6 @@ static void get_rgb2rgb_matrix(enum AVCo
- }
-
- #define OPENCL_SOURCE_NB 3
---// Average light level for SDR signals. This is equal to a signal level of 0.5
---// under a typical presentation gamma of about 2.0.
--static const float sdr_avg = 0.25f;
-
- static int tonemap_opencl_init(AVFilterContext *avctx)
- {
-@@ -127,7 +119,7 @@ static int tonemap_opencl_init(AVFilterC
-     AVBPrint header;
-     const char *opencl_sources[OPENCL_SOURCE_NB];
-
--    av_bprint_init(&header, 1024, AV_BPRINT_SIZE_AUTOMATIC);
-+    av_bprint_init(&header, 2048, AV_BPRINT_SIZE_UNLIMITED);
-
-     switch(ctx->tonemap) {
-     case TONEMAP_GAMMA:
-@@ -149,18 +141,20 @@ static int tonemap_opencl_init(AVFilterC
-
-     // SDR peak is 1.0f
-     ctx->target_peak = 1.0f;
--    av_log(ctx, AV_LOG_DEBUG, "tone mapping transfer from %s to %s\n",
-+
-+    av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n",
-            av_color_transfer_name(ctx->trc_in),
-            av_color_transfer_name(ctx->trc_out));
--    av_log(ctx, AV_LOG_DEBUG, "mapping colorspace from %s to %s\n",
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n",
-            av_color_space_name(ctx->colorspace_in),
-            av_color_space_name(ctx->colorspace_out));
--    av_log(ctx, AV_LOG_DEBUG, "mapping primaries from %s to %s\n",
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n",
-            av_color_primaries_name(ctx->primaries_in),
-            av_color_primaries_name(ctx->primaries_out));
--    av_log(ctx, AV_LOG_DEBUG, "mapping range from %s to %s\n",
-+    av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n",
-            av_color_range_name(ctx->range_in),
-            av_color_range_name(ctx->range_out));
-+
-     // checking valid value just because of limited implementaion
-     // please remove when more functionalities are implemented
-     av_assert0(ctx->trc_out == AVCOL_TRC_BT709 ||
-@@ -178,11 +172,9 @@ static int tonemap_opencl_init(AVFilterC
-                ctx->desat_param);
-     av_bprintf(&header, "__constant const float target_peak = %.4ff;\n",
-                ctx->target_peak);
--    av_bprintf(&header, "__constant const float sdr_avg = %.4ff;\n", sdr_avg);
-     av_bprintf(&header, "__constant const float scene_threshold = %.4ff;\n",
-                ctx->scene_threshold);
-     av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]);
--    av_bprintf(&header, "#define DETECTION_FRAMES %d\n", DETECTION_FRAMES);
-
-     if (ctx->primaries_out != ctx->primaries_in) {
-         get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb);
-@@ -196,6 +188,16 @@ static int tonemap_opencl_init(AVFilterC
-
-     av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc);
-
-+    av_bprintf(&header, "#define powr native_powr\n");
-+
-+    av_bprintf(&header, "#define exp native_exp\n");
-+
-+    av_bprintf(&header, "#define log native_log\n");
-+
-+    av_bprintf(&header, "#define log10 native_log10\n");
-+
-+    av_bprintf(&header, "#define sqrt native_sqrt\n");
-+
-     if (rgb2rgb_passthrough)
-         av_bprintf(&header, "#define RGB2RGB_PASSTHROUGH\n");
-     else
-@@ -205,7 +207,7 @@ static int tonemap_opencl_init(AVFilterC
-     luma_src = ff_get_luma_coefficients(ctx->colorspace_in);
-     if (!luma_src) {
-         err = AVERROR(EINVAL);
--        av_log(avctx, AV_LOG_ERROR, "unsupported input colorspace %d (%s)\n",
-+        av_log(avctx, AV_LOG_ERROR, "Unsupported input colorspace %d (%s)\n",
-                ctx->colorspace_in, av_color_space_name(ctx->colorspace_in));
-         goto fail;
-     }
-@@ -213,7 +215,7 @@ static int tonemap_opencl_init(AVFilterC
-     luma_dst = ff_get_luma_coefficients(ctx->colorspace_out);
-     if (!luma_dst) {
-         err = AVERROR(EINVAL);
--        av_log(avctx, AV_LOG_ERROR, "unsupported output colorspace %d (%s)\n",
-+        av_log(avctx, AV_LOG_ERROR, "Unsupported output colorspace %d (%s)\n",
-                ctx->colorspace_out, av_color_space_name(ctx->colorspace_out));
-         goto fail;
-     }
-@@ -225,21 +227,16 @@ static int tonemap_opencl_init(AVFilterC
-     ff_matrix_invert_3x3(rgb2yuv, yuv2rgb);
-     ff_opencl_print_const_matrix_3x3(&header, "rgb_matrix", yuv2rgb);
-
--    av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n",
-+    av_bprintf(&header, "__constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n",
-                luma_src->cr, luma_src->cg, luma_src->cb);
--    av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n",
-+    av_bprintf(&header, "__constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n",
-                luma_dst->cr, luma_dst->cg, luma_dst->cb);
-
--    av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]);
-+    av_bprintf(&header, "#define linearize %s\n",
-+               linearize_funcs[ctx->trc_in]);
-     av_bprintf(&header, "#define delinearize %s\n",
-                delinearize_funcs[ctx->trc_out]);
-
--    if (ctx->trc_in == AVCOL_TRC_ARIB_STD_B67)
--        av_bprintf(&header, "#define ootf_impl ootf_hlg\n");
--
--    if (ctx->trc_out == AVCOL_TRC_ARIB_STD_B67)
--        av_bprintf(&header, "#define inverse_ootf_impl inverse_ootf_hlg\n");
--
-     av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str);
-     opencl_sources[0] = header.str;
-     opencl_sources[1] = ff_opencl_source_tonemap;
-@@ -259,19 +256,11 @@ static int tonemap_opencl_init(AVFilterC
-     ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle);
-     CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
-
--    ctx->util_mem =
--        clCreateBuffer(ctx->ocf.hwctx->context, 0,
--                       (2 * DETECTION_FRAMES + 7) * sizeof(unsigned),
--                       NULL, &cle);
--    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create util buffer: %d.\n", cle);
--
-     ctx->initialised = 1;
-     return 0;
-
- fail:
-     av_bprint_finalize(&header, NULL);
--    if (ctx->util_mem)
--        clReleaseMemObject(ctx->util_mem);
-     if (ctx->command_queue)
-         clReleaseCommandQueue(ctx->command_queue);
-     if (ctx->kernel)
-@@ -285,11 +274,11 @@ static int tonemap_opencl_config_output(
-     TonemapOpenCLContext *s = avctx->priv;
-     int ret;
-     if (s->format == AV_PIX_FMT_NONE)
--        av_log(avctx, AV_LOG_WARNING, "format not set, use default format NV12\n");
-+        av_log(avctx, AV_LOG_WARNING, "Format not set, use default format NV12\n");
-     else {
-         if (s->format != AV_PIX_FMT_P010 &&
-             s->format != AV_PIX_FMT_NV12) {
--            av_log(avctx, AV_LOG_ERROR, "unsupported output format,"
-+            av_log(avctx, AV_LOG_ERROR, "Unsupported output format,"
-                    "only p010/nv12 supported now\n");
-             return AVERROR(EINVAL);
-         }
-@@ -315,8 +304,7 @@ static int launch_kernel(AVFilterContext
-     CL_SET_KERNEL_ARG(kernel, 1, cl_mem, &input->data[0]);
-     CL_SET_KERNEL_ARG(kernel, 2, cl_mem, &output->data[1]);
-     CL_SET_KERNEL_ARG(kernel, 3, cl_mem, &input->data[1]);
--    CL_SET_KERNEL_ARG(kernel, 4, cl_mem, &ctx->util_mem);
--    CL_SET_KERNEL_ARG(kernel, 5, cl_float, &peak);
-+    CL_SET_KERNEL_ARG(kernel, 4, cl_float, &peak);
-
-     local_work[0] = 16;
-     local_work[1] = 16;
-@@ -390,13 +378,15 @@ static int tonemap_opencl_filter_frame(A
-     if (!ctx->initialised) {
-         if (!(input->color_trc == AVCOL_TRC_SMPTE2084 ||
-               input->color_trc == AVCOL_TRC_ARIB_STD_B67)) {
--            av_log(ctx, AV_LOG_ERROR, "unsupported transfer function characteristic.\n");
-+            av_log(ctx, AV_LOG_ERROR, "Unsupported transfer function characteristic: %s\n",
-+                   av_color_transfer_name(input->color_trc));
-             err = AVERROR(ENOSYS);
-             goto fail;
-         }
-
-         if (input_frames_ctx->sw_format != AV_PIX_FMT_P010) {
--            av_log(ctx, AV_LOG_ERROR, "unsupported format in tonemap_opencl.\n");
-+            av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
-+                   av_get_pix_fmt_name(input_frames_ctx->sw_format));
-             err = AVERROR(ENOSYS);
-             goto fail;
-         }
-@@ -423,31 +413,9 @@ static int tonemap_opencl_filter_frame(A
-
-     ff_update_hdr_metadata(output, ctx->target_peak);
-
--    av_log(ctx, AV_LOG_DEBUG, "Tone-mapping output: %s, %ux%u (%"PRId64").\n",
-+    av_log(ctx, AV_LOG_DEBUG, "Tonemapping output: %s, %ux%u (%"PRId64").\n",
-            av_get_pix_fmt_name(output->format),
-            output->width, output->height, output->pts);
---#ifndef NDEBUG
--    {
--        uint32_t *ptr, *max_total_p, *avg_total_p, *frame_number_p;
--        float peak_detected, avg_detected;
--        unsigned map_size = (2 * DETECTION_FRAMES + 7) * sizeof(unsigned);
--        ptr = (void *)clEnqueueMapBuffer(ctx->command_queue, ctx->util_mem,
--                                         CL_TRUE, CL_MAP_READ, 0, map_size,
--                                         0, NULL, NULL, &cle);
--        // For the layout of the util buffer, refer tonemap.cl
--        if (ptr) {
--            max_total_p = ptr + 2 * (DETECTION_FRAMES + 1) + 1;
--            avg_total_p = max_total_p + 1;
--            frame_number_p = avg_total_p + 2;
--            peak_detected = (float)*max_total_p / (REFERENCE_WHITE * (*frame_number_p));
--            avg_detected = (float)*avg_total_p / (REFERENCE_WHITE * (*frame_number_p));
--            av_log(ctx, AV_LOG_DEBUG, "peak %f, avg %f will be used for next frame\n",
--                   peak_detected, avg_detected);
--            clEnqueueUnmapMemObject(ctx->command_queue, ctx->util_mem, ptr, 0,
--                                    NULL, NULL);
--        }
--    }
--#endif
-
-     return ff_filter_frame(outlink, output);
-
-@@ -463,8 +431,6 @@ static av_cold void tonemap_opencl_unini
-     TonemapOpenCLContext *ctx = avctx->priv;
-     cl_int cle;
-
--    if (ctx->util_mem)
--        clReleaseMemObject(ctx->util_mem);
-     if (ctx->kernel) {
-         cle = clReleaseKernel(ctx->kernel);
-         if (cle != CL_SUCCESS)
-@@ -493,6 +459,7 @@ static const AVOption tonemap_opencl_opt
-     { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" },
-     { "hable",    0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE},    0, 0, FLAGS, "tonemap" },
-     { "mobius",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS},   0, 0, FLAGS, "tonemap" },
-+    { "bt2390",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_BT2390},   0, 0, FLAGS, "tonemap" },
-     { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
-     { "t",        "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
-     { "bt709",    0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709},  0, 0, FLAGS, "transfer" },
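Two of the quieter changes in the patch above have outsized effects: the
native_* defines route all transcendental math in the kernels to the fast
OpenCL builtins, and REFERENCE_WHITE moves from 100 to 203 nits, the
diffuse-white level used by recent HDR practice. Since linear values are
normalized so 1.0 equals reference white, the same absolute luminance now
yields a smaller normalized peak. A toy illustration of that
renormalization (the 1000-nit figure is an arbitrary example):

    #include <stdio.h>

    int main(void)
    {
        float nits = 1000.0f;
        printf("normalized peak, 100-nit reference: %f\n", nits / 100.0f);
        printf("normalized peak, 203-nit reference: %f\n", nits / 203.0f);
        return 0;
    }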
diff --git a/debian/patches/0007-fix-for-fmp4-in-hlsenc.patch b/debian/patches/0007-fix-for-fmp4-in-hlsenc.patch
deleted file mode 100644
index 5610f02b425..00000000000
--- a/debian/patches/0007-fix-for-fmp4-in-hlsenc.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-Index: jellyfin-ffmpeg/libavformat/hlsenc.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavformat/hlsenc.c
-+++ jellyfin-ffmpeg/libavformat/hlsenc.c
-@@ -2672,14 +2672,13 @@ static int hls_write_packet(AVFormatCont
-
-     vs->packets_written++;
-     if (oc->pb) {
--        int64_t keyframe_pre_pos = avio_tell(oc->pb);
-         ret = ff_write_chained(oc, stream_index, pkt, s, 0);
--        if ((st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) &&
--            (pkt->flags & AV_PKT_FLAG_KEY) && !keyframe_pre_pos) {
--            av_write_frame(oc, NULL); /* Flush any buffered data */
--            vs->video_keyframe_size = avio_tell(oc->pb) - keyframe_pre_pos;
-+        vs->video_keyframe_size += pkt->size;
-+        if ((st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) && (pkt->flags & AV_PKT_FLAG_KEY)) {
-+            vs->video_keyframe_size = avio_tell(oc->pb);
-+        } else {
-+            vs->video_keyframe_pos = avio_tell(vs->out);
-         }
--        vs->video_keyframe_pos = vs->start_pos;
-         if (hls->ignore_io_errors)
-             ret = 0;
-     }
diff --git a/debian/patches/0008-fix-nvdec-exceeded-32-surfaces-error.patch b/debian/patches/0008-fix-nvdec-exceeded-32-surfaces-error.patch
deleted file mode 100644
index ed02508ae5f..00000000000
--- a/debian/patches/0008-fix-nvdec-exceeded-32-surfaces-error.patch
+++ /dev/null
@@ -1,17 +0,0 @@
-Index: jellyfin-ffmpeg/libavcodec/nvdec.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/nvdec.c
-+++ jellyfin-ffmpeg/libavcodec/nvdec.c
-@@ -303,8 +303,10 @@ static int nvdec_init_hwframes(AVCodecCo
-     frames_ctx = (AVHWFramesContext*)(*out_frames_ref)->data;
-
-     if (dummy) {
--        // Copied from ff_decode_get_hw_frames_ctx for compatibility
--        frames_ctx->initial_pool_size += 3;
-+        // The function above guarantees 1 work surface; we must guarantee 4
-+        // work surfaces (the absolute minimum), so add the missing count
-+        // without exceeding the maximum recommended for nvdec.
-+        frames_ctx->initial_pool_size = FFMIN(frames_ctx->initial_pool_size + 3, 32);
-
-         frames_ctx->free = nvdec_free_dummy;
-         frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);
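The nvdec change is a one-line policy fix: keep adding the 3 extra work
surfaces the decoder needs, but clamp the pool at the 32 surfaces nvdec
recommends as a maximum. A trivial demonstration of the clamp, with FFMIN
redefined locally so the snippet stands alone:

    #include <stdio.h>

    #define FFMIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        for (int initial = 28; initial <= 32; initial++)
            printf("pool %d -> %d\n", initial, FFMIN(initial + 3, 32));
        return 0;
    }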
diff --git a/debian/patches/0009-fix-for-nvenc-from-upstream.patch b/debian/patches/0009-fix-for-nvenc-from-upstream.patch
deleted file mode 100644
index 0eca70f361f..00000000000
--- a/debian/patches/0009-fix-for-nvenc-from-upstream.patch
+++ /dev/null
@@ -1,1716 +0,0 @@
-Index: jellyfin-ffmpeg/Changelog
-===================================================================
---- jellyfin-ffmpeg.orig/Changelog
-+++ jellyfin-ffmpeg/Changelog
-@@ -1,7 +1,7 @@
- Entries are sorted chronologically from oldest to youngest within each release,
- releases are sorted from youngest to oldest.
-
--version <next>:
-+version 4.4:
- - AudioToolbox output device
- - MacCaption demuxer
- - PGX decoder
-Index: jellyfin-ffmpeg/RELEASE_NOTES
-===================================================================
---- jellyfin-ffmpeg.orig/RELEASE_NOTES
-+++ jellyfin-ffmpeg/RELEASE_NOTES
-@@ -11,5 +11,5 @@
-
-   We hope you will like this release as much as we enjoyed working on it, and
-   as usual, if you have any questions about it, or any FFmpeg related topic,
--  feel free to join us on the #ffmpeg IRC channel (on irc.freenode.net) or ask
-+  feel free to join us on the #ffmpeg IRC channel (on irc.libera.chat) or ask
-   on the mailing-lists.
-Index: jellyfin-ffmpeg/configure
-===================================================================
---- jellyfin-ffmpeg.orig/configure
-+++ jellyfin-ffmpeg/configure
-@@ -536,7 +536,7 @@ die(){
-
- If you think configure made a mistake, make sure you are using the latest
- version from Git. If the latest version fails, report the problem to the
--ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.freenode.net.
-+ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.libera.chat.
- EOF
-     if disabled logging; then
-         cat <st->index, ost->st->id, ost->initialized, ost->inputs_done, ost->finished);
-
-     if (!ost->initialized && !ost->inputs_done)
--        return ost;
-+        return ost->unavailable ? NULL : ost;
-
-     if (!ost->finished && opts < opts_min) {
-         opts_min = opts;
-Index: jellyfin-ffmpeg/libavcodec/aacenc.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/aacenc.c
-+++ jellyfin-ffmpeg/libavcodec/aacenc.c
-@@ -28,6 +28,7 @@
-  *                    TODOs:
-  * add sane pulse detection
-  ***********************************/
-+#include <float.h>
-
- #include "libavutil/libm.h"
- #include "libavutil/float_dsp.h"
-@@ -852,7 +853,7 @@ static int aac_encode_frame(AVCodecConte
-             /* Not so fast though */
-             ratio = sqrtf(ratio);
-         }
--        s->lambda = FFMIN(s->lambda * ratio, 65536.f);
-+        s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f);
-
-         /* Keep iterating if we must reduce and lambda is in the sky */
-         if (ratio > 0.9f && ratio < 1.1f) {
-@@ -897,7 +898,7 @@ static av_cold int aac_encode_end(AVCode
- {
-     AACEncContext *s = avctx->priv_data;
-
--    av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
-+    av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN);
-
-     ff_mdct_end(&s->mdct1024);
-     ff_mdct_end(&s->mdct128);
-Index: jellyfin-ffmpeg/libavcodec/aacpsy.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/aacpsy.c
-+++ jellyfin-ffmpeg/libavcodec/aacpsy.c
-@@ -308,6 +308,9 @@ static av_cold int psy_3gpp_init(FFPsyCo
-     const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
-     const float num_bark = calc_bark((float)bandwidth);
-
-+    if (bandwidth <= 0)
-+        return AVERROR(EINVAL);
-+
-     ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
-     if (!ctx->model_priv_data)
-         return AVERROR(ENOMEM);
-@@ -794,7 +797,7 @@ static void psy_3gpp_analyze_channel(FFP
-
-     if (pe < 1.15f * desired_pe) {
-         /* 6.6.1.3.6 "Final threshold modification by linearization" */
--        norm_fac = 1.0f / norm_fac;
-+        norm_fac = norm_fac ? 1.0f / norm_fac : 0;
-         for (w = 0; w < wi->num_windows*16; w += 16) {
-             for (g = 0; g < num_bands; g++) {
-                 AacPsyBand *band = &pch->band[w+g];
-Index: jellyfin-ffmpeg/libavcodec/aarch64/hevcdsp_idct_neon.S
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/aarch64/hevcdsp_idct_neon.S
-+++ jellyfin-ffmpeg/libavcodec/aarch64/hevcdsp_idct_neon.S
-@@ -573,14 +573,13 @@ idct_16x16 10
- // void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs)
- .macro idct_dc size, bitdepth
- function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1
--        movi            v1.8h, #((1 << (14 - \bitdepth))+1)
-         ld1r            {v4.8h}, [x0]
--        add             v4.8h, v4.8h, v1.8h
--        sshr            v0.8h, v4.8h, #(15 - \bitdepth)
--        sshr            v1.8h, v4.8h, #(15 - \bitdepth)
-+        srshr           v4.8h, v4.8h, #1
-+        srshr           v0.8h, v4.8h, #(14 - \bitdepth)
-+        srshr           v1.8h, v4.8h, #(14 - \bitdepth)
- .if \size > 4
--        sshr            v2.8h, v4.8h, #(15 - \bitdepth)
--        sshr            v3.8h, v4.8h, #(15 - \bitdepth)
-+        srshr           v2.8h, v4.8h, #(14 - \bitdepth)
-+        srshr           v3.8h, v4.8h, #(14 - \bitdepth)
- .if \size > 16 /* dc 32x32 */
-         mov             x2, #4
- 1:
-Index: jellyfin-ffmpeg/libavcodec/alsdec.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/alsdec.c
-+++ jellyfin-ffmpeg/libavcodec/alsdec.c
-@@ -1632,7 +1632,7 @@ static int read_frame_data(ALSDecContext
-     AVCodecContext *avctx = ctx->avctx;
-     GetBitContext *gb = &ctx->gb;
-     unsigned int div_blocks[32];                ///< block sizes.
--    unsigned int c;
-+    int c;
-     unsigned int js_blocks[2];
-     uint32_t bs_info = 0;
-     int ret;
-@@ -1810,14 +1810,17 @@ static int decode_frame(AVCodecContext *
-     else
-         ctx->cur_frame_length = sconf->frame_length;
-
--    ctx->highest_decoded_channel = 0;
-+    ctx->highest_decoded_channel = -1;
-     // decode the frame data
-     if ((invalid_frame = read_frame_data(ctx, ra_frame)) < 0)
-         av_log(ctx->avctx, AV_LOG_WARNING,
-                "Reading frame data failed. Skipping RA unit.\n");
-
--    if (ctx->highest_decoded_channel == 0)
-+    if (ctx->highest_decoded_channel == -1) {
-+        av_log(ctx->avctx, AV_LOG_WARNING,
-+               "No channel data decoded.\n");
-         return AVERROR_INVALIDDATA;
-+    }
-
-     ctx->frame_id++;
-
-Index: jellyfin-ffmpeg/libavcodec/av1_metadata_bsf.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/av1_metadata_bsf.c
-+++ jellyfin-ffmpeg/libavcodec/av1_metadata_bsf.c
-@@ -28,6 +28,7 @@ typedef struct AV1MetadataContext {
-     CBSBSFContext common;
-
-     int td;
-+    AV1RawOBU td_obu;
-
-     int color_primaries;
-     int transfer_characteristics;
-@@ -107,12 +108,11 @@ static int av1_metadata_update_fragment(
-                                         CodedBitstreamFragment *frag)
- {
-     AV1MetadataContext *ctx = bsf->priv_data;
--    AV1RawOBU td, *obu;
-     int err, i;
-
-     for (i = 0; i < frag->nb_units; i++) {
-         if (frag->units[i].type == AV1_OBU_SEQUENCE_HEADER) {
--            obu = frag->units[i].content;
-+            AV1RawOBU *obu = frag->units[i].content;
-             err = av1_metadata_update_sequence_header(bsf, &obu->obu.sequence_header);
-             if (err < 0)
-                 return err;
-@@ -124,12 +124,8 @@ static int av1_metadata_update_fragment(
-         if (ctx->td == BSF_ELEMENT_REMOVE)
-             ff_cbs_delete_unit(frag, 0);
-     } else if (pkt && ctx->td == BSF_ELEMENT_INSERT) {
--        td = (AV1RawOBU) {
--            .header.obu_type = AV1_OBU_TEMPORAL_DELIMITER,
--        };
--
-         err = ff_cbs_insert_unit_content(frag, 0, AV1_OBU_TEMPORAL_DELIMITER,
--                                         &td, NULL);
-+                                         &ctx->td_obu, NULL);
-         if (err < 0) {
-             av_log(bsf, AV_LOG_ERROR, "Failed to insert Temporal Delimiter.\n");
-             return err;
-@@ -155,6 +151,12 @@ static const CBSBSFType av1_metadata_typ
-
- static int av1_metadata_init(AVBSFContext *bsf)
- {
-+    AV1MetadataContext *ctx = bsf->priv_data;
-+
-+    ctx->td_obu = (AV1RawOBU) {
-+        .header.obu_type = AV1_OBU_TEMPORAL_DELIMITER,
-+    };
-+
-     return ff_cbs_bsf_generic_init(bsf, &av1_metadata_type);
- }
-
-Index: jellyfin-ffmpeg/libavcodec/clearvideo.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/clearvideo.c
-+++ jellyfin-ffmpeg/libavcodec/clearvideo.c
-@@ -722,8 +722,8 @@ static av_cold int clv_decode_init(AVCod
-     }
-
-     c->tile_shift = av_log2(c->tile_size);
--    if (1U << c->tile_shift != c->tile_size) {
--        av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2.\n", c->tile_size);
-+    if (1U << c->tile_shift != c->tile_size || c->tile_shift < 1) {
-+        av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2 > 1\n", c->tile_size);
-         return AVERROR_INVALIDDATA;
-     }
-
-Index: jellyfin-ffmpeg/libavcodec/crystalhd.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/crystalhd.c
-+++ jellyfin-ffmpeg/libavcodec/crystalhd.c
-@@ -785,6 +785,7 @@ static int crystalhd_receive_frame(AVCod
-         .flush          = flush, \
-         .bsfs           = bsf_name, \
-         .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
-+        .caps_internal  = FF_CODEC_CAP_SETS_FRAME_PROPS, \
-         .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE}, \
-         .wrapper_name   = "crystalhd", \
-     };
-Index: jellyfin-ffmpeg/libavcodec/cuviddec.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/cuviddec.c
-+++ jellyfin-ffmpeg/libavcodec/cuviddec.c
-@@ -1150,6 +1150,7 @@ static const AVCodecHWConfigInternal *co
-         .flush          = cuvid_flush, \
-         .bsfs           = bsf_name, \
-         .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
-+        .caps_internal  = FF_CODEC_CAP_SETS_FRAME_PROPS, \
-         .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
-                                                         AV_PIX_FMT_NV12, \
-                                                         AV_PIX_FMT_P010, \
-Index: jellyfin-ffmpeg/libavcodec/decode.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/decode.c
-+++ jellyfin-ffmpeg/libavcodec/decode.c
-@@ -233,9 +233,11 @@ int ff_decode_get_packet(AVCodecContext
-     if (ret < 0)
-         return ret;
-
--    ret = extract_packet_props(avctx->internal, pkt);
--    if (ret < 0)
--        goto finish;
-+    if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) {
-+        ret = extract_packet_props(avctx->internal, pkt);
-+        if (ret < 0)
-+            goto finish;
-+    }
-
-     ret = apply_param_change(avctx, pkt);
-     if (ret < 0)
-@@ -502,11 +504,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
-
-         pkt->data += consumed;
-         pkt->size -= consumed;
--        avci->last_pkt_props->size -= consumed; // See extract_packet_props() comment.
-         pkt->pts = AV_NOPTS_VALUE;
-         pkt->dts = AV_NOPTS_VALUE;
--        avci->last_pkt_props->pts = AV_NOPTS_VALUE;
--        avci->last_pkt_props->dts = AV_NOPTS_VALUE;
-+        if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) {
-+            avci->last_pkt_props->size -= consumed; // See extract_packet_props() comment.
-+            avci->last_pkt_props->pts = AV_NOPTS_VALUE;
-+            avci->last_pkt_props->dts = AV_NOPTS_VALUE;
-+        }
-     }
-
-     if (got_frame)
-@@ -548,6 +552,11 @@ static int decode_receive_frame_internal
-         if (ret == AVERROR_EOF)
-             avci->draining_done = 1;
-
-+        if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS) &&
-+            IS_EMPTY(avci->last_pkt_props) && av_fifo_size(avci->pkt_props) >= sizeof(*avci->last_pkt_props))
-+            av_fifo_generic_read(avci->pkt_props,
-+                                 avci->last_pkt_props, sizeof(*avci->last_pkt_props), NULL);
-+
-         if (!ret) {
-             frame->best_effort_timestamp = guess_correct_pts(avctx,
-                                                              frame->pts,
-@@ -1738,39 +1747,37 @@ int ff_decode_frame_props(AVCodecContext
-         { AV_PKT_DATA_S12M_TIMECODE, AV_FRAME_DATA_S12M_TIMECODE },
-     };
-
--    if (IS_EMPTY(pkt) && av_fifo_size(avctx->internal->pkt_props) >= sizeof(*pkt))
--        av_fifo_generic_read(avctx->internal->pkt_props,
--                             pkt, sizeof(*pkt), NULL);
--
--    frame->pts = pkt->pts;
-+    if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_FRAME_PROPS)) {
-+        frame->pts = pkt->pts;
- #if FF_API_PKT_PTS
- FF_DISABLE_DEPRECATION_WARNINGS
--    frame->pkt_pts = pkt->pts;
-+        frame->pkt_pts = pkt->pts;
- FF_ENABLE_DEPRECATION_WARNINGS
- #endif
--    frame->pkt_pos = pkt->pos;
--    frame->pkt_duration = pkt->duration;
--    frame->pkt_size = pkt->size;
--
--    for (int i = 0; i < FF_ARRAY_ELEMS(sd); i++) {
--        buffer_size_t size;
--        uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size);
--        if (packet_sd) {
--            AVFrameSideData *frame_sd = av_frame_new_side_data(frame,
--                                                               sd[i].frame,
--                                                               size);
--            if (!frame_sd)
--                return AVERROR(ENOMEM);
-+        frame->pkt_pos = pkt->pos;
-+        frame->pkt_duration = pkt->duration;
-+        frame->pkt_size = pkt->size;
-+
-+        for (int i = 0; i < FF_ARRAY_ELEMS(sd); i++) {
-+            buffer_size_t size;
-+            uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size);
-+            if (packet_sd) {
-+                AVFrameSideData *frame_sd = av_frame_new_side_data(frame,
-+                                                                   sd[i].frame,
-+                                                                   size);
-+                if (!frame_sd)
-+                    return AVERROR(ENOMEM);
-
--            memcpy(frame_sd->data, packet_sd, size);
-+                memcpy(frame_sd->data, packet_sd, size);
-+            }
-         }
--    }
--    add_metadata_from_side_data(pkt, frame);
-+        add_metadata_from_side_data(pkt, frame);
-
--    if (pkt->flags & AV_PKT_FLAG_DISCARD) {
--        frame->flags |= AV_FRAME_FLAG_DISCARD;
--    } else {
--        frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD);
-+        if (pkt->flags & AV_PKT_FLAG_DISCARD) {
-+            frame->flags |= AV_FRAME_FLAG_DISCARD;
-+        } else {
-+            frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD);
-+        }
-     }
-     frame->reordered_opaque = avctx->reordered_opaque;
-
-Index: jellyfin-ffmpeg/libavcodec/dpx.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/dpx.c
-+++ jellyfin-ffmpeg/libavcodec/dpx.c
-@@ -242,6 +242,9 @@ static int decode_frame(AVCodecContext *
-         return AVERROR_PATCHWELCOME;
-     }
-
-+    if (bits_per_color > 31)
-+        return AVERROR_INVALIDDATA;
-+
-     buf += 820;
-     avctx->sample_aspect_ratio.num = read32(&buf, endian);
-     avctx->sample_aspect_ratio.den = read32(&buf, endian);
-@@ -316,7 +319,7 @@ static int decode_frame(AVCodecContext *
-             minCV = av_int2float(i);
-             maxCV = av_int2float(j);
-             if (bits_per_color >= 1 &&
--                minCV == 0.0f && maxCV == ((1<color_range = AVCOL_RANGE_JPEG;
-             } else if (bits_per_color >= 8 &&
-                        minCV == (1 <<(bits_per_color - 4)) &&
-Index: jellyfin-ffmpeg/libavcodec/exr.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/exr.c
-+++ jellyfin-ffmpeg/libavcodec/exr.c
-@@ -418,7 +418,7 @@ static int huf_decode(VLC *vlc, GetByteC
-
-     init_get_bits(&gbit, gb->buffer, nbits);
-     while (get_bits_left(&gbit) > 0 && oe < no) {
--        uint16_t x = get_vlc2(&gbit, vlc->table, 12, 2);
-+        uint16_t x = get_vlc2(&gbit, vlc->table, 12, 3);
-
-         if (x == run_sym) {
-             int run = get_bits(&gbit, 8);
-@@ -1059,11 +1059,11 @@ static int dwa_uncompress(EXRContext *s,
-         bytestream2_skip(&gb, ac_size);
-     }
-
--    if (dc_size > 0) {
-+    {
-         unsigned long dest_len = dc_count * 2LL;
-         GetByteContext agb = gb;
-
--        if (dc_count > (6LL * td->xsize * td->ysize + 63) / 64)
-+        if (dc_count != dc_w * dc_h * 3)
-             return AVERROR_INVALIDDATA;
-
-         av_fast_padded_malloc(&td->dc_data, &td->dc_size, FFALIGN(dest_len, 64) * 2);
-@@ -1795,6 +1795,7 @@ static int decode_header(EXRContext *s,
-         ymax = bytestream2_get_le32(gb);
-
-         if (xmin > xmax || ymin > ymax ||
-+            ymax == INT_MAX || xmax == INT_MAX ||
-             (unsigned)xmax - xmin >= INT_MAX ||
-             (unsigned)ymax - ymin >= INT_MAX) {
-             ret = AVERROR_INVALIDDATA;
-Index: jellyfin-ffmpeg/libavcodec/faxcompr.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/faxcompr.c
-+++ jellyfin-ffmpeg/libavcodec/faxcompr.c
-@@ -144,6 +144,8 @@ static int decode_uncompressed(AVCodecCo
-         return AVERROR_INVALIDDATA;
-     }
-     cwi = 10 - av_log2(cwi);
-+    if (get_bits_left(gb) < cwi + 1)
-+        return AVERROR_INVALIDDATA;
-     skip_bits(gb, cwi + 1);
-     if (cwi > 5) {
-         newmode = get_bits1(gb);
-@@ -209,6 +211,8 @@ static int decode_group3_1d_line(AVCodec
-     unsigned int run = 0;
-     unsigned int t;
-     for (;;) {
-+        if (get_bits_left(gb) <= 0)
-+            return AVERROR_INVALIDDATA;
-         t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2);
-         run += t;
-         if (t < 64) {
-@@ -227,7 +231,7 @@ static int decode_group3_1d_line(AVCodec
-             run = 0;
-             mode = !mode;
-         } else if ((int)t == -1) {
--            if (show_bits(gb, 12) == 15) {
-+            if (get_bits_left(gb) > 12 && show_bits(gb, 12) == 15) {
-                 int ret;
-                 skip_bits(gb, 12);
-                 ret = decode_uncompressed(avctx, gb, &pix_left, &runs, runend, &mode);
-@@ -254,7 +258,10 @@ static int decode_group3_2d_line(AVCodec
-     unsigned int offs = 0, run = 0;
-
-     while (offs < width) {
--        int cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1);
-+        int cmode;
-+        if (get_bits_left(gb) <= 0)
-+            return AVERROR_INVALIDDATA;
-+        cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1);
-         if (cmode == -1) {
-             av_log(avctx, AV_LOG_ERROR, "Incorrect mode VLC\n");
-             return AVERROR_INVALIDDATA;
-@@ -299,7 +306,10 @@ static int decode_group3_2d_line(AVCodec
-             mode = !mode;
-         }
-     } else if (cmode == 9 || cmode == 10) {
--        int xxx = get_bits(gb, 3);
-+        int xxx;
-+        if (get_bits_left(gb) < 3)
-+            return AVERROR_INVALIDDATA;
-+        xxx = get_bits(gb, 3);
-         if (cmode == 9 && xxx == 7) {
-             int ret;
-             int pix_left = width - offs;
-Index: jellyfin-ffmpeg/libavcodec/h263.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/h263.c
-+++ jellyfin-ffmpeg/libavcodec/h263.c
-@@ -29,6 +29,7 @@
-
- #include <limits.h>
-
-+#include "libavutil/thread.h"
- #include "avcodec.h"
- #include "mpegvideo.h"
- #include "h263.h"
-@@ -38,6 +39,17 @@
- #include "flv.h"
- #include "mpeg4video.h"
-
-+static av_cold void h263_init_rl_inter(void)
-+{
-+    static uint8_t h263_rl_inter_table[2][2 * MAX_RUN + MAX_LEVEL + 3];
-+    ff_rl_init(&ff_h263_rl_inter, h263_rl_inter_table);
-+}
-+
-+av_cold void ff_h263_init_rl_inter(void)
-+{
-+    static AVOnce init_static_once = AV_ONCE_INIT;
-+    ff_thread_once(&init_static_once, h263_init_rl_inter);
-+}
-
- void ff_h263_update_motion_val(MpegEncContext * s){
-     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
-Index: jellyfin-ffmpeg/libavcodec/h263.h
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/h263.h
-+++ jellyfin-ffmpeg/libavcodec/h263.h
-@@ -66,6 +66,7 @@ int16_t *ff_h263_pred_motion(MpegEncCont
-                              int *px, int *py);
- void ff_h263_encode_init(MpegEncContext *s);
- void ff_h263_decode_init_vlc(void);
-+void ff_h263_init_rl_inter(void);
- int ff_h263_decode_picture_header(MpegEncContext *s);
- int ff_h263_decode_gob_header(MpegEncContext *s);
- void ff_h263_update_motion_val(MpegEncContext * s);
-Index: jellyfin-ffmpeg/libavcodec/h263data.c
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/h263data.c
-+++ jellyfin-ffmpeg/libavcodec/h263data.c
-@@ -25,8 +25,6 @@
-
- #include <stdint.h>
-
--#include "libavutil/thread.h"
--
- #include "h263data.h"
- #include "mpegvideo.h"
-
-@@ -290,15 +288,3 @@ const AVRational ff_h263_pixel_aspect[16
-     { 0, 1 },
-     { 0, 1 },
- };
--
--static av_cold void h263_init_rl_inter(void)
--{
--    static uint8_t h263_rl_inter_table[2][2 * MAX_RUN + MAX_LEVEL + 3];
--    ff_rl_init(&ff_h263_rl_inter, h263_rl_inter_table);
--}
--
--av_cold void ff_h263_init_rl_inter(void)
--{
--    static AVOnce init_static_once = AV_ONCE_INIT;
--    ff_thread_once(&init_static_once, h263_init_rl_inter);
--}
-Index: jellyfin-ffmpeg/libavcodec/h263data.h
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/h263data.h
-+++ jellyfin-ffmpeg/libavcodec/h263data.h
-@@ -61,7 +61,6 @@ extern const int8_t ff_inter_run[102];
-
- extern RLTable ff_h263_rl_inter;
- extern RLTable ff_rl_intra_aic;
--void ff_h263_init_rl_inter(void);
-
- extern const uint16_t ff_h263_format[8][2];
-
-Index: jellyfin-ffmpeg/libavcodec/internal.h
-===================================================================
---- jellyfin-ffmpeg.orig/libavcodec/internal.h
-+++ jellyfin-ffmpeg/libavcodec/internal.h
-@@ -78,6 +78,11 @@
-  * Codec handles avctx->thread_count == 0 (auto) internally.
-  */
- #define FF_CODEC_CAP_AUTO_THREADS           (1 << 7)
-+/**
-+ * Codec handles output frame properties internally instead of letting the
-+ * internal logic derive them from AVCodecInternal.last_pkt_props.
-+ */
-+#define FF_CODEC_CAP_SETS_FRAME_PROPS       (1 << 8)
-
- /**
-  * AVCodec.codec_tags termination value
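FF_CODEC_CAP_SETS_FRAME_PROPS is the pivot of the decode.c changes above:
a decoder that sets it takes over timestamp and side-data propagation
itself, and the generic last_pkt_props bookkeeping is skipped. A
hypothetical declaration (not from the patch) showing how a wrapper
decoder would opt in, in the same style as the crystalhd and cuviddec
hunks; this fragment assumes libavcodec's internal headers are available:

    AVCodec ff_example_decoder = {
        .name           = "example",
        .long_name      = NULL_IF_CONFIG_SMALL("example wrapper decoder"),
        .type           = AVMEDIA_TYPE_VIDEO,
        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
        /* output frame properties are produced by the codec itself */
        .caps_internal  = FF_CODEC_CAP_SETS_FRAME_PROPS,
    };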
PRESET_ALIAS(name, name, __VA_ARGS__) -+ -+static void nvenc_map_preset(NvencContext *ctx) -+{ -+ GUIDTuple presets[] = { -+#ifdef NVENC_HAVE_NEW_PRESETS -+ PRESET(P1), -+ PRESET(P2), -+ PRESET(P3), -+ PRESET(P4), -+ PRESET(P5), -+ PRESET(P6), -+ PRESET(P7), -+ PRESET_ALIAS(SLOW, P7, NVENC_TWO_PASSES), -+ PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS), -+ PRESET_ALIAS(FAST, P1, NVENC_ONE_PASS), -+ // Compat aliases -+ PRESET_ALIAS(DEFAULT, P4, NVENC_DEPRECATED_PRESET), -+ PRESET_ALIAS(HP, P1, NVENC_DEPRECATED_PRESET), -+ PRESET_ALIAS(HQ, P7, NVENC_DEPRECATED_PRESET), -+ PRESET_ALIAS(BD, P5, NVENC_DEPRECATED_PRESET), -+ PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -+ PRESET_ALIAS(LOW_LATENCY_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -+ PRESET_ALIAS(LOW_LATENCY_HQ, P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -+ PRESET_ALIAS(LOSSLESS_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), -+ PRESET_ALIAS(LOSSLESS_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), -+#else -+ PRESET(DEFAULT), -+ PRESET(HP), -+ PRESET(HQ), -+ PRESET(BD), -+ PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES), -+ PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS), -+ PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS), -+ PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY), -+ PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY), -+ PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY), -+ PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS), -+ PRESET(LOSSLESS_HP, NVENC_LOSSLESS), -+#endif -+ }; -+ -+ GUIDTuple *t = &presets[ctx->preset]; -+ -+ ctx->init_encode_params.presetGUID = t->guid; -+ ctx->flags = t->flags; -+ -+#ifdef NVENC_HAVE_NEW_PRESETS -+ if (ctx->tuning_info == NV_ENC_TUNING_INFO_LOSSLESS) -+ ctx->flags |= NVENC_LOSSLESS; -+#endif -+} -+ -+#undef PRESET -+#undef PRESET_ALIAS -+ - static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level) - { - #if NVENCAPI_CHECK_VERSION(11, 1) -@@ -358,7 +422,7 @@ static int nvenc_check_capabilities(AVCo - } - - ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE); -- if (ctx->preset >= PRESET_LOSSLESS_DEFAULT && ret <= 0) { -+ if (ctx->flags & NVENC_LOSSLESS && ret <= 0) { - av_log(avctx, AV_LOG_WARNING, "Lossless encoding not supported\n"); - return AVERROR(ENOSYS); - } -@@ -548,6 +612,11 @@ static av_cold int nvenc_setup_device(AV - return AVERROR_BUG; - } - -+ nvenc_map_preset(ctx); -+ -+ if (ctx->flags & NVENC_DEPRECATED_PRESET) -+ av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. Use p1 to p7 + -tune or fast/medium/slow.\n"); -+ - if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) { - AVHWFramesContext *frames_ctx; - AVHWDeviceContext *hwdev_ctx; -@@ -638,65 +707,6 @@ static av_cold int nvenc_setup_device(AV - return 0; - } - --typedef struct GUIDTuple { -- const GUID guid; -- int flags; --} GUIDTuple; -- --#define PRESET_ALIAS(alias, name, ...) \ -- [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ } -- --#define PRESET(name, ...) 
PRESET_ALIAS(name, name, __VA_ARGS__) -- --static void nvenc_map_preset(NvencContext *ctx) --{ -- GUIDTuple presets[] = { --#ifdef NVENC_HAVE_NEW_PRESETS -- PRESET(P1), -- PRESET(P2), -- PRESET(P3), -- PRESET(P4), -- PRESET(P5), -- PRESET(P6), -- PRESET(P7), -- PRESET_ALIAS(SLOW, P7, NVENC_TWO_PASSES), -- PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS), -- PRESET_ALIAS(FAST, P1, NVENC_ONE_PASS), -- // Compat aliases -- PRESET_ALIAS(DEFAULT, P4, NVENC_DEPRECATED_PRESET), -- PRESET_ALIAS(HP, P1, NVENC_DEPRECATED_PRESET), -- PRESET_ALIAS(HQ, P7, NVENC_DEPRECATED_PRESET), -- PRESET_ALIAS(BD, P5, NVENC_DEPRECATED_PRESET), -- PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -- PRESET_ALIAS(LOW_LATENCY_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -- PRESET_ALIAS(LOW_LATENCY_HQ, P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY), -- PRESET_ALIAS(LOSSLESS_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), -- PRESET_ALIAS(LOSSLESS_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS), --#else -- PRESET(DEFAULT), -- PRESET(HP), -- PRESET(HQ), -- PRESET(BD), -- PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES), -- PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS), -- PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS), -- PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY), -- PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY), -- PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY), -- PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS), -- PRESET(LOSSLESS_HP, NVENC_LOSSLESS), --#endif -- }; -- -- GUIDTuple *t = &presets[ctx->preset]; -- -- ctx->init_encode_params.presetGUID = t->guid; -- ctx->flags = t->flags; --} -- --#undef PRESET --#undef PRESET_ALIAS -- - static av_cold void set_constqp(AVCodecContext *avctx) - { - NvencContext *ctx = avctx->priv_data; -@@ -1254,18 +1264,15 @@ static av_cold int nvenc_setup_encoder(A - - ctx->init_encode_params.encodeConfig = &ctx->encode_config; - -- nvenc_map_preset(ctx); -- -- if (ctx->flags & NVENC_DEPRECATED_PRESET) -- av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. 
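Note: the point of moving nvenc_map_preset() into nvenc_setup_device() is ordering. A simplified sketch of the resulting control flow (bodies elided, not the verbatim code):

    /* in nvenc_setup_device(), before capability checks: */
    nvenc_map_preset(ctx);                 /* ctx->flags filled from the preset table */

    /* later, in nvenc_check_capabilities(): */
    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
    if ((ctx->flags & NVENC_LOSSLESS) && ret <= 0)
        return AVERROR(ENOSYS);            /* the flag is now meaningful here */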
Use p1 to p7 + -tune or fast/medium/slow.\n"); -- - preset_config.version = NV_ENC_PRESET_CONFIG_VER; - preset_config.presetCfg.version = NV_ENC_CONFIG_VER; - - #ifdef NVENC_HAVE_NEW_PRESETS - ctx->init_encode_params.tuningInfo = ctx->tuning_info; - -- if (ctx->flags & NVENC_LOWLATENCY) -+ if (ctx->flags & NVENC_LOSSLESS) -+ ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOSSLESS; -+ else if (ctx->flags & NVENC_LOWLATENCY) - ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY; - - nv_status = p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder, -@@ -1307,9 +1314,6 @@ static av_cold int nvenc_setup_encoder(A - * */ - if (ctx->rc_lookahead == 0 && ctx->encode_config.rcParams.enableLookahead) - ctx->rc_lookahead = ctx->encode_config.rcParams.lookaheadDepth; -- -- if (ctx->init_encode_params.tuningInfo == NV_ENC_TUNING_INFO_LOSSLESS) -- ctx->flags |= NVENC_LOSSLESS; - #endif - - if (ctx->weighted_pred == 1) -Index: jellyfin-ffmpeg/libavcodec/nvenc.h -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/nvenc.h -+++ jellyfin-ffmpeg/libavcodec/nvenc.h -@@ -103,7 +103,7 @@ enum { - PRESET_LOW_LATENCY_DEFAULT , - PRESET_LOW_LATENCY_HQ , - PRESET_LOW_LATENCY_HP, -- PRESET_LOSSLESS_DEFAULT, // lossless presets must be the last ones -+ PRESET_LOSSLESS_DEFAULT, - PRESET_LOSSLESS_HP, - #ifdef NVENC_HAVE_NEW_PRESETS - PRESET_P1, -Index: jellyfin-ffmpeg/libavcodec/nvenc_hevc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/nvenc_hevc.c -+++ jellyfin-ffmpeg/libavcodec/nvenc_hevc.c -@@ -148,7 +148,7 @@ static const AVOption options[] = { - { "middle", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, VE, "b_ref_mode" }, - #endif - { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, -- { "s12m_tc", "Use timecode (if available)", OFFSET(s12m_tc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, -+ { "s12m_tc", "Use timecode (if available)", OFFSET(s12m_tc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, - { "dpb_size", "Specifies the DPB size used for encoding (0 means automatic)", - OFFSET(dpb_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, - #ifdef NVENC_HAVE_MULTIPASS -Index: jellyfin-ffmpeg/libavcodec/pngdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/pngdec.c -+++ jellyfin-ffmpeg/libavcodec/pngdec.c -@@ -1644,7 +1644,7 @@ static int decode_frame_apng(AVCodecCont - if (!(avctx->active_thread_type & FF_THREAD_FRAME)) { - if (s->dispose_op == APNG_DISPOSE_OP_PREVIOUS) { - ff_thread_release_buffer(avctx, &s->picture); -- } else if (s->dispose_op == APNG_DISPOSE_OP_NONE) { -+ } else { - ff_thread_release_buffer(avctx, &s->last_picture); - FFSWAP(ThreadFrame, s->picture, s->last_picture); - } -@@ -1693,8 +1693,8 @@ static int update_thread_context(AVCodec - pdst->hdr_state |= psrc->hdr_state; - } - -- src_frame = psrc->dispose_op == APNG_DISPOSE_OP_NONE ? -- &psrc->picture : &psrc->last_picture; -+ src_frame = psrc->dispose_op == APNG_DISPOSE_OP_PREVIOUS ? 
-+ &psrc->last_picture : &psrc->picture; - - ff_thread_release_buffer(dst, &pdst->last_picture); - if (src_frame && src_frame->f->data[0]) { -Index: jellyfin-ffmpeg/libavcodec/rv10.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/rv10.c -+++ jellyfin-ffmpeg/libavcodec/rv10.c -@@ -154,7 +154,7 @@ static int rv10_decode_picture_header(Mp - return mb_count; - } - --static int rv20_decode_picture_header(RVDecContext *rv) -+static int rv20_decode_picture_header(RVDecContext *rv, int whole_size) - { - MpegEncContext *s = &rv->m; - int seq, mb_pos, i, ret; -@@ -232,6 +232,10 @@ static int rv20_decode_picture_header(RV - "attempting to change resolution to %dx%d\n", new_w, new_h); - if (av_image_check_size(new_w, new_h, 0, s->avctx) < 0) - return AVERROR_INVALIDDATA; -+ -+ if (whole_size < (new_w + 15)/16 * ((new_h + 15)/16) / 8) -+ return AVERROR_INVALIDDATA; -+ - ff_mpv_common_end(s); - - // attempt to keep aspect during typical resolution switches -@@ -447,7 +451,7 @@ static int rv10_decode_packet(AVCodecCon - if (s->codec_id == AV_CODEC_ID_RV10) - mb_count = rv10_decode_picture_header(s); - else -- mb_count = rv20_decode_picture_header(rv); -+ mb_count = rv20_decode_picture_header(rv, whole_size); - if (mb_count < 0) { - if (mb_count != ERROR_SKIP_FRAME) - av_log(s->avctx, AV_LOG_ERROR, "HEADER ERROR\n"); -Index: jellyfin-ffmpeg/libavcodec/svq1enc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/svq1enc.c -+++ jellyfin-ffmpeg/libavcodec/svq1enc.c -@@ -487,9 +487,10 @@ static av_cold int svq1_encode_end(AVCod - SVQ1EncContext *const s = avctx->priv_data; - int i; - -- av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", -- s->rd_total / (double)(avctx->width * avctx->height * -- avctx->frame_number)); -+ if (avctx->frame_number) -+ av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", -+ s->rd_total / (double)(avctx->width * avctx->height * -+ avctx->frame_number)); - - s->m.mb_type = NULL; - ff_mpv_common_end(&s->m); -Index: jellyfin-ffmpeg/libavcodec/ttadata.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/ttadata.c -+++ jellyfin-ffmpeg/libavcodec/ttadata.c -@@ -30,7 +30,8 @@ const uint32_t ff_tta_shift_1[] = { - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x80000000, 0x80000000, 0x80000000, 0x80000000, -- 0x80000000, 0x80000000, 0x80000000, 0x80000000 -+ 0x80000000, 0x80000000, 0x80000000, 0x80000000, -+ 0xFFFFFFFF - }; - - const uint32_t * const ff_tta_shift_16 = ff_tta_shift_1 + 4; -Index: jellyfin-ffmpeg/libavcodec/ttmlenc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/ttmlenc.c -+++ jellyfin-ffmpeg/libavcodec/ttmlenc.c -@@ -206,5 +206,5 @@ AVCodec ff_ttml_encoder = { - .init = ttml_encode_init, - .encode_sub = ttml_encode_frame, - .close = ttml_encode_close, -- .capabilities = FF_CODEC_CAP_INIT_CLEANUP, -+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, - }; -Index: jellyfin-ffmpeg/libavcodec/utils.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/utils.c -+++ jellyfin-ffmpeg/libavcodec/utils.c -@@ -272,6 +272,16 @@ void avcodec_align_dimensions2(AVCodecCo - w_align = 8; - h_align = 8; - } -+ if (s->codec_id == AV_CODEC_ID_MJPEG || -+ s->codec_id == AV_CODEC_ID_MJPEGB || -+ s->codec_id == AV_CODEC_ID_LJPEG || -+ s->codec_id == AV_CODEC_ID_SMVJPEG || -+ s->codec_id == 
AV_CODEC_ID_AMV || -+ s->codec_id == AV_CODEC_ID_SP5X || -+ s->codec_id == AV_CODEC_ID_JPEGLS) { -+ w_align = 8; -+ h_align = 2*8; -+ } - break; - case AV_PIX_FMT_BGR24: - if ((s->codec_id == AV_CODEC_ID_MSZH) || -@@ -773,21 +783,33 @@ static int get_audio_frame_duration(enum - if (ba > 0) { - /* calc from frame_bytes, channels, and block_align */ - int blocks = frame_bytes / ba; -+ int64_t tmp = 0; - switch (id) { - case AV_CODEC_ID_ADPCM_IMA_WAV: - if (bps < 2 || bps > 5) - return 0; -- return blocks * (1 + (ba - 4 * ch) / (bps * ch) * 8); -+ tmp = blocks * (1LL + (ba - 4 * ch) / (bps * ch) * 8); -+ break; - case AV_CODEC_ID_ADPCM_IMA_DK3: -- return blocks * (((ba - 16) * 2 / 3 * 4) / ch); -+ tmp = blocks * (((ba - 16LL) * 2 / 3 * 4) / ch); -+ break; - case AV_CODEC_ID_ADPCM_IMA_DK4: -- return blocks * (1 + (ba - 4 * ch) * 2 / ch); -+ tmp = blocks * (1 + (ba - 4LL * ch) * 2 / ch); -+ break; - case AV_CODEC_ID_ADPCM_IMA_RAD: -- return blocks * ((ba - 4 * ch) * 2 / ch); -+ tmp = blocks * ((ba - 4LL * ch) * 2 / ch); -+ break; - case AV_CODEC_ID_ADPCM_MS: -- return blocks * (2 + (ba - 7 * ch) * 2LL / ch); -+ tmp = blocks * (2 + (ba - 7LL * ch) * 2LL / ch); -+ break; - case AV_CODEC_ID_ADPCM_MTAF: -- return blocks * (ba - 16) * 2 / ch; -+ tmp = blocks * (ba - 16LL) * 2 / ch; -+ break; -+ } -+ if (tmp) { -+ if (tmp != (int)tmp) -+ return 0; -+ return tmp; - } - } - -Index: jellyfin-ffmpeg/libavcodec/vaapi_av1.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/vaapi_av1.c -+++ jellyfin-ffmpeg/libavcodec/vaapi_av1.c -@@ -292,7 +292,7 @@ static int vaapi_av1_decode_slice(AVCode - err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &slice_param, - sizeof(VASliceParameterBufferAV1), - buffer, -- s->tile_group_info[i].tile_size); -+ size); - if (err) { - ff_vaapi_decode_cancel(avctx, pic); - return err; -Index: jellyfin-ffmpeg/libavcodec/vc1.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/vc1.c -+++ jellyfin-ffmpeg/libavcodec/vc1.c -@@ -672,6 +672,8 @@ int ff_vc1_parse_frame_header(VC1Context - if (v->s.pict_type == AV_PICTURE_TYPE_P) - v->rnd ^= 1; - -+ if (get_bits_left(gb) < 5) -+ return AVERROR_INVALIDDATA; - /* Quantizer stuff */ - pqindex = get_bits(gb, 5); - if (!pqindex) -@@ -764,6 +766,9 @@ int ff_vc1_parse_frame_header(VC1Context - av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: " - "Imode: %i, Invert: %i\n", status>>1, status&1); - -+ if (get_bits_left(gb) < 4) -+ return AVERROR_INVALIDDATA; -+ - /* Hopefully this is correct for P-frames */ - v->s.mv_table_index = get_bits(gb, 2); //but using ff_vc1_ tables - v->cbptab = get_bits(gb, 2); -Index: jellyfin-ffmpeg/libavcodec/vc2enc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavcodec/vc2enc.c -+++ jellyfin-ffmpeg/libavcodec/vc2enc.c -@@ -982,6 +982,8 @@ static av_cold int vc2_encode_frame(AVCo - } - - s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); -+ if (s->slice_min_bytes < 0) -+ return AVERROR(EINVAL); - - ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced); - if (ret) -Index: jellyfin-ffmpeg/libavfilter/f_metadata.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/f_metadata.c -+++ jellyfin-ffmpeg/libavfilter/f_metadata.c -@@ -304,9 +304,6 @@ static int filter_frame(AVFilterLink *in - AVDictionary **metadata = &frame->metadata; - 
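Note: the get_audio_frame_duration() hunks above all share one overflow rule; a condensed sketch (names shortened for illustration):

    int64_t tmp = blocks * (int64_t)samples_per_block;  /* 64-bit intermediate */
    if (tmp != (int)tmp)       /* product does not round-trip through int */
        return 0;              /* report the duration as unknown */
    return tmp;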
AVDictionaryEntry *e; - -- if (!*metadata && s->mode != METADATA_ADD) -- return ff_filter_frame(outlink, frame); -- - e = av_dict_get(*metadata, !s->key ? "" : s->key, NULL, - !s->key ? AV_DICT_IGNORE_SUFFIX: 0); - -Index: jellyfin-ffmpeg/libavfilter/vf_dctdnoiz.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_dctdnoiz.c -+++ jellyfin-ffmpeg/libavfilter/vf_dctdnoiz.c -@@ -564,6 +564,9 @@ static int config_input(AVFilterLink *in - inlink->h - s->pr_height); - - max_slice_h = s->pr_height / ((s->bsize - 1) * 2); -+ if (max_slice_h == 0) -+ return AVERROR(EINVAL); -+ - s->nb_threads = FFMIN3(MAX_THREADS, ff_filter_get_nb_threads(ctx), max_slice_h); - av_log(ctx, AV_LOG_DEBUG, "threads: [max=%d hmax=%d user=%d] => %d\n", - MAX_THREADS, max_slice_h, ff_filter_get_nb_threads(ctx), s->nb_threads); -Index: jellyfin-ffmpeg/libavfilter/vf_overlay_cuda.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_overlay_cuda.c -+++ jellyfin-ffmpeg/libavfilter/vf_overlay_cuda.c -@@ -63,6 +63,7 @@ typedef struct OverlayCUDAContext { - enum AVPixelFormat in_format_overlay; - enum AVPixelFormat in_format_main; - -+ AVBufferRef *hw_device_ctx; - AVCUDADeviceContext *hwctx; - - CUcontext cu_ctx; -@@ -256,6 +257,9 @@ static av_cold void overlay_cuda_uninit( - CHECK_CU(cu->cuModuleUnload(ctx->cu_module)); - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - } -+ -+ av_buffer_unref(&ctx->hw_device_ctx); -+ ctx->hwctx = NULL; - } - - /** -@@ -341,13 +345,19 @@ static int overlay_cuda_config_output(AV - - // initialize - -- ctx->hwctx = frames_ctx->device_ctx->hwctx; -+ ctx->hw_device_ctx = av_buffer_ref(frames_ctx->device_ref); -+ if (!ctx->hw_device_ctx) -+ return AVERROR(ENOMEM); -+ ctx->hwctx = ((AVHWDeviceContext*)ctx->hw_device_ctx->data)->hwctx; -+ - cuda_ctx = ctx->hwctx->cuda_ctx; - ctx->fs.time_base = inlink->time_base; - - ctx->cu_stream = ctx->hwctx->stream; - - outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); -+ if (!outlink->hw_frames_ctx) -+ return AVERROR(ENOMEM); - - // load functions - -Index: jellyfin-ffmpeg/libavfilter/vf_vmafmotion.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_vmafmotion.c -+++ jellyfin-ffmpeg/libavfilter/vf_vmafmotion.c -@@ -238,6 +238,9 @@ int ff_vmafmotion_init(VMAFMotionData *s - int i; - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); - -+ if (w < 3 || h < 3) -+ return AVERROR(EINVAL); -+ - s->width = w; - s->height = h; - s->stride = FFALIGN(w * sizeof(uint16_t), 32); -Index: jellyfin-ffmpeg/libavfilter/vf_yadif.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavfilter/vf_yadif.c -+++ jellyfin-ffmpeg/libavfilter/vf_yadif.c -@@ -123,20 +123,22 @@ static void filter_edges(void *dst1, voi - uint8_t *next2 = parity ? cur : next; - - const int edge = MAX_ALIGN - 1; -+ int offset = FFMAX(w - edge, 3); - - /* Only edge pixels need to be processed here. A constant value of false - * for is_not_edge should let the compiler ignore the whole branch. */ -- FILTER(0, 3, 0) -+ FILTER(0, FFMIN(3, w), 0) - -- dst = (uint8_t*)dst1 + w - edge; -- prev = (uint8_t*)prev1 + w - edge; -- cur = (uint8_t*)cur1 + w - edge; -- next = (uint8_t*)next1 + w - edge; -+ dst = (uint8_t*)dst1 + offset; -+ prev = (uint8_t*)prev1 + offset; -+ cur = (uint8_t*)cur1 + offset; -+ next = (uint8_t*)next1 + offset; - prev2 = (uint8_t*)(parity ? 
prev : cur); - next2 = (uint8_t*)(parity ? cur : next); - -- FILTER(w - edge, w - 3, 1) -- FILTER(w - 3, w, 0) -+ FILTER(offset, w - 3, 1) -+ offset = FFMAX(offset, w - 3); -+ FILTER(offset, w, 0) - } - - -@@ -170,21 +172,23 @@ static void filter_edges_16bit(void *dst - uint16_t *next2 = parity ? cur : next; - - const int edge = MAX_ALIGN / 2 - 1; -+ int offset = FFMAX(w - edge, 3); - - mrefs /= 2; - prefs /= 2; - -- FILTER(0, 3, 0) -+ FILTER(0, FFMIN(3, w), 0) - -- dst = (uint16_t*)dst1 + w - edge; -- prev = (uint16_t*)prev1 + w - edge; -- cur = (uint16_t*)cur1 + w - edge; -- next = (uint16_t*)next1 + w - edge; -+ dst = (uint16_t*)dst1 + offset; -+ prev = (uint16_t*)prev1 + offset; -+ cur = (uint16_t*)cur1 + offset; -+ next = (uint16_t*)next1 + offset; - prev2 = (uint16_t*)(parity ? prev : cur); - next2 = (uint16_t*)(parity ? cur : next); - -- FILTER(w - edge, w - 3, 1) -- FILTER(w - 3, w, 0) -+ FILTER(offset, w - 3, 1) -+ offset = FFMAX(offset, w - 3); -+ FILTER(offset, w, 0) - } - - static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) -Index: jellyfin-ffmpeg/libavformat/asfdec_o.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/asfdec_o.c -+++ jellyfin-ffmpeg/libavformat/asfdec_o.c -@@ -685,7 +685,7 @@ static int asf_read_properties(AVFormatC - return 0; - } - --static int parse_video_info(AVIOContext *pb, AVStream *st) -+static int parse_video_info(AVFormatContext *avfmt, AVIOContext *pb, AVStream *st) - { - uint16_t size_asf; // ASF-specific Format Data size - uint32_t size_bmp; // BMP_HEADER-specific Format Data size -@@ -700,19 +700,10 @@ static int parse_video_info(AVIOContext - st->codecpar->codec_id = ff_codec_get_id(ff_codec_bmp_tags, tag); - size_bmp = FFMAX(size_asf, size_bmp); - -- if (size_bmp > BMP_HEADER_SIZE && -- size_bmp < INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) { -- int ret; -- st->codecpar->extradata_size = size_bmp - BMP_HEADER_SIZE; -- if (!(st->codecpar->extradata = av_malloc(st->codecpar->extradata_size + -- AV_INPUT_BUFFER_PADDING_SIZE))) { -- st->codecpar->extradata_size = 0; -- return AVERROR(ENOMEM); -- } -- memset(st->codecpar->extradata + st->codecpar->extradata_size , 0, -- AV_INPUT_BUFFER_PADDING_SIZE); -- if ((ret = avio_read(pb, st->codecpar->extradata, -- st->codecpar->extradata_size)) < 0) -+ if (size_bmp > BMP_HEADER_SIZE) { -+ int ret = ff_get_extradata(avfmt, st->codecpar, pb, size_bmp - BMP_HEADER_SIZE); -+ -+ if (ret < 0) - return ret; - } - return 0; -@@ -795,7 +786,7 @@ static int asf_read_stream_properties(AV - break; - case AVMEDIA_TYPE_VIDEO: - asf_st->type = AVMEDIA_TYPE_VIDEO; -- if ((ret = parse_video_info(pb, st)) < 0) -+ if ((ret = parse_video_info(s, pb, st)) < 0) - return ret; - break; - default: -Index: jellyfin-ffmpeg/libavformat/avio.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/avio.c -+++ jellyfin-ffmpeg/libavformat/avio.c -@@ -316,8 +316,11 @@ int ffurl_open_whitelist(URLContext **pu - int ret = ffurl_alloc(puc, filename, flags, int_cb); - if (ret < 0) - return ret; -- if (parent) -- av_opt_copy(*puc, parent); -+ if (parent) { -+ ret = av_opt_copy(*puc, parent); -+ if (ret < 0) -+ goto fail; -+ } - if (options && - (ret = av_opt_set_dict(*puc, options)) < 0) - goto fail; -Index: jellyfin-ffmpeg/libavformat/cafdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/cafdec.c -+++ jellyfin-ffmpeg/libavformat/cafdec.c -@@ -79,7 
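Note: a worked example of the yadif edge clamping above, assuming MAX_ALIGN is 8 so that edge == 7 in the 8-bit path. With w == 8 the old passes were FILTER(0, 3, 0), FILTER(1, 5, 1), FILTER(5, 8, 0): the middle pass began at x == 1, where the spatial checks read pixels before the start of the line. With offset = FFMAX(w - edge, 3) the passes become FILTER(0, 3, 0), FILTER(3, 5, 1), FILTER(5, 8, 0), and every access stays inside [0, w).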
+79,7 @@ static int read_desc_chunk(AVFormatConte - st->codecpar->channels = avio_rb32(pb); - st->codecpar->bits_per_coded_sample = avio_rb32(pb); - -- if (caf->bytes_per_packet < 0 || caf->frames_per_packet < 0) -+ if (caf->bytes_per_packet < 0 || caf->frames_per_packet < 0 || st->codecpar->channels < 0) - return AVERROR_INVALIDDATA; - - /* calculate bit rate for constant size packets */ -Index: jellyfin-ffmpeg/libavformat/fifo.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/fifo.c -+++ jellyfin-ffmpeg/libavformat/fifo.c -@@ -593,7 +593,7 @@ static int fifo_write_packet(AVFormatCon - goto fail; - } - -- if (fifo->timeshift && pkt->dts != AV_NOPTS_VALUE) -+ if (fifo->timeshift && pkt && pkt->dts != AV_NOPTS_VALUE) - atomic_fetch_add_explicit(&fifo->queue_duration, next_duration(avf, pkt, &fifo->last_sent_dts), memory_order_relaxed); - - return ret; -Index: jellyfin-ffmpeg/libavformat/id3v2.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/id3v2.c -+++ jellyfin-ffmpeg/libavformat/id3v2.c -@@ -816,7 +816,7 @@ static void id3v2_parse(AVIOContext *pb, - int isv34, unsync; - unsigned tlen; - char tag[5]; -- int64_t next, end = avio_tell(pb) + len; -+ int64_t next, end = avio_tell(pb); - int taghdrlen; - const char *reason = NULL; - AVIOContext pb_local; -@@ -828,6 +828,10 @@ static void id3v2_parse(AVIOContext *pb, - av_unused int uncompressed_buffer_size = 0; - const char *comm_frame; - -+ if (end > INT64_MAX - len - 10) -+ return; -+ end += len; -+ - av_log(s, AV_LOG_DEBUG, "id3v2 ver:%d flags:%02X len:%d\n", version, flags, len); - - switch (version) { -Index: jellyfin-ffmpeg/libavformat/matroskaenc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/matroskaenc.c -+++ jellyfin-ffmpeg/libavformat/matroskaenc.c -@@ -1768,6 +1768,7 @@ static int mkv_write_attachments(AVForma - put_ebml_string(dyn_cp, MATROSKA_ID_FILEDESC, t->value); - if (!(t = av_dict_get(st->metadata, "filename", NULL, 0))) { - av_log(s, AV_LOG_ERROR, "Attachment stream %d has no filename tag.\n", i); -+ ffio_free_dyn_buf(&dyn_cp); - return AVERROR(EINVAL); - } - put_ebml_string(dyn_cp, MATROSKA_ID_FILENAME, t->value); -Index: jellyfin-ffmpeg/libavformat/moflex.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/moflex.c -+++ jellyfin-ffmpeg/libavformat/moflex.c -@@ -172,7 +172,6 @@ static int moflex_read_sync(AVFormatCont - unsigned type, ssize, codec_id = 0; - unsigned codec_type, width = 0, height = 0, sample_rate = 0, channels = 0; - int stream_index = -1; -- int format; - AVRational fps; - - read_var_byte(s, &type); -@@ -213,7 +212,6 @@ static int moflex_read_sync(AVFormatCont - fps.den = avio_rb16(pb); - width = avio_rb16(pb); - height = avio_rb16(pb); -- format = AV_PIX_FMT_YUV420P; - avio_skip(pb, type == 3 ? 
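Note: the id3v2.c hunk above is the usual check-before-add idiom; a sketch (the extra 10 presumably reserves room for an ID3v2 footer):

    int64_t end = avio_tell(pb);
    if (end > INT64_MAX - len - 10)   /* end + len (+ footer) would overflow */
        return;
    end += len;                       /* now safe to form the end offset */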
3 : 2); - break; - case 4: -@@ -235,7 +233,6 @@ static int moflex_read_sync(AVFormatCont - st->codecpar->height = height; - st->codecpar->sample_rate= sample_rate; - st->codecpar->channels = channels; -- st->codecpar->format = format; - st->priv_data = av_packet_alloc(); - if (!st->priv_data) - return AVERROR(ENOMEM); -Index: jellyfin-ffmpeg/libavformat/mov.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mov.c -+++ jellyfin-ffmpeg/libavformat/mov.c -@@ -4700,6 +4700,8 @@ static int mov_read_chap(MOVContext *c, - for (i = 0; i < num && !pb->eof_reached; i++) - c->chapter_tracks[i] = avio_rb32(pb); - -+ c->nb_chapter_tracks = i; -+ - return 0; - } - -@@ -5124,7 +5126,9 @@ static int mov_read_sidx(MOVContext *c, - if (frag_stream_info) - frag_stream_info->sidx_pts = timestamp; - -- if (av_sat_add64(offset, size) != offset + size) -+ if (av_sat_add64(offset, size) != offset + size || -+ av_sat_add64(pts, duration) != pts + (uint64_t)duration -+ ) - return AVERROR_INVALIDDATA; - offset += size; - pts += duration; -@@ -5464,7 +5468,7 @@ static int mov_read_mdcv(MOVContext *c, - - sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data; - -- if (atom.size < 24) { -+ if (atom.size < 24 || sc->mastering) { - av_log(c->fc, AV_LOG_ERROR, "Invalid Mastering Display Color Volume box\n"); - return AVERROR_INVALIDDATA; - } -@@ -5512,6 +5516,11 @@ static int mov_read_coll(MOVContext *c, - } - avio_skip(pb, 3); /* flags */ - -+ if (sc->coll){ -+ av_log(c->fc, AV_LOG_WARNING, "Ignoring duplicate COLL\n"); -+ return 0; -+ } -+ - sc->coll = av_content_light_metadata_alloc(&sc->coll_size); - if (!sc->coll) - return AVERROR(ENOMEM); -Index: jellyfin-ffmpeg/libavformat/movenc.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/movenc.c -+++ jellyfin-ffmpeg/libavformat/movenc.c -@@ -797,6 +797,7 @@ static int mov_write_dfla_tag(AVIOContex - static int mov_write_dops_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track) - { - int64_t pos = avio_tell(pb); -+ int channels, channel_map; - avio_wb32(pb, 0); - ffio_wfourcc(pb, "dOps"); - avio_w8(pb, 0); /* Version */ -@@ -807,12 +808,22 @@ static int mov_write_dops_tag(AVFormatCo - /* extradata contains an Ogg OpusHead, other than byte-ordering and - OpusHead's preceeding magic/version, OpusSpecificBox is currently - identical. */ -- avio_w8(pb, AV_RB8(track->par->extradata + 9)); /* OuputChannelCount */ -+ channels = AV_RB8(track->par->extradata + 9); -+ channel_map = AV_RB8(track->par->extradata + 18); -+ -+ avio_w8(pb, channels); /* OuputChannelCount */ - avio_wb16(pb, AV_RL16(track->par->extradata + 10)); /* PreSkip */ - avio_wb32(pb, AV_RL32(track->par->extradata + 12)); /* InputSampleRate */ - avio_wb16(pb, AV_RL16(track->par->extradata + 16)); /* OutputGain */ -+ avio_w8(pb, channel_map); /* ChannelMappingFamily */ - /* Write the rest of the header out without byte-swapping. 
*/ -- avio_write(pb, track->par->extradata + 18, track->par->extradata_size - 18); -+ if (channel_map) { -+ if (track->par->extradata_size < 21 + channels) { -+ av_log(s, AV_LOG_ERROR, "invalid extradata size\n"); -+ return AVERROR_INVALIDDATA; -+ } -+ avio_write(pb, track->par->extradata + 19, 2 + channels); /* ChannelMappingTable */ -+ } - - return update_size(pb, pos); - } -@@ -2166,11 +2177,13 @@ static int mov_write_video_tag(AVFormatC - avio_wb16(pb, 0x18); /* Reserved */ - - if (track->mode == MODE_MOV && track->par->format == AV_PIX_FMT_PAL8) { -- int pal_size = 1 << track->par->bits_per_coded_sample; -- int i; -+ int pal_size, i; - avio_wb16(pb, 0); /* Color table ID */ - avio_wb32(pb, 0); /* Color table seed */ - avio_wb16(pb, 0x8000); /* Color table flags */ -+ if (track->par->bits_per_coded_sample < 0 || track->par->bits_per_coded_sample > 8) -+ return AVERROR(EINVAL); -+ pal_size = 1 << track->par->bits_per_coded_sample; - avio_wb16(pb, pal_size - 1); /* Color table size (zero-relative) */ - for (i = 0; i < pal_size; i++) { - uint32_t rgb = track->palette[i]; -Index: jellyfin-ffmpeg/libavformat/mpegts.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mpegts.c -+++ jellyfin-ffmpeg/libavformat/mpegts.c -@@ -2026,6 +2026,7 @@ int ff_parse_mpeg2_descriptor(AVFormatCo - return AVERROR_INVALIDDATA; - if (channel_config_code <= 0x8) { - st->codecpar->extradata[9] = channels = channel_config_code ? channel_config_code : 2; -+ AV_WL32(&st->codecpar->extradata[12], 48000); - st->codecpar->extradata[18] = channel_config_code ? (channels > 2) : /* Dual Mono */ 255; - st->codecpar->extradata[19] = opus_stream_cnt[channel_config_code]; - st->codecpar->extradata[20] = opus_coupled_stream_cnt[channel_config_code]; -Index: jellyfin-ffmpeg/libavformat/mvdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mvdec.c -+++ jellyfin-ffmpeg/libavformat/mvdec.c -@@ -157,6 +157,8 @@ static int parse_audio_var(AVFormatConte - return set_channels(avctx, st, var_read_int(pb, size)); - } else if (!strcmp(name, "SAMPLE_RATE")) { - st->codecpar->sample_rate = var_read_int(pb, size); -+ if (st->codecpar->sample_rate <= 0) -+ return AVERROR_INVALIDDATA; - avpriv_set_pts_info(st, 33, 1, st->codecpar->sample_rate); - } else if (!strcmp(name, "SAMPLE_WIDTH")) { - uint64_t bpc = var_read_int(pb, size) * (uint64_t)8; -Index: jellyfin-ffmpeg/libavformat/mvi.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mvi.c -+++ jellyfin-ffmpeg/libavformat/mvi.c -@@ -32,7 +32,6 @@ - - typedef struct MviDemuxContext { - unsigned int (*get_int)(AVIOContext *); -- uint32_t audio_data_size; - uint64_t audio_size_counter; - uint64_t audio_frame_size; - int audio_size_left; -@@ -46,6 +45,7 @@ static int read_header(AVFormatContext * - AVStream *ast, *vst; - unsigned int version, frames_count, msecs_per_frame, player_version; - int ret; -+ int audio_data_size; - - ast = avformat_new_stream(s, NULL); - if (!ast) -@@ -67,13 +67,13 @@ static int read_header(AVFormatContext * - vst->codecpar->height = avio_rl16(pb); - avio_r8(pb); - ast->codecpar->sample_rate = avio_rl16(pb); -- mvi->audio_data_size = avio_rl32(pb); -+ audio_data_size = avio_rl32(pb); - avio_r8(pb); - player_version = avio_rl32(pb); - avio_rl16(pb); - avio_r8(pb); - -- if (frames_count == 0 || mvi->audio_data_size == 0) -+ if (frames_count == 0 || audio_data_size <= 0) - return AVERROR_INVALIDDATA; 
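Note: the dOps writer above leans on the fixed OpusHead extradata layout from the Ogg Opus spec; for reference (offsets in bytes, multi-byte fields little-endian in extradata but written big-endian into dOps):

    /*  0..7   "OpusHead" magic
     *  8      version
     *  9      output channel count
     * 10..11  pre-skip
     * 12..15  input sample rate
     * 16..17  output gain
     * 18      channel mapping family
     * 19..    mapping table (stream count, coupled count, then one byte per
     *         channel) -- present only when the mapping family is non-zero,
     *         hence the 21 + channels minimum size checked above */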
- - if (version != 7 || player_version > 213) { -@@ -96,16 +96,16 @@ static int read_header(AVFormatContext * - - mvi->get_int = (vst->codecpar->width * (int64_t)vst->codecpar->height < (1 << 16)) ? avio_rl16 : avio_rl24; - -- mvi->audio_frame_size = ((uint64_t)mvi->audio_data_size << MVI_FRAC_BITS) / frames_count; -+ mvi->audio_frame_size = ((uint64_t)audio_data_size << MVI_FRAC_BITS) / frames_count; - if (mvi->audio_frame_size <= 1 << MVI_FRAC_BITS - 1) { - av_log(s, AV_LOG_ERROR, -- "Invalid audio_data_size (%"PRIu32") or frames_count (%u)\n", -- mvi->audio_data_size, frames_count); -+ "Invalid audio_data_size (%d) or frames_count (%u)\n", -+ audio_data_size, frames_count); - return AVERROR_INVALIDDATA; - } - - mvi->audio_size_counter = (ast->codecpar->sample_rate * 830 / mvi->audio_frame_size - 1) * mvi->audio_frame_size; -- mvi->audio_size_left = mvi->audio_data_size; -+ mvi->audio_size_left = audio_data_size; - - return 0; - } -Index: jellyfin-ffmpeg/libavformat/mxfdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/mxfdec.c -+++ jellyfin-ffmpeg/libavformat/mxfdec.c -@@ -2903,7 +2903,7 @@ static int mxf_read_local_tags(MXFContex - meta = NULL; - ctx = mxf; - } -- while (avio_tell(pb) + 4 < klv_end && !avio_feof(pb)) { -+ while (avio_tell(pb) + 4ULL < klv_end && !avio_feof(pb)) { - int ret; - int tag = avio_rb16(pb); - int size = avio_rb16(pb); /* KLV specified by 0x53 */ -Index: jellyfin-ffmpeg/libavformat/rpl.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/rpl.c -+++ jellyfin-ffmpeg/libavformat/rpl.c -@@ -103,7 +103,7 @@ static AVRational read_fps(const char* l - // Truncate any numerator too large to fit into an int64_t - if (num > (INT64_MAX - 9) / 10 || den > INT64_MAX / 10) - break; -- num = 10 * num + *line - '0'; -+ num = 10 * num + (*line - '0'); - den *= 10; - } - if (!num) -@@ -207,8 +207,10 @@ static int rpl_read_header(AVFormatConte - ast->codecpar->bits_per_coded_sample = 4; - - ast->codecpar->bit_rate = ast->codecpar->sample_rate * -- ast->codecpar->bits_per_coded_sample * -- ast->codecpar->channels; -+ (int64_t)ast->codecpar->channels; -+ if (ast->codecpar->bit_rate > INT64_MAX / ast->codecpar->bits_per_coded_sample) -+ return AVERROR_INVALIDDATA; -+ ast->codecpar->bit_rate *= ast->codecpar->bits_per_coded_sample; - - ast->codecpar->codec_id = AV_CODEC_ID_NONE; - switch (audio_format) { -@@ -334,7 +336,7 @@ static int rpl_read_packet(AVFormatConte - - avio_skip(pb, 4); /* flags */ - frame_size = avio_rl32(pb); -- if (avio_seek(pb, -8, SEEK_CUR) < 0) -+ if (avio_feof(pb) || avio_seek(pb, -8, SEEK_CUR) < 0 || !frame_size) - return AVERROR(EIO); - - ret = av_get_packet(pb, pkt, frame_size); -Index: jellyfin-ffmpeg/libavformat/utils.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/utils.c -+++ jellyfin-ffmpeg/libavformat/utils.c -@@ -1208,7 +1208,9 @@ static void update_initial_durations(AVF - (pktl->pkt.dts == AV_NOPTS_VALUE || - pktl->pkt.dts == st->first_dts || - pktl->pkt.dts == RELATIVE_TS_BASE) && -- !pktl->pkt.duration) { -+ !pktl->pkt.duration && -+ av_sat_add64(cur_dts, duration) == cur_dts + (uint64_t)duration -+ ) { - pktl->pkt.dts = cur_dts; - if (!st->internal->avctx->has_b_frames) - pktl->pkt.pts = cur_dts; -@@ -3913,7 +3915,9 @@ FF_ENABLE_DEPRECATION_WARNINGS - break; - } - if (pkt->duration) { -- if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && 
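Note: several hunks in this series (mov.c sidx earlier, avformat/utils.c below) use the same overflow probe; a condensed sketch:

    /* Compare the saturating sum with the wrapping sum: they differ
     * exactly when the plain int64 addition would have overflowed. */
    if (av_sat_add64(a, b) == a + (uint64_t)b)
        a += b;                        /* safe */
    else
        return AVERROR_INVALIDDATA;    /* illustrative error handling */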
st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time) { -+ if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time -+ && (uint64_t)pkt->pts - st->start_time < INT64_MAX -+ ) { - st->internal->info->codec_info_duration = FFMIN(pkt->pts - st->start_time, st->internal->info->codec_info_duration + pkt->duration); - } else - st->internal->info->codec_info_duration += pkt->duration; -@@ -4059,7 +4063,7 @@ FF_ENABLE_DEPRECATION_WARNINGS - - if (!st->r_frame_rate.num) { - if ( avctx->time_base.den * (int64_t) st->time_base.num -- <= avctx->time_base.num * avctx->ticks_per_frame * (uint64_t) st->time_base.den) { -+ <= avctx->time_base.num * (uint64_t)avctx->ticks_per_frame * st->time_base.den) { - av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, - avctx->time_base.den, (int64_t)avctx->time_base.num * avctx->ticks_per_frame, INT_MAX); - } else { -Index: jellyfin-ffmpeg/libavformat/wtvdec.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavformat/wtvdec.c -+++ jellyfin-ffmpeg/libavformat/wtvdec.c -@@ -817,7 +817,7 @@ static int parse_chunks(AVFormatContext - avio_skip(pb, 12); - ff_get_guid(pb, &formattype); - size = avio_rl32(pb); -- if (size < 0 || size > INT_MAX - 92) -+ if (size < 0 || size > INT_MAX - 92 - consumed) - return AVERROR_INVALIDDATA; - parse_media_type(s, 0, sid, mediatype, subtype, formattype, size); - consumed += 92 + size; -@@ -833,7 +833,7 @@ static int parse_chunks(AVFormatContext - avio_skip(pb, 12); - ff_get_guid(pb, &formattype); - size = avio_rl32(pb); -- if (size < 0 || size > INT_MAX - 76) -+ if (size < 0 || size > INT_MAX - 76 - consumed) - return AVERROR_INVALIDDATA; - parse_media_type(s, s->streams[stream_index], sid, mediatype, subtype, formattype, size); - consumed += 76 + size; -Index: jellyfin-ffmpeg/libavutil/cpu.c -=================================================================== ---- jellyfin-ffmpeg.orig/libavutil/cpu.c -+++ jellyfin-ffmpeg/libavutil/cpu.c -@@ -291,6 +291,12 @@ int av_cpu_count(void) - DWORD_PTR proc_aff, sys_aff; - if (GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)) - nb_cpus = av_popcount64(proc_aff); -+#elif HAVE_SYSCTL && defined(HW_NCPUONLINE) -+ int mib[2] = { CTL_HW, HW_NCPUONLINE }; -+ size_t len = sizeof(nb_cpus); -+ -+ if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) -+ nb_cpus = 0; - #elif HAVE_SYSCTL && defined(HW_NCPU) - int mib[2] = { CTL_HW, HW_NCPU }; - size_t len = sizeof(nb_cpus); -Index: jellyfin-ffmpeg/tests/ref/fate/ts-opus-demux -=================================================================== ---- jellyfin-ffmpeg.orig/tests/ref/fate/ts-opus-demux -+++ jellyfin-ffmpeg/tests/ref/fate/ts-opus-demux -@@ -1,4 +1,4 @@ --#extradata 0: 30, 0x53be0347 -+#extradata 0: 30, 0x69290482 - #tb 0: 1/90000 - #media_type 0: audio - #codec_id 0: opus diff --git a/debian/patches/series b/debian/patches/series deleted file mode 100644 index 40831a18763..00000000000 --- a/debian/patches/series +++ /dev/null @@ -1,9 +0,0 @@ -0001_fix-segment-muxer.patch -0002-lavfi-add-a-filter-for-uploading-normal-frames-to-VAAPI.patch -0003-fix-for-the-broken-tonemap_vaapi-filter.patch -0004-cuda-format-converter-impl.patch -0005-cuda-tonemap-impl.patch -0006-bt2390-and-fix-for-peak-detection-in-opencl-tonemap.patch -0007-fix-for-fmp4-in-hlsenc.patch -0008-fix-nvdec-exceeded-32-surfaces-error.patch -0009-fix-for-nvenc-from-upstream.patch From 0517e76837d897c8fe85cfb1e28abcdad93f31f0 
Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Thu, 14 Oct 2021 16:27:43 +0800 Subject: [PATCH 06/41] add build script for windows win64 --- Dockerfile.win64.in | 45 ++++ Dockerfile.win64.make | 8 + build-win64 | 40 +++ cross-win64.meson | 16 ++ docker-build-win64.sh | 562 ++++++++++++++++++++++++++++++++++++++++++ toolchain-win64.cmake | 13 + 6 files changed, 684 insertions(+) create mode 100644 Dockerfile.win64.in create mode 100644 Dockerfile.win64.make create mode 100755 build-win64 create mode 100644 cross-win64.meson create mode 100755 docker-build-win64.sh create mode 100644 toolchain-win64.cmake diff --git a/Dockerfile.win64.in b/Dockerfile.win64.in new file mode 100644 index 00000000000..de08c7a9b53 --- /dev/null +++ b/Dockerfile.win64.in @@ -0,0 +1,45 @@ +FROM DISTRO + +# Docker build arguments +ARG SOURCE_DIR=/ffmpeg +ARG ARTIFACT_DIR=/dist + +# Docker run environment +ENV DEBIAN_FRONTEND=noninteractive \ + SOURCE_DIR=/ffmpeg \ + ARTIFACT_DIR=/dist \ + FF_REV=FFMPEG_REV \ + FF_PREFIX=/opt/ffmpeg \ + FF_DEPS_PREFIX=/opt/ffdeps \ + FF_TOOLCHAIN=x86_64-w64-mingw32 \ + FF_CROSS_PREFIX=x86_64-w64-mingw32- \ + FF_CMAKE_TOOLCHAIN=${SOURCE_DIR}/toolchain-win64.cmake \ + FF_MESON_TOOLCHAIN=${SOURCE_DIR}/cross-win64.meson \ + FF_TARGET_FLAGS="--arch=x86_64 --target-os=mingw32 --cross-prefix=x86_64-w64-mingw32- --pkg-config=pkg-config --pkg-config-flags=--static" \ + PKG_CONFIG=pkg-config \ + PKG_CONFIG_LIBDIR=/opt/ffdeps/lib/pkgconfig:/opt/ffdeps/share/pkgconfig \ + CFLAGS="-static-libgcc -static-libstdc++ -I/opt/ffdeps/include -mtune=generic -O2 -pipe -D_FORTIFY_SOURCE=0" \ + CXXFLAGS="-static-libgcc -static-libstdc++ -I/opt/ffdeps/include -mtune=generic -O2 -pipe -D_FORTIFY_SOURCE=0" \ + LDFLAGS="-static-libgcc -static-libstdc++ -L/opt/ffdeps/lib -O2 -pipe" \ + DLLTOOL="x86_64-w64-mingw32-dlltool" + +# Prepare Debian and mingw-w64 build environment +RUN \ + apt-get -y update && \ + apt-get -y install build-essential yasm nasm xxd pkgconf git curl wget unzip subversion autoconf automake libtool libtool-bin autopoint cmake clang texinfo texi2html help2man flex bison gperf gettext itstool ragel libc6-dev libssl-dev gtk-doc-tools gobject-introspection gawk meson ninja-build p7zip-full python3-distutils python3-apt python-is-python3 zip quilt binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64 gfortran-mingw-w64-x86-64 && \ + rm /usr/lib/gcc/*-w64-mingw32/*/libstdc++*.dll* && \ + rm /usr/lib/gcc/*-w64-mingw32/*/libgcc_s* && \ + rm /usr/lib/gcc/*-w64-mingw32/*/*.dll.a && \ + rm /usr/*-w64-mingw32/lib/*.dll.a + +# Prepare build script and patches +RUN \ + mkdir -p /opt/ffmpeg /opt/ffdeps ${SOURCE_DIR} && \ + ln -sf ${SOURCE_DIR}/debian/patches ${SOURCE_DIR} && \ + ln -sf ${SOURCE_DIR}/docker-build-win64.sh /docker-build-win64.sh + +VOLUME ${ARTIFACT_DIR}/ + +COPY . 
${SOURCE_DIR}/ + +ENTRYPOINT ["/docker-build-win64.sh"] diff --git a/Dockerfile.win64.make b/Dockerfile.win64.make new file mode 100644 index 00000000000..0ea81a73609 --- /dev/null +++ b/Dockerfile.win64.make @@ -0,0 +1,8 @@ +#!/usr/bin/make +DISTRO=ubuntu:hirsute +FF_REV=1 +.PHONY: Dockerfile +Dockerfile: Dockerfile.win64.in + sed 's/DISTRO/$(DISTRO)/; s/FFMPEG_REV/$(FF_REV)/' $< > $@ || rm -f $@ +clean: + rm -f Dockerfile diff --git a/build-win64 b/build-win64 new file mode 100755 index 00000000000..6353a192e61 --- /dev/null +++ b/build-win64 @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +set -o xtrace +set -o errexit + +# Check for dependencies +for dep in docker make; do + command -v ${dep} &>/dev/null || { echo "The command '${dep}' is required."; exit 1; } +done + +# Use the latest distro for toolchains +distro="ubuntu:impish" +ffrevision="1" +image_name="jellyfin-ffmpeg-build-windows-win64" +package_temporary_dir="$( mktemp -d )" +current_user="$( whoami )" + +# Trap cleanup for later sections +cleanup() { + # Clean up the Dockerfile + make -f Dockerfile.win64.make clean + # Remove tempdir + rm -rf "${package_temporary_dir}" +} +trap cleanup EXIT INT + +# Generate Dockerfile +make -f Dockerfile.win64.make DISTRO=${distro} FF_REV=${ffrevision} +# Set up the build environment docker image +docker build . -t "${image_name}" +# Build the ZIP packages and copy them out to ${package_temporary_dir} +docker run --rm -v "${package_temporary_dir}:/dist" "${image_name}" +# If no 1st parameter was specified, move the ZIPs to the parent directory +if [[ -z ${1} ]]; then + path="../bin" +else + path="${1}" +fi +mkdir ${path} &>/dev/null || true +mv "${package_temporary_dir}"/zip/jellyfin-ffmpeg*.{zip,sha256sum} "${path}" diff --git a/cross-win64.meson b/cross-win64.meson new file mode 100644 index 00000000000..f5743f0865b --- /dev/null +++ b/cross-win64.meson @@ -0,0 +1,16 @@ +[binaries] +c = 'x86_64-w64-mingw32-gcc' +cpp = 'x86_64-w64-mingw32-g++' +ar = 'x86_64-w64-mingw32-ar' +strip = 'x86_64-w64-mingw32-strip' +windres = 'x86_64-w64-mingw32-windres' +exe_wrapper = ['wine'] + +[properties] +needs_exe_wrapper = true + +[host_machine] +system = 'windows' +cpu_family = 'x86_64' +cpu = 'x86_64' +endian = 'little' diff --git a/docker-build-win64.sh b/docker-build-win64.sh new file mode 100755 index 00000000000..8b871829e10 --- /dev/null +++ b/docker-build-win64.sh @@ -0,0 +1,562 @@ +#!/bin/bash + +# Builds the EXE/ZIP inside the Docker container + +set -o errexit +set -o xtrace + +# Update mingw-w64 headers +git clone --depth=1 https://github.com/mirror/mingw-w64.git +pushd mingw-w64/mingw-w64-headers +./configure \ + --prefix=/usr/${FF_TOOLCHAIN} \ + --host=${FF_TOOLCHAIN} \ + --with-default-win32-winnt="0x601" \ + --enable-idl +make -j$(nproc) +make install +popd + +# ICONV +mkdir iconv +pushd iconv +iconv_ver="1.16" +iconv_link="https://ftp.gnu.org/pub/gnu/libiconv/libiconv-${iconv_ver}.tar.gz" +wget ${iconv_link} -O iconv.tar.gz +tar xaf iconv.tar.gz +pushd libiconv-${iconv_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-{static,extra-encodings} \ + --with-pic +make -j$(nproc) +make install +popd +popd + +# LIBXML2 +git clone --depth=1 https://gitlab.gnome.org/GNOME/libxml2.git +pushd libxml2 +./autogen.sh \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,maintainer-mode} \ + --enable-static \ + --without-python +make -j$(nproc) +make install +popd + +# ZLIB +git clone --depth=1 https://github.com/madler/zlib.git +pushd zlib
+./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --static +make -j$(nproc) CC=${FF_CROSS_PREFIX}gcc AR=${FF_CROSS_PREFIX}ar +make install +popd + +# FREETYPE +mkdir freetype +pushd freetype +ft_ver="2.11.0" +ft_link="https://sourceforge.net/projects/freetype/files/freetype2/${ft_ver}/freetype-${ft_ver}.tar.xz/download" +wget ${ft_link} -O ft.tar.gz +tar xaf ft.tar.gz +pushd freetype-${ft_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static +make -j$(nproc) +make install +popd +popd + +# FRIBIDI +git clone --depth=1 https://github.com/fribidi/fribidi.git +pushd fribidi +mkdir build +pushd build +meson \ + --prefix=${FF_DEPS_PREFIX} \ + --cross-file=${FF_MESON_TOOLCHAIN} \ + --buildtype=release \ + --default-library=static \ + -D{bin,docs,tests}=false \ + .. +ninja -j$(nproc) +meson install +sed -i 's/Cflags:/Cflags: -DFRIBIDI_LIB_STATIC/' ${FF_DEPS_PREFIX}/lib/pkgconfig/fribidi.pc +popd +popd + +# GMP +mkdir gmp +pushd gmp +gmp_ver="6.2.1" +gmp_link="https://ftp.gnu.org/gnu/gmp/gmp-${gmp_ver}.tar.xz" +wget ${gmp_link} -O gmp.tar.gz +tar xaf gmp.tar.gz +pushd gmp-${gmp_ver} +autoreconf -i +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static +make -j$(nproc) +make install +popd +popd + +# LZMA +mkdir xz +pushd xz +xz_ver="5.2.5" +xz_link="https://sourceforge.net/projects/lzmautils/files/xz-${xz_ver}.tar.xz/download" +wget ${xz_link} -O xz.tar.xz +tar xaf xz.tar.xz +pushd xz-${xz_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd +popd + +# SDL2 +mkdir sdl2 +pushd sdl2 +sdl2_ver="2.0.16" +sdl2_link="https://libsdl.org/release/SDL2-${sdl2_ver}.tar.gz" +wget ${sdl2_link} -O sdl2.tar.gz +tar xaf sdl2.tar.gz +pushd SDL2-${sdl2_ver} +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static +make -j$(nproc) +make install +popd +popd + +# FONTCONFIG +mkdir fontconfig +pushd fontconfig +fc_ver="2.13.94" +fc_link="https://www.freedesktop.org/software/fontconfig/release/fontconfig-${fc_ver}.tar.xz" +wget ${fc_link} -O fc.tar.gz +tar xaf fc.tar.gz +pushd fontconfig-${fc_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,docs} \ + --enable-{static,libxml2,iconv} +make -j$(nproc) +make install +popd +popd + +# HARFBUZZ +git clone --depth=1 https://github.com/harfbuzz/harfbuzz.git +pushd harfbuzz +./autogen.sh \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# LIBUDFREAD +git clone --depth=1 https://code.videolan.org/videolan/libudfread.git +pushd libudfread +./bootstrap +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# LIBASS +git clone --depth=1 https://github.com/libass/libass.git +pushd libass +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# LIBBLURAY +git clone --depth=1 https://code.videolan.org/videolan/libbluray.git +pushd libbluray +./bootstrap +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,examples,bdjava-jar} \ + --disable-doxygen-{doc,dot,html,ps,pdf} 
\ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# LAME +mkdir lame +pushd lame +lame_ver="3.100" +lame_link="https://sourceforge.net/projects/lame/files/lame/${lame_ver}/lame-${lame_ver}.tar.gz/download" +wget ${lame_link} -O lame.tar.gz +tar xaf lame.tar.gz +pushd lame-${lame_ver} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,gtktest,cpml,frontend} \ + --enable-{static,nasm} +make -j$(nproc) +make install +popd +popd + +# OGG +git clone --depth=1 https://github.com/xiph/ogg.git +pushd ogg +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# OPUS +git clone --depth=1 https://github.com/xiph/opus.git +pushd opus +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,extra-programs} \ + --enable-static +make -j$(nproc) +make install +popd + +# THEORA +git clone --depth=1 https://github.com/xiph/theora.git +pushd theora +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,examples,extra-programs,oggtest,vorbistest,spec,doc} \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# VORBIS +git clone --depth=1 https://github.com/xiph/vorbis.git +pushd vorbis +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,oggtest} \ + --enable-static +make -j$(nproc) +make install +popd + +# LIBWEBP +git clone --depth=1 https://chromium.googlesource.com/webm/libwebp +pushd libwebp +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-{shared,libwebpextras,libwebpdemux,sdl,gl,png,jpeg,tiff,gif} \ + --enable-{static,libwebpmux} \ + --with-pic +make -j$(nproc) +make install +popd + +# LIBVPX +git clone --depth=1 https://chromium.googlesource.com/webm/libvpx +pushd libvpx +export CROSS=${FF_CROSS_PREFIX} +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --target=x86_64-win64-gcc \ + --disable-{shared,unit-tests,examples,tools,docs,install-bins} \ + --enable-{static,pic,vp9-postproc,vp9-highbitdepth} +make -j$(nproc) +make install +popd + +# ZIMG +git clone --depth=1 https://github.com/sekrit-twc/zimg.git +pushd zimg +./autogen.sh +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-pic \ + --with-pic +make -j$(nproc) +make install +popd + +# X264 +git clone --depth=1 https://code.videolan.org/videolan/x264.git +pushd x264 +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --cross-prefix=${FF_CROSS_PREFIX} \ + --disable-cli \ + --enable-{static,lto,strip,pic} +make -j$(nproc) +make install +popd + +# X265 +git clone -b 3.5 --depth=1 https://bitbucket.org/multicoreware/x265_git.git +pushd x265_git +x265_conf=" + -DCMAKE_TOOLCHAIN_FILE=${FF_CMAKE_TOOLCHAIN} + -DCMAKE_INSTALL_PREFIX=${FF_DEPS_PREFIX} + -DCMAKE_ASM_NASM_FLAGS=-w-macro-params-legacy + -DENABLE_ASSEMBLY=ON + -DENABLE_SHARED=OFF + -DENABLE_TESTS=OFF + -DENABLE_CLI=OFF + -DENABLE_PIC=ON +" +mkdir 8b 10b 12b +cmake \ + ${x265_conf} \ + -DHIGH_BIT_DEPTH=ON \ + -DEXPORT_C_API=OFF \ + -DENABLE_HDR10_PLUS=ON \ + -DMAIN12=ON \ + -S source \ + -B 12b & +cmake \ + ${x265_conf} \ + -DHIGH_BIT_DEPTH=ON \ + -DEXPORT_C_API=OFF \ + -DENABLE_HDR10_PLUS=ON \ + -S source \ + -B 10b & +cmake \ + ${x265_conf} \ + -DEXTRA_LIB="x265_main10.a;x265_main12.a" \ + 
-DEXTRA_LINK_FLAGS=-L. \ + -DLINKED_{10,12}BIT=ON \ + -S source \ + -B 8b & +wait +cat > Makefile << "EOF" +all: 12b/libx265.a 10b/libx265.a 8b/libx265.a +%/libx265.a: + $(MAKE) -C $(subst /libx265.a,,$@) +.PHONY: all +EOF +make -j$(nproc) +pushd 8b +mv ../12b/libx265.a ../8b/libx265_main12.a +mv ../10b/libx265.a ../8b/libx265_main10.a +mv libx265.a libx265_main.a +${FF_CROSS_PREFIX}ar -M << "EOF" +CREATE libx265.a +ADDLIB libx265_main.a +ADDLIB libx265_main10.a +ADDLIB libx265_main12.a +SAVE +END +EOF +make install +popd +popd + +# DAV1D +git clone -b 0.9.2 --depth=1 https://code.videolan.org/videolan/dav1d.git +pushd dav1d +mkdir build +pushd build +meson \ + --prefix=${FF_DEPS_PREFIX} \ + --cross-file=${FF_MESON_TOOLCHAIN} \ + --buildtype=release \ + --default-library=static \ + -Denable_{asm,avx512}=true \ + -Denable_{tests,examples}=false \ + .. +ninja -j$(nproc) +meson install +popd +popd + +# OpenCL headers +svn checkout https://github.com/KhronosGroup/OpenCL-Headers/trunk/CL +pushd CL +mkdir -p ${FF_DEPS_PREFIX}/include/CL +mv * ${FF_DEPS_PREFIX}/include/CL +popd + +# OpenCL ICD loader +git clone -b v2021.06.30 --depth=1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git +pushd OpenCL-ICD-Loader +sed -i 's|VERSION "1.2" SOVERSION "1"|PREFIX ""|g' CMakeLists.txt +mkdir build +pushd build +cmake \ + -DCMAKE_TOOLCHAIN_FILE=${FF_CMAKE_TOOLCHAIN} \ + -DCMAKE_INSTALL_PREFIX=${FF_DEPS_PREFIX} \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_SHARED_LIBS=ON \ + -DOPENCL_ICD_LOADER_HEADERS_DIR=${FF_DEPS_PREFIX}/include \ + -DOPENCL_ICD_LOADER_{PIC,DISABLE_OPENCLON12}=ON \ + -DOPENCL_ICD_LOADER_{BUILD_TESTING,REQUIRE_WDK}=OFF \ + .. +make -j$(nproc) +make install +mv ${FF_DEPS_PREFIX}/lib/libOpenCL.dll.a ${FF_DEPS_PREFIX}/lib/libOpenCL.a +popd +mkdir -p ${FF_DEPS_PREFIX}/lib/pkgconfig +cat > ${FF_DEPS_PREFIX}/lib/pkgconfig/OpenCL.pc << EOF +prefix=${FF_DEPS_PREFIX} +exec_prefix=\${prefix} +libdir=\${prefix}/lib +includedir=\${prefix}/include +Name: OpenCL +Description: OpenCL ICD Loader +Version: 3.0 +Libs: -L\${libdir} -lOpenCL +Cflags: -I\${includedir} +EOF +popd + +# FFNVCODEC +git clone -b n11.0.10.1 --depth=1 https://github.com/FFmpeg/nv-codec-headers.git +pushd nv-codec-headers +make PREFIX=${FF_DEPS_PREFIX} install +popd + +# AMF +svn checkout https://github.com/GPUOpen-LibrariesAndSDKs/AMF/trunk/amf/public/include +pushd include +mkdir -p ${FF_DEPS_PREFIX}/include/AMF +mv * ${FF_DEPS_PREFIX}/include/AMF +popd + +# LIBMFX +git clone -b 1.35.1 --depth=1 https://github.com/lu-zero/mfx_dispatch.git +pushd mfx_dispatch +autoreconf -i +./configure \ + --prefix=${FF_DEPS_PREFIX} \ + --host=${FF_TOOLCHAIN} \ + --disable-shared \ + --enable-static \ + --with-pic +make -j$(nproc) +make install +popd + +# Jellyfin-FFmpeg +pushd ${SOURCE_DIR} +ffversion="$(cat RELEASE)-${FF_REV}" +if [[ -f "patches/series" ]]; then + quilt push -a +fi +./configure \ + --prefix=${FF_PREFIX} \ + ${FF_TARGET_FLAGS} \ + --extra-version=Jellyfin \ + --disable-ffplay \ + --disable-debug \ + --disable-doc \ + --enable-shared \ + --enable-gpl \ + --enable-version3 \ + --enable-schannel \ + --enable-iconv \ + --enable-libxml2 \ + --enable-zlib \ + --enable-lzma \ + --enable-sdl2 \ + --enable-gmp \ + --enable-libfreetype \ + --enable-libfribidi \ + --enable-libfontconfig \ + --enable-libass \ + --enable-libbluray \ + --enable-libmp3lame \ + --enable-libopus \ + --enable-libtheora \ + --enable-libvorbis \ + --enable-libwebp \ + --enable-libvpx \ + --enable-libzimg \ + --enable-libx264 \ + --enable-libx265 \ + --enable-libdav1d 
\ + --enable-opencl \ + --enable-dxva2 \ + --enable-d3d11va \ + --enable-amf \ + --enable-libmfx \ + --enable-ffnvcodec \ + --enable-cuda \ + --enable-cuda-llvm \ + --enable-cuvid \ + --enable-nvdec \ + --enable-nvenc +make -j$(nproc) +make install +popd + +# Zip and copy artifacts +mkdir -p ${ARTIFACT_DIR}/zip +pushd ${FF_PREFIX}/bin +ffpackage="jellyfin-ffmpeg_${ffversion}-windows_win64" +zip -r ${ARTIFACT_DIR}/zip/${ffpackage}.zip ./*.{exe,dll} +pushd ${ARTIFACT_DIR}/zip +sha256sum ./${ffpackage}.zip > ./${ffpackage}.zip.sha256sum +chown -Rc $(stat -c %u:%g ${ARTIFACT_DIR}) ${ARTIFACT_DIR} +popd +popd diff --git a/toolchain-win64.cmake b/toolchain-win64.cmake new file mode 100644 index 00000000000..11d1344d2e1 --- /dev/null +++ b/toolchain-win64.cmake @@ -0,0 +1,13 @@ +set(CMAKE_SYSTEM_NAME Windows) +set(CMAKE_SYSTEM_PROCESSOR x86_64) + +set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc) +set(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++) +set(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres) +set(CMAKE_RANLIB x86_64-w64-mingw32-ranlib) + +set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32 /opt/ffdeps) + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) From fe9619afc0cf043448f20174910423bb887f3c31 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Thu, 14 Oct 2021 16:28:00 +0800 Subject: [PATCH 07/41] add github workflow for windows win64 --- .github/workflows/main.yml | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 99f32e8be19..a44242cfa01 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,4 +1,4 @@ -name: Build Linux +name: Build jellyfin-ffmpeg on: push: @@ -12,7 +12,7 @@ on: workflow_dispatch: jobs: - build: + build_linux: name: Build ${{ matrix.release }} ${{ matrix.arch }} runs-on: ubuntu-latest @@ -46,3 +46,24 @@ jobs: with: name: ${{ matrix.release }} ${{ matrix.arch }} package path: dist + build_win64: + name: Build windows win64 + runs-on: ubuntu-latest + + strategy: + fail-fast: false + + steps: + - uses: actions/checkout@v2 + + - name: Install make + run: sudo apt-get install make + + - name: Build + run: ./build-win64 dist + + - name: Upload Packages + uses: actions/upload-artifact@v2.2.3 + with: + name: windows win64 package + path: dist From 8467063cbb74ff4fcdc3b0e4fba8d4f261886014 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 22:04:02 +0800 Subject: [PATCH 08/41] add support for Ubuntu Impish 21.10 --- .github/workflows/main.yml | 4 +++- build | 7 ++++++- build.yaml | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a44242cfa01..6f4573462e6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,9 +23,11 @@ jobs: - bullseye - buster - stretch - - groovy - focal - bionic + - impish + - hirsute + - groovy arch: - amd64 diff --git a/build b/build index dc6c07981c2..d2c7de092ba 100755 --- a/build +++ b/build @@ -15,6 +15,7 @@ usage() { echo -e " * focal" echo -e " * groovy" echo -e " * hirsute" + echo -e " * impish" } if [[ -z ${1} ]]; then @@ -34,7 +35,7 @@ case ${cli_release} in ;; 'bullseye') release="debian:bullseye" - gcc_version="9" + gcc_version="10" ;; 'xenial') release="ubuntu:xenial" @@ -68,6 +69,10 @@ case ${cli_release} in release="ubuntu:hirsute" gcc_version="10" ;; + 'impish') + release="ubuntu:impish" + gcc_version="11" + ;; *) echo 
"Invalid release." usage diff --git a/build.yaml b/build.yaml index ab308e87317..5a1c65c48ce 100644 --- a/build.yaml +++ b/build.yaml @@ -31,3 +31,6 @@ packages: - hirsute-amd64 - hirsute-armhf - hirsute-arm64 + - impish-amd64 + - impish-armhf + - impish-arm64 From bcea7a6e5953c305f329edca36b025757da66bb1 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 22:06:41 +0800 Subject: [PATCH 09/41] drop support for EOL Ubuntu versions --- .github/workflows/main.yml | 1 - build | 4 ---- build.yaml | 9 --------- 3 files changed, 14 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6f4573462e6..7c8b94fd437 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -27,7 +27,6 @@ jobs: - bionic - impish - hirsute - - groovy arch: - amd64 diff --git a/build b/build index d2c7de092ba..b5357dcd89d 100755 --- a/build +++ b/build @@ -9,11 +9,7 @@ usage() { echo -e " * bullseye * arm64" echo -e " * xenial" echo -e " * bionic" - echo -e " * cosmic" - echo -e " * disco" - echo -e " * eoan" echo -e " * focal" - echo -e " * groovy" echo -e " * hirsute" echo -e " * impish" } diff --git a/build.yaml b/build.yaml index 5a1c65c48ce..7656ea4f063 100644 --- a/build.yaml +++ b/build.yaml @@ -16,18 +16,9 @@ packages: - bionic-amd64 - bionic-armhf - bionic-arm64 - - disco-amd64 - - disco-armhf - - disco-arm64 - - eoan-amd64 - - eoan-armhf - - eoan-arm64 - focal-amd64 - focal-armhf - focal-arm64 - - groovy-amd64 - - groovy-armhf - - groovy-arm64 - hirsute-amd64 - hirsute-armhf - hirsute-arm64 From 2085628cd3dd6bf583b5eb5cc8721d5741a51cfc Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:36:46 +0800 Subject: [PATCH 10/41] add fixes for segement muxer --- .../0001-add-fixes-for-segement-muxer.patch | 37 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 38 insertions(+) create mode 100644 debian/patches/0001-add-fixes-for-segement-muxer.patch create mode 100644 debian/patches/series diff --git a/debian/patches/0001-add-fixes-for-segement-muxer.patch b/debian/patches/0001-add-fixes-for-segement-muxer.patch new file mode 100644 index 00000000000..d3b57793d91 --- /dev/null +++ b/debian/patches/0001-add-fixes-for-segement-muxer.patch @@ -0,0 +1,37 @@ +Index: jellyfin-ffmpeg/libavformat/segment.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavformat/segment.c ++++ jellyfin-ffmpeg/libavformat/segment.c +@@ -87,6 +87,7 @@ typedef struct SegmentContext { + int64_t last_val; ///< remember last time for wrap around detection + int cut_pending; + int header_written; ///< whether we've already called avformat_write_header ++ int64_t start_pts; ///< pts of the very first packet processed, used to compute correct segment length + + char *entry_prefix; ///< prefix to add to list entry filenames + int list_type; ///< set the list type +@@ -712,6 +713,7 @@ static int seg_init(AVFormatContext *s) + if ((ret = parse_frames(s, &seg->frames, &seg->nb_frames, seg->frames_str)) < 0) + return ret; + } else { ++ seg->start_pts = -1; + if (seg->use_clocktime) { + if (seg->time <= 0) { + av_log(s, AV_LOG_ERROR, "Invalid negative segment_time with segment_atclocktime option set\n"); +@@ -889,7 +891,15 @@ calc_times: + seg->cut_pending = 1; + seg->last_val = wrapped_val; + } else { +- end_pts = seg->time * (seg->segment_count + 1); ++ if (seg->start_pts != -1) { ++ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); ++ } else if (pkt->stream_index == seg->reference_stream_index && 
pkt->pts != AV_NOPTS_VALUE) { ++ // this is the first packet of the reference stream we see, initialize start point ++ seg->start_pts = av_rescale_q(pkt->pts, st->time_base, AV_TIME_BASE_Q); ++ seg->cur_entry.start_time = (double)pkt->pts * av_q2d(st->time_base); ++ seg->cur_entry.start_pts = seg->start_pts; ++ end_pts = seg->start_pts + seg->time * (seg->segment_count + 1); ++ } + } + } + diff --git a/debian/patches/series b/debian/patches/series new file mode 100644 index 00000000000..c2e011753c2 --- /dev/null +++ b/debian/patches/series @@ -0,0 +1 @@ +0001-add-fixes-for-segement-muxer.patch From b482f5a8b70e4a9f2c6746934222f52860ae2431 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 22:21:10 +0800 Subject: [PATCH 11/41] add cuda pixfmt converter impl --- .../0002-add-cuda-pixfmt-converter-impl.patch | 1539 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 1540 insertions(+) create mode 100644 debian/patches/0002-add-cuda-pixfmt-converter-impl.patch diff --git a/debian/patches/0002-add-cuda-pixfmt-converter-impl.patch b/debian/patches/0002-add-cuda-pixfmt-converter-impl.patch new file mode 100644 index 00000000000..b29deea9767 --- /dev/null +++ b/debian/patches/0002-add-cuda-pixfmt-converter-impl.patch @@ -0,0 +1,1539 @@ +Index: jellyfin-ffmpeg/compat/cuda/cuda_runtime.h +=================================================================== +--- jellyfin-ffmpeg.orig/compat/cuda/cuda_runtime.h ++++ jellyfin-ffmpeg/compat/cuda/cuda_runtime.h +@@ -24,6 +24,7 @@ + #define COMPAT_CUDA_CUDA_RUNTIME_H + + // Common macros ++#define __constant__ __attribute__((constant)) + #define __global__ __attribute__((global)) + #define __device__ __attribute__((device)) + #define __device_builtin__ __attribute__((device_builtin)) +@@ -33,65 +34,69 @@ + #define max(a, b) ((a) > (b) ? (a) : (b)) + #define min(a, b) ((a) < (b) ? (a) : (b)) + #define abs(x) ((x) < 0 ? 
-(x) : (x)) ++#define clamp(a, b, c) min(max((a), (b)), (c)) + + #define atomicAdd(a, b) (__atomic_fetch_add(a, b, __ATOMIC_SEQ_CST)) + + // Basic typedefs + typedef __device_builtin__ unsigned long long cudaTextureObject_t; + +-typedef struct __device_builtin__ __align__(2) uchar2 +-{ +- unsigned char x, y; +-} uchar2; +- +-typedef struct __device_builtin__ __align__(4) ushort2 +-{ +- unsigned short x, y; +-} ushort2; +- +-typedef struct __device_builtin__ __align__(8) float2 +-{ +- float x, y; +-} float2; +- +-typedef struct __device_builtin__ __align__(8) int2 +-{ +- int x, y; +-} int2; +- +-typedef struct __device_builtin__ uint3 +-{ +- unsigned int x, y, z; +-} uint3; +- +-typedef struct uint3 dim3; +- +-typedef struct __device_builtin__ __align__(4) uchar4 +-{ +- unsigned char x, y, z, w; +-} uchar4; ++#define MAKE_VECTORS(type, base) \ ++typedef struct __device_builtin__ type##1 { \ ++ base x; \ ++} type##1; \ ++static __inline__ __device__ type##1 make_##type##1(base x) { \ ++ type##1 ret; \ ++ ret.x = x; \ ++ return ret; \ ++} \ ++typedef struct __device_builtin__ __align__(sizeof(base) * 2) type##2 { \ ++ base x, y; \ ++} type##2; \ ++static __inline__ __device__ type##2 make_##type##2(base x, base y) { \ ++ type##2 ret; \ ++ ret.x = x; \ ++ ret.y = y; \ ++ return ret; \ ++} \ ++typedef struct __device_builtin__ type##3 { \ ++ base x, y, z; \ ++} type##3; \ ++static __inline__ __device__ type##3 make_##type##3(base x, base y, base z) { \ ++ type##3 ret; \ ++ ret.x = x; \ ++ ret.y = y; \ ++ ret.z = z; \ ++ return ret; \ ++} \ ++typedef struct __device_builtin__ __align__(sizeof(base) * 4) type##4 { \ ++ base x, y, z, w; \ ++} type##4; \ ++static __inline__ __device__ type##4 make_##type##4(base x, base y, base z, base w) { \ ++ type##4 ret; \ ++ ret.x = x; \ ++ ret.y = y; \ ++ ret.z = z; \ ++ ret.w = w; \ ++ return ret; \ ++} + +-typedef struct __device_builtin__ __align__(8) ushort4 +-{ +- unsigned short x, y, z, w; +-} ushort4; ++#define MAKE_TYPE + +-typedef struct __device_builtin__ __align__(16) int4 +-{ +- int x, y, z, w; +-} int4; ++MAKE_VECTORS(uchar, unsigned char) ++MAKE_VECTORS(ushort, unsigned short) ++MAKE_VECTORS(int, int) ++MAKE_VECTORS(uint, unsigned int) ++MAKE_VECTORS(float, float) + +-typedef struct __device_builtin__ __align__(16) float4 +-{ +- float x, y, z, w; +-} float4; ++typedef struct __device_builtin__ uint3 dim3; + + // Accessors for special registers + #define GETCOMP(reg, comp) \ + asm("mov.u32 %0, %%" #reg "." 
#comp ";" : "=r"(tmp)); \
+ ret.comp = tmp;
+
+-#define GET(name, reg) static inline __device__ uint3 name() {\
++#define GET(name, reg) static __inline__ __device__ uint3 name() {\
+ uint3 ret; \
+ unsigned tmp; \
+ GETCOMP(reg, x) \
+@@ -109,80 +114,59 @@ GET(getThreadIdx, tid)
+ #define blockDim (getBlockDim())
+ #define threadIdx (getThreadIdx())
+
+-// Basic initializers (simple macros rather than inline functions)
+-#define make_int2(a, b) ((int2){.x = a, .y = b})
+-#define make_uchar2(a, b) ((uchar2){.x = a, .y = b})
+-#define make_ushort2(a, b) ((ushort2){.x = a, .y = b})
+-#define make_float2(a, b) ((float2){.x = a, .y = b})
+-#define make_int4(a, b, c, d) ((int4){.x = a, .y = b, .z = c, .w = d})
+-#define make_uchar4(a, b, c, d) ((uchar4){.x = a, .y = b, .z = c, .w = d})
+-#define make_ushort4(a, b, c, d) ((ushort4){.x = a, .y = b, .z = c, .w = d})
+-#define make_float4(a, b, c, d) ((float4){.x = a, .y = b, .z = c, .w = d})
+-
+ // Conversions from the tex instruction's 4-register output to various types
+-#define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);}
++#define TEX2D(type, ret) static __inline__ __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);}
+
+ TEX2D(unsigned char, a & 0xFF)
+ TEX2D(unsigned short, a & 0xFFFF)
+-TEX2D(float, a)
+-TEX2D(uchar2, make_uchar2(a & 0xFF, b & 0xFF))
+-TEX2D(ushort2, make_ushort2(a & 0xFFFF, b & 0xFFFF))
+-TEX2D(float2, make_float2(a, b))
+-TEX2D(uchar4, make_uchar4(a & 0xFF, b & 0xFF, c & 0xFF, d & 0xFF))
+-TEX2D(ushort4, make_ushort4(a & 0xFFFF, b & 0xFFFF, c & 0xFFFF, d & 0xFFFF))
+-TEX2D(float4, make_float4(a, b, c, d))
++TEX2D(uchar2, make_uchar2((unsigned char)a, (unsigned char)b))
++TEX2D(ushort2, make_ushort2((unsigned short)a, (unsigned short)b))
++TEX2D(uchar4, make_uchar4((unsigned char)a, (unsigned char)b, (unsigned char)c, (unsigned char)d))
++TEX2D(ushort4, make_ushort4((unsigned short)a, (unsigned short)b, (unsigned short)c, (unsigned short)d))
+
+ // Template calling tex instruction and converting the output to the selected type
+-template<typename T>
+-inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y)
++template<typename T>
++static __inline__ __device__ T tex2D(cudaTextureObject_t texObject, float x, float y)
+ {
+- T ret;
+- unsigned ret1, ret2, ret3, ret4;
+- asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
+- "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) :
+- "l"(texObject), "f"(x), "f"(y));
+- conv(&ret, ret1, ret2, ret3, ret4);
+- return ret;
++ T ret;
++ unsigned ret1, ret2, ret3, ret4;
++ asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
++ "=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) :
++ "l"(texObject), "f"(x), "f"(y));
++ conv(&ret, ret1, ret2, ret3, ret4);
++ return ret;
+ }
+
+-template<>
+-inline __device__ float4 tex2D<float4>(cudaTextureObject_t texObject, float x, float y)
++static __inline__ __device__ float __exp2f(float x)
+ {
+- float4 ret;
+- asm("tex.2d.v4.f32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
+- "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) :
+- "l"(texObject), "f"(x), "f"(y));
++ float ret;
++ asm("ex2.approx.f32 %0, %1;" : "=f"(ret) : "f"(x));
+ return ret;
+ }
+
+-template<>
+-inline __device__ float tex2D<float>(cudaTextureObject_t texObject, float x, float y)
++#define __expf(x) (__exp2f((x) * 1.4427f))
++
++static __inline__ __device__ float __log2f(float x)
+ {
+- return tex2D<float4>(texObject, x, y).x;
++ float ret;
++ asm("lg2.approx.f32 %0, %1;" : "=f"(ret) : "f"(x));
++ return ret;
+ }
+
+-template<>
+-inline __device__ float2 tex2D<float2>(cudaTextureObject_t texObject, float x, float y)
++#define __logf(x) (__log2f((x)) * 0.693147f)
++#define __log10f(x) (__log2f((x)) * 0.30103f)
++
++static __inline__ __device__ float __powf(float x, float y)
+ {
+- float4 ret = tex2D<float4>(texObject, x, y);
+- return make_float2(ret.x, ret.y);
++ return __exp2f(y * __log2f(x));
+ }
+
+-// Math helper functions
+-static inline __device__ float floorf(float a) { return __builtin_floorf(a); }
+-static inline __device__ float floor(float a) { return __builtin_floorf(a); }
+-static inline __device__ double floor(double a) { return __builtin_floor(a); }
+-static inline __device__ float ceilf(float a) { return __builtin_ceilf(a); }
+-static inline __device__ float ceil(float a) { return __builtin_ceilf(a); }
+-static inline __device__ double ceil(double a) { return __builtin_ceil(a); }
+-static inline __device__ float truncf(float a) { return __builtin_truncf(a); }
+-static inline __device__ float trunc(float a) { return __builtin_truncf(a); }
+-static inline __device__ double trunc(double a) { return __builtin_trunc(a); }
+-static inline __device__ float fabsf(float a) { return __builtin_fabsf(a); }
+-static inline __device__ float fabs(float a) { return __builtin_fabsf(a); }
+-static inline __device__ double fabs(double a) { return __builtin_fabs(a); }
++static __inline__ __device__ float __sqrtf(float x)
++{
++ float ret;
++ asm("sqrt.approx.f32 %0, %1;" : "=f"(ret) : "f"(x));
++ return ret;
++}
+
+-static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); }
+-static inline __device__ float __cosf(float a) { return __nvvm_cos_approx_f(a); }
++#endif
+
+-#endif /* COMPAT_CUDA_CUDA_RUNTIME_H */
+Index: jellyfin-ffmpeg/libavfilter/Makefile
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/Makefile
++++ jellyfin-ffmpeg/libavfilter/Makefile
+@@ -392,8 +392,7 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER)
+ OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o
+ OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o
+ OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o
+-OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \
+- vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o
++OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o scale_eval.o
+ OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o
+ OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o
+ OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o
+Index: jellyfin-ffmpeg/libavfilter/cuda/vector_helpers.cuh
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/cuda/vector_helpers.cuh
++++ /dev/null
+@@ -1,112 +0,0 @@
+-/*
+- * This file is part of FFmpeg.
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a
+- * copy of this software and associated documentation files (the "Software"),
+- * to deal in the Software without restriction, including without limitation
+- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+- * and/or sell copies of the Software, and to permit persons to whom the
+- * Software is furnished to do so, subject to the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included in
+- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +- * DEALINGS IN THE SOFTWARE. +- */ +- +-#ifndef AVFILTER_CUDA_VECTORHELPERS_H +-#define AVFILTER_CUDA_VECTORHELPERS_H +- +-typedef unsigned char uchar; +-typedef unsigned short ushort; +- +-template struct vector_helper { }; +-template<> struct vector_helper { typedef float ftype; typedef int itype; }; +-template<> struct vector_helper { typedef float2 ftype; typedef int2 itype; }; +-template<> struct vector_helper { typedef float4 ftype; typedef int4 itype; }; +-template<> struct vector_helper { typedef float ftype; typedef int itype; }; +-template<> struct vector_helper { typedef float2 ftype; typedef int2 itype; }; +-template<> struct vector_helper { typedef float4 ftype; typedef int4 itype; }; +-template<> struct vector_helper { typedef float ftype; typedef int itype; }; +-template<> struct vector_helper { typedef float2 ftype; typedef int2 itype; }; +-template<> struct vector_helper { typedef float4 ftype; typedef int4 itype; }; +- +-#define floatT typename vector_helper::ftype +-#define intT typename vector_helper::itype +- +-template inline __device__ V to_floatN(const T &a) { return (V)a; } +-template inline __device__ T from_floatN(const V &a) { return (T)a; } +- +-#define OPERATORS2(T) \ +- template inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.x + b.x, a.y + b.y); } \ +- template inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.x - b.x, a.y - b.y); } \ +- template inline __device__ T operator*(const T &a, V b) { return make_ ## T (a.x * b, a.y * b); } \ +- template inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, a.y / b); } \ +- template inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b, a.y >> b); } \ +- template inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x << b, a.y << b); } \ +- template inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; return a; } \ +- template inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; } \ +- template inline __device__ void vec_set_scalar(T &a, V b) { a.x = b; a.y = b; } \ +- template<> inline __device__ float2 to_floatN(const T &a) { return make_float2(a.x, a.y); } \ +- template<> inline __device__ T from_floatN(const float2 &a) { return make_ ## T(a.x, a.y); } +-#define OPERATORS4(T) \ +- template inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); } \ +- template inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); } \ +- template inline __device__ T operator*(const T &a, V b) { return make_ ## T (a.x * b, a.y * b, a.z * b, a.w * b); } \ +- template inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, a.y / b, a.z / b, a.w / b); } \ +- template inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b, a.y >> b, a.z >> b, a.w >> b); } \ +- template inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x 
<< b, a.y << b, a.z << b, a.w << b); } \ +- template inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; } \ +- template inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; a.z = b.z; a.w = b.w; } \ +- template inline __device__ void vec_set_scalar(T &a, V b) { a.x = b; a.y = b; a.z = b; a.w = b; } \ +- template<> inline __device__ float4 to_floatN(const T &a) { return make_float4(a.x, a.y, a.z, a.w); } \ +- template<> inline __device__ T from_floatN(const float4 &a) { return make_ ## T(a.x, a.y, a.z, a.w); } +- +-OPERATORS2(int2) +-OPERATORS2(uchar2) +-OPERATORS2(ushort2) +-OPERATORS2(float2) +-OPERATORS4(int4) +-OPERATORS4(uchar4) +-OPERATORS4(ushort4) +-OPERATORS4(float4) +- +-template inline __device__ void vec_set(int &a, V b) { a = b; } +-template inline __device__ void vec_set(float &a, V b) { a = b; } +-template inline __device__ void vec_set(uchar &a, V b) { a = b; } +-template inline __device__ void vec_set(ushort &a, V b) { a = b; } +-template inline __device__ void vec_set_scalar(int &a, V b) { a = b; } +-template inline __device__ void vec_set_scalar(float &a, V b) { a = b; } +-template inline __device__ void vec_set_scalar(uchar &a, V b) { a = b; } +-template inline __device__ void vec_set_scalar(ushort &a, V b) { a = b; } +- +-template +-inline __device__ T lerp_scalar(T v0, T v1, float t) { +- return t*v1 + (1.0f - t)*v0; +-} +- +-template<> +-inline __device__ float2 lerp_scalar(float2 v0, float2 v1, float t) { +- return make_float2( +- lerp_scalar(v0.x, v1.x, t), +- lerp_scalar(v0.y, v1.y, t) +- ); +-} +- +-template<> +-inline __device__ float4 lerp_scalar(float4 v0, float4 v1, float t) { +- return make_float4( +- lerp_scalar(v0.x, v1.x, t), +- lerp_scalar(v0.y, v1.y, t), +- lerp_scalar(v0.z, v1.z, t), +- lerp_scalar(v0.w, v1.w, t) +- ); +-} +- +-#endif +Index: jellyfin-ffmpeg/libavfilter/dither_matrix.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/dither_matrix.h +@@ -0,0 +1,74 @@ ++/* ++ * Dither matrix data ++ * ++ * This file is placed in the public domain. 
++ */ ++ ++#include ++static const int ff_fruit_dither_size = 64; ++static const uint16_t ff_fruit_dither_matrix[] = { ++ 332, 2776, 1933, 42, 2598, 1796, 1000, 2978, 1677, 3452, 2164, 1564, 2644, 358, 2012, 3471, 1147, 3071, 596, 1943, 3146, 1191, 2469, 919, 3664, 2359, 441, 2691, 1179, 3027, 1408, 298, 3892, 1825, 182, 2178, 3028, 317, 2412, 858, 3097, 2205, 1145, 2880, 990, 2697, 728, 1969, 2312, 1393, 3232, 1204, 3752, 1529, 448, 3955, 2076, 833, 3856, 1, 3445, 2105, 955, 1761, ++ 4060, 1053, 3038, 1445, 3302, 430, 3702, 2119, 625, 2523, 12, 3003, 959, 3814, 2388, 829, 4059, 2236, 1417, 3447, 198, 4020, 1891, 3368, 76, 1460, 2963, 1680, 3721, 535, 2275, 2916, 1226, 2348, 3580, 823, 1897, 4032, 1245, 2728, 194, 3285, 1941, 399, 3639, 1593, 3775, 1038, 3012, 162, 2687, 2029, 559, 2983, 1809, 2378, 325, 2861, 1331, 2533, 1171, 2701, 3328, 153, ++ 2214, 3412, 501, 3934, 892, 1918, 2686, 1199, 3090, 1351, 3779, 1776, 3371, 1457, 217, 2844, 1726, 311, 2896, 1021, 2604, 1546, 569, 2758, 1818, 3967, 727, 3305, 963, 1866, 3591, 853, 3215, 496, 2651, 1453, 2808, 704, 2247, 3395, 1779, 937, 4014, 2288, 1286, 3110, 331, 3309, 1839, 3866, 932, 3566, 2499, 1005, 3346, 1192, 3712, 1743, 3399, 757, 3765, 391, 1871, 2928, ++ 1411, 820, 2531, 1622, 2192, 3478, 215, 4079, 384, 3298, 742, 2332, 436, 2685, 1963, 3273, 680, 3571, 2033, 3795, 806, 3550, 2319, 1225, 3191, 1027, 2506, 237, 2196, 3126, 28, 2148, 1772, 3959, 1009, 3507, 85, 3742, 1539, 453, 3647, 2154, 573, 2786, 156, 2020, 2569, 1425, 538, 2375, 1723, 300, 1854, 4065, 110, 2741, 678, 3138, 213, 1979, 2330, 1530, 3542, 720, ++ 3811, 1955, 3240, 126, 2909, 760, 2482, 1493, 2301, 1719, 2788, 1180, 3998, 923, 3649, 1094, 1862, 2579, 1272, 30, 3109, 1987, 255, 3816, 471, 1977, 3519, 1557, 3882, 1086, 2754, 3776, 1304, 241, 2262, 1863, 3163, 1111, 2982, 2026, 1056, 2948, 1439, 3323, 1737, 3595, 860, 3980, 2895, 1193, 3365, 2779, 852, 2175, 3069, 1641, 2309, 1237, 2630, 4036, 965, 3052, 1096, 2487, ++ 289, 2863, 1190, 3633, 1330, 3834, 1075, 3429, 602, 3850, 174, 3242, 1657, 3080, 98, 2357, 3899, 374, 3224, 1619, 2431, 1125, 3019, 1367, 2395, 3102, 794, 2850, 426, 2451, 1687, 548, 3070, 2596, 3335, 565, 1630, 2459, 288, 3937, 2504, 51, 3740, 725, 2475, 433, 2706, 2075, 19, 3694, 633, 1982, 3149, 1345, 729, 3843, 411, 3654, 1681, 564, 2721, 104, 3875, 2110, ++ 3504, 1730, 537, 2398, 2058, 443, 2636, 1802, 2925, 953, 2568, 2005, 583, 2108, 2802, 1441, 889, 2726, 1949, 3984, 486, 3407, 624, 2698, 1648, 97, 4055, 1340, 1994, 3667, 913, 3453, 1964, 815, 1400, 4072, 2767, 873, 3457, 684, 1576, 3119, 1884, 1222, 3898, 1535, 3482, 982, 1817, 2520, 1487, 3927, 181, 3493, 2396, 1438, 2871, 985, 2070, 3498, 1370, 3279, 1655, 586, ++ 1280, 2625, 3912, 939, 2999, 1649, 3162, 64, 2130, 3606, 1428, 3469, 1256, 3824, 409, 3562, 1780, 3433, 768, 1206, 2843, 2086, 3869, 926, 3663, 2099, 1047, 2623, 3217, 148, 1810, 2498, 314, 3790, 2126, 129, 2037, 3088, 1356, 2314, 3301, 946, 2716, 2163, 250, 3091, 575, 2227, 3204, 359, 2970, 1110, 2594, 1867, 532, 3338, 43, 2492, 3172, 282, 2394, 842, 2852, 2019, ++ 3111, 31, 1813, 3256, 243, 3687, 851, 4002, 1301, 480, 3004, 264, 2493, 1586, 2947, 714, 2538, 165, 2193, 3597, 1514, 130, 1792, 3132, 348, 2875, 3434, 491, 1251, 2120, 3979, 1093, 2905, 979, 2561, 3625, 1230, 373, 3836, 1953, 197, 4028, 498, 3383, 1713, 2400, 1402, 4083, 1150, 3589, 2106, 592, 3650, 1252, 4012, 1975, 3047, 1574, 690, 3909, 1745, 3616, 349, 3976, ++ 894, 3438, 2251, 1160, 2582, 1467, 2342, 1747, 2837, 2242, 1076, 3946, 866, 3385, 1108, 1985, 4076, 1303, 
2915, 438, 2589, 3312, 1085, 2367, 1916, 1403, 759, 2340, 3582, 2835, 637, 3264, 1997, 3506, 515, 1544, 3258, 2612, 1017, 2903, 1322, 1791, 3014, 1154, 3826, 871, 2858, 192, 2739, 719, 1701, 3177, 2222, 345, 2737, 770, 1181, 3774, 2264, 1060, 2667, 1262, 2318, 1584, ++ 2654, 1420, 499, 4045, 688, 3379, 319, 3490, 735, 3699, 1642, 2050, 2683, 40, 2204, 3096, 343, 3261, 1672, 3747, 900, 1981, 4025, 644, 3505, 2575, 3903, 1724, 232, 1517, 2590, 1405, 58, 1663, 3051, 2381, 751, 1756, 3646, 647, 3464, 2363, 826, 2563, 92, 3137, 1928, 3524, 1590, 2159, 3851, 84, 1423, 3082, 1684, 2417, 3417, 369, 1808, 3022, 118, 3254, 661, 3555, ++ 229, 3756, 2917, 1627, 2773, 1235, 2949, 1002, 2470, 140, 3274, 594, 3533, 1678, 3799, 933, 1812, 2457, 673, 2306, 1474, 3055, 292, 2820, 1293, 2, 1106, 3237, 2013, 3823, 439, 3611, 2093, 4015, 1264, 258, 3914, 2294, 16, 2692, 1983, 295, 3723, 1893, 3442, 1306, 619, 2371, 392, 2989, 1071, 2484, 3512, 666, 3796, 195, 1522, 2775, 3586, 875, 4086, 1921, 2883, 1163, ++ 2456, 1935, 1032, 2305, 111, 3845, 2036, 1555, 3964, 2115, 1289, 2911, 977, 2372, 465, 2616, 3618, 1211, 3933, 72, 3404, 772, 2424, 1662, 3755, 2252, 3025, 518, 2524, 855, 2953, 1102, 2811, 795, 2602, 3414, 2060, 891, 3357, 1132, 3965, 1495, 2806, 551, 1633, 2629, 3659, 1172, 3958, 1446, 3319, 798, 1787, 2768, 1271, 3184, 2210, 587, 1338, 2478, 1550, 483, 2198, 3388, ++ 832, 3219, 386, 3674, 1733, 2479, 578, 3214, 416, 3044, 1861, 291, 4033, 1449, 3477, 1940, 205, 2976, 885, 2704, 1886, 3820, 1070, 3288, 700, 1369, 1855, 4087, 1194, 3463, 1764, 2329, 297, 3296, 1742, 539, 1398, 3107, 1696, 2387, 711, 3151, 1223, 2219, 4038, 328, 2095, 2919, 845, 2595, 263, 2235, 4043, 398, 2129, 903, 3924, 1885, 3317, 249, 2732, 3685, 1383, 55, ++ 3926, 1481, 2977, 1177, 3292, 893, 3600, 1418, 2670, 927, 3637, 2477, 1127, 2665, 765, 2879, 1283, 2162, 3333, 1359, 482, 2180, 2942, 219, 2548, 3594, 308, 2759, 2185, 106, 3174, 699, 3738, 1139, 2277, 3833, 2804, 223, 3707, 415, 3518, 1913, 150, 2995, 864, 3222, 1575, 35, 3428, 1694, 3581, 992, 2907, 1494, 3676, 2702, 11, 3103, 849, 3825, 1848, 947, 3125, 1901, ++ 2494, 600, 2017, 2658, 293, 2207, 2851, 9, 1799, 3186, 510, 2077, 3318, 102, 3196, 1571, 3982, 357, 1781, 3717, 2774, 1176, 1834, 4001, 908, 2044, 3195, 986, 1675, 3696, 1339, 2638, 1617, 3026, 96, 1984, 940, 2446, 1563, 2876, 930, 2532, 3426, 1516, 2407, 1134, 3801, 1931, 2307, 521, 2744, 1883, 138, 3356, 597, 1758, 2343, 1389, 2831, 2145, 693, 3485, 2286, 520, ++ 3031, 1601, 3710, 956, 4067, 1849, 1097, 3760, 2406, 1215, 3861, 1596, 846, 3804, 1890, 516, 2295, 3062, 747, 2337, 169, 3178, 603, 2269, 3104, 1537, 627, 3537, 2415, 786, 2988, 379, 4040, 839, 2554, 3561, 1363, 4005, 745, 2128, 3817, 1348, 617, 3910, 397, 3050, 598, 2787, 1037, 3932, 1277, 3741, 2091, 1073, 2503, 3530, 934, 4024, 352, 1610, 2969, 158, 1347, 3992, ++ 1155, 3348, 142, 2259, 3087, 500, 3380, 2096, 730, 2795, 210, 3005, 2335, 1173, 2627, 3703, 1049, 2038, 3514, 983, 3868, 1653, 3552, 1249, 74, 3891, 2586, 1452, 238, 3936, 1738, 2168, 1209, 3384, 1847, 593, 3233, 355, 3036, 1753, 63, 3250, 2150, 1732, 2613, 1259, 3629, 1483, 3120, 280, 2382, 663, 2641, 3176, 1637, 265, 3013, 1239, 2420, 3416, 1088, 3769, 1938, 2555, ++ 347, 2049, 2809, 1443, 761, 2628, 1693, 277, 3975, 1496, 3523, 1961, 547, 3106, 287, 1492, 3340, 24, 1547, 2887, 2147, 370, 2535, 1880, 2962, 2121, 479, 3077, 1922, 2618, 966, 3253, 2439, 327, 2847, 1515, 2226, 2713, 962, 3632, 2465, 1039, 2854, 245, 3179, 2080, 124, 2525, 792, 3486, 1744, 3289, 1404, 476, 3782, 2186, 715, 3579, 
1971, 626, 2784, 2201, 741, 3248, ++ 1640, 3551, 1024, 3840, 1827, 3622, 1072, 3238, 2279, 835, 2562, 1295, 4085, 1816, 3535, 2495, 724, 2664, 4042, 570, 1281, 3422, 1013, 3787, 783, 1343, 3722, 1123, 3427, 599, 3634, 27, 1572, 3827, 1014, 3665, 139, 1868, 3197, 1390, 640, 4090, 1459, 3714, 902, 3421, 1676, 4004, 2246, 1128, 2720, 60, 4069, 2002, 1035, 2818, 1352, 2552, 88, 3847, 1801, 309, 3627, 1104, ++ 2894, 686, 2509, 403, 2926, 95, 1991, 2868, 469, 2052, 3370, 66, 2399, 679, 1248, 2081, 3181, 1100, 2224, 1870, 3001, 1506, 2760, 260, 3294, 2707, 2027, 146, 2355, 1323, 2783, 1956, 2946, 676, 2000, 3053, 1325, 3893, 424, 2084, 2998, 1797, 466, 2380, 1875, 507, 2939, 1054, 396, 3700, 1502, 3007, 904, 2418, 3443, 200, 3954, 1588, 3283, 1065, 3084, 1471, 2714, 2270, ++ 34, 4029, 1869, 3446, 1207, 2442, 3919, 879, 1788, 3726, 1115, 3129, 1466, 2742, 3897, 214, 1750, 3763, 405, 3459, 123, 3916, 812, 2317, 1685, 533, 3389, 1589, 3908, 1844, 376, 3990, 1158, 3437, 2377, 458, 2645, 896, 2336, 3396, 193, 2581, 3539, 1083, 2756, 3871, 1426, 2135, 3166, 1824, 566, 2104, 3559, 413, 1814, 2922, 2122, 797, 2750, 2258, 670, 3944, 869, 1729, ++ 3183, 1409, 916, 2220, 3100, 568, 1427, 3351, 2601, 336, 2803, 778, 3656, 432, 2170, 3330, 944, 2302, 2856, 970, 2416, 1766, 3249, 1218, 4066, 2438, 993, 2635, 697, 3021, 2152, 830, 2608, 234, 1421, 4051, 1705, 3577, 1507, 791, 3807, 958, 2051, 3148, 4, 2256, 712, 3529, 179, 2536, 3880, 989, 2655, 1302, 3267, 562, 1210, 3517, 303, 1950, 3393, 168, 2125, 3749, ++ 502, 2745, 3648, 256, 1620, 3684, 2240, 173, 1238, 3999, 2216, 1704, 2039, 2886, 1213, 1889, 2980, 665, 1585, 3715, 1333, 2662, 446, 3011, 41, 1906, 3772, 306, 3509, 1241, 3339, 1645, 3692, 2209, 3121, 607, 2833, 47, 3185, 2507, 1291, 2859, 400, 1580, 3675, 1328, 2676, 1658, 2857, 1183, 1965, 3190, 149, 3972, 1573, 2221, 3832, 1706, 3037, 1290, 2534, 1647, 2923, 1161, ++ 2452, 1942, 809, 3321, 2139, 1022, 2842, 1887, 3074, 1528, 536, 3475, 188, 3837, 752, 3573, 79, 4018, 2141, 285, 3354, 738, 3680, 2173, 1473, 2927, 1144, 2816, 877, 2368, 91, 2727, 635, 1253, 1888, 3513, 974, 2280, 1990, 488, 3326, 1948, 4007, 2153, 660, 3314, 302, 4039, 784, 3626, 445, 2327, 1674, 2513, 827, 3099, 26, 2464, 749, 4095, 912, 3704, 556, 3495, ++ 225, 3962, 1567, 2550, 713, 4053, 371, 3436, 796, 2541, 3169, 1174, 2428, 1414, 2609, 1628, 2753, 1263, 3206, 1951, 1129, 2885, 1740, 911, 3376, 672, 3603, 1616, 2015, 3873, 1477, 3543, 2087, 3846, 171, 2549, 1612, 3730, 1157, 3923, 1650, 151, 1136, 3033, 2466, 996, 2931, 1450, 2391, 1763, 3362, 883, 3678, 333, 3496, 1422, 2730, 1084, 3225, 375, 2780, 1480, 2354, 1882, ++ 2961, 1028, 2814, 80, 3154, 1720, 2463, 1444, 3733, 23, 1666, 3917, 611, 3398, 388, 3235, 696, 2344, 425, 2580, 3839, 154, 2266, 3957, 381, 2056, 2476, 196, 3180, 527, 3009, 1044, 383, 2347, 2973, 781, 3227, 342, 2766, 654, 2967, 2101, 3546, 457, 1534, 3732, 1917, 616, 3134, 52, 2174, 2981, 1169, 2813, 2116, 541, 3781, 2156, 1448, 3578, 2199, 87, 3359, 1313, ++ 618, 3424, 1397, 3818, 2098, 1095, 2943, 961, 2316, 1959, 2975, 1026, 2724, 1768, 2155, 1153, 3945, 1751, 3612, 814, 1551, 3304, 1178, 2764, 1643, 3136, 1081, 3981, 2213, 1310, 2587, 1826, 3369, 1533, 1092, 3953, 1296, 2433, 1833, 3598, 1023, 2543, 1309, 3187, 2043, 125, 2519, 3528, 1324, 3757, 1511, 492, 4057, 1910, 994, 3200, 1777, 212, 2960, 702, 1811, 3122, 943, 3925, ++ 2611, 2046, 477, 2397, 650, 3541, 235, 3864, 632, 3342, 423, 2234, 3592, 109, 3758, 2933, 239, 2790, 1051, 2985, 2113, 630, 2515, 257, 3778, 843, 2708, 1549, 653, 3521, 233, 4058, 748, 
3086, 2231, 484, 2062, 3075, 115, 1501, 3287, 334, 3885, 856, 2763, 3971, 1195, 414, 2822, 681, 2700, 1830, 2422, 112, 3556, 2267, 901, 3983, 2373, 1217, 3770, 454, 2481, 1845, ++ 266, 3662, 1654, 3208, 1287, 1934, 3060, 1807, 2648, 1395, 4074, 828, 1531, 2640, 1350, 872, 2454, 1512, 3499, 8, 4092, 1762, 3540, 1434, 2146, 3286, 71, 3683, 1902, 2421, 1385, 2719, 1972, 13, 3660, 1688, 3450, 1001, 4077, 2328, 907, 2913, 1690, 2353, 545, 1485, 2991, 2134, 1728, 3948, 938, 3353, 1298, 2891, 1592, 387, 3029, 1896, 524, 3378, 2092, 1560, 2972, 1089, ++ 3150, 882, 2672, 183, 3693, 2572, 506, 1109, 3218, 164, 2467, 1930, 3175, 706, 3480, 2111, 3831, 584, 2211, 2679, 1292, 2819, 442, 2935, 601, 1790, 2496, 1041, 2906, 435, 3230, 880, 3780, 2512, 909, 2770, 301, 2657, 1914, 508, 3754, 2079, 39, 3698, 1879, 3455, 800, 3322, 159, 2225, 3046, 304, 3621, 615, 3870, 2567, 1077, 3651, 1484, 2749, 190, 4008, 606, 3564, ++ 2203, 1410, 4046, 1988, 837, 1468, 3978, 2260, 1638, 3764, 929, 3547, 322, 1819, 2855, 394, 1416, 3006, 1702, 785, 3262, 960, 2249, 3624, 1224, 4023, 790, 3432, 1254, 3939, 1621, 2283, 531, 1519, 3394, 1258, 3819, 756, 3244, 1609, 2583, 1294, 3141, 1052, 2674, 261, 2427, 1553, 3653, 1165, 1962, 2606, 1040, 2320, 1937, 1284, 3160, 56, 2237, 920, 3194, 1372, 2401, 1952, ++ 0, 2901, 689, 2338, 3352, 2840, 73, 3405, 739, 2054, 2959, 1377, 2202, 3941, 1067, 2021, 3306, 167, 3905, 2042, 307, 3792, 1860, 144, 2615, 1978, 3064, 278, 1945, 2383, 119, 3544, 1874, 3131, 203, 2187, 1767, 2462, 1229, 3585, 364, 2798, 683, 3503, 1412, 4049, 921, 2762, 447, 3144, 733, 3797, 1489, 2986, 231, 3474, 780, 2793, 3935, 1670, 2526, 367, 3315, 841, ++ 3855, 1734, 3489, 305, 1240, 1836, 2195, 1011, 3078, 390, 2556, 642, 3209, 44, 2743, 3679, 917, 2695, 1133, 3400, 2436, 1098, 3056, 1415, 3277, 487, 1568, 2643, 3670, 925, 3101, 1034, 2747, 1167, 4030, 2848, 580, 3094, 99, 2287, 1031, 3989, 1968, 2379, 514, 2053, 3092, 1234, 3894, 1623, 2472, 14, 3259, 822, 3968, 2248, 1401, 2040, 449, 3269, 766, 3706, 1603, 2944, ++ 1250, 2650, 1015, 2537, 3943, 579, 3630, 2593, 1274, 3467, 1583, 3829, 1162, 2471, 1543, 553, 2365, 1613, 3072, 677, 1748, 2752, 528, 3929, 906, 2321, 3810, 1288, 628, 2829, 2094, 3852, 401, 2392, 773, 1591, 3468, 1424, 3881, 1789, 3331, 1520, 172, 3220, 1636, 3596, 89, 1924, 2284, 1003, 3435, 1739, 2124, 2778, 1806, 517, 2621, 3403, 1205, 1829, 2869, 1062, 2293, 244, ++ 3608, 431, 3246, 1379, 2123, 2937, 1532, 296, 3922, 1865, 177, 2143, 2974, 427, 4011, 1912, 3576, 326, 2109, 4031, 75, 3655, 2206, 1541, 2048, 3401, 33, 2161, 3364, 1476, 254, 1691, 3337, 1375, 3605, 2055, 362, 2738, 716, 2634, 450, 2212, 3777, 1187, 2870, 863, 2485, 3363, 337, 3008, 634, 4063, 452, 1268, 3313, 1112, 3727, 253, 2228, 3798, 103, 1974, 4075, 1406, ++ 2430, 2061, 776, 3720, 152, 1114, 3293, 2325, 840, 2660, 3272, 988, 1725, 3366, 1305, 3020, 968, 3228, 799, 2544, 1840, 1016, 3170, 259, 2873, 777, 1838, 3142, 455, 3985, 2010, 2993, 646, 2689, 45, 2547, 3745, 1137, 3212, 1317, 3023, 825, 2566, 410, 2127, 3931, 1538, 775, 3812, 1785, 2757, 1368, 2542, 3584, 127, 3041, 2011, 1548, 2940, 723, 2490, 3307, 530, 2789, ++ 948, 3123, 1579, 2777, 2035, 3848, 542, 1946, 3057, 1433, 558, 3731, 2600, 755, 2326, 108, 2785, 1380, 3750, 1276, 3451, 2063, 737, 3567, 1247, 4062, 2671, 884, 2411, 1126, 2588, 952, 3794, 1858, 3161, 924, 1804, 2333, 218, 3974, 1679, 3456, 1394, 3572, 1757, 211, 3205, 2659, 1242, 2346, 176, 3140, 850, 2188, 1716, 2460, 560, 4019, 997, 3522, 1735, 1105, 3048, 1661, ++ 3773, 61, 3548, 609, 1273, 2553, 1682, 3609, 
25, 4056, 1898, 2233, 224, 3604, 1107, 3802, 1673, 2410, 246, 2849, 459, 3066, 2578, 1625, 2425, 368, 1463, 3734, 1671, 3510, 107, 3257, 1341, 460, 2239, 3947, 550, 3441, 1993, 971, 2408, 20, 2765, 1061, 2952, 2292, 1371, 434, 3642, 972, 3460, 1659, 3736, 330, 3904, 888, 3276, 1362, 2735, 356, 2281, 3901, 227, 2171, ++ 805, 2528, 1774, 2183, 3165, 267, 2860, 1008, 2461, 1285, 2834, 935, 3081, 1509, 2694, 2073, 671, 3168, 1939, 1141, 3969, 1429, 133, 3815, 973, 3231, 2085, 201, 2918, 698, 2781, 1644, 2311, 3387, 1057, 1561, 2821, 1216, 2599, 3355, 701, 3791, 1892, 481, 4091, 651, 3310, 2191, 1700, 2899, 707, 2448, 1312, 2668, 1454, 2792, 2107, 36, 2030, 3234, 1465, 865, 2597, 3484, ++ 1518, 4000, 429, 3415, 870, 3949, 1503, 3492, 721, 3372, 361, 3789, 1831, 529, 3487, 320, 4081, 1012, 3607, 2250, 629, 2169, 3347, 1976, 2997, 722, 3620, 2351, 1200, 3889, 2144, 810, 4082, 236, 2023, 3515, 117, 3709, 444, 1835, 2845, 1299, 3198, 2140, 1510, 2546, 887, 3857, 49, 2009, 3994, 404, 3035, 1048, 3410, 461, 3668, 1618, 3828, 590, 2807, 3613, 1354, 620, ++ 3157, 1152, 2867, 1297, 2711, 2265, 490, 2131, 2955, 1712, 2350, 1214, 3203, 2179, 1069, 2577, 1857, 2924, 17, 2699, 1692, 2950, 1197, 485, 1582, 2740, 1407, 544, 3278, 1784, 382, 2941, 1208, 2510, 3063, 844, 2722, 2215, 1595, 4026, 226, 2370, 743, 3681, 145, 3465, 2068, 2782, 1392, 3153, 1006, 2255, 3638, 105, 1821, 2517, 813, 2920, 1189, 2409, 1967, 122, 3058, 2067, ++ 2403, 252, 1980, 3686, 116, 1652, 3746, 1337, 204, 3970, 669, 2715, 82, 3895, 1635, 3409, 763, 1526, 3502, 967, 3719, 268, 3900, 2393, 3558, 62, 3991, 1973, 2540, 987, 3716, 1909, 3327, 554, 1615, 3761, 1366, 595, 3300, 876, 3116, 1138, 2893, 1342, 2678, 1754, 378, 1079, 3563, 525, 2733, 1853, 1143, 2112, 4073, 1228, 3360, 1904, 377, 3391, 1042, 4050, 1721, 478, ++ 3860, 1624, 3270, 936, 2088, 2929, 610, 3145, 2444, 1486, 3534, 2007, 2996, 808, 2402, 199, 2828, 2100, 505, 3127, 1435, 2558, 2003, 613, 1852, 2633, 1117, 3164, 271, 3476, 2271, 3, 1469, 3853, 2362, 340, 2898, 1878, 2570, 1462, 2166, 3872, 335, 3367, 639, 3988, 2964, 1908, 2374, 1577, 3835, 281, 3481, 2839, 582, 3018, 187, 2276, 3928, 1597, 2652, 831, 2557, 3425, ++ 1119, 2836, 561, 2514, 4088, 1064, 3458, 1957, 886, 2841, 341, 1101, 1683, 3236, 1365, 3617, 1033, 3921, 2447, 1846, 750, 3402, 1087, 3065, 1308, 3439, 705, 1752, 2872, 1374, 848, 3207, 2637, 1030, 2065, 3462, 910, 3911, 57, 3623, 534, 1894, 2559, 1611, 2297, 1413, 746, 3783, 175, 3211, 905, 2527, 754, 1464, 2369, 1665, 3751, 1396, 652, 3040, 272, 3266, 1307, 2102, ++ 15, 2274, 3526, 1432, 350, 1795, 2565, 48, 3883, 1269, 3411, 2136, 4027, 310, 2626, 645, 3113, 1311, 290, 3784, 2680, 113, 2268, 4068, 339, 1947, 2468, 3830, 526, 2078, 3950, 1711, 636, 3049, 166, 1731, 3108, 1184, 1999, 2669, 1349, 3500, 890, 3711, 78, 3290, 2502, 1201, 2853, 2184, 1357, 3303, 1926, 3884, 313, 2675, 789, 3171, 2437, 1265, 3718, 1920, 608, 2951, ++ 3766, 1569, 857, 2897, 2118, 3645, 762, 3182, 1629, 2360, 703, 2900, 954, 1876, 3759, 1488, 2289, 1911, 2956, 981, 1664, 3644, 918, 1562, 2904, 3661, 147, 1475, 3311, 2649, 230, 2245, 3636, 1386, 4022, 2603, 509, 2376, 3345, 847, 3059, 208, 2866, 1186, 2646, 1703, 366, 3392, 1823, 540, 4017, 6, 2987, 991, 3375, 1159, 3601, 2066, 68, 2772, 767, 2322, 3587, 1399, ++ 519, 2181, 3879, 161, 3229, 1149, 2032, 2656, 417, 3767, 2018, 134, 3549, 2223, 552, 3284, 59, 3987, 589, 3444, 2339, 422, 2805, 2151, 563, 1196, 3199, 2345, 874, 1300, 3531, 1142, 2746, 372, 2182, 821, 3744, 1631, 269, 4070, 1587, 2405, 1782, 3887, 674, 3560, 2071, 942, 
3669, 1513, 2571, 2089, 691, 2453, 1877, 2874, 468, 1717, 3525, 1430, 3960, 1626, 207, 2734, ++ 1832, 3000, 1203, 2585, 1749, 463, 3986, 1355, 3349, 975, 3054, 1604, 2607, 1182, 2938, 1698, 2574, 1227, 2794, 2069, 1146, 3260, 1316, 3876, 1794, 2681, 1996, 473, 3993, 1936, 3032, 567, 2001, 3397, 1478, 3252, 1148, 2954, 1929, 2272, 623, 3167, 420, 2158, 2930, 1121, 3085, 2441, 221, 2892, 1082, 3610, 1602, 3803, 157, 1523, 4044, 2285, 1043, 2912, 456, 2516, 3454, 978, ++ 4037, 315, 3361, 622, 3691, 2194, 2984, 189, 1815, 2530, 497, 3271, 740, 3859, 270, 3671, 834, 3511, 1771, 186, 3743, 718, 3015, 29, 3373, 819, 3762, 1578, 2755, 77, 2414, 1634, 3915, 881, 2826, 94, 2560, 467, 3520, 1260, 3724, 1360, 3473, 1498, 163, 4009, 555, 1851, 3800, 787, 3251, 299, 3039, 1232, 3268, 2229, 662, 3002, 242, 1899, 3295, 1198, 1989, 3159, ++ 709, 2443, 2041, 1525, 2666, 1275, 859, 2090, 3557, 1116, 4054, 1437, 2404, 1944, 2810, 1068, 2352, 385, 3156, 2450, 1472, 2639, 1710, 2366, 1140, 2884, 262, 3239, 964, 3690, 807, 3135, 251, 2384, 1699, 3865, 1318, 3098, 779, 2486, 18, 2631, 914, 2799, 1919, 2364, 1566, 3334, 1255, 2304, 1656, 2688, 2047, 574, 2653, 1091, 3408, 1332, 3844, 2445, 631, 3737, 93, 2290, ++ 1646, 3672, 1046, 3508, 70, 3886, 2419, 3213, 346, 2815, 2138, 5, 3430, 614, 1479, 3374, 1651, 4064, 1353, 771, 3888, 344, 3545, 667, 4035, 1859, 2497, 1320, 2300, 1765, 2632, 1436, 3658, 1118, 3350, 731, 2315, 1843, 3956, 1010, 3316, 2082, 3918, 472, 3640, 744, 2690, 65, 2823, 428, 4078, 854, 3570, 1458, 3952, 354, 2014, 2624, 836, 1497, 3042, 1709, 2771, 1103, ++ 2990, 228, 2862, 811, 3189, 1736, 512, 1482, 3809, 710, 1773, 3034, 1346, 3951, 2291, 141, 2890, 585, 2696, 2022, 2910, 1055, 2197, 3139, 1382, 489, 3635, 621, 3461, 220, 4080, 546, 1881, 2958, 412, 1998, 3588, 184, 1455, 2761, 1605, 717, 1778, 3188, 1170, 3030, 1391, 3862, 1905, 3423, 1063, 2505, 101, 1932, 2429, 1267, 3701, 37, 3488, 2083, 294, 4013, 803, 3470, ++ 1387, 3907, 1793, 2324, 1244, 2800, 2006, 3068, 980, 2385, 3628, 1050, 2729, 380, 1798, 3739, 928, 2232, 3413, 81, 1669, 3329, 1524, 216, 2703, 2097, 2936, 1639, 2045, 2712, 1344, 2167, 3494, 1019, 2458, 3158, 1168, 2592, 3448, 440, 3788, 2945, 202, 2474, 1995, 321, 3501, 1018, 577, 2176, 3083, 1373, 2865, 3344, 513, 3093, 1600, 2832, 1099, 3143, 2303, 1025, 2622, 363, ++ 2263, 668, 2717, 395, 4071, 659, 3673, 137, 2682, 1660, 275, 3210, 824, 2423, 3308, 1270, 3095, 1856, 1221, 3652, 2529, 576, 3961, 1895, 3735, 1124, 90, 3890, 941, 3255, 817, 2881, 32, 2731, 1715, 312, 4047, 649, 1986, 2358, 1246, 1927, 3574, 838, 4048, 2341, 1722, 2610, 3247, 1556, 276, 3677, 643, 1164, 3806, 2261, 774, 1903, 3913, 641, 1461, 3282, 1769, 3657, ++ 1614, 3325, 1175, 3133, 1565, 2483, 1113, 3297, 1319, 4003, 2241, 1236, 3838, 1606, 655, 2751, 248, 3920, 464, 2149, 950, 3045, 1261, 2825, 736, 2432, 3358, 1442, 2511, 323, 3725, 1527, 3940, 862, 3602, 2257, 1581, 2797, 998, 3147, 284, 2705, 1045, 2889, 1504, 648, 3073, 185, 1243, 3902, 2310, 1746, 2117, 2710, 1521, 180, 3516, 2647, 318, 2489, 3682, 143, 2908, 732, ++ 2508, 22, 3822, 1954, 222, 3483, 2137, 474, 2902, 588, 3114, 2034, 114, 2882, 3666, 1822, 1059, 2323, 3216, 1329, 3805, 155, 2298, 402, 3128, 1540, 2177, 656, 3061, 1800, 2313, 523, 1915, 3089, 1376, 694, 3281, 83, 3688, 1727, 3966, 1451, 3265, 50, 2064, 3697, 1335, 3449, 2028, 758, 2791, 949, 3996, 351, 3241, 2172, 1805, 995, 3079, 1131, 1872, 2389, 1220, 3973, ++ 1336, 2838, 931, 2361, 2992, 788, 1714, 3878, 2208, 1842, 976, 3532, 1358, 2103, 475, 2230, 3491, 1558, 734, 2817, 1741, 3431, 
1384, 3705, 899, 3565, 274, 3997, 1166, 3538, 922, 3390, 2614, 209, 2390, 3877, 1188, 3010, 2157, 801, 2500, 522, 2218, 3849, 2539, 470, 2723, 897, 2473, 3614, 21, 3386, 1447, 3016, 895, 3874, 504, 3619, 1542, 4089, 692, 3440, 2057, 493, ++ 3245, 1759, 3615, 543, 1491, 3768, 2576, 1266, 46, 3643, 2545, 353, 2661, 4094, 1135, 2932, 38, 2564, 4010, 286, 2386, 802, 2684, 1770, 2522, 1321, 2914, 1689, 2673, 121, 2888, 1755, 1212, 3466, 1992, 462, 2605, 1864, 360, 3299, 1219, 3527, 1667, 685, 1185, 3223, 1966, 4006, 418, 1594, 3112, 2253, 591, 1900, 2449, 1364, 2864, 1970, 67, 2254, 2827, 329, 2971, 1058, ++ 2296, 283, 2617, 1873, 3152, 1120, 389, 3263, 2725, 1552, 726, 3226, 1686, 604, 3280, 1500, 3695, 999, 2025, 3192, 1078, 2979, 451, 4061, 10, 3221, 753, 2331, 984, 3842, 2160, 407, 4021, 878, 2966, 1545, 3536, 945, 3813, 1599, 2748, 178, 3118, 2008, 3631, 1608, 136, 1419, 2934, 2189, 682, 1326, 3729, 2736, 131, 3420, 818, 2521, 3320, 898, 1668, 3841, 1431, 3713, ++ 1536, 3406, 1007, 4034, 128, 2244, 3575, 2016, 951, 3930, 2299, 1020, 3748, 2435, 1925, 324, 2200, 3043, 571, 1775, 3785, 1456, 3275, 1151, 2165, 1803, 3793, 316, 3291, 1923, 1090, 3076, 2426, 1841, 612, 3201, 160, 2278, 2830, 549, 2132, 4093, 867, 2591, 365, 2769, 2114, 3553, 1004, 3821, 1828, 3336, 338, 1607, 4041, 1202, 3067, 419, 1327, 3583, 2190, 793, 2677, 100, ++ 2480, 764, 2921, 2133, 1282, 2801, 1388, 657, 3173, 279, 1820, 2965, 132, 1231, 2812, 3938, 861, 1440, 3590, 2663, 120, 2282, 675, 2620, 3554, 969, 2488, 1598, 2824, 495, 3641, 1505, 54, 3771, 2243, 1381, 3977, 1960, 1279, 3569, 1036, 1786, 2994, 1378, 3906, 1074, 3343, 572, 2518, 206, 2877, 868, 2413, 3155, 581, 2308, 1697, 3808, 2024, 2718, 240, 3202, 1233, 3497, ++ 1718, 3863, 406, 1632, 3381, 511, 3963, 2440, 1470, 2619, 3341, 1315, 2217, 3593, 557, 1760, 3324, 2501, 408, 1314, 3479, 1708, 3896, 1554, 247, 3017, 638, 3418, 1334, 2455, 816, 3377, 2142, 1029, 2878, 437, 2551, 687, 2957, 7, 2491, 3332, 273, 2334, 782, 2968, 1783, 1278, 3124, 2074, 1490, 3942, 2004, 1066, 1907, 3568, 191, 2796, 605, 1122, 3995, 1850, 2273, 695, ++ 3130, 1156, 2356, 3728, 915, 3105, 2059, 170, 3753, 1080, 503, 4016, 804, 3115, 1361, 2693, 86, 1837, 3854, 2349, 769, 2846, 393, 3117, 2072, 1257, 3867, 2031, 135, 4084, 1958, 2709, 708, 3243, 1570, 3708, 1130, 3419, 1695, 3858, 1508, 658, 3786, 1707, 3472, 69, 2434, 4052, 421, 3599, 664, 2573, 53, 3382, 2642, 957, 3193, 1499, 2238, 3024, 1559, 494, 3689, 2584, ++}; +Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.c ++++ jellyfin-ffmpeg/libavfilter/vf_scale_cuda.c +@@ -1,5 +1,8 @@ + /* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. ++* Copyright (c) 2019 rcombs ++* ++* This file is part of FFmpeg. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -20,10 +23,10 @@ + * DEALINGS IN THE SOFTWARE. 
+ */
+
+-#include <float.h>
+ #include <stdio.h>
+ #include <string.h>
+
++#include "libavutil/avassert.h"
+ #include "libavutil/avstring.h"
+ #include "libavutil/common.h"
+ #include "libavutil/hwcontext.h"
+@@ -34,13 +37,12 @@
+ #include "libavutil/pixdesc.h"
+
+ #include "avfilter.h"
++#include "dither_matrix.h"
+ #include "formats.h"
+ #include "internal.h"
+ #include "scale_eval.h"
+ #include "video.h"
+
+-#include "vf_scale_cuda.h"
+-
+ static const enum AVPixelFormat supported_formats[] = {
+ AV_PIX_FMT_YUV420P,
+ AV_PIX_FMT_NV12,
+@@ -48,8 +50,6 @@ static const enum AVPixelFormat supporte
+ AV_PIX_FMT_P010,
+ AV_PIX_FMT_P016,
+ AV_PIX_FMT_YUV444P16,
+- AV_PIX_FMT_0RGB32,
+- AV_PIX_FMT_0BGR32,
+ };
+
+ #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
+@@ -58,17 +58,6 @@ static const enum AVPixelFormat supporte
+
+ #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
+
+-enum {
+- INTERP_ALGO_DEFAULT,
+-
+- INTERP_ALGO_NEAREST,
+- INTERP_ALGO_BILINEAR,
+- INTERP_ALGO_BICUBIC,
+- INTERP_ALGO_LANCZOS,
+-
+- INTERP_ALGO_COUNT
+-};
+-
+ typedef struct CUDAScaleContext {
+ const AVClass *class;
+
+@@ -87,6 +76,7 @@ typedef struct CUDAScaleContext {
+ * Output sw format. AV_PIX_FMT_NONE for no conversion.
+ */
+ enum AVPixelFormat format;
++ char *format_str;
+
+ char *w_expr; ///< width expression string
+ char *h_expr; ///< height expression string
+@@ -96,30 +86,56 @@ typedef struct CUDAScaleContext {
+
+ CUcontext cu_ctx;
+ CUmodule cu_module;
+- CUfunction cu_func_uchar;
+- CUfunction cu_func_uchar2;
+- CUfunction cu_func_uchar4;
+- CUfunction cu_func_ushort;
+- CUfunction cu_func_ushort2;
+- CUfunction cu_func_ushort4;
++
++#define VARIANT(NAME) \
++ CUfunction cu_func_ ## NAME;
++#define VARIANTSET(NAME) \
++ VARIANT(NAME) \
++ VARIANT(NAME ## _c) \
++ VARIANT(NAME ## _p2) \
++ VARIANT(NAME ## _2) \
++ VARIANT(NAME ## _2_u) \
++ VARIANT(NAME ## _2_v) \
++ VARIANT(NAME ## _4)
++
++ VARIANTSET(8_8)
++ VARIANTSET(16_16)
++ VARIANTSET(8_16)
++ VARIANTSET(16_8)
++#undef VARIANTSET
++#undef VARIANT
++
++ CUfunction cu_func_luma;
++ CUfunction cu_func_chroma_u;
++ CUfunction cu_func_chroma_v;
++
+ CUstream cu_stream;
+
+ CUdeviceptr srcBuffer;
+ CUdeviceptr dstBuffer;
+ int tex_alignment;
+
+- int interp_algo;
+- int interp_use_linear;
+- int interp_as_integer;
++ const AVPixFmtDescriptor *in_desc, *out_desc;
++ int in_planes, out_planes;
+
+- float param;
++ CUdeviceptr ditherBuffer;
++ CUtexObject ditherTex;
+ } CUDAScaleContext;
+
+ static av_cold int cudascale_init(AVFilterContext *ctx)
+ {
+ CUDAScaleContext *s = ctx->priv;
+
+- s->format = AV_PIX_FMT_NONE;
++ if (!strcmp(s->format_str, "same")) {
++ s->format = AV_PIX_FMT_NONE;
++ } else {
++ s->format = av_get_pix_fmt(s->format_str);
++ if (s->format == AV_PIX_FMT_NONE) {
++ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
++ return AVERROR(EINVAL);
++ }
++ }
++
+ s->frame = av_frame_alloc();
+ if (!s->frame)
+ return AVERROR(ENOMEM);
+@@ -135,13 +151,22 @@ static av_cold void cudascale_uninit(AVF
+ {
+ CUDAScaleContext *s = ctx->priv;
+
+- if (s->hwctx && s->cu_module) {
++ if (s->hwctx) {
+ CudaFunctions *cu = s->hwctx->internal->cuda_dl;
+- CUcontext dummy;
++ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
++
++ CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
++
++ if (s->ditherTex) {
++ CHECK_CU(cu->cuTexObjectDestroy(s->ditherTex));
++ s->ditherTex = 0;
++ }
++
++ if (s->ditherBuffer) {
++ CHECK_CU(cu->cuMemFree(s->ditherBuffer));
++ s->ditherBuffer = 0;
++ }
+
+- CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
+- CHECK_CU(cu->cuModuleUnload(s->cu_module)); +- s->cu_module = NULL; + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + } + +@@ -262,6 +287,63 @@ static av_cold int init_processing_chain + return 0; + } + ++static av_cold int cudascale_setup_dither(AVFilterContext *ctx) ++{ ++ CUDAScaleContext *s = ctx->priv; ++ AVFilterLink *inlink = ctx->inputs[0]; ++ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; ++ CudaFunctions *cu = device_hwctx->internal->cuda_dl; ++ CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; ++ int ret = 0; ++ ++ CUDA_MEMCPY2D cpy = { ++ .srcMemoryType = CU_MEMORYTYPE_HOST, ++ .dstMemoryType = CU_MEMORYTYPE_DEVICE, ++ .srcHost = ff_fruit_dither_matrix, ++ .dstDevice = 0, ++ .srcPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .dstPitch = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .WidthInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .Height = ff_fruit_dither_size, ++ }; ++ ++ CUDA_TEXTURE_DESC tex_desc = { ++ .filterMode = CU_TR_FILTER_MODE_POINT, ++ .flags = CU_TRSF_READ_AS_INTEGER, ++ }; ++ ++ CUDA_RESOURCE_DESC res_desc = { ++ .resType = CU_RESOURCE_TYPE_PITCH2D, ++ .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT16, ++ .res.pitch2D.numChannels = 1, ++ .res.pitch2D.width = ff_fruit_dither_size, ++ .res.pitch2D.height = ff_fruit_dither_size, ++ .res.pitch2D.pitchInBytes = ff_fruit_dither_size * sizeof(ff_fruit_dither_matrix[0]), ++ .res.pitch2D.devPtr = 0, ++ }; ++ ++ av_assert0(sizeof(ff_fruit_dither_matrix) == sizeof(ff_fruit_dither_matrix[0]) * ff_fruit_dither_size * ff_fruit_dither_size); ++ ++ if ((ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx))) < 0) ++ return ret; ++ ++ if ((ret = CHECK_CU(cu->cuMemAlloc(&s->ditherBuffer, sizeof(ff_fruit_dither_matrix)))) < 0) ++ goto fail; ++ ++ res_desc.res.pitch2D.devPtr = cpy.dstDevice = s->ditherBuffer; ++ ++ if ((ret = CHECK_CU(cu->cuMemcpy2D(&cpy))) < 0) ++ goto fail; ++ ++ if ((ret = CHECK_CU(cu->cuTexObjectCreate(&s->ditherTex, &res_desc, &tex_desc, NULL))) < 0) ++ goto fail; ++ ++fail: ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ return ret; ++} ++ + static av_cold int cudascale_config_props(AVFilterLink *outlink) + { + AVFilterContext *ctx = outlink->src; +@@ -271,46 +353,11 @@ static av_cold int cudascale_config_prop + AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; + CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; + CudaFunctions *cu = device_hwctx->internal->cuda_dl; +- char buf[64]; + int w, h; ++ int i; + int ret; + +- char *scaler_ptx; +- const char *function_infix = ""; +- + extern char vf_scale_cuda_ptx[]; +- extern char vf_scale_cuda_bicubic_ptx[]; +- +- switch(s->interp_algo) { +- case INTERP_ALGO_NEAREST: +- scaler_ptx = vf_scale_cuda_ptx; +- function_infix = "_Nearest"; +- s->interp_use_linear = 0; +- s->interp_as_integer = 1; +- break; +- case INTERP_ALGO_BILINEAR: +- scaler_ptx = vf_scale_cuda_ptx; +- function_infix = "_Bilinear"; +- s->interp_use_linear = 1; +- s->interp_as_integer = 1; +- break; +- case INTERP_ALGO_DEFAULT: +- case INTERP_ALGO_BICUBIC: +- scaler_ptx = vf_scale_cuda_bicubic_ptx; +- function_infix = "_Bicubic"; +- s->interp_use_linear = 0; +- s->interp_as_integer = 0; +- break; +- case INTERP_ALGO_LANCZOS: +- scaler_ptx = vf_scale_cuda_bicubic_ptx; +- function_infix = "_Lanczos"; +- s->interp_use_linear = 0; +- s->interp_as_integer = 0; +- break; +- default: +- av_log(ctx, AV_LOG_ERROR, "Unknown interpolation 
algorithm\n"); +- return AVERROR_BUG; +- } + + s->hwctx = device_hwctx; + s->cu_stream = s->hwctx->stream; +@@ -319,40 +366,30 @@ static av_cold int cudascale_config_prop + if (ret < 0) + goto fail; + +- ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, scaler_ptx)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_uchar", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_uchar2", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_uchar4", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_ushort", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_ushort2", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, buf)); +- if (ret < 0) +- goto fail; +- +- snprintf(buf, sizeof(buf), "Subsample%s_ushort4", function_infix); +- CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, buf)); ++ ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx)); + if (ret < 0) + goto fail; + ++#define VARIANT(NAME) \ ++ CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ ## NAME, s->cu_module, "Subsample_Bilinear_" #NAME)); \ ++ if (ret < 0) \ ++ goto fail; ++ ++#define VARIANTSET(NAME) \ ++ VARIANT(NAME) \ ++ VARIANT(NAME ## _c) \ ++ VARIANT(NAME ## _2) \ ++ VARIANT(NAME ## _p2) \ ++ VARIANT(NAME ## _2_u) \ ++ VARIANT(NAME ## _2_v) \ ++ VARIANT(NAME ## _4) ++ ++ VARIANTSET(8_8) ++ VARIANTSET(16_16) ++ VARIANTSET(8_16) ++ VARIANTSET(16_8) ++#undef VARIANTSET ++#undef VARIANT + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + +@@ -376,6 +413,53 @@ static av_cold int cudascale_config_prop + if (ret < 0) + return ret; + ++ s->in_desc = av_pix_fmt_desc_get(s->in_fmt); ++ s->out_desc = av_pix_fmt_desc_get(s->out_fmt); ++ ++ for (i = 0; i < s->in_desc->nb_components; i++) ++ s->in_planes = FFMAX(s->in_planes, s->in_desc ->comp[i].plane + 1); ++ ++ for (i = 0; i < s->in_desc->nb_components; i++) ++ s->out_planes = FFMAX(s->out_planes, s->out_desc->comp[i].plane + 1); ++ ++#define VARIANT(INDEPTH, OUTDEPTH, SUFFIX) s->cu_func_ ## INDEPTH ## _ ## OUTDEPTH ## SUFFIX ++#define BITS(n) ((n + 7) & ~7) ++#define VARIANTSET(INDEPTH, OUTDEPTH) \ ++ else if (BITS(s->in_desc->comp[0].depth) == INDEPTH && \ ++ BITS(s->out_desc->comp[0].depth) == OUTDEPTH) { \ ++ s->cu_func_luma = VARIANT(INDEPTH, OUTDEPTH,); \ ++ if (s->in_planes == 3 && s->out_planes == 3) { \ ++ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _c); \ ++ } else if (s->in_planes == 3 && s->out_planes == 2) { \ ++ s->cu_func_chroma_u = s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _p2); \ ++ } else if (s->in_planes == 2 && s->out_planes == 2) { \ ++ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2); \ ++ } else if (s->in_planes == 2 && s->out_planes == 3) { \ ++ s->cu_func_chroma_u = VARIANT(INDEPTH, OUTDEPTH, _2_u); \ ++ s->cu_func_chroma_v = VARIANT(INDEPTH, OUTDEPTH, _2_v); \ ++ } else { \ ++ ret = AVERROR_BUG; \ ++ goto fail; \ ++ } \ ++ } ++ ++ if (0) {} ++ VARIANTSET(8, 8) ++ VARIANTSET(16, 16) ++ VARIANTSET(8, 16) ++ VARIANTSET(16, 8) ++ else { ++ ret = AVERROR_BUG; ++ goto fail; ++ } 
++#undef VARIANTSET ++#undef VARIANT ++ ++ if (s->in_desc->comp[0].depth > s->out_desc->comp[0].depth) { ++ if ((ret = cudascale_setup_dither(ctx)) < 0) ++ goto fail; ++ } ++ + av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d%s\n", + inlink->w, inlink->h, outlink->w, outlink->h, s->passthrough ? " (passthrough)" : ""); + +@@ -396,21 +480,18 @@ fail: + static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, + uint8_t *src_dptr, int src_width, int src_height, int src_pitch, + uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, +- int pixel_size, int bit_depth) ++ int pixel_size) + { + CUDAScaleContext *s = ctx->priv; + CudaFunctions *cu = s->hwctx->internal->cuda_dl; + CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; + CUtexObject tex = 0; +- void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, +- &src_width, &src_height, &bit_depth, &s->param }; ++ void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height, &s->ditherTex }; + int ret; + + CUDA_TEXTURE_DESC tex_desc = { +- .filterMode = s->interp_use_linear ? +- CU_TR_FILTER_MODE_LINEAR : +- CU_TR_FILTER_MODE_POINT, +- .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0, ++ .filterMode = CU_TR_FILTER_MODE_LINEAR, ++ .flags = CU_TRSF_READ_AS_INTEGER, + }; + + CUDA_RESOURCE_DESC res_desc = { +@@ -425,10 +506,6 @@ static int call_resize_kernel(AVFilterCo + .res.pitch2D.devPtr = (CUdeviceptr)src_dptr, + }; + +- // Handling of channels is done via vector-types in cuda, so their size is implicitly part of the pitch +- // Same for pixel_size, which is represented via datatypes on the cuda side of things. +- dst_pitch /= channels * pixel_size; +- + ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL)); + if (ret < 0) + goto exit; +@@ -447,91 +524,37 @@ exit: + static int scalecuda_resize(AVFilterContext *ctx, + AVFrame *out, AVFrame *in) + { +- AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data; + CUDAScaleContext *s = ctx->priv; + +- switch (in_frames_ctx->sw_format) { +- case AV_PIX_FMT_YUV420P: +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[1], in->width / 2, in->height / 2, in->linesize[1], +- out->data[1], out->width / 2, out->height / 2, out->linesize[1], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[2], in->width / 2, in->height / 2, in->linesize[2], +- out->data[2], out->width / 2, out->height / 2, out->linesize[2], +- 1, 8); +- break; +- case AV_PIX_FMT_YUV444P: +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[1], in->width, in->height, in->linesize[1], +- out->data[1], out->width, out->height, out->linesize[1], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[2], in->width, in->height, in->linesize[2], +- out->data[2], out->width, out->height, out->linesize[2], +- 1, 8); +- break; +- case AV_PIX_FMT_YUV444P16: +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 2, 16); +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[1], in->width, 
in->height, in->linesize[1], +- out->data[1], out->width, out->height, out->linesize[1], +- 2, 16); +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[2], in->width, in->height, in->linesize[2], +- out->data[2], out->width, out->height, out->linesize[2], +- 2, 16); +- break; +- case AV_PIX_FMT_NV12: +- call_resize_kernel(ctx, s->cu_func_uchar, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 1, 8); +- call_resize_kernel(ctx, s->cu_func_uchar2, 2, +- in->data[1], in->width / 2, in->height / 2, in->linesize[1], +- out->data[1], out->width / 2, out->height / 2, out->linesize[1], +- 1, 8); +- break; +- case AV_PIX_FMT_P010LE: +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 2, 10); +- call_resize_kernel(ctx, s->cu_func_ushort2, 2, +- in->data[1], in->width / 2, in->height / 2, in->linesize[1], +- out->data[1], out->width / 2, out->height / 2, out->linesize[1], +- 2, 10); +- break; +- case AV_PIX_FMT_P016LE: +- call_resize_kernel(ctx, s->cu_func_ushort, 1, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 2, 16); +- call_resize_kernel(ctx, s->cu_func_ushort2, 2, +- in->data[1], in->width / 2, in->height / 2, in->linesize[1], +- out->data[1], out->width / 2, out->height / 2, out->linesize[1], +- 2, 16); +- break; +- case AV_PIX_FMT_0RGB32: +- case AV_PIX_FMT_0BGR32: +- call_resize_kernel(ctx, s->cu_func_uchar4, 4, +- in->data[0], in->width, in->height, in->linesize[0], +- out->data[0], out->width, out->height, out->linesize[0], +- 1, 8); +- break; +- default: +- return AVERROR_BUG; ++#define DEPTH_BYTES(depth) (((depth) + 7) / 8) ++ ++ call_resize_kernel(ctx, s->cu_func_luma, 1, ++ in->data[0], in->width, in->height, in->linesize[0], ++ out->data[0], out->width, out->height, out->linesize[0], ++ DEPTH_BYTES(s->in_desc->comp[0].depth)); ++ ++ call_resize_kernel(ctx, s->cu_func_chroma_u, s->in_planes == 2 ? 2 : 1, ++ in->data[1], ++ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), ++ in->linesize[1], ++ out->data[1], ++ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), ++ out->linesize[1], ++ DEPTH_BYTES(s->in_desc->comp[1].depth)); ++ ++ if (s->cu_func_chroma_v) { ++ call_resize_kernel(ctx, s->cu_func_chroma_v, s->in_planes == 2 ? 
2 : 1, ++ in->data[s->in_desc->comp[2].plane], ++ AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), ++ in->linesize[s->in_desc->comp[2].plane], ++ out->data[s->out_desc->comp[2].plane] + s->out_desc->comp[2].offset, ++ AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), ++ AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), ++ out->linesize[s->out_desc->comp[2].plane], ++ DEPTH_BYTES(s->in_desc->comp[2].depth)); + } + + return 0; +@@ -621,20 +644,15 @@ static AVFrame *cudascale_get_video_buff + #define OFFSET(x) offsetof(CUDAScaleContext, x) + #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) + static const AVOption options[] = { +- { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, +- { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, +- { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" }, +- { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" }, +- { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" }, +- { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" }, +- { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" }, ++ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, + { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, +- { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS }, +- { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" }, +- { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, +- { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, +- { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, +- { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS }, ++ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, ++ { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, ++ { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, ++ { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, ++ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, + { NULL }, + }; + +Index: 
jellyfin-ffmpeg/libavfilter/vf_scale_cuda.cu +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.cu ++++ jellyfin-ffmpeg/libavfilter/vf_scale_cuda.cu +@@ -20,35 +20,115 @@ + * DEALINGS IN THE SOFTWARE. + */ + +-#include "cuda/vector_helpers.cuh" ++typedef unsigned char uchar; ++typedef unsigned short ushort; + +-template<typename T> +-__device__ inline void Subsample_Nearest(cudaTextureObject_t tex, +- T *dst, +- int dst_width, int dst_height, int dst_pitch, +- int src_width, int src_height, +- int bit_depth) ++#define SHIFTDOWN(val) (dstbase)(val >> abs(2 + shift)) ++#define SHIFTUP(val) (dstbase)(val << abs(-shift - 2)) ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift1_d + { +- int xo = blockIdx.x * blockDim.x + threadIdx.x; +- int yo = blockIdx.y * blockDim.y + threadIdx.y; ++ typedef DST dstbase; + +- if (yo < dst_height && xo < dst_width) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) + { +- float hscale = (float)src_width / (float)dst_width; +- float vscale = (float)src_height / (float)dst_height; +- float xi = (xo + 0.5f) * hscale; +- float yi = (yo + 0.5f) * vscale; ++ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + ((1 + d) >> (sizeof(SRC) * 8 - dither + 3)); + +- dst[yo*dst_pitch+xo] = tex2D<T>(tex, xi, yi); ++ if (shift > -2) ++ return SHIFTDOWN(ret); ++ else ++ return SHIFTUP(ret); + } +-} ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift1 ++{ ++ typedef DST dstbase; + +-template<typename T> +-__device__ inline void Subsample_Bilinear(cudaTextureObject_t tex, +- T *dst, +- int dst_width, int dst_height, int dst_pitch, +- int src_width, int src_height, +- int bit_depth) ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ unsigned ret = (unsigned)i1 + (unsigned)i2 + (unsigned)i3 + (unsigned)i4 + 2; ++ ++ if (shift > -2) ++ return SHIFTDOWN(ret); ++ else ++ return SHIFTUP(ret); ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift2 ++{ ++ typedef decltype(DST::x) dstbase; ++ ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; ++ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; ++ ++ if (shift > -2) ++ return { SHIFTDOWN(retx), SHIFTDOWN(rety) }; ++ else ++ return { SHIFTUP(retx), SHIFTUP(rety) }; ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift2_x ++{ ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ return add_conv_shift1<decltype(SRC::x), DST, shift, dither>()(i1.x, i2.x, i3.x, i4.x, d); ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift2_y ++{ ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ return add_conv_shift1<decltype(SRC::y), DST, shift, dither>()(i1.y, i2.y, i3.y, i4.y, d); ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift3 ++{ ++ typedef decltype(DST::x) dstbase; ++ ++ __inline__ __device__ DST operator()(SRC i1, SRC i2, SRC i3, SRC i4, ushort d) ++ { ++ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; ++ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; ++ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; ++ ++ if (shift > -2) ++ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz) }; ++ else ++ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz) }; ++ } ++}; ++ ++template<typename SRC, typename DST, int shift, int dither> struct add_conv_shift4 ++{ ++ typedef decltype(DST::x) dstbase; ++ ++ __inline__ __device__ DST operator()(SRC i1, SRC
i2, SRC i3, SRC i4, ushort d) ++ { ++ unsigned retx = (unsigned)i1.x + (unsigned)i2.x + (unsigned)i3.x + (unsigned)i4.x + 2; ++ unsigned rety = (unsigned)i1.y + (unsigned)i2.y + (unsigned)i3.y + (unsigned)i4.y + 2; ++ unsigned retz = (unsigned)i1.z + (unsigned)i2.z + (unsigned)i3.z + (unsigned)i4.z + 2; ++ unsigned retw = (unsigned)i1.w + (unsigned)i2.w + (unsigned)i3.w + (unsigned)i4.w + 2; ++ ++ if (shift > -2) ++ return { SHIFTDOWN(retx), SHIFTDOWN(rety), SHIFTDOWN(retz), SHIFTDOWN(retw) }; ++ else ++ return { SHIFTUP(retx), SHIFTUP(rety), SHIFTUP(retz), SHIFTUP(retw) }; ++ } ++}; ++ ++template<typename SRC, typename DST, template<typename, typename, int, int> class conv, int pitch, int shift, int dither> ++__inline__ __device__ void Subsample_Bilinear(cudaTextureObject_t tex, ++ DST *dst, ++ int dst_width, int dst_height, int dst_pitch, ++ int src_width, int src_height, ++ cudaTextureObject_t ditherTex) + { + int xo = blockIdx.x * blockDim.x + threadIdx.x; + int yo = blockIdx.y * blockDim.y + threadIdx.y; +@@ -66,58 +146,48 @@ __device__ inline void Subsample_Bilinea + float dx = wh / (0.5f + wh); + float dy = wv / (0.5f + wv); + +- intT r = { 0 }; +- vec_set_scalar(r, 2); +- r += tex2D<T>(tex, xi - dx, yi - dy); +- r += tex2D<T>(tex, xi + dx, yi - dy); +- r += tex2D<T>(tex, xi - dx, yi + dy); +- r += tex2D<T>(tex, xi + dx, yi + dy); +- vec_set(dst[yo*dst_pitch+xo], r >> 2); ++ SRC i0 = tex2D<SRC>(tex, xi-dx, yi-dy); ++ SRC i1 = tex2D<SRC>(tex, xi+dx, yi-dy); ++ SRC i2 = tex2D<SRC>(tex, xi-dx, yi+dy); ++ SRC i3 = tex2D<SRC>(tex, xi+dx, yi+dy); ++ ++ ushort ditherVal = dither ? tex2D<ushort>(ditherTex, xo, yo) : 0; ++ ++ dst[yo*(dst_pitch / sizeof(DST))+xo*pitch] = conv<SRC, DST, shift, dither>()(i0, i1, i2, i3, ditherVal); + } + } + + extern "C" { + +-#define NEAREST_KERNEL(T) \ +- __global__ void Subsample_Nearest_ ## T(cudaTextureObject_t src_tex, \ +- T *dst, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, \ +- int bit_depth) \ +- { \ +- Subsample_Nearest<T>(src_tex, dst, \ +- dst_width, dst_height, dst_pitch, \ +- src_width, src_height, \ +- bit_depth); \ +- } +- +-NEAREST_KERNEL(uchar) +-NEAREST_KERNEL(uchar2) +-NEAREST_KERNEL(uchar4) +- +-NEAREST_KERNEL(ushort) +-NEAREST_KERNEL(ushort2) +-NEAREST_KERNEL(ushort4) +- +-#define BILINEAR_KERNEL(T) \ +- __global__ void Subsample_Bilinear_ ## T(cudaTextureObject_t src_tex, \ +- T *dst, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, \ +- int bit_depth) \ +- { \ +- Subsample_Bilinear<T>(src_tex, dst, \ +- dst_width, dst_height, dst_pitch, \ +- src_width, src_height, \ +- bit_depth); \ +- } +- +-BILINEAR_KERNEL(uchar) +-BILINEAR_KERNEL(uchar2) +-BILINEAR_KERNEL(uchar4) +- +-BILINEAR_KERNEL(ushort) +-BILINEAR_KERNEL(ushort2) +-BILINEAR_KERNEL(ushort4) ++#define VARIANT(SRC, DST, CONV, SHIFT, PITCH, DITHER, NAME) \ ++__global__ void Subsample_Bilinear_ ## NAME(cudaTextureObject_t tex, \ ++ DST *dst, \ ++ int dst_width, int dst_height, int dst_pitch, \ ++ int src_width, int src_height, \ ++ cudaTextureObject_t ditherTex) \ ++{ \ ++ Subsample_Bilinear<SRC, DST, CONV, PITCH, SHIFT, DITHER>(tex, dst, dst_width, dst_height, dst_pitch, \ ++ src_width, src_height, ditherTex); \ ++} ++ ++#define VARIANTSET2(SRC, DST, SHIFT, NAME) \ ++ VARIANT(SRC, DST, add_conv_shift1_d, SHIFT, 1, (sizeof(DST) < sizeof(SRC)) ?
sizeof(DST) : 0, NAME) \ ++ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 1, 0, NAME ## _c) \ ++ VARIANT(SRC, DST, add_conv_shift1, SHIFT, 2, 0, NAME ## _p2) \ ++ VARIANT(SRC ## 2, DST ## 2, add_conv_shift2, SHIFT, 1, 0, NAME ## _2) \ ++ VARIANT(SRC ## 2, DST, add_conv_shift2_x, SHIFT, 1, 0, NAME ## _2_u) \ ++ VARIANT(SRC ## 2, DST, add_conv_shift2_y, SHIFT, 1, 0, NAME ## _2_v) \ ++ VARIANT(SRC ## 4, DST ## 4, add_conv_shift4, SHIFT, 1, 0, NAME ## _4) ++ ++#define VARIANTSET(SRC, DST, SRCSIZE, DSTSIZE) \ ++ VARIANTSET2(SRC, DST, (SRCSIZE - DSTSIZE), SRCSIZE ## _ ## DSTSIZE) ++ ++// Straight no-conversion ++VARIANTSET(uchar, uchar, 8, 8) ++VARIANTSET(ushort, ushort, 16, 16) ++ ++// Conversion between 8- and 16-bit ++VARIANTSET(uchar, ushort, 8, 16) ++VARIANTSET(ushort, uchar, 16, 8) + + } +Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda.h ++++ /dev/null +@@ -1,28 +0,0 @@ +-/* +- * This file is part of FFmpeg. +- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +- * DEALINGS IN THE SOFTWARE. +- */ +- +-#ifndef AVFILTER_SCALE_CUDA_H +-#define AVFILTER_SCALE_CUDA_H +- +-#define SCALE_CUDA_PARAM_DEFAULT 999999.0f +- +-#endif +Index: jellyfin-ffmpeg/libavfilter/vf_scale_cuda_bicubic.cu +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_cuda_bicubic.cu ++++ /dev/null +@@ -1,224 +0,0 @@ +-/* +- * This file is part of FFmpeg. +- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +- * DEALINGS IN THE SOFTWARE. +- */ +- +-#include "cuda/vector_helpers.cuh" +-#include "vf_scale_cuda.h" +- +-typedef float4 (*coeffs_function_t)(float, float); +- +-__device__ inline float4 lanczos_coeffs(float x, float param) +-{ +- const float pi = 3.141592654f; +- +- float4 res = make_float4( +- pi * (x + 1), +- pi * x, +- pi * (x - 1), +- pi * (x - 2)); +- +- res.x = res.x == 0.0f ? 1.0f : +- __sinf(res.x) * __sinf(res.x / 2.0f) / (res.x * res.x / 2.0f); +- res.y = res.y == 0.0f ? 1.0f : +- __sinf(res.y) * __sinf(res.y / 2.0f) / (res.y * res.y / 2.0f); +- res.z = res.z == 0.0f ? 1.0f : +- __sinf(res.z) * __sinf(res.z / 2.0f) / (res.z * res.z / 2.0f); +- res.w = res.w == 0.0f ? 1.0f : +- __sinf(res.w) * __sinf(res.w / 2.0f) / (res.w * res.w / 2.0f); +- +- return res / (res.x + res.y + res.z + res.w); +-} +- +-__device__ inline float4 bicubic_coeffs(float x, float param) +-{ +- const float A = param == SCALE_CUDA_PARAM_DEFAULT ? 0.0f : -param; +- +- float4 res; +- res.x = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A; +- res.y = ((A + 2) * x - (A + 3)) * x * x + 1; +- res.z = ((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1; +- res.w = 1.0f - res.x - res.y - res.z; +- +- return res; +-} +- +-__device__ inline void derived_fast_coeffs(float4 coeffs, float x, float *h0, float *h1, float *s) +-{ +- float g0 = coeffs.x + coeffs.y; +- float g1 = coeffs.z + coeffs.w; +- +- *h0 = coeffs.y / g0 - 0.5f; +- *h1 = coeffs.w / g1 + 1.5f; +- *s = g0 / (g0 + g1); +-} +- +-template<typename V> +-__device__ inline V apply_coeffs(float4 coeffs, V c0, V c1, V c2, V c3) +-{ +- V res = c0 * coeffs.x; +- res += c1 * coeffs.y; +- res += c2 * coeffs.z; +- res += c3 * coeffs.w; +- +- return res; +-} +- +-template<typename T> +-__device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function, +- cudaTextureObject_t src_tex, +- T *dst, +- int dst_width, int dst_height, int dst_pitch, +- int src_width, int src_height, +- int bit_depth, float param) +-{ +- int xo = blockIdx.x * blockDim.x + threadIdx.x; +- int yo = blockIdx.y * blockDim.y + threadIdx.y; +- +- if (yo < dst_height && xo < dst_width) +- { +- float hscale = (float)src_width / (float)dst_width; +- float vscale = (float)src_height / (float)dst_height; +- float xi = (xo + 0.5f) * hscale - 0.5f; +- float yi = (yo + 0.5f) * vscale - 0.5f; +- float px = floor(xi); +- float py = floor(yi); +- float fx = xi - px; +- float fy = yi - py; +- +- float factor = bit_depth > 8 ? 0xFFFF : 0xFF; +- +- float4 coeffsX = coeffs_function(fx, param); +- float4 coeffsY = coeffs_function(fy, param); +- +-#define PIX(x, y) tex2D<floatT>(src_tex, (x), (y)) +- +- dst[yo * dst_pitch + xo] = from_floatN<T>( +- apply_coeffs(coeffsY, +- apply_coeffs(coeffsX, PIX(px - 1, py - 1), PIX(px, py - 1), PIX(px + 1, py - 1), PIX(px + 2, py - 1)), +- apply_coeffs(coeffsX, PIX(px - 1, py ), PIX(px, py ), PIX(px + 1, py ), PIX(px + 2, py )), +- apply_coeffs(coeffsX, PIX(px - 1, py + 1), PIX(px, py + 1), PIX(px + 1, py + 1), PIX(px + 2, py + 1)), +- apply_coeffs(coeffsX, PIX(px - 1, py + 2), PIX(px, py + 2), PIX(px + 1, py + 2), PIX(px + 2, py + 2)) +- ) * factor +- ); +- +-#undef PIX +- } +-} +- +-/* This does not yield correct results.
Most likely because of low internal precision in tex2D linear interpolation */ +-template<typename T> +-__device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function, +- cudaTextureObject_t src_tex, +- T *dst, +- int dst_width, int dst_height, int dst_pitch, +- int src_width, int src_height, +- int bit_depth, float param) +-{ +- int xo = blockIdx.x * blockDim.x + threadIdx.x; +- int yo = blockIdx.y * blockDim.y + threadIdx.y; +- +- if (yo < dst_height && xo < dst_width) +- { +- float hscale = (float)src_width / (float)dst_width; +- float vscale = (float)src_height / (float)dst_height; +- float xi = (xo + 0.5f) * hscale - 0.5f; +- float yi = (yo + 0.5f) * vscale - 0.5f; +- float px = floor(xi); +- float py = floor(yi); +- float fx = xi - px; +- float fy = yi - py; +- +- float factor = bit_depth > 8 ? 0xFFFF : 0xFF; +- +- float4 coeffsX = coeffs_function(fx, param); +- float4 coeffsY = coeffs_function(fy, param); +- +- float h0x, h1x, sx; +- float h0y, h1y, sy; +- derived_fast_coeffs(coeffsX, fx, &h0x, &h1x, &sx); +- derived_fast_coeffs(coeffsY, fy, &h0y, &h1y, &sy); +- +-#define PIX(x, y) tex2D<floatT>(src_tex, (x), (y)) +- +- floatT pix[4] = { +- PIX(px + h0x, py + h0y), +- PIX(px + h1x, py + h0y), +- PIX(px + h0x, py + h1y), +- PIX(px + h1x, py + h1y) +- }; +- +-#undef PIX +- +- dst[yo * dst_pitch + xo] = from_floatN<T>( +- lerp_scalar( +- lerp_scalar(pix[3], pix[2], sx), +- lerp_scalar(pix[1], pix[0], sx), +- sy) * factor +- ); +- } +-} +- +-extern "C" { +- +-#define BICUBIC_KERNEL(T) \ +- __global__ void Subsample_Bicubic_ ## T(cudaTextureObject_t src_tex, \ +- T *dst, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, \ +- int bit_depth, float param) \ +- { \ +- Subsample_Bicubic<T>(&bicubic_coeffs, src_tex, dst, \ +- dst_width, dst_height, dst_pitch, \ +- src_width, src_height, \ +- bit_depth, param); \ +- } +- +-BICUBIC_KERNEL(uchar) +-BICUBIC_KERNEL(uchar2) +-BICUBIC_KERNEL(uchar4) +- +-BICUBIC_KERNEL(ushort) +-BICUBIC_KERNEL(ushort2) +-BICUBIC_KERNEL(ushort4) +- +- +-#define LANCZOS_KERNEL(T) \ +- __global__ void Subsample_Lanczos_ ## T(cudaTextureObject_t src_tex, \ +- T *dst, \ +- int dst_width, int dst_height, int dst_pitch, \ +- int src_width, int src_height, \ +- int bit_depth, float param) \ +- { \ +- Subsample_Bicubic<T>(&lanczos_coeffs, src_tex, dst, \ +- dst_width, dst_height, dst_pitch, \ +- src_width, src_height, \ +- bit_depth, param); \ +- } +- +-LANCZOS_KERNEL(uchar) +-LANCZOS_KERNEL(uchar2) +-LANCZOS_KERNEL(uchar4) +- +-LANCZOS_KERNEL(ushort) +-LANCZOS_KERNEL(ushort2) +-LANCZOS_KERNEL(ushort4) +- +-} diff --git a/debian/patches/series b/debian/patches/series index c2e011753c2..2b13748bf23 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1 +1,2 @@ 0001-add-fixes-for-segement-muxer.patch +0002-add-cuda-pixfmt-converter-impl.patch From cd47bf1feb6235f6bc53288dc4121e04aeba436b Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 22:21:28 +0800 Subject: [PATCH 12/41] add cuda tonemap impl --- .../patches/0003-add-cuda-tonemap-impl.patch | 1639 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 1640 insertions(+) create mode 100644 debian/patches/0003-add-cuda-tonemap-impl.patch diff --git a/debian/patches/0003-add-cuda-tonemap-impl.patch b/debian/patches/0003-add-cuda-tonemap-impl.patch new file mode 100644 index 00000000000..81b702a0257 --- /dev/null +++ b/debian/patches/0003-add-cuda-tonemap-impl.patch @@ -0,0 +1,1639 @@ +Index: jellyfin-ffmpeg/configure
+=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ jellyfin-ffmpeg/configure +@@ -3058,6 +3058,8 @@ scale_cuda_filter_deps="ffnvcodec" + scale_cuda_filter_deps_any="cuda_nvcc cuda_llvm" + thumbnail_cuda_filter_deps="ffnvcodec" + thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" ++tonemap_cuda_filter_deps="ffnvcodec const_nan" ++tonemap_cuda_filter_deps_any="cuda_nvcc cuda_llvm" + transpose_npp_filter_deps="ffnvcodec libnpp" + overlay_cuda_filter_deps="ffnvcodec" + overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +@@ -6251,7 +6253,7 @@ fi + if enabled cuda_nvcc; then + nvccflags="$nvccflags -ptx" + else +- nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -include ${source_link}/compat/cuda/cuda_runtime.h" ++ nvccflags="$nvccflags -S -nocudalib -nocudainc --cuda-device-only -Wno-c++11-narrowing -std=c++14 -include ${source_link}/compat/cuda/cuda_runtime.h" + check_nvcc cuda_llvm + fi + +Index: jellyfin-ffmpeg/ffbuild/common.mak +=================================================================== +--- jellyfin-ffmpeg.orig/ffbuild/common.mak ++++ jellyfin-ffmpeg/ffbuild/common.mak +@@ -38,6 +38,7 @@ OBJCCFLAGS = $(CPPFLAGS) $(CFLAGS) $(OB + ASFLAGS := $(CPPFLAGS) $(ASFLAGS) + CXXFLAGS := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS) + X86ASMFLAGS += $(IFLAGS:%=%/) -I$( 0.0f ? __powf(x, 1.2f) : x; ++} ++ ++static __inline__ __device__ float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? __powf(x, 1.0f / 1.2f) : x; ++} ++ ++static __inline__ __device__ float oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? __sqrtf(3.0f * x) ++ : (ARIB_B67_A * __logf(12.0f * x - ARIB_B67_B) + ARIB_B67_C); ++} ++ ++static __inline__ __device__ float inverse_oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= 0.5f ++ ? (x * x) * (1.0f / 3.0f) ++ : (__expf((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); ++} ++ ++// linearizer for HLG/ARIB-B67 ++static __inline__ __device__ float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)); ++} ++ ++// delinearizer for HLG/ARIB-B67 ++static __inline__ __device__ float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x)); ++} ++ ++// delinearizer for BT709, BT2020-10 ++static __inline__ __device__ float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? 
__powf(x, 1.0f / 2.4f) : 0.0f; ++} ++ ++static __inline__ __device__ float linearize(float x) ++{ ++ if (trc_src == AVCOL_TRC_SMPTE2084) ++ return eotf_st2084(x); ++ else if (trc_src == AVCOL_TRC_ARIB_STD_B67) ++ return eotf_arib_b67(x); ++ else ++ return x; ++} ++ ++static __inline__ __device__ float delinearize(float x) ++{ ++ if (trc_dst == AVCOL_TRC_BT709 || trc_dst == AVCOL_TRC_BT2020_10) ++ return inverse_eotf_bt1886(x); ++ else ++ return x; ++} ++ ++static __inline__ __device__ float3 yuv2rgb(float y, float u, float v) { ++ if (range_src == AVCOL_RANGE_JPEG) { ++ u -= 0.5f; v -= 0.5f; ++ } else { ++ y = (y * 255.0f - 16.0f) / 219.0f; ++ u = (u * 255.0f - 128.0f) / 224.0f; ++ v = (v * 255.0f - 128.0f) / 224.0f; ++ } ++ float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2]; ++ float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5]; ++ float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8]; ++ return make_float3(r, g, b); ++} ++ ++static __inline__ __device__ float3 yuv2lrgb(float3 yuv) { ++ float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z); ++ return make_float3(linearize(rgb.x), ++ linearize(rgb.y), ++ linearize(rgb.z)); ++} ++ ++static __inline__ __device__ float3 rgb2yuv(float r, float g, float b) { ++ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; ++ float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5]; ++ float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8]; ++ if (range_dst == AVCOL_RANGE_JPEG) { ++ u += 0.5f; v += 0.5f; ++ } else { ++ y = (219.0f * y + 16.0f) / 255.0f; ++ u = (224.0f * u + 128.0f) / 255.0f; ++ v = (224.0f * v + 128.0f) / 255.0f; ++ } ++ return make_float3(y, u, v); ++} ++ ++static __inline__ __device__ float rgb2y(float r, float g, float b) { ++ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2]; ++ if (range_dst != AVCOL_RANGE_JPEG) ++ y = (219.0f * y + 16.0f) / 255.0f; ++ return y; ++} ++ ++static __inline__ __device__ float3 lrgb2yuv(float3 c) { ++ float r = delinearize(c.x); ++ float g = delinearize(c.y); ++ float b = delinearize(c.z); ++ return rgb2yuv(r, g, b); ++} ++ ++static __inline__ __device__ float3 lrgb2lrgb(float3 c) { ++ if (rgb2rgb_passthrough) { ++ return c; ++ } else { ++ float r = c.x, g = c.y, b = c.z; ++ float rr = rgb2rgb_matrix[0] * r + rgb2rgb_matrix[1] * g + rgb2rgb_matrix[2] * b; ++ float gg = rgb2rgb_matrix[3] * r + rgb2rgb_matrix[4] * g + rgb2rgb_matrix[5] * b; ++ float bb = rgb2rgb_matrix[6] * r + rgb2rgb_matrix[7] * g + rgb2rgb_matrix[8] * b; ++ return make_float3(rr, gg, bb); ++ } ++} ++ ++#endif /* AVFILTER_CUDA_COLORSPACE_COMMON_H */ +Index: jellyfin-ffmpeg/libavfilter/cuda/host_util.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/host_util.c +@@ -0,0 +1,35 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavfilter/colorspace.h" ++#include "host_util.h" ++ ++int ff_make_cuda_frame(FFCUDAFrame *dst, const AVFrame *src) ++{ ++ int i = 0; ++ for (i = 0; i < 4; i++) { ++ dst->data[i] = src->data[i]; ++ dst->linesize[i] = src->linesize[i]; ++ } ++ ++ dst->width = src->width; ++ dst->height = src->height; ++ ++ return 0; ++} ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/host_util.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/host_util.h +@@ -0,0 +1,29 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_HOST_UTIL_H ++#define AVFILTER_CUDA_HOST_UTIL_H ++ ++#include "libavutil/frame.h" ++ ++#include "shared.h" ++ ++int ff_make_cuda_frame(FFCUDAFrame *dst, const AVFrame *src); ++ ++#endif /* AVFILTER_CUDA_HOST_UTIL_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/pixfmt.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/pixfmt.h +@@ -0,0 +1,209 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_PIXFMT_H ++#define AVFILTER_CUDA_PIXFMT_H ++ ++#include "shared.h" ++ ++extern __constant__ const enum AVPixelFormat fmt_src, fmt_dst; ++extern __constant__ const int depth_src, depth_dst; ++ ++// Single-sample read function ++template<typename T, int p> ++static __inline__ __device__ T read_sample(const FFCUDAFrame& frame, int x, int y) ++{ ++ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); ++ return ptr[x]; ++} ++ ++// Per-format read functions ++static __inline__ __device__ ushort3 read_p016(const FFCUDAFrame& frame, int x, int y) ++{ ++ return make_ushort3(read_sample<ushort, 0>(frame, x, y), ++ read_sample<ushort, 1>(frame, (x & ~1), y / 2), ++ read_sample<ushort, 1>(frame, (x & ~1) + 1, y / 2)); ++} ++ ++static __inline__ __device__ ushort3 read_p010(const FFCUDAFrame& frame, int x, int y) ++{ ++ ushort3 val = read_p016(frame, x, y); ++ return make_ushort3(val.x >> 6, ++ val.y >> 6, ++ val.z >> 6); ++} ++ ++static __inline__ __device__ ushort3 read_yuv420p16(const FFCUDAFrame& frame, int x, int y) ++{ ++ return make_ushort3(read_sample<ushort, 0>(frame, x, y), ++ read_sample<ushort, 1>(frame, x / 2, y / 2), ++ read_sample<ushort, 2>(frame, x / 2, y / 2)); ++} ++ ++static __inline__ __device__ ushort3 read_yuv420p10(const FFCUDAFrame& frame, int x, int y) ++{ ++ ushort3 val = read_yuv420p16(frame, x, y); ++ return make_ushort3(val.x >> 6, ++ val.y >> 6, ++ val.z >> 6); ++} ++ ++// Generic read functions ++static __inline__ __device__ ushort3 read_px(const FFCUDAFrame& frame, int x, int y) ++{ ++ if (fmt_src == AV_PIX_FMT_P016) ++ return read_p016(frame, x, y); ++ else if (fmt_src == AV_PIX_FMT_P010) ++ return read_p010(frame, x, y); ++ else ++ return make_ushort3(0, 0, 0); ++} ++ ++static __inline__ __device__ float sample_to_float(unsigned short i) ++{ ++ return (float)i / ((1 << depth_src) - 1); ++} ++ ++static __inline__ __device__ float3 pixel_to_float3(ushort3 flt) ++{ ++ return make_float3(sample_to_float(flt.x), ++ sample_to_float(flt.y), ++ sample_to_float(flt.z)); ++} ++ ++static __inline__ __device__ float3 read_px_flt(const FFCUDAFrame& frame, int x, int y) ++{ ++ return pixel_to_float3(read_px(frame, x, y)); ++} ++ ++// Single-sample write function ++template<int p, typename T> ++static __inline__ __device__ void write_sample(const FFCUDAFrame& frame, int x, int y, T sample) ++{ ++ T* ptr = (T*)(frame.data[p] + (y * frame.linesize[p])); ++ ptr[x] = sample; ++} ++ ++// Per-format write functions ++static __inline__ __device__ void write_nv12_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned char)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); ++ ++ write_sample<1>(frame, (x & ~1), y / 2, (unsigned char)chroma.y); ++ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned char)chroma.z); ++} ++ ++static __inline__ __device__ void write_yuv420p_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned char)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned char)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned char)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned char)d.x); ++ ++
write_sample<1>(frame, x / 2, y / 2, (unsigned char)chroma.y); ++ write_sample<2>(frame, x / 2, y / 2, (unsigned char)chroma.z); ++} ++ ++static __inline__ __device__ void write_p016_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned short)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); ++ ++ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)chroma.y); ++ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)chroma.z); ++} ++ ++static __inline__ __device__ void write_p010_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); ++ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); ++ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); ++ ++ write_sample<1>(frame, (x & ~1), y / 2, (unsigned short)(chroma.y << 6)); ++ write_sample<1>(frame, (x & ~1) + 1, y / 2, (unsigned short)(chroma.z << 6)); ++} ++ ++static __inline__ __device__ void write_yuv420p16_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)a.x); ++ write_sample<0>(frame, x + 1, y, (unsigned short)b.x); ++ write_sample<0>(frame, x, y + 1, (unsigned short)c.x); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)d.x); ++ ++ write_sample<1>(frame, x / 2, y / 2, (unsigned short)chroma.y); ++ write_sample<2>(frame, x / 2, y / 2, (unsigned short)chroma.z); ++} ++ ++static __inline__ __device__ void write_yuv420p10_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ write_sample<0>(frame, x, y, (unsigned short)(a.x << 6)); ++ write_sample<0>(frame, x + 1, y, (unsigned short)(b.x << 6)); ++ write_sample<0>(frame, x, y + 1, (unsigned short)(c.x << 6)); ++ write_sample<0>(frame, x + 1, y + 1, (unsigned short)(d.x << 6)); ++ ++ write_sample<1>(frame, x / 2, y / 2, (unsigned short)(chroma.y << 6)); ++ write_sample<2>(frame, x / 2, y / 2, (unsigned short)(chroma.z << 6)); ++} ++ ++// Generic write functions ++static __inline__ __device__ void write_2x2(const FFCUDAFrame& frame, int x, int y, ushort3 a, ushort3 b, ushort3 c, ushort3 d, ushort3 chroma) ++{ ++ if (fmt_dst == AV_PIX_FMT_YUV420P) ++ write_yuv420p_2x2(frame, x, y, a, b, c, d, chroma); ++ else if (fmt_dst == AV_PIX_FMT_NV12) ++ write_nv12_2x2(frame, x, y, a, b, c, d, chroma); ++ else if (fmt_dst == AV_PIX_FMT_P016) ++ write_p016_2x2(frame, x, y, a, b, c, d, chroma); ++ else if (fmt_dst == AV_PIX_FMT_P010) ++ write_p010_2x2(frame, x, y, a, b, c, d, chroma); ++} ++ ++static __inline__ __device__ unsigned short sample_to_ushort(float flt) ++{ ++ return (unsigned short)(flt * ((1 << depth_dst) - 1)); ++} ++ ++static __inline__ __device__ ushort3 pixel_to_ushort3(float3 flt) ++{ ++ return make_ushort3(sample_to_ushort(flt.x), ++ sample_to_ushort(flt.y), ++ sample_to_ushort(flt.z)); ++} ++ ++static __inline__ __device__ void write_2x2_flt(const FFCUDAFrame& frame, int x, int y, float3 a, float3 b, float3 c, float3 d) ++{ ++ float3 chroma = get_chroma_sample(a, b, c, d); ++ ++ ushort3 ia = pixel_to_ushort3(a); ++ ushort3 ib = pixel_to_ushort3(b); ++ ushort3 ic = pixel_to_ushort3(c); 
++ ushort3 id = pixel_to_ushort3(d); ++ ++ ushort3 ichroma = pixel_to_ushort3(chroma); ++ ++ write_2x2(frame, x, y, ia, ib, ic, id, ichroma); ++} ++ ++#endif /* AVFILTER_CUDA_PIXFMT_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/shared.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/shared.h +@@ -0,0 +1,32 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_SHARED_H ++#define AVFILTER_CUDA_SHARED_H ++ ++typedef struct FFCUDAFrame { ++ unsigned char *data[4]; ++ int linesize[4]; ++ ++ int width, height; ++ ++ float peak; ++} FFCUDAFrame; ++ ++#endif /* AVFILTER_CUDA_SHARED_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/tonemap.cu +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/tonemap.cu +@@ -0,0 +1,201 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "colorspace_common.h" ++#include "pixfmt.h" ++#include "tonemap.h" ++#include "util.h" ++ ++extern __constant__ const enum TonemapAlgorithm tonemap_func; ++extern __constant__ const float tone_param; ++extern __constant__ const float desat_param; ++ ++#define mix(x, y, a) ((x) + ((y) - (x)) * (a)) ++ ++static __inline__ __device__ ++float hable_f(float in) { ++ float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; ++ return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; ++} ++ ++static __inline__ __device__ ++float direct(float s, float peak) { ++ return s; ++} ++ ++static __inline__ __device__ ++float linear(float s, float peak) { ++ return s * tone_param / peak; ++} ++ ++static __inline__ __device__ ++float gamma(float s, float peak) { ++ float p = s > 0.05f ? s / peak : 0.05f / peak; ++ float v = __powf(p, 1.0f / tone_param); ++ return s > 0.05f ? 
v : (s * v / 0.05f); ++} ++ ++static __inline__ __device__ ++float clip(float s, float peak) { ++ return clamp(s * tone_param, 0.0f, 1.0f); ++} ++ ++static __inline__ __device__ ++float reinhard(float s, float peak) { ++ return s / (s + tone_param) * (peak + tone_param) / peak; ++} ++ ++static __inline__ __device__ ++float hable(float s, float peak) { ++ return hable_f(s) / hable_f(peak); ++} ++ ++static __inline__ __device__ ++float mobius(float s, float peak) { ++ float j = tone_param; ++ float a, b; ++ ++ if (s <= j) ++ return s; ++ ++ a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); ++ b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, FLOAT_EPS); ++ ++ return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b); ++} ++ ++static __inline__ __device__ ++float bt2390(float s, float peak, float dst_peak) { ++ float peak_pq = inverse_eotf_st2084(peak); ++ float scale = peak_pq > 0.0f ? (1.0f / peak_pq) : 1.0f; ++ ++ float s_pq = inverse_eotf_st2084(s) * scale; ++ float max_lum = inverse_eotf_st2084(dst_peak) * scale; ++ ++ float ks = 1.5f * max_lum - 0.5f; ++ float tb = (s_pq - ks) / (1.0f - ks); ++ float tb2 = tb * tb; ++ float tb3 = tb2 * tb; ++ float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + ++ (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + ++ (-2.0f * tb3 + 3.0f * tb2) * max_lum; ++ float sig = mix(pb, s_pq, s_pq < ks); ++ ++ return eotf_st2084(sig * peak_pq); ++} ++ ++static __inline__ __device__ ++float map(float s, float peak, float dst_peak) ++{ ++ switch (tonemap_func) { ++ case TONEMAP_NONE: ++ default: ++ return direct(s, peak); ++ case TONEMAP_LINEAR: ++ return linear(s, peak); ++ case TONEMAP_GAMMA: ++ return gamma(s, peak); ++ case TONEMAP_CLIP: ++ return clip(s, peak); ++ case TONEMAP_REINHARD: ++ return reinhard(s, peak); ++ case TONEMAP_HABLE: ++ return hable(s, peak); ++ case TONEMAP_MOBIUS: ++ return mobius(s, peak); ++ case TONEMAP_BT2390: ++ return bt2390(s, peak, dst_peak); ++ } ++} ++ ++static __inline__ __device__ ++float3 map_one_pixel_rgb(float3 rgb, const FFCUDAFrame& src, const FFCUDAFrame& dst) { ++ float sig = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS); ++ float peak = src.peak; ++ float dst_peak = dst.peak; ++ ++ // Rescale the variables in order to bring it into a representation where ++ // 1.0 represents the dst_peak. This is because all of the tone mapping ++ // algorithms are defined in such a way that they map to the range [0.0, 1.0]. 
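++ // For example, with dst.peak == 4.0 (an HDR target) both sig and peak are divided by 4.0 below, so the curves in map() still saturate at 1.0; for an SDR target dst.peak == 1.0 and the rescale is skipped.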
++ if (dst.peak > 1.0f) { ++ sig *= 1.0f / dst.peak; ++ peak *= 1.0f / dst.peak; ++ } ++ ++ float sig_old = sig; ++ ++ // Desaturate the color using a coefficient dependent on the signal level ++ if (desat_param > 0.0f) { ++ float luma = get_luma_dst(rgb, luma_dst); ++ float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS); ++ coeff = __powf(coeff, 10.0f / desat_param); ++ rgb = mix(rgb, make_float3(luma, luma, luma), make_float3(coeff, coeff, coeff)); ++ } ++ ++ sig = map(sig, peak, dst_peak); ++ ++ sig = min(sig, 1.0f); ++ rgb = rgb * (sig / sig_old); ++ return rgb; ++} ++ ++// Map from source space YUV to destination space RGB ++static __inline__ __device__ ++float3 map_to_dst_space_from_yuv(float3 yuv) { ++ float3 c = yuv2lrgb(yuv); ++ c = lrgb2lrgb(c); ++ return c; ++} ++ ++extern "C" { ++ ++__global__ void tonemap(FFCUDAFrame src, FFCUDAFrame dst) ++{ ++ int xi = blockIdx.x * blockDim.x + threadIdx.x; ++ int yi = blockIdx.y * blockDim.y + threadIdx.y; ++ // each work item processes four pixels (a 2x2 block) ++ int x = 2 * xi; ++ int y = 2 * yi; ++ ++ if (y + 1 < src.height && x + 1 < src.width) ++ { ++ float3 yuv0 = read_px_flt(src, x, y); ++ float3 yuv1 = read_px_flt(src, x + 1, y); ++ float3 yuv2 = read_px_flt(src, x, y + 1); ++ float3 yuv3 = read_px_flt(src, x + 1, y + 1); ++ ++ float3 c0 = map_to_dst_space_from_yuv(yuv0); ++ float3 c1 = map_to_dst_space_from_yuv(yuv1); ++ float3 c2 = map_to_dst_space_from_yuv(yuv2); ++ float3 c3 = map_to_dst_space_from_yuv(yuv3); ++ ++ c0 = map_one_pixel_rgb(c0, src, dst); ++ c1 = map_one_pixel_rgb(c1, src, dst); ++ c2 = map_one_pixel_rgb(c2, src, dst); ++ c3 = map_one_pixel_rgb(c3, src, dst); ++ ++ yuv0 = lrgb2yuv(c0); ++ yuv1 = lrgb2yuv(c1); ++ yuv2 = lrgb2yuv(c2); ++ yuv3 = lrgb2yuv(c3); ++ ++ write_2x2_flt(dst, x, y, yuv0, yuv1, yuv2, yuv3); ++ } ++} ++ ++} +Index: jellyfin-ffmpeg/libavfilter/cuda/tonemap.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/tonemap.h +@@ -0,0 +1,35 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_TONEMAP_H ++#define AVFILTER_CUDA_TONEMAP_H ++ ++enum TonemapAlgorithm { ++ TONEMAP_NONE, ++ TONEMAP_LINEAR, ++ TONEMAP_GAMMA, ++ TONEMAP_CLIP, ++ TONEMAP_REINHARD, ++ TONEMAP_HABLE, ++ TONEMAP_MOBIUS, ++ TONEMAP_BT2390, ++ TONEMAP_MAX, ++}; ++ ++#endif /* AVFILTER_CUDA_TONEMAP_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/cuda/util.h +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/cuda/util.h +@@ -0,0 +1,55 @@ ++/* ++ * This file is part of FFmpeg.
++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVFILTER_CUDA_UTIL_H ++#define AVFILTER_CUDA_UTIL_H ++ ++static inline __device__ float3 operator+(const float3 &a, const float3 &b) { ++ return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); ++} ++ ++static inline __device__ float3 operator+(const float3 &a, float b) { ++ return make_float3(a.x + b, a.y + b, a.z + b); ++} ++ ++static inline __device__ float3 operator-(const float3 &a, const float3 &b) { ++ return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); ++} ++ ++static inline __device__ float3 operator-(const float3 &a, float b) { ++ return make_float3(a.x - b, a.y - b, a.z - b); ++} ++ ++static inline __device__ float3 operator*(const float3 &a, const float3 &b) { ++ return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); ++} ++ ++static inline __device__ float3 operator*(const float3 &a, float b) { ++ return make_float3(a.x * b, a.y * b, a.z * b); ++} ++ ++static inline __device__ float3 operator/(const float3 &a, const float3 &b) { ++ return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); ++} ++ ++static inline __device__ float3 operator/(const float3 &a, float b) { ++ return make_float3(a.x / b, a.y / b, a.z / b); ++} ++ ++#endif /* AVFILTER_CUDA_UTIL_H */ ++ +Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_cuda.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vf_tonemap_cuda.c +@@ -0,0 +1,720 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include <float.h> ++#include <stdio.h> ++#include <string.h> ++ ++#include "libavutil/avassert.h" ++#include "libavutil/avstring.h" ++#include "libavutil/bprint.h" ++#include "libavutil/common.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_cuda_internal.h" ++#include "libavutil/cuda_check.h" ++#include "libavutil/internal.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#include "avfilter.h" ++#include "colorspace.h" ++#include "cuda/host_util.h" ++#include "cuda/shared.h" ++#include "cuda/tonemap.h" ++#include "formats.h" ++#include "internal.h" ++#include "scale_eval.h" ++#include "video.h" ++ ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016 ++}; ++ ++#define REF_WHITE_BT2390 203.0f ++#define REF_WHITE_DEFAULT 100.0f ++ ++#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) ++#define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1)) ++#define NUM_BUFFERS 2 ++#define BLOCKX 32 ++#define BLOCKY 16 ++ ++#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) ++ ++typedef struct TonemapCUDAContext { ++ const AVClass *class; ++ ++ AVCUDADeviceContext *hwctx; ++ ++ enum AVPixelFormat in_fmt, out_fmt; ++ ++ enum AVColorTransferCharacteristic trc, in_trc, out_trc; ++ enum AVColorSpace spc, in_spc, out_spc; ++ enum AVColorPrimaries pri, in_pri, out_pri; ++ enum AVColorRange range, in_range, out_range; ++ enum AVChromaLocation in_chroma_loc, out_chroma_loc; ++ ++ AVBufferRef *frames_ctx; ++ AVFrame *frame; ++ ++ AVFrame *tmp_frame; ++ ++ /** ++ * Output sw format. AV_PIX_FMT_NONE for no conversion.
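++ * (init() maps the "format" option value "same" to AV_PIX_FMT_NONE.)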
++ */ ++ enum AVPixelFormat format; ++ char *format_str; ++ ++ CUcontext cu_ctx; ++ CUmodule cu_module; ++ ++ CUfunction cu_func; ++ ++ CUdeviceptr srcBuffer; ++ CUdeviceptr dstBuffer; ++ ++ enum TonemapAlgorithm tonemap; ++ double ref_white; ++ double param; ++ double desat_param; ++ double peak; ++ double dst_peak; ++ double scene_threshold; ++ ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++} TonemapCUDAContext; ++ ++static av_cold int init(AVFilterContext *ctx) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ ++ if (!strcmp(s->format_str, "same")) { ++ s->format = AV_PIX_FMT_NONE; ++ } else { ++ s->format = av_get_pix_fmt(s->format_str); ++ if (s->format == AV_PIX_FMT_NONE) { ++ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ s->frame = av_frame_alloc(); ++ if (!s->frame) ++ return AVERROR(ENOMEM); ++ ++ s->tmp_frame = av_frame_alloc(); ++ if (!s->tmp_frame) ++ return AVERROR(ENOMEM); ++ ++ return 0; ++} ++ ++static av_cold void uninit(AVFilterContext *ctx) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ ++ if (s->hwctx) { ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; ++ ++ CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); ++ ++ if (s->cu_module) { ++ CHECK_CU(cu->cuModuleUnload(s->cu_module)); ++ s->cu_func = NULL; ++ s->cu_module = NULL; ++ } ++ ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ } ++ ++ av_frame_free(&s->frame); ++ av_buffer_unref(&s->frames_ctx); ++ av_frame_free(&s->tmp_frame); ++} ++ ++static int query_formats(AVFilterContext *ctx) ++{ ++ static const enum AVPixelFormat pixel_formats[] = { ++ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, ++ }; ++ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); ++ ++ return ff_set_common_formats(ctx, pix_fmts); ++} ++ ++static av_cold int init_stage(TonemapCUDAContext *s, AVBufferRef *device_ctx, ++ AVFilterLink *outlink) ++{ ++ AVBufferRef *out_ref = NULL; ++ AVHWFramesContext *out_ctx; ++ int ret; ++ ++ out_ref = av_hwframe_ctx_alloc(device_ctx); ++ if (!out_ref) ++ return AVERROR(ENOMEM); ++ out_ctx = (AVHWFramesContext*)out_ref->data; ++ ++ out_ctx->format = AV_PIX_FMT_CUDA; ++ out_ctx->sw_format = s->out_fmt; ++ out_ctx->width = FFALIGN(outlink->w, 32); ++ out_ctx->height = FFALIGN(outlink->h, 32); ++ ++ ret = av_hwframe_ctx_init(out_ref); ++ if (ret < 0) ++ goto fail; ++ ++ av_frame_unref(s->frame); ++ ret = av_hwframe_get_buffer(out_ref, s->frame, 0); ++ if (ret < 0) ++ goto fail; ++ ++ s->frame->width = outlink->w; ++ s->frame->height = outlink->h; ++ ++ av_buffer_unref(&s->frames_ctx); ++ s->frames_ctx = out_ref; ++ ++ return 0; ++fail: ++ av_buffer_unref(&out_ref); ++ return ret; ++} ++ ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ int i; ++ ++ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static av_cold int init_processing_chain(AVFilterContext *ctx, AVFilterLink *outlink) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ ++ AVHWFramesContext *in_frames_ctx; ++ ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ const AVPixFmtDescriptor *in_desc; ++ const AVPixFmtDescriptor *out_desc; ++ int ret; ++ ++ /* check that we have a hw context */ ++ if (!ctx->inputs[0]->hw_frames_ctx) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); ++ return AVERROR(EINVAL); ++ } ++ in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; ++ in_format = in_frames_ctx->sw_format; ++ 
out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format; ++ in_desc = av_pix_fmt_desc_get(in_format); ++ out_desc = av_pix_fmt_desc_get(out_format); ++ ++ if (!format_is_supported(in_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(in_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!format_is_supported(out_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!(in_desc->comp[0].depth == 10 || ++ in_desc->comp[0].depth == 16)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", ++ in_desc->comp[0].depth); ++ return AVERROR(ENOSYS); ++ } ++ ++ s->in_fmt = in_format; ++ s->out_fmt = out_format; ++ s->in_desc = in_desc; ++ s->out_desc = out_desc; ++ ++ ret = init_stage(s, in_frames_ctx->device_ref, outlink); ++ if (ret < 0) ++ return ret; ++ ++ ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx); ++ if (!ctx->outputs[0]->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ ++ return 0; ++} ++ ++static const struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = { ++ [AVCOL_PRI_BT709] = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 }, ++ [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 }, ++}; ++ ++static const struct WhitepointCoefficients whitepoint_table[AVCOL_PRI_NB] = { ++ [AVCOL_PRI_BT709] = { 0.3127, 0.3290 }, ++ [AVCOL_PRI_BT2020] = { 0.3127, 0.3290 }, ++}; ++ ++static int get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, ++ double rgb2rgb[3][3]) { ++ double rgb2xyz[3][3], xyz2rgb[3][3]; ++ ++ ff_fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], rgb2xyz); ++ ff_matrix_invert_3x3(rgb2xyz, xyz2rgb); ++ ff_fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], rgb2xyz); ++ ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb); ++ ++ return 0; ++} ++ ++static av_cold int compile(AVFilterLink *inlink) ++{ ++ int ret = 0; ++ AVFilterContext *ctx = inlink->dst; ++ TonemapCUDAContext *s = ctx->priv; ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; ++ AVBPrint constants; ++ CUlinkState link_state; ++ void *cubin; ++ size_t cubin_size; ++ double rgb_matrix[3][3], yuv_matrix[3][3], rgb2rgb_matrix[3][3]; ++ const struct LumaCoefficients *in_coeffs, *out_coeffs; ++ enum AVColorTransferCharacteristic in_trc = s->in_trc, out_trc = s->out_trc; ++ enum AVColorSpace in_spc = s->in_spc, out_spc = s->out_spc; ++ enum AVColorPrimaries in_pri = s->in_pri, out_pri = s->out_pri; ++ enum AVColorRange in_range = s->in_range, out_range = s->out_range; ++ char info_log[4096], error_log[4096]; ++ CUjit_option options[] = {CU_JIT_INFO_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; ++ void *option_values[] = {&info_log, &error_log, (void*)(intptr_t)sizeof(info_log), (void*)(intptr_t)sizeof(error_log)}; ++ ++ extern char tonemap_ptx[]; ++ ++ switch(s->tonemap) { ++ case TONEMAP_GAMMA: ++ if (isnan(s->param)) ++ s->param = 1.8f; ++ break; ++ case TONEMAP_REINHARD: ++ if (!isnan(s->param)) ++ s->param = (1.0f - s->param) / s->param; ++ break; ++ case TONEMAP_MOBIUS: ++ if (isnan(s->param)) ++ s->param = 0.3f; ++ break; ++ } ++ ++ if (isnan(s->param)) ++ s->param = 1.0f; ++ ++ s->ref_white = s->tonemap == TONEMAP_BT2390 ? 
REF_WHITE_BT2390 ++ : REF_WHITE_DEFAULT; ++ ++ s->dst_peak = 1.0f; ++ ++ if (in_trc == AVCOL_TRC_UNSPECIFIED) ++ in_trc = AVCOL_TRC_SMPTE2084; ++ if (out_trc == AVCOL_TRC_UNSPECIFIED) ++ out_trc = AVCOL_TRC_BT709; ++ ++ if (in_spc == AVCOL_SPC_UNSPECIFIED) ++ in_spc = AVCOL_SPC_BT2020_NCL; ++ if (out_spc == AVCOL_SPC_UNSPECIFIED) ++ out_spc = AVCOL_SPC_BT709; ++ ++ if (in_pri == AVCOL_PRI_UNSPECIFIED) ++ in_pri = AVCOL_PRI_BT2020; ++ if (out_pri == AVCOL_PRI_UNSPECIFIED) ++ out_pri = AVCOL_PRI_BT709; ++ ++ if (in_range == AVCOL_RANGE_UNSPECIFIED) ++ in_range = AVCOL_RANGE_MPEG; ++ if (out_range == AVCOL_RANGE_UNSPECIFIED) ++ out_range = AVCOL_RANGE_MPEG; ++ ++ av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n", ++ av_color_transfer_name(in_trc), ++ av_color_transfer_name(out_trc)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n", ++ av_color_space_name(in_spc), ++ av_color_space_name(out_spc)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n", ++ av_color_primaries_name(in_pri), ++ av_color_primaries_name(out_pri)); ++ av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n", ++ av_color_range_name(in_range), ++ av_color_range_name(out_range)); ++ ++ if (!(in_coeffs = ff_get_luma_coefficients(in_spc))) ++ return AVERROR(EINVAL); ++ ++ ff_fill_rgb2yuv_table(in_coeffs, yuv_matrix); ++ ff_matrix_invert_3x3(yuv_matrix, rgb_matrix); ++ ++ if (!(out_coeffs = ff_get_luma_coefficients(out_spc))) ++ return AVERROR(EINVAL); ++ ++ ff_fill_rgb2yuv_table(out_coeffs, yuv_matrix); ++ ++ if ((ret = get_rgb2rgb_matrix(in_pri, out_pri, rgb2rgb_matrix)) < 0) ++ return ret; ++ ++ av_bprint_init(&constants, 2048, AV_BPRINT_SIZE_UNLIMITED); ++ ++ av_bprintf(&constants, ".version 3.2\n"); ++ av_bprintf(&constants, ".target sm_30\n"); ++ av_bprintf(&constants, ".address_size %zu\n", sizeof(void*) * 8); ++ ++#define CONSTANT_A(decl, align, ...) \ ++ av_bprintf(&constants, ".visible .const .align " #align " " decl ";\n", __VA_ARGS__) ++#define CONSTANT(decl, ...) 
CONSTANT_A(decl, 4, __VA_ARGS__)
++#define CONSTANT_M(a, b) \
++    CONSTANT(".f32 " a "[] = {%f, %f, %f, %f, %f, %f, %f, %f, %f}", \
++             b[0][0], b[0][1], b[0][2], \
++             b[1][0], b[1][1], b[1][2], \
++             b[2][0], b[2][1], b[2][2])
++#define CONSTANT_C(a, b) \
++    CONSTANT(".f32 " a "[] = {%f, %f, %f}", \
++             b->cr, b->cg, b->cb)
++
++    CONSTANT(".u32 depth_src = %i", (int)s->in_desc->comp[0].depth);
++    CONSTANT(".u32 depth_dst = %i", (int)s->out_desc->comp[0].depth);
++    CONSTANT(".u32 fmt_src = %i", (int)s->in_fmt);
++    CONSTANT(".u32 fmt_dst = %i", (int)s->out_fmt);
++    CONSTANT(".u32 range_src = %i", (int)in_range);
++    CONSTANT(".u32 range_dst = %i", (int)out_range);
++    CONSTANT(".u32 trc_src = %i", (int)in_trc);
++    CONSTANT(".u32 trc_dst = %i", (int)out_trc);
++    CONSTANT(".u32 chroma_loc_src = %i", (int)s->in_chroma_loc);
++    CONSTANT(".u32 chroma_loc_dst = %i", (int)s->out_chroma_loc);
++    CONSTANT(".u32 tonemap_func = %i", (int)s->tonemap);
++    CONSTANT(".f32 ref_white = %f", s->ref_white);
++    CONSTANT(".f32 tone_param = %f", s->param);
++    CONSTANT(".f32 desat_param = %f", s->desat_param);
++    CONSTANT_M("rgb_matrix", rgb_matrix);
++    CONSTANT_M("yuv_matrix", yuv_matrix);
++    CONSTANT_A(".u8 rgb2rgb_passthrough = %i", 1, in_pri == out_pri);
++    CONSTANT_M("rgb2rgb_matrix", rgb2rgb_matrix);
++    CONSTANT_C("luma_src", in_coeffs);
++    CONSTANT_C("luma_dst", out_coeffs);
++
++    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
++    if (ret < 0)
++        return ret;
++
++    if (s->cu_module) {
++        ret = CHECK_CU(cu->cuModuleUnload(s->cu_module));
++        if (ret < 0)
++            goto fail;
++
++        s->cu_func = NULL;
++        s->cu_module = NULL;
++    }
++
++    ret = CHECK_CU(cu->cuLinkCreate(sizeof(options) / sizeof(options[0]), options, option_values, &link_state));
++    if (ret < 0)
++        goto fail;
++
++    ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, constants.str,
++                                     constants.len, "constants", 0, NULL, NULL));
++    if (ret < 0)
++        goto fail2;
++
++    ret = CHECK_CU(cu->cuLinkAddData(link_state, CU_JIT_INPUT_PTX, tonemap_ptx,
++                                     strlen(tonemap_ptx), "tonemap.ptx", 0, NULL, NULL));
++    if (ret < 0)
++        goto fail2;
++
++    ret = CHECK_CU(cu->cuLinkComplete(link_state, &cubin, &cubin_size));
++    if (ret < 0)
++        goto fail2;
++
++    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, cubin));
++    if (ret < 0)
++        goto fail2;
++
++    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func, s->cu_module, "tonemap"));
++    if (ret < 0)
++        goto fail2;
++
++fail2:
++    CHECK_CU(cu->cuLinkDestroy(link_state));
++
++fail:
++    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
++
++    av_bprint_finalize(&constants, NULL);
++
++    if ((intptr_t)option_values[2] > 0)
++        av_log(ctx, AV_LOG_INFO, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[2], info_log);
++
++    if ((intptr_t)option_values[3] > 0)
++        av_log(ctx, AV_LOG_ERROR, "CUDA linker output: %.*s\n", (int)(intptr_t)option_values[3], error_log);
++
++    return ret;
++}
++
++static av_cold int config_props(AVFilterLink *outlink)
++{
++    AVFilterContext *ctx = outlink->src;
++    AVFilterLink *inlink = outlink->src->inputs[0];
++    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
++    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
++    TonemapCUDAContext *s = ctx->priv;
++    int ret;
++
++    s->hwctx = device_hwctx;
++
++    outlink->w = inlink->w;
++    outlink->h = inlink->h;
++
++    ret = init_processing_chain(ctx, outlink);
++    if (ret < 0)
++        return ret;
++
++    outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
++
++    return 0;
++}
++
++static int run_kernel(AVFilterContext *ctx,
++                      AVFrame
*out, AVFrame *in) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ FFCUDAFrame src, dst; ++ void *args_uchar[] = { &src, &dst }; ++ int ret; ++ ++ ret = ff_make_cuda_frame(&src, in); ++ if (ret < 0) ++ goto fail; ++ ++ ret = ff_make_cuda_frame(&dst, out); ++ if (ret < 0) ++ goto fail; ++ ++ src.peak = s->peak; ++ if (!src.peak) { ++ src.peak = ff_determine_signal_peak(in); ++ av_log(s, AV_LOG_DEBUG, "Computed signal peak: %f\n", src.peak); ++ } ++ ++ dst.peak = s->dst_peak; ++ ++ ret = CHECK_CU(cu->cuLaunchKernel(s->cu_func, ++ DIV_UP(src.width / 2, BLOCKX), DIV_UP(src.height / 2, BLOCKY), 1, ++ BLOCKX, BLOCKY, 1, 0, s->hwctx->stream, args_uchar, NULL)); ++ ++fail: ++ return ret; ++} ++ ++static int do_tonemap(AVFilterContext *ctx, AVFrame *out, AVFrame *in) ++{ ++ TonemapCUDAContext *s = ctx->priv; ++ AVFrame *src = in; ++ int ret; ++ ++ ret = run_kernel(ctx, s->frame, src); ++ if (ret < 0) ++ return ret; ++ ++ src = s->frame; ++ ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); ++ if (ret < 0) ++ return ret; ++ ++ av_frame_move_ref(out, s->frame); ++ av_frame_move_ref(s->frame, s->tmp_frame); ++ ++ s->frame->width = in->width; ++ s->frame->height = in->height; ++ ++ ret = av_frame_copy_props(out, in); ++ if (ret < 0) ++ return ret; ++ ++ if (s->out_trc != out->color_trc || ++ s->out_spc != out->colorspace || ++ s->out_pri != out->color_primaries || ++ s->out_range != out->color_range || ++ s->out_chroma_loc != out->chroma_location) { ++ out->color_trc = s->out_trc; ++ out->colorspace = s->out_spc; ++ out->color_primaries = s->out_pri; ++ out->color_range = s->out_range; ++ out->chroma_location = s->out_chroma_loc; ++ } ++ ++ return 0; ++} ++ ++static int filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterContext *ctx = link->dst; ++ TonemapCUDAContext *s = ctx->priv; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ CudaFunctions *cu = s->hwctx->internal->cuda_dl; ++ ++ AVFrame *out = NULL; ++ CUcontext dummy; ++ int ret = 0; ++ ++ out = av_frame_alloc(); ++ if (!out) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ if (!(in->color_trc == AVCOL_TRC_SMPTE2084 || ++ in->color_trc == AVCOL_TRC_ARIB_STD_B67)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input transfer characteristic: %s\n", ++ av_color_transfer_name(in->color_trc)); ++ ret = AVERROR(EINVAL); ++ goto fail; ++ } ++ ++ if (!s->cu_func || ++ s->in_trc != in->color_trc || ++ s->in_spc != in->colorspace || ++ s->in_pri != in->color_primaries || ++ s->in_range != in->color_range || ++ s->in_chroma_loc != in->chroma_location) { ++ s->in_trc = in->color_trc; ++ s->in_spc = in->colorspace; ++ s->in_pri = in->color_primaries; ++ s->in_range = in->color_range; ++ s->in_chroma_loc = in->chroma_location; ++ ++ s->out_trc = s->trc; ++ s->out_spc = s->spc; ++ s->out_pri = s->pri; ++ s->out_range = s->range; ++ s->out_chroma_loc = s->in_chroma_loc; ++ ++ if ((ret = compile(link)) < 0) ++ goto fail; ++ } ++ ++ ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); ++ if (ret < 0) ++ goto fail; ++ ++ ret = do_tonemap(ctx, out, in); ++ ++ CHECK_CU(cu->cuCtxPopCurrent(&dummy)); ++ if (ret < 0) ++ goto fail; ++ ++ av_frame_free(&in); ++ ++ ff_update_hdr_metadata(out, s->dst_peak); ++ ++ return ff_filter_frame(outlink, out); ++fail: ++ av_frame_free(&in); ++ av_frame_free(&out); ++ return ret; ++} ++ ++#define OFFSET(x) offsetof(TonemapCUDAContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) ++static const AVOption options[] = { ++ { 
"tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" }, ++ { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, "tonemap" }, ++ { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, "tonemap" }, ++ { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, "tonemap" }, ++ { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, "tonemap" }, ++ { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" }, ++ { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, "tonemap" }, ++ { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, "tonemap" }, ++ { "bt2390", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_BT2390}, 0, 0, FLAGS, "tonemap" }, ++ { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, ++ { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, "transfer" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, "transfer" }, ++ { "matrix", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, "matrix" }, ++ { "m", "set colorspace matrix", OFFSET(spc), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_BT709}, -1, INT_MAX, FLAGS, "matrix" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, "matrix" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, "matrix" }, ++ { "primaries", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, "primaries" }, ++ { "p", "set color primaries", OFFSET(pri), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_BT709}, -1, INT_MAX, FLAGS, "primaries" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, "primaries" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, "primaries" }, ++ { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_MPEG}, -1, INT_MAX, FLAGS, "range" }, ++ { "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_MPEG}, -1, INT_MAX, FLAGS, "range" }, ++ { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, ++ { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, ++ { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, ++ { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, ++ { "format", "Output format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, ++ { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, ++ { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, ++ { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, ++ { NULL }, ++}; ++ ++static const AVClass tonemap_cuda_class = { ++ .class_name = "tonemap_cuda", ++ .item_name = av_default_item_name, ++ .option = options, ++ .version = 
LIBAVUTIL_VERSION_INT, ++}; ++ ++static const AVFilterPad inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = filter_frame, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = config_props, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vf_tonemap_cuda = { ++ .name = "tonemap_cuda", ++ .description = NULL_IF_CONFIG_SMALL("GPU accelerated HDR to SDR tonemapping"), ++ ++ .init = init, ++ .uninit = uninit, ++ .query_formats = query_formats, ++ ++ .priv_size = sizeof(TonemapCUDAContext), ++ .priv_class = &tonemap_cuda_class, ++ ++ .inputs = inputs, ++ .outputs = outputs, ++ ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/debian/patches/series b/debian/patches/series index 2b13748bf23..5f90ee2c53b 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,2 +1,3 @@ 0001-add-fixes-for-segement-muxer.patch 0002-add-cuda-pixfmt-converter-impl.patch +0003-add-cuda-tonemap-impl.patch From 0a67055b046aff93c77b0af4b8aab7f5c9e7d97b Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 5 Dec 2021 00:07:28 +0800 Subject: [PATCH 13/41] add amf refactor and hevc 10-bit encoding --- ...mf-refactor-and-hevc-10-bit-encoding.patch | 2513 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 2514 insertions(+) create mode 100644 debian/patches/0004-add-amf-refactor-and-hevc-10-bit-encoding.patch diff --git a/debian/patches/0004-add-amf-refactor-and-hevc-10-bit-encoding.patch b/debian/patches/0004-add-amf-refactor-and-hevc-10-bit-encoding.patch new file mode 100644 index 00000000000..689f8a818e8 --- /dev/null +++ b/debian/patches/0004-add-amf-refactor-and-hevc-10-bit-encoding.patch @@ -0,0 +1,2513 @@ +Index: jellyfin-ffmpeg/libavcodec/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/Makefile ++++ jellyfin-ffmpeg/libavcodec/Makefile +@@ -63,7 +63,7 @@ OBJS = ac3_parser.o + OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o + OBJS-$(CONFIG_AC3DSP) += ac3dsp.o ac3.o ac3tab.o + OBJS-$(CONFIG_ADTS_HEADER) += adts_header.o mpeg4audio.o +-OBJS-$(CONFIG_AMF) += amfenc.o ++OBJS-$(CONFIG_AMF) += amfenc.o amf.o + OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o + OBJS-$(CONFIG_ATSC_A53) += atsc_a53.o + OBJS-$(CONFIG_AUDIODSP) += audiodsp.o +@@ -1196,7 +1196,7 @@ SKIPHEADERS + + aacenc_quantization_misc.h \ + $(ARCH)/vp56_arith.h \ + +-SKIPHEADERS-$(CONFIG_AMF) += amfenc.h ++SKIPHEADERS-$(CONFIG_AMF) += amfenc.h amf.h + SKIPHEADERS-$(CONFIG_D3D11VA) += d3d11va.h dxva2_internal.h + SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h + SKIPHEADERS-$(CONFIG_JNI) += ffjni.h +Index: jellyfin-ffmpeg/libavcodec/amf.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavcodec/amf.c +@@ -0,0 +1,371 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "amf.h"
++
++#define FFMPEG_AMF_WRITER_ID L"ffmpeg_amf"
++
++const FormatMap format_map[] =
++{
++    { AV_PIX_FMT_NONE,    AMF_SURFACE_UNKNOWN },
++    { AV_PIX_FMT_NV12,    AMF_SURFACE_NV12 },
++    { AV_PIX_FMT_P010,    AMF_SURFACE_P010 },
++    { AV_PIX_FMT_BGR0,    AMF_SURFACE_BGRA },
++    { AV_PIX_FMT_RGB0,    AMF_SURFACE_RGBA },
++    { AV_PIX_FMT_GRAY8,   AMF_SURFACE_GRAY8 },
++    { AV_PIX_FMT_YUV420P, AMF_SURFACE_YUV420P },
++    { AV_PIX_FMT_YUYV422, AMF_SURFACE_YUY2 },
++};
++
++enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt)
++{
++    int i;
++    for (i = 0; i < amf_countof(format_map); i++) {
++        if (format_map[i].av_format == fmt) {
++            return format_map[i].amf_format;
++        }
++    }
++    return AMF_SURFACE_UNKNOWN;
++}
++
++enum AVPixelFormat amf_to_av_format(enum AMF_SURFACE_FORMAT fmt)
++{
++    int i;
++    for (i = 0; i < amf_countof(format_map); i++) {
++        if (format_map[i].amf_format == fmt) {
++            return format_map[i].av_format;
++        }
++    }
++    return AV_PIX_FMT_NONE;
++}
++
++const ColorTransferMap color_trc_map[] =
++{
++    { AVCOL_TRC_RESERVED0,    AMF_COLOR_TRANSFER_CHARACTERISTIC_UNDEFINED },
++    { AVCOL_TRC_BT709,        AMF_COLOR_TRANSFER_CHARACTERISTIC_BT709 },
++    { AVCOL_TRC_UNSPECIFIED,  AMF_COLOR_TRANSFER_CHARACTERISTIC_UNSPECIFIED },
++    { AVCOL_TRC_RESERVED,     AMF_COLOR_TRANSFER_CHARACTERISTIC_RESERVED },
++    { AVCOL_TRC_GAMMA22,      AMF_COLOR_TRANSFER_CHARACTERISTIC_GAMMA22 },
++    { AVCOL_TRC_GAMMA28,      AMF_COLOR_TRANSFER_CHARACTERISTIC_GAMMA28 },
++    { AVCOL_TRC_SMPTE170M,    AMF_COLOR_TRANSFER_CHARACTERISTIC_SMPTE170M },
++    { AVCOL_TRC_SMPTE240M,    AMF_COLOR_TRANSFER_CHARACTERISTIC_SMPTE240M },
++    { AVCOL_TRC_LINEAR,       AMF_COLOR_TRANSFER_CHARACTERISTIC_LINEAR },
++    { AVCOL_TRC_LOG,          AMF_COLOR_TRANSFER_CHARACTERISTIC_LOG },
++    { AVCOL_TRC_LOG_SQRT,     AMF_COLOR_TRANSFER_CHARACTERISTIC_LOG_SQRT },
++    { AVCOL_TRC_IEC61966_2_4, AMF_COLOR_TRANSFER_CHARACTERISTIC_IEC61966_2_4 },
++    { AVCOL_TRC_BT1361_ECG,   AMF_COLOR_TRANSFER_CHARACTERISTIC_BT1361_ECG },
++    { AVCOL_TRC_IEC61966_2_1, AMF_COLOR_TRANSFER_CHARACTERISTIC_IEC61966_2_1 },
++    { AVCOL_TRC_BT2020_10,    AMF_COLOR_TRANSFER_CHARACTERISTIC_BT2020_10 },
++    { AVCOL_TRC_BT2020_12,    AMF_COLOR_TRANSFER_CHARACTERISTIC_BT2020_12 },
++    { AVCOL_TRC_SMPTE2084,    AMF_COLOR_TRANSFER_CHARACTERISTIC_SMPTE2084 },
++    { AVCOL_TRC_SMPTE428,     AMF_COLOR_TRANSFER_CHARACTERISTIC_SMPTE428 },
++    { AVCOL_TRC_ARIB_STD_B67, AMF_COLOR_TRANSFER_CHARACTERISTIC_ARIB_STD_B67 },
++};
++
++enum AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM amf_av_to_amf_color_trc(enum AVColorTransferCharacteristic trc)
++{
++    int i;
++    for (i = 0; i < amf_countof(color_trc_map); i++) {
++        if (color_trc_map[i].av_color_trc == trc) {
++            return color_trc_map[i].amf_color_trc;
++        }
++    }
++    return AMF_COLOR_TRANSFER_CHARACTERISTIC_UNDEFINED;
++}
++
++const ColorPrimariesMap color_prm_map[] =
++{
++    { AVCOL_PRI_RESERVED0,   AMF_COLOR_PRIMARIES_UNDEFINED },
++    { AVCOL_PRI_BT709,       AMF_COLOR_PRIMARIES_BT709 },
++    { AVCOL_PRI_UNSPECIFIED, AMF_COLOR_PRIMARIES_UNSPECIFIED },
++    { AVCOL_PRI_RESERVED,    AMF_COLOR_PRIMARIES_RESERVED },
++    { AVCOL_PRI_BT470M,      AMF_COLOR_PRIMARIES_BT470M },
++    { AVCOL_PRI_BT470BG,     AMF_COLOR_PRIMARIES_BT470BG },
++    { AVCOL_PRI_SMPTE170M,   AMF_COLOR_PRIMARIES_SMPTE170M },
++    { AVCOL_PRI_SMPTE240M,   AMF_COLOR_PRIMARIES_SMPTE240M },
++    { AVCOL_PRI_FILM,        AMF_COLOR_PRIMARIES_FILM },
++    {
AVCOL_PRI_BT2020, AMF_COLOR_PRIMARIES_BT2020 }, ++ { AVCOL_PRI_SMPTE428, AMF_COLOR_PRIMARIES_SMPTE428 }, ++ { AVCOL_PRI_SMPTE431, AMF_COLOR_PRIMARIES_SMPTE431 }, ++ { AVCOL_PRI_SMPTE432, AMF_COLOR_PRIMARIES_SMPTE432 }, ++ { AVCOL_PRI_JEDEC_P22, AMF_COLOR_PRIMARIES_JEDEC_P22 }, ++}; ++ ++enum AMF_COLOR_PRIMARIES_ENUM amf_av_to_amf_color_prm(enum AVColorPrimaries prm) ++{ ++ int i; ++ for (i = 0; i < amf_countof(color_prm_map); i++) { ++ if (color_prm_map[i].av_color_prm == prm) { ++ return color_prm_map[i].amf_color_prm; ++ } ++ } ++ return AMF_COLOR_PRIMARIES_UNDEFINED; ++} ++ ++static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter *pThis, ++ const wchar_t *scope, const wchar_t *message) ++{ ++ AVAMFLogger *logger = (AVAMFLogger*)pThis; ++ av_log(logger->avcl, AV_LOG_DEBUG, "%ls: %ls", scope, message); ++} ++ ++static void AMF_CDECL_CALL AMFTraceWriter_Flush(AMFTraceWriter *pThis) {} ++ ++static AMFTraceWriterVtbl tracer_vtbl = ++{ ++ .Write = AMFTraceWriter_Write, ++ .Flush = AMFTraceWriter_Flush, ++}; ++ ++int amf_load_library(AVAMFContext *ctx) ++{ ++ AMFInit_Fn init_fun; ++ AMFQueryVersion_Fn version_fun; ++ AMF_RESULT res; ++ ++ ctx->library = dlopen(AMF_DLL_NAMEA, RTLD_NOW | RTLD_LOCAL); ++ AMF_RETURN_IF_FALSE(ctx->avclass, ctx->library != NULL, ++ AVERROR_UNKNOWN, "DLL %s failed to open\n", AMF_DLL_NAMEA); ++ ++ init_fun = (AMFInit_Fn)dlsym(ctx->library, AMF_INIT_FUNCTION_NAME); ++ AMF_RETURN_IF_FALSE(ctx->avclass, init_fun != NULL, ++ AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_INIT_FUNCTION_NAME); ++ ++ version_fun = (AMFQueryVersion_Fn)dlsym(ctx->library, AMF_QUERY_VERSION_FUNCTION_NAME); ++ AMF_RETURN_IF_FALSE(ctx->avclass, version_fun != NULL, ++ AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_QUERY_VERSION_FUNCTION_NAME); ++ ++ res = version_fun(&ctx->version); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_QUERY_VERSION_FUNCTION_NAME, res); ++ ++ res = init_fun(AMF_FULL_VERSION, &ctx->factory); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_INIT_FUNCTION_NAME, res); ++ ++ res = ctx->factory->pVtbl->GetTrace(ctx->factory, &ctx->trace); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "GetTrace() failed with error %d\n", res); ++ ++ res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "GetDebug() failed with error %d\n", res); ++ ++ return 0; ++} ++ ++int amf_create_context(AVAMFContext *ctx) ++{ ++ AMF_RESULT res; ++ ++ // configure AMF logger ++ ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, !!ctx->log_to_dbg); ++ if (ctx->log_to_dbg) ++ ctx->trace->pVtbl->SetWriterLevel(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, AMF_TRACE_TRACE); ++ ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_CONSOLE, 0); ++ ctx->trace->pVtbl->SetGlobalLevel(ctx->trace, AMF_TRACE_TRACE); ++ ++ // connect AMF logger to av_log ++ ctx->logger.vtbl = &tracer_vtbl; ++ ctx->logger.avcl = ctx->avclass; ++ ctx->trace->pVtbl->RegisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID, (AMFTraceWriter*)&ctx->logger, 1); ++ ctx->trace->pVtbl->SetWriterLevel(ctx->trace, FFMPEG_AMF_WRITER_ID, AMF_TRACE_TRACE); ++ ++ res = ctx->factory->pVtbl->CreateContext(ctx->factory, &ctx->context); ++ AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, ++ AVERROR_UNKNOWN, "CreateContext() failed with error %d\n", res); 
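++
++    // at this point the AMF runtime is loaded and a bare context exists;
++    // it still has no device backend, so callers must follow up with one
++    // of the amf_context_init_*() or amf_context_derive_*() helpers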
++
++    return 0;
++}
++
++void amf_unload_library(AVAMFContext *ctx)
++{
++    if (ctx->context) {
++        ctx->context->pVtbl->Terminate(ctx->context);
++        ctx->context->pVtbl->Release(ctx->context);
++        ctx->context = NULL;
++    }
++    if (ctx->trace) {
++        ctx->trace->pVtbl->UnregisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID);
++    }
++    if (ctx->library) {
++        dlclose(ctx->library);
++        ctx->library = NULL;
++    }
++    ctx->trace = NULL;
++    ctx->debug = NULL;
++    ctx->factory = NULL;
++    ctx->version = 0;
++}
++
++int amf_context_init_dx11(AVAMFContext *ctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_1);
++    if (res != AMF_OK) {
++        res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_0);
++    }
++
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via DX11\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX11 is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default DX11 device: %d\n", res);
++    }
++    return res;
++}
++
++int amf_context_init_dx9(AVAMFContext *ctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitDX9(ctx->context, NULL);
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via DX9\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX9 is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default DX9 device: %d\n", res);
++    }
++    return res;
++}
++
++int amf_context_init_vulkan(AVAMFContext *ctx)
++{
++    AMF_RESULT res;
++    AMFContext1* context1 = NULL;
++    AMFGuid guid = IID_AMFContext1();
++
++    res = ctx->context->pVtbl->QueryInterface(ctx->context, &guid, (void**)&context1);
++    AMF_RETURN_IF_FALSE(ctx->avclass, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext1() failed with error %d\n", res);
++
++    res = context1->pVtbl->InitVulkan(context1, NULL);
++    context1->pVtbl->Release(context1);
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via Vulkan\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via Vulkan is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default Vulkan device: %d\n", res);
++    }
++    return res;
++}
++
++int amf_context_init_opencl(AVAMFContext *ctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitOpenCL(ctx->context, NULL);
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF initialization succeeded via OpenCL\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via OpenCL is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to initialize on the default OpenCL device: %d\n", res);
++    }
++    return res;
++}
++
++#if CONFIG_D3D11VA
++int amf_context_derive_dx11(AVAMFContext *ctx, AVD3D11VADeviceContext *hwctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_1);
++    if (res != AMF_OK) {
++        res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_0);
++    }
++
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF derivation succeeded via DX11\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX11 is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to derive from the given DX11 device: %d\n", res);
++        return AVERROR(ENODEV);
++    }
++    return res;
++}
++#endif
++
++#if CONFIG_DXVA2
++int amf_context_derive_dx9(AVAMFContext *ctx, AVDXVA2DeviceContext *hwctx)
++{
++    AMF_RESULT res;
++    HRESULT hr;
++    HANDLE device_handle;
++    IDirect3DDevice9* device;
++
++    hr = IDirect3DDeviceManager9_OpenDeviceHandle(hwctx->devmgr, &device_handle);
++    if (FAILED(hr)) {
++        av_log(ctx->avclass, AV_LOG_ERROR, "Failed to open device handle for DX9 device: %lx\n", (unsigned long)hr);
++        return AVERROR_EXTERNAL;
++    }
++
++    hr = IDirect3DDeviceManager9_LockDevice(hwctx->devmgr, device_handle, &device, FALSE);
++    if (SUCCEEDED(hr)) {
++        IDirect3DDeviceManager9_UnlockDevice(hwctx->devmgr, device_handle, FALSE);
++    } else {
++        av_log(ctx->avclass, AV_LOG_ERROR, "Failed to lock device handle for DX9 device: %lx\n", (unsigned long)hr);
++        IDirect3DDeviceManager9_CloseDeviceHandle(hwctx->devmgr, device_handle);
++        return AVERROR_EXTERNAL;
++    }
++
++    IDirect3DDeviceManager9_CloseDeviceHandle(hwctx->devmgr, device_handle);
++
++    res = ctx->context->pVtbl->InitDX9(ctx->context, device);
++
++    IDirect3DDevice9_Release(device);
++
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF derivation succeeded via DX9\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via DX9 is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to derive from the given DX9 device: %d\n", res);
++        return AVERROR(ENODEV);
++    }
++    return res;
++}
++#endif
++
++#if CONFIG_OPENCL
++int amf_context_derive_opencl(AVAMFContext *ctx, AVOpenCLDeviceContext *hwctx)
++{
++    AMF_RESULT res;
++
++    res = ctx->context->pVtbl->InitOpenCL(ctx->context, hwctx->command_queue);
++    if (res == AMF_OK) {
++        av_log(ctx->avclass, AV_LOG_VERBOSE, "AMF derivation succeeded via OpenCL\n");
++    } else {
++        if (res == AMF_NOT_SUPPORTED)
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF via OpenCL is not supported on the given device\n");
++        else
++            av_log(ctx->avclass, AV_LOG_ERROR, "AMF failed to derive from the given OpenCL device: %d\n", res);
++        return AVERROR(ENODEV);
++    }
++    return res;
++}
++#endif
+Index: jellyfin-ffmpeg/libavcodec/amf.h
+===================================================================
+--- /dev/null
++++ jellyfin-ffmpeg/libavcodec/amf.h
+@@ -0,0 +1,156 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef AVCODEC_AMF_H ++#define AVCODEC_AMF_H ++ ++#include ++#include ++#include ++ ++#include "config.h" ++#include "avcodec.h" ++ ++#include "libavutil/pixdesc.h" ++ ++#if CONFIG_D3D11VA ++#include "libavutil/hwcontext_d3d11va.h" ++#endif ++ ++#if CONFIG_DXVA2 ++#define COBJMACROS ++#include "libavutil/hwcontext_dxva2.h" ++#endif ++ ++#if CONFIG_OPENCL ++#include "libavutil/hwcontext_opencl.h" ++#endif ++ ++#ifdef _WIN32 ++#include "compat/w32dlfcn.h" ++#else ++#include ++#endif ++ ++/** ++* Error handling helper ++*/ ++#define AMF_RETURN_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) \ ++ if (!(exp)) { \ ++ av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \ ++ return ret_value; \ ++ } ++ ++#define AMF_GOTO_FAIL_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) \ ++ if (!(exp)) { \ ++ av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \ ++ ret = ret_value; \ ++ goto fail; \ ++ } ++ ++/** ++* AMF trace writer callback class ++* Used to capture all AMF logging ++*/ ++typedef struct AVAMFLogger { ++ AMFTraceWriterVtbl *vtbl; ++ void *avcl; ++} AVAMFLogger; ++ ++typedef struct AVAMFContext { ++ void *avclass; ++ int log_to_dbg; ++ ++ // access to AMF runtime ++ amf_handle library; ///< handle to DLL library ++ AMFFactory *factory; ///< pointer to AMF factory ++ AMFDebug *debug; ///< pointer to AMF debug interface ++ AMFTrace *trace; ///< pointer to AMF trace interface ++ ++ amf_uint64 version; ///< version of AMF runtime ++ AVAMFLogger logger; ///< AMF writer registered with AMF ++ AMFContext *context; ///< AMF context ++} AVAMFContext; ++ ++/** ++* Surface/Pixel format ++*/ ++typedef struct FormatMap { ++ enum AVPixelFormat av_format; ++ enum AMF_SURFACE_FORMAT amf_format; ++} FormatMap; ++ ++extern const FormatMap format_map[]; ++enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt); ++enum AVPixelFormat amf_to_av_format(enum AMF_SURFACE_FORMAT fmt); ++ ++/** ++* Color Transfer ++*/ ++typedef struct ColorTransferMap { ++ enum AVColorTransferCharacteristic av_color_trc; ++ enum AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM amf_color_trc; ++} ColorTransferMap; ++ ++extern const ColorTransferMap color_trc_map[]; ++enum AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM amf_av_to_amf_color_trc(enum AVColorTransferCharacteristic trc); ++enum AVColorTransferCharacteristic amf_to_av_color_trc(enum AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM trc); ++ ++/** ++* Color Primaries ++*/ ++typedef struct ColorPrimariesMap { ++ enum AVColorPrimaries av_color_prm; ++ enum AMF_COLOR_PRIMARIES_ENUM amf_color_prm; ++} ColorPrimariesMap; ++ ++extern const ColorPrimariesMap color_prm_map[]; ++enum AMF_COLOR_PRIMARIES_ENUM amf_av_to_amf_color_prm(enum AVColorPrimaries prm); ++enum AVColorPrimaries amf_to_av_color_prm(enum AMF_COLOR_PRIMARIES_ENUM prm); ++ ++/** ++* Load AMFContext ++*/ ++int amf_load_library(AVAMFContext *ctx); ++int amf_create_context(AVAMFContext *ctx); ++void amf_unload_library(AVAMFContext *ctx); ++ ++/** ++* Init AMFContext standalone ++*/ ++int amf_context_init_dx11(AVAMFContext *ctx); ++int amf_context_init_dx9(AVAMFContext *ctx); ++int amf_context_init_vulkan(AVAMFContext *ctx); ++int amf_context_init_opencl(AVAMFContext *ctx); ++ ++/** ++* Derive AMFContext from builtin hwcontext ++*/ ++#if CONFIG_D3D11VA ++int amf_context_derive_dx11(AVAMFContext *ctx, AVD3D11VADeviceContext 
*hwctx); ++#endif ++ ++#if CONFIG_DXVA2 ++int amf_context_derive_dx9(AVAMFContext *ctx, AVDXVA2DeviceContext *hwctx); ++#endif ++ ++#if CONFIG_OPENCL ++int amf_context_derive_opencl(AVAMFContext *ctx, AVOpenCLDeviceContext *hwctx); ++#endif ++ ++#endif /* AVCODEC_AMF_H */ +Index: jellyfin-ffmpeg/libavcodec/amfenc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/amfenc.c ++++ jellyfin-ffmpeg/libavcodec/amfenc.c +@@ -16,227 +16,54 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#include "config.h" +- + #include "libavutil/avassert.h" +-#include "libavutil/imgutils.h" + #include "libavutil/hwcontext.h" +-#if CONFIG_D3D11VA +-#include "libavutil/hwcontext_d3d11va.h" +-#endif +-#if CONFIG_DXVA2 +-#define COBJMACROS +-#include "libavutil/hwcontext_dxva2.h" +-#endif ++#include "libavutil/imgutils.h" ++ + #include "libavutil/mem.h" +-#include "libavutil/pixdesc.h" + #include "libavutil/time.h" + + #include "amfenc.h" + #include "encode.h" + #include "internal.h" + +-#if CONFIG_D3D11VA +-#include +-#endif +- +-#ifdef _WIN32 +-#include "compat/w32dlfcn.h" +-#else +-#include +-#endif +- +-#define FFMPEG_AMF_WRITER_ID L"ffmpeg_amf" +- + #define PTS_PROP L"PtsProp" + +-const enum AVPixelFormat ff_amf_pix_fmts[] = { +- AV_PIX_FMT_NV12, +- AV_PIX_FMT_YUV420P, +-#if CONFIG_D3D11VA +- AV_PIX_FMT_D3D11, +-#endif +-#if CONFIG_DXVA2 +- AV_PIX_FMT_DXVA2_VLD, +-#endif +- AV_PIX_FMT_NONE +-}; +- +-typedef struct FormatMap { +- enum AVPixelFormat av_format; +- enum AMF_SURFACE_FORMAT amf_format; +-} FormatMap; +- +-static const FormatMap format_map[] = +-{ +- { AV_PIX_FMT_NONE, AMF_SURFACE_UNKNOWN }, +- { AV_PIX_FMT_NV12, AMF_SURFACE_NV12 }, +- { AV_PIX_FMT_BGR0, AMF_SURFACE_BGRA }, +- { AV_PIX_FMT_RGB0, AMF_SURFACE_RGBA }, +- { AV_PIX_FMT_GRAY8, AMF_SURFACE_GRAY8 }, +- { AV_PIX_FMT_YUV420P, AMF_SURFACE_YUV420P }, +- { AV_PIX_FMT_YUYV422, AMF_SURFACE_YUY2 }, +-}; +- +-static enum AMF_SURFACE_FORMAT amf_av_to_amf_format(enum AVPixelFormat fmt) +-{ +- int i; +- for (i = 0; i < amf_countof(format_map); i++) { +- if (format_map[i].av_format == fmt) { +- return format_map[i].amf_format; +- } +- } +- return AMF_SURFACE_UNKNOWN; +-} +- +-static void AMF_CDECL_CALL AMFTraceWriter_Write(AMFTraceWriter *pThis, +- const wchar_t *scope, const wchar_t *message) +-{ +- AmfTraceWriter *tracer = (AmfTraceWriter*)pThis; +- av_log(tracer->avctx, AV_LOG_DEBUG, "%ls: %ls", scope, message); // \n is provided from AMF +-} +- +-static void AMF_CDECL_CALL AMFTraceWriter_Flush(AMFTraceWriter *pThis) +-{ +-} +- +-static AMFTraceWriterVtbl tracer_vtbl = ++static int amf_init_context(AVCodecContext *avctx) + { +- .Write = AMFTraceWriter_Write, +- .Flush = AMFTraceWriter_Flush, +-}; ++ AMFEncContext *ctx = avctx->priv_data; ++ AVAMFContext *amfctx = NULL; ++ AMF_RESULT res; ++ int ret; + +-static int amf_load_library(AVCodecContext *avctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- AMFInit_Fn init_fun; +- AMFQueryVersion_Fn version_fun; +- AMF_RESULT res; ++ ctx->dts_delay = 0; ++ ctx->hwsurfaces_in_queue = 0; ++ ctx->hwsurfaces_in_queue_max = 16; + + ctx->delayed_frame = av_frame_alloc(); +- if (!ctx->delayed_frame) { ++ if (!ctx->delayed_frame) + return AVERROR(ENOMEM); +- } ++ + // hardcoded to current HW queue size - will realloc in timestamp_queue_enqueue() if too small + ctx->timestamp_list = av_fifo_alloc((avctx->max_b_frames + 16) * sizeof(int64_t)); +- if (!ctx->timestamp_list) { ++ if (!ctx->timestamp_list) + return 
AVERROR(ENOMEM); +- } +- ctx->dts_delay = 0; + ++ amfctx = av_mallocz(sizeof(AVAMFContext)); ++ if (!amfctx) ++ return AVERROR(ENOMEM); + +- ctx->library = dlopen(AMF_DLL_NAMEA, RTLD_NOW | RTLD_LOCAL); +- AMF_RETURN_IF_FALSE(ctx, ctx->library != NULL, +- AVERROR_UNKNOWN, "DLL %s failed to open\n", AMF_DLL_NAMEA); +- +- init_fun = (AMFInit_Fn)dlsym(ctx->library, AMF_INIT_FUNCTION_NAME); +- AMF_RETURN_IF_FALSE(ctx, init_fun != NULL, AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_INIT_FUNCTION_NAME); +- +- version_fun = (AMFQueryVersion_Fn)dlsym(ctx->library, AMF_QUERY_VERSION_FUNCTION_NAME); +- AMF_RETURN_IF_FALSE(ctx, version_fun != NULL, AVERROR_UNKNOWN, "DLL %s failed to find function %s\n", AMF_DLL_NAMEA, AMF_QUERY_VERSION_FUNCTION_NAME); +- +- res = version_fun(&ctx->version); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_QUERY_VERSION_FUNCTION_NAME, res); +- res = init_fun(AMF_FULL_VERSION, &ctx->factory); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "%s failed with error %d\n", AMF_INIT_FUNCTION_NAME, res); +- res = ctx->factory->pVtbl->GetTrace(ctx->factory, &ctx->trace); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetTrace() failed with error %d\n", res); +- res = ctx->factory->pVtbl->GetDebug(ctx->factory, &ctx->debug); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetDebug() failed with error %d\n", res); +- return 0; +-} +- +-#if CONFIG_D3D11VA +-static int amf_init_from_d3d11_device(AVCodecContext *avctx, AVD3D11VADeviceContext *hwctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- AMF_RESULT res; +- +- res = ctx->context->pVtbl->InitDX11(ctx->context, hwctx->device, AMF_DX11_1); +- if (res != AMF_OK) { +- if (res == AMF_NOT_SUPPORTED) +- av_log(avctx, AV_LOG_ERROR, "AMF via D3D11 is not supported on the given device.\n"); +- else +- av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on the given D3D11 device: %d.\n", res); +- return AVERROR(ENODEV); +- } +- +- return 0; +-} +-#endif +- +-#if CONFIG_DXVA2 +-static int amf_init_from_dxva2_device(AVCodecContext *avctx, AVDXVA2DeviceContext *hwctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- HANDLE device_handle; +- IDirect3DDevice9 *device; +- HRESULT hr; +- AMF_RESULT res; +- int ret; +- +- hr = IDirect3DDeviceManager9_OpenDeviceHandle(hwctx->devmgr, &device_handle); +- if (FAILED(hr)) { +- av_log(avctx, AV_LOG_ERROR, "Failed to open device handle for Direct3D9 device: %lx.\n", (unsigned long)hr); +- return AVERROR_EXTERNAL; +- } +- +- hr = IDirect3DDeviceManager9_LockDevice(hwctx->devmgr, device_handle, &device, FALSE); +- if (SUCCEEDED(hr)) { +- IDirect3DDeviceManager9_UnlockDevice(hwctx->devmgr, device_handle, FALSE); +- ret = 0; +- } else { +- av_log(avctx, AV_LOG_ERROR, "Failed to lock device handle for Direct3D9 device: %lx.\n", (unsigned long)hr); +- ret = AVERROR_EXTERNAL; +- } +- +- IDirect3DDeviceManager9_CloseDeviceHandle(hwctx->devmgr, device_handle); ++ ctx->amfctx = amfctx; ++ amfctx->avclass = avctx; ++ amfctx->log_to_dbg = ctx->log_to_dbg; + ++ ret = amf_load_library(amfctx); + if (ret < 0) + return ret; + +- res = ctx->context->pVtbl->InitDX9(ctx->context, device); +- +- IDirect3DDevice9_Release(device); +- +- if (res != AMF_OK) { +- if (res == AMF_NOT_SUPPORTED) +- av_log(avctx, AV_LOG_ERROR, "AMF via D3D9 is not supported on the given device.\n"); +- else +- av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on given D3D9 device: %d.\n", res); +- return AVERROR(ENODEV); +- } +- +- 
return 0; +-} +-#endif +- +-static int amf_init_context(AVCodecContext *avctx) +-{ +- AmfContext *ctx = avctx->priv_data; +- AMFContext1 *context1 = NULL; +- AMF_RESULT res; +- av_unused int ret; +- +- ctx->hwsurfaces_in_queue = 0; +- ctx->hwsurfaces_in_queue_max = 16; +- +- // configure AMF logger +- // the return of these functions indicates old state and do not affect behaviour +- ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, ctx->log_to_dbg != 0 ); +- if (ctx->log_to_dbg) +- ctx->trace->pVtbl->SetWriterLevel(ctx->trace, AMF_TRACE_WRITER_DEBUG_OUTPUT, AMF_TRACE_TRACE); +- ctx->trace->pVtbl->EnableWriter(ctx->trace, AMF_TRACE_WRITER_CONSOLE, 0); +- ctx->trace->pVtbl->SetGlobalLevel(ctx->trace, AMF_TRACE_TRACE); +- +- // connect AMF logger to av_log +- ctx->tracer.vtbl = &tracer_vtbl; +- ctx->tracer.avctx = avctx; +- ctx->trace->pVtbl->RegisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID,(AMFTraceWriter*)&ctx->tracer, 1); +- ctx->trace->pVtbl->SetWriterLevel(ctx->trace, FFMPEG_AMF_WRITER_ID, AMF_TRACE_TRACE); +- +- res = ctx->factory->pVtbl->CreateContext(ctx->factory, &ctx->context); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext() failed with error %d\n", res); ++ ret = amf_create_context(amfctx); ++ if (ret < 0) ++ return ret; + + // If a device was passed to the encoder, try to initialise from that. + if (avctx->hw_frames_ctx) { +@@ -251,16 +78,16 @@ static int amf_init_context(AVCodecConte + switch (frames_ctx->device_ctx->type) { + #if CONFIG_D3D11VA + case AV_HWDEVICE_TYPE_D3D11VA: +- ret = amf_init_from_d3d11_device(avctx, frames_ctx->device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx11(amfctx, frames_ctx->device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: +- ret = amf_init_from_dxva2_device(avctx, frames_ctx->device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx9(amfctx, frames_ctx->device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + default: +@@ -282,16 +109,16 @@ static int amf_init_context(AVCodecConte + switch (device_ctx->type) { + #if CONFIG_D3D11VA + case AV_HWDEVICE_TYPE_D3D11VA: +- ret = amf_init_from_d3d11_device(avctx, device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx11(amfctx, device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: +- ret = amf_init_from_dxva2_device(avctx, device_ctx->hwctx); +- if (ret < 0) +- return ret; ++ res = amf_context_derive_dx9(amfctx, device_ctx->hwctx); ++ if (res != AMF_OK) ++ return res; + break; + #endif + default: +@@ -305,40 +132,57 @@ static int amf_init_context(AVCodecConte + return AVERROR(ENOMEM); + + } else { +- res = ctx->context->pVtbl->InitDX11(ctx->context, NULL, AMF_DX11_1); +- if (res == AMF_OK) { +- av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via D3D11.\n"); +- } else { +- res = ctx->context->pVtbl->InitDX9(ctx->context, NULL); +- if (res == AMF_OK) { +- av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via D3D9.\n"); +- } else { +- AMFGuid guid = IID_AMFContext1(); +- res = ctx->context->pVtbl->QueryInterface(ctx->context, &guid, (void**)&context1); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "CreateContext1() failed with error %d\n", res); +- +- res = context1->pVtbl->InitVulkan(context1, NULL); +- context1->pVtbl->Release(context1); ++#ifdef _WIN32 ++ res = 
amf_context_init_dx11(amfctx);
++        if (res != AMF_OK) {
++            res = amf_context_init_dx9(amfctx);
++            if (res != AMF_OK) {
++#endif
++                res = amf_context_init_vulkan(amfctx);
+                 if (res != AMF_OK) {
+-                    if (res == AMF_NOT_SUPPORTED)
+-                        av_log(avctx, AV_LOG_ERROR, "AMF via Vulkan is not supported on the given device.\n");
+-                    else
+-                        av_log(avctx, AV_LOG_ERROR, "AMF failed to initialise on the given Vulkan device: %d.\n", res);
++                    av_log(avctx, AV_LOG_ERROR, "AMF initialisation is not supported.\n");
+                     return AVERROR(ENOSYS);
+                 }
+-                av_log(avctx, AV_LOG_VERBOSE, "AMF initialisation succeeded via Vulkan.\n");
++#ifdef _WIN32
+             }
+         }
++#endif
+     }
++
+     return 0;
+ }
+ 
++static int amf_check_hevc_encoder_10bit_support(AVCodecContext *avctx)
++{
++    AMFEncContext *ctx = avctx->priv_data;
++    AVAMFContext *amfctx = ctx->amfctx;
++    const wchar_t *codec_id = AMFVideoEncoder_HEVC;
++    AMF_RESULT res;
++
++    res = amfctx->factory->pVtbl->CreateComponent(amfctx->factory, amfctx->context, codec_id, &ctx->encoder);
++    AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_ENCODER_NOT_FOUND, "CreateComponent(%ls) failed with error %d\n", codec_id, res);
++
++    AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_COLOR_BIT_DEPTH, AMF_COLOR_BIT_DEPTH_10);
++    AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(EINVAL), "Assigning 10-bit property failed with error %d\n", res);
++
++    res = ctx->encoder->pVtbl->Init(ctx->encoder, AMF_SURFACE_P010, avctx->width, avctx->height);
++    if (res == AMF_OK) {
++        ctx->encoder->pVtbl->Terminate(ctx->encoder);
++        ctx->encoder->pVtbl->Release(ctx->encoder);
++        ctx->encoder = NULL;
++    } else {
++        ctx->encoder->pVtbl->Release(ctx->encoder);
++        ctx->encoder = NULL;
++        return AVERROR(EINVAL);
++    }
++    return res;
++}
++
+ static int amf_init_encoder(AVCodecContext *avctx)
+ {
+-    AmfContext *ctx = avctx->priv_data;
++    AMFEncContext *ctx = avctx->priv_data;
++    AVAMFContext *amfctx = ctx->amfctx;
+     const wchar_t *codec_id = NULL;
+-    AMF_RESULT res;
+     enum AVPixelFormat pix_fmt;
++    AMF_RESULT res;
+ 
+     switch (avctx->codec->id) {
+     case AV_CODEC_ID_H264:
+@@ -350,26 +194,70 @@ static int amf_init_encoder(AVCodecConte
+     default:
+         break;
+     }
+-    AMF_RETURN_IF_FALSE(ctx, codec_id != NULL, AVERROR(EINVAL), "Codec %d is not supported\n", avctx->codec->id);
++    AMF_RETURN_IF_FALSE(avctx, codec_id != NULL,
++        AVERROR(EINVAL), "Codec %d is not supported\n", avctx->codec->id);
+ 
+-    if (ctx->hw_frames_ctx)
+-        pix_fmt = ((AVHWFramesContext*)ctx->hw_frames_ctx->data)->sw_format;
+-    else
+-        pix_fmt = avctx->pix_fmt;
++    pix_fmt = avctx->hw_frames_ctx ? ((AVHWFramesContext*)avctx->hw_frames_ctx->data)->sw_format
++                                   : avctx->pix_fmt;
+ 
+     ctx->format = amf_av_to_amf_format(pix_fmt);
+-    AMF_RETURN_IF_FALSE(ctx, ctx->format != AMF_SURFACE_UNKNOWN, AVERROR(EINVAL),
+-                        "Format %s is not supported\n", av_get_pix_fmt_name(pix_fmt));
++    AMF_RETURN_IF_FALSE(avctx, ctx->format != AMF_SURFACE_UNKNOWN,
++        AVERROR(EINVAL), "Format %s is not supported\n", av_get_pix_fmt_name(pix_fmt));
++
++    ctx->bit_depth = 8;
++    if (pix_fmt == AV_PIX_FMT_P010) {
++        switch (avctx->codec->id) {
++        case AV_CODEC_ID_HEVC:
++            // GPU >= Navi or APU >= Renoir is required.
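++            // The probe below spins up a temporary HEVC encoder instance,
++            // sets AMF_VIDEO_ENCODER_HEVC_COLOR_BIT_DEPTH to 10 and tries
++            // to Init() it with a P010 surface to verify device support.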
++            res = amf_check_hevc_encoder_10bit_support(avctx);
++            if (res == AMF_OK) {
++                ctx->bit_depth = 10;
++            } else {
++                av_log(avctx, AV_LOG_ERROR, "HEVC 10-bit encoding is not supported by the given AMF device\n");
++                return res;
++            }
++            break;
++        default:
++            av_log(avctx, AV_LOG_ERROR, "10-bit encoding is not supported by AMF %s encoder\n", avctx->codec->name);
++            return AVERROR(EINVAL);
++        }
++    }
++
++    ctx->out_color_trc = amf_av_to_amf_color_trc(avctx->color_trc);
++    ctx->out_color_prm = amf_av_to_amf_color_prm(avctx->color_primaries);
++
++    switch (avctx->colorspace) {
++    case AVCOL_SPC_BT470BG:
++    case AVCOL_SPC_SMPTE170M:
++    case AVCOL_SPC_SMPTE240M:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_601;
++        break;
++    case AVCOL_SPC_BT709:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_709;
++        break;
++    case AVCOL_SPC_BT2020_NCL:
++    case AVCOL_SPC_BT2020_CL:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_2020;
++        break;
++    case AVCOL_SPC_RGB:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_JPEG;
++        break;
++    default:
++        ctx->out_color_profile = AMF_VIDEO_CONVERTER_COLOR_PROFILE_UNKNOWN;
++        break;
++    }
+ 
+-    res = ctx->factory->pVtbl->CreateComponent(ctx->factory, ctx->context, codec_id, &ctx->encoder);
+-    AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_ENCODER_NOT_FOUND, "CreateComponent(%ls) failed with error %d\n", codec_id, res);
++    res = amfctx->factory->pVtbl->CreateComponent(amfctx->factory, amfctx->context, codec_id, &ctx->encoder);
++    AMF_RETURN_IF_FALSE(avctx, res == AMF_OK,
++        AVERROR_ENCODER_NOT_FOUND, "CreateComponent(%ls) failed with error %d\n", codec_id, res);
+ 
+     return 0;
+ }
+ 
+-int av_cold ff_amf_encode_close(AVCodecContext *avctx)
++av_cold int ff_amf_encode_close(AVCodecContext *avctx)
+ {
+-    AmfContext *ctx = avctx->priv_data;
++    AMFEncContext *ctx = avctx->priv_data;
++    AVAMFContext *amfctx = ctx->amfctx;
+ 
+     if (ctx->delayed_surface) {
+         ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface);
+@@ -382,34 +270,33 @@ int av_cold ff_amf_encode_close(AVCodecC
+         ctx->encoder = NULL;
+     }
+ 
+-    if (ctx->context) {
+-        ctx->context->pVtbl->Terminate(ctx->context);
+-        ctx->context->pVtbl->Release(ctx->context);
+-        ctx->context = NULL;
+-    }
++    if (amfctx) {
++        amf_unload_library(amfctx);
++        av_freep(&ctx->amfctx);
++    }
++
++    ctx->delayed_drain = 0;
+     av_buffer_unref(&ctx->hw_device_ctx);
+     av_buffer_unref(&ctx->hw_frames_ctx);
+ 
+-    if (ctx->trace) {
+-        ctx->trace->pVtbl->UnregisterWriter(ctx->trace, FFMPEG_AMF_WRITER_ID);
+-    }
+-    if (ctx->library) {
+-        dlclose(ctx->library);
+-        ctx->library = NULL;
+-    }
+-    ctx->trace = NULL;
+-    ctx->debug = NULL;
+-    ctx->factory = NULL;
+-    ctx->version = 0;
+-    ctx->delayed_drain = 0;
+     av_frame_free(&ctx->delayed_frame);
+     av_fifo_freep(&ctx->timestamp_list);
+-
+     return 0;
+ }
+ 
+-static int amf_copy_surface(AVCodecContext *avctx, const AVFrame *frame,
+-    AMFSurface* surface)
++av_cold int ff_amf_encode_init(AVCodecContext *avctx)
++{
++    int ret;
++
++    if ((ret = amf_init_context(avctx)) == 0)
++        if ((ret = amf_init_encoder(avctx)) == 0)
++            return 0;
++
++    ff_amf_encode_close(avctx);
++    return ret;
++}
++
++static int amf_copy_surface(AVCodecContext *avctx,
++                            const AVFrame *frame,
++                            AMFSurface* surface)
+ {
+     AMFPlane *plane;
+     uint8_t *dst_data[4];
+@@ -425,16 +312,16 @@ static int amf_copy_surface(AVCodecConte
+         dst_data[i] = plane->pVtbl->GetNative(plane);
+         dst_linesize[i] = plane->pVtbl->GetHPitch(plane);
+     }
++
+     av_image_copy(dst_data, dst_linesize,
+                   (const uint8_t**)frame->data, frame->linesize,
frame->format, + avctx->width, avctx->height); +- + return 0; + } + + static inline int timestamp_queue_enqueue(AVCodecContext *avctx, int64_t timestamp) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; + if (av_fifo_space(ctx->timestamp_list) < sizeof(timestamp)) { + if (av_fifo_grow(ctx->timestamp_list, sizeof(timestamp)) < 0) { + return AVERROR(ENOMEM); +@@ -444,31 +331,30 @@ static inline int timestamp_queue_enqueu + return 0; + } + +-static int amf_copy_buffer(AVCodecContext *avctx, AVPacket *pkt, AMFBuffer *buffer) ++static int amf_copy_buffer(AVCodecContext *avctx, ++ AVPacket *pkt, ++ AMFBuffer *buffer) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; + int ret; +- AMFVariantStruct var = {0}; ++ AMFVariantStruct var = { 0 }; + int64_t timestamp = AV_NOPTS_VALUE; + int64_t size = buffer->pVtbl->GetSize(buffer); + +- if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) { ++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; +- } + memcpy(pkt->data, buffer->pVtbl->GetNative(buffer), size); + + switch (avctx->codec->id) { + case AV_CODEC_ID_H264: + buffer->pVtbl->GetProperty(buffer, AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE, &var); +- if(var.int64Value == AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE_IDR) { ++ if (var.int64Value == AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE_IDR) + pkt->flags = AV_PKT_FLAG_KEY; +- } + break; + case AV_CODEC_ID_HEVC: + buffer->pVtbl->GetProperty(buffer, AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE, &var); +- if (var.int64Value == AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE_IDR) { ++ if (var.int64Value == AMF_VIDEO_ENCODER_HEVC_OUTPUT_DATA_TYPE_IDR) + pkt->flags = AV_PKT_FLAG_KEY; +- } + break; + default: + break; +@@ -478,48 +364,33 @@ static int amf_copy_buffer(AVCodecContex + + pkt->pts = var.int64Value; // original pts + +- +- AMF_RETURN_IF_FALSE(ctx, av_fifo_size(ctx->timestamp_list) > 0, AVERROR_UNKNOWN, "timestamp_list is empty\n"); ++ AMF_RETURN_IF_FALSE(avctx, av_fifo_size(ctx->timestamp_list) > 0, AVERROR_UNKNOWN, "timestamp_list is empty\n"); + + av_fifo_generic_read(ctx->timestamp_list, ×tamp, sizeof(timestamp), NULL); + + // calc dts shift if max_b_frames > 0 + if (avctx->max_b_frames > 0 && ctx->dts_delay == 0) { + int64_t timestamp_last = AV_NOPTS_VALUE; +- AMF_RETURN_IF_FALSE(ctx, av_fifo_size(ctx->timestamp_list) > 0, AVERROR_UNKNOWN, +- "timestamp_list is empty while max_b_frames = %d\n", avctx->max_b_frames); +- av_fifo_generic_peek_at( +- ctx->timestamp_list, +- ×tamp_last, +- (av_fifo_size(ctx->timestamp_list) / sizeof(timestamp) - 1) * sizeof(timestamp_last), +- sizeof(timestamp_last), +- NULL); +- if (timestamp < 0 || timestamp_last < AV_NOPTS_VALUE) { ++ AMF_RETURN_IF_FALSE(avctx, av_fifo_size(ctx->timestamp_list) > 0, ++ AVERROR_UNKNOWN, "timestamp_list is empty while max_b_frames = %d\n", avctx->max_b_frames); ++ ++ av_fifo_generic_peek_at(ctx->timestamp_list, ++ ×tamp_last, ++ (av_fifo_size(ctx->timestamp_list) / sizeof(timestamp) - 1) * sizeof(timestamp_last), ++ sizeof(timestamp_last), ++ NULL); ++ ++ if (timestamp < 0 || timestamp_last < AV_NOPTS_VALUE) + return AVERROR(ERANGE); +- } + ctx->dts_delay = timestamp_last - timestamp; + } + pkt->dts = timestamp - ctx->dts_delay; + return 0; + } + +-// amfenc API implementation +-int ff_amf_encode_init(AVCodecContext *avctx) +-{ +- int ret; +- +- if ((ret = amf_load_library(avctx)) == 0) { +- if ((ret = amf_init_context(avctx)) == 0) { +- if ((ret = amf_init_encoder(avctx)) == 0) { +- return 0; +- } +- } +- } +- 
ff_amf_encode_close(avctx); +- return ret; +-} +- +-static AMF_RESULT amf_set_property_buffer(AMFSurface *object, const wchar_t *name, AMFBuffer *val) ++static AMF_RESULT amf_set_property_buffer(AMFSurface *object, ++ const wchar_t *name, ++ AMFBuffer *val) + { + AMF_RESULT res; + AMFVariantStruct var; +@@ -533,15 +404,16 @@ static AMF_RESULT amf_set_property_buffe + res = AMFVariantAssignInterface(&var, amf_interface); + amf_interface->pVtbl->Release(amf_interface); + } +- if (res == AMF_OK) { ++ if (res == AMF_OK) + res = object->pVtbl->SetProperty(object, name, var); +- } + AMFVariantClear(&var); + } + return res; + } + +-static AMF_RESULT amf_get_property_buffer(AMFData *object, const wchar_t *name, AMFBuffer **val) ++static AMF_RESULT amf_get_property_buffer(AMFData *object, ++ const wchar_t *name, ++ AMFBuffer **val) + { + AMF_RESULT res; + AMFVariantStruct var; +@@ -591,7 +463,8 @@ static void amf_release_buffer_with_fram + + int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) + { +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; ++ AVAMFContext *amfctx = ctx->amfctx; + AMFSurface *surface; + AMF_RESULT res; + int ret; +@@ -618,10 +491,9 @@ int ff_amf_receive_packet(AVCodecContext + if (res == AMF_INPUT_FULL) { + ctx->delayed_drain = 1; // input queue is full: resubmit Drain() in ff_amf_receive_packet + } else { +- if (res == AMF_OK) { ++ if (res == AMF_OK) + ctx->eof = 1; // drain started +- } +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Drain() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "Drain() failed with error %d\n", res); + } + } + } +@@ -642,8 +514,8 @@ int ff_amf_receive_packet(AVCodecContext + + texture->lpVtbl->SetPrivateData(texture, &AMFTextureArrayIndexGUID, sizeof(index), &index); + +- res = ctx->context->pVtbl->CreateSurfaceFromDX11Native(ctx->context, texture, &surface, NULL); // wrap to AMF surface +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX11Native() failed with error %d\n", res); ++ res = amfctx->context->pVtbl->CreateSurfaceFromDX11Native(amfctx->context, texture, &surface, NULL); // wrap to AMF surface ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX11Native() failed with error %d\n", res); + + hw_surface = 1; + } +@@ -654,8 +526,8 @@ int ff_amf_receive_packet(AVCodecContext + { + IDirect3DSurface9 *texture = (IDirect3DSurface9 *)frame->data[3]; // actual texture + +- res = ctx->context->pVtbl->CreateSurfaceFromDX9Native(ctx->context, texture, &surface, NULL); // wrap to AMF surface +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX9Native() failed with error %d\n", res); ++ res = amfctx->context->pVtbl->CreateSurfaceFromDX9Native(amfctx->context, texture, &surface, NULL); // wrap to AMF surface ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(ENOMEM), "CreateSurfaceFromDX9Native() failed with error %d\n", res); + + hw_surface = 1; + } +@@ -663,8 +535,8 @@ int ff_amf_receive_packet(AVCodecContext + #endif + default: + { +- res = ctx->context->pVtbl->AllocSurface(ctx->context, AMF_MEMORY_HOST, ctx->format, avctx->width, avctx->height, &surface); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR(ENOMEM), "AllocSurface() failed with error %d\n", res); ++ res = amfctx->context->pVtbl->AllocSurface(amfctx->context, AMF_MEMORY_HOST, ctx->format, avctx->width, avctx->height, &surface); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR(ENOMEM), 
"AllocSurface() failed with error %d\n", res); + amf_copy_surface(avctx, frame, surface); + } + break; +@@ -676,11 +548,12 @@ int ff_amf_receive_packet(AVCodecContext + // input HW surfaces can be vertically aligned by 16; tell AMF the real size + surface->pVtbl->SetCrop(surface, 0, 0, frame->width, frame->height); + +- frame_ref_storage_buffer = amf_create_buffer_with_frame_ref(frame, ctx->context); +- AMF_RETURN_IF_FALSE(ctx, frame_ref_storage_buffer != NULL, AVERROR(ENOMEM), "create_buffer_with_frame_ref() returned NULL\n"); ++ frame_ref_storage_buffer = amf_create_buffer_with_frame_ref(frame, amfctx->context); ++ AMF_RETURN_IF_FALSE(avctx, frame_ref_storage_buffer != NULL, AVERROR(ENOMEM), "create_buffer_with_frame_ref() returned NULL\n"); + + res = amf_set_property_buffer(surface, L"av_frame_ref", frame_ref_storage_buffer); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "SetProperty failed for \"av_frame_ref\" with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "SetProperty failed for \"av_frame_ref\" with error %d\n", res); ++ + ctx->hwsurfaces_in_queue++; + frame_ref_storage_buffer->pVtbl->Release(frame_ref_storage_buffer); + } +@@ -690,13 +563,9 @@ int ff_amf_receive_packet(AVCodecContext + + switch (avctx->codec->id) { + case AV_CODEC_ID_H264: +- AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_INSERT_AUD, !!ctx->aud); +- break; ++ AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_INSERT_AUD, !!ctx->aud); break; + case AV_CODEC_ID_HEVC: +- AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_HEVC_INSERT_AUD, !!ctx->aud); +- break; +- default: +- break; ++ AMF_ASSIGN_PROPERTY_INT64(res, surface, AMF_VIDEO_ENCODER_HEVC_INSERT_AUD, !!ctx->aud); break; + } + + // submit surface +@@ -707,16 +576,14 @@ int ff_amf_receive_packet(AVCodecContext + } else { + int64_t pts = frame->pts; + surface->pVtbl->Release(surface); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "SubmitInput() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "SubmitInput() failed with error %d\n", res); + + av_frame_unref(frame); +- if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) { ++ if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) + return ret; +- } + } + } + +- + do { + block_and_wait = 0; + // poll data +@@ -733,14 +600,14 @@ int ff_amf_receive_packet(AVCodecContext + if (data->pVtbl->HasProperty(data, L"av_frame_ref")) { + AMFBuffer *frame_ref_storage_buffer; + res = amf_get_property_buffer(data, L"av_frame_ref", &frame_ref_storage_buffer); +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "GetProperty failed for \"av_frame_ref\" with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "GetProperty failed for \"av_frame_ref\" with error %d\n", res); + amf_release_buffer_with_frame_ref(frame_ref_storage_buffer); + ctx->hwsurfaces_in_queue--; + } + + data->pVtbl->Release(data); + +- AMF_RETURN_IF_FALSE(ctx, ret >= 0, ret, "amf_copy_buffer() failed with error %d\n", ret); ++ AMF_RETURN_IF_FALSE(avctx, ret >= 0, ret, "amf_copy_buffer() failed with error %d\n", ret); + + if (ctx->delayed_surface != NULL) { // try to resubmit frame + res = ctx->encoder->pVtbl->SubmitInput(ctx->encoder, (AMFData*)ctx->delayed_surface); +@@ -749,11 +616,10 @@ int ff_amf_receive_packet(AVCodecContext + ctx->delayed_surface->pVtbl->Release(ctx->delayed_surface); + ctx->delayed_surface = NULL; + av_frame_unref(ctx->delayed_frame); +- AMF_RETURN_IF_FALSE(ctx, res == 
AMF_OK, AVERROR_UNKNOWN, "Repeated SubmitInput() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated SubmitInput() failed with error %d\n", res); + +- if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) { ++ if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) + return ret; +- } + } else { + av_log(avctx, AV_LOG_WARNING, "Data acquired but delayed frame submission got AMF_INPUT_FULL- should not happen\n"); + } +@@ -762,24 +628,26 @@ int ff_amf_receive_packet(AVCodecContext + if (res != AMF_INPUT_FULL) { + ctx->delayed_drain = 0; + ctx->eof = 1; // drain started +- AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated Drain() failed with error %d\n", res); ++ AMF_RETURN_IF_FALSE(avctx, res == AMF_OK, AVERROR_UNKNOWN, "Repeated Drain() failed with error %d\n", res); + } else { + av_log(avctx, AV_LOG_WARNING, "Data acquired but delayed drain submission got AMF_INPUT_FULL- should not happen\n"); + } + } +- } else if (ctx->delayed_surface != NULL || ctx->delayed_drain || (ctx->eof && res_query != AMF_EOF) || (ctx->hwsurfaces_in_queue >= ctx->hwsurfaces_in_queue_max)) { ++ } else if (ctx->delayed_surface != NULL || ++ ctx->delayed_drain || ++ (ctx->eof && res_query != AMF_EOF) || ++ (ctx->hwsurfaces_in_queue >= ctx->hwsurfaces_in_queue_max)) { + block_and_wait = 1; + av_usleep(1000); // wait and poll again + } + } while (block_and_wait); + +- if (res_query == AMF_EOF) { ++ if (res_query == AMF_EOF) + ret = AVERROR_EOF; +- } else if (data == NULL) { ++ else if (data == NULL) + ret = AVERROR(EAGAIN); +- } else { ++ else + ret = 0; +- } + return ret; + } + +Index: jellyfin-ffmpeg/libavcodec/amfenc.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/amfenc.h ++++ jellyfin-ffmpeg/libavcodec/amfenc.h +@@ -1,64 +1,46 @@ + /* +-* This file is part of FFmpeg. +-* +-* FFmpeg is free software; you can redistribute it and/or +-* modify it under the terms of the GNU Lesser General Public +-* License as published by the Free Software Foundation; either +-* version 2.1 of the License, or (at your option) any later version. +-* +-* FFmpeg is distributed in the hope that it will be useful, +-* but WITHOUT ANY WARRANTY; without even the implied warranty of +-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-* Lesser General Public License for more details. +-* +-* You should have received a copy of the GNU Lesser General Public +-* License along with FFmpeg; if not, write to the Free Software +-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +-*/ ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + + #ifndef AVCODEC_AMFENC_H + #define AVCODEC_AMFENC_H + +-#include +- + #include + #include + + #include "libavutil/fifo.h" + +-#include "avcodec.h" ++#include "amf.h" + #include "hwconfig.h" + +- +-/** +-* AMF trace writer callback class +-* Used to capture all AMF logging +-*/ +- +-typedef struct AmfTraceWriter { +- AMFTraceWriterVtbl *vtbl; +- AVCodecContext *avctx; +-} AmfTraceWriter; +- + /** + * AMF encoder context + */ +- +-typedef struct AmfContext { +- AVClass *avclass; +- // access to AMF runtime +- amf_handle library; ///< handle to DLL library +- AMFFactory *factory; ///< pointer to AMF factory +- AMFDebug *debug; ///< pointer to AMF debug interface +- AMFTrace *trace; ///< pointer to AMF trace interface +- +- amf_uint64 version; ///< version of AMF runtime +- AmfTraceWriter tracer; ///< AMF writer registered with AMF +- AMFContext *context; ///< AMF context +- //encoder +- AMFComponent *encoder; ///< AMF encoder object +- amf_bool eof; ///< flag indicating EOF happened +- AMF_SURFACE_FORMAT format; ///< AMF surface format ++typedef struct AMFEncContext { ++ void *avclass; ++ void *amfctx; ++ ++ // encoder ++ AMFComponent *encoder; ///< AMF encoder object ++ amf_bool eof; ///< flag indicating EOF happened ++ AMF_SURFACE_FORMAT format; ///< AMF surface format ++ AMF_VIDEO_CONVERTER_COLOR_PROFILE_ENUM out_color_profile; ++ AMF_COLOR_TRANSFER_CHARACTERISTIC_ENUM out_color_trc; ++ AMF_COLOR_PRIMARIES_ENUM out_color_prm; + + AVBufferRef *hw_device_ctx; ///< pointer to HW accelerator (decoder) + AVBufferRef *hw_frames_ctx; ///< pointer to HW accelerator (frame allocator) +@@ -76,24 +58,25 @@ typedef struct AmfContext { + int64_t dts_delay; + + // common encoder option options +- + int log_to_dbg; + + // Static options, have to be set before Init() call + int usage; + int profile; + int level; +- int preanalysis; ++ int pre_encode; + int quality; ++ int bit_depth; ++ int qvbr_level; + int b_frame_delta_qp; + int ref_b_frame_delta_qp; + + // Dynamic options, can be set after Init() call +- + int rate_control_mode; + int enforce_hrd; + int filler_data; + int enable_vbaq; ++ int enable_hmqb; + int skip_frame; + int qp_i; + int qp_p; +@@ -108,7 +91,6 @@ typedef struct AmfContext { + int aud; + + // HEVC - specific options +- + int gops_per_idr; + int header_insertion_mode; + int min_qp_i; +@@ -116,7 +98,7 @@ typedef struct AmfContext { + int min_qp_p; + int max_qp_p; + int tier; +-} AmfContext; ++} AMFEncContext; + + extern const AVCodecHWConfigInternal *const ff_amfenc_hw_configs[]; + +@@ -134,18 +116,4 @@ int ff_amf_encode_close(AVCodecContext * + */ + int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt); + +-/** +-* Supported formats +-*/ +-extern const enum AVPixelFormat ff_amf_pix_fmts[]; +- +-/** +-* Error handling helper +-*/ +-#define AMF_RETURN_IF_FALSE(avctx, exp, ret_value, /*message,*/ ...) 
\ +- if (!(exp)) { \ +- av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \ +- return ret_value; \ +- } +- +-#endif //AVCODEC_AMFENC_H ++#endif /* AVCODEC_AMFENC_H */ +Index: jellyfin-ffmpeg/libavcodec/amfenc_h264.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/amfenc_h264.c ++++ jellyfin-ffmpeg/libavcodec/amfenc_h264.c +@@ -16,111 +16,102 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +- + #include "libavutil/internal.h" + #include "libavutil/opt.h" + #include "amfenc.h" + #include "internal.h" + +-#define OFFSET(x) offsetof(AmfContext, x) ++#define OFFSET(x) offsetof(AMFEncContext, x) + #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM ++#define ENUM(a, b, c, d) { a, b, 0, AV_OPT_TYPE_CONST, { .i64 = c }, 0, 0, VE, d } + +-static const AVOption options[] = { +- // Static +- /// Usage +- { "usage", "Encoder Usage", OFFSET(usage), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_USAGE_TRANSCONDING }, AMF_VIDEO_ENCODER_USAGE_TRANSCONDING, AMF_VIDEO_ENCODER_USAGE_WEBCAM, VE, "usage" }, +- { "transcoding", "Generic Transcoding", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_TRANSCONDING }, 0, 0, VE, "usage" }, +- { "ultralowlatency","", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_ULTRA_LOW_LATENCY }, 0, 0, VE, "usage" }, +- { "lowlatency", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY }, 0, 0, VE, "usage" }, +- { "webcam", "Webcam", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_USAGE_WEBCAM }, 0, 0, VE, "usage" }, +- +- /// Profile, +- { "profile", "Profile", OFFSET(profile),AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_PROFILE_MAIN }, AMF_VIDEO_ENCODER_PROFILE_BASELINE, AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH, VE, "profile" }, +- { "main", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_MAIN }, 0, 0, VE, "profile" }, +- { "high", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_HIGH }, 0, 0, VE, "profile" }, +- { "constrained_baseline", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_BASELINE }, 0, 0, VE, "profile" }, +- { "constrained_high", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH }, 0, 0, VE, "profile" }, +- +- /// Profile Level +- { "level", "Profile Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 62, VE, "level" }, +- { "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, VE, "level" }, +- { "1.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 10 }, 0, 0, VE, "level" }, +- { "1.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 11 }, 0, 0, VE, "level" }, +- { "1.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 12 }, 0, 0, VE, "level" }, +- { "1.3", "", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, 0, 0, VE, "level" }, +- { "2.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 20 }, 0, 0, VE, "level" }, +- { "2.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 21 }, 0, 0, VE, "level" }, +- { "2.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 22 }, 0, 0, VE, "level" }, +- { "3.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, 0, 0, VE, "level" }, +- { "3.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, 0, 0, VE, "level" }, +- { "3.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, 0, 0, VE, "level" }, +- { "4.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, 0, 0, VE, "level" }, +- { "4.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, 0, 0, VE, "level" }, +- { "4.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, 0, 0, VE, "level" }, +- { "5.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, 0, 0, VE, "level" }, +- { "5.1", "", 0, AV_OPT_TYPE_CONST, 
{ .i64 = 51 }, 0, 0, VE, "level" }, +- { "5.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, 0, 0, VE, "level" }, +- { "6.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = 60 }, 0, 0, VE, "level" }, +- { "6.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = 61 }, 0, 0, VE, "level" }, +- { "6.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = 62 }, 0, 0, VE, "level" }, +- +- +- /// Quality Preset +- { "quality", "Quality Preference", OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED }, AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED, AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY, VE, "quality" }, +- { "speed", "Prefer Speed", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED }, 0, 0, VE, "quality" }, +- { "balanced", "Balanced", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED }, 0, 0, VE, "quality" }, +- { "quality", "Prefer Quality", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY }, 0, 0, VE, "quality" }, +- +- // Dynamic +- /// Rate Control Method +- { "rc", "Rate Control Method", OFFSET(rate_control_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR, VE, "rc" }, +- { "cqp", "Constant Quantization Parameter", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP }, 0, 0, VE, "rc" }, +- { "cbr", "Constant Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR }, 0, 0, VE, "rc" }, +- { "vbr_peak", "Peak Contrained Variable Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR }, 0, 0, VE, "rc" }, +- { "vbr_latency", "Latency Constrained Variable Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR }, 0, 0, VE, "rc" }, +- +- /// Enforce HRD, Filler Data, VBAQ, Frame Skipping +- { "enforce_hrd", "Enforce HRD", OFFSET(enforce_hrd), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, +- { "filler_data", "Filler Data Enable", OFFSET(filler_data), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, +- { "vbaq", "Enable VBAQ", OFFSET(enable_vbaq), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, +- { "frame_skipping", "Rate Control Based Frame Skip", OFFSET(skip_frame), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, +- +- /// QP Values +- { "qp_i", "Quantization Parameter for I-Frame", OFFSET(qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "qp_p", "Quantization Parameter for P-Frame", OFFSET(qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "qp_b", "Quantization Parameter for B-Frame", OFFSET(qp_b), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- +- /// Pre-Pass, Pre-Analysis, Two-Pass +- { "preanalysis", "Pre-Analysis Mode", OFFSET(preanalysis), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE, NULL }, +- +- /// Maximum Access Unit Size +- { "max_au_size", "Maximum Access Unit Size for rate control (in bits)", OFFSET(max_au_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, +- +- /// Header Insertion Spacing +- { "header_spacing", "Header Insertion Spacing", OFFSET(header_spacing), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, +- +- /// B-Frames +- // BPicturesPattern=bf +- { "bf_delta_qp", "B-Picture Delta QP", OFFSET(b_frame_delta_qp), AV_OPT_TYPE_INT, { .i64 = 4 }, -10, 10, VE }, +- { "bf_ref", "Enable Reference to B-Frames", OFFSET(b_frame_ref), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, +- { "bf_ref_delta_qp","Reference B-Picture Delta QP", 
OFFSET(ref_b_frame_delta_qp), AV_OPT_TYPE_INT, { .i64 = 4 }, -10, 10, VE }, +- +- /// Intra-Refresh +- { "intra_refresh_mb","Intra Refresh MBs Number Per Slot in Macroblocks", OFFSET(intra_refresh_mb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, +- +- /// coder +- { "coder", "Coding Type", OFFSET(coding_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_UNDEFINED }, AMF_VIDEO_ENCODER_UNDEFINED, AMF_VIDEO_ENCODER_CALV, VE, "coder" }, +- { "auto", "Automatic", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_UNDEFINED }, 0, 0, VE, "coder" }, +- { "cavlc", "Context Adaptive Variable-Length Coding", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_CALV }, 0, 0, VE, "coder" }, +- { "cabac", "Context Adaptive Binary Arithmetic Coding", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_CABAC }, 0, 0, VE, "coder" }, +- +- { "me_half_pel", "Enable ME Half Pixel", OFFSET(me_half_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, +- { "me_quarter_pel", "Enable ME Quarter Pixel", OFFSET(me_quarter_pel),AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, ++static const enum AVPixelFormat ff_amfenc_h264_pix_fmts[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++#if CONFIG_D3D11VA ++ AV_PIX_FMT_D3D11, ++#endif ++#if CONFIG_DXVA2 ++ AV_PIX_FMT_DXVA2_VLD, ++#endif ++ AV_PIX_FMT_NONE ++}; + +- { "aud", "Inserts AU Delimiter NAL unit", OFFSET(aud) ,AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++static const AVOption options[] = { ++ { "usage", "Encoder Usage", OFFSET(usage), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_USAGE_TRANSCODING }, AMF_VIDEO_ENCODER_USAGE_TRANSCODING, AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY_HIGH_QUALITY, VE, "usage" }, ++ ENUM("transcoding", "Transcoding, video editing", AMF_VIDEO_ENCODER_USAGE_TRANSCODING, "usage"), ++ ENUM("ultralowlatency", "Video game streaming", AMF_VIDEO_ENCODER_USAGE_ULTRA_LOW_LATENCY, "usage"), ++ ENUM("lowlatency", "Video collaboration, RDP", AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY, "usage"), ++ ENUM("webcam", "Video conferencing", AMF_VIDEO_ENCODER_USAGE_WEBCAM, "usage"), ++ ENUM("highquality", "High-quality encoding", AMF_VIDEO_ENCODER_USAGE_HIGH_QUALITY, "usage"), ++ ENUM("llhighquality", "High-quality encoding (low latency)", AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY_HIGH_QUALITY, "usage"), ++ ++ { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_PROFILE_MAIN }, AMF_VIDEO_ENCODER_PROFILE_BASELINE, AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH, VE, "profile" }, ++ ENUM("main", "", AMF_VIDEO_ENCODER_PROFILE_MAIN, "profile"), ++ ENUM("high", "", AMF_VIDEO_ENCODER_PROFILE_HIGH, "profile"), ++ ENUM("constrained_baseline", "", AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_BASELINE, "profile"), ++ ENUM("constrained_high", "", AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH, "profile"), ++ ++ { "level", "Profile Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 62, VE, "level" }, ++ ENUM("auto", "", 0, "level"), ++ ENUM("1.0", "", 10, "level"), ++ ENUM("1.1", "", 11, "level"), ++ ENUM("1.2", "", 12, "level"), ++ ENUM("1.3", "", 13, "level"), ++ ENUM("2.0", "", 20, "level"), ++ ENUM("2.1", "", 21, "level"), ++ ENUM("2.2", "", 22, "level"), ++ ENUM("3.0", "", 30, "level"), ++ ENUM("3.1", "", 31, "level"), ++ ENUM("3.2", "", 32, "level"), ++ ENUM("4.0", "", 40, "level"), ++ ENUM("4.1", "", 41, "level"), ++ ENUM("4.2", "", 42, "level"), ++ ENUM("5.0", "", 50, "level"), ++ ENUM("5.1", "", 51, "level"), ++ ENUM("5.2", "", 52, "level"), ++ ENUM("6.0", "", 60, "level"), ++ ENUM("6.1", "", 61, "level"), ++ ENUM("6.2", "", 62, "level"), ++ ++ { "quality", "Quality 
Preset", OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED }, AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED, AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY, VE, "quality" }, ++ ENUM("speed", "Prefer Speed", AMF_VIDEO_ENCODER_QUALITY_PRESET_SPEED, "quality"), ++ ENUM("balanced", "Balanced", AMF_VIDEO_ENCODER_QUALITY_PRESET_BALANCED, "quality"), ++ ENUM("quality", "Prefer Quality", AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY, "quality"), ++ ++ { "rc", "Rate Control Method", OFFSET(rate_control_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR, VE, "rc" }, ++ ENUM("cqp", "Constant Quantization Parameter", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP, "rc"), ++ ENUM("cbr", "Constant Bitrate", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR, "rc"), ++ ENUM("vbr_peak", "Peak Constrained Variable Bitrate", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR, "rc"), ++ ENUM("vbr_latency", "Latency Constrained Variable Bitrate", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR, "rc"), ++ ENUM("qvbr", "Quality-defined Variable Bitrate", AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR, "rc"), ++ ++ { "preanalysis", "Enable Pre-Encode/Analysis for Rate Control (2-Pass)", OFFSET(pre_encode), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "vbaq", "Enable VBAQ", OFFSET(enable_vbaq), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "hmqb", "Enable High Motion Quality Boost", OFFSET(enable_hmqb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "enforce_hrd", "Enforce HRD", OFFSET(enforce_hrd), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "filler_data", "Filler Data Enable", OFFSET(filler_data), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "frame_skipping", "Rate Control Based Frame Skip", OFFSET(skip_frame), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "qvbr_level", "Quality level for QVBR rate control", OFFSET(qvbr_level), AV_OPT_TYPE_INT, { .i64 = 23 }, 1, 51, VE }, ++ { "qp_i", "Quantization Parameter for I-Frame", OFFSET(qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, ++ { "qp_p", "Quantization Parameter for P-Frame", OFFSET(qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, ++ { "qp_b", "Quantization Parameter for B-Frame", OFFSET(qp_b), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, ++ { "max_au_size", "Maximum Access Unit Size for rate control (in bits)", OFFSET(max_au_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, ++ { "header_spacing", "Header Insertion Spacing", OFFSET(header_spacing), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, ++ { "bf_delta_qp", "B-Picture Delta QP", OFFSET(b_frame_delta_qp), AV_OPT_TYPE_INT, { .i64 = 4 }, -10, 10, VE }, ++ { "bf_ref", "Enable Reference to B-Frames", OFFSET(b_frame_ref), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, ++ { "bf_ref_delta_qp", "Reference B-Picture Delta QP", OFFSET(ref_b_frame_delta_qp), AV_OPT_TYPE_INT, { .i64 = 4 }, -10, 10, VE }, ++ { "intra_refresh_mb", "Intra Refresh MBs Number Per Slot in Macroblocks", OFFSET(intra_refresh_mb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, ++ ++ { "coder", "Coding Type", OFFSET(coding_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_UNDEFINED }, AMF_VIDEO_ENCODER_UNDEFINED, AMF_VIDEO_ENCODER_CALV, VE, "coder" }, ++ ENUM("auto", "Automatic", AMF_VIDEO_ENCODER_UNDEFINED, "coder"), ++ ENUM("cavlc", "Context Adaptive Variable-Length Coding", AMF_VIDEO_ENCODER_CALV, "coder"), ++ ENUM("cabac", "Context Adaptive 
Binary Arithmetic Coding", AMF_VIDEO_ENCODER_CABAC, "coder"), + +- { "log_to_dbg", "Enable AMF logging to debug output", OFFSET(log_to_dbg) , AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, ++ { "me_half_pel", "Enable ME Half Pixel", OFFSET(me_half_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, ++ { "me_quarter_pel", "Enable ME Quarter Pixel", OFFSET(me_quarter_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE }, + ++ { "log_to_dbg", "Enable AMF logging to debug output", OFFSET(log_to_dbg), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, + { NULL } + }; + +@@ -128,7 +119,7 @@ static av_cold int amf_encode_init_h264( + { + int ret = 0; + AMF_RESULT res = AMF_OK; +- AmfContext *ctx = avctx->priv_data; ++ AMFEncContext *ctx = avctx->priv_data; + AMFVariantStruct var = { 0 }; + amf_int64 profile = 0; + amf_int64 profile_level = 0; +@@ -136,13 +127,13 @@ static av_cold int amf_encode_init_h264( + AMFGuid guid; + AMFRate framerate; + AMFSize framesize = AMFConstructSize(avctx->width, avctx->height); ++ int probed_rc_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN; + int deblocking_filter = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0; + +- if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { ++ if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = AMFConstructRate(avctx->framerate.num, avctx->framerate.den); +- } else { ++ else + framerate = AMFConstructRate(avctx->time_base.den, avctx->time_base.num * avctx->ticks_per_frame); +- } + + if ((ret = ff_amf_encode_init(avctx)) != 0) + return ret; +@@ -171,62 +162,84 @@ static av_cold int amf_encode_init_h264( + profile = AMF_VIDEO_ENCODER_PROFILE_CONSTRAINED_HIGH; + break; + } +- if (profile == 0) { ++ if (profile == 0) + profile = ctx->profile; +- } + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PROFILE, profile); + + profile_level = avctx->level; +- if (profile_level == FF_LEVEL_UNKNOWN) { ++ if (profile_level == FF_LEVEL_UNKNOWN) + profile_level = ctx->level; +- } +- if (profile_level != 0) { ++ if (profile_level != 0) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PROFILE_LEVEL, profile_level); +- } + + // Maximum Reference Frames +- if (avctx->refs != -1) { ++ if (avctx->refs != -1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_NUM_REFRAMES, avctx->refs); +- } + if (avctx->sample_aspect_ratio.den && avctx->sample_aspect_ratio.num) { + AMFRatio ratio = AMFConstructRatio(avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den); + AMF_ASSIGN_PROPERTY_RATIO(res, ctx->encoder, AMF_VIDEO_ENCODER_ASPECT_RATIO, ratio); + } + +- /// Color Range (Partial/TV/MPEG or Full/PC/JPEG) +- if (avctx->color_range == AVCOL_RANGE_JPEG) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FULL_RANGE_COLOR, 1); ++ // Auto detect rate control method ++ if (ctx->qp_i != -1 || ctx->qp_p != -1 || ctx->qp_b != -1) { ++ probed_rc_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP; ++ } else if (avctx->rc_max_rate > 0 ) { ++ probed_rc_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; ++ } else { ++ probed_rc_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR; + } + +- // autodetect rate control method + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_UNKNOWN) { +- if (ctx->qp_i != -1 || ctx->qp_p != -1 || ctx->qp_b != -1) { +- ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CQP\n"); +- } else if (avctx->rc_max_rate > 0 ) { +- 
ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR;
+-            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to Peak VBR\n");
+-        } else {
+-            ctx->rate_control_mode = AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR;
+-            av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CBR\n");
++        switch (probed_rc_mode) {
++        case AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP:
++            ctx->rate_control_mode = probed_rc_mode;
++            av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CQP\n");
++            break;
++        case AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR:
++            ctx->rate_control_mode = probed_rc_mode;
++            av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to Peak VBR\n");
++            break;
++        case AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR:
++            ctx->rate_control_mode = probed_rc_mode;
++            av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CBR\n");
++            break;
+         }
+     }
+
++    // Pre-Encode/Two-Pass(pre-encode assisted rate control)
+     if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) {
+-        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_PREANALYSIS_ENABLE, AMF_VIDEO_ENCODER_PREENCODE_DISABLED);
+-        if (ctx->preanalysis)
+-            av_log(ctx, AV_LOG_WARNING, "Pre-Analysis is not supported by cqp Rate Control Method, automatically disabled\n");
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PREENCODE_ENABLE, AMF_VIDEO_ENCODER_PREENCODE_DISABLED);
++        if (ctx->pre_encode) {
++            ctx->pre_encode = 0;
++            av_log(ctx, AV_LOG_WARNING, "Pre-Encode is not supported by CQP rate control method, automatically disabled\n");
++        }
+     } else {
+-        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_PREANALYSIS_ENABLE, ctx->preanalysis);
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PREENCODE_ENABLE, ctx->pre_encode);
+     }
+
++    // Quality preset
+     AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QUALITY_PRESET, ctx->quality);
+
+     // Dynamic parmaters
+     AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD, ctx->rate_control_mode);
++    if (res != AMF_OK && ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR) {
++        ctx->rate_control_mode = probed_rc_mode;
++        av_log(ctx, AV_LOG_WARNING, "QVBR is not supported by this GPU, switching to the auto-detected rate control method\n");
++    }
+
+-    /// VBV Buffer
++    // High Motion Quality Boost mode
++    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR) {
++        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HIGH_MOTION_QUALITY_BOOST_ENABLE, 0);
++        if (ctx->enable_hmqb) {
++            ctx->enable_hmqb = 0;
++            av_log(ctx, AV_LOG_WARNING, "High Motion Quality Boost is not supported by QVBR rate control method, automatically disabled\n");
++        }
++    } else {
++        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HIGH_MOTION_QUALITY_BOOST_ENABLE, !!ctx->enable_hmqb);
++    }
++
++    // VBV Buffer
+     if (avctx->rc_buffer_size != 0) {
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_VBV_BUFFER_SIZE, avctx->rc_buffer_size);
+         if (avctx->rc_initial_buffer_occupancy != 0) {
+@@ -236,7 +249,8 @@ static av_cold int amf_encode_init_h264(
+             AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_INITIAL_VBV_BUFFER_FULLNESS, amf_buffer_fullness);
+         }
+     }
+-    /// Maximum Access Unit Size
++
++    // Maximum Access Unit Size
+     AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_AU_SIZE, ctx->max_au_size);
+
+     if (ctx->max_au_size)
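The fallback above is worth noting: if SetProperty() rejects QUALITY_VBR at init time, the encoder drops back to the probed mode instead of failing, so callers may request QVBR unconditionally. A minimal sketch of how the new rc=qvbr and qvbr_level options declared in this file are reached from user code (not part of the patch; assumes an AMF-capable FFmpeg build, error handling mostly elided):

#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>

/* Hypothetical helper, for illustration only: open h264_amf in QVBR mode.
 * If the GPU lacks QUALITY_VBR, the init code above logs a warning and
 * falls back to the probed rate control method. */
static AVCodecContext *open_h264_amf_qvbr(int width, int height)
{
    const AVCodec *codec = avcodec_find_encoder_by_name("h264_amf");
    AVCodecContext *enc;

    if (!codec)
        return NULL;
    enc = avcodec_alloc_context3(codec);
    if (!enc)
        return NULL;

    enc->width     = width;
    enc->height    = height;
    enc->time_base = (AVRational){ 1, 30 };
    enc->pix_fmt   = AV_PIX_FMT_NV12;

    av_opt_set    (enc->priv_data, "rc", "qvbr", 0);      /* new QVBR mode   */
    av_opt_set_int(enc->priv_data, "qvbr_level", 23, 0);  /* 1..51, def. 23  */

    if (avcodec_open2(enc, codec, NULL) < 0) {
        avcodec_free_context(&enc);
        return NULL;
    }
    return enc;
}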
+@@ -246,7 +260,25 @@ static av_cold int amf_encode_init_h264(
+     if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) {
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 0);
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 51);
++    } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_QUALITY_VBR) {
++        if (ctx->qvbr_level) {
++            AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_QVBR_QUALITY_LEVEL, ctx->qvbr_level);
++        }
+     } else {
++        if (avctx->qmin == -1 && avctx->qmax == -1) {
++            switch (ctx->usage) {
++            case AMF_VIDEO_ENCODER_USAGE_TRANSCONDING:
++                AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 18);
++                AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 46);
++                break;
++            case AMF_VIDEO_ENCODER_USAGE_ULTRA_LOW_LATENCY:
++            case AMF_VIDEO_ENCODER_USAGE_LOW_LATENCY:
++            case AMF_VIDEO_ENCODER_USAGE_WEBCAM:
++                AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, 22);
++                AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MAX_QP, 48);
++                break;
++            }
++        }
+         if (avctx->qmin != -1) {
+             int qval = avctx->qmin > 51 ? 51 : avctx->qmin;
+             AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_MIN_QP, qval);
+@@ -266,31 +298,50 @@ static av_cold int amf_encode_init_h264(
+     AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_TARGET_BITRATE, avctx->bit_rate);
+
+-    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR) {
++    if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CBR)
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PEAK_BITRATE, avctx->bit_rate);
+-    }
++
+     if (avctx->rc_max_rate) {
+         AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_PEAK_BITRATE, avctx->rc_max_rate);
+     } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) {
+-        av_log(ctx, AV_LOG_WARNING, "rate control mode is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n");
++        av_log(ctx, AV_LOG_WARNING, "Rate control method is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n");
++    }
++
++    // Color Range (Partial/TV/MPEG or Full/PC/JPEG)
++    if (avctx->color_range == AVCOL_RANGE_JPEG) {
++        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FULL_RANGE_COLOR, 1);
++    } else {
++        AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_FULL_RANGE_COLOR, 0);
+     }
+
++    // Set output color profile, transfer and primaries
++    if (ctx->out_color_profile > AMF_VIDEO_CONVERTER_COLOR_PROFILE_UNKNOWN)
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_OUTPUT_COLOR_PROFILE, ctx->out_color_profile);
++    if (ctx->out_color_trc > AMF_COLOR_TRANSFER_CHARACTERISTIC_UNDEFINED)
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_OUTPUT_TRANSFER_CHARACTERISTIC, ctx->out_color_trc);
++    if (ctx->out_color_prm > AMF_COLOR_PRIMARIES_UNDEFINED)
++        AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_OUTPUT_COLOR_PRIMARIES, ctx->out_color_prm);
++
+     // Initialize Encoder
+     res = ctx->encoder->pVtbl->Init(ctx->encoder, ctx->format, avctx->width, avctx->height);
+     AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "encoder->Init() failed with error %d\n", res);
+
+-    // Enforce HRD, Filler Data, VBAQ, Frame Skipping, Deblocking Filter
++    // Enforce HRD, Filler Data, Frame Skipping, Deblocking Filter
+     AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENFORCE_HRD, !!ctx->enforce_hrd);
+     AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder,
AMF_VIDEO_ENCODER_FILLER_DATA_ENABLE, !!ctx->filler_data); + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_RATE_CONTROL_SKIP_FRAME_ENABLE, !!ctx->skip_frame); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_DE_BLOCKING_FILTER, !!deblocking_filter); ++ ++ // VBAQ + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_RATE_CONTROL_METHOD_CONSTANT_QP) { + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENABLE_VBAQ, 0); +- if (ctx->enable_vbaq) ++ if (ctx->enable_vbaq) { ++ ctx->enable_vbaq = 0; + av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by cqp Rate Control Method, automatically disabled\n"); ++ } + } else { + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_ENABLE_VBAQ, !!ctx->enable_vbaq); + } +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_DE_BLOCKING_FILTER, !!deblocking_filter); + + // B-Frames + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_B_PIC_PATTERN, avctx->max_b_frames); +@@ -338,9 +389,8 @@ static av_cold int amf_encode_init_h264( + guid = IID_AMFBuffer(); + + res = var.pInterface->pVtbl->QueryInterface(var.pInterface, &guid, (void**)&buffer); // query for buffer interface +- if (res != AMF_OK) { ++ if (res != AMF_OK) + var.pInterface->pVtbl->Release(var.pInterface); +- } + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "QueryInterface(IID_AMFBuffer) failed with error %d\n", res); + + avctx->extradata_size = (int)buffer->pVtbl->GetSize(buffer); +@@ -359,15 +409,15 @@ static av_cold int amf_encode_init_h264( + } + + static const AVCodecDefault defaults[] = { +- { "refs", "-1" }, +- { "aspect", "0" }, +- { "qmin", "-1" }, +- { "qmax", "-1" }, +- { "b", "2M" }, +- { "g", "250" }, +- { "slices", "1" }, +- { "flags", "+loop"}, +- { NULL }, ++ { "refs", "-1" }, ++ { "aspect", "0" }, ++ { "qmin", "-1" }, ++ { "qmax", "-1" }, ++ { "b", "2M" }, ++ { "g", "250" }, ++ { "slices", "1" }, ++ { "flags", "+loop" }, ++ { NULL }, + }; + + static const AVClass h264_amf_class = { +@@ -385,13 +435,13 @@ AVCodec ff_h264_amf_encoder = { + .init = amf_encode_init_h264, + .receive_packet = ff_amf_receive_packet, + .close = ff_amf_encode_close, +- .priv_data_size = sizeof(AmfContext), ++ .priv_data_size = sizeof(AMFEncContext), + .priv_class = &h264_amf_class, + .defaults = defaults, + .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, +- .pix_fmts = ff_amf_pix_fmts, ++ .pix_fmts = ff_amfenc_h264_pix_fmts, + .wrapper_name = "amf", + .hw_configs = ff_amfenc_hw_configs, + }; +Index: jellyfin-ffmpeg/libavcodec/amfenc_hevc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/amfenc_hevc.c ++++ jellyfin-ffmpeg/libavcodec/amfenc_hevc.c +@@ -21,73 +21,91 @@ + #include "amfenc.h" + #include "internal.h" + +-#define OFFSET(x) offsetof(AmfContext, x) ++#define OFFSET(x) offsetof(AMFEncContext, x) + #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM +-static const AVOption options[] = { +- { "usage", "Set the encoding usage", OFFSET(usage), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING }, AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING, AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM, VE, "usage" }, +- { "transcoding", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING }, 0, 0, VE, "usage" }, +- { "ultralowlatency","", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_ULTRA_LOW_LATENCY }, 0, 0, VE, "usage" }, +- { "lowlatency", "", 
0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY }, 0, 0, VE, "usage" }, +- { "webcam", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM }, 0, 0, VE, "usage" }, +- +- { "profile", "Set the profile (default main)", OFFSET(profile), AV_OPT_TYPE_INT,{ .i64 = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN }, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, VE, "profile" }, +- { "main", "", 0, AV_OPT_TYPE_CONST,{ .i64 = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN }, 0, 0, VE, "profile" }, +- +- { "profile_tier", "Set the profile tier (default main)", OFFSET(tier), AV_OPT_TYPE_INT,{ .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_MAIN }, AMF_VIDEO_ENCODER_HEVC_TIER_MAIN, AMF_VIDEO_ENCODER_HEVC_TIER_HIGH, VE, "tier" }, +- { "main", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_MAIN }, 0, 0, VE, "tier" }, +- { "high", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_HIGH }, 0, 0, VE, "tier" }, +- +- { "level", "Set the encoding level (default auto)", OFFSET(level), AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, AMF_LEVEL_6_2, VE, "level" }, +- { "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, VE, "level" }, +- { "1.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_1 }, 0, 0, VE, "level" }, +- { "2.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_2 }, 0, 0, VE, "level" }, +- { "2.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_2_1 }, 0, 0, VE, "level" }, +- { "3.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_3 }, 0, 0, VE, "level" }, +- { "3.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_3_1 }, 0, 0, VE, "level" }, +- { "4.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_4 }, 0, 0, VE, "level" }, +- { "4.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_4_1 }, 0, 0, VE, "level" }, +- { "5.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_5 }, 0, 0, VE, "level" }, +- { "5.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_5_1 }, 0, 0, VE, "level" }, +- { "5.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_5_2 }, 0, 0, VE, "level" }, +- { "6.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_6 }, 0, 0, VE, "level" }, +- { "6.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_6_1 }, 0, 0, VE, "level" }, +- { "6.2", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_LEVEL_6_2 }, 0, 0, VE, "level" }, +- +- { "quality", "Set the encoding quality", OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED }, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED, VE, "quality" }, +- { "balanced", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_BALANCED }, 0, 0, VE, "quality" }, +- { "speed", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED }, 0, 0, VE, "quality" }, +- { "quality", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY }, 0, 0, VE, "quality" }, +- +- { "rc", "Set the rate control mode", OFFSET(rate_control_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR, VE, "rc" }, +- { "cqp", "Constant Quantization Parameter", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP }, 0, 0, VE, "rc" }, +- { "cbr", "Constant Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR }, 0, 0, VE, "rc" }, +- { "vbr_peak", "Peak Contrained Variable Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = 
AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR }, 0, 0, VE, "rc" }, +- { "vbr_latency", "Latency Constrained Variable Bitrate", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR }, 0, 0, VE, "rc" }, +- +- { "header_insertion_mode", "Set header insertion mode", OFFSET(header_insertion_mode), AV_OPT_TYPE_INT,{ .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE }, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED, VE, "hdrmode" }, +- { "none", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE }, 0, 0, VE, "hdrmode" }, +- { "gop", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_GOP_ALIGNED }, 0, 0, VE, "hdrmode" }, +- { "idr", "", 0, AV_OPT_TYPE_CONST, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED }, 0, 0, VE, "hdrmode" }, +- +- { "gops_per_idr", "GOPs per IDR 0-no IDR will be inserted", OFFSET(gops_per_idr), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, INT_MAX, VE }, +- { "preanalysis", "Enable preanalysis", OFFSET(preanalysis), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE}, +- { "vbaq", "Enable VBAQ", OFFSET(enable_vbaq), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE}, +- { "enforce_hrd", "Enforce HRD", OFFSET(enforce_hrd), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE}, +- { "filler_data", "Filler Data Enable", OFFSET(filler_data), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE}, +- { "max_au_size", "Maximum Access Unit Size for rate control (in bits)", OFFSET(max_au_size), AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, VE}, +- { "min_qp_i", "min quantization parameter for I-frame", OFFSET(min_qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "max_qp_i", "max quantization parameter for I-frame", OFFSET(max_qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "min_qp_p", "min quantization parameter for P-frame", OFFSET(min_qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "max_qp_p", "max quantization parameter for P-frame", OFFSET(max_qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "qp_p", "quantization parameter for P-frame", OFFSET(qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "qp_i", "quantization parameter for I-frame", OFFSET(qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE }, +- { "skip_frame", "Rate Control Based Frame Skip", OFFSET(skip_frame), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, +- { "me_half_pel", "Enable ME Half Pixel", OFFSET(me_half_pel), AV_OPT_TYPE_BOOL,{ .i64 = 1 }, 0, 1, VE }, +- { "me_quarter_pel", "Enable ME Quarter Pixel ", OFFSET(me_quarter_pel),AV_OPT_TYPE_BOOL,{ .i64 = 1 }, 0, 1, VE }, ++#define ENUM(a, b, c, d) { a, b, 0, AV_OPT_TYPE_CONST, { .i64 = c }, 0, 0, VE, d } ++ ++static const enum AVPixelFormat ff_amfenc_hevc_pix_fmts[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_P010, ++#if CONFIG_D3D11VA ++ AV_PIX_FMT_D3D11, ++#endif ++#if CONFIG_DXVA2 ++ AV_PIX_FMT_DXVA2_VLD, ++#endif ++ AV_PIX_FMT_NONE ++}; + +- { "aud", "Inserts AU Delimiter NAL unit", OFFSET(aud) ,AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, ++static const AVOption options[] = { ++ { "usage", "Encoder Usage", OFFSET(usage), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCODING }, AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCODING, AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY_HIGH_QUALITY, VE, "usage" }, ++ ENUM("transcoding", "Transcoding, video editing", AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCODING, "usage"), ++ ENUM("ultralowlatency", "Video game streaming", 
AMF_VIDEO_ENCODER_HEVC_USAGE_ULTRA_LOW_LATENCY, "usage"),
++    ENUM("lowlatency", "Video collaboration, RDP", AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY, "usage"),
++    ENUM("webcam", "Video conferencing", AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM, "usage"),
++    ENUM("highquality", "High-quality encoding", AMF_VIDEO_ENCODER_HEVC_USAGE_HIGH_QUALITY, "usage"),
++    ENUM("llhighquality", "High-quality encoding (low latency)", AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY_HIGH_QUALITY, "usage"),
++
++    { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN }, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN_10, VE, "profile" },
++    ENUM("main", "", AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN, "profile"),
++    ENUM("main10", "", AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN_10, "profile"),
++
++
++    { "profile_tier", "Profile Tier", OFFSET(tier), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_TIER_MAIN }, AMF_VIDEO_ENCODER_HEVC_TIER_MAIN, AMF_VIDEO_ENCODER_HEVC_TIER_HIGH, VE, "tier" },
++    ENUM("main", "", AMF_VIDEO_ENCODER_HEVC_TIER_MAIN, "tier"),
++    ENUM("high", "", AMF_VIDEO_ENCODER_HEVC_TIER_HIGH, "tier"),
++
++    { "level", "Profile Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, AMF_LEVEL_6_2, VE, "level" },
++    ENUM("auto", "", 0, "level"),
++    ENUM("1.0", "", AMF_LEVEL_1, "level"),
++    ENUM("2.0", "", AMF_LEVEL_2, "level"),
++    ENUM("2.1", "", AMF_LEVEL_2_1, "level"),
++    ENUM("3.0", "", AMF_LEVEL_3, "level"),
++    ENUM("3.1", "", AMF_LEVEL_3_1, "level"),
++    ENUM("4.0", "", AMF_LEVEL_4, "level"),
++    ENUM("4.1", "", AMF_LEVEL_4_1, "level"),
++    ENUM("5.0", "", AMF_LEVEL_5, "level"),
++    ENUM("5.1", "", AMF_LEVEL_5_1, "level"),
++    ENUM("5.2", "", AMF_LEVEL_5_2, "level"),
++    ENUM("6.0", "", AMF_LEVEL_6, "level"),
++    ENUM("6.1", "", AMF_LEVEL_6_1, "level"),
++    ENUM("6.2", "", AMF_LEVEL_6_2, "level"),
++
++    { "quality", "Quality Preset", OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED }, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED, VE, "quality" },
++    ENUM("speed", "Prefer Speed", AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_SPEED, "quality"),
++    ENUM("balanced", "Balanced", AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_BALANCED, "quality"),
++    ENUM("quality", "Prefer Quality", AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_QUALITY, "quality"),
++
++    { "rc", "Rate Control Method", OFFSET(rate_control_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN }, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR, VE, "rc" },
++    ENUM("cqp", "Constant Quantization Parameter", AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP, "rc"),
++    ENUM("cbr", "Constant Bitrate", AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR, "rc"),
++    ENUM("vbr_peak", "Peak Constrained Variable Bitrate", AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR, "rc"),
++    ENUM("vbr_latency", "Latency Constrained Variable Bitrate", AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR, "rc"),
++
++    { "header_insertion_mode", "Set header insertion mode", OFFSET(header_insertion_mode), AV_OPT_TYPE_INT, { .i64 = AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE }, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED, VE, "hdrmode" },
++    ENUM("none", "", AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_NONE, "hdrmode"),
++    ENUM("gop", "", AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_GOP_ALIGNED, "hdrmode"),
++    ENUM("idr", "", AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE_IDR_ALIGNED, "hdrmode"),
++
++    { "gops_per_idr", "GOPs per IDR 0-no IDR will be inserted", OFFSET(gops_per_idr), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, INT_MAX, VE },
++    { "preanalysis", "Enable Pre-Encode/Analysis for Rate Control (2-Pass)", OFFSET(pre_encode), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "vbaq", "Enable VBAQ", OFFSET(enable_vbaq), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "hmqb", "Enable High Motion Quality Boost", OFFSET(enable_hmqb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "enforce_hrd", "Enforce HRD", OFFSET(enforce_hrd), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "filler_data", "Filler Data Enable", OFFSET(filler_data), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "max_au_size", "Maximum Access Unit Size for rate control (in bits)", OFFSET(max_au_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
++    { "min_qp_i", "Min Quantization Parameter for I-frame", OFFSET(min_qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "max_qp_i", "Max Quantization Parameter for I-frame", OFFSET(max_qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "min_qp_p", "Min Quantization Parameter for P-frame", OFFSET(min_qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "max_qp_p", "Max Quantization Parameter for P-frame", OFFSET(max_qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "qp_p", "Quantization Parameter for P-frame", OFFSET(qp_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "qp_i", "Quantization Parameter for I-frame", OFFSET(qp_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 51, VE },
++    { "skip_frame", "Rate Control Based Frame Skip", OFFSET(skip_frame), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
++    { "me_half_pel", "Enable ME Half Pixel", OFFSET(me_half_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
++    { "me_quarter_pel", "Enable ME Quarter Pixel", OFFSET(me_quarter_pel), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
+
+-    { "log_to_dbg", "Enable AMF logging to debug output", OFFSET(log_to_dbg), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE },
++    { "log_to_dbg", "Enable AMF logging to debug output", OFFSET(log_to_dbg), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+     { NULL }
+ };
+
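With this table, hevc_amf exposes the Main 10 profile and the new usage presets through the ordinary AVOptions interface. A minimal sketch of selecting them from user code (not part of the patch; assumes an AMF build that accepts P010 input, error handling mostly elided):

#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>

/* Hypothetical helper, for illustration only: open hevc_amf for 10-bit
 * Main 10 encoding using the options declared in the table above. */
static AVCodecContext *open_hevc_amf_main10(int width, int height)
{
    const AVCodec *codec = avcodec_find_encoder_by_name("hevc_amf");
    AVCodecContext *enc;

    if (!codec)
        return NULL;
    enc = avcodec_alloc_context3(codec);
    if (!enc)
        return NULL;

    enc->width     = width;
    enc->height    = height;
    enc->time_base = (AVRational){ 1, 30 };
    enc->pix_fmt   = AV_PIX_FMT_P010;   /* 10-bit input, from the pix_fmts list */
    enc->bit_rate  = 5000000;

    av_opt_set(enc->priv_data, "profile", "main10", 0);     /* new Main 10 entry */
    av_opt_set(enc->priv_data, "usage", "transcoding", 0);

    if (avcodec_open2(enc, codec, NULL) < 0) {
        avcodec_free_context(&enc);
        return NULL;
    }
    return enc;
}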
+@@ -95,8 +113,8 @@ static av_cold int amf_encode_init_hevc(
+ {
+     int ret = 0;
+     AMF_RESULT res = AMF_OK;
+-    AmfContext *ctx = avctx->priv_data;
+-    AMFVariantStruct var = {0};
++    AMFEncContext *ctx = avctx->priv_data;
++    AMFVariantStruct var = { 0 };
+     amf_int64 profile = 0;
+     amf_int64 profile_level = 0;
+     AMFBuffer *buffer;
+@@ -105,16 +123,15 @@ static av_cold int amf_encode_init_hevc(
+     AMFSize framesize = AMFConstructSize(avctx->width, avctx->height);
+     int deblocking_filter = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ?
1 : 0; + +- if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { ++ if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = AMFConstructRate(avctx->framerate.num, avctx->framerate.den); +- } else { ++ else + framerate = AMFConstructRate(avctx->time_base.den, avctx->time_base.num * avctx->ticks_per_frame); +- } + + if ((ret = ff_amf_encode_init(avctx)) < 0) + return ret; + +- // init static parameters ++ // Static parameters + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_USAGE, ctx->usage); + + AMF_ASSIGN_PROPERTY_SIZE(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_FRAMESIZE, framesize); +@@ -125,28 +142,28 @@ static av_cold int amf_encode_init_hevc( + case FF_PROFILE_HEVC_MAIN: + profile = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN; + break; ++ case FF_PROFILE_HEVC_MAIN_10: ++ profile = AMF_VIDEO_ENCODER_HEVC_PROFILE_MAIN_10; ++ break; + default: + break; + } +- if (profile == 0) { ++ if (profile == 0) + profile = ctx->profile; +- } + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PROFILE, profile); + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_TIER, ctx->tier); + + profile_level = avctx->level; +- if (profile_level == FF_LEVEL_UNKNOWN) { ++ if (profile_level == FF_LEVEL_UNKNOWN) + profile_level = ctx->level; +- } +- if (profile_level != 0) { ++ if (profile_level != 0) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PROFILE_LEVEL, profile_level); +- } ++ + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET, ctx->quality); + // Maximum Reference Frames +- if (avctx->refs != -1) { ++ if (avctx->refs != -1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_NUM_REFRAMES, avctx->refs); +- } + // Aspect Ratio + if (avctx->sample_aspect_ratio.den && avctx->sample_aspect_ratio.num) { + AMFRatio ratio = AMFConstructRatio(avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den); +@@ -156,30 +173,28 @@ static av_cold int amf_encode_init_hevc( + // Picture control properties + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_NUM_GOPS_PER_IDR, ctx->gops_per_idr); + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_GOP_SIZE, avctx->gop_size); +- if (avctx->slices > 1) { ++ if (avctx->slices > 1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_SLICES_PER_FRAME, avctx->slices); +- } + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_DE_BLOCKING_FILTER_DISABLE, deblocking_filter); + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_HEADER_INSERTION_MODE, ctx->header_insertion_mode); + +- // Rate control +- // autodetect rate control method ++ // Rate control properties ++ // Auto detect rate control method + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_UNKNOWN) { + if (ctx->min_qp_i != -1 || ctx->max_qp_i != -1 || + ctx->min_qp_p != -1 || ctx->max_qp_p != -1 || + ctx->qp_i !=-1 || ctx->qp_p != -1) { + ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CQP\n"); ++ av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CQP\n"); + } else if (avctx->rc_max_rate > 0) { + ctx->rate_control_mode = AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to Peak VBR\n"); ++ av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to Peak VBR\n"); + } else { + ctx->rate_control_mode = 
AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR; +- av_log(ctx, AV_LOG_DEBUG, "Rate control turned to CBR\n"); ++ av_log(ctx, AV_LOG_DEBUG, "Rate control method turned to CBR\n"); + } + } + +- + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD, ctx->rate_control_mode); + if (avctx->rc_buffer_size) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_VBV_BUFFER_SIZE, avctx->rc_buffer_size); +@@ -191,20 +206,37 @@ static av_cold int amf_encode_init_hevc( + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_INITIAL_VBV_BUFFER_FULLNESS, amf_buffer_fullness); + } + } +- // Pre-Pass, Pre-Analysis, Two-Pass +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_PREANALYSIS_ENABLE, ctx->preanalysis); + ++ // Pre-Encode/Two-Pass(pre-encode assisted rate control) ++ if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PREENCODE_ENABLE, 0); ++ if (ctx->pre_encode) { ++ ctx->pre_encode = 0; ++ av_log(ctx, AV_LOG_WARNING, "Pre-Encode is not supported by CQP rate control method, automatically disabled\n"); ++ } ++ } else { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PREENCODE_ENABLE, ctx->pre_encode); ++ } ++ ++ // VBAQ + if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) { +- AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, false); +- if (ctx->enable_vbaq) +- av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by cqp Rate Control Method, automatically disabled\n"); ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, 0); ++ if (ctx->enable_vbaq) { ++ ctx->enable_vbaq = 0; ++ av_log(ctx, AV_LOG_WARNING, "VBAQ is not supported by CQP rate control method, automatically disabled\n"); ++ } + } else { + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENABLE_VBAQ, !!ctx->enable_vbaq); + } ++ ++ // High Motion Quality Boost mode ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_HIGH_MOTION_QUALITY_BOOST_ENABLE, !!ctx->enable_hmqb); ++ ++ // Motion estimation + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MOTION_HALF_PIXEL, ctx->me_half_pel); + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MOTION_QUARTERPIXEL, ctx->me_quarter_pel); + +- // init dynamic rate control params ++ // Dynamic rate control params + if (ctx->max_au_size) + ctx->enforce_hrd = 1; + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_ENFORCE_HRD, ctx->enforce_hrd); +@@ -212,57 +244,99 @@ static av_cold int amf_encode_init_hevc( + + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_TARGET_BITRATE, avctx->bit_rate); + +- if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR) { ++ if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CBR) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PEAK_BITRATE, avctx->bit_rate); +- } + if (avctx->rc_max_rate) { + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_PEAK_BITRATE, avctx->rc_max_rate); + } else if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR) { +- av_log(ctx, AV_LOG_WARNING, "rate control mode is PEAK_CONSTRAINED_VBR but rc_max_rate is not set\n"); ++ av_log(ctx, AV_LOG_WARNING, "Rate control method is PEAK_CONSTRAINED_VBR but 
rc_max_rate is not set\n"); ++ } ++ ++ // Color Range (Studio/Partial/TV/MPEG or Full/PC/JPEG) ++ if (avctx->color_range == AVCOL_RANGE_JPEG) { ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_NOMINAL_RANGE, AMF_VIDEO_ENCODER_HEVC_NOMINAL_RANGE_FULL); ++ } else { ++ AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_NOMINAL_RANGE, AMF_VIDEO_ENCODER_HEVC_NOMINAL_RANGE_STUDIO); + } + +- // init encoder ++ // Output color profile, transfer and primaries ++ if (ctx->out_color_profile > AMF_VIDEO_CONVERTER_COLOR_PROFILE_UNKNOWN) ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_OUTPUT_COLOR_PROFILE, ctx->out_color_profile); ++ if (ctx->out_color_trc > AMF_COLOR_TRANSFER_CHARACTERISTIC_UNDEFINED) ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_OUTPUT_TRANSFER_CHARACTERISTIC, ctx->out_color_trc); ++ if (ctx->out_color_prm > AMF_COLOR_PRIMARIES_UNDEFINED) ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_OUTPUT_COLOR_PRIMARIES, ctx->out_color_prm); ++ ++ // Set 10-bit encoding if possible ++ if (ctx->bit_depth == 10) ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_COLOR_BIT_DEPTH, AMF_COLOR_BIT_DEPTH_10); ++ ++ // Init encoder + res = ctx->encoder->pVtbl->Init(ctx->encoder, ctx->format, avctx->width, avctx->height); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "encoder->Init() failed with error %d\n", res); + +- // init dynamic picture control params ++ // Dynamic picture control params + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_AU_SIZE, ctx->max_au_size); + +- if (ctx->min_qp_i != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, ctx->min_qp_i); +- } else if (avctx->qmin != -1) { +- int qval = avctx->qmin > 51 ? 51 : avctx->qmin; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, qval); +- } +- if (ctx->max_qp_i != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, ctx->max_qp_i); +- } else if (avctx->qmax != -1) { +- int qval = avctx->qmax > 51 ? 51 : avctx->qmax; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, qval); +- } +- if (ctx->min_qp_p != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, ctx->min_qp_p); +- } else if (avctx->qmin != -1) { +- int qval = avctx->qmin > 51 ? 51 : avctx->qmin; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, qval); +- } +- if (ctx->max_qp_p != -1) { +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, ctx->max_qp_p); +- } else if (avctx->qmax != -1) { +- int qval = avctx->qmax > 51 ? 51 : avctx->qmax; +- AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, qval); ++ // QP Minimum / Maximum ++ if (ctx->rate_control_mode == AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_METHOD_CONSTANT_QP) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, 0); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, 51); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, 0); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, 51); ++ } else { ++ if (ctx->min_qp_i != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, ctx->min_qp_i); ++ } else if (avctx->qmin != -1) { ++ int qval = avctx->qmin > 51 ? 
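/* H.265 only defines QP 0..51, so the generic qmin/qmax options are clamped before being handed to AMF */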
51 : avctx->qmin; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, qval); ++ } ++ if (ctx->max_qp_i != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, ctx->max_qp_i); ++ } else if (avctx->qmax != -1) { ++ int qval = avctx->qmax > 51 ? 51 : avctx->qmax; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, qval); ++ } ++ if (ctx->min_qp_p != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, ctx->min_qp_p); ++ } else if (avctx->qmin != -1) { ++ int qval = avctx->qmin > 51 ? 51 : avctx->qmin; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, qval); ++ } ++ if (ctx->max_qp_p != -1) { ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, ctx->max_qp_p); ++ } else if (avctx->qmax != -1) { ++ int qval = avctx->qmax > 51 ? 51 : avctx->qmax; ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, qval); ++ } ++ if (ctx->min_qp_i == -1 && ctx->max_qp_i == -1 && ctx->min_qp_p == -1 && ctx->max_qp_p == -1 && ++ avctx->qmin == -1 && avctx->qmax == -1) { ++ switch (ctx->usage) { ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_TRANSCONDING: ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, 18); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, 46); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, 18); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, 46); ++ break; ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_ULTRA_LOW_LATENCY: ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_LOW_LATENCY: ++ case AMF_VIDEO_ENCODER_HEVC_USAGE_WEBCAM: ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_I, 22); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_I, 48); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MIN_QP_P, 22); ++ AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_MAX_QP_P, 48); ++ break; ++ } ++ } + } + +- if (ctx->qp_p != -1) { ++ if (ctx->qp_p != -1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QP_P, ctx->qp_p); +- } +- if (ctx->qp_i != -1) { ++ if (ctx->qp_i != -1) + AMF_ASSIGN_PROPERTY_INT64(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_QP_I, ctx->qp_i); +- } + AMF_ASSIGN_PROPERTY_BOOL(res, ctx->encoder, AMF_VIDEO_ENCODER_HEVC_RATE_CONTROL_SKIP_FRAME_ENABLE, ctx->skip_frame); + +- +- // fill extradata ++ // Fill extradata + res = AMFVariantInit(&var); + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "AMFVariantInit() failed with error %d\n", res); + +@@ -273,9 +347,8 @@ static av_cold int amf_encode_init_hevc( + guid = IID_AMFBuffer(); + + res = var.pInterface->pVtbl->QueryInterface(var.pInterface, &guid, (void**)&buffer); // query for buffer interface +- if (res != AMF_OK) { ++ if (res != AMF_OK) + var.pInterface->pVtbl->Release(var.pInterface); +- } + AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_BUG, "QueryInterface(IID_AMFBuffer) failed with error %d\n", res); + + avctx->extradata_size = (int)buffer->pVtbl->GetSize(buffer); +@@ -292,6 +365,7 @@ static av_cold int amf_encode_init_hevc( + + return 0; + } ++ + static const AVCodecDefault defaults[] = { + { "refs", "-1" }, + { "aspect", "0" }, +@@ -317,13 +391,13 @@ AVCodec ff_hevc_amf_encoder = { + .init = amf_encode_init_hevc, + .receive_packet = ff_amf_receive_packet, + .close = ff_amf_encode_close, +- .priv_data_size = 
sizeof(AmfContext), ++ .priv_data_size = sizeof(AMFEncContext), + .priv_class = &hevc_amf_class, + .defaults = defaults, + .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, +- .pix_fmts = ff_amf_pix_fmts, ++ .pix_fmts = ff_amfenc_hevc_pix_fmts, + .wrapper_name = "amf", + .hw_configs = ff_amfenc_hw_configs, + }; diff --git a/debian/patches/series b/debian/patches/series index 5f90ee2c53b..2f13511a402 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1,4 @@ 0001-add-fixes-for-segement-muxer.patch 0002-add-cuda-pixfmt-converter-impl.patch 0003-add-cuda-tonemap-impl.patch +0004-add-amf-refactor-and-hevc-10-bit-encoding.patch From 15afff4147ad6453b14cc79cf0a7fb29d34ec35e Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:38:09 +0800 Subject: [PATCH 14/41] add opencl scaler and pixfmt converter impl --- ...ncl-scaler-and-pixfmt-converter-impl.patch | 985 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 986 insertions(+) create mode 100644 debian/patches/0005-add-opencl-scaler-and-pixfmt-converter-impl.patch diff --git a/debian/patches/0005-add-opencl-scaler-and-pixfmt-converter-impl.patch b/debian/patches/0005-add-opencl-scaler-and-pixfmt-converter-impl.patch new file mode 100644 index 00000000000..74486fd0108 --- /dev/null +++ b/debian/patches/0005-add-opencl-scaler-and-pixfmt-converter-impl.patch @@ -0,0 +1,985 @@ +Index: jellyfin-ffmpeg/configure +=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ jellyfin-ffmpeg/configure +@@ -3619,6 +3619,7 @@ rubberband_filter_deps="librubberband" + sab_filter_deps="gpl swscale" + scale2ref_filter_deps="swscale" + scale_filter_deps="swscale" ++scale_opencl_filter_deps="opencl" + scale_qsv_filter_deps="libmfx" + scdet_filter_select="scene_sad" + select_filter_select="scene_sad" +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ -394,6 +394,7 @@ OBJS-$(CONFIG_SAB_FILTER) + OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o + OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o scale_eval.o + OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o ++OBJS-$(CONFIG_SCALE_OPENCL_FILTER) += vf_scale_opencl.o opencl.o opencl/scale.o scale_eval.o + OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o + OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o + OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o +Index: jellyfin-ffmpeg/libavfilter/allfilters.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/allfilters.c ++++ jellyfin-ffmpeg/libavfilter/allfilters.c +@@ -376,6 +376,7 @@ extern AVFilter ff_vf_sab; + extern AVFilter ff_vf_scale; + extern AVFilter ff_vf_scale_cuda; + extern AVFilter ff_vf_scale_npp; ++extern AVFilter ff_vf_scale_opencl; + extern AVFilter ff_vf_scale_qsv; + extern AVFilter ff_vf_scale_vaapi; + extern AVFilter ff_vf_scale_vulkan; +Index: jellyfin-ffmpeg/libavfilter/opencl/scale.cl +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/opencl/scale.cl +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2018 Gabriel Machado ++ * Copyright (c) 2021 NyanMisaka ++ * ++ * This file is part of FFmpeg. 
++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++__constant sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_ADDRESS_CLAMP_TO_EDGE | ++ CLK_FILTER_NEAREST); ++ ++__constant sampler_t sampler2 = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_ADDRESS_NONE | ++ CLK_FILTER_NEAREST); ++ ++#ifdef CONV ++__kernel void conv_yuv(__write_only image2d_t dst1, ++ __read_only image2d_t src1, ++ __write_only image2d_t dst2, ++ __read_only image2d_t src2 ++#ifdef NON_SEMI_PLANAR_OUT ++ ,__write_only image2d_t dst3 ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ ,__read_only image2d_t src3 ++#endif ++ ) ++{ ++ int xi = get_global_id(0); ++ int yi = get_global_id(1); ++ // each work item process four pixels ++ int x = 2 * xi; ++ int y = 2 * yi; ++ ++ if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) { ++ float y0 = read_imagef(src1, sampler, (int2)(x, y)).x; ++ float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x; ++ float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x; ++ float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x; ++#ifdef NON_SEMI_PLANAR_IN ++ float u = read_imagef(src2, sampler, (int2)(xi, yi)).x; ++ float v = read_imagef(src3, sampler, (int2)(xi, yi)).x; ++#else ++ float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy; ++ float u = uv.x; ++ float v = uv.y; ++#endif ++ ++ write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x + 1, y), (float4)(y1, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x, y + 1), (float4)(y2, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst1, (int2)(x + 1, y + 1), (float4)(y3, 0.0f, 0.0f, 1.0f)); ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, (int2)(xi, yi), (float4)(u, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, (int2)(xi, yi), (float4)(v, 0.0f, 0.0f, 1.0f)); ++#else ++ write_imagef(dst2, (int2)(xi, yi), (float4)(u, v, 0.0f, 1.0f)); ++#endif ++ } ++} ++#endif ++ ++#ifdef NEIGHBOR ++__kernel void neighbor(__write_only image2d_t dst1, ++ __read_only image2d_t src1, ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int2 read_pos = clamp(src_pos, 0, src_size - 1); ++ float4 c = read_imagef(src1, sampler2, read_pos); ++ write_imagef(dst1, dst_pos, (float4)(c.x, 0.0f, 0.0f, 1.0f)); ++} ++ ++__kernel void neighbor_uv(__write_only image2d_t dst2, ++ __read_only image2d_t src2, ++#ifdef NON_SEMI_PLANAR_OUT ++ __write_only image2d_t dst3, ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ __read_only image2d_t src3, ++#endif ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), 
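/* one work item per output chroma pixel: the global work size equals the output chroma plane size */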
get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int2 read_pos = clamp(src_pos, 0, src_size - 1); ++#ifdef NON_SEMI_PLANAR_IN ++ float u = read_imagef(src2, sampler2, read_pos).x; ++ float v = read_imagef(src3, sampler2, read_pos).x; ++#else ++ float2 uv = read_imagef(src2, sampler2, read_pos).xy; ++ float u = uv.x; ++ float v = uv.y; ++#endif ++ ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, dst_pos, (float4)(u, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, dst_pos, (float4)(v, 0.0f, 0.0f, 1.0f)); ++#else ++ write_imagef(dst2, dst_pos, (float4)(u, v, 0.0f, 1.0f)); ++#endif ++} ++#endif ++ ++#ifdef SCALE ++__kernel void scale(__write_only image2d_t dst1, ++ __read_only image2d_t src1, ++ __constant float *cx, ++ __constant float *cy, ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int i, j; ++ int filterw2 = filterw >> 1; ++ int filterh2 = filterh >> 1; ++ int2 src_size_edge = src_size - 1; ++ float4 col1 = 0.0f, s1 = 0.0f; ++ ++ #pragma unroll ++ for (i = 0; i < filterh; ++i, s1 = 0.0f) { ++ #pragma unroll ++ for (j = 0; j < filterw; ++j) { ++ int2 read_pos = clamp(src_pos + (int2)(filterw2 - j, filterh2 - i), 0, src_size_edge); ++ float4 c1 = read_imagef(src1, sampler2, read_pos); ++ s1 += c1 * cx[dst_pos.x * filterw + j]; ++ } ++ col1 += s1 * cy[dst_pos.y * filterh + i]; ++ } ++ write_imagef(dst1, dst_pos, (float4)(col1.x, 0.0f, 0.0f, 1.0f)); ++} ++ ++__kernel void scale_uv(__write_only image2d_t dst2, ++ __read_only image2d_t src2, ++#ifdef NON_SEMI_PLANAR_OUT ++ __write_only image2d_t dst3, ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ __read_only image2d_t src3, ++#endif ++ __constant float *cx, ++ __constant float *cy, ++ int2 src_size) ++{ ++ int2 dst_pos = { get_global_id(0), get_global_id(1) }; ++ float2 dst_size = { get_global_size(0), get_global_size(1) }; ++ ++ float2 src_coord = (convert_float2(dst_pos) + 0.5f) * convert_float2(src_size) * native_recip(dst_size); ++ int2 src_pos = convert_int2(floor(src_coord - 0.5f)); ++ ++ int i, j; ++ int filterw2 = filterw >> 1; ++ int filterh2 = filterh >> 1; ++ int2 src_size_edge = src_size - 1; ++ float4 col2 = 0.0f, col3 = 0.0f, s2 = 0.0f, s3 = 0.0f; ++ ++ #pragma unroll ++ for (i = 0; i < filterh; ++i, s2 = s3 = 0.0f) { ++ #pragma unroll ++ for (j = 0; j < filterw; ++j) { ++ int2 read_pos = clamp(src_pos + (int2)(filterw2 - j, filterh2 - i), 0, src_size_edge); ++ float4 c2 = read_imagef(src2, sampler2, read_pos); ++ s2 += c2 * cx[dst_pos.x * filterw + j]; ++#ifdef NON_SEMI_PLANAR_IN ++ float4 c3 = read_imagef(src3, sampler2, read_pos); ++ s3 += c3 * cx[dst_pos.x * filterw + j]; ++#endif ++ } ++ col2 += s2 * cy[dst_pos.y * filterh + i]; ++#ifdef NON_SEMI_PLANAR_IN ++ col3 += s3 * cy[dst_pos.y * filterh + i]; ++#endif ++ } ++ ++#ifdef NON_SEMI_PLANAR_IN ++ float u = col2.x; ++ float v = col3.x; ++#else ++ float u = col2.x; ++ float v = col2.y; ++#endif ++ ++#ifdef NON_SEMI_PLANAR_OUT ++ write_imagef(dst2, dst_pos, (float4)(u, 0.0f, 0.0f, 1.0f)); ++ write_imagef(dst3, dst_pos, (float4)(v, 0.0f, 0.0f, 1.0f)); ++#else ++ write_imagef(dst2, dst_pos, (float4)(u, v, 0.0f, 1.0f)); ++#endif ++} ++#endif +Index: 
jellyfin-ffmpeg/libavfilter/opencl_source.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/opencl_source.h ++++ jellyfin-ffmpeg/libavfilter/opencl_source.h +@@ -27,6 +27,7 @@ extern const char *ff_opencl_source_desh + extern const char *ff_opencl_source_neighbor; + extern const char *ff_opencl_source_nlmeans; + extern const char *ff_opencl_source_overlay; ++extern const char *ff_opencl_source_scale; + extern const char *ff_opencl_source_pad; + extern const char *ff_opencl_source_tonemap; + extern const char *ff_opencl_source_transpose; +Index: jellyfin-ffmpeg/libavfilter/vf_scale_opencl.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vf_scale_opencl.c +@@ -0,0 +1,710 @@ ++/* ++ * Copyright (c) 2018 Gabriel Machado ++ * Copyright (c) 2021 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/common.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/mem.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++ ++#include "avfilter.h" ++#include "internal.h" ++#include "opencl.h" ++#include "opencl_source.h" ++#include "scale_eval.h" ++#include "video.h" ++ ++#define OPENCL_SOURCE_NB 2 ++ ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUV420P16, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++}; ++ ++enum filters { ++ F_AREA, ++ F_BICUBIC, ++ F_BILINEAR, ++ F_GAUSSIAN, ++ F_LANCZOS, ++ F_NEIGHBOR, ++ F_SINC, ++ F_SPLINE, ++ F_EXPERIMENTAL ++}; ++ ++static const int filter_radius[] = { ++ [F_AREA] = 1, ++ [F_BICUBIC] = 2, ++ [F_BILINEAR] = 1, ++ [F_GAUSSIAN] = 4, ++ [F_LANCZOS] = 3, ++ [F_NEIGHBOR] = -1, ++ [F_SINC] = 10, ++ [F_SPLINE] = 10, ++ [F_EXPERIMENTAL] = 4 ++}; ++ ++typedef struct ScaleOpenCLContext { ++ OpenCLFilterContext ocf; ++ ++ cl_command_queue command_queue; ++ cl_mem cx, cy; ++ cl_kernel kernel; ++ cl_kernel kernel_uv; ++ const char *kernel_name; ++ const char *kernel_name_uv; ++ ++ char *w_expr, *h_expr; ++ int dst_w, dst_h; ++ int src_w, src_h; ++ int passthrough; ++ int algorithm; ++ int force_original_aspect_ratio; ++ int force_divisible_by; ++ enum AVPixelFormat format; ++ ++ enum AVPixelFormat in_fmt, out_fmt; ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++ int in_planes, out_planes; ++ ++ int filterw, filterh; ++ int initialised; ++} ScaleOpenCLContext; ++ ++static float netravali(float t, float B, float C) ++{ ++ if (t > 2) { ++ return 0; ++ } else { ++ float tt = t * t; ++ float ttt = t * tt; ++ if (t < 1) { ++ return ((12 - 9 * B - 6 * C) * ttt + ++ (-18 + 12 * B + 6 * C) * tt + ++ (6 - 2 * B)) / 6; ++ } else { ++ return ((-B - 6 * C) * ttt + ++ (6 * B + 30 * C) * tt + ++ (-12 * B - 48 * C) * 
t + ++ (8 * B + 24 * C)) / 6; ++ } ++ } ++} ++ ++static float sinc(float t) ++{ ++ return (t == 0) ? 1.0 : sin(t * M_PI) / (t * M_PI); ++} ++ ++static float lanczos(float t, float a) ++{ ++ return (t < a) ? sinc(t) * sinc(t / a) : 0; ++} ++ ++static double spline(double a, double b, double c, double d, double dist) ++{ ++ if (dist <= 1.0) ++ return ((d * dist + c) * dist + b) * dist + a; ++ else ++ return spline(0.0, ++ b + 2.0 * c + 3.0 * d, ++ c + 3.0 * d, ++ -b - 3.0 * c - 6.0 * d, ++ dist - 1.0); ++} ++ ++static float calc_weight(int algorithm, float ratio, float t) ++{ ++ t = fabs(t); ++ ++ switch (algorithm) { ++ case F_AREA: { ++ float t2 = t - 0.5; ++ if (t2 * ratio < -0.5) ++ return 1; ++ else if (t2 * ratio < 0.5) ++ return -t2 * ratio + 0.5; ++ else ++ return 0; ++ } ++ ++ case F_BICUBIC: { ++ const float B = 0, C = 0.6; ++ return netravali(t, B, C); ++ } ++ ++ case F_BILINEAR: ++ return t < 1 ? (1 - t) : 0; ++ ++ case F_EXPERIMENTAL: { ++ double A = 1.0; ++ double c; ++ ++ if (t < 1.0) ++ c = cos(t * M_PI); ++ else ++ c = -1.0; ++ if (c < 0.0) ++ c = -pow(-c, A); ++ else ++ c = pow(c, A); ++ return c * 0.5 + 0.5; ++ } ++ ++ case F_GAUSSIAN: { ++ const float p = 3.0; ++ return exp2(-p * t * t); ++ } ++ ++ case F_LANCZOS: { ++ return lanczos(t, filter_radius[algorithm]); ++ } ++ ++ case F_NEIGHBOR: ++ return 1; ++ ++ case F_SINC: ++ return sinc(t); ++ ++ case F_SPLINE: { ++ const double p = -2.196152422706632; ++ return spline(1.0, 0.0, p, -p - 1.0, t); ++ } ++ } ++ ++ return 0; ++} ++ ++static int scale_opencl_init(AVFilterContext *avctx) ++{ ++ ScaleOpenCLContext *ctx = avctx->priv; ++ AVBPrint header; ++ const char *opencl_sources[OPENCL_SOURCE_NB]; ++ cl_int cle; ++ int i, j, err; ++ float scalex, scaley; ++ float *cx = NULL, *cy = NULL; ++ ++ av_bprint_init(&header, 512, AV_BPRINT_SIZE_UNLIMITED); ++ ++ if (ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h) { ++ if (ctx->passthrough && ctx->in_fmt == ctx->out_fmt) { ++ ctx->initialised = 1; ++ return 0; ++ } else { ++ av_bprintf(&header, "#define CONV\n"); ++ ctx->kernel_name = "conv_yuv"; ++ } ++ } else if (ctx->algorithm == F_NEIGHBOR) { ++ av_bprintf(&header, "#define NEIGHBOR\n"); ++ ctx->kernel_name = "neighbor"; ++ ctx->kernel_name_uv = "neighbor_uv"; ++ } else { ++ av_bprintf(&header, "#define SCALE\n"); ++ ctx->kernel_name = "scale"; ++ ctx->kernel_name_uv = "scale_uv"; ++ ++ scalex = FFMAX((float)(ctx->src_w / ctx->dst_w), 1); ++ scaley = FFMAX((float)(ctx->src_h / ctx->dst_h), 1); ++ ctx->filterw = ceil(2 * filter_radius[ctx->algorithm] * scalex); ++ ctx->filterh = ceil(2 * filter_radius[ctx->algorithm] * scaley); ++ ++ ctx->filterw = FFMIN(ctx->filterw, ctx->src_w - 2); ++ ctx->filterw = FFMAX(ctx->filterw, 1); ++ ctx->filterh = FFMIN(ctx->filterh, ctx->src_h - 2); ++ ctx->filterh = FFMAX(ctx->filterh, 1); ++ ++ av_bprintf(&header, "#define filterw %d\n", ctx->filterw); ++ av_bprintf(&header, "#define filterh %d\n", ctx->filterh); ++ ++ av_log(avctx, AV_LOG_DEBUG, "Filter size: %dx%d.\n", ctx->filterw, ctx->filterh); ++ ++ cx = av_malloc_array(ctx->dst_w * ctx->filterw, sizeof(cl_float)); ++ cy = av_malloc_array(ctx->dst_h * ctx->filterh, sizeof(cl_float)); ++ ++ if (!cx || !cy) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ for (i = 0; i < ctx->dst_w; ++i) { ++ float s_x = (i + 0.5) * ctx->src_w / ctx->dst_w - 0.5; ++ float t = s_x - floor(s_x); // fract ++ ++ float sum = 0; ++ for (j = 0; j < ctx->filterw; ++j) { ++ int x = ctx->filterw / 2 - j; ++ sum += cx[i * ctx->filterw + j] = 
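/* store each tap weight while accumulating their sum; the loop below divides by it so the taps of every destination position add up to 1 */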
calc_weight(ctx->algorithm, ++ scalex, ++ (x - t) / scalex); ++ } ++ ++ for (j = 0; j < ctx->filterw; ++j) ++ cx[i * ctx->filterw + j] /= sum; ++ } ++ ++ for (i = 0; i < ctx->dst_h; ++i) { ++ float s_y = (i + 0.5) * ctx->src_h / ctx->dst_h - 0.5; ++ float t = s_y - floor(s_y); // fract ++ ++ float sum = 0; ++ for (j = 0; j < ctx->filterh; ++j) { ++ int y = ctx->filterh / 2 - j; ++ sum += cy[i * ctx->filterh + j] = calc_weight(ctx->algorithm, ++ scaley, ++ (y - t) / scaley); ++ } ++ ++ for (j = 0; j < ctx->filterh; ++j) ++ cy[i * ctx->filterh + j] /= sum; ++ } ++ ++ ctx->cx = clCreateBuffer(ctx->ocf.hwctx->context, ++ CL_MEM_READ_ONLY | ++ CL_MEM_COPY_HOST_PTR | ++ CL_MEM_HOST_NO_ACCESS, ++ ctx->dst_w * ctx->filterw * sizeof(cl_float), ++ cx, ++ &cle); ++ ++ ctx->cy = clCreateBuffer(ctx->ocf.hwctx->context, ++ CL_MEM_READ_ONLY | ++ CL_MEM_COPY_HOST_PTR | ++ CL_MEM_HOST_NO_ACCESS, ++ ctx->dst_h * ctx->filterh * sizeof(cl_float), ++ cy, ++ &cle); ++ av_free(cx); ++ av_free(cy); ++ if (!ctx->cx || !ctx->cy) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create weights buffer: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ ++ if (ctx->in_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_IN\n"); ++ ++ if (ctx->out_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_OUT\n"); ++ ++ av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); ++ opencl_sources[0] = header.str; ++ opencl_sources[1] = ff_opencl_source_scale; ++ err = ff_opencl_filter_load_program(avctx, opencl_sources, OPENCL_SOURCE_NB); ++ ++ av_bprint_finalize(&header, NULL); ++ if (err < 0) ++ goto fail; ++ ++ ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, ++ ctx->ocf.hwctx->device_id, ++ 0, &cle); ++ if (!ctx->command_queue) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL command queue: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ ctx->kernel = clCreateKernel(ctx->ocf.program, ctx->kernel_name, &cle); ++ if (!ctx->kernel) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->kernel_name_uv) { ++ ctx->kernel_uv = clCreateKernel(ctx->ocf.program, ctx->kernel_name_uv, &cle); ++ if (!ctx->kernel_uv) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create kernel_uv: %d.\n", cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ ++ ctx->initialised = 1; ++ return 0; ++ ++fail: ++ av_bprint_finalize(&header, NULL); ++ if (ctx->command_queue) ++ clReleaseCommandQueue(ctx->command_queue); ++ if (ctx->kernel) ++ clReleaseKernel(ctx->kernel); ++ if (ctx->kernel_uv) ++ clReleaseKernel(ctx->kernel_uv); ++ if (ctx->cx) ++ clReleaseMemObject(ctx->cx); ++ if (ctx->cy) ++ clReleaseMemObject(ctx->cy); ++ if (cx) ++ av_free(cx); ++ if (cy) ++ av_free(cy); ++ return err; ++} ++ ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static int scale_opencl_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = avctx->inputs[0]; ++ ScaleOpenCLContext *ctx = avctx->priv; ++ AVHWFramesContext *in_frames_ctx; ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ const AVPixFmtDescriptor *in_desc; ++ const AVPixFmtDescriptor *out_desc; ++ int ret; ++ ++ if (!inlink->hw_frames_ctx) ++ return AVERROR(EINVAL); ++ in_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ in_format 
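/* hw frames carry their pixel data in sw_format; that layout is what the kernels actually read and write */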
= in_frames_ctx->sw_format; ++ out_format = (ctx->format == AV_PIX_FMT_NONE) ? in_format : ctx->format; ++ in_desc = av_pix_fmt_desc_get(in_format); ++ out_desc = av_pix_fmt_desc_get(out_format); ++ ++ if (!format_is_supported(in_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(in_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!format_is_supported(out_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ ++ ctx->in_fmt = in_format; ++ ctx->out_fmt = out_format; ++ ctx->in_desc = in_desc; ++ ctx->out_desc = out_desc; ++ ctx->in_planes = av_pix_fmt_count_planes(ctx->in_fmt); ++ ctx->out_planes = av_pix_fmt_count_planes(ctx->out_fmt); ++ ctx->ocf.output_format = out_format; ++ ++ if ((ret = ff_scale_eval_dimensions(ctx, ++ ctx->w_expr, ctx->h_expr, ++ inlink, outlink, ++ &ctx->dst_w, &ctx->dst_h)) < 0) ++ return ret; ++ ++ ff_scale_adjust_dimensions(inlink, &ctx->dst_w, &ctx->dst_h, ++ ctx->force_original_aspect_ratio, ctx->force_divisible_by); ++ ++ if (((int64_t)(ctx->dst_h * inlink->w)) > INT_MAX || ++ ((int64_t)(ctx->dst_w * inlink->h)) > INT_MAX) ++ av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); ++ ++ ctx->src_w = inlink->w; ++ ctx->src_h = inlink->h; ++ ctx->ocf.output_width = ctx->dst_w; ++ ctx->ocf.output_height = ctx->dst_h; ++ ++ if (ctx->passthrough && ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h && ctx->in_fmt == ctx->out_fmt) { ++ av_buffer_unref(&outlink->hw_frames_ctx); ++ outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); ++ if (!outlink->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ return 0; ++ } else { ++ ctx->passthrough = 0; ++ } ++ ++ ret = ff_opencl_filter_config_output(outlink); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static AVFrame *scale_opencl_get_video_buffer(AVFilterLink *inlink, int w, int h) ++{ ++ ScaleOpenCLContext *ctx = inlink->dst->priv; ++ ++ return ctx->passthrough ? 
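/* in passthrough mode the input frame is forwarded untouched, so no new surface needs to be allocated */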
ff_null_get_video_buffer(inlink, w, h) : ++ ff_default_get_video_buffer(inlink, w, h); ++} ++ ++static int scale_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) ++{ ++ AVFilterContext *avctx = inlink->dst; ++ AVFilterLink *outlink = avctx->outputs[0]; ++ ScaleOpenCLContext *ctx = avctx->priv; ++ int x_subsample = 1 << ctx->in_desc->log2_chroma_w; ++ int y_subsample = 1 << ctx->in_desc->log2_chroma_h; ++ AVFrame *output = NULL; ++ size_t global_work[2]; ++ cl_int cle; ++ cl_int2 src_size, uv_size; ++ int err, idx_arg1, idx_arg2; ++ ++ av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(input->format), ++ input->width, input->height, input->pts); ++ ++ if (!input->hw_frames_ctx) ++ return AVERROR(EINVAL); ++ ++ if (!ctx->initialised) { ++ err = scale_opencl_init(avctx); ++ if (err < 0) ++ goto fail; ++ } ++ ++ if (ctx->passthrough && ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h && ctx->in_fmt == ctx->out_fmt) ++ return ff_filter_frame(outlink, input); ++ ++ output = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!output) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ err = av_frame_copy_props(output, input); ++ if (err < 0) ++ goto fail; ++ output->width = outlink->w; ++ output->height = outlink->h; ++ ++ if (!output->data[0] || !input->data[0] || !output->data[1] || !input->data[1]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->out_planes > 2 && !output->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->in_planes > 2 && !input->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &output->data[0]); ++ CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &input->data[0]); ++ ++ if (ctx->src_w == ctx->dst_w && ctx->src_h == ctx->dst_h) { ++ CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_mem, &output->data[1]); ++ CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem, &input->data[1]); ++ ++ idx_arg1 = 4; ++ if (ctx->out_planes > 2) ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg1++, cl_mem, &output->data[2]); ++ if (ctx->in_planes > 2) ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg1++, cl_mem, &input->data[2]); ++ ++ // conv_yuv ++ global_work[0] = output->width / x_subsample; ++ global_work[1] = output->height / y_subsample; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Run kernel %s " ++ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", ++ ctx->kernel_name, global_work[0], global_work[1]); ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); ++ } else { ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, 0, cl_mem, &output->data[1]); ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, 1, cl_mem, &input->data[1]); ++ ++ idx_arg1 = 2; ++ if (ctx->out_planes > 2) ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &output->data[2]); ++ if (ctx->in_planes > 2) ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &input->data[2]); ++ ++ idx_arg2 = 2; ++ if (ctx->algorithm != F_NEIGHBOR) { ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_mem, &ctx->cx); ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_mem, &ctx->cy); ++ ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &ctx->cx); ++ CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_mem, &ctx->cy); ++ } ++ ++ src_size.s[0] = ctx->src_w; ++ src_size.s[1] = ctx->src_h; ++ uv_size.s[0] = src_size.s[0] / x_subsample; ++ uv_size.s[1] = src_size.s[1] / y_subsample; ++ CL_SET_KERNEL_ARG(ctx->kernel, idx_arg2++, cl_int2, &src_size); ++ 
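// All formats in supported_formats are 4:2:0, so the UV kernel runs on the subsampled chroma grid and is given uv_size (src_size divided by the chroma subsampling factors) rather than the luma plane size. ++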
CL_SET_KERNEL_ARG(ctx->kernel_uv, idx_arg1++, cl_int2, &uv_size); ++ ++ // scale, neighbor ++ global_work[0] = output->width; ++ global_work[1] = output->height; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Run kernel %s " ++ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", ++ ctx->kernel_name, global_work[0], global_work[1]); ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); ++ ++ // scale_uv, neighbor_uv ++ global_work[0] = output->width / x_subsample; ++ global_work[1] = output->height / y_subsample; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Run kernel %s " ++ "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", ++ ctx->kernel_name_uv, global_work[0], global_work[1]); ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_uv, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); ++ } ++ ++ cle = clFinish(ctx->command_queue); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle); ++ ++ av_frame_free(&input); ++ ++ av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(output->format), ++ output->width, output->height, output->pts); ++ ++ return ff_filter_frame(outlink, output); ++ ++fail: ++ clFinish(ctx->command_queue); ++ av_frame_free(&input); ++ av_frame_free(&output); ++ return err; ++} ++ ++static av_cold void scale_opencl_uninit(AVFilterContext *avctx) ++{ ++ ScaleOpenCLContext *ctx = avctx->priv; ++ cl_int cle; ++ ++ if (ctx->kernel) { ++ cle = clReleaseKernel(ctx->kernel); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "kernel: %d.\n", cle); ++ } ++ ++ if (ctx->kernel_uv) { ++ cle = clReleaseKernel(ctx->kernel_uv); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "kernel_uv: %d.\n", cle); ++ } ++ ++ if (ctx->command_queue) { ++ cle = clReleaseCommandQueue(ctx->command_queue); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "command queue: %d.\n", cle); ++ } ++ ++ if (ctx->cx) { ++ cle = clReleaseMemObject(ctx->cx); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "weights buffer: %d.\n", cle); ++ } ++ ++ if (ctx->cy) { ++ cle = clReleaseMemObject(ctx->cy); ++ if (cle != CL_SUCCESS) ++ av_log(avctx, AV_LOG_ERROR, "Failed to release " ++ "weights buffer: %d.\n", cle); ++ } ++ ++ ff_opencl_filter_uninit(avctx); ++} ++ ++#define OFFSET(x) offsetof(ScaleOpenCLContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) ++static const AVOption scale_opencl_options[] = { ++ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "format", "Output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" }, ++ { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++ { "algo", "Scaling algorithm", OFFSET(algorithm), AV_OPT_TYPE_INT, { .i64 = F_BILINEAR }, INT_MIN, INT_MAX, FLAGS, "algo" }, ++ { "area", "Area averaging", 0, AV_OPT_TYPE_CONST, { .i64 = F_AREA }, 0, 0, FLAGS, "algo" }, ++ { "bicubic", "Bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = F_BICUBIC }, 0, 0, FLAGS, "algo" }, ++ { "bilinear", 
"Bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = F_BILINEAR }, 0, 0, FLAGS, "algo" }, ++ { "gauss", "Gaussian", 0, AV_OPT_TYPE_CONST, { .i64 = F_GAUSSIAN }, 0, 0, FLAGS, "algo" }, ++ { "lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = F_LANCZOS }, 0, 0, FLAGS, "algo" }, ++ { "neighbor", "Nearest Neighbor", 0, AV_OPT_TYPE_CONST, { .i64 = F_NEIGHBOR }, 0, 0, FLAGS, "algo" }, ++ { "sinc", "Sinc", 0, AV_OPT_TYPE_CONST, { .i64 = F_SINC }, 0, 0, FLAGS, "algo" }, ++ { "spline", "Bicubic Spline", 0, AV_OPT_TYPE_CONST, { .i64 = F_SPLINE }, 0, 0, FLAGS, "algo" }, ++ { "experimental", "Experimental", 0, AV_OPT_TYPE_CONST, { .i64 = F_EXPERIMENTAL }, 0, 0, FLAGS, "algo" }, ++ { "force_original_aspect_ratio", "Decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" }, ++ { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, ++ { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, ++ { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, ++ { "force_divisible_by", "Enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS }, ++ { NULL } ++}; ++ ++AVFILTER_DEFINE_CLASS(scale_opencl); ++ ++static const AVFilterPad scale_opencl_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = &scale_opencl_filter_frame, ++ .get_video_buffer = &scale_opencl_get_video_buffer, ++ .config_props = &ff_opencl_filter_config_input, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad scale_opencl_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = &scale_opencl_config_output, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vf_scale_opencl = { ++ .name = "scale_opencl", ++ .description = NULL_IF_CONFIG_SMALL("Scale the input video size through OpenCL."), ++ .priv_size = sizeof(ScaleOpenCLContext), ++ .priv_class = &scale_opencl_class, ++ .init = &ff_opencl_filter_init, ++ .uninit = &scale_opencl_uninit, ++ .query_formats = &ff_opencl_filter_query_formats, ++ .inputs = scale_opencl_inputs, ++ .outputs = scale_opencl_outputs, ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/debian/patches/series b/debian/patches/series index 2f13511a402..a59a133267e 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -2,3 +2,4 @@ 0002-add-cuda-pixfmt-converter-impl.patch 0003-add-cuda-tonemap-impl.patch 0004-add-amf-refactor-and-hevc-10-bit-encoding.patch +0005-add-opencl-scaler-and-pixfmt-converter-impl.patch From 303e0a390ef2d6cc43cb3e09851ab57c358af834 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:38:25 +0800 Subject: [PATCH 15/41] add bt2390 eetf and code refactor to opencl tonemap --- ...-and-code-refactor-to-opencl-tonemap.patch | 1302 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 1303 insertions(+) create mode 100644 debian/patches/0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch diff --git a/debian/patches/0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch b/debian/patches/0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch new file mode 100644 index 00000000000..19456a2b17c --- /dev/null +++ b/debian/patches/0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch @@ -0,0 +1,1302 @@ +Index: jellyfin-ffmpeg/libavfilter/opencl/colorspace_common.cl 
+=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/opencl/colorspace_common.cl ++++ jellyfin-ffmpeg/libavfilter/opencl/colorspace_common.cl +@@ -17,7 +17,24 @@ + */ + + #define ST2084_MAX_LUMINANCE 10000.0f +-#define REFERENCE_WHITE 100.0f ++ ++#if (defined(TONE_FUNC) && TONE_FUNC == bt2390) ++ #define REF_WHITE 203.0f ++#else ++ #define REF_WHITE 100.0f ++#endif ++ ++#define ST2084_M1 0.1593017578125f ++#define ST2084_M2 78.84375f ++#define ST2084_C1 0.8359375f ++#define ST2084_C2 18.8515625f ++#define ST2084_C3 18.6875f ++ ++#define ARIB_B67_A 0.17883277f ++#define ARIB_B67_B 0.28466892f ++#define ARIB_B67_C 0.55991073f ++ ++#define FLOAT_EPS 1.175494351e-38f + + #if chroma_loc == 1 + #define chroma_sample(a,b,c,d) (((a) + (c)) * 0.5f) +@@ -33,12 +50,6 @@ + #define chroma_sample(a,b,c,d) (((a) + (b) + (c) + (d)) * 0.25f) + #endif + +-constant const float ST2084_M1 = 0.1593017578125f; +-constant const float ST2084_M2 = 78.84375f; +-constant const float ST2084_C1 = 0.8359375f; +-constant const float ST2084_C2 = 18.8515625f; +-constant const float ST2084_C3 = 18.6875f; +- + float get_luma_dst(float3 c) { + return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z; + } +@@ -51,61 +62,99 @@ float3 get_chroma_sample(float3 a, float + return chroma_sample(a, b, c, d); + } + ++// linearizer for PQ/ST2084 + float eotf_st2084(float x) { +- float p = powr(x, 1.0f / ST2084_M2); +- float a = max(p -ST2084_C1, 0.0f); +- float b = max(ST2084_C2 - ST2084_C3 * p, 1e-6f); +- float c = powr(a / b, 1.0f / ST2084_M1); +- return x > 0.0f ? c * ST2084_MAX_LUMINANCE / REFERENCE_WHITE : 0.0f; +-} +- +-__constant const float HLG_A = 0.17883277f; +-__constant const float HLG_B = 0.28466892f; +-__constant const float HLG_C = 0.55991073f; +- +-// linearizer for HLG +-float inverse_oetf_hlg(float x) { +- float a = 4.0f * x * x; +- float b = exp((x - HLG_C) / HLG_A) + HLG_B; +- return x < 0.5f ? a : b; +-} +- +-// delinearizer for HLG +-float oetf_hlg(float x) { +- float a = 0.5f * sqrt(x); +- float b = HLG_A * log(x - HLG_B) + HLG_C; +- return x <= 1.0f ? a : b; +-} +- +-float3 ootf_hlg(float3 c, float peak) { +- float luma = get_luma_src(c); +- float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f); +- gamma = max(1.0f, gamma); +- float factor = peak * powr(luma, gamma - 1.0f) / powr(12.0f, gamma); +- return c * factor; +-} +- +-float3 inverse_ootf_hlg(float3 c, float peak) { +- float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f); +- c *= powr(12.0f, gamma) / peak; +- c /= powr(get_luma_dst(c), (gamma - 1.0f) / gamma); +- return c; ++ x = max(x, 0.0f); ++ float xpow = native_powr(x, 1.0f / ST2084_M2); ++ float num = max(xpow - ST2084_C1, 0.0f); ++ float den = max(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); ++ x = native_powr(num / den, 1.0f / ST2084_M1); ++ return x * ST2084_MAX_LUMINANCE / REF_WHITE; ++} ++ ++// delinearizer for PQ/ST2084 ++float inverse_eotf_st2084(float x) { ++ x = max(x, 0.0f); ++ x *= REF_WHITE / ST2084_MAX_LUMINANCE; ++ float xpow = native_powr(x, ST2084_M1); ++#if 0 ++ // Original formulation from SMPTE ST 2084:2014 publication. ++ float num = ST2084_C1 + ST2084_C2 * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return native_powr(num / den, ST2084_M2); ++#else ++ // More stable arrangement that avoids some cancellation error. 
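++ // Both branches compute the same function, since algebraically
++ // (C1 + C2*y) / (1 + C3*y) == 1 + ((C1 - 1) + (C2 - C3)*y) / (1 + C3*y), with y = x^M1.
++ // Evaluating the offset from 1.0 directly preserves precision when the quotient
++ // is close to 1.0, where powr(..., M2 = 78.84375) would amplify the rounding error.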
++ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return native_powr(1.0f + num / den, ST2084_M2); ++#endif + } + +-float inverse_eotf_bt1886(float c) { +- return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f); ++float ootf_1_2(float x) { ++ return x > 0.0f ? native_powr(x, 1.2f) : x; + } + +-float oetf_bt709(float c) { +- c = c < 0.0f ? 0.0f : c; +- float r1 = 4.5f * c; +- float r2 = 1.099f * powr(c, 0.45f) - 0.099f; +- return c < 0.018f ? r1 : r2; +-} +-float inverse_oetf_bt709(float c) { +- float r1 = c / 4.5f; +- float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f); +- return c < 0.081f ? r1 : r2; ++float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? native_powr(x, 1.0f / 1.2f) : x; ++} ++ ++float oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? native_sqrt(3.0f * x) ++ : (ARIB_B67_A * native_log(12.0f * x - ARIB_B67_B) + ARIB_B67_C); ++} ++ ++float inverse_oetf_arib_b67(float x) { ++ x = max(x, 0.0f); ++ return x <= 0.5f ++ ? (x * x) * (1.0f / 3.0f) ++ : (native_exp((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); ++} ++ ++// linearizer for HLG/ARIB-B67 ++float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)); ++} ++ ++// delinearizer for HLG/ARIB-B67 ++float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x)); ++} ++ ++// delinearizer for BT709, BT2020-10 ++float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? native_powr(x, 1.0f / 2.4f) : 0.0f; ++} ++ ++#ifdef TRC_LUT ++float linearize_lut(float x) { ++ return lin_lut[clamp(convert_int(x * 1023.0f), 0, 1023)]; ++} ++ ++float delinearize_lut(float x) { ++ return delin_lut[clamp(convert_int(x * 1023.0f), 0, 1023)]; ++} ++#endif ++ ++float linearize_pq(float x) { ++#ifdef TRC_LUT_PQ ++ return pqlin_lut[clamp(convert_int(x * 1023.0f), 0, 1023)]; ++#elif defined(TRC_LUT) ++ return linearize_lut(x); ++#else ++ return eotf_st2084(x); ++#endif ++} ++ ++float delinearize_pq(float x) { ++#ifdef TRC_LUT_PQ ++ return pqdelin_lut[clamp(convert_int(x * 1023.0f), 0, 1023)]; ++#elif defined(TRC_LUT) ++ return delinearize_lut(x); ++#else ++ return inverse_eotf_st2084(x); ++#endif + } + + float3 yuv2rgb(float y, float u, float v) { +@@ -187,19 +236,3 @@ float3 lrgb2lrgb(float3 c) { + return (float3)(rr, gg, bb); + #endif + } +- +-float3 ootf(float3 c, float peak) { +-#ifdef ootf_impl +- return ootf_impl(c, peak); +-#else +- return c; +-#endif +-} +- +-float3 inverse_ootf(float3 c, float peak) { +-#ifdef inverse_ootf_impl +- return inverse_ootf_impl(c, peak); +-#else +- return c; +-#endif +-} +Index: jellyfin-ffmpeg/libavfilter/opencl/tonemap.cl +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/opencl/tonemap.cl ++++ jellyfin-ffmpeg/libavfilter/opencl/tonemap.cl +@@ -16,54 +16,51 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-#define REFERENCE_WHITE 100.0f ++#define FLOAT_EPS 1.175494351e-38f ++ + extern float3 lrgb2yuv(float3); + extern float lrgb2y(float3); + extern float3 yuv2lrgb(float3); + extern float3 lrgb2lrgb(float3); ++extern float linearize_pq(float); ++extern float delinearize_pq(float); ++extern float inverse_eotf_st2084(float); + extern float get_luma_src(float3); + extern float get_luma_dst(float3); +-extern float3 ootf(float3 c, float peak); +-extern float3 inverse_ootf(float3 c, float peak); + extern float3 get_chroma_sample(float3, float3, float3, float3); + +-struct 
detection_result { +- float peak; +- float average; +-}; +- + float hable_f(float in) { + float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f; + return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f; + } + +-float direct(float s, float peak) { ++float direct(float s, float peak, float target_peak) { + return s; + } + +-float linear(float s, float peak) { ++float linear(float s, float peak, float target_peak) { + return s * tone_param / peak; + } + +-float gamma(float s, float peak) { +- float p = s > 0.05f ? s /peak : 0.05f / peak; +- float v = powr(p, 1.0f / tone_param); +- return s > 0.05f ? v : (s * v /0.05f); ++float gamma(float s, float peak, float target_peak) { ++ float p = s > 0.05f ? s / peak : 0.05f / peak; ++ float v = native_powr(p, 1.0f / tone_param); ++ return s > 0.05f ? v : (s * v / 0.05f); + } + +-float clip(float s, float peak) { ++float clip(float s, float peak, float target_peak) { + return clamp(s * tone_param, 0.0f, 1.0f); + } + +-float reinhard(float s, float peak) { ++float reinhard(float s, float peak, float target_peak) { + return s / (s + tone_param) * (peak + tone_param) / peak; + } + +-float hable(float s, float peak) { +- return hable_f(s)/hable_f(peak); ++float hable(float s, float peak, float target_peak) { ++ return hable_f(s) / hable_f(peak); + } + +-float mobius(float s, float peak) { ++float mobius(float s, float peak, float target_peak) { + float j = tone_param; + float a, b; + +@@ -71,102 +68,32 @@ float mobius(float s, float peak) { + return s; + + a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak); +- b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f); ++ b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, FLOAT_EPS); + + return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b); + } + +-// detect peak/average signal of a frame, the algorithm was ported from: +-// libplacebo (https://github.com/haasn/libplacebo) +-struct detection_result +-detect_peak_avg(global uint *util_buf, __local uint *sum_wg, +- float signal, float peak) { +-// layout of the util buffer +-// +-// Name: : Size (units of 4-bytes) +-// average buffer : detection_frames + 1 +-// peak buffer : detection_frames + 1 +-// workgroup counter : 1 +-// total of peak : 1 +-// total of average : 1 +-// frame index : 1 +-// frame number : 1 +- global uint *avg_buf = util_buf; +- global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1; +- global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1; +- global uint *max_total_p = counter_wg_p + 1; +- global uint *avg_total_p = max_total_p + 1; +- global uint *frame_idx_p = avg_total_p + 1; +- global uint *scene_frame_num_p = frame_idx_p + 1; +- +- uint frame_idx = *frame_idx_p; +- uint scene_frame_num = *scene_frame_num_p; +- +- size_t lidx = get_local_id(0); +- size_t lidy = get_local_id(1); +- size_t lsizex = get_local_size(0); +- size_t lsizey = get_local_size(1); +- uint num_wg = get_num_groups(0) * get_num_groups(1); +- size_t group_idx = get_group_id(0); +- size_t group_idy = get_group_id(1); +- struct detection_result r = {peak, sdr_avg}; +- if (lidx == 0 && lidy == 0) +- *sum_wg = 0; +- barrier(CLK_LOCAL_MEM_FENCE); +- +- // update workgroup sum +- atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE)); +- barrier(CLK_LOCAL_MEM_FENCE); +- +- // update frame peak/avg using work-group-average. 
+- if (lidx == 0 && lidy == 0) { +- uint avg_wg = *sum_wg / (lsizex * lsizey); +- atomic_max(&peak_buf[frame_idx], avg_wg); +- atomic_add(&avg_buf[frame_idx], avg_wg); +- } +- +- if (scene_frame_num > 0) { +- float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num); +- float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num); +- r.peak = max(1.0f, peak); +- r.average = max(0.25f, avg); +- } ++float bt2390(float s, float peak, float target_peak) { ++ float peak_pq = inverse_eotf_st2084(peak); ++ float scale = peak_pq > 0.0f ? (1.0f / peak_pq) : 1.0f; ++ ++ float s_pq = inverse_eotf_st2084(s) * scale; ++ float max_lum = inverse_eotf_st2084(target_peak) * scale; ++ ++ float ks = 1.5f * max_lum - 0.5f; ++ float tb = (s_pq - ks) / (1.0f - ks); ++ float tb2 = tb * tb; ++ float tb3 = tb2 * tb; ++ float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + ++ (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + ++ (-2.0f * tb3 + 3.0f * tb2) * max_lum; ++ float sig = mix(pb, s_pq, s_pq < ks); + +- if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) { +- *counter_wg_p = 0; +- avg_buf[frame_idx] /= num_wg; +- +- if (scene_threshold > 0.0f) { +- uint cur_max = peak_buf[frame_idx]; +- uint cur_avg = avg_buf[frame_idx]; +- int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p; +- +- if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) { +- for (uint i = 0; i < DETECTION_FRAMES + 1; i++) +- avg_buf[i] = 0; +- for (uint i = 0; i < DETECTION_FRAMES + 1; i++) +- peak_buf[i] = 0; +- *avg_total_p = *max_total_p = 0; +- *scene_frame_num_p = 0; +- avg_buf[frame_idx] = cur_avg; +- peak_buf[frame_idx] = cur_max; +- } +- } +- uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1); +- // add current frame, subtract next frame +- *max_total_p += peak_buf[frame_idx] - peak_buf[next]; +- *avg_total_p += avg_buf[frame_idx] - avg_buf[next]; +- // reset next frame +- peak_buf[next] = avg_buf[next] = 0; +- *frame_idx_p = next; +- *scene_frame_num_p = min(*scene_frame_num_p + 1, +- (uint)DETECTION_FRAMES); +- } +- return r; ++ return linearize_pq(sig * peak_pq); + } + +-float3 map_one_pixel_rgb(float3 rgb, float peak, float average) { +- float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f); ++float3 map_one_pixel_rgb(float3 rgb, float peak) { ++ float sig = max(max(rgb.x, max(rgb.y, rgb.z)), FLOAT_EPS); + + // Rescale the variables in order to bring it into a representation where + // 1.0 represents the dst_peak. 
This is because all of the tone mapping +@@ -178,95 +105,91 @@ float3 map_one_pixel_rgb(float3 rgb, flo + + float sig_old = sig; + +- // Scale the signal to compensate for differences in the average brightness +- float slope = min(1.0f, sdr_avg / average); +- sig *= slope; +- peak *= slope; +- + // Desaturate the color using a coefficient dependent on the signal level + if (desat_param > 0.0f) { + float luma = get_luma_dst(rgb); +- float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f); ++ float coeff = max(sig - 0.18f, FLOAT_EPS) / max(sig, FLOAT_EPS); + coeff = native_powr(coeff, 10.0f / desat_param); + rgb = mix(rgb, (float3)luma, (float3)coeff); +- sig = mix(sig, luma * slope, coeff); + } + +- sig = TONE_FUNC(sig, peak); +- ++ sig = TONE_FUNC(sig, peak, target_peak); + sig = min(sig, 1.0f); +- rgb *= (sig/sig_old); ++ rgb *= (sig / sig_old); ++ + return rgb; + } +-// map from source space YUV to destination space RGB +-float3 map_to_dst_space_from_yuv(float3 yuv, float peak) { ++ ++// Map from source space YUV to destination space RGB ++float3 map_to_dst_space_from_yuv(float3 yuv) { + float3 c = yuv2lrgb(yuv); +- c = ootf(c, peak); + c = lrgb2lrgb(c); + return c; + } + ++__constant sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_ADDRESS_CLAMP_TO_EDGE | ++ CLK_FILTER_NEAREST); ++ + __kernel void tonemap(__write_only image2d_t dst1, + __read_only image2d_t src1, + __write_only image2d_t dst2, + __read_only image2d_t src2, +- global uint *util_buf, ++#ifdef NON_SEMI_PLANAR_OUT ++ __write_only image2d_t dst3, ++#endif ++#ifdef NON_SEMI_PLANAR_IN ++ __read_only image2d_t src3, ++#endif + float peak + ) + { +- __local uint sum_wg; +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_ADDRESS_CLAMP_TO_EDGE | +- CLK_FILTER_NEAREST); + int xi = get_global_id(0); + int yi = get_global_id(1); + // each work item process four pixels + int x = 2 * xi; + int y = 2 * yi; + +- float y0 = read_imagef(src1, sampler, (int2)(x, y)).x; +- float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x; +- float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x; +- float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x; +- float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy; +- +- float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y), peak); +- float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y), peak); +- float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y), peak); +- float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y), peak); +- +- float sig0 = max(c0.x, max(c0.y, c0.z)); +- float sig1 = max(c1.x, max(c1.y, c1.z)); +- float sig2 = max(c2.x, max(c2.y, c2.z)); +- float sig3 = max(c3.x, max(c3.y, c3.z)); +- float sig = max(sig0, max(sig1, max(sig2, sig3))); +- +- struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak); +- +- float3 c0_old = c0, c1_old = c1, c2_old = c2; +- c0 = map_one_pixel_rgb(c0, r.peak, r.average); +- c1 = map_one_pixel_rgb(c1, r.peak, r.average); +- c2 = map_one_pixel_rgb(c2, r.peak, r.average); +- c3 = map_one_pixel_rgb(c3, r.peak, r.average); +- +- c0 = inverse_ootf(c0, target_peak); +- c1 = inverse_ootf(c1, target_peak); +- c2 = inverse_ootf(c2, target_peak); +- c3 = inverse_ootf(c3, target_peak); +- +- y0 = lrgb2y(c0); +- y1 = lrgb2y(c1); +- y2 = lrgb2y(c2); +- y3 = lrgb2y(c3); +- float3 chroma_c = get_chroma_sample(c0, c1, c2, c3); +- float3 chroma = lrgb2yuv(chroma_c); +- + if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) { +- write_imagef(dst1, (int2)(x, y), (float4)(y0, 
0.0f, 0.0f, 1.0f));
+- write_imagef(dst1, (int2)(x+1, y), (float4)(y1, 0.0f, 0.0f, 1.0f));
+- write_imagef(dst1, (int2)(x, y+1), (float4)(y2, 0.0f, 0.0f, 1.0f));
+- write_imagef(dst1, (int2)(x+1, y+1), (float4)(y3, 0.0f, 0.0f, 1.0f));
+- write_imagef(dst2, (int2)(xi, yi),
+- (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
++ float y0 = read_imagef(src1, sampler, (int2)(x, y)).x;
++ float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x;
++ float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x;
++ float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x;
++#ifdef NON_SEMI_PLANAR_IN
++ float u = read_imagef(src2, sampler, (int2)(xi, yi)).x;
++ float v = read_imagef(src3, sampler, (int2)(xi, yi)).x;
++ float2 uv = (float2)(u, v);
++#else
++ float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy;
++#endif
++
++ float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y));
++ float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y));
++ float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y));
++ float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y));
++
++ c0 = map_one_pixel_rgb(c0, peak);
++ c1 = map_one_pixel_rgb(c1, peak);
++ c2 = map_one_pixel_rgb(c2, peak);
++ c3 = map_one_pixel_rgb(c3, peak);
++
++ y0 = lrgb2y(c0);
++ y1 = lrgb2y(c1);
++ y2 = lrgb2y(c2);
++ y3 = lrgb2y(c3);
++
++ float3 chroma_c = get_chroma_sample(c0, c1, c2, c3);
++ float3 chroma = lrgb2yuv(chroma_c);
++
++ write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f));
++ write_imagef(dst1, (int2)(x + 1, y), (float4)(y1, 0.0f, 0.0f, 1.0f));
++ write_imagef(dst1, (int2)(x, y + 1), (float4)(y2, 0.0f, 0.0f, 1.0f));
++ write_imagef(dst1, (int2)(x + 1, y + 1), (float4)(y3, 0.0f, 0.0f, 1.0f));
++#ifdef NON_SEMI_PLANAR_OUT
++ write_imagef(dst2, (int2)(xi, yi), (float4)(chroma.y, 0.0f, 0.0f, 1.0f));
++ write_imagef(dst3, (int2)(xi, yi), (float4)(chroma.z, 0.0f, 0.0f, 1.0f));
++#else
++ write_imagef(dst2, (int2)(xi, yi), (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
++#endif
+ }
+ }
+Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_opencl.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/vf_tonemap_opencl.c
++++ jellyfin-ffmpeg/libavfilter/vf_tonemap_opencl.c
+@@ -15,6 +15,7 @@
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
++
+ #include <float.h>
+
+ #include "libavutil/avassert.h"
+@@ -31,12 +32,36 @@
+ #include "video.h"
+ #include "colorspace.h"
+
+-// TODO:
+-// - separate peak-detection from tone-mapping kernel to solve
+-// one-frame-delay issue.
+-// - more format support
++#define OPENCL_SOURCE_NB 3
++
++#define FLOAT_EPS 1.175494351e-38f
+
+-#define DETECTION_FRAMES 63
++#define ST2084_MAX_LUMINANCE 10000.0f
++#define REF_WHITE_BT2390 203.0f
++#define REF_WHITE_DEFAULT 100.0f
++
++#define ST2084_M1 0.1593017578125f
++#define ST2084_M2 78.84375f
++#define ST2084_C1 0.8359375f
++#define ST2084_C2 18.8515625f
++#define ST2084_C3 18.6875f
++
++#define ARIB_B67_A 0.17883277f
++#define ARIB_B67_B 0.28466892f
++#define ARIB_B67_C 0.55991073f
++
++#define MAX(a, b) ((a) > (b) ? (a) : (b))
++#define MIN(a, b) ((a) < (b) ?
(a) : (b)) ++#define CLAMP(a, b, c) MIN(MAX((a), (b)), (c)) ++#define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a) ++ ++static const enum AVPixelFormat supported_formats[] = { ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUV420P16, ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_P010, ++ AV_PIX_FMT_P016, ++}; + + enum TonemapAlgorithm { + TONEMAP_NONE, +@@ -46,6 +71,7 @@ enum TonemapAlgorithm { + TONEMAP_REINHARD, + TONEMAP_HABLE, + TONEMAP_MOBIUS, ++ TONEMAP_BT2390, + TONEMAP_MAX, + }; + +@@ -57,23 +83,30 @@ typedef struct TonemapOpenCLContext { + enum AVColorPrimaries primaries, primaries_in, primaries_out; + enum AVColorRange range, range_in, range_out; + enum AVChromaLocation chroma_loc; ++ enum AVPixelFormat in_fmt, out_fmt; ++ const AVPixFmtDescriptor *in_desc, *out_desc; ++ int in_planes, out_planes; ++ ++ float *lin_lut, *delin_lut; ++ float *pqlin_lut, *pqdelin_lut; + + enum TonemapAlgorithm tonemap; + enum AVPixelFormat format; ++ double ref_white; + double peak; + double param; + double desat_param; + double target_peak; + double scene_threshold; ++ int lut_trc; + int initialised; + cl_kernel kernel; + cl_command_queue command_queue; +- cl_mem util_mem; + } TonemapOpenCLContext; + + static const char *const linearize_funcs[AVCOL_TRC_NB] = { +- [AVCOL_TRC_SMPTE2084] = "eotf_st2084", +- [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg", ++ [AVCOL_TRC_SMPTE2084] = "eotf_st2084", ++ [AVCOL_TRC_ARIB_STD_B67] = "eotf_arib_b67", + }; + + static const char *const delinearize_funcs[AVCOL_TRC_NB] = { +@@ -99,8 +132,161 @@ static const char *const tonemap_func[TO + [TONEMAP_REINHARD] = "reinhard", + [TONEMAP_HABLE] = "hable", + [TONEMAP_MOBIUS] = "mobius", ++ [TONEMAP_BT2390] = "bt2390", + }; + ++// linearizer for PQ/ST2084 ++static float eotf_st2084(float x, float ref_white) ++{ ++ x = FFMAX(x, 0.0f); ++ float xpow = powf(x, 1.0f / ST2084_M2); ++ float num = FFMAX(xpow - ST2084_C1, 0.0f); ++ float den = FFMAX(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); ++ x = powf(num / den, 1.0f / ST2084_M1); ++ return x * ST2084_MAX_LUMINANCE / ref_white; ++} ++ ++// delinearizer for PQ/ST2084 ++static float inverse_eotf_st2084(float x, float ref_white) ++{ ++ x = FFMAX(x, 0.0f); ++ x *= ref_white / ST2084_MAX_LUMINANCE; ++ float xpow = powf(x, ST2084_M1); ++#if 0 ++ // Original formulation from SMPTE ST 2084:2014 publication. ++ float num = ST2084_C1 + ST2084_C2 * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return powf(num / den, ST2084_M2); ++#else ++ // More stable arrangement that avoids some cancellation error. ++ float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; ++ float den = 1.0f + ST2084_C3 * xpow; ++ return powf(1.0f + num / den, ST2084_M2); ++#endif ++} ++ ++static float ootf_1_2(float x) { ++ return x > 0.0f ? powf(x, 1.2f) : x; ++} ++ ++static float inverse_ootf_1_2(float x) { ++ return x > 0.0f ? powf(x, 1.0f / 1.2f) : x; ++} ++ ++static float oetf_arib_b67(float x) { ++ x = FFMAX(x, 0.0f); ++ return x <= (1.0f / 12.0f) ++ ? sqrtf(3.0f * x) ++ : (ARIB_B67_A * logf(12.0f * x - ARIB_B67_B) + ARIB_B67_C); ++} ++ ++static float inverse_oetf_arib_b67(float x) { ++ x = FFMAX(x, 0.0f); ++ return x <= 0.5f ++ ? 
(x * x) * (1.0f / 3.0f) ++ : (expf((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); ++} ++ ++// linearizer for HLG/ARIB-B67 ++static float eotf_arib_b67(float x) { ++ return ootf_1_2(inverse_oetf_arib_b67(x)); ++} ++ ++// delinearizer for HLG/ARIB-B67 ++static float inverse_eotf_arib_b67(float x) { ++ return oetf_arib_b67(inverse_ootf_1_2(x)); ++} ++ ++// delinearizer for BT709, BT2020-10 ++static float inverse_eotf_bt1886(float x) { ++ return x > 0.0f ? powf(x, 1.0f / 2.4f) : 0.0f; ++} ++ ++static float linearize(float x, float ref_white, enum AVColorTransferCharacteristic trc_in) ++{ ++ if (trc_in == AVCOL_TRC_SMPTE2084) ++ return eotf_st2084(x, ref_white); ++ else if (trc_in == AVCOL_TRC_ARIB_STD_B67) ++ return eotf_arib_b67(x); ++ else ++ return x; ++} ++ ++static float delinearize(float x, float ref_white, enum AVColorTransferCharacteristic trc_out) ++{ ++ if (trc_out == AVCOL_TRC_BT709 || trc_out == AVCOL_TRC_BT2020_10) ++ return inverse_eotf_bt1886(x); ++ if (trc_out == AVCOL_TRC_SMPTE2084) ++ return inverse_eotf_st2084(x, ref_white); ++ else ++ return x; ++} ++ ++static int compute_trc_luts(AVFilterContext *avctx) ++{ ++ TonemapOpenCLContext *ctx = avctx->priv; ++ int lut_pq = ctx->tonemap == TONEMAP_BT2390 && ctx->trc_in != AVCOL_TRC_SMPTE2084; ++ int i; ++ ++ if (!ctx->lin_lut && !(ctx->lin_lut = av_calloc(1024, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ if (!ctx->delin_lut && !(ctx->delin_lut = av_calloc(1024, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ if (lut_pq) { ++ if (!ctx->pqlin_lut && !(ctx->pqlin_lut = av_calloc(1024, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ if (!ctx->pqdelin_lut && !(ctx->pqdelin_lut = av_calloc(1024, sizeof(float)))) ++ return AVERROR(ENOMEM); ++ } ++ ++ for (i = 0; i < 1024; i++) { ++ float x = i / 1023.0f; ++ ctx->lin_lut[i] = FFMAX(linearize(x, ctx->ref_white, ctx->trc_in), 0.0f); ++ ctx->delin_lut[i] = FFMAX(delinearize(x, ctx->ref_white, ctx->trc_out), 0.0f); ++ if (lut_pq) { ++ ctx->pqlin_lut[i] = FFMAX(linearize(x, ctx->ref_white, AVCOL_TRC_SMPTE2084), 0.0f); ++ ctx->pqdelin_lut[i] = FFMAX(delinearize(x, ctx->ref_white, AVCOL_TRC_SMPTE2084), 0.0f); ++ } ++ } ++ ++ return 0; ++} ++ ++static void print_opencl_const_trc_luts(AVFilterContext *avctx, AVBPrint *buf) ++{ ++ TonemapOpenCLContext *ctx = avctx->priv; ++ int i; ++ ++ if (ctx->lin_lut) { ++ av_bprintf(buf, "__constant float lin_lut[1024] = {\n"); ++ for (i = 0; i < 1024; i++) { ++ av_bprintf(buf, " %.5ff,", ctx->lin_lut[i]); ++ } ++ av_bprintf(buf, "};\n"); ++ } ++ if (ctx->delin_lut) { ++ av_bprintf(buf, "__constant float delin_lut[1024] = {\n"); ++ for (i = 0; i < 1024; i++) { ++ av_bprintf(buf, " %.5ff,", ctx->delin_lut[i]); ++ } ++ av_bprintf(buf, "};\n"); ++ } ++ if (ctx->pqlin_lut) { ++ av_bprintf(buf, "__constant float pqlin_lut[1024] = {\n"); ++ for (i = 0; i < 1024; i++) { ++ av_bprintf(buf, " %.5ff,", ctx->pqlin_lut[i]); ++ } ++ av_bprintf(buf, "};\n"); ++ } ++ if (ctx->pqdelin_lut) { ++ av_bprintf(buf, "__constant float pqdelin_lut[1024] = {\n"); ++ for (i = 0; i < 1024; i++) { ++ av_bprintf(buf, " %.5ff,", ctx->pqdelin_lut[i]); ++ } ++ av_bprintf(buf, "};\n"); ++ } ++} ++ + static void get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out, + double rgb2rgb[3][3]) { + double rgb2xyz[3][3], xyz2rgb[3][3]; +@@ -111,23 +297,17 @@ static void get_rgb2rgb_matrix(enum AVCo + ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb); + } + +-#define OPENCL_SOURCE_NB 3 +-// Average light level for SDR signals. 
This is equal to a signal level of 0.5 +-// under a typical presentation gamma of about 2.0. +-static const float sdr_avg = 0.25f; +- + static int tonemap_opencl_init(AVFilterContext *avctx) + { + TonemapOpenCLContext *ctx = avctx->priv; ++ AVBPrint header; ++ const char *opencl_sources[OPENCL_SOURCE_NB]; + int rgb2rgb_passthrough = 1; + double rgb2rgb[3][3], rgb2yuv[3][3], yuv2rgb[3][3]; + const struct LumaCoefficients *luma_src, *luma_dst; + cl_int cle; ++ int lut_pq = ctx->tonemap == TONEMAP_BT2390 && ctx->trc_in != AVCOL_TRC_SMPTE2084; + int err; +- AVBPrint header; +- const char *opencl_sources[OPENCL_SOURCE_NB]; +- +- av_bprint_init(&header, 1024, AV_BPRINT_SIZE_AUTOMATIC); + + switch(ctx->tonemap) { + case TONEMAP_GAMMA: +@@ -147,22 +327,25 @@ static int tonemap_opencl_init(AVFilterC + if (isnan(ctx->param)) + ctx->param = 1.0f; + ++ ctx->ref_white = ctx->tonemap == TONEMAP_BT2390 ? REF_WHITE_BT2390 ++ : REF_WHITE_DEFAULT; ++ + // SDR peak is 1.0f + ctx->target_peak = 1.0f; +- av_log(ctx, AV_LOG_DEBUG, "tone mapping transfer from %s to %s\n", ++ ++ av_log(ctx, AV_LOG_DEBUG, "Tonemapping transfer from %s to %s\n", + av_color_transfer_name(ctx->trc_in), + av_color_transfer_name(ctx->trc_out)); +- av_log(ctx, AV_LOG_DEBUG, "mapping colorspace from %s to %s\n", ++ av_log(ctx, AV_LOG_DEBUG, "Mapping colorspace from %s to %s\n", + av_color_space_name(ctx->colorspace_in), + av_color_space_name(ctx->colorspace_out)); +- av_log(ctx, AV_LOG_DEBUG, "mapping primaries from %s to %s\n", ++ av_log(ctx, AV_LOG_DEBUG, "Mapping primaries from %s to %s\n", + av_color_primaries_name(ctx->primaries_in), + av_color_primaries_name(ctx->primaries_out)); +- av_log(ctx, AV_LOG_DEBUG, "mapping range from %s to %s\n", ++ av_log(ctx, AV_LOG_DEBUG, "Mapping range from %s to %s\n", + av_color_range_name(ctx->range_in), + av_color_range_name(ctx->range_out)); +- // checking valid value just because of limited implementaion +- // please remove when more functionalities are implemented ++ + av_assert0(ctx->trc_out == AVCOL_TRC_BT709 || + ctx->trc_out == AVCOL_TRC_BT2020_10); + av_assert0(ctx->trc_in == AVCOL_TRC_SMPTE2084|| +@@ -172,22 +355,30 @@ static int tonemap_opencl_init(AVFilterC + av_assert0(ctx->primaries_in == AVCOL_PRI_BT2020 || + ctx->primaries_in == AVCOL_PRI_BT709); + +- av_bprintf(&header, "__constant const float tone_param = %.4ff;\n", ++ av_bprint_init(&header, 2048, AV_BPRINT_SIZE_UNLIMITED); ++ ++ av_bprintf(&header, "__constant float tone_param = %.4ff;\n", + ctx->param); +- av_bprintf(&header, "__constant const float desat_param = %.4ff;\n", ++ av_bprintf(&header, "__constant float desat_param = %.4ff;\n", + ctx->desat_param); +- av_bprintf(&header, "__constant const float target_peak = %.4ff;\n", ++ av_bprintf(&header, "__constant float target_peak = %.4ff;\n", + ctx->target_peak); +- av_bprintf(&header, "__constant const float sdr_avg = %.4ff;\n", sdr_avg); +- av_bprintf(&header, "__constant const float scene_threshold = %.4ff;\n", ++ av_bprintf(&header, "__constant float scene_threshold = %.4ff;\n", + ctx->scene_threshold); ++ + av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]); +- av_bprintf(&header, "#define DETECTION_FRAMES %d\n", DETECTION_FRAMES); ++ ++ if (ctx->in_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_IN\n"); ++ ++ if (ctx->out_planes > 2) ++ av_bprintf(&header, "#define NON_SEMI_PLANAR_OUT\n"); + + if (ctx->primaries_out != ctx->primaries_in) { + get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb); + rgb2rgb_passthrough = 0; 
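+ // (The 3x3 rgb2rgb gamut matrix computed here is emitted into the
+ // generated kernel header via ff_opencl_print_const_matrix_3x3() below
+ // and applied per pixel by the lrgb2lrgb() helper that
+ // map_to_dst_space_from_yuv() calls. A minimal sketch of that step,
+ // with approximate names, assuming m is the emitted row-major matrix:
+ //   c = (float3)(m[0]*c.x + m[1]*c.y + m[2]*c.z,
+ //                m[3]*c.x + m[4]*c.y + m[5]*c.z,
+ //                m[6]*c.x + m[7]*c.y + m[8]*c.z);
+ // When the primaries already match, the RGB2RGB_PASSTHROUGH define
+ // makes lrgb2lrgb() a no-op instead.)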
+ } ++ + if (ctx->range_in == AVCOL_RANGE_JPEG) + av_bprintf(&header, "#define FULL_RANGE_IN\n"); + +@@ -201,11 +392,10 @@ static int tonemap_opencl_init(AVFilterC + else + ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); + +- + luma_src = ff_get_luma_coefficients(ctx->colorspace_in); + if (!luma_src) { + err = AVERROR(EINVAL); +- av_log(avctx, AV_LOG_ERROR, "unsupported input colorspace %d (%s)\n", ++ av_log(avctx, AV_LOG_ERROR, "Unsupported input colorspace %d (%s)\n", + ctx->colorspace_in, av_color_space_name(ctx->colorspace_in)); + goto fail; + } +@@ -213,7 +403,7 @@ static int tonemap_opencl_init(AVFilterC + luma_dst = ff_get_luma_coefficients(ctx->colorspace_out); + if (!luma_dst) { + err = AVERROR(EINVAL); +- av_log(avctx, AV_LOG_ERROR, "unsupported output colorspace %d (%s)\n", ++ av_log(avctx, AV_LOG_ERROR, "Unsupported output colorspace %d (%s)\n", + ctx->colorspace_out, av_color_space_name(ctx->colorspace_out)); + goto fail; + } +@@ -225,20 +415,27 @@ static int tonemap_opencl_init(AVFilterC + ff_matrix_invert_3x3(rgb2yuv, yuv2rgb); + ff_opencl_print_const_matrix_3x3(&header, "rgb_matrix", yuv2rgb); + +- av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n", ++ av_bprintf(&header, "__constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n", + luma_src->cr, luma_src->cg, luma_src->cb); +- av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n", ++ av_bprintf(&header, "__constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n", + luma_dst->cr, luma_dst->cg, luma_dst->cb); + +- av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]); +- av_bprintf(&header, "#define delinearize %s\n", +- delinearize_funcs[ctx->trc_out]); +- +- if (ctx->trc_in == AVCOL_TRC_ARIB_STD_B67) +- av_bprintf(&header, "#define ootf_impl ootf_hlg\n"); +- +- if (ctx->trc_out == AVCOL_TRC_ARIB_STD_B67) +- av_bprintf(&header, "#define inverse_ootf_impl inverse_ootf_hlg\n"); ++ if (ctx->lut_trc) { ++ if (!ctx->lin_lut || !ctx->delin_lut) { ++ err = compute_trc_luts(avctx); ++ if (err < 0) ++ goto fail; ++ } ++ print_opencl_const_trc_luts(avctx, &header); ++ if (lut_pq) ++ av_bprintf(&header, "#define TRC_LUT_PQ\n"); ++ av_bprintf(&header, "#define TRC_LUT\n"); ++ av_bprintf(&header, "#define linearize %s\n", "linearize_lut"); ++ av_bprintf(&header, "#define delinearize %s\n", "delinearize_lut"); ++ } else { ++ av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]); ++ av_bprintf(&header, "#define delinearize %s\n", delinearize_funcs[ctx->trc_out]); ++ } + + av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); + opencl_sources[0] = header.str; +@@ -259,43 +456,78 @@ static int tonemap_opencl_init(AVFilterC + ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle); + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle); + +- ctx->util_mem = +- clCreateBuffer(ctx->ocf.hwctx->context, 0, +- (2 * DETECTION_FRAMES + 7) * sizeof(unsigned), +- NULL, &cle); +- CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create util buffer: %d.\n", cle); +- + ctx->initialised = 1; + return 0; + + fail: + av_bprint_finalize(&header, NULL); +- if (ctx->util_mem) +- clReleaseMemObject(ctx->util_mem); + if (ctx->command_queue) + clReleaseCommandQueue(ctx->command_queue); + if (ctx->kernel) + clReleaseKernel(ctx->kernel); ++ if (ctx->lin_lut) ++ av_freep(&ctx->lin_lut); ++ if (ctx->delin_lut) ++ av_freep(&ctx->delin_lut); ++ if (ctx->pqlin_lut) ++ av_freep(&ctx->pqlin_lut); ++ if (ctx->pqdelin_lut) ++ 
av_freep(&ctx->pqdelin_lut); + return err; + } + ++static int format_is_supported(enum AVPixelFormat fmt) ++{ ++ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) ++ if (supported_formats[i] == fmt) ++ return 1; ++ return 0; ++} ++ + static int tonemap_opencl_config_output(AVFilterLink *outlink) + { +- AVFilterContext *avctx = outlink->src; +- TonemapOpenCLContext *s = avctx->priv; ++ AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = avctx->inputs[0]; ++ TonemapOpenCLContext *ctx = avctx->priv; ++ AVHWFramesContext *in_frames_ctx; ++ enum AVPixelFormat in_format; ++ enum AVPixelFormat out_format; ++ const AVPixFmtDescriptor *in_desc; ++ const AVPixFmtDescriptor *out_desc; + int ret; +- if (s->format == AV_PIX_FMT_NONE) +- av_log(avctx, AV_LOG_WARNING, "format not set, use default format NV12\n"); +- else { +- if (s->format != AV_PIX_FMT_P010 && +- s->format != AV_PIX_FMT_NV12) { +- av_log(avctx, AV_LOG_ERROR, "unsupported output format," +- "only p010/nv12 supported now\n"); ++ ++ if (!inlink->hw_frames_ctx) + return AVERROR(EINVAL); +- } +- } ++ in_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ in_format = in_frames_ctx->sw_format; ++ out_format = (ctx->format == AV_PIX_FMT_NONE) ? in_format : ctx->format; ++ in_desc = av_pix_fmt_desc_get(in_format); ++ out_desc = av_pix_fmt_desc_get(out_format); ++ ++ if (!format_is_supported(in_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", ++ av_get_pix_fmt_name(in_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (!format_is_supported(out_format)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", ++ av_get_pix_fmt_name(out_format)); ++ return AVERROR(ENOSYS); ++ } ++ if (in_desc->comp[0].depth != 10 && in_desc->comp[0].depth != 16) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported input format depth: %d\n", ++ in_desc->comp[0].depth); ++ return AVERROR(ENOSYS); ++ } ++ ++ ctx->in_fmt = in_format; ++ ctx->out_fmt = out_format; ++ ctx->in_desc = in_desc; ++ ctx->out_desc = out_desc; ++ ctx->in_planes = av_pix_fmt_count_planes(in_format); ++ ctx->out_planes = av_pix_fmt_count_planes(out_format); ++ ctx->ocf.output_format = out_format; + +- s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? 
AV_PIX_FMT_NV12 : s->format; + ret = ff_opencl_filter_config_output(outlink); + if (ret < 0) + return ret; +@@ -310,13 +542,36 @@ static int launch_kernel(AVFilterContext + size_t global_work[2]; + size_t local_work[2]; + cl_int cle; ++ int idx_arg; ++ ++ if (!output->data[0] || !input->data[0] || !output->data[1] || !input->data[1]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->out_planes > 2 && !output->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ if (ctx->in_planes > 2 && !input->data[2]) { ++ err = AVERROR(EIO); ++ goto fail; ++ } + + CL_SET_KERNEL_ARG(kernel, 0, cl_mem, &output->data[0]); + CL_SET_KERNEL_ARG(kernel, 1, cl_mem, &input->data[0]); + CL_SET_KERNEL_ARG(kernel, 2, cl_mem, &output->data[1]); + CL_SET_KERNEL_ARG(kernel, 3, cl_mem, &input->data[1]); +- CL_SET_KERNEL_ARG(kernel, 4, cl_mem, &ctx->util_mem); +- CL_SET_KERNEL_ARG(kernel, 5, cl_float, &peak); ++ ++ idx_arg = 4; ++ if (ctx->out_planes > 2) ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &output->data[2]); ++ ++ if (ctx->in_planes > 2) ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &input->data[2]); ++ ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_float, &peak); + + local_work[0] = 16; + local_work[1] = 16; +@@ -343,10 +598,6 @@ static int tonemap_opencl_filter_frame(A + AVFrame *output = NULL; + cl_int cle; + int err; +- double peak = ctx->peak; +- +- AVHWFramesContext *input_frames_ctx = +- (AVHWFramesContext*)input->hw_frames_ctx->data; + + av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input->format), +@@ -365,8 +616,10 @@ static int tonemap_opencl_filter_frame(A + if (err < 0) + goto fail; + +- if (!peak) +- peak = ff_determine_signal_peak(input); ++ if (!ctx->peak) { ++ ctx->peak = ff_determine_signal_peak(input); ++ av_log(ctx, AV_LOG_DEBUG, "Computed signal peak: %f\n", ctx->peak); ++ } + + if (ctx->trc != -1) + output->color_trc = ctx->trc; +@@ -390,13 +643,8 @@ static int tonemap_opencl_filter_frame(A + if (!ctx->initialised) { + if (!(input->color_trc == AVCOL_TRC_SMPTE2084 || + input->color_trc == AVCOL_TRC_ARIB_STD_B67)) { +- av_log(ctx, AV_LOG_ERROR, "unsupported transfer function characteristic.\n"); +- err = AVERROR(ENOSYS); +- goto fail; +- } +- +- if (input_frames_ctx->sw_format != AV_PIX_FMT_P010) { +- av_log(ctx, AV_LOG_ERROR, "unsupported format in tonemap_opencl.\n"); ++ av_log(ctx, AV_LOG_ERROR, "Unsupported transfer function characteristic: %s\n", ++ av_color_transfer_name(input->color_trc)); + err = AVERROR(ENOSYS); + goto fail; + } +@@ -406,15 +654,9 @@ static int tonemap_opencl_filter_frame(A + goto fail; + } + +- switch(input_frames_ctx->sw_format) { +- case AV_PIX_FMT_P010: +- err = launch_kernel(avctx, ctx->kernel, output, input, peak); +- if (err < 0) goto fail; +- break; +- default: +- err = AVERROR(ENOSYS); ++ err = launch_kernel(avctx, ctx->kernel, output, input, ctx->peak); ++ if (err < 0) + goto fail; +- } + + cle = clFinish(ctx->command_queue); + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle); +@@ -423,31 +665,9 @@ static int tonemap_opencl_filter_frame(A + + ff_update_hdr_metadata(output, ctx->target_peak); + +- av_log(ctx, AV_LOG_DEBUG, "Tone-mapping output: %s, %ux%u (%"PRId64").\n", ++ av_log(ctx, AV_LOG_DEBUG, "Tonemapping output: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(output->format), + output->width, output->height, output->pts); +-#ifndef NDEBUG +- { +- uint32_t *ptr, *max_total_p, *avg_total_p, *frame_number_p; +- float peak_detected, avg_detected; +- unsigned 
map_size = (2 * DETECTION_FRAMES + 7) * sizeof(unsigned); +- ptr = (void *)clEnqueueMapBuffer(ctx->command_queue, ctx->util_mem, +- CL_TRUE, CL_MAP_READ, 0, map_size, +- 0, NULL, NULL, &cle); +- // For the layout of the util buffer, refer tonemap.cl +- if (ptr) { +- max_total_p = ptr + 2 * (DETECTION_FRAMES + 1) + 1; +- avg_total_p = max_total_p + 1; +- frame_number_p = avg_total_p + 2; +- peak_detected = (float)*max_total_p / (REFERENCE_WHITE * (*frame_number_p)); +- avg_detected = (float)*avg_total_p / (REFERENCE_WHITE * (*frame_number_p)); +- av_log(ctx, AV_LOG_DEBUG, "peak %f, avg %f will be used for next frame\n", +- peak_detected, avg_detected); +- clEnqueueUnmapMemObject(ctx->command_queue, ctx->util_mem, ptr, 0, +- NULL, NULL); +- } +- } +-#endif + + return ff_filter_frame(outlink, output); + +@@ -463,8 +683,6 @@ static av_cold void tonemap_opencl_unini + TonemapOpenCLContext *ctx = avctx->priv; + cl_int cle; + +- if (ctx->util_mem) +- clReleaseMemObject(ctx->util_mem); + if (ctx->kernel) { + cle = clReleaseKernel(ctx->kernel); + if (cle != CL_SUCCESS) +@@ -479,43 +697,54 @@ static av_cold void tonemap_opencl_unini + "command queue: %d.\n", cle); + } + ++ if (ctx->lin_lut) ++ av_freep(&ctx->lin_lut); ++ if (ctx->delin_lut) ++ av_freep(&ctx->delin_lut); ++ if (ctx->pqlin_lut) ++ av_freep(&ctx->pqlin_lut); ++ if (ctx->pqdelin_lut) ++ av_freep(&ctx->pqdelin_lut); ++ + ff_opencl_filter_uninit(avctx); + } + + #define OFFSET(x) offsetof(TonemapOpenCLContext, x) + #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + static const AVOption tonemap_opencl_options[] = { +- { "tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" }, +- { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, "tonemap" }, +- { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, "tonemap" }, +- { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, "tonemap" }, +- { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, "tonemap" }, +- { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" }, +- { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, "tonemap" }, +- { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, "tonemap" }, +- { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, +- { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" }, +- { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, "transfer" }, +- { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, "transfer" }, +- { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" }, +- { "m", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" }, +- { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, "matrix" }, +- { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, "matrix" }, +- { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" }, +- { "p", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" }, +- { "bt709", 0, 0, 
AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, "primaries" }, +- { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, "primaries" }, +- { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" }, +- { "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" }, +- { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, +- { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, +- { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, +- { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, +- { "format", "output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" }, +- { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, +- { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, +- { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, +- { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, ++ { "tonemap", "Tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, { .i64 = TONEMAP_NONE }, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" }, ++ { "none", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_NONE }, 0, 0, FLAGS, "tonemap" }, ++ { "linear", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_LINEAR }, 0, 0, FLAGS, "tonemap" }, ++ { "gamma", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_GAMMA }, 0, 0, FLAGS, "tonemap" }, ++ { "clip", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_CLIP }, 0, 0, FLAGS, "tonemap" }, ++ { "reinhard", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_REINHARD }, 0, 0, FLAGS, "tonemap" }, ++ { "hable", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_HABLE }, 0, 0, FLAGS, "tonemap" }, ++ { "mobius", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_MOBIUS }, 0, 0, FLAGS, "tonemap" }, ++ { "bt2390", 0, 0, AV_OPT_TYPE_CONST, { .i64 = TONEMAP_BT2390 }, 0, 0, FLAGS, "tonemap" }, ++ { "transfer", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, "transfer" }, ++ { "t", "Set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, { .i64 = AVCOL_TRC_BT709 }, -1, INT_MAX, FLAGS, "transfer" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_BT709 }, 0, 0, FLAGS, "transfer" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_TRC_BT2020_10 }, 0, 0, FLAGS, "transfer" }, ++ { "matrix", "Set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, { .i64 = AVCOL_SPC_BT709 }, -1, INT_MAX, FLAGS, "matrix" }, ++ { "m", "Set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, { .i64 = AVCOL_SPC_BT709 }, -1, INT_MAX, FLAGS, "matrix" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_SPC_BT709 }, 0, 0, FLAGS, "matrix" }, ++ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_SPC_BT2020_NCL }, 0, 0, FLAGS, "matrix" }, ++ { "primaries", "Set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, { .i64 = AVCOL_PRI_BT709 }, -1, INT_MAX, FLAGS, "primaries" }, ++ { "p", "Set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, { .i64 = AVCOL_PRI_BT709 }, -1, INT_MAX, FLAGS, "primaries" }, ++ { "bt709", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_PRI_BT709 }, 0, 0, FLAGS, "primaries" }, ++ { "bt2020", 0, 0, 
AV_OPT_TYPE_CONST, { .i64 = AVCOL_PRI_BT2020 }, 0, 0, FLAGS, "primaries" }, ++ { "range", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_MPEG }, -1, INT_MAX, FLAGS, "range" }, ++ { "r", "Set color range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_MPEG }, -1, INT_MAX, FLAGS, "range" }, ++ { "tv", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "pc", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "limited", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "full", 0, 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "format", "Output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" }, ++ { "peak", "Signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, 0, DBL_MAX, FLAGS }, ++ { "param", "Tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, DBL_MIN, DBL_MAX, FLAGS }, ++ { "desat", "Desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, { .dbl = 0.5}, 0, DBL_MAX, FLAGS }, ++ { "threshold", "Scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, { .dbl = 0.2 }, 0, DBL_MAX, FLAGS }, ++ { "luttrc", "Enable LUT for de/linearize", OFFSET(lut_trc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, + { NULL } + }; + diff --git a/debian/patches/series b/debian/patches/series index a59a133267e..d44e268ac89 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -3,3 +3,4 @@ 0003-add-cuda-tonemap-impl.patch 0004-add-amf-refactor-and-hevc-10-bit-encoding.patch 0005-add-opencl-scaler-and-pixfmt-converter-impl.patch +0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch From b13cb92bf2a0e7b7240bdb2d2de2cfba2a0c8c2b Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:38:41 +0800 Subject: [PATCH 16/41] add pgs subtitle support and code refactor to opencl overlay --- ...-support-and-code-refactor-to-opencl.patch | 634 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 635 insertions(+) create mode 100644 debian/patches/0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch diff --git a/debian/patches/0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch b/debian/patches/0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch new file mode 100644 index 00000000000..bb348f93d63 --- /dev/null +++ b/debian/patches/0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch @@ -0,0 +1,634 @@ +Index: jellyfin-ffmpeg/libavfilter/opencl/overlay.cl +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/opencl/overlay.cl ++++ jellyfin-ffmpeg/libavfilter/opencl/overlay.cl +@@ -16,15 +16,24 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +-__kernel void overlay_no_alpha(__write_only image2d_t dst, +- __read_only image2d_t main, +- __read_only image2d_t overlay, +- int x_position, +- int y_position) ++__constant sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | ++ CLK_FILTER_NEAREST); ++ ++__kernel void overlay_pass(__write_only image2d_t dst, ++ __read_only image2d_t main) + { +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_FILTER_NEAREST); ++ int2 loc = (int2)(get_global_id(0), get_global_id(1)); + ++ float4 val = read_imagef(main, sampler, loc); ++ write_imagef(dst, loc, val); ++} ++ ++__kernel void overlay_noalpha(__write_only 
image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay, ++ int x_position, ++ int y_position) ++{ + int2 overlay_size = get_image_dim(overlay); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + +@@ -41,15 +50,15 @@ __kernel void overlay_no_alpha(__write_o + } + } + +-__kernel void overlay_internal_alpha(__write_only image2d_t dst, +- __read_only image2d_t main, +- __read_only image2d_t overlay, +- int x_position, +- int y_position) ++__kernel void overlay_alpha(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay, ++ __read_only image2d_t alpha, ++ int x_position, ++ int y_position, ++ int alpha_adj_x, ++ int alpha_adj_y) + { +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_FILTER_NEAREST); +- + int2 overlay_size = get_image_dim(overlay); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + +@@ -63,24 +72,50 @@ __kernel void overlay_internal_alpha(__w + int2 loc_overlay = (int2)(x_position, y_position); + float4 in_main = read_imagef(main, sampler, loc); + float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay); +- float4 val = in_overlay * in_overlay.w + in_main * (1.0f - in_overlay.w); ++ ++ int2 loc_alpha = (int2)(loc.x * alpha_adj_x, loc.y * alpha_adj_y) - loc_overlay; ++ float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); ++ ++ float4 val = in_overlay * in_alpha.x + in_main * (1.0f - in_alpha.x); + write_imagef(dst, loc, val); + } + } + +-__kernel void overlay_external_alpha(__write_only image2d_t dst, +- __read_only image2d_t main, +- __read_only image2d_t overlay, +- __read_only image2d_t alpha, +- int x_position, +- int y_position, +- int alpha_adj_x, +- int alpha_adj_y) ++__kernel void overlay_noalpha_uv(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay_u, ++ __read_only image2d_t overlay_v, ++ int x_position, ++ int y_position) + { +- const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | +- CLK_FILTER_NEAREST); ++ int2 overlay_size = get_image_dim(overlay_u); ++ int2 loc = (int2)(get_global_id(0), get_global_id(1)); + +- int2 overlay_size = get_image_dim(overlay); ++ if (loc.x < x_position || ++ loc.y < y_position || ++ loc.x >= overlay_size.x + x_position || ++ loc.y >= overlay_size.y + y_position) { ++ float4 val = read_imagef(main, sampler, loc); ++ write_imagef(dst, loc, val); ++ } else { ++ int2 loc_overlay = (int2)(x_position, y_position); ++ float4 val_u = read_imagef(overlay_u, sampler, loc - loc_overlay); ++ float4 val_v = read_imagef(overlay_v, sampler, loc - loc_overlay); ++ write_imagef(dst, loc, (float4)(val_u.x, val_v.x, 0.0f, 1.0f)); ++ } ++} ++ ++__kernel void overlay_alpha_uv(__write_only image2d_t dst, ++ __read_only image2d_t main, ++ __read_only image2d_t overlay_u, ++ __read_only image2d_t overlay_v, ++ __read_only image2d_t alpha, ++ int x_position, ++ int y_position, ++ int alpha_adj_x, ++ int alpha_adj_y) ++{ ++ int2 overlay_size = get_image_dim(overlay_u); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + + if (loc.x < x_position || +@@ -90,13 +125,14 @@ __kernel void overlay_external_alpha(__w + float4 val = read_imagef(main, sampler, loc); + write_imagef(dst, loc, val); + } else { +- int2 loc_overlay = (int2)(x_position, y_position); +- float4 in_main = read_imagef(main, sampler, loc); +- float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay); ++ int2 loc_overlay = (int2)(x_position, y_position); ++ float4 in_main = read_imagef(main, sampler, loc); ++ float4 in_overlay_u = 
read_imagef(overlay_u, sampler, loc - loc_overlay); ++ float4 in_overlay_v = read_imagef(overlay_v, sampler, loc - loc_overlay); ++ float4 in_overlay = (float4)(in_overlay_u.x, in_overlay_v.x, 0.0f, 1.0f); + +- int2 loc_alpha = (int2)(loc.x * alpha_adj_x, +- loc.y * alpha_adj_y) - loc_overlay; +- float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); ++ int2 loc_alpha = (int2)(loc.x * alpha_adj_x, loc.y * alpha_adj_y) - loc_overlay; ++ float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); + + float4 val = in_overlay * in_alpha.x + in_main * (1.0f - in_alpha.x); + write_imagef(dst, loc, val); +Index: jellyfin-ffmpeg/libavfilter/vf_overlay_opencl.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_overlay_opencl.c ++++ jellyfin-ffmpeg/libavfilter/vf_overlay_opencl.c +@@ -28,72 +28,113 @@ + #include "opencl_source.h" + #include "video.h" + ++static const enum AVPixelFormat supported_main_formats[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_NONE, ++}; ++ ++static const enum AVPixelFormat supported_overlay_formats[] = { ++ AV_PIX_FMT_NV12, ++ AV_PIX_FMT_YUV420P, ++ AV_PIX_FMT_YUVA420P, ++ AV_PIX_FMT_NONE, ++}; ++ + typedef struct OverlayOpenCLContext { + OpenCLFilterContext ocf; + ++ enum AVPixelFormat in_fmt_main, in_fmt_overlay; ++ const AVPixFmtDescriptor *in_desc_main, *in_desc_overlay; ++ int in_planes_main, in_planes_overlay; ++ + int initialised; + cl_kernel kernel; ++ cl_kernel kernel_pass; ++ cl_kernel kernel_uv; ++ const char *kernel_name; ++ const char *kernel_name_pass; ++ const char *kernel_name_uv; + cl_command_queue command_queue; + + FFFrameSync fs; + +- int nb_planes; + int x_subsample; + int y_subsample; +- int alpha_separate; ++ int alpha; + + int x_position; + int y_position; ++ ++ int opt_repeatlast; ++ int opt_shortest; ++ int opt_eof_action; + } OverlayOpenCLContext; + +-static int overlay_opencl_load(AVFilterContext *avctx, +- enum AVPixelFormat main_format, +- enum AVPixelFormat overlay_format) ++static int format_is_supported(const enum AVPixelFormat fmts[], enum AVPixelFormat fmt) ++{ ++ for (int i = 0; fmts[i] != AV_PIX_FMT_NONE; i++) ++ if (fmts[i] == fmt) ++ return 1; ++ return 0; ++} ++ ++static int formats_match(const enum AVPixelFormat fmt_main, const enum AVPixelFormat fmt_overlay) { ++ switch(fmt_main) { ++ case AV_PIX_FMT_NV12: ++ return fmt_overlay == AV_PIX_FMT_NV12 || ++ fmt_overlay == AV_PIX_FMT_YUV420P || ++ fmt_overlay == AV_PIX_FMT_YUVA420P; ++ case AV_PIX_FMT_YUV420P: ++ return fmt_overlay == AV_PIX_FMT_YUV420P || ++ fmt_overlay == AV_PIX_FMT_YUVA420P; ++ default: ++ return 0; ++ } ++} ++ ++static int overlay_opencl_load(AVFilterContext *avctx) + { + OverlayOpenCLContext *ctx = avctx->priv; + cl_int cle; +- const char *source = ff_opencl_source_overlay; +- const char *kernel; +- const AVPixFmtDescriptor *main_desc, *overlay_desc; +- int err, i, main_planes, overlay_planes; +- +- main_desc = av_pix_fmt_desc_get(main_format); +- overlay_desc = av_pix_fmt_desc_get(overlay_format); +- +- main_planes = overlay_planes = 0; +- for (i = 0; i < main_desc->nb_components; i++) +- main_planes = FFMAX(main_planes, +- main_desc->comp[i].plane + 1); +- for (i = 0; i < overlay_desc->nb_components; i++) +- overlay_planes = FFMAX(overlay_planes, +- overlay_desc->comp[i].plane + 1); +- +- ctx->nb_planes = main_planes; +- ctx->x_subsample = 1 << main_desc->log2_chroma_w; +- ctx->y_subsample = 1 << main_desc->log2_chroma_h; ++ int err; ++ ++ ctx->x_subsample = 1 << 
ctx->in_desc_main->log2_chroma_w; ++ ctx->y_subsample = 1 << ctx->in_desc_main->log2_chroma_h; + + if (ctx->x_position % ctx->x_subsample || + ctx->y_position % ctx->y_subsample) { +- av_log(avctx, AV_LOG_WARNING, "Warning: overlay position (%d, %d) " ++ av_log(avctx, AV_LOG_WARNING, "Overlay position (%d, %d) " + "does not match subsampling (%d, %d).\n", + ctx->x_position, ctx->y_position, + ctx->x_subsample, ctx->y_subsample); + } + +- if (main_planes == overlay_planes) { +- if (main_desc->nb_components == overlay_desc->nb_components) +- kernel = "overlay_no_alpha"; ++ switch(ctx->in_fmt_overlay) { ++ case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_YUV420P: ++ ctx->alpha = 0; ++ ctx->kernel_name = "overlay_noalpha"; ++ break; ++ case AV_PIX_FMT_YUVA420P: ++ ctx->alpha = 1; ++ ctx->kernel_name = "overlay_alpha"; ++ break; ++ default: ++ err = AVERROR_BUG; ++ goto fail; ++ } ++ ++ if (ctx->in_planes_main == 2 && ctx->in_planes_overlay > 2) { ++ if (ctx->alpha) ++ ctx->kernel_name_uv = "overlay_alpha_uv"; + else +- kernel = "overlay_internal_alpha"; +- ctx->alpha_separate = 0; +- } else { +- kernel = "overlay_external_alpha"; +- ctx->alpha_separate = 1; ++ ctx->kernel_name_uv = "overlay_noalpha_uv"; + } + +- av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel); ++ av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", ctx->kernel_name); + +- err = ff_opencl_filter_load_program(avctx, &source, 1); ++ err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_overlay, 1); + if (err < 0) + goto fail; + +@@ -103,10 +144,20 @@ static int overlay_opencl_load(AVFilterC + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " + "command queue %d.\n", cle); + +- ctx->kernel = clCreateKernel(ctx->ocf.program, kernel, &cle); ++ ctx->kernel = clCreateKernel(ctx->ocf.program, ctx->kernel_name, &cle); + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle); + ++ ctx->kernel_name_pass = "overlay_pass"; ++ ctx->kernel_pass = clCreateKernel(ctx->ocf.program, ctx->kernel_name_pass, &cle); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel_pass %d.\n", cle); ++ ++ if (ctx->kernel_name_uv) { ++ ctx->kernel_uv = clCreateKernel(ctx->ocf.program, ctx->kernel_name_uv, &cle); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel_uv %d.\n", cle); ++ } ++ + ctx->initialised = 1; ++ + return 0; + + fail: +@@ -114,21 +165,113 @@ fail: + clReleaseCommandQueue(ctx->command_queue); + if (ctx->kernel) + clReleaseKernel(ctx->kernel); ++ if (ctx->kernel_pass) ++ clReleaseKernel(ctx->kernel_pass); ++ if (ctx->kernel_uv) ++ clReleaseKernel(ctx->kernel_uv); ++ return err; ++} ++ ++static int launch_kernel(AVFilterContext *avctx, AVFrame *output, AVFrame *input_main, ++ AVFrame *input_overlay, int plane, int passthrough) { ++ OverlayOpenCLContext *ctx = avctx->priv; ++ cl_mem mem; ++ cl_int cle, x, y; ++ cl_kernel kernel; ++ size_t global_work[2]; ++ int idx_arg = 0; ++ int err; ++ ++ if (passthrough) ++ kernel = ctx->kernel_pass; ++ else if (plane == 1 && ctx->in_planes_main == 2 && ctx->in_planes_overlay > 2) ++ kernel = ctx->kernel_uv; ++ else ++ kernel = ctx->kernel; ++ ++ // dst ++ mem = (cl_mem)output->data[plane]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ ++ // main ++ mem = (cl_mem)input_main->data[plane]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ ++ if (!passthrough) { ++ // overlay ++ mem = (cl_mem)input_overlay->data[plane]; ++ if (!mem) { ++ err = 
AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ ++ // non-semi planar on top of the semi planar ++ if (plane == 1 && ctx->in_planes_main == 2 && ctx->in_planes_overlay > 2) { ++ mem = (cl_mem)input_overlay->data[plane + 1]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ } ++ ++ // alpha ++ if (ctx->alpha) { ++ mem = (cl_mem)input_overlay->data[ctx->in_planes_overlay - 1]; ++ if (!mem) { ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_mem, &mem); ++ } ++ ++ x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample); ++ y = ctx->y_position / (plane == 0 ? 1 : ctx->y_subsample); ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &x); ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &y); ++ ++ if (ctx->alpha) { ++ cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample; ++ cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample; ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &alpha_adj_x); ++ CL_SET_KERNEL_ARG(kernel, idx_arg++, cl_int, &alpha_adj_y); ++ } ++ } ++ ++ err = ff_opencl_filter_work_size_from_image(avctx, global_work, ++ input_main, plane, 0); ++ if (err < 0) ++ goto fail; ++ ++ cle = clEnqueueNDRangeKernel(ctx->command_queue, kernel, 2, NULL, ++ global_work, NULL, 0, NULL, NULL); ++ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue overlay kernel " ++ "for plane %d: %d.\n", plane, cle); ++ return 0; ++ ++fail: + return err; + } + + static int overlay_opencl_blend(FFFrameSync *fs) + { +- AVFilterContext *avctx = fs->parent; +- AVFilterLink *outlink = avctx->outputs[0]; ++ AVFilterContext *avctx = fs->parent; ++ AVFilterLink *outlink = avctx->outputs[0]; + OverlayOpenCLContext *ctx = avctx->priv; + AVFrame *input_main, *input_overlay; + AVFrame *output; +- cl_mem mem; +- cl_int cle, x, y; +- size_t global_work[2]; +- int kernel_arg = 0; +- int err, plane; ++ cl_int cle; ++ int passthrough = 0; ++ int err, p; + + err = ff_framesync_get_frame(fs, 0, &input_main, 0); + if (err < 0) +@@ -137,14 +280,14 @@ static int overlay_opencl_blend(FFFrameS + if (err < 0) + return err; + +- if (!ctx->initialised) { +- AVHWFramesContext *main_fc = +- (AVHWFramesContext*)input_main->hw_frames_ctx->data; +- AVHWFramesContext *overlay_fc = +- (AVHWFramesContext*)input_overlay->hw_frames_ctx->data; ++ if (!input_main) ++ return AVERROR_BUG; ++ ++ if (!input_overlay) ++ passthrough = 1; + +- err = overlay_opencl_load(avctx, main_fc->sw_format, +- overlay_fc->sw_format); ++ if (!ctx->initialised) { ++ err = overlay_opencl_load(avctx); + if (err < 0) + return err; + } +@@ -155,54 +298,10 @@ static int overlay_opencl_blend(FFFrameS + goto fail; + } + +- for (plane = 0; plane < ctx->nb_planes; plane++) { +- kernel_arg = 0; +- +- mem = (cl_mem)output->data[plane]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- +- mem = (cl_mem)input_main->data[plane]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- +- mem = (cl_mem)input_overlay->data[plane]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- +- if (ctx->alpha_separate) { +- mem = (cl_mem)input_overlay->data[ctx->nb_planes]; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); +- kernel_arg++; +- } +- +- x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample); +- y = ctx->y_position / (plane == 0 ? 
1 : ctx->y_subsample); +- +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &x); +- kernel_arg++; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &y); +- kernel_arg++; +- +- if (ctx->alpha_separate) { +- cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample; +- cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample; +- +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_x); +- kernel_arg++; +- CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_y); +- kernel_arg++; +- } +- +- err = ff_opencl_filter_work_size_from_image(avctx, global_work, +- output, plane, 0); ++ for (p = 0; p < ctx->in_planes_main; p++) { ++ err = launch_kernel(avctx, output, input_main, input_overlay, p, passthrough); + if (err < 0) +- goto fail; +- +- cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, +- global_work, NULL, 0, NULL, NULL); +- CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue overlay kernel " +- "for plane %d: %d.\n", plane, cle); ++ return err; + } + + cle = clFinish(ctx->command_queue); +@@ -217,6 +316,9 @@ static int overlay_opencl_blend(FFFrameS + return ff_filter_frame(outlink, output); + + fail: ++ clFinish(ctx->command_queue); ++ av_frame_free(&input_main); ++ av_frame_free(&input_overlay); + av_frame_free(&output); + return err; + } +@@ -225,8 +327,49 @@ static int overlay_opencl_config_output( + { + AVFilterContext *avctx = outlink->src; + OverlayOpenCLContext *ctx = avctx->priv; ++ ++ AVFilterLink *inlink = avctx->inputs[0]; ++ AVHWFramesContext *frames_ctx_main = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ ++ AVFilterLink *inlink_overlay = avctx->inputs[1]; ++ AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data; ++ + int err; + ++ if (!frames_ctx_main) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on main input\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ ctx->in_fmt_main = frames_ctx_main->sw_format; ++ ctx->in_desc_main = av_pix_fmt_desc_get(frames_ctx_main->sw_format); ++ ctx->in_planes_main = av_pix_fmt_count_planes(frames_ctx_main->sw_format); ++ if (!format_is_supported(supported_main_formats, ctx->in_fmt_main)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported main input format: %s\n", ++ av_get_pix_fmt_name(ctx->in_fmt_main)); ++ return AVERROR(ENOSYS); ++ } ++ ++ if (!frames_ctx_overlay) { ++ av_log(ctx, AV_LOG_ERROR, "No hw context provided on overlay input\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ ctx->in_fmt_overlay = frames_ctx_overlay->sw_format; ++ ctx->in_desc_overlay = av_pix_fmt_desc_get(frames_ctx_overlay->sw_format); ++ ctx->in_planes_overlay = av_pix_fmt_count_planes(frames_ctx_overlay->sw_format); ++ if (!format_is_supported(supported_overlay_formats, ctx->in_fmt_overlay)) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported overlay input format: %s\n", ++ av_get_pix_fmt_name(ctx->in_fmt_overlay)); ++ return AVERROR(ENOSYS); ++ } ++ ++ if (!formats_match(ctx->in_fmt_main, ctx->in_fmt_overlay)) { ++ av_log(ctx, AV_LOG_ERROR, "Can't overlay %s on %s \n", ++ av_get_pix_fmt_name(ctx->in_fmt_overlay), av_get_pix_fmt_name(ctx->in_fmt_main)); ++ return AVERROR(EINVAL); ++ } ++ + err = ff_opencl_filter_config_output(outlink); + if (err < 0) + return err; +@@ -235,6 +378,11 @@ static int overlay_opencl_config_output( + if (err < 0) + return err; + ++ ctx->fs.opt_repeatlast = ctx->opt_repeatlast; ++ ctx->fs.opt_shortest = ctx->opt_shortest; ++ ctx->fs.opt_eof_action = ctx->opt_eof_action; ++ ctx->fs.time_base = outlink->time_base = inlink->time_base; ++ + return 
ff_framesync_configure(&ctx->fs);
+ }
+
+@@ -266,6 +414,20 @@ static av_cold void overlay_opencl_unini
+ "kernel: %d.\n", cle);
+ }
+
++ if (ctx->kernel_pass) {
++ cle = clReleaseKernel(ctx->kernel_pass);
++ if (cle != CL_SUCCESS)
++ av_log(avctx, AV_LOG_ERROR, "Failed to release "
++ "kernel_pass: %d.\n", cle);
++ }
++
++ if (ctx->kernel_uv) {
++ cle = clReleaseKernel(ctx->kernel_uv);
++ if (cle != CL_SUCCESS)
++ av_log(avctx, AV_LOG_ERROR, "Failed to release "
++ "kernel_uv: %d.\n", cle);
++ }
++
+ if (ctx->command_queue) {
+ cle = clReleaseCommandQueue(ctx->command_queue);
+ if (cle != CL_SUCCESS)
+@@ -280,11 +442,20 @@ static av_cold void overlay_opencl_unini
+
+ #define OFFSET(x) offsetof(OverlayOpenCLContext, x)
+ #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
++
+ static const AVOption overlay_opencl_options[] = {
+ { "x", "Overlay x position",
+ OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
+ { "y", "Overlay y position",
+ OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
++ { "eof_action", "Action to take when encountering EOF from secondary input ",
++ OFFSET(opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT },
++ EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" },
++ { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
++ { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
++ { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, "eof_action" },
++ { "shortest", "force termination when the shortest input terminates", OFFSET(opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
++ { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
+ { NULL },
+ };
+
diff --git a/debian/patches/series b/debian/patches/series
index d44e268ac89..f875b00723e 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -4,3 +4,4 @@
 0004-add-amf-refactor-and-hevc-10-bit-encoding.patch
 0005-add-opencl-scaler-and-pixfmt-converter-impl.patch
 0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
+0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch

From 201acce0cb61c729bc63309a3e4b56a5bb6b4b61 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Wed, 10 Nov 2021 21:31:11 +0800
Subject: [PATCH 17/41] add d3d11-opencl interop for AMD

---
 ...008-add-d3d11-opencl-interop-for-AMD.patch | 387 ++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 388 insertions(+)
 create mode 100644 debian/patches/0008-add-d3d11-opencl-interop-for-AMD.patch

diff --git a/debian/patches/0008-add-d3d11-opencl-interop-for-AMD.patch b/debian/patches/0008-add-d3d11-opencl-interop-for-AMD.patch
new file mode 100644
index 00000000000..5ab172ac65f
--- /dev/null
+++ b/debian/patches/0008-add-d3d11-opencl-interop-for-AMD.patch
@@ -0,0 +1,387 @@
+Index: jellyfin-ffmpeg/libavutil/hwcontext_opencl.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavutil/hwcontext_opencl.c
++++ jellyfin-ffmpeg/libavutil/hwcontext_opencl.c
+@@ -64,6 +64,16 @@
+ #if HAVE_OPENCL_D3D11
+ #include <CL/cl_d3d11.h>
+ #include "hwcontext_d3d11va.h"
++
++// From cl_amd_planar_yuv; unfortunately no header is provided.
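++// A rough usage sketch of the interop path these typedefs enable
++// (hypothetical handles, error handling omitted): map the D3D11
++// NV12/P010 texture to a single CL image, then split out its planes:
++//   cl_mem img = clCreateFromD3D11Texture2DKHR(ctx, flags, tex, 0, &err);
++//   cl_mem y   = clGetPlaneFromImageAMD(ctx, img, 0, &err); // luma
++//   cl_mem uv  = clGetPlaneFromImageAMD(ctx, img, 1, &err); // chroma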
++typedef CL_API_ENTRY cl_mem(CL_API_CALL *clGetPlaneFromImageAMD_fn)( ++ cl_context context, cl_mem mem, cl_uint plane, ++ cl_int *errcode_ret); ++ ++typedef CL_API_ENTRY cl_mem(CL_API_CALL *clConvertImageAMD_fn)( ++ cl_context context, cl_mem image, const cl_image_format *image_format, ++ cl_int *errcode_ret); ++ + #endif + + #if HAVE_OPENCL_DRM_ARM +@@ -72,7 +82,6 @@ + #include "hwcontext_drm.h" + #endif + +- + typedef struct OpenCLDeviceContext { + // Default command queue to use for transfer/mapping operations on + // the device. If the user supplies one, this is a reference to it. +@@ -113,12 +122,19 @@ typedef struct OpenCLDeviceContext { + + #if HAVE_OPENCL_D3D11 + int d3d11_mapping_usable; ++ int d3d11_map_amd; ++ int d3d11_map_intel; ++ + clCreateFromD3D11Texture2DKHR_fn + clCreateFromD3D11Texture2DKHR; + clEnqueueAcquireD3D11ObjectsKHR_fn + clEnqueueAcquireD3D11ObjectsKHR; + clEnqueueReleaseD3D11ObjectsKHR_fn + clEnqueueReleaseD3D11ObjectsKHR; ++ clGetPlaneFromImageAMD_fn ++ clGetPlaneFromImageAMD; ++ clConvertImageAMD_fn ++ clConvertImageAMD; + #endif + + #if HAVE_OPENCL_DRM_ARM +@@ -142,7 +158,6 @@ typedef struct OpenCLFramesContext { + #endif + } OpenCLFramesContext; + +- + static void CL_CALLBACK opencl_error_callback(const char *errinfo, + const void *private_info, + size_t cb, +@@ -820,17 +835,25 @@ static int opencl_device_init(AVHWDevice + #if HAVE_OPENCL_D3D11 + { + const char *d3d11_ext = "cl_khr_d3d11_sharing"; +- const char *nv12_ext = "cl_intel_d3d11_nv12_media_sharing"; ++ const char *amd_ext = "cl_amd_planar_yuv"; ++ const char *intel_ext = "cl_intel_d3d11_nv12_media_sharing"; + int fail = 0; + + if (!opencl_check_extension(hwdev, d3d11_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is " + "required for D3D11 to OpenCL mapping.\n", d3d11_ext); + fail = 1; +- } else if (!opencl_check_extension(hwdev, nv12_ext)) { +- av_log(hwdev, AV_LOG_VERBOSE, "The %s extension may be " +- "required for D3D11 to OpenCL mapping.\n", nv12_ext); +- // Not fatal. 
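++ // Unlike the old advisory check above, the logic below makes one of
++ // the two vendor paths mandatory: cl_amd_planar_yuv (AMD) or
++ // cl_intel_d3d11_nv12_media_sharing (Intel); with neither present,
++ // D3D11 to OpenCL mapping is reported as not usable.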
++ } else { ++ if (opencl_check_extension(hwdev, amd_ext)) { ++ priv->d3d11_map_amd = 1; ++ } else if (opencl_check_extension(hwdev, intel_ext)) { ++ priv->d3d11_map_intel = 1; ++ } else { ++ av_log(hwdev, AV_LOG_VERBOSE, "One of the %s or %s " ++ "extensions are required for D3D11 to OpenCL " ++ "mapping.\n", amd_ext, intel_ext); ++ fail = 1; ++ } + } + + CL_FUNC(clCreateFromD3D11Texture2DKHR, +@@ -840,6 +863,13 @@ static int opencl_device_init(AVHWDevice + CL_FUNC(clEnqueueReleaseD3D11ObjectsKHR, + "D3D11 in OpenCL release"); + ++ if (priv->d3d11_map_amd) { ++ CL_FUNC(clGetPlaneFromImageAMD, ++ "D3D11 to OpenCL image planar mapping on AMD"); ++ CL_FUNC(clConvertImageAMD, ++ "D3D11 to OpenCL image data type converting on AMD"); ++ } ++ + if (fail) { + av_log(hwdev, AV_LOG_WARNING, "D3D11 to OpenCL mapping " + "not usable.\n"); +@@ -1242,7 +1272,7 @@ static int opencl_device_derive(AVHWDevi + CL_CONTEXT_VA_API_DISPLAY_INTEL, + (intptr_t)src_hwctx->display, + CL_CONTEXT_INTEROP_USER_SYNC, +- CL_FALSE, ++ CL_TRUE, + 0, + }; + OpenCLDeviceSelector selector = { +@@ -1281,11 +1311,13 @@ static int opencl_device_derive(AVHWDevi + device_handle, + &device, FALSE); + if (SUCCEEDED(hr)) { +- cl_context_properties props[5] = { ++ cl_context_properties props[7] = { + CL_CONTEXT_PLATFORM, + 0, + CL_CONTEXT_ADAPTER_D3D9EX_KHR, + (intptr_t)device, ++ CL_CONTEXT_INTEROP_USER_SYNC, ++ CL_TRUE, + 0, + }; + OpenCLDeviceSelector selector = { +@@ -1318,11 +1350,13 @@ static int opencl_device_derive(AVHWDevi + case AV_HWDEVICE_TYPE_D3D11VA: + { + AVD3D11VADeviceContext *src_hwctx = src_ctx->hwctx; +- cl_context_properties props[5] = { ++ cl_context_properties props[7] = { + CL_CONTEXT_PLATFORM, + 0, + CL_CONTEXT_D3D11_DEVICE_KHR, + (intptr_t)src_hwctx->device, ++ CL_CONTEXT_INTEROP_USER_SYNC, ++ CL_TRUE, + 0, + }; + OpenCLDeviceSelector selector = { +@@ -2004,7 +2038,8 @@ static int opencl_map_frame(AVHWFramesCo + goto fail; + } + +- dst->data[p] = map->address[p]; ++ dst->data[p] = map->address[p]; ++ dst->linesize[p] = row_pitch; + + av_log(hwfc, AV_LOG_DEBUG, "Map plane %d (%p -> %p).\n", + p, src->data[p], dst->data[p]); +@@ -2329,7 +2364,7 @@ static void opencl_unmap_from_dxva2(AVHW + { + AVOpenCLFrameDescriptor *desc = hwmap->priv; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; +- OpenCLFramesContext *frames_priv = dst_fc->device_ctx->internal->priv; ++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + cl_event event; + cl_int cle; + +@@ -2421,11 +2456,13 @@ static int opencl_frames_derive_from_dxv + cl_int cle; + int err, i, p, nb_planes; + +- if (src_fc->sw_format != AV_PIX_FMT_NV12) { +- av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported " ++ if (src_fc->sw_format != AV_PIX_FMT_NV12 && ++ src_fc->sw_format != AV_PIX_FMT_P010) { ++ av_log(dst_fc, AV_LOG_ERROR, "Only NV12 and P010 textures are supported " + "for DXVA2 to OpenCL mapping.\n"); + return AVERROR(EINVAL); + } ++ + nb_planes = 2; + + if (src_fc->initial_pool_size == 0) { +@@ -2493,7 +2530,7 @@ static void opencl_unmap_from_d3d11(AVHW + { + AVOpenCLFrameDescriptor *desc = hwmap->priv; + OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; +- OpenCLFramesContext *frames_priv = dst_fc->device_ctx->internal->priv; ++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv; + cl_event event; + cl_int cle; + +@@ -2501,7 +2538,7 @@ static void opencl_unmap_from_d3d11(AVHW + frames_priv->command_queue, desc->nb_planes, desc->planes, + 0, NULL, &event); + if (cle != CL_SUCCESS) { +- 
+@@ -2501,7 +2538,7 @@ static void opencl_unmap_from_d3d11(AVHW
+ frames_priv->command_queue, desc->nb_planes, desc->planes,
+ 0, NULL, &event);
+ if (cle != CL_SUCCESS) {
+- av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface "
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to release texture "
+ "handle: %d.\n", cle);
+ }
+
+@@ -2516,7 +2553,7 @@ static int opencl_map_from_d3d11(AVHWFra
+ AVOpenCLFrameDescriptor *desc;
+ cl_event event;
+ cl_int cle;
+- int err, index, i;
++ int err, index, i, nb_planes;
+
+ index = (intptr_t)src->data[1];
+ if (index >= frames_priv->nb_mapped_frames) {
+@@ -2530,20 +2567,36 @@ static int opencl_map_from_d3d11(AVHWFra
+
+ desc = &frames_priv->mapped_frames[index];
+
+- cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR(
+- frames_priv->command_queue, desc->nb_planes, desc->planes,
+- 0, NULL, &event);
+- if (cle != CL_SUCCESS) {
+- av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface "
+- "handle: %d.\n", cle);
+- return AVERROR(EIO);
++ nb_planes = device_priv->d3d11_map_amd ? (desc->nb_planes - 1)
++ : desc->nb_planes;
++
++ if (device_priv->d3d11_map_amd) {
++ cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR(
++ frames_priv->command_queue, 1, &desc->planes[nb_planes],
++ 0, NULL, &event);
++ if (cle != CL_SUCCESS) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire texture "
++ "handle: %d.\n", cle);
++ return AVERROR(EIO);
++ }
++ } else if (device_priv->d3d11_map_intel) {
++ cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR(
++ frames_priv->command_queue, nb_planes, desc->planes,
++ 0, NULL, &event);
++ if (cle != CL_SUCCESS) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire texture "
++ "handle: %d.\n", cle);
++ return AVERROR(EIO);
++ }
++ } else {
++ return AVERROR(ENOSYS);
+ }
+
+ err = opencl_wait_events(dst_fc, &event, 1);
+ if (err < 0)
+ goto fail;
+
+- for (i = 0; i < desc->nb_planes; i++)
++ for (i = 0; i < nb_planes; i++)
+ dst->data[i] = (uint8_t*)desc->planes[i];
+
+ err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
+@@ -2572,16 +2625,26 @@ static int opencl_frames_derive_from_d3d
+ AVD3D11VAFramesContext *src_hwctx = src_fc->hwctx;
+ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
+ OpenCLFramesContext *frames_priv = dst_fc->internal->priv;
++ cl_mem planeUI = NULL;
+ cl_mem_flags cl_flags;
+ cl_int cle;
+ int err, i, p, nb_planes;
+
+- if (src_fc->sw_format != AV_PIX_FMT_NV12) {
+- av_log(dst_fc, AV_LOG_ERROR, "Only NV12 textures are supported "
+- "for D3D11 to OpenCL mapping.\n");
+- return AVERROR(EINVAL);
++ // both AMD and Intel support NV12 and P010,
++ // but Intel requires D3D11_RESOURCE_MISC_SHARED.
++ if (device_priv->d3d11_map_amd ||
++ device_priv->d3d11_map_intel) {
++ if (src_fc->sw_format != AV_PIX_FMT_NV12 &&
++ src_fc->sw_format != AV_PIX_FMT_P010) {
++ av_log(dst_fc, AV_LOG_ERROR, "Only NV12 and P010 textures are "
++ "supported with AMD and Intel for D3D11 to OpenCL mapping.\n");
++ return AVERROR(EINVAL);
++ }
++ } else {
++ return AVERROR(ENOSYS);
+ }
+- nb_planes = 2;
++
++ nb_planes = device_priv->d3d11_map_amd ?
3 : 2; + + if (src_fc->initial_pool_size == 0) { + av_log(dst_fc, AV_LOG_ERROR, "Only fixed-size pools are supported " +@@ -2604,27 +2667,94 @@ static int opencl_frames_derive_from_d3d + for (i = 0; i < frames_priv->nb_mapped_frames; i++) { + AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; + desc->nb_planes = nb_planes; +- for (p = 0; p < nb_planes; p++) { +- UINT subresource = 2 * i + p; +- +- desc->planes[p] = +- device_priv->clCreateFromD3D11Texture2DKHR( +- dst_dev->context, cl_flags, src_hwctx->texture, +- subresource, &cle); +- if (!desc->planes[p]) { +- av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " +- "image from plane %d of D3D texture " +- "index %d (subresource %u): %d.\n", +- p, i, (unsigned int)subresource, cle); ++ if (device_priv->d3d11_map_amd) { ++ // put the multiple-plane AMD shared image at the end. ++ desc->planes[nb_planes - 1] = device_priv->clCreateFromD3D11Texture2DKHR( ++ dst_dev->context, cl_flags, src_hwctx->texture, i, &cle); ++ if (!desc->planes[nb_planes - 1]) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL image " ++ "from D3D11 texture index %d: %d.\n", i, cle); + err = AVERROR(EIO); + goto fail; + } ++ ++ for (p = 0; p < nb_planes - 1; p++) { ++ cl_image_format image_fmt; ++ ++ // get plane from AMD in CL_UNSIGNED_INT8|16 type. ++ planeUI = device_priv->clGetPlaneFromImageAMD( ++ dst_dev->context, desc->planes[nb_planes - 1], p, &cle); ++ if (!planeUI) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL image " ++ "from plane %d of image created from D3D11 " ++ "texture index %d: %d.\n", p, i, cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ cle = clGetImageInfo( ++ planeUI, CL_IMAGE_FORMAT, sizeof(cl_image_format), &image_fmt, NULL); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to query image format of CL image " ++ "from plane %d of image created from D3D11 " ++ "texture index %d: %d.\n", p, i, cle); ++ err = AVERROR_UNKNOWN; ++ goto fail; ++ } ++ ++ switch (image_fmt.image_channel_data_type) { ++ case CL_UNSIGNED_INT8: ++ image_fmt.image_channel_data_type = CL_UNORM_INT8; break; ++ case CL_UNSIGNED_INT16: ++ image_fmt.image_channel_data_type = CL_UNORM_INT16; break; ++ default: ++ av_log(dst_fc, AV_LOG_ERROR, "The data type of CL image " ++ "from plane %d of image created from D3D11 texture index %d " ++ "isn't a CL_UNSIGNED_INT8|16 type.\n", p, i); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ ++ // convert plane from CL_UNSIGNED_INT8|16 to CL_UNORM_INT8|16. 
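/*
 * Why this conversion step exists: clGetPlaneFromImageAMD hands each
 * NV12/P010 plane back as a CL_UNSIGNED_INT8/16 image, which a kernel can
 * only read with read_imageui(). The OpenCL filters in this tree sample
 * their inputs with read_imagef() and expect normalised floats, hence the
 * reinterpretation to CL_UNORM_INT8/16 before the planes are exposed. A
 * minimal kernel-side sketch of the difference (illustrative only, not
 * part of the patch):
 *
 *     __kernel void copy_plane(__read_only  image2d_t src,
 *                              __write_only image2d_t dst)
 *     {
 *         int2 pos = (int2)(get_global_id(0), get_global_id(1));
 *         // CL_UNORM_INT8/16 image: returns floats in [0, 1]
 *         float4 v = read_imagef(src, pos);
 *         // a CL_UNSIGNED_INT8/16 image would instead require:
 *         //     uint4 u = read_imageui(src, pos);
 *         write_imagef(dst, pos, v);
 *     }
 */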
++ desc->planes[p] = device_priv->clConvertImageAMD(
++ dst_dev->context, planeUI, &image_fmt, &cle);
++ if (!desc->planes[p]) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to convert data type of CL image "
++ "from plane %d of image created from D3D11 texture index %d "
++ "to CL_UNORM_INT8|16 type: %d.\n", p, i, cle);
++ err = AVERROR(EIO);
++ goto fail;
++ }
++
++ clReleaseMemObject(planeUI); planeUI = NULL;
++ }
++ } else if (device_priv->d3d11_map_intel) {
++ for (p = 0; p < nb_planes; p++) {
++ UINT subresource = 2 * i + p;
++
++ desc->planes[p] =
++ device_priv->clCreateFromD3D11Texture2DKHR(
++ dst_dev->context, cl_flags, src_hwctx->texture,
++ subresource, &cle);
++ if (!desc->planes[p]) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL "
++ "image from plane %d of D3D11 texture "
++ "index %d (subresource %u): %d.\n",
++ p, i, (unsigned int)subresource, cle);
++ err = AVERROR(EIO);
++ goto fail;
++ }
++ }
++ } else {
++ return AVERROR(ENOSYS);
+ }
+ }
+
+ return 0;
+
+ fail:
++ if (planeUI)
++ clReleaseMemObject(planeUI);
+ for (i = 0; i < frames_priv->nb_mapped_frames; i++) {
+ AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i];
+ for (p = 0; p < desc->nb_planes; p++) {
diff --git a/debian/patches/series b/debian/patches/series
index f875b00723e..807ae0cef18 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -5,3 +5,4 @@
 0005-add-opencl-scaler-and-pixfmt-converter-impl.patch
 0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
 0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch
+0008-add-d3d11-opencl-interop-for-AMD.patch

From 01ba5a279896e6b2a81741c7d29e9842d20e3617 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:39:20 +0800
Subject: [PATCH 18/41] add a hack for d3d11-opencl reverse mapping

---
 ...dd-a-hack-for-opencl-reverse-mapping.patch | 131 ++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 132 insertions(+)
 create mode 100644 debian/patches/0009-add-a-hack-for-opencl-reverse-mapping.patch

diff --git a/debian/patches/0009-add-a-hack-for-opencl-reverse-mapping.patch b/debian/patches/0009-add-a-hack-for-opencl-reverse-mapping.patch
new file mode 100644
index 00000000000..825614bfa97
--- /dev/null
+++ b/debian/patches/0009-add-a-hack-for-opencl-reverse-mapping.patch
@@ -0,0 +1,131 @@
+Index: jellyfin-ffmpeg/libavfilter/avfilter.h
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/avfilter.h
++++ jellyfin-ffmpeg/libavfilter/avfilter.h
+@@ -481,6 +481,7 @@ struct AVFilterLink {
+ int w; ///< agreed upon image width
+ int h; ///< agreed upon image height
+ AVRational sample_aspect_ratio; ///< agreed upon sample aspect ratio
++ int fixed_pool_size; ///< fixed size of the frame pool for reverse hw mapping
+ /* These parameters apply only to audio */
+ uint64_t channel_layout; ///< channel layout of current buffer (see libavutil/channel_layout.h)
+ int sample_rate; ///< samples per second
+Index: jellyfin-ffmpeg/libavfilter/opencl.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavfilter/opencl.c
++++ jellyfin-ffmpeg/libavfilter/opencl.c
+@@ -60,6 +60,7 @@ static int opencl_filter_set_device(AVFi
+ int ff_opencl_filter_config_input(AVFilterLink *inlink)
+ {
+ AVFilterContext *avctx = inlink->dst;
++ AVFilterLink *outlink = avctx->outputs[0];
+ OpenCLFilterContext *ctx = avctx->priv;
+ AVHWFramesContext *input_frames;
+ int err;
+@@ -90,12 +91,15 @@ int ff_opencl_filter_config_input(AVFilt
+ if
(!ctx->output_height) + ctx->output_height = inlink->h; + ++ outlink->fixed_pool_size = inlink->fixed_pool_size; ++ + return 0; + } + + int ff_opencl_filter_config_output(AVFilterLink *outlink) + { + AVFilterContext *avctx = outlink->src; ++ AVFilterLink *inlink = avctx->inputs[0]; + OpenCLFilterContext *ctx = avctx->priv; + AVBufferRef *output_frames_ref = NULL; + AVHWFramesContext *output_frames; +@@ -137,6 +141,7 @@ int ff_opencl_filter_config_output(AVFil + outlink->hw_frames_ctx = output_frames_ref; + outlink->w = ctx->output_width; + outlink->h = ctx->output_height; ++ outlink->fixed_pool_size = inlink->fixed_pool_size; + + return 0; + fail: +Index: jellyfin-ffmpeg/libavfilter/vf_hwmap.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_hwmap.c ++++ jellyfin-ffmpeg/libavfilter/vf_hwmap.c +@@ -22,6 +22,10 @@ + #include "libavutil/opt.h" + #include "libavutil/pixdesc.h" + ++#if HAVE_OPENCL_D3D11 ++#include "libavutil/hwcontext_d3d11va.h" ++#endif ++ + #include "avfilter.h" + #include "formats.h" + #include "internal.h" +@@ -122,6 +126,12 @@ static int hwmap_config_output(AVFilterL + goto fail; + } + ++ if (hwfc->initial_pool_size) { ++ outlink->fixed_pool_size = hwfc->initial_pool_size; ++ av_log(avctx, AV_LOG_DEBUG, "Saved the fixed_pool_size from " ++ "initial_pool_size: %d\n", outlink->fixed_pool_size); ++ } ++ + } else if (inlink->format == hwfc->format && + (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) && + ctx->reverse) { +@@ -144,8 +154,20 @@ static int hwmap_config_output(AVFilterL + frames->width = hwfc->width; + frames->height = hwfc->height; + +- if (avctx->extra_hw_frames >= 0) +- frames->initial_pool_size = 2 + avctx->extra_hw_frames; ++ if (inlink->fixed_pool_size) ++ frames->initial_pool_size = inlink->fixed_pool_size; ++ ++ if (frames->initial_pool_size == 0) { ++ // Dynamic allocation. 
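/*
 * What the fixed_pool_size plumbing above is for: a D3D11VA decoder
 * allocates one fixed-size texture array, and the OpenCL interop in this
 * tree can only pre-map pools whose initial_pool_size is known (dynamic
 * pools are rejected in opencl_frames_derive_from_d3d11). When the graph
 * ends in a reverse hwmap back to D3D11, the new pool must therefore be
 * fixed-size as well, so the decoder's pool size is carried across every
 * OpenCL filter link and restored here. One plausible shape of such a
 * pipeline, purely illustrative (exact filter arguments vary by setup):
 *
 *     ffmpeg -hwaccel d3d11va -hwaccel_output_format d3d11 -i in.mkv \
 *            -vf "hwmap=derive_device=opencl,tonemap_opencl=format=nv12, \
 *                 hwmap=derive_device=d3d11va:reverse=1" \
 *            -c:v h264_amf out.mkv
 */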
++ } else if (avctx->extra_hw_frames > 0) {
++ frames->initial_pool_size += avctx->extra_hw_frames;
++ }
++
++#if HAVE_OPENCL_D3D11
++ D3D11_TEXTURE2D_DESC texDesc = { .BindFlags = D3D11_BIND_DECODER, };
++ if (frames->format == AV_PIX_FMT_D3D11)
++ frames->user_opaque = &texDesc;
++#endif
+
+ err = av_hwframe_ctx_init(ctx->hwframes_ref);
+ if (err < 0) {
+Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.c
++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c
+@@ -190,7 +190,7 @@ static AVBufferRef *d3d11va_alloc_single
+ .ArraySize = 1,
+ .Usage = D3D11_USAGE_DEFAULT,
+ .BindFlags = hwctx->BindFlags,
+- .MiscFlags = hwctx->MiscFlags,
++ .MiscFlags = hwctx->MiscFlags | D3D11_RESOURCE_MISC_SHARED,
+ };
+
+ hr = ID3D11Device_CreateTexture2D(device_hwctx->device, &texDesc, NULL, &tex);
+@@ -254,9 +254,17 @@ static int d3d11va_frames_init(AVHWFrame
+ .ArraySize = ctx->initial_pool_size,
+ .Usage = D3D11_USAGE_DEFAULT,
+ .BindFlags = hwctx->BindFlags,
+- .MiscFlags = hwctx->MiscFlags,
++ .MiscFlags = hwctx->MiscFlags | D3D11_RESOURCE_MISC_SHARED,
+ };
+
++#if HAVE_OPENCL_D3D11
++ if (ctx->user_opaque) {
++ D3D11_TEXTURE2D_DESC *desc = ctx->user_opaque;
++ if (desc->BindFlags & D3D11_BIND_DECODER)
++ texDesc.BindFlags = D3D11_BIND_DECODER;
++ }
++#endif
++
+ if (hwctx->texture) {
+ D3D11_TEXTURE2D_DESC texDesc2;
+ ID3D11Texture2D_GetDesc(hwctx->texture, &texDesc2);
diff --git a/debian/patches/series b/debian/patches/series
index 807ae0cef18..67198916555 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -6,3 +6,4 @@
 0006-add-bt2390-eetf-and-code-refactor-to-opencl-tonemap.patch
 0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch
 0008-add-d3d11-opencl-interop-for-AMD.patch
+0009-add-a-hack-for-opencl-reverse-mapping.patch

From c9c4edb88eea54bbafa5a8e59b4414d6cd2edb2e Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:39:34 +0800
Subject: [PATCH 19/41] add fixes for ffmpeg_hw

---
 .../0010-add-fixes-for-ffmpeg_hw.patch | 70 +++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 71 insertions(+)
 create mode 100644 debian/patches/0010-add-fixes-for-ffmpeg_hw.patch

diff --git a/debian/patches/0010-add-fixes-for-ffmpeg_hw.patch b/debian/patches/0010-add-fixes-for-ffmpeg_hw.patch
new file mode 100644
index 00000000000..6c1afef91ac
--- /dev/null
+++ b/debian/patches/0010-add-fixes-for-ffmpeg_hw.patch
@@ -0,0 +1,70 @@
+Index: jellyfin-ffmpeg/fftools/ffmpeg_hw.c
+===================================================================
+--- jellyfin-ffmpeg.orig/fftools/ffmpeg_hw.c
++++ jellyfin-ffmpeg/fftools/ffmpeg_hw.c
+@@ -93,6 +93,8 @@ static char *hw_device_default_name(enum
+
+ int hw_device_init_from_string(const char *arg, HWDevice **dev_out)
+ {
++ // "type=name"
++ // "type=name,key=value,key2=value2"
+ // "type=name:device,key=value,key2=value2"
+ // "type:device,key=value,key2=value2"
+ // -> av_hwdevice_ctx_create()
+@@ -124,7 +126,7 @@ int hw_device_init_from_string(const cha
+ }
+
+ if (*p == '=') {
+- k = strcspn(p + 1, ":@");
++ k = strcspn(p + 1, ":@,");
+
+ name = av_strndup(p + 1, k);
+ if (!name) {
+@@ -190,6 +192,18 @@ int hw_device_init_from_string(const cha
+ src->device_ref, 0);
+ if (err < 0)
+ goto fail;
++ } else if (*p == ',') {
++ err = av_dict_parse_string(&options, p + 1, "=", ",", 0);
++
++ if (err < 0) {
++ errmsg = "failed to parse options";
++ goto invalid;
++ }
++
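/*
 * What the new ',' branch accepts: a device string that names only the
 * type (plus an optional name) and creation options, leaving device
 * selection to av_hwdevice_ctx_create(). Illustrative command lines,
 * assuming the QSV creation options defined elsewhere in this series
 * (child_device, child_device_type):
 *
 *     -init_hw_device qsv=qs,child_device_type=d3d11va
 *     -init_hw_device qsv=qs,child_device=0
 *
 * compared with the pre-existing form that carries an explicit device
 * part after ':':
 *
 *     -init_hw_device qsv=qs:hw_any,child_device=0
 */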
++ err = av_hwdevice_ctx_create(&device_ref, type,
++ NULL, options, 0);
++ if (err < 0)
++ goto fail;
+ } else {
+ errmsg = "parse error";
+ goto invalid;
+@@ -527,15 +541,21 @@ int hw_device_setup_for_filter(FilterGra
+ HWDevice *dev;
+ int i;
+
+- // If the user has supplied exactly one hardware device then just
+- // give it straight to every filter for convenience. If more than
+- // one device is available then the user needs to pick one explcitly
+- // with the filter_hw_device option.
++ // Pick the last hardware device if the user doesn't pick the device for
++ // filters explicitly with the filter_hw_device option.
+ if (filter_hw_device)
+ dev = filter_hw_device;
+- else if (nb_hw_devices == 1)
+- dev = hw_devices[0];
+- else
++ else if (nb_hw_devices > 0) {
++ dev = hw_devices[nb_hw_devices - 1];
++
++ if (nb_hw_devices > 1)
++ av_log(NULL, AV_LOG_WARNING, "There are %d hardware devices. Device "
++ "%s of type %s is picked for filters by default. Set hardware "
++ "device explicitly with the filter_hw_device option if device "
++ "%s is not usable for filters.\n",
++ nb_hw_devices, dev->name,
++ av_hwdevice_get_type_name(dev->type), dev->name);
++ } else
+ dev = NULL;
+
+ if (dev) {
diff --git a/debian/patches/series b/debian/patches/series
index 67198916555..f1b2f9672ba 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -7,3 +7,4 @@
 0007-add-pgs-subtitle-support-and-code-refactor-to-opencl.patch
 0008-add-d3d11-opencl-interop-for-AMD.patch
 0009-add-a-hack-for-opencl-reverse-mapping.patch
+0010-add-fixes-for-ffmpeg_hw.patch

From f7497b48527a5db0d1333ffc7f0c29f9b20600c9 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:39:55 +0800
Subject: [PATCH 20/41] add d3d11 support for QSV

---
 .../0011-add-d3d11-support-for-QSV.patch | 1118 +++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 1119 insertions(+)
 create mode 100644 debian/patches/0011-add-d3d11-support-for-QSV.patch

diff --git a/debian/patches/0011-add-d3d11-support-for-QSV.patch b/debian/patches/0011-add-d3d11-support-for-QSV.patch
new file mode 100644
index 00000000000..5caf8cd3194
--- /dev/null
+++ b/debian/patches/0011-add-d3d11-support-for-QSV.patch
@@ -0,0 +1,1118 @@
+Index: jellyfin-ffmpeg/libavcodec/qsv.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavcodec/qsv.c
++++ jellyfin-ffmpeg/libavcodec/qsv.c
+@@ -36,6 +36,8 @@
+ #include "avcodec.h"
+ #include "qsv_internal.h"
+
++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl))
++
+ #if QSV_VERSION_ATLEAST(1, 12)
+ #include "mfx/mfxvp8.h"
+ #endif
+@@ -243,7 +245,9 @@ int ff_qsv_find_surface_idx(QSVFramesCon
+ int i;
+ for (i = 0; i < ctx->nb_mids; i++) {
+ QSVMid *mid = &ctx->mids[i];
+- if (mid->handle == frame->surface.Data.MemId)
++ mfxHDLPair *pair = (mfxHDLPair*)frame->surface.Data.MemId;
++ if ((mid->handle_pair->first == pair->first) &&
++ (mid->handle_pair->second == pair->second))
+ return i;
+ }
+ return AVERROR_BUG;
+@@ -383,7 +387,11 @@ static int ff_qsv_set_display_handle(AVC
+ int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,
+ const char *load_plugins, int gpu_copy)
+ {
++#if CONFIG_D3D11VA
++ mfxIMPL impl = MFX_IMPL_AUTO_ANY | MFX_IMPL_VIA_D3D11;
++#else
+ mfxIMPL impl = MFX_IMPL_AUTO_ANY;
++#endif
+ mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };
+ mfxInitParam init_par = { MFX_IMPL_AUTO_ANY };
+
+@@ -472,7 +480,7 @@ static AVBufferRef *qsv_create_mids(AVBu
+
+ for (i = 0; i < nb_surfaces; i++) {
+ QSVMid *mid = &mids[i];
+-
mid->handle = frames_hwctx->surfaces[i].Data.MemId; ++ mid->handle_pair = (mfxHDLPair*)frames_hwctx->surfaces[i].Data.MemId; + mid->hw_frames_ref = hw_frames_ref1; + } + +@@ -649,7 +657,7 @@ static mfxStatus qsv_frame_lock(mfxHDL p + goto fail; + + qsv_mid->surf.Info = hw_frames_hwctx->surfaces[0].Info; +- qsv_mid->surf.Data.MemId = qsv_mid->handle; ++ qsv_mid->surf.Data.MemId = qsv_mid->handle_pair; + + /* map the data to the system memory */ + ret = av_hwframe_map(qsv_mid->locked_frame, qsv_mid->hw_frame, +@@ -682,7 +690,13 @@ static mfxStatus qsv_frame_unlock(mfxHDL + static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { + QSVMid *qsv_mid = (QSVMid*)mid; +- *hdl = qsv_mid->handle; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)qsv_mid->handle_pair; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +@@ -690,24 +704,19 @@ int ff_qsv_init_session_device(AVCodecCo + AVBufferRef *device_ref, const char *load_plugins, + int gpu_copy) + { +- static const mfxHandleType handle_types[] = { +- MFX_HANDLE_VA_DISPLAY, +- MFX_HANDLE_D3D9_DEVICE_MANAGER, +- MFX_HANDLE_D3D11_DEVICE, +- }; + AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)device_ref->data; + AVQSVDeviceContext *device_hwctx = device_ctx->hwctx; + mfxSession parent_session = device_hwctx->session; + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; + mfxHDL handle = NULL; ++ int hw_handle_supported = 0; + + mfxSession session; + mfxVersion ver; + mfxIMPL impl; + mfxHandleType handle_type; + mfxStatus err; +- +- int i, ret; ++ int ret; + + err = MFXQueryIMPL(parent_session, &impl); + if (err == MFX_ERR_NONE) +@@ -716,13 +725,23 @@ int ff_qsv_init_session_device(AVCodecCo + return ff_qsv_print_error(avctx, err, + "Error querying the session attributes"); + +- for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) { +- err = MFXVideoCORE_GetHandle(parent_session, handle_types[i], &handle); +- if (err == MFX_ERR_NONE) { +- handle_type = handle_types[i]; +- break; ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ hw_handle_supported = 1; ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ hw_handle_supported = 1; ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ hw_handle_supported = 1; ++ } ++ ++ if (hw_handle_supported) { ++ err = MFXVideoCORE_GetHandle(parent_session, handle_type, &handle); ++ if (err != MFX_ERR_NONE) { ++ return ff_qsv_print_error(avctx, err, ++ "Error getting handle session"); + } +- handle = NULL; + } + if (!handle) { + av_log(avctx, AV_LOG_VERBOSE, "No supported hw handle could be retrieved " +Index: jellyfin-ffmpeg/libavcodec/qsv_internal.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsv_internal.h ++++ jellyfin-ffmpeg/libavcodec/qsv_internal.h +@@ -62,7 +62,7 @@ + + typedef struct QSVMid { + AVBufferRef *hw_frames_ref; +- mfxHDL handle; ++ mfxHDLPair *handle_pair; + + AVFrame *locked_frame; + AVFrame *hw_frame; +Index: jellyfin-ffmpeg/libavfilter/qsvvpp.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/qsvvpp.c ++++ jellyfin-ffmpeg/libavfilter/qsvvpp.c +@@ -68,11 +68,7 @@ struct QSVVPPContext { + int nb_ext_buffers; + }; + +-static const mfxHandleType 
handle_types[] = { +- MFX_HANDLE_VA_DISPLAY, +- MFX_HANDLE_D3D9_DEVICE_MANAGER, +- MFX_HANDLE_D3D11_DEVICE, +-}; ++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) + + static const AVRational default_tb = { 1, 90000 }; + +@@ -233,7 +229,13 @@ static mfxStatus frame_unlock(mfxHDL pth + + static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { +- *hdl = mid; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)mid; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +@@ -555,7 +557,7 @@ static int init_vpp_session(AVFilterCont + + s->out_mem_mode = IS_OPAQUE_MEMORY(s->in_mem_mode) ? + MFX_MEMTYPE_OPAQUE_FRAME : +- MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; ++ MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET | MFX_MEMTYPE_FROM_VPPOUT; + + out_frames_ctx = (AVHWFramesContext *)out_frames_ref->data; + out_frames_hwctx = out_frames_ctx->hwctx; +@@ -601,14 +603,18 @@ static int init_vpp_session(AVFilterCont + return AVERROR_UNKNOWN; + } + +- for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) { +- ret = MFXVideoCORE_GetHandle(device_hwctx->session, handle_types[i], &handle); +- if (ret == MFX_ERR_NONE) { +- handle_type = handle_types[i]; +- break; +- } ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ } else { ++ av_log(avctx, AV_LOG_ERROR, "Error unsupported handle type\n"); ++ return AVERROR_UNKNOWN; + } + ++ ret = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); + if (ret < 0) + return ff_qsvvpp_print_error(avctx, ret, "Error getting the session handle"); + else if (ret > 0) { +Index: jellyfin-ffmpeg/libavfilter/vf_deinterlace_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_deinterlace_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_deinterlace_qsv.c +@@ -42,6 +42,8 @@ + #include "internal.h" + #include "video.h" + ++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) ++ + enum { + QSVDEINT_MORE_OUTPUT = 1, + QSVDEINT_MORE_INPUT, +@@ -157,16 +159,16 @@ static mfxStatus frame_unlock(mfxHDL pth + + static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { +- *hdl = mid; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)mid; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +-static const mfxHandleType handle_types[] = { +- MFX_HANDLE_VA_DISPLAY, +- MFX_HANDLE_D3D9_DEVICE_MANAGER, +- MFX_HANDLE_D3D11_DEVICE, +-}; +- + static int init_out_session(AVFilterContext *ctx) + { + +@@ -194,14 +196,18 @@ static int init_out_session(AVFilterCont + return AVERROR_UNKNOWN; + } + +- for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) { +- err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_types[i], &handle); +- if (err == MFX_ERR_NONE) { +- handle_type = handle_types[i]; +- break; +- } ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { ++ 
handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Error unsupported handle type\n"); ++ return AVERROR_UNKNOWN; + } + ++ err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); + if (err < 0) + return ff_qsvvpp_print_error(ctx, err, "Error getting the session handle"); + else if (err > 0) { +Index: jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +@@ -70,6 +70,7 @@ enum var_name { + }; + + #define QSV_HAVE_SCALING_CONFIG QSV_VERSION_ATLEAST(1, 19) ++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) + + typedef struct QSVScaleContext { + const AVClass *class; +@@ -206,7 +207,7 @@ static int init_out_pool(AVFilterContext + out_frames_ctx->sw_format = out_format; + out_frames_ctx->initial_pool_size = 4; + +- out_frames_hwctx->frame_type = in_frames_hwctx->frame_type; ++ out_frames_hwctx->frame_type = in_frames_hwctx->frame_type | MFX_MEMTYPE_FROM_VPPOUT; + + ret = ff_filter_init_hw_frames(ctx, outlink, 32); + if (ret < 0) +@@ -264,16 +265,16 @@ static mfxStatus frame_unlock(mfxHDL pth + + static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { +- *hdl = mid; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)mid; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +-static const mfxHandleType handle_types[] = { +- MFX_HANDLE_VA_DISPLAY, +- MFX_HANDLE_D3D9_DEVICE_MANAGER, +- MFX_HANDLE_D3D11_DEVICE, +-}; +- + static int init_out_session(AVFilterContext *ctx) + { + +@@ -305,14 +306,18 @@ static int init_out_session(AVFilterCont + return AVERROR_UNKNOWN; + } + +- for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) { +- err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_types[i], &handle); +- if (err == MFX_ERR_NONE) { +- handle_type = handle_types[i]; +- break; +- } ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { ++ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ } else { ++ av_log(ctx, AV_LOG_ERROR, "Error unsupported handle type\n"); ++ return AVERROR_UNKNOWN; + } + ++ err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); + if (err < 0) + return ff_qsvvpp_print_error(ctx, err, "Error getting the session handle"); + else if (err > 0) { +Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +@@ -112,6 +112,8 @@ static void d3d11va_frames_uninit(AVHWFr + if (s->staging_texture) + ID3D11Texture2D_Release(s->staging_texture); + s->staging_texture = NULL; ++ ++ av_freep(&frames_hwctx->texture_infos); + } + + static int d3d11va_frames_get_constraints(AVHWDeviceContext *ctx, +@@ -152,15 +154,21 @@ static void free_texture(void *opaque, u + av_free(data); + } + +-static AVBufferRef *wrap_texture_buf(ID3D11Texture2D *tex, int index) ++static AVBufferRef *wrap_texture_buf(AVHWFramesContext *ctx, ID3D11Texture2D *tex, int index) + { + AVBufferRef *buf; +- AVD3D11FrameDescriptor *desc = av_mallocz(sizeof(*desc)); ++ 
AVD3D11FrameDescriptor *desc = av_mallocz(sizeof(*desc)); ++ D3D11VAFramesContext *s = ctx->internal->priv; ++ AVD3D11VAFramesContext *frames_hwctx = ctx->hwctx; + if (!desc) { + ID3D11Texture2D_Release(tex); + return NULL; + } + ++ frames_hwctx->texture_infos[s->nb_surfaces_used].texture = tex; ++ frames_hwctx->texture_infos[s->nb_surfaces_used].index = index; ++ s->nb_surfaces_used++; ++ + desc->texture = tex; + desc->index = index; + +@@ -199,7 +207,7 @@ static AVBufferRef *d3d11va_alloc_single + return NULL; + } + +- return wrap_texture_buf(tex, 0); ++ return wrap_texture_buf(ctx, tex, 0); + } + + static AVBufferRef *d3d11va_pool_alloc(void *opaque, buffer_size_t size) +@@ -220,7 +228,8 @@ static AVBufferRef *d3d11va_pool_alloc(v + } + + ID3D11Texture2D_AddRef(hwctx->texture); +- return wrap_texture_buf(hwctx->texture, s->nb_surfaces_used++); ++ ++ return wrap_texture_buf(ctx, hwctx->texture, s->nb_surfaces_used); + } + + static int d3d11va_frames_init(AVHWFramesContext *ctx) +@@ -267,7 +276,7 @@ static int d3d11va_frames_init(AVHWFrame + av_log(ctx, AV_LOG_ERROR, "User-provided texture has mismatching parameters\n"); + return AVERROR(EINVAL); + } +- } else if (texDesc.ArraySize > 0) { ++ } else if (!(texDesc.BindFlags & D3D11_BIND_RENDER_TARGET) && texDesc.ArraySize > 0) { + hr = ID3D11Device_CreateTexture2D(device_hwctx->device, &texDesc, NULL, &hwctx->texture); + if (FAILED(hr)) { + av_log(ctx, AV_LOG_ERROR, "Could not create the texture (%lx)\n", (long)hr); +@@ -275,6 +284,11 @@ static int d3d11va_frames_init(AVHWFrame + } + } + ++ hwctx->texture_infos = av_mallocz_array(ctx->initial_pool_size, sizeof(*hwctx->texture_infos)); ++ if (!hwctx->texture_infos) ++ return AVERROR(ENOMEM); ++ hwctx->nb_surfaces = ctx->initial_pool_size; ++ + ctx->internal->pool_internal = av_buffer_pool_init2(sizeof(AVD3D11FrameDescriptor), + ctx, d3d11va_pool_alloc, NULL); + if (!ctx->internal->pool_internal) +Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.h ++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.h +@@ -164,6 +164,17 @@ typedef struct AVD3D11VAFramesContext { + * This field is ignored/invalid if a user-allocated texture is provided. + */ + UINT MiscFlags; ++ ++ /** ++ * In case if texture structure member above is not NULL contains the same texture ++ * pointer for all elements and different indexes into the array texture. ++ * In case if texture structure member above is NULL, all elements contains ++ * pointers to separate non-array textures and 0 indexes. ++ * This field is ignored/invalid if a user-allocated texture is provided. 
++ */ ++ AVD3D11FrameDescriptor *texture_infos; ++ ++ int nb_surfaces; + } AVD3D11VAFramesContext; + + #endif /* AVUTIL_HWCONTEXT_D3D11VA_H */ +Index: jellyfin-ffmpeg/libavutil/hwcontext_opencl.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_opencl.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_opencl.c +@@ -2249,7 +2249,8 @@ static int opencl_map_from_qsv(AVHWFrame + #if CONFIG_LIBMFX + if (src->format == AV_PIX_FMT_QSV) { + mfxFrameSurface1 *mfx_surface = (mfxFrameSurface1*)src->data[3]; +- va_surface = *(VASurfaceID*)mfx_surface->Data.MemId; ++ mfxHDLPair *pair = (mfxHDLPair*)mfx_surface->Data.MemId; ++ va_surface = *(VASurfaceID*)pair->first; + } else + #endif + if (src->format == AV_PIX_FMT_VAAPI) { +Index: jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_qsv.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +@@ -27,9 +27,13 @@ + #include + #endif + ++#define COBJMACROS + #if CONFIG_VAAPI + #include "hwcontext_vaapi.h" + #endif ++#if CONFIG_D3D11VA ++#include "hwcontext_d3d11va.h" ++#endif + #if CONFIG_DXVA2 + #include "hwcontext_dxva2.h" + #endif +@@ -48,6 +52,8 @@ + (MFX_VERSION_MAJOR > (MAJOR) || \ + MFX_VERSION_MAJOR == (MAJOR) && MFX_VERSION_MINOR >= (MINOR)) + ++#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) ++ + typedef struct QSVDevicePriv { + AVBufferRef *child_device_ctx; + } QSVDevicePriv; +@@ -74,6 +80,7 @@ typedef struct QSVFramesContext { + + AVBufferRef *child_frames_ref; + mfxFrameSurface1 *surfaces_internal; ++ mfxHDLPair *handle_pairs_internal; + int nb_surfaces_used; + + // used in the frame allocator for non-opaque surfaces +@@ -86,20 +93,6 @@ typedef struct QSVFramesContext { + } QSVFramesContext; + + static const struct { +- mfxHandleType handle_type; +- enum AVHWDeviceType device_type; +- enum AVPixelFormat pix_fmt; +-} supported_handle_types[] = { +-#if CONFIG_VAAPI +- { MFX_HANDLE_VA_DISPLAY, AV_HWDEVICE_TYPE_VAAPI, AV_PIX_FMT_VAAPI }, +-#endif +-#if CONFIG_DXVA2 +- { MFX_HANDLE_D3D9_DEVICE_MANAGER, AV_HWDEVICE_TYPE_DXVA2, AV_PIX_FMT_DXVA2_VLD }, +-#endif +- { 0 }, +-}; +- +-static const struct { + enum AVPixelFormat pix_fmt; + uint32_t fourcc; + } supported_pixel_formats[] = { +@@ -127,28 +120,32 @@ static uint32_t qsv_fourcc_from_pix_fmt( + return 0; + } + ++#if CONFIG_D3D11VA ++static uint32_t qsv_get_d3d11va_bind_flags(int mem_type) ++{ ++ uint32_t bind_flags = 0; ++ ++ if ((mem_type & MFX_MEMTYPE_VIDEO_MEMORY_ENCODER_TARGET) && (mem_type & MFX_MEMTYPE_INTERNAL_FRAME)) ++ bind_flags = D3D11_BIND_DECODER | D3D11_BIND_VIDEO_ENCODER; ++ else ++ bind_flags = D3D11_BIND_DECODER; ++ ++ if ((MFX_MEMTYPE_FROM_VPPOUT & mem_type) || (MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET & mem_type)) ++ bind_flags = D3D11_BIND_RENDER_TARGET; ++ ++ return bind_flags; ++} ++#endif ++ + static int qsv_device_init(AVHWDeviceContext *ctx) + { + AVQSVDeviceContext *hwctx = ctx->hwctx; + QSVDeviceContext *s = ctx->internal->priv; +- ++ int hw_handle_supported = 0; ++ mfxHandleType handle_type; ++ enum AVHWDeviceType device_type; ++ enum AVPixelFormat pix_fmt; + mfxStatus err; +- int i; +- +- for (i = 0; supported_handle_types[i].handle_type; i++) { +- err = MFXVideoCORE_GetHandle(hwctx->session, supported_handle_types[i].handle_type, +- &s->handle); +- if (err == MFX_ERR_NONE) { +- s->handle_type = supported_handle_types[i].handle_type; +- s->child_device_type = supported_handle_types[i].device_type; +- 
s->child_pix_fmt = supported_handle_types[i].pix_fmt; +- break; +- } +- } +- if (!s->handle) { +- av_log(ctx, AV_LOG_VERBOSE, "No supported hw handle could be retrieved " +- "from the session\n"); +- } + + err = MFXQueryIMPL(hwctx->session, &s->impl); + if (err == MFX_ERR_NONE) +@@ -158,6 +155,41 @@ static int qsv_device_init(AVHWDeviceCon + return AVERROR_UNKNOWN; + } + ++ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(s->impl)) { ++#if CONFIG_VAAPI ++ handle_type = MFX_HANDLE_VA_DISPLAY; ++ device_type = AV_HWDEVICE_TYPE_VAAPI; ++ pix_fmt = AV_PIX_FMT_VAAPI; ++ hw_handle_supported = 1; ++#endif ++ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(s->impl)) { ++#if CONFIG_D3D11VA ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ device_type = AV_HWDEVICE_TYPE_D3D11VA; ++ pix_fmt = AV_PIX_FMT_D3D11; ++ hw_handle_supported = 1; ++#endif ++ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(s->impl)) { ++#if CONFIG_DXVA2 ++ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; ++ device_type = AV_HWDEVICE_TYPE_DXVA2; ++ pix_fmt = AV_PIX_FMT_DXVA2_VLD; ++ hw_handle_supported = 1; ++#endif ++ } ++ ++ if (hw_handle_supported) { ++ err = MFXVideoCORE_GetHandle(hwctx->session, handle_type, &s->handle); ++ if (err == MFX_ERR_NONE) { ++ s->handle_type = handle_type; ++ s->child_device_type = device_type; ++ s->child_pix_fmt = pix_fmt; ++ } ++ } ++ if (!s->handle) { ++ av_log(ctx, AV_LOG_VERBOSE, "No supported hw handle could be retrieved " ++ "from the session\n"); ++ } + return 0; + } + +@@ -187,6 +219,7 @@ static void qsv_frames_uninit(AVHWFrames + av_freep(&s->mem_ids); + av_freep(&s->surface_ptrs); + av_freep(&s->surfaces_internal); ++ av_freep(&s->handle_pairs_internal); + av_buffer_unref(&s->child_frames_ref); + } + +@@ -202,6 +235,8 @@ static AVBufferRef *qsv_pool_alloc(void + + if (s->nb_surfaces_used < hwctx->nb_surfaces) { + s->nb_surfaces_used++; ++ av_buffer_create((uint8_t*)(s->handle_pairs_internal + s->nb_surfaces_used - 1), ++ sizeof(*s->handle_pairs_internal), qsv_pool_release_dummy, NULL, 0); + return av_buffer_create((uint8_t*)(s->surfaces_internal + s->nb_surfaces_used - 1), + sizeof(*hwctx->surfaces), qsv_pool_release_dummy, NULL, 0); + } +@@ -241,6 +276,13 @@ static int qsv_init_child_ctx(AVHWFrames + child_device_hwctx->display = (VADisplay)device_priv->handle; + } + #endif ++#if CONFIG_D3D11VA ++ if (child_device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ AVD3D11VADeviceContext *child_device_hwctx = child_device_ctx->hwctx; ++ ID3D11Device_AddRef((ID3D11Device*)device_priv->handle); ++ child_device_hwctx->device = (ID3D11Device*)device_priv->handle; ++ } ++#endif + #if CONFIG_DXVA2 + if (child_device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) { + AVDXVA2DeviceContext *child_device_hwctx = child_device_ctx->hwctx; +@@ -267,6 +309,16 @@ static int qsv_init_child_ctx(AVHWFrames + child_frames_ctx->width = FFALIGN(ctx->width, 16); + child_frames_ctx->height = FFALIGN(ctx->height, 16); + ++#if CONFIG_D3D11VA ++ if (child_device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ AVD3D11VAFramesContext *child_frames_hwctx = child_frames_ctx->hwctx; ++ if (hwctx->frame_type == 0) ++ hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET; ++ if (hwctx->frame_type & MFX_MEMTYPE_SHARED_RESOURCE) ++ child_frames_hwctx->MiscFlags = D3D11_RESOURCE_MISC_SHARED; ++ child_frames_hwctx->BindFlags = qsv_get_d3d11va_bind_flags(hwctx->frame_type); ++ } ++#endif + #if CONFIG_DXVA2 + if (child_device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) { + AVDXVA2FramesContext *child_frames_hwctx = child_frames_ctx->hwctx; +@@ 
-286,16 +338,41 @@ static int qsv_init_child_ctx(AVHWFrames + #if CONFIG_VAAPI + if (child_device_ctx->type == AV_HWDEVICE_TYPE_VAAPI) { + AVVAAPIFramesContext *child_frames_hwctx = child_frames_ctx->hwctx; +- for (i = 0; i < ctx->initial_pool_size; i++) +- s->surfaces_internal[i].Data.MemId = child_frames_hwctx->surface_ids + i; ++ for (i = 0; i < ctx->initial_pool_size; i++) { ++ s->handle_pairs_internal[i].first = child_frames_hwctx->surface_ids + i; ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; ++ } + hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + } + #endif ++#if CONFIG_D3D11VA ++ if (child_device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ AVD3D11VAFramesContext *child_frames_hwctx = child_frames_ctx->hwctx; ++ for (i = 0; i < ctx->initial_pool_size; i++) { ++ s->handle_pairs_internal[i].first = (mfxMemId)child_frames_hwctx->texture_infos[i].texture; ++ if(child_frames_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ } else { ++ s->handle_pairs_internal[i].second = (mfxMemId)child_frames_hwctx->texture_infos[i].index; ++ } ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; ++ } ++ if (child_frames_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET; ++ } else { ++ hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; ++ } ++ } ++#endif + #if CONFIG_DXVA2 + if (child_device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) { + AVDXVA2FramesContext *child_frames_hwctx = child_frames_ctx->hwctx; +- for (i = 0; i < ctx->initial_pool_size; i++) +- s->surfaces_internal[i].Data.MemId = (mfxMemId)child_frames_hwctx->surfaces[i]; ++ for (i = 0; i < ctx->initial_pool_size; i++) { ++ s->handle_pairs_internal[i].first = (mfxMemId)child_frames_hwctx->surfaces[i]; ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; ++ } + if (child_frames_hwctx->surface_type == DXVA2_VideoProcessorRenderTarget) + hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET; + else +@@ -360,6 +437,11 @@ static int qsv_init_pool(AVHWFramesConte + return AVERROR(EINVAL); + } + ++ s->handle_pairs_internal = av_mallocz_array(ctx->initial_pool_size, ++ sizeof(*s->handle_pairs_internal)); ++ if (!s->handle_pairs_internal) ++ return AVERROR(ENOMEM); ++ + s->surfaces_internal = av_mallocz_array(ctx->initial_pool_size, + sizeof(*s->surfaces_internal)); + if (!s->surfaces_internal) +@@ -433,7 +515,13 @@ static mfxStatus frame_unlock(mfxHDL pth + + static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) + { +- *hdl = mid; ++ mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; ++ mfxHDLPair *pair_src = (mfxHDLPair*)mid; ++ ++ pair_dst->first = pair_src->first; ++ ++ if (pair_src->second != (mfxMemId)MFX_INFINITE) ++ pair_dst->second = pair_src->second; + return MFX_ERR_NONE; + } + +@@ -626,13 +714,26 @@ static int qsv_frames_derive_from(AVHWFr + sizeof(*dst_hwctx->surface_ids)); + if (!dst_hwctx->surface_ids) + return AVERROR(ENOMEM); +- for (i = 0; i < src_hwctx->nb_surfaces; i++) +- dst_hwctx->surface_ids[i] = +- *(VASurfaceID*)src_hwctx->surfaces[i].Data.MemId; ++ for (i = 0; i < src_hwctx->nb_surfaces; i++) { ++ mfxHDLPair *pair = (mfxHDLPair*)src_hwctx->surfaces[i].Data.MemId; ++ dst_hwctx->surface_ids[i] = *(VASurfaceID*)pair->first; ++ } + dst_hwctx->nb_surfaces = 
src_hwctx->nb_surfaces; + } + break; + #endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ { ++ AVD3D11VAFramesContext *dst_hwctx = dst_ctx->hwctx; ++ mfxHDLPair *pair = (mfxHDLPair*)src_hwctx->surfaces[i].Data.MemId; ++ dst_hwctx->texture = (ID3D11Texture2D*)pair->first; ++ if (src_hwctx->frame_type & MFX_MEMTYPE_SHARED_RESOURCE) ++ dst_hwctx->MiscFlags = D3D11_RESOURCE_MISC_SHARED; ++ dst_hwctx->BindFlags = qsv_get_d3d11va_bind_flags(src_hwctx->frame_type); ++ } ++ break; ++#endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: + { +@@ -641,9 +742,10 @@ static int qsv_frames_derive_from(AVHWFr + sizeof(*dst_hwctx->surfaces)); + if (!dst_hwctx->surfaces) + return AVERROR(ENOMEM); +- for (i = 0; i < src_hwctx->nb_surfaces; i++) +- dst_hwctx->surfaces[i] = +- (IDirect3DSurface9*)src_hwctx->surfaces[i].Data.MemId; ++ for (i = 0; i < src_hwctx->nb_surfaces; i++) { ++ mfxHDLPair *pair = (mfxHDLPair*)src_hwctx->surfaces[i].Data.MemId; ++ dst_hwctx->surfaces[i] = (IDirect3DSurface9*)pair->first; ++ } + dst_hwctx->nb_surfaces = src_hwctx->nb_surfaces; + if (src_hwctx->frame_type == MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET) + dst_hwctx->surface_type = DXVA2_VideoDecoderRenderTarget; +@@ -677,13 +779,31 @@ static int qsv_map_from(AVHWFramesContex + switch (child_frames_ctx->device_ctx->type) { + #if CONFIG_VAAPI + case AV_HWDEVICE_TYPE_VAAPI: +- child_data = (uint8_t*)(intptr_t)*(VASurfaceID*)surf->Data.MemId; ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ /* pair->first is *VASurfaceID while data[3] in vaapi frame is VASurfaceID, so ++ * we need this casting for vaapi. ++ * Add intptr_t to force cast from VASurfaceID(uint) type to pointer(long) type ++ * to avoid compile warning */ ++ child_data = (uint8_t*)(intptr_t)*(VASurfaceID*)pair->first; + break; ++ } ++#endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ child_data = pair->first; ++ break; ++ } + #endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: +- child_data = surf->Data.MemId; ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ child_data = pair->first; + break; ++ } + #endif + default: + return AVERROR(ENOSYS); +@@ -697,7 +817,14 @@ static int qsv_map_from(AVHWFramesContex + + dst->width = src->width; + dst->height = src->height; +- dst->data[3] = child_data; ++ ++ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ dst->data[0] = pair->first; ++ dst->data[1] = pair->second; ++ } else { ++ dst->data[3] = child_data; ++ } + + return 0; + } +@@ -720,7 +847,14 @@ static int qsv_map_from(AVHWFramesContex + dummy->format = child_frames_ctx->format; + dummy->width = src->width; + dummy->height = src->height; +- dummy->data[3] = child_data; ++ ++ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { ++ mfxHDLPair *pair = (mfxHDLPair*)surf->Data.MemId; ++ dummy->data[0] = pair->first; ++ dummy->data[1] = pair->second; ++ } else { ++ dummy->data[3] = child_data; ++ } + + ret = av_hwframe_map(dst, dummy, flags); + +@@ -978,35 +1112,84 @@ static int qsv_frames_derive_to(AVHWFram + AVQSVFramesContext *dst_hwctx = dst_ctx->hwctx; + int i; + ++ if (src_ctx->initial_pool_size == 0) { ++ av_log(dst_ctx, AV_LOG_ERROR, "Only fixed-size pools can be " ++ "mapped to QSV frames.\n"); ++ return AVERROR(EINVAL); ++ } ++ + switch (src_ctx->device_ctx->type) { + #if CONFIG_VAAPI + case AV_HWDEVICE_TYPE_VAAPI: + { + AVVAAPIFramesContext *src_hwctx = 
src_ctx->hwctx; ++ s->handle_pairs_internal = av_mallocz_array(src_hwctx->nb_surfaces, ++ sizeof(*s->handle_pairs_internal)); ++ if (!s->handle_pairs_internal) ++ return AVERROR(ENOMEM); + s->surfaces_internal = av_mallocz_array(src_hwctx->nb_surfaces, + sizeof(*s->surfaces_internal)); + if (!s->surfaces_internal) + return AVERROR(ENOMEM); + for (i = 0; i < src_hwctx->nb_surfaces; i++) { + qsv_init_surface(dst_ctx, &s->surfaces_internal[i]); +- s->surfaces_internal[i].Data.MemId = src_hwctx->surface_ids + i; ++ s->handle_pairs_internal[i].first = src_hwctx->surface_ids + i; ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; + } + dst_hwctx->nb_surfaces = src_hwctx->nb_surfaces; + dst_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + } + break; + #endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ { ++ AVD3D11VAFramesContext *src_hwctx = src_ctx->hwctx; ++ s->handle_pairs_internal = av_mallocz_array(src_hwctx->nb_surfaces, ++ sizeof(*s->handle_pairs_internal)); ++ if (!s->handle_pairs_internal) ++ return AVERROR(ENOMEM); ++ s->surfaces_internal = av_mallocz_array(src_hwctx->nb_surfaces, ++ sizeof(*s->surfaces_internal)); ++ if (!s->surfaces_internal) ++ return AVERROR(ENOMEM); ++ for (i = 0; i < src_hwctx->nb_surfaces; i++) { ++ qsv_init_surface(dst_ctx, &s->surfaces_internal[i]); ++ s->handle_pairs_internal[i].first = (mfxMemId)src_hwctx->texture_infos[i].texture; ++ if (src_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ } else { ++ s->handle_pairs_internal[i].second = (mfxMemId)src_hwctx->texture_infos[i].index; ++ } ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; ++ } ++ dst_hwctx->nb_surfaces = src_hwctx->nb_surfaces; ++ if (src_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) { ++ dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET; ++ } else { ++ dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; ++ } ++ } ++ break; ++#endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: + { + AVDXVA2FramesContext *src_hwctx = src_ctx->hwctx; ++ s->handle_pairs_internal = av_mallocz_array(src_hwctx->nb_surfaces, ++ sizeof(*s->handle_pairs_internal)); ++ if (!s->handle_pairs_internal) ++ return AVERROR(ENOMEM); + s->surfaces_internal = av_mallocz_array(src_hwctx->nb_surfaces, + sizeof(*s->surfaces_internal)); + if (!s->surfaces_internal) + return AVERROR(ENOMEM); + for (i = 0; i < src_hwctx->nb_surfaces; i++) { + qsv_init_surface(dst_ctx, &s->surfaces_internal[i]); +- s->surfaces_internal[i].Data.MemId = (mfxMemId)src_hwctx->surfaces[i]; ++ s->handle_pairs_internal[i].first = (mfxMemId)src_hwctx->surfaces[i]; ++ s->handle_pairs_internal[i].second = (mfxMemId)MFX_INFINITE; ++ s->surfaces_internal[i].Data.MemId = (mfxMemId)&s->handle_pairs_internal[i]; + } + dst_hwctx->nb_surfaces = src_hwctx->nb_surfaces; + if (src_hwctx->surface_type == DXVA2_VideoProcessorRenderTarget) +@@ -1029,21 +1212,44 @@ static int qsv_map_to(AVHWFramesContext + AVFrame *dst, const AVFrame *src, int flags) + { + AVQSVFramesContext *hwctx = dst_ctx->hwctx; +- int i, err; ++ int i, err, index = -1; + +- for (i = 0; i < hwctx->nb_surfaces; i++) { ++ for (i = 0; i < hwctx->nb_surfaces && index < 0; i++) { ++ switch(src->format) { + #if CONFIG_VAAPI +- if (*(VASurfaceID*)hwctx->surfaces[i].Data.MemId == +- (VASurfaceID)(uintptr_t)src->data[3]) +- break; ++ case AV_PIX_FMT_VAAPI: ++ { ++ 
mfxHDLPair *pair = (mfxHDLPair*)hwctx->surfaces[i].Data.MemId; ++ if (*(VASurfaceID*)pair->first == (VASurfaceID)src->data[3]) { ++ index = i; ++ break; ++ } ++ } ++#endif ++#if CONFIG_D3D11VA ++ case AV_PIX_FMT_D3D11: ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)hwctx->surfaces[i].Data.MemId; ++ if (pair->first == src->data[0] ++ && pair->second == src->data[1]) { ++ index = i; ++ break; ++ } ++ } + #endif + #if CONFIG_DXVA2 +- if ((IDirect3DSurface9*)hwctx->surfaces[i].Data.MemId == +- (IDirect3DSurface9*)(uintptr_t)src->data[3]) +- break; ++ case AV_PIX_FMT_DXVA2_VLD: ++ { ++ mfxHDLPair *pair = (mfxHDLPair*)hwctx->surfaces[i].Data.MemId; ++ if (pair->first == src->data[3]) { ++ index = i; ++ break; ++ } ++ } + #endif ++ } + } +- if (i >= hwctx->nb_surfaces) { ++ if (index < 0) { + av_log(dst_ctx, AV_LOG_ERROR, "Trying to map from a surface which " + "is not in the mapped frames context.\n"); + return AVERROR(EINVAL); +@@ -1056,7 +1262,7 @@ static int qsv_map_to(AVHWFramesContext + + dst->width = src->width; + dst->height = src->height; +- dst->data[3] = (uint8_t*)&hwctx->surfaces[i]; ++ dst->data[3] = (uint8_t*)&hwctx->surfaces[index]; + + return 0; + } +@@ -1098,7 +1304,7 @@ static void qsv_device_free(AVHWDeviceCo + av_freep(&priv); + } + +-static mfxIMPL choose_implementation(const char *device) ++static mfxIMPL choose_implementation(const char *device, enum AVHWDeviceType child_device_type) + { + static const struct { + const char *name; +@@ -1127,6 +1333,13 @@ static mfxIMPL choose_implementation(con + impl = strtol(device, NULL, 0); + } + ++ if (impl != MFX_IMPL_SOFTWARE) { ++ if (child_device_type == AV_HWDEVICE_TYPE_D3D11VA) ++ impl |= MFX_IMPL_VIA_D3D11; ++ else if (child_device_type == AV_HWDEVICE_TYPE_DXVA2) ++ impl |= MFX_IMPL_VIA_D3D9; ++ } ++ + return impl; + } + +@@ -1153,6 +1366,15 @@ static int qsv_device_derive_from_child( + } + break; + #endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ { ++ AVD3D11VADeviceContext *child_device_hwctx = child_device_ctx->hwctx; ++ handle_type = MFX_HANDLE_D3D11_DEVICE; ++ handle = (mfxHDL)child_device_hwctx->device; ++ } ++ break; ++#endif + #if CONFIG_DXVA2 + case AV_HWDEVICE_TYPE_DXVA2: + { +@@ -1216,7 +1438,9 @@ static int qsv_device_derive(AVHWDeviceC + AVHWDeviceContext *child_device_ctx, + AVDictionary *opts, int flags) + { +- return qsv_device_derive_from_child(ctx, MFX_IMPL_HARDWARE_ANY, ++ mfxIMPL impl; ++ impl = choose_implementation("hw_any", child_device_ctx->type); ++ return qsv_device_derive_from_child(ctx, impl, + child_device_ctx, flags); + } + +@@ -1239,25 +1463,57 @@ static int qsv_device_create(AVHWDeviceC + ctx->user_opaque = priv; + ctx->free = qsv_device_free; + +- e = av_dict_get(opts, "child_device", NULL, 0); +- +- child_device_opts = NULL; +- if (CONFIG_VAAPI) { ++ e = av_dict_get(opts, "child_device_type", NULL, 0); ++ if (e) { ++ child_device_type = av_hwdevice_find_type_by_name(e ? e->value : NULL); ++ if (child_device_type == AV_HWDEVICE_TYPE_NONE) { ++ av_log(ctx, AV_LOG_ERROR, "Unknown child device type " ++ "\"%s\".\n", e ? e->value : NULL); ++ return AVERROR(EINVAL); ++ } ++ } else if (CONFIG_VAAPI) { + child_device_type = AV_HWDEVICE_TYPE_VAAPI; +- // libmfx does not actually implement VAAPI properly, rather it +- // depends on the specific behaviour of a matching iHD driver when +- // used on recent Intel hardware. Set options to the VAAPI device +- // creation so that we should pick a usable setup by default if +- // possible, even when multiple devices and drivers are available. 
+- av_dict_set(&child_device_opts, "kernel_driver", "i915", 0); +- av_dict_set(&child_device_opts, "driver", "iHD", 0); +- } else if (CONFIG_DXVA2) ++ } else if (CONFIG_D3D11VA) { ++ child_device_type = AV_HWDEVICE_TYPE_D3D11VA; ++ } else if (CONFIG_DXVA2) { + child_device_type = AV_HWDEVICE_TYPE_DXVA2; +- else { ++ } else { + av_log(ctx, AV_LOG_ERROR, "No supported child device type is enabled\n"); + return AVERROR(ENOSYS); + } + ++ child_device_opts = NULL; ++ switch (child_device_type) { ++#if CONFIG_VAAPI ++ case AV_HWDEVICE_TYPE_VAAPI: ++ { ++ // libmfx does not actually implement VAAPI properly, rather it ++ // depends on the specific behaviour of a matching iHD driver when ++ // used on recent Intel hardware. Set options to the VAAPI device ++ // creation so that we should pick a usable setup by default if ++ // possible, even when multiple devices and drivers are available. ++ av_dict_set(&child_device_opts, "kernel_driver", "i915", 0); ++ av_dict_set(&child_device_opts, "driver", "iHD", 0); ++ } ++ break; ++#endif ++#if CONFIG_D3D11VA ++ case AV_HWDEVICE_TYPE_D3D11VA: ++ break; ++#endif ++#if CONFIG_DXVA2 ++ case AV_HWDEVICE_TYPE_DXVA2: ++ break; ++#endif ++ default: ++ { ++ av_log(ctx, AV_LOG_ERROR, "No supported child device type is enabled\n"); ++ return AVERROR(ENOSYS); ++ } ++ break; ++ } ++ ++ e = av_dict_get(opts, "child_device", NULL, 0); + ret = av_hwdevice_ctx_create(&priv->child_device_ctx, child_device_type, + e ? e->value : NULL, child_device_opts, 0); + +@@ -1267,9 +1523,15 @@ static int qsv_device_create(AVHWDeviceC + + child_device = (AVHWDeviceContext*)priv->child_device_ctx->data; + +- impl = choose_implementation(device); ++ impl = choose_implementation(device, child_device_type); + +- return qsv_device_derive_from_child(ctx, impl, child_device, 0); ++ ret = qsv_device_derive_from_child(ctx, impl, child_device, 0); ++ if (ret == 0) { ++ ctx->internal->source_device = av_buffer_ref(priv->child_device_ctx); ++ if (!ctx->internal->source_device) ++ ret = AVERROR(ENOMEM); ++ } ++ return ret; + } + + const HWContextType ff_hwcontext_type_qsv = { diff --git a/debian/patches/series b/debian/patches/series index f1b2f9672ba..e214bc44f08 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -8,3 +8,4 @@ 0008-add-d3d11-opencl-interop-for-AMD.patch 0009-add-a-hack-for-opencl-reverse-mapping.patch 0010-add-fixes-for-ffmpeg_hw.patch +0011-add-d3d11-support-for-QSV.patch From 930f8da233592d2f8940a728327c2d9be3357280 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:40:14 +0800 Subject: [PATCH 21/41] add hw_device_ctx support for qsvdec --- ...add-hw_device_ctx-support-for-qsvdec.patch | 271 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 272 insertions(+) create mode 100644 debian/patches/0012-add-hw_device_ctx-support-for-qsvdec.patch diff --git a/debian/patches/0012-add-hw_device_ctx-support-for-qsvdec.patch b/debian/patches/0012-add-hw_device_ctx-support-for-qsvdec.patch new file mode 100644 index 00000000000..8b440fd002b --- /dev/null +++ b/debian/patches/0012-add-hw_device_ctx-support-for-qsvdec.patch @@ -0,0 +1,271 @@ +Index: jellyfin-ffmpeg/fftools/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/fftools/Makefile ++++ jellyfin-ffmpeg/fftools/Makefile +@@ -10,7 +10,6 @@ ALLAVPROGS = $(AVBASENAMES:%=%$(PROGSS + ALLAVPROGS_G = $(AVBASENAMES:%=%$(PROGSSUF)_g$(EXESUF)) + + OBJS-ffmpeg += fftools/ffmpeg_opt.o fftools/ffmpeg_filter.o fftools/ffmpeg_hw.o 
+-OBJS-ffmpeg-$(CONFIG_LIBMFX) += fftools/ffmpeg_qsv.o + ifndef CONFIG_VIDEOTOOLBOX + OBJS-ffmpeg-$(CONFIG_VDA) += fftools/ffmpeg_videotoolbox.o + endif +Index: jellyfin-ffmpeg/fftools/ffmpeg.h +=================================================================== +--- jellyfin-ffmpeg.orig/fftools/ffmpeg.h ++++ jellyfin-ffmpeg/fftools/ffmpeg.h +@@ -60,7 +60,6 @@ enum HWAccelID { + HWACCEL_AUTO, + HWACCEL_GENERIC, + HWACCEL_VIDEOTOOLBOX, +- HWACCEL_QSV, + }; + + typedef struct HWAccel { +Index: jellyfin-ffmpeg/fftools/ffmpeg_hw.c +=================================================================== +--- jellyfin-ffmpeg.orig/fftools/ffmpeg_hw.c ++++ jellyfin-ffmpeg/fftools/ffmpeg_hw.c +@@ -353,6 +353,18 @@ int hw_device_setup_for_decode(InputStre + } else if (ist->hwaccel_id == HWACCEL_GENERIC) { + type = ist->hwaccel_device_type; + dev = hw_device_get_by_type(type); ++ ++ // When "-qsv_device device" is used, an internal QSV device named ++ // as "__qsv_device" is created. Another QSV device is created too ++ // if "-init_hw_device qsv=name:device" is used. There are 2 QSV devices ++ // if both "-qsv_device device" and "-init_hw_device qsv=name:device" ++ // are used, hw_device_get_by_type(AV_HWDEVICE_TYPE_QSV) returns NULL. ++ // To keep back-compatibility with the removed ad-hoc libmfx setup code, ++ // call hw_device_get_by_name("__qsv_device") to select the internal QSV ++ // device. ++ if (!dev && type == AV_HWDEVICE_TYPE_QSV) ++ dev = hw_device_get_by_name("__qsv_device"); ++ + if (!dev) + err = hw_device_init_from_type(type, NULL, &dev); + } else { +Index: jellyfin-ffmpeg/fftools/ffmpeg_opt.c +=================================================================== +--- jellyfin-ffmpeg.orig/fftools/ffmpeg_opt.c ++++ jellyfin-ffmpeg/fftools/ffmpeg_opt.c +@@ -137,9 +137,6 @@ const HWAccel hwaccels[] = { + #if CONFIG_VIDEOTOOLBOX + { "videotoolbox", videotoolbox_init, HWACCEL_VIDEOTOOLBOX, AV_PIX_FMT_VIDEOTOOLBOX }, + #endif +-#if CONFIG_LIBMFX +- { "qsv", qsv_init, HWACCEL_QSV, AV_PIX_FMT_QSV }, +-#endif + { 0 }, + }; + HWDevice *filter_hw_device; +@@ -569,6 +566,23 @@ static int opt_vaapi_device(void *optctx + } + #endif + ++#if CONFIG_QSV ++static int opt_qsv_device(void *optctx, const char *opt, const char *arg) ++{ ++ const char *prefix = "qsv=__qsv_device:hw_any,child_device="; ++ int err; ++ char *tmp = av_asprintf("%s%s", prefix, arg); ++ ++ if (!tmp) ++ return AVERROR(ENOMEM); ++ ++ err = hw_device_init_from_string(tmp, NULL); ++ av_free(tmp); ++ ++ return err; ++} ++#endif ++ + static int opt_init_hw_device(void *optctx, const char *opt, const char *arg) + { + if (!strcmp(arg, "list")) { +@@ -893,6 +907,12 @@ static void add_input_streams(OptionsCon + "with old commandlines. This behaviour is DEPRECATED and will be removed " + "in the future. Please explicitly set \"-hwaccel_output_format cuda\".\n"); + ist->hwaccel_output_format = AV_PIX_FMT_CUDA; ++ } else if (!hwaccel_output_format && hwaccel && !strcmp(hwaccel, "qsv")) { ++ av_log(NULL, AV_LOG_WARNING, ++ "WARNING: defaulting hwaccel_output_format to qsv for compatibility " ++ "with old commandlines. This behaviour is DEPRECATED and will be removed " ++ "in the future. 
Please explicitly set \"-hwaccel_output_format qsv\".\n");
++ ist->hwaccel_output_format = AV_PIX_FMT_QSV;
+ } else if (hwaccel_output_format) {
+ ist->hwaccel_output_format = av_get_pix_fmt(hwaccel_output_format);
+ if (ist->hwaccel_output_format == AV_PIX_FMT_NONE) {
+@@ -3814,7 +3834,7 @@ const OptionDef options[] = {
+ #endif
+
+ #if CONFIG_QSV
+- { "qsv_device", HAS_ARG | OPT_STRING | OPT_EXPERT, { &qsv_device },
++ { "qsv_device", HAS_ARG | OPT_EXPERT, { .func_arg = opt_qsv_device },
+ "set QSV hardware device (DirectX adapter index, DRM path or X11 display name)", "device"},
+ #endif
+
+Index: jellyfin-ffmpeg/fftools/ffmpeg_qsv.c
+===================================================================
+--- jellyfin-ffmpeg.orig/fftools/ffmpeg_qsv.c
++++ /dev/null
+@@ -1,110 +0,0 @@
+-/*
+- * This file is part of FFmpeg.
+- *
+- * FFmpeg is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU Lesser General Public
+- * License as published by the Free Software Foundation; either
+- * version 2.1 of the License, or (at your option) any later version.
+- *
+- * FFmpeg is distributed in the hope that it will be useful,
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- * Lesser General Public License for more details.
+- *
+- * You should have received a copy of the GNU Lesser General Public
+- * License along with FFmpeg; if not, write to the Free Software
+- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+- */
+-
+-#include <mfx/mfxvideo.h>
+-#include <stdlib.h>
+-
+-#include "libavutil/dict.h"
+-#include "libavutil/hwcontext.h"
+-#include "libavutil/hwcontext_qsv.h"
+-#include "libavutil/mem.h"
+-#include "libavutil/opt.h"
+-#include "libavcodec/qsv.h"
+-
+-#include "ffmpeg.h"
+-
+-static AVBufferRef *hw_device_ctx;
+-char *qsv_device = NULL;
+-
+-static int qsv_get_buffer(AVCodecContext *s, AVFrame *frame, int flags)
+-{
+- InputStream *ist = s->opaque;
+-
+- return av_hwframe_get_buffer(ist->hw_frames_ctx, frame, 0);
+-}
+-
+-static void qsv_uninit(AVCodecContext *s)
+-{
+- InputStream *ist = s->opaque;
+- av_buffer_unref(&ist->hw_frames_ctx);
+-}
+-
+-static int qsv_device_init(InputStream *ist)
+-{
+- int err;
+- AVDictionary *dict = NULL;
+-
+- if (qsv_device) {
+- err = av_dict_set(&dict, "child_device", qsv_device, 0);
+- if (err < 0)
+- return err;
+- }
+-
+- err = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV,
+- ist->hwaccel_device, dict, 0);
+- if (err < 0) {
+- av_log(NULL, AV_LOG_ERROR, "Error creating a QSV device\n");
+- goto err_out;
+- }
+-
+-err_out:
+- if (dict)
+- av_dict_free(&dict);
+-
+- return err;
+-}
+-
+-int qsv_init(AVCodecContext *s)
+-{
+- InputStream *ist = s->opaque;
+- AVHWFramesContext *frames_ctx;
+- AVQSVFramesContext *frames_hwctx;
+- int ret;
+-
+- if (!hw_device_ctx) {
+- ret = qsv_device_init(ist);
+- if (ret < 0)
+- return ret;
+- }
+-
+- av_buffer_unref(&ist->hw_frames_ctx);
+- ist->hw_frames_ctx = av_hwframe_ctx_alloc(hw_device_ctx);
+- if (!ist->hw_frames_ctx)
+- return AVERROR(ENOMEM);
+-
+- frames_ctx = (AVHWFramesContext*)ist->hw_frames_ctx->data;
+- frames_hwctx = frames_ctx->hwctx;
+-
+- frames_ctx->width = FFALIGN(s->coded_width, 32);
+- frames_ctx->height = FFALIGN(s->coded_height, 32);
+- frames_ctx->format = AV_PIX_FMT_QSV;
+- frames_ctx->sw_format = s->sw_pix_fmt;
+- frames_ctx->initial_pool_size = 64 + s->extra_hw_frames;
+- frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
+-
+- ret = av_hwframe_ctx_init(ist->hw_frames_ctx);
+- if (ret < 0) {
+- av_log(NULL, AV_LOG_ERROR, "Error initializing a QSV frame pool\n");
+- return ret;
+- }
+-
+- ist->hwaccel_get_buffer = qsv_get_buffer;
+- ist->hwaccel_uninit = qsv_uninit;
+-
+- return 0;
+-}
+Index: jellyfin-ffmpeg/libavcodec/qsvdec.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavcodec/qsvdec.c
++++ jellyfin-ffmpeg/libavcodec/qsvdec.c
+@@ -89,7 +89,7 @@ static const AVCodecHWConfigInternal *co
+ .public = {
+ .pix_fmt = AV_PIX_FMT_QSV,
+ .methods = AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX |
+- AV_CODEC_HW_CONFIG_METHOD_AD_HOC,
++ AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX,
+ .device_type = AV_HWDEVICE_TYPE_QSV,
+ },
+ .hwaccel = NULL,
+@@ -238,6 +238,35 @@ static int qsv_decode_preinit(AVCodecCon
+ q->nb_ext_buffers = user_ctx->nb_ext_buffers;
+ }
+
++ if (avctx->hw_device_ctx && !avctx->hw_frames_ctx && ret == AV_PIX_FMT_QSV) {
++ AVHWFramesContext *hwframes_ctx;
++ AVQSVFramesContext *frames_hwctx;
++
++ avctx->hw_frames_ctx = av_hwframe_ctx_alloc(avctx->hw_device_ctx);
++
++ if (!avctx->hw_frames_ctx) {
++ av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
++ return AVERROR(ENOMEM);
++ }
++
++ hwframes_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
++ frames_hwctx = hwframes_ctx->hwctx;
++ hwframes_ctx->width = FFALIGN(avctx->coded_width, 32);
++ hwframes_ctx->height = FFALIGN(avctx->coded_height, 32);
++ hwframes_ctx->format = AV_PIX_FMT_QSV;
++ hwframes_ctx->sw_format = avctx->sw_pix_fmt;
++ hwframes_ctx->initial_pool_size = 64 + avctx->extra_hw_frames;
++ frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
++
++ ret = av_hwframe_ctx_init(avctx->hw_frames_ctx);
++
++ if (ret < 0) {
++ av_log(NULL, AV_LOG_ERROR, "Error initializing a QSV frame pool\n");
++ av_buffer_unref(&avctx->hw_frames_ctx);
++ return ret;
++ }
++ }
++
+ if (avctx->hw_frames_ctx) {
+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+ AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
diff --git a/debian/patches/series b/debian/patches/series
index e214bc44f08..54d9308e99c 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -9,3 +9,4 @@
 0009-add-a-hack-for-opencl-reverse-mapping.patch
 0010-add-fixes-for-ffmpeg_hw.patch
 0011-add-d3d11-support-for-QSV.patch
+0012-add-hw_device_ctx-support-for-qsvdec.patch

From 1accfc76cbc8a0cab83dd6c4e874e358e75f6ea2 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Fri, 17 Dec 2021 23:51:48 +0800
Subject: [PATCH 22/41] add qsv(d3d11)-opencl interop

---
 .../0013-add-qsv-d3d11-opencl-interop.patch | 321 ++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 322 insertions(+)
 create mode 100644 debian/patches/0013-add-qsv-d3d11-opencl-interop.patch

diff --git a/debian/patches/0013-add-qsv-d3d11-opencl-interop.patch b/debian/patches/0013-add-qsv-d3d11-opencl-interop.patch
new file mode 100644
index 00000000000..97552fbb374
--- /dev/null
+++ b/debian/patches/0013-add-qsv-d3d11-opencl-interop.patch
@@ -0,0 +1,321 @@
+Index: jellyfin-ffmpeg/libavutil/hwcontext_opencl.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavutil/hwcontext_opencl.c
++++ jellyfin-ffmpeg/libavutil/hwcontext_opencl.c
+@@ -62,6 +62,9 @@
+ #endif
+
+ #if HAVE_OPENCL_D3D11
++#if CONFIG_LIBMFX
++#include "hwcontext_qsv.h"
++#endif
+ #include <CL/cl_d3d11.h>
+ #include "hwcontext_d3d11va.h"
+
+@@ -122,6 +125,7 @@ typedef struct OpenCLDeviceContext {
+
int d3d11_mapping_usable; ++ int d3d11_qsv_mapping_usable; + int d3d11_map_amd; + int d3d11_map_intel; + +@@ -876,6 +880,11 @@ static int opencl_device_init(AVHWDevice + priv->d3d11_mapping_usable = 0; + } else { + priv->d3d11_mapping_usable = 1; ++ ++ if (priv->d3d11_map_intel) ++ priv->d3d11_qsv_mapping_usable = 1; ++ else ++ priv->d3d11_qsv_mapping_usable = 0; + } + } + #endif +@@ -1746,18 +1755,20 @@ static void opencl_frames_uninit(AVHWFra + + #if HAVE_OPENCL_DXVA2 || HAVE_OPENCL_D3D11 + int i, p; +- for (i = 0; i < priv->nb_mapped_frames; i++) { +- AVOpenCLFrameDescriptor *desc = &priv->mapped_frames[i]; +- for (p = 0; p < desc->nb_planes; p++) { +- cle = clReleaseMemObject(desc->planes[p]); +- if (cle != CL_SUCCESS) { +- av_log(hwfc, AV_LOG_ERROR, "Failed to release mapped " +- "frame object (frame %d plane %d): %d.\n", +- i, p, cle); ++ if (priv->nb_mapped_frames && priv->mapped_frames) { ++ for (i = 0; i < priv->nb_mapped_frames; i++) { ++ AVOpenCLFrameDescriptor *desc = &priv->mapped_frames[i]; ++ for (p = 0; p < desc->nb_planes; p++) { ++ cle = clReleaseMemObject(desc->planes[p]); ++ if (cle != CL_SUCCESS) { ++ av_log(hwfc, AV_LOG_ERROR, "Failed to release mapped " ++ "frame object (frame %d plane %d): %d.\n", ++ i, p, cle); ++ } + } + } ++ av_freep(&priv->mapped_frames); + } +- av_freep(&priv->mapped_frames); + #endif + + if (priv->command_queue) { +@@ -2526,6 +2537,225 @@ fail: + + #if HAVE_OPENCL_D3D11 + ++#if CONFIG_LIBMFX ++ ++static void opencl_unmap_from_d3d11_qsv(AVHWFramesContext *dst_fc, ++ HWMapDescriptor *hwmap) ++{ ++ AVOpenCLFrameDescriptor *desc = hwmap->priv; ++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; ++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv; ++ cl_event event; ++ cl_int cle; ++ int p; ++ ++ av_log(dst_fc, AV_LOG_DEBUG, "Unmap QSV surface from OpenCL.\n"); ++ ++ cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR( ++ frames_priv->command_queue, desc->nb_planes, desc->planes, ++ 0, NULL, &event); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to release texture " ++ "handle: %d.\n", cle); ++ } ++ ++ opencl_wait_events(dst_fc, &event, 1); ++ ++ if (!frames_priv->nb_mapped_frames && !frames_priv->mapped_frames) { ++ for (p = 0; p < desc->nb_planes; p++) { ++ cle = clReleaseMemObject(desc->planes[p]); ++ if (cle != CL_SUCCESS) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to release CL " ++ "image of plane %d of D3D11 texture: %d\n", ++ p, cle); ++ } ++ } ++ av_freep(&desc); ++ } ++} ++ ++static int opencl_map_from_d3d11_qsv(AVHWFramesContext *dst_fc, AVFrame *dst, ++ const AVFrame *src, int flags) ++{ ++ AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; ++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv; ++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv; ++ mfxFrameSurface1 *mfx_surface = (mfxFrameSurface1*)src->data[3]; ++ mfxHDLPair *pair = (mfxHDLPair*)mfx_surface->Data.MemId; ++ ID3D11Texture2D *tex = (ID3D11Texture2D*)pair->first; ++ AVOpenCLFrameDescriptor *desc; ++ cl_mem_flags cl_flags; ++ cl_event event; ++ cl_int cle; ++ int err, p, index, decoder_target; ++ ++ cl_flags = opencl_mem_flags_for_mapping(flags); ++ if (!cl_flags) ++ return AVERROR(EINVAL); ++ ++ av_log(dst_fc, AV_LOG_DEBUG, "Map QSV surface %#x to OpenCL.\n", pair); ++ ++ index = (intptr_t)pair->second; ++ decoder_target = index >= 0 && index != MFX_INFINITE; ++ ++ if (decoder_target && index >= frames_priv->nb_mapped_frames) { ++ av_log(dst_fc, AV_LOG_ERROR, "Texture array index out 
of range for "
++ "mapping: %d >= %d.\n", index, frames_priv->nb_mapped_frames);
++ return AVERROR(EINVAL);
++ }
++
++ if (decoder_target) {
++ desc = &frames_priv->mapped_frames[index];
++ } else {
++ desc = av_mallocz(sizeof(*desc));
++ if (!desc)
++ return AVERROR(ENOMEM);
++
++ desc->nb_planes = 2;
++ for (p = 0; p < desc->nb_planes; p++) {
++ desc->planes[p] =
++ device_priv->clCreateFromD3D11Texture2DKHR(
++ dst_dev->context, cl_flags, tex,
++ p, &cle);
++ if (!desc->planes[p]) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL "
++ "image from plane %d of D3D11 texture: %d.\n",
++ p, cle);
++ err = AVERROR(EIO);
++ goto fail2;
++ }
++ }
++ }
++
++ cle = device_priv->clEnqueueAcquireD3D11ObjectsKHR(
++ frames_priv->command_queue, desc->nb_planes, desc->planes,
++ 0, NULL, &event);
++ if (cle != CL_SUCCESS) {
++ av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire texture "
++ "handle: %d.\n", cle);
++ err = AVERROR(EIO);
++ goto fail;
++ }
++
++ err = opencl_wait_events(dst_fc, &event, 1);
++ if (err < 0)
++ goto fail;
++
++ for (p = 0; p < desc->nb_planes; p++)
++ dst->data[p] = (uint8_t*)desc->planes[p];
++
++ err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
++ &opencl_unmap_from_d3d11_qsv, desc);
++ if (err < 0)
++ goto fail;
++
++ dst->width = src->width;
++ dst->height = src->height;
++
++ return 0;
++
++fail:
++ cle = device_priv->clEnqueueReleaseD3D11ObjectsKHR(
++ frames_priv->command_queue, desc->nb_planes, desc->planes,
++ 0, NULL, &event);
++ if (cle == CL_SUCCESS)
++ opencl_wait_events(dst_fc, &event, 1);
++fail2:
++ if (!decoder_target) {
++ for (p = 0; p < desc->nb_planes; p++) {
++ if (desc->planes[p])
++ clReleaseMemObject(desc->planes[p]);
++ }
++ av_freep(&desc);
++ }
++ return err;
++}
++
++static int opencl_frames_derive_from_d3d11_qsv(AVHWFramesContext *dst_fc,
++ AVHWFramesContext *src_fc, int flags)
++{
++ AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx;
++ AVQSVFramesContext *src_hwctx = src_fc->hwctx;
++ OpenCLDeviceContext *device_priv = dst_fc->device_ctx->internal->priv;
++ OpenCLFramesContext *frames_priv = dst_fc->internal->priv;
++ cl_mem_flags cl_flags;
++ cl_int cle;
++ int err, i, p, nb_planes = 2;
++
++ mfxHDLPair *pair = (mfxHDLPair*)src_hwctx->surfaces[0].Data.MemId;
++ ID3D11Texture2D *tex = (ID3D11Texture2D*)pair->first;
++
++ if (src_fc->sw_format != AV_PIX_FMT_NV12 &&
++ src_fc->sw_format != AV_PIX_FMT_P010) {
++ av_log(dst_fc, AV_LOG_ERROR, "Only NV12 and P010 textures are "
++ "supported for QSV with D3D11 to OpenCL mapping.\n");
++ return AVERROR(EINVAL);
++ }
++
++ if (src_fc->initial_pool_size == 0) {
++ av_log(dst_fc, AV_LOG_ERROR, "Only fixed-size pools are supported "
++ "for QSV with D3D11 to OpenCL mapping.\n");
++ return AVERROR(EINVAL);
++ }
++
++ if (!(src_hwctx->frame_type & MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET) ||
++ (src_hwctx->frame_type & MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) ||
++ (src_hwctx->frame_type & MFX_MEMTYPE_FROM_VPPOUT)) {
++ av_log(dst_fc, AV_LOG_DEBUG, "Non-DECODER_TARGET direct input for QSV "
++ "with D3D11 to OpenCL mapping.\n");
++ return 0;
++ }
++
++ cl_flags = opencl_mem_flags_for_mapping(flags);
++ if (!cl_flags)
++ return AVERROR(EINVAL);
++
++ frames_priv->nb_mapped_frames = src_fc->initial_pool_size;
++
++ frames_priv->mapped_frames =
++ av_mallocz_array(frames_priv->nb_mapped_frames,
++ sizeof(*frames_priv->mapped_frames));
++ if (!frames_priv->mapped_frames)
++ return AVERROR(ENOMEM);
++
++ for (i = 0; i < frames_priv->nb_mapped_frames; i++) {
++ AVOpenCLFrameDescriptor
*desc = &frames_priv->mapped_frames[i]; ++ desc->nb_planes = nb_planes; ++ ++ for (p = 0; p < nb_planes; p++) { ++ UINT subresource = 2 * i + p; ++ desc->planes[p] = ++ device_priv->clCreateFromD3D11Texture2DKHR( ++ dst_dev->context, cl_flags, tex, ++ subresource, &cle); ++ if (!desc->planes[p]) { ++ av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " ++ "image from plane %d of D3D11 texture " ++ "index %d (subresource %u): %d.\n", ++ p, i, (unsigned int)subresource, cle); ++ err = AVERROR(EIO); ++ goto fail; ++ } ++ } ++ } ++ ++ return 0; ++ ++fail: ++ for (i = 0; i < frames_priv->nb_mapped_frames; i++) { ++ AVOpenCLFrameDescriptor *desc = &frames_priv->mapped_frames[i]; ++ for (p = 0; p < desc->nb_planes; p++) { ++ if (desc->planes[p]) ++ clReleaseMemObject(desc->planes[p]); ++ } ++ } ++ av_freep(&frames_priv->mapped_frames); ++ frames_priv->nb_mapped_frames = 0; ++ return err; ++} ++ ++#endif ++ + static void opencl_unmap_from_d3d11(AVHWFramesContext *dst_fc, + HWMapDescriptor *hwmap) + { +@@ -2966,6 +3196,11 @@ static int opencl_map_to(AVHWFramesConte + return opencl_map_from_dxva2(hwfc, dst, src, flags); + #endif + #if HAVE_OPENCL_D3D11 ++#if CONFIG_LIBMFX ++ case AV_PIX_FMT_QSV: ++ if (priv->d3d11_qsv_mapping_usable) ++ return opencl_map_from_d3d11_qsv(hwfc, dst, src, flags); ++#endif + case AV_PIX_FMT_D3D11: + if (priv->d3d11_mapping_usable) + return opencl_map_from_d3d11(hwfc, dst, src, flags); +@@ -3016,6 +3251,18 @@ static int opencl_frames_derive_to(AVHWF + break; + #endif + #if HAVE_OPENCL_D3D11 ++#if CONFIG_LIBMFX ++ case AV_HWDEVICE_TYPE_QSV: ++ if (!priv->d3d11_qsv_mapping_usable) ++ return AVERROR(ENOSYS); ++ { ++ int err; ++ err = opencl_frames_derive_from_d3d11_qsv(dst_fc, src_fc, flags); ++ if (err < 0) ++ return err; ++ } ++ break; ++#endif + case AV_HWDEVICE_TYPE_D3D11VA: + if (!priv->d3d11_mapping_usable) + return AVERROR(ENOSYS); diff --git a/debian/patches/series b/debian/patches/series index 54d9308e99c..75de1254ced 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -10,3 +10,4 @@ 0010-add-fixes-for-ffmpeg_hw.patch 0011-add-d3d11-support-for-QSV.patch 0012-add-hw_device_ctx-support-for-qsvdec.patch +0013-add-qsv-d3d11-opencl-interop.patch From 29c27b2262bd95554cfebc2783bb2834aeae2453 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:40:52 +0800 Subject: [PATCH 23/41] add vendor opts to d3d11va and bgra fmt to d3d11/dxva2 --- ...to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch | 93 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 94 insertions(+) create mode 100644 debian/patches/0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch diff --git a/debian/patches/0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch b/debian/patches/0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch new file mode 100644 index 00000000000..f89c0a9c677 --- /dev/null +++ b/debian/patches/0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch @@ -0,0 +1,93 @@ +Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +@@ -83,11 +83,12 @@ static const struct { + DXGI_FORMAT d3d_format; + enum AVPixelFormat pix_fmt; + } supported_formats[] = { +- { DXGI_FORMAT_NV12, AV_PIX_FMT_NV12 }, +- { DXGI_FORMAT_P010, AV_PIX_FMT_P010 }, ++ { DXGI_FORMAT_NV12, AV_PIX_FMT_NV12 }, ++ { DXGI_FORMAT_P010, AV_PIX_FMT_P010 }, ++ { DXGI_FORMAT_B8G8R8A8_UNORM, 
AV_PIX_FMT_BGRA },
+ // Special opaque formats. The pix_fmt is merely a place holder, as the
+ // opaque format cannot be accessed directly.
+- { DXGI_FORMAT_420_OPAQUE, AV_PIX_FMT_YUV420P },
++ { DXGI_FORMAT_420_OPAQUE, AV_PIX_FMT_YUV420P },
+ };
+
+ static void d3d11va_default_lock(void *ctx)
+@@ -539,9 +540,12 @@ static int d3d11va_device_create(AVHWDev
+ AVD3D11VADeviceContext *device_hwctx = ctx->hwctx;
+
+ HRESULT hr;
++ AVDictionaryEntry *e;
+ IDXGIAdapter *pAdapter = NULL;
+ ID3D10Multithread *pMultithread;
+ UINT creationFlags = D3D11_CREATE_DEVICE_VIDEO_SUPPORT;
++ int adapter = -1;
++ long int vendor_id = -1;
+ int is_debug = !!av_dict_get(opts, "debug", NULL, 0);
+ int ret;
+
+@@ -561,13 +565,45 @@ static int d3d11va_device_create(AVHWDev
+ return AVERROR_UNKNOWN;
+ }
+
++ e = av_dict_get(opts, "vendor", NULL, 0);
++ if (e) {
++ vendor_id = strtol(e->value, NULL, 0);
++ }
++
+ if (device) {
++ adapter = atoi(device);
++ }
++
++ if (adapter >= 0 || vendor_id != -1) {
+ IDXGIFactory2 *pDXGIFactory;
+ hr = mCreateDXGIFactory(&IID_IDXGIFactory2, (void **)&pDXGIFactory);
+ if (SUCCEEDED(hr)) {
+- int adapter = atoi(device);
+- if (FAILED(IDXGIFactory2_EnumAdapters(pDXGIFactory, adapter, &pAdapter)))
++ if (adapter < 0) {
++ int adapter_cnt = 0;
++ while (IDXGIFactory2_EnumAdapters(pDXGIFactory, adapter_cnt++, &pAdapter) != DXGI_ERROR_NOT_FOUND) {
++ DXGI_ADAPTER_DESC adapter_desc;
++ hr = IDXGIAdapter2_GetDesc(pAdapter, &adapter_desc);
++ if (FAILED(hr)) {
++ av_log(ctx, AV_LOG_ERROR, "IDXGIAdapter2_GetDesc returned error with adapter id %d\n", adapter_cnt);
++ continue;
++ }
++
++ if (adapter_desc.VendorId == vendor_id) {
++ break;
++ }
++
++ if (pAdapter)
++ IDXGIAdapter_Release(pAdapter);
++ }
++ if (!pAdapter) {
++ av_log(ctx, AV_LOG_ERROR, "Failed to find d3d11va adapter by vendor id %ld\n", vendor_id);
++ IDXGIFactory2_Release(pDXGIFactory);
++ return AVERROR_UNKNOWN;
++ }
++ } else {
++ if (FAILED(IDXGIFactory2_EnumAdapters(pDXGIFactory, adapter, &pAdapter)))
+ pAdapter = NULL;
++ }
+ IDXGIFactory2_Release(pDXGIFactory);
+ }
+ }
+Index: jellyfin-ffmpeg/libavutil/hwcontext_dxva2.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavutil/hwcontext_dxva2.c
++++ jellyfin-ffmpeg/libavutil/hwcontext_dxva2.c
+@@ -83,6 +83,7 @@ static const struct {
+ { MKTAG('N', 'V', '1', '2'), AV_PIX_FMT_NV12 },
+ { MKTAG('P', '0', '1', '0'), AV_PIX_FMT_P010 },
+ { D3DFMT_P8, AV_PIX_FMT_PAL8 },
++ { D3DFMT_A8R8G8B8, AV_PIX_FMT_BGRA },
+ };
+
+ DEFINE_GUID(video_decoder_service, 0xfc51a551, 0xd5e7, 0x11d9, 0xaf, 0x55, 0x00, 0x05, 0x4e, 0x43, 0xff, 0x02);
diff --git a/debian/patches/series b/debian/patches/series
index 75de1254ced..f4e7004c478 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -11,3 +11,4 @@
 0011-add-d3d11-support-for-QSV.patch
 0012-add-hw_device_ctx-support-for-qsvdec.patch
 0013-add-qsv-d3d11-opencl-interop.patch
+0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch

From ff7e513554cfb2d687ee4747022fd96b9975fc1c Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:41:14 +0800
Subject: [PATCH 24/41] add a vaapi hwupload filter

---
 .../0015-add-a-vaapi-hwupload-filter.patch | 237 ++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 238 insertions(+)
 create mode 100644 debian/patches/0015-add-a-vaapi-hwupload-filter.patch

diff --git a/debian/patches/0015-add-a-vaapi-hwupload-filter.patch b/debian/patches/0015-add-a-vaapi-hwupload-filter.patch
new file mode 100644
index
00000000000..6963c69df8b --- /dev/null +++ b/debian/patches/0015-add-a-vaapi-hwupload-filter.patch @@ -0,0 +1,237 @@ +Index: jellyfin-ffmpeg/configure +=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ jellyfin-ffmpeg/configure +@@ -3579,6 +3579,7 @@ fspp_filter_deps="gpl" + headphone_filter_select="fft" + histeq_filter_deps="gpl" + hqdn3d_filter_deps="gpl" ++hwupload_vaapi_filter_deps="vaapi" + interlace_filter_deps="gpl" + kerndeint_filter_deps="gpl" + ladspa_filter_deps="ladspa libdl" +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ -297,6 +297,7 @@ OBJS-$(CONFIG_HUE_FILTER) + OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o + OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o + OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o ++OBJS-$(CONFIG_HWUPLOAD_VAAPI_FILTER) += vf_hwupload_vaapi.o + OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o + OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o + OBJS-$(CONFIG_IDENTITY_FILTER) += vf_identity.o +Index: jellyfin-ffmpeg/libavfilter/allfilters.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/allfilters.c ++++ jellyfin-ffmpeg/libavfilter/allfilters.c +@@ -282,6 +282,7 @@ extern AVFilter ff_vf_hwdownload; + extern AVFilter ff_vf_hwmap; + extern AVFilter ff_vf_hwupload; + extern AVFilter ff_vf_hwupload_cuda; ++extern AVFilter ff_vf_hwupload_vaapi; + extern AVFilter ff_vf_hysteresis; + extern AVFilter ff_vf_identity; + extern AVFilter ff_vf_idet; +Index: jellyfin-ffmpeg/libavfilter/vf_hwupload_vaapi.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vf_hwupload_vaapi.c +@@ -0,0 +1,196 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/buffer.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/log.h" ++#include "libavutil/opt.h" ++ ++#include "avfilter.h" ++#include "formats.h" ++#include "internal.h" ++#include "video.h" ++ ++typedef struct VaapiUploadContext { ++ const AVClass *class; ++ int device_idx; ++ ++ AVBufferRef *hwdevice; ++ AVBufferRef *hwframe; ++} VaapiUploadContext; ++ ++static av_cold int vaapiupload_init(AVFilterContext *ctx) ++{ ++ VaapiUploadContext *s = ctx->priv; ++ return av_hwdevice_ctx_create(&s->hwdevice, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0); ++} ++ ++static av_cold void vaapiupload_uninit(AVFilterContext *ctx) ++{ ++ VaapiUploadContext *s = ctx->priv; ++ ++ av_buffer_unref(&s->hwframe); ++ av_buffer_unref(&s->hwdevice); ++} ++ ++static int vaapiupload_query_formats(AVFilterContext *ctx) ++{ ++ int ret; ++ ++ static const enum AVPixelFormat input_pix_fmts[] = { ++ AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, ++ AV_PIX_FMT_UYVY422, AV_PIX_FMT_YUYV422, AV_PIX_FMT_Y210, ++ AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, ++ AV_PIX_FMT_GRAY8, AV_PIX_FMT_P010, AV_PIX_FMT_BGRA, ++ AV_PIX_FMT_BGR0, AV_PIX_FMT_RGBA, AV_PIX_FMT_RGB0, ++ AV_PIX_FMT_ABGR, AV_PIX_FMT_0BGR, AV_PIX_FMT_ARGB, ++ AV_PIX_FMT_0RGB, AV_PIX_FMT_NONE, ++ }; ++ static const enum AVPixelFormat output_pix_fmts[] = { ++ AV_PIX_FMT_VAAPI, AV_PIX_FMT_NONE, ++ }; ++ AVFilterFormats *in_fmts = ff_make_format_list(input_pix_fmts); ++ AVFilterFormats *out_fmts; ++ ++ ret = ff_formats_ref(in_fmts, &ctx->inputs[0]->outcfg.formats); ++ if (ret < 0) ++ return ret; ++ ++ out_fmts = ff_make_format_list(output_pix_fmts); ++ ++ ret = ff_formats_ref(out_fmts, &ctx->outputs[0]->incfg.formats); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++static int vaapiupload_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ AVFilterLink *inlink = ctx->inputs[0]; ++ VaapiUploadContext *s = ctx->priv; ++ ++ AVHWFramesContext *hwframe_ctx; ++ int ret; ++ ++ av_buffer_unref(&s->hwframe); ++ s->hwframe = av_hwframe_ctx_alloc(s->hwdevice); ++ if (!s->hwframe) ++ return AVERROR(ENOMEM); ++ ++ hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; ++ hwframe_ctx->format = AV_PIX_FMT_VAAPI; ++ if (inlink->hw_frames_ctx) { ++ AVHWFramesContext *in_hwframe_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; ++ hwframe_ctx->sw_format = in_hwframe_ctx->sw_format; ++ } else { ++ hwframe_ctx->sw_format = inlink->format; ++ } ++ hwframe_ctx->width = inlink->w; ++ hwframe_ctx->height = inlink->h; ++ ++ ret = av_hwframe_ctx_init(s->hwframe); ++ if (ret < 0) ++ return ret; ++ ++ outlink->hw_frames_ctx = av_buffer_ref(s->hwframe); ++ if (!outlink->hw_frames_ctx) ++ return AVERROR(ENOMEM); ++ ++ return 0; ++} ++ ++static int vaapiupload_filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterContext *ctx = link->dst; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ ++ AVFrame *out = NULL; ++ int ret; ++ ++ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!out) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ out->width = in->width; ++ out->height = in->height; ++ ++ ret = av_hwframe_transfer_data(out, in, 0); ++ if (ret < 0) { ++ av_log(ctx, AV_LOG_ERROR, "Error transferring data to the GPU\n"); ++ goto fail; ++ } ++ ++ ret = 
av_frame_copy_props(out, in); ++ if (ret < 0) ++ goto fail; ++ ++ av_frame_free(&in); ++ ++ return ff_filter_frame(ctx->outputs[0], out); ++fail: ++ av_frame_free(&in); ++ av_frame_free(&out); ++ return ret; ++} ++ ++static const AVClass vaapiupload_class = { ++ .class_name = "vaapiupload", ++ .item_name = av_default_item_name, ++ .option = NULL, ++ .version = LIBAVUTIL_VERSION_INT, ++}; ++ ++static const AVFilterPad vaapiupload_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = vaapiupload_filter_frame, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad vaapiupload_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = vaapiupload_config_output, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vf_hwupload_vaapi = { ++ .name = "hwupload_vaapi", ++ .description = NULL_IF_CONFIG_SMALL("Upload a system memory frame to a VAAPI device."), ++ ++ .init = vaapiupload_init, ++ .uninit = vaapiupload_uninit, ++ ++ .query_formats = vaapiupload_query_formats, ++ ++ .priv_size = sizeof(VaapiUploadContext), ++ .priv_class = &vaapiupload_class, ++ ++ .inputs = vaapiupload_inputs, ++ .outputs = vaapiupload_outputs, ++ ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/debian/patches/series b/debian/patches/series index f4e7004c478..51f33e36975 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -12,3 +12,4 @@ 0012-add-hw_device_ctx-support-for-qsvdec.patch 0013-add-qsv-d3d11-opencl-interop.patch 0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch +0015-add-a-vaapi-hwupload-filter.patch From 1505e5be63b520b2458bb6e60490c79824150fbb Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:41:29 +0800 Subject: [PATCH 25/41] add fixes for the broken vaapi tonemap --- ...d-fixes-for-the-broken-vaapi-tonemap.patch | 328 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 329 insertions(+) create mode 100644 debian/patches/0016-add-fixes-for-the-broken-vaapi-tonemap.patch diff --git a/debian/patches/0016-add-fixes-for-the-broken-vaapi-tonemap.patch b/debian/patches/0016-add-fixes-for-the-broken-vaapi-tonemap.patch new file mode 100644 index 00000000000..c0e65d6ec81 --- /dev/null +++ b/debian/patches/0016-add-fixes-for-the-broken-vaapi-tonemap.patch @@ -0,0 +1,328 @@ +Index: jellyfin-ffmpeg/libavfilter/vf_tonemap_vaapi.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_tonemap_vaapi.c ++++ jellyfin-ffmpeg/libavfilter/vf_tonemap_vaapi.c +@@ -41,7 +41,13 @@ typedef struct HDRVAAPIContext { + enum AVColorTransferCharacteristic color_transfer; + enum AVColorSpace color_matrix; + ++ char *in_master_display; ++ char *in_content_light; ++ char *out_master_display; ++ char *out_content_light; ++ + VAHdrMetaDataHDR10 in_metadata; ++ VAHdrMetaDataHDR10 out_metadata; + + AVFrameSideData *src_display; + AVFrameSideData *src_light; +@@ -54,7 +60,7 @@ static int tonemap_vaapi_save_metadata(A + AVContentLightMetadata *light_meta; + + if (input_frame->color_trc != AVCOL_TRC_SMPTE2084) { +- av_log(avctx, AV_LOG_WARNING, "Only support HDR10 as input for vaapi tone-mapping\n"); ++ av_log(avctx, AV_LOG_DEBUG, "Only support HDR10 as input for vaapi tone-mapping\n"); + } + + ctx->src_display = av_frame_get_side_data(input_frame, +@@ -62,8 +68,7 @@ static int tonemap_vaapi_save_metadata(A + if (ctx->src_display) { + hdr_meta = (AVMasteringDisplayMetadata *)ctx->src_display->data; + if (!hdr_meta) { +- av_log(avctx, AV_LOG_ERROR, "No mastering 
display data\n"); +- return AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_DEBUG, "No mastering display data\n"); + } + + if (hdr_meta->has_luminance) { +@@ -120,8 +125,7 @@ static int tonemap_vaapi_save_metadata(A + ctx->in_metadata.white_point_y); + } + } else { +- av_log(avctx, AV_LOG_ERROR, "No mastering display data from input\n"); +- return AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_DEBUG, "No mastering display data from input\n"); + } + + ctx->src_light = av_frame_get_side_data(input_frame, +@@ -129,8 +133,7 @@ static int tonemap_vaapi_save_metadata(A + if (ctx->src_light) { + light_meta = (AVContentLightMetadata *)ctx->src_light->data; + if (!light_meta) { +- av_log(avctx, AV_LOG_ERROR, "No light metadata\n"); +- return AVERROR(EINVAL); ++ av_log(avctx, AV_LOG_DEBUG, "No light metadata\n"); + } + + ctx->in_metadata.max_content_light_level = light_meta->MaxCLL; +@@ -148,6 +151,107 @@ static int tonemap_vaapi_save_metadata(A + return 0; + } + ++static int tonemap_vaapi_update_sidedata(AVFilterContext *avctx, AVFrame *output_frame) ++{ ++ HDRVAAPIContext *ctx = avctx->priv; ++ AVFrameSideData *metadata; ++ AVMasteringDisplayMetadata *hdr_meta; ++ AVFrameSideData *metadata_lt; ++ AVContentLightMetadata *hdr_meta_lt; ++ ++ int i; ++ const int mapping[3] = {1, 2, 0}; //green, blue, red ++ const int chroma_den = 50000; ++ const int luma_den = 10000; ++ ++ metadata = av_frame_get_side_data(output_frame, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ if (metadata) { ++ av_frame_remove_side_data(output_frame, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); ++ metadata = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, ++ sizeof(AVMasteringDisplayMetadata)); ++ } else { ++ metadata = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, ++ sizeof(AVMasteringDisplayMetadata)); ++ } ++ ++ hdr_meta = (AVMasteringDisplayMetadata *)metadata->data; ++ ++ for (i = 0; i < 3; i++) { ++ const int j = mapping[i]; ++ hdr_meta->display_primaries[j][0].num = ctx->out_metadata.display_primaries_x[i]; ++ hdr_meta->display_primaries[j][0].den = chroma_den; ++ ++ hdr_meta->display_primaries[j][1].num = ctx->out_metadata.display_primaries_y[i]; ++ hdr_meta->display_primaries[j][1].den = chroma_den; ++ } ++ ++ hdr_meta->white_point[0].num = ctx->out_metadata.white_point_x; ++ hdr_meta->white_point[0].den = chroma_den; ++ ++ hdr_meta->white_point[1].num = ctx->out_metadata.white_point_y; ++ hdr_meta->white_point[1].den = chroma_den; ++ hdr_meta->has_primaries = 1; ++ ++ hdr_meta->max_luminance.num = ctx->out_metadata.max_display_mastering_luminance; ++ hdr_meta->max_luminance.den = luma_den; ++ ++ hdr_meta->min_luminance.num = ctx->out_metadata.min_display_mastering_luminance; ++ hdr_meta->min_luminance.den = luma_den; ++ hdr_meta->has_luminance = 1; ++ ++ av_log(avctx, AV_LOG_DEBUG, ++ "Mastering Display Metadata(out luminance):\n"); ++ av_log(avctx, AV_LOG_DEBUG, ++ "min_luminance=%u, max_luminance=%u\n", ++ ctx->out_metadata.min_display_mastering_luminance, ++ ctx->out_metadata.max_display_mastering_luminance); ++ ++ av_log(avctx, AV_LOG_DEBUG, ++ "Mastering Display Metadata(out primaries):\n"); ++ av_log(avctx, AV_LOG_DEBUG, ++ "G(%u,%u) B(%u,%u) R(%u,%u) WP(%u,%u)\n", ++ ctx->out_metadata.display_primaries_x[0], ++ ctx->out_metadata.display_primaries_y[0], ++ ctx->out_metadata.display_primaries_x[1], ++ ctx->out_metadata.display_primaries_y[1], ++ ctx->out_metadata.display_primaries_x[2], ++ ctx->out_metadata.display_primaries_y[2], ++ 
ctx->out_metadata.white_point_x, ++ ctx->out_metadata.white_point_y); ++ ++ metadata_lt = av_frame_get_side_data(output_frame, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ if (metadata_lt) { ++ av_frame_remove_side_data(output_frame, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); ++ metadata_lt = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, ++ sizeof(AVContentLightMetadata)); ++ } else { ++ metadata_lt = av_frame_new_side_data(output_frame, ++ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, ++ sizeof(AVContentLightMetadata)); ++ } ++ ++ hdr_meta_lt = (AVContentLightMetadata *)metadata_lt->data; ++ ++ hdr_meta_lt->MaxCLL = FFMIN(ctx->out_metadata.max_content_light_level, 65535); ++ hdr_meta_lt->MaxFALL = FFMIN(ctx->out_metadata.max_pic_average_light_level, 65535); ++ ++ av_log(avctx, AV_LOG_DEBUG, ++ "Mastering Content Light Level (out):\n"); ++ av_log(avctx, AV_LOG_DEBUG, ++ "MaxCLL(%u) MaxFALL(%u)\n", ++ ctx->out_metadata.max_content_light_level, ++ ctx->out_metadata.max_pic_average_light_level); ++ ++ return 0; ++} ++ + static int tonemap_vaapi_set_filter_params(AVFilterContext *avctx, AVFrame *input_frame) + { + VAAPIVPPContext *vpp_ctx = avctx->priv; +@@ -210,15 +314,26 @@ static int tonemap_vaapi_build_filter_pa + return AVERROR(EINVAL); + } + +- for (i = 0; i < num_query_caps; i++) { +- if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) +- break; +- } +- +- if (i >= num_query_caps) { +- av_log(avctx, AV_LOG_ERROR, +- "VAAPI driver doesn't support HDR to SDR\n"); +- return AVERROR(EINVAL); ++ if (ctx->color_transfer == AVCOL_TRC_SMPTE2084) { ++ for (i = 0; i < num_query_caps; i++) { ++ if (VA_TONE_MAPPING_HDR_TO_HDR & hdr_cap[i].caps_flag) ++ break; ++ } ++ if (i >= num_query_caps) { ++ av_log(avctx, AV_LOG_ERROR, ++ "VAAPI driver doesn't support HDR to HDR\n"); ++ return AVERROR(EINVAL); ++ } ++ } else { ++ for (i = 0; i < num_query_caps; i++) { ++ if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag) ++ break; ++ } ++ if (i >= num_query_caps) { ++ av_log(avctx, AV_LOG_ERROR, ++ "VAAPI driver doesn't support HDR to SDR\n"); ++ return AVERROR(EINVAL); ++ } + } + + hdrtm_param.type = VAProcFilterHighDynamicRangeToneMapping; +@@ -243,6 +358,8 @@ static int tonemap_vaapi_filter_frame(AV + VAProcPipelineParameterBuffer params; + int err; + ++ VAHdrMetaData out_hdr_metadata; ++ + av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input_frame->format), + input_frame->width, input_frame->height, input_frame->pts); +@@ -252,9 +369,11 @@ static int tonemap_vaapi_filter_frame(AV + return AVERROR(EINVAL); + } + +- err = tonemap_vaapi_save_metadata(avctx, input_frame); +- if (err < 0) +- goto fail; ++ if (!ctx->in_master_display && !ctx->in_content_light) { ++ err = tonemap_vaapi_save_metadata(avctx, input_frame); ++ if (err < 0) ++ goto fail; ++ } + + err = tonemap_vaapi_set_filter_params(avctx, input_frame); + if (err < 0) +@@ -291,11 +410,26 @@ static int tonemap_vaapi_filter_frame(AV + if (ctx->color_matrix != AVCOL_SPC_UNSPECIFIED) + output_frame->colorspace = ctx->color_matrix; + ++ if (output_frame->color_trc == AVCOL_TRC_SMPTE2084) { ++ err = tonemap_vaapi_update_sidedata(avctx, output_frame); ++ if (err < 0) ++ goto fail; ++ ++ out_hdr_metadata.metadata_type = VAProcHighDynamicRangeMetadataHDR10; ++ out_hdr_metadata.metadata = &ctx->out_metadata; ++ out_hdr_metadata.metadata_size = sizeof(VAHdrMetaDataHDR10); ++ ++ params.output_hdr_metadata = &out_hdr_metadata; ++ } ++ + err = ff_vaapi_vpp_init_params(avctx, ¶ms, + input_frame, 
output_frame); + if (err < 0) + goto fail; + ++ params.filters = &vpp_ctx->filter_buffers[0]; ++ params.num_filters = vpp_ctx->nb_filter_buffers; ++ + err = ff_vaapi_vpp_render_picture(avctx, ¶ms, output_frame); + if (err < 0) + goto fail; +@@ -355,6 +489,60 @@ static av_cold int tonemap_vaapi_init(AV + STRING_OPTION(color_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); + STRING_OPTION(color_matrix, color_space, AVCOL_SPC_UNSPECIFIED); + ++#define READ_DISPLAY_OPTION(in_or_out) do { \ ++ if (10 != sscanf(ctx->in_or_out ## _master_display, \ ++ "G(%hu|%hu)B(%hu|%hu)R(%hu|%hu)WP(%hu|%hu)L(%u|%u)", \ ++ &ctx->in_or_out ## _metadata.display_primaries_x[0], \ ++ &ctx->in_or_out ## _metadata.display_primaries_y[0], \ ++ &ctx->in_or_out ## _metadata.display_primaries_x[1], \ ++ &ctx->in_or_out ## _metadata.display_primaries_y[1], \ ++ &ctx->in_or_out ## _metadata.display_primaries_x[2], \ ++ &ctx->in_or_out ## _metadata.display_primaries_y[2], \ ++ &ctx->in_or_out ## _metadata.white_point_x, \ ++ &ctx->in_or_out ## _metadata.white_point_y, \ ++ &ctx->in_or_out ## _metadata.min_display_mastering_luminance, \ ++ &ctx->in_or_out ## _metadata.max_display_mastering_luminance)) { \ ++ av_log(avctx, AV_LOG_ERROR, \ ++ "Option " #in_or_out "-mastering-display input invalid\n"); \ ++ return AVERROR(EINVAL); \ ++ } \ ++ } while (0) ++ ++#define READ_LIGHT_OPTION(in_or_out) do { \ ++ if (2 != sscanf(ctx->in_or_out ## _content_light, \ ++ "CLL(%hu)FALL(%hu)", \ ++ &ctx->in_or_out ## _metadata.max_content_light_level, \ ++ &ctx->in_or_out ## _metadata.max_pic_average_light_level)) { \ ++ av_log(avctx, AV_LOG_ERROR, \ ++ "Option " #in_or_out "-content-light input invalid\n"); \ ++ return AVERROR(EINVAL); \ ++ } \ ++ } while (0) ++ ++ if (ctx->in_master_display) { ++ READ_DISPLAY_OPTION(in); ++ } ++ ++ if (ctx->in_content_light) { ++ READ_LIGHT_OPTION(in); ++ } ++ ++ if (ctx->color_transfer == AVCOL_TRC_SMPTE2084) { ++ if (!ctx->out_master_display) { ++ av_log(avctx, AV_LOG_ERROR, ++ "H2H tone-mapping requires valid out-mastering-display metadata\n"); ++ return AVERROR(EINVAL); ++ } ++ READ_DISPLAY_OPTION(out); ++ ++ if (!ctx->out_content_light) { ++ av_log(avctx, AV_LOG_ERROR, ++ "H2H tone-mapping requires valid out-content-light metadata\n"); ++ return AVERROR(EINVAL); ++ } ++ READ_LIGHT_OPTION(out); ++ } ++ + return 0; + } + +@@ -380,10 +568,13 @@ static const AVOption tonemap_vaapi_opti + { "t", "Output color transfer characteristics set", + OFFSET(color_transfer_string), AV_OPT_TYPE_STRING, + { .str = NULL }, .flags = FLAGS, "transfer" }, ++ { "indisplay", "Set input mastering display", OFFSET(in_master_display), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, ++ { "inlight", "Set input content light", OFFSET(in_content_light), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, ++ { "outdisplay", "Set output mastering display for H2H", OFFSET(out_master_display), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, ++ { "outlight", "Set output content light for H2H", OFFSET(out_content_light), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, + { NULL } + }; + +- + AVFILTER_DEFINE_CLASS(tonemap_vaapi); + + static const AVFilterPad tonemap_vaapi_inputs[] = { diff --git a/debian/patches/series b/debian/patches/series index 51f33e36975..507ebf96943 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -13,3 +13,4 @@ 0013-add-qsv-d3d11-opencl-interop.patch 0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch 
 0015-add-a-vaapi-hwupload-filter.patch
+0016-add-fixes-for-the-broken-vaapi-tonemap.patch

From 4f64ad1d23b366aa0c3b682284f696e68b46f5b5 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:41:48 +0800
Subject: [PATCH 26/41] add fixes for webvttenc when using segement muxer

---
 ...-webvttenc-when-using-segement-muxer.patch | 25 +++++++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 26 insertions(+)
 create mode 100644 debian/patches/0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch

diff --git a/debian/patches/0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch b/debian/patches/0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch
new file mode 100644
index 00000000000..7cb65ee8552
--- /dev/null
+++ b/debian/patches/0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch
@@ -0,0 +1,25 @@
+Index: jellyfin-ffmpeg/libavformat/webvttenc.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavformat/webvttenc.c
++++ jellyfin-ffmpeg/libavformat/webvttenc.c
+@@ -49,8 +49,8 @@ static int webvtt_write_header(AVFormatC
+ AVCodecParameters *par = ctx->streams[0]->codecpar;
+ AVIOContext *pb = ctx->pb;
+
+- if (ctx->nb_streams != 1 || par->codec_id != AV_CODEC_ID_WEBVTT) {
+- av_log(ctx, AV_LOG_ERROR, "Exactly one WebVTT stream is needed.\n");
++ if (par->codec_id != AV_CODEC_ID_WEBVTT) {
++ av_log(ctx, AV_LOG_ERROR, "First stream must be WebVTT.\n");
+ return AVERROR(EINVAL);
+ }
+
+@@ -67,6 +67,9 @@ static int webvtt_write_packet(AVFormatC
+ buffer_size_t id_size, settings_size;
+ uint8_t *id, *settings;
+
++ if (pkt->stream_index != 0)
++ return 0;
++
+ avio_printf(pb, "\n");
+
+ id = av_packet_get_side_data(pkt, AV_PKT_DATA_WEBVTT_IDENTIFIER,
diff --git a/debian/patches/series b/debian/patches/series
index 507ebf96943..c0a2b490c41 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -14,3 +14,4 @@
 0014-add-vendor-opts-to-d3d11va-and-bgra-fmt-to-d3d11-dxv.patch
 0015-add-a-vaapi-hwupload-filter.patch
 0016-add-fixes-for-the-broken-vaapi-tonemap.patch
+0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch

From deb62788011fd15398fd6dc40ba4123a40a16b89 Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:42:19 +0800
Subject: [PATCH 27/41] add fixes for nvdec exceed 32 surfaces error

---
 ...xes-for-nvdec-exceed-32-surfaces-error.patch | 17 +++++++++++++++
 debian/patches/series | 1 +
 2 files changed, 18 insertions(+)
 create mode 100644 debian/patches/0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch

diff --git a/debian/patches/0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch b/debian/patches/0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch
new file mode 100644
index 00000000000..ed02508ae5f
--- /dev/null
+++ b/debian/patches/0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch
@@ -0,0 +1,17 @@
+Index: jellyfin-ffmpeg/libavcodec/nvdec.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libavcodec/nvdec.c
++++ jellyfin-ffmpeg/libavcodec/nvdec.c
+@@ -303,8 +303,10 @@ static int nvdec_init_hwframes(AVCodecCo
+ frames_ctx = (AVHWFramesContext*)(*out_frames_ref)->data;
+
+ if (dummy) {
+- // Copied from ff_decode_get_hw_frames_ctx for compatibility
+- frames_ctx->initial_pool_size += 3;
++ // The function above guarantees only 1 work surface. We must guarantee 4 work
++ // surfaces (the absolute minimum), so add the missing count without exceeding
++ // the maximum recommended for nvdec.
++ frames_ctx->initial_pool_size = FFMIN(frames_ctx->initial_pool_size + 3, 32); + + frames_ctx->free = nvdec_free_dummy; + frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy); diff --git a/debian/patches/series b/debian/patches/series index c0a2b490c41..7552128249c 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -15,3 +15,4 @@ 0015-add-a-vaapi-hwupload-filter.patch 0016-add-fixes-for-the-broken-vaapi-tonemap.patch 0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch +0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch From 719c36d688bd71c54b57d82f2c8bb77851140f15 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:42:40 +0800 Subject: [PATCH 28/41] add miscellaneous fixes for QSV from upstream --- ...llaneous-fixes-for-QSV-from-upstream.patch | 631 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 632 insertions(+) create mode 100644 debian/patches/0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch diff --git a/debian/patches/0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch b/debian/patches/0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch new file mode 100644 index 00000000000..fb0c7c97d2b --- /dev/null +++ b/debian/patches/0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch @@ -0,0 +1,631 @@ +Index: jellyfin-ffmpeg/libavcodec/qsv_internal.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsv_internal.h ++++ jellyfin-ffmpeg/libavcodec/qsv_internal.h +@@ -52,6 +52,8 @@ + + #define QSV_MAX_ENC_PAYLOAD 2 // # of mfxEncodeCtrl payloads supported + ++#define QSV_PAYLOAD_SIZE 1024 ++ + #define QSV_VERSION_ATLEAST(MAJOR, MINOR) \ + (MFX_VERSION_MAJOR > (MAJOR) || \ + MFX_VERSION_MAJOR == (MAJOR) && MFX_VERSION_MINOR >= (MINOR)) +Index: jellyfin-ffmpeg/libavcodec/qsvdec.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsvdec.c ++++ jellyfin-ffmpeg/libavcodec/qsvdec.c +@@ -38,14 +38,27 @@ + #include "libavutil/pixfmt.h" + #include "libavutil/time.h" + #include "libavutil/imgutils.h" ++#include "libavutil/stereo3d.h" + + #include "avcodec.h" + #include "internal.h" + #include "decode.h" + #include "hwconfig.h" ++#include "get_bits.h" + #include "qsv.h" ++#include "h264_sei.h" + #include "qsv_internal.h" + ++static const AVRational mfx_tb = { 1, 90000 }; ++ ++#define PTS_TO_MFX_PTS(pts, pts_tb) ((pts) == AV_NOPTS_VALUE ? \ ++ MFX_TIMESTAMP_UNKNOWN : pts_tb.num ? \ ++ av_rescale_q(pts, pts_tb, mfx_tb) : pts) ++ ++#define MFX_PTS_TO_PTS(mfx_pts, pts_tb) ((mfx_pts) == MFX_TIMESTAMP_UNKNOWN ? \ ++ AV_NOPTS_VALUE : pts_tb.num ? 
\ ++ av_rescale_q(mfx_pts, mfx_tb, pts_tb) : mfx_pts) ++ + typedef struct QSVContext { + // the session used for decoding + mfxSession session; +@@ -63,14 +76,13 @@ typedef struct QSVContext { + + AVFifoBuffer *async_fifo; + int zero_consume_run; +- int buffered_count; + int reinit_flag; + + enum AVPixelFormat orig_pix_fmt; + uint32_t fourcc; + mfxFrameInfo frame_info; + AVBufferPool *pool; +- ++ int suggest_pool_size; + int initialized; + + // options set by the caller +@@ -80,8 +92,13 @@ typedef struct QSVContext { + + char *load_plugins; + ++ mfxPayload payload; ++ + mfxExtBuffer **ext_buffers; + int nb_ext_buffers; ++ ++ H264SEIContext sei; ++ H264ParamSets ps; + } QSVContext; + + static const AVCodecHWConfigInternal *const qsv_hw_configs[] = { +@@ -218,6 +235,8 @@ static int qsv_decode_preinit(AVCodecCon + pix_fmt, /* system memory format obtained from bitstream parser */ + AV_PIX_FMT_NONE }; + ++ av_buffer_unref(&q->frames_ctx.mids_buf); ++ av_buffer_unref(&q->frames_ctx.hw_frames_ctx); + ret = ff_get_format(avctx, pix_fmts); + if (ret < 0) { + q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE; +@@ -255,7 +274,7 @@ static int qsv_decode_preinit(AVCodecCon + hwframes_ctx->height = FFALIGN(avctx->coded_height, 32); + hwframes_ctx->format = AV_PIX_FMT_QSV; + hwframes_ctx->sw_format = avctx->sw_pix_fmt; +- hwframes_ctx->initial_pool_size = 64 + avctx->extra_hw_frames; ++ hwframes_ctx->initial_pool_size = q->suggest_pool_size + 16 + avctx->extra_hw_frames; + frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET; + + ret = av_hwframe_ctx_init(avctx->hw_frames_ctx); +@@ -330,14 +349,15 @@ static int qsv_decode_header(AVCodecCont + mfxVideoParam *param) + { + int ret; +- ++ mfxExtVideoSignalInfo video_signal_info = { 0 }; ++ mfxExtBuffer *header_ext_params[1] = { (mfxExtBuffer *)&video_signal_info }; + mfxBitstream bs = { 0 }; + + if (avpkt->size) { + bs.Data = avpkt->data; + bs.DataLength = avpkt->size; + bs.MaxLength = bs.DataLength; +- bs.TimeStamp = avpkt->pts; ++ bs.TimeStamp = PTS_TO_MFX_PTS(avpkt->pts, avctx->pkt_timebase); + if (avctx->field_order == AV_FIELD_PROGRESSIVE) + bs.DataFlag |= MFX_BITSTREAM_COMPLETE_FRAME; + } else +@@ -355,6 +375,12 @@ static int qsv_decode_header(AVCodecCont + return ret; + + param->mfx.CodecId = ret; ++ video_signal_info.Header.BufferId = MFX_EXTBUFF_VIDEO_SIGNAL_INFO; ++ video_signal_info.Header.BufferSz = sizeof(video_signal_info); ++ // The SDK doesn't support other ext buffers when calling MFXVideoDECODE_DecodeHeader, ++ // so do not append this buffer to the existent buffer array ++ param->ExtParam = header_ext_params; ++ param->NumExtParam = 1; + ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param); + if (MFX_ERR_MORE_DATA == ret) { + return AVERROR(EAGAIN); +@@ -363,6 +389,17 @@ static int qsv_decode_header(AVCodecCont + return ff_qsv_print_error(avctx, ret, + "Error decoding stream header"); + ++ avctx->color_range = video_signal_info.VideoFullRange ? 
AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
++
++ if (video_signal_info.ColourDescriptionPresent) {
++ avctx->color_primaries = video_signal_info.ColourPrimaries;
++ avctx->color_trc = video_signal_info.TransferCharacteristics;
++ avctx->colorspace = video_signal_info.MatrixCoefficients;
++ }
++
++ param->ExtParam = q->ext_buffers;
++ param->NumExtParam = q->nb_ext_buffers;
++
+ return 0;
+ }
+
+@@ -381,13 +418,13 @@ static int alloc_frame(AVCodecContext *a
+ if (frame->frame->format == AV_PIX_FMT_QSV) {
+ frame->surface = *(mfxFrameSurface1*)frame->frame->data[3];
+ } else {
+- frame->surface.Info = q->frame_info;
+-
+ frame->surface.Data.PitchLow = frame->frame->linesize[0];
+ frame->surface.Data.Y = frame->frame->data[0];
+ frame->surface.Data.UV = frame->frame->data[1];
+ }
+
++ frame->surface.Info = q->frame_info;
++
+ if (q->frames_ctx.mids) {
+ ret = ff_qsv_find_surface_idx(&q->frames_ctx, frame);
+ if (ret < 0)
+@@ -470,6 +507,147 @@ static QSVFrame *find_frame(QSVContext *
+ return NULL;
+ }
+
++static int h264_decode_fpa(H264SEIFramePacking *fpa, AVFrame *frame)
++{
++ if (!fpa || !frame) {
++ return AVERROR(EINVAL);
++ }
++
++ if (!fpa->arrangement_cancel_flag &&
++ fpa->arrangement_type <= 6 &&
++ fpa->content_interpretation_type > 0 &&
++ fpa->content_interpretation_type < 3) {
++ AVStereo3D *stereo = av_stereo3d_create_side_data(frame);
++ if (stereo) {
++ switch (fpa->arrangement_type) {
++ case 0:
++ stereo->type = AV_STEREO3D_CHECKERBOARD;
++ break;
++ case 1:
++ stereo->type = AV_STEREO3D_COLUMNS;
++ break;
++ case 2:
++ stereo->type = AV_STEREO3D_LINES;
++ break;
++ case 3:
++ if (fpa->quincunx_sampling_flag)
++ stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
++ else
++ stereo->type = AV_STEREO3D_SIDEBYSIDE;
++ break;
++ case 4:
++ stereo->type = AV_STEREO3D_TOPBOTTOM;
++ break;
++ case 5:
++ stereo->type = AV_STEREO3D_FRAMESEQUENCE;
++ if (fpa->current_frame_is_frame0_flag)
++ stereo->view = AV_STEREO3D_VIEW_LEFT;
++ else
++ stereo->view = AV_STEREO3D_VIEW_RIGHT;
++ break;
++ case 6:
++ stereo->type = AV_STEREO3D_2D;
++ break;
++ }
++
++ if (fpa->content_interpretation_type == 2)
++ stereo->flags = AV_STEREO3D_FLAG_INVERT;
++ }
++ }
++ return 0;
++}
++
++static int h264_parse_side_data(AVCodecContext *avctx, QSVContext *q, AVFrame *frame)
++{
++ GetBitContext gb_payload;
++ uint8_t *sei_buffer;
++ int sei_buffer_index;
++ int ret;
++
++ /* remove emulation prevention bytes */
++ sei_buffer = (uint8_t *)av_mallocz(q->payload.NumBit / 8);
++ if (!sei_buffer) {
++ av_freep(&sei_buffer);
++ return AVERROR(ENOMEM);
++ }
++ sei_buffer_index = 0;
++ for (int i = 0; i < q->payload.NumBit / 8; i++) {
++ if (q->payload.Data[i] == 3)
++ i++;
++ sei_buffer[sei_buffer_index] = q->payload.Data[i];
++ sei_buffer_index += 1;
++ }
++
++ ret = init_get_bits8(&gb_payload, sei_buffer, sei_buffer_index+1);
++ if (ret < 0) {
++ av_freep(&sei_buffer);
++ return ret;
++ }
++
++ ret = ff_h264_sei_decode(&q->sei, &gb_payload, &q->ps, avctx);
++ if (ret < 0) {
++ av_freep(&sei_buffer);
++ return ret;
++ }
++
++ switch (q->payload.Type) {
++ case SEI_TYPE_FRAME_PACKING_ARRANGEMENT:
++ ret = h264_decode_fpa(&q->sei.frame_packing, frame);
++ break;
++ default:
++ break;
++ }
++
++ av_freep(&sei_buffer);
++ return ret;
++}
++
++static int extract_frame_side_data(AVCodecContext *avctx, QSVContext *q, AVFrame *frame)
++{
++ mfxU64 ts;
++ mfxStatus sts;
++ int ret = 0;
++
++ if (q->payload.BufSize == 0) {
++ q->payload.Data = av_mallocz(QSV_PAYLOAD_SIZE);
++ if (!q->payload.Data) {
++ av_freep(&q->payload.Data);
++ return AVERROR(ENOMEM); ++ } ++ q->payload.BufSize = QSV_PAYLOAD_SIZE; ++ } ++ ++ sts = MFX_ERR_NONE; ++ while (sts == MFX_ERR_NONE) { ++ ++ sts = MFXVideoDECODE_GetPayload(q->session, &ts, &q->payload); ++ ++ if (sts == MFX_ERR_NOT_ENOUGH_BUFFER) { ++ av_log(avctx, AV_LOG_DEBUG, "Space for SEI is not enough. One SEI will be skipped\n"); ++ continue; ++ } else if (sts != MFX_ERR_NONE || q->payload.NumBit == 0) { ++ break; ++ } ++ ++ if (q->payload.Type != SEI_TYPE_FRAME_PACKING_ARRANGEMENT) ++ continue; ++ ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ ret = h264_parse_side_data(avctx, q, frame); ++ break; ++ default: ++ break; ++ } ++ ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_WARNING, "parse side data failed\n"); ++ break; ++ } ++ } ++ return ret; ++} ++ + static int qsv_decode(AVCodecContext *avctx, QSVContext *q, + AVFrame *frame, int *got_frame, + const AVPacket *avpkt) +@@ -485,7 +663,7 @@ static int qsv_decode(AVCodecContext *av + bs.Data = avpkt->data; + bs.DataLength = avpkt->size; + bs.MaxLength = bs.DataLength; +- bs.TimeStamp = avpkt->pts; ++ bs.TimeStamp = PTS_TO_MFX_PTS(avpkt->pts, avctx->pkt_timebase); + if (avctx->field_order == AV_FIELD_PROGRESSIVE) + bs.DataFlag |= MFX_BITSTREAM_COMPLETE_FRAME; + } +@@ -510,6 +688,13 @@ static int qsv_decode(AVCodecContext *av + + } while (ret == MFX_WRN_DEVICE_BUSY || ret == MFX_ERR_MORE_SURFACE); + ++ if (ret == MFX_ERR_INCOMPATIBLE_VIDEO_PARAM) { ++ q->reinit_flag = 1; ++ av_log(avctx, AV_LOG_DEBUG, "Video parameter change\n"); ++ av_freep(&sync); ++ return 0; ++ } ++ + if (ret != MFX_ERR_NONE && + ret != MFX_ERR_MORE_DATA && + ret != MFX_WRN_VIDEO_PARAM_CHANGED && +@@ -526,8 +711,6 @@ static int qsv_decode(AVCodecContext *av + ++q->zero_consume_run; + if (q->zero_consume_run > 1) + ff_qsv_print_warning(avctx, ret, "A decode call did not consume any data"); +- } else if (!*sync && bs.DataOffset) { +- ++q->buffered_count; + } else { + q->zero_consume_run = 0; + } +@@ -542,7 +725,7 @@ static int qsv_decode(AVCodecContext *av + return AVERROR_BUG; + } + +- out_frame->queued = 1; ++ out_frame->queued += 1; + av_fifo_generic_write(q->async_fifo, &out_frame, sizeof(out_frame), NULL); + av_fifo_generic_write(q->async_fifo, &sync, sizeof(sync), NULL); + } else { +@@ -555,7 +738,7 @@ static int qsv_decode(AVCodecContext *av + + av_fifo_generic_read(q->async_fifo, &out_frame, sizeof(out_frame), NULL); + av_fifo_generic_read(q->async_fifo, &sync, sizeof(sync), NULL); +- out_frame->queued = 0; ++ out_frame->queued -= 1; + + if (avctx->pix_fmt != AV_PIX_FMT_QSV) { + do { +@@ -573,12 +756,16 @@ static int qsv_decode(AVCodecContext *av + + outsurf = &out_frame->surface; + ++ ret = extract_frame_side_data(avctx, q, frame); ++ if (ret < 0) ++ av_log(avctx, AV_LOG_WARNING, "Extracting side from packet failed\n"); ++ + #if FF_API_PKT_PTS + FF_DISABLE_DEPRECATION_WARNINGS + frame->pkt_pts = outsurf->Data.TimeStamp; + FF_ENABLE_DEPRECATION_WARNINGS + #endif +- frame->pts = outsurf->Data.TimeStamp; ++ frame->pts = MFX_PTS_TO_PTS(outsurf->Data.TimeStamp, avctx->pkt_timebase); + + frame->repeat_pict = + outsurf->Info.PicStruct & MFX_PICSTRUCT_FRAME_TRIPLING ? 
4 : +@@ -635,6 +822,8 @@ static void qsv_decode_close_qsvcontext( + av_buffer_unref(&q->frames_ctx.hw_frames_ctx); + av_buffer_unref(&q->frames_ctx.mids_buf); + av_buffer_pool_uninit(&q->pool); ++ ++ av_freep(&q->payload.Data); + } + + static int qsv_process_data(AVCodecContext *avctx, QSVContext *q, +@@ -659,26 +848,37 @@ static int qsv_process_data(AVCodecConte + if (!avctx->coded_height) + avctx->coded_height = 720; + +- ret = qsv_decode_header(avctx, q, pkt, pix_fmt, ¶m); +- +- if (ret >= 0 && (q->orig_pix_fmt != ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) || +- avctx->coded_width != param.mfx.FrameInfo.Width || +- avctx->coded_height != param.mfx.FrameInfo.Height)) { ++ /* decode zero-size pkt to flush the buffered pkt before reinit */ ++ if (q->reinit_flag) { + AVPacket zero_pkt = {0}; ++ ret = qsv_decode(avctx, q, frame, got_frame, &zero_pkt); ++ if (ret < 0 || *got_frame) ++ return ret; ++ } ++ ++ if (q->reinit_flag || !q->session) { ++ mfxFrameAllocRequest request; ++ memset(&request, 0, sizeof(request)); + +- if (q->buffered_count) { +- q->reinit_flag = 1; +- /* decode zero-size pkt to flush the buffered pkt before reinit */ +- q->buffered_count--; +- return qsv_decode(avctx, q, frame, got_frame, &zero_pkt); +- } + q->reinit_flag = 0; ++ ret = qsv_decode_header(avctx, q, pkt, pix_fmt, ¶m); ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Error decoding header\n"); ++ goto reinit_fail; ++ } ++ param.IOPattern = q->iopattern; + + q->orig_pix_fmt = avctx->pix_fmt = pix_fmt = ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC); + + avctx->coded_width = param.mfx.FrameInfo.Width; + avctx->coded_height = param.mfx.FrameInfo.Height; + ++ ret = MFXVideoDECODE_QueryIOSurf(q->session, ¶m, &request); ++ if (ret < 0) ++ return ff_qsv_print_error(avctx, ret, "Error querying IO surface"); ++ ++ q->suggest_pool_size = request.NumFrameSuggested; ++ + ret = qsv_decode_preinit(avctx, q, pix_fmt, ¶m); + if (ret < 0) + goto reinit_fail; +@@ -782,6 +982,9 @@ static av_cold int qsv_decode_init(AVCod + goto fail; + } + ++ if (!avctx->pkt_timebase.num) ++ av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n"); ++ + return 0; + fail: + qsv_decode_close(avctx); +Index: jellyfin-ffmpeg/libavcodec/qsvenc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsvenc.c ++++ jellyfin-ffmpeg/libavcodec/qsvenc.c +@@ -448,7 +448,7 @@ static int init_video_param_jpeg(AVCodec + q->param.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420; + q->param.mfx.FrameInfo.BitDepthLuma = desc->comp[0].depth; + q->param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth; +- q->param.mfx.FrameInfo.Shift = desc->comp[0].depth > 8; ++ q->param.mfx.FrameInfo.Shift = desc->comp[0].shift > 0; + + q->param.mfx.FrameInfo.Width = FFALIGN(avctx->width, 16); + q->param.mfx.FrameInfo.Height = FFALIGN(avctx->height, 16); +@@ -510,7 +510,7 @@ static int init_video_param(AVCodecConte + } + } + +- if (q->low_power) { ++ if (q->low_power == 1) { + #if QSV_HAVE_VDENC + q->param.mfx.LowPower = MFX_CODINGOPTION_ON; + #else +@@ -519,7 +519,9 @@ static int init_video_param(AVCodecConte + q->low_power = 0; + q->param.mfx.LowPower = MFX_CODINGOPTION_OFF; + #endif +- } else ++ } else if (q->low_power == -1) ++ q->param.mfx.LowPower = MFX_CODINGOPTION_UNKNOWN; ++ else + q->param.mfx.LowPower = MFX_CODINGOPTION_OFF; + + q->param.mfx.CodecProfile = q->profile; +@@ -527,7 +529,7 @@ static int init_video_param(AVCodecConte + q->param.mfx.GopPicSize = FFMAX(0, avctx->gop_size); 
+ q->param.mfx.GopRefDist = FFMAX(-1, avctx->max_b_frames) + 1; + q->param.mfx.GopOptFlag = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? +- MFX_GOP_CLOSED : 0; ++ MFX_GOP_CLOSED : MFX_GOP_STRICT; + q->param.mfx.IdrInterval = q->idr_interval; + q->param.mfx.NumSlice = avctx->slices; + q->param.mfx.NumRefFrame = FFMAX(0, avctx->refs); +@@ -550,7 +552,7 @@ static int init_video_param(AVCodecConte + !desc->log2_chroma_w + !desc->log2_chroma_h; + q->param.mfx.FrameInfo.BitDepthLuma = desc->comp[0].depth; + q->param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth; +- q->param.mfx.FrameInfo.Shift = desc->comp[0].depth > 8; ++ q->param.mfx.FrameInfo.Shift = desc->comp[0].shift > 0; + + // If the minor version is greater than or equal to 19, + // then can use the same alignment settings as H.264 for HEVC +@@ -646,7 +648,7 @@ static int init_video_param(AVCodecConte + case MFX_RATECONTROL_LA_ICQ: + q->extco2.LookAheadDepth = q->look_ahead_depth; + case MFX_RATECONTROL_ICQ: +- q->param.mfx.ICQQuality = avctx->global_quality; ++ q->param.mfx.ICQQuality = av_clip(avctx->global_quality, 1, 51); + break; + #endif + #endif +@@ -804,6 +806,24 @@ FF_ENABLE_DEPRECATION_WARNINGS + } + #endif + ++ q->extvsi.VideoFullRange = (avctx->color_range == AVCOL_RANGE_JPEG); ++ q->extvsi.ColourDescriptionPresent = 0; ++ ++ if (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED || ++ avctx->color_trc != AVCOL_TRC_UNSPECIFIED || ++ avctx->colorspace != AVCOL_SPC_UNSPECIFIED) { ++ q->extvsi.ColourDescriptionPresent = 1; ++ q->extvsi.ColourPrimaries = avctx->color_primaries; ++ q->extvsi.TransferCharacteristics = avctx->color_trc; ++ q->extvsi.MatrixCoefficients = avctx->colorspace; ++ } ++ ++ if (q->extvsi.VideoFullRange || q->extvsi.ColourDescriptionPresent) { ++ q->extvsi.Header.BufferId = MFX_EXTBUFF_VIDEO_SIGNAL_INFO; ++ q->extvsi.Header.BufferSz = sizeof(q->extvsi); ++ q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extvsi; ++ } ++ + if (!check_enc_param(avctx,q)) { + av_log(avctx, AV_LOG_ERROR, + "some encoding parameters are not supported by the QSV " +@@ -1250,6 +1270,8 @@ static void clear_unused_frames(QSVEncCo + while (cur) { + if (cur->used && !cur->surface.Data.Locked) { + free_encoder_ctrl_payloads(&cur->enc_ctrl); ++ //do not reuse enc_ctrl from previous frame ++ memset(&cur->enc_ctrl, 0, sizeof(cur->enc_ctrl)); + if (cur->frame->format == AV_PIX_FMT_QSV) { + av_frame_unref(cur->frame); + } +Index: jellyfin-ffmpeg/libavcodec/qsvenc.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsvenc.h ++++ jellyfin-ffmpeg/libavcodec/qsvenc.h +@@ -96,7 +96,7 @@ + { "adaptive_b", "Adaptive B-frame placement", OFFSET(qsv.adaptive_b), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ + { "b_strategy", "Strategy to choose between I/P/B-frames", OFFSET(qsv.b_strategy), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ + { "forced_idr", "Forcing I frames as IDR frames", OFFSET(qsv.forced_idr), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, \ +-{ "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = 0}, 0, 1, VE},\ ++{ "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = -1}, -1, 1, VE},\ + + extern const AVCodecHWConfigInternal *const ff_qsv_enc_hw_configs[]; + +@@ -139,7 +139,9 @@ typedef struct QSVEncContext { + mfxFrameSurface1 **opaque_surfaces; + AVBufferRef 
*opaque_alloc_buf; + +- mfxExtBuffer *extparam_internal[2 + QSV_HAVE_CO2 + QSV_HAVE_CO3 + (QSV_HAVE_MF * 2)]; ++ mfxExtVideoSignalInfo extvsi; ++ ++ mfxExtBuffer *extparam_internal[3 + QSV_HAVE_CO2 + QSV_HAVE_CO3 + (QSV_HAVE_MF * 2)]; + int nb_extparam_internal; + + mfxExtBuffer **extparam; +Index: jellyfin-ffmpeg/libavfilter/qsvvpp.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/qsvvpp.c ++++ jellyfin-ffmpeg/libavfilter/qsvvpp.c +@@ -488,9 +488,6 @@ static QSVFrame *query_frame(QSVVPPConte + if (!out_frame->frame) + return NULL; + +- out_frame->frame->width = outlink->w; +- out_frame->frame->height = outlink->h; +- + ret = map_frame_to_surface(out_frame->frame, + &out_frame->surface_internal); + if (ret < 0) +@@ -499,6 +496,8 @@ static QSVFrame *query_frame(QSVVPPConte + out_frame->surface = &out_frame->surface_internal; + } + ++ out_frame->frame->width = outlink->w; ++ out_frame->frame->height = outlink->h; + out_frame->surface->Info = s->vpp_param.vpp.Out; + + return out_frame; +Index: jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +@@ -275,7 +275,7 @@ static mfxStatus frame_get_hdl(mfxHDL pt + return MFX_ERR_NONE; + } + +-static int init_out_session(AVFilterContext *ctx) ++static int init_out_session(AVFilterContext *ctx, int in_width, int in_height) + { + + QSVScaleContext *s = ctx->priv; +@@ -392,8 +392,11 @@ static int init_out_session(AVFilterCont + sizeof(*s->mem_ids_in)); + if (!s->mem_ids_in) + return AVERROR(ENOMEM); +- for (i = 0; i < in_frames_hwctx->nb_surfaces; i++) ++ for (i = 0; i < in_frames_hwctx->nb_surfaces; i++) { + s->mem_ids_in[i] = in_frames_hwctx->surfaces[i].Data.MemId; ++ in_frames_hwctx->surfaces[i].Info.CropW = in_width; ++ in_frames_hwctx->surfaces[i].Info.CropH = in_height; ++ } + s->nb_mem_ids_in = in_frames_hwctx->nb_surfaces; + + s->mem_ids_out = av_mallocz_array(out_frames_hwctx->nb_surfaces, +@@ -465,7 +468,7 @@ static int init_scale_session(AVFilterCo + if (ret < 0) + return ret; + +- ret = init_out_session(ctx); ++ ret = init_out_session(ctx, in_width, in_height); + if (ret < 0) + return ret; + +Index: jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_qsv.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +@@ -404,7 +404,7 @@ static int qsv_init_surface(AVHWFramesCo + + surf->Info.BitDepthLuma = desc->comp[0].depth; + surf->Info.BitDepthChroma = desc->comp[0].depth; +- surf->Info.Shift = desc->comp[0].depth > 8; ++ surf->Info.Shift = desc->comp[0].shift > 0; + + if (desc->log2_chroma_w && desc->log2_chroma_h) + surf->Info.ChromaFormat = MFX_CHROMAFORMAT_YUV420; diff --git a/debian/patches/series b/debian/patches/series index 7552128249c..791665e7b4f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -16,3 +16,4 @@ 0016-add-fixes-for-the-broken-vaapi-tonemap.patch 0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch 0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch +0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch From cf26dfc77aa44ff706837a6a966e74a7a2c79eda Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:45:55 +0800 Subject: [PATCH 29/41] add miscellaneous fixes for NV from upstream --- ...ellaneous-fixes-for-NV-from-upstream.patch | 79 +++++++++++++++++++ 
debian/patches/series | 1 + 2 files changed, 80 insertions(+) create mode 100644 debian/patches/0020-add-miscellaneous-fixes-for-NV-from-upstream.patch diff --git a/debian/patches/0020-add-miscellaneous-fixes-for-NV-from-upstream.patch b/debian/patches/0020-add-miscellaneous-fixes-for-NV-from-upstream.patch new file mode 100644 index 00000000000..116e456aab3 --- /dev/null +++ b/debian/patches/0020-add-miscellaneous-fixes-for-NV-from-upstream.patch @@ -0,0 +1,79 @@ +Index: jellyfin-ffmpeg/libavcodec/cuviddec.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/cuviddec.c ++++ jellyfin-ffmpeg/libavcodec/cuviddec.c +@@ -336,7 +336,8 @@ static int CUDAAPI cuvid_handle_picture_ + + av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n"); + +- ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag; ++ if(picparams->intra_pic_flag) ++ ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag; + + ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams)); + if (ctx->internal_error < 0) +@@ -593,6 +594,8 @@ static int cuvid_output_frame(AVCodecCon + } + + frame->key_frame = ctx->key_frame[parsed_frame.dispinfo.picture_index]; ++ ctx->key_frame[parsed_frame.dispinfo.picture_index] = 0; ++ + frame->width = avctx->width; + frame->height = avctx->height; + if (avctx->pkt_timebase.num && avctx->pkt_timebase.den) +Index: jellyfin-ffmpeg/libavcodec/nvenc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/nvenc.c ++++ jellyfin-ffmpeg/libavcodec/nvenc.c +@@ -210,8 +210,14 @@ static void nvenc_map_preset(NvencContex + + static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level) + { +-#if NVENCAPI_CHECK_VERSION(11, 1) ++#if NVENCAPI_CHECK_VERSION(11, 2) + const char *minver = "(unknown)"; ++#elif NVENCAPI_CHECK_VERSION(11, 1) ++# if defined(_WIN32) || defined(__CYGWIN__) ++ const char *minver = "471.41"; ++# else ++ const char *minver = "470.57.02"; ++# endif + #elif NVENCAPI_CHECK_VERSION(11, 0) + # if defined(_WIN32) || defined(__CYGWIN__) + const char *minver = "456.71"; +@@ -1053,7 +1059,7 @@ static av_cold int nvenc_setup_h264_conf + || vui->videoFullRangeFlag != 0); + + h264->sliceMode = 3; +- h264->sliceModeData = 1; ++ h264->sliceModeData = avctx->slices > 0 ? avctx->slices : 1; + + h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0; + h264->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1; +@@ -1150,7 +1156,7 @@ static av_cold int nvenc_setup_hevc_conf + || vui->videoFullRangeFlag != 0); + + hevc->sliceMode = 3; +- hevc->sliceModeData = 1; ++ hevc->sliceModeData = avctx->slices > 0 ? avctx->slices : 1; + + hevc->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0; + hevc->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 
0 : 1; +Index: jellyfin-ffmpeg/libavfilter/vf_yadif_cuda.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_yadif_cuda.c ++++ jellyfin-ffmpeg/libavfilter/vf_yadif_cuda.c +@@ -297,10 +297,9 @@ static int config_output(AVFilterLink *l + goto exit; + } + +- link->time_base.num = ctx->inputs[0]->time_base.num; +- link->time_base.den = ctx->inputs[0]->time_base.den * 2; +- link->w = ctx->inputs[0]->w; +- link->h = ctx->inputs[0]->h; ++ link->time_base = av_mul_q(ctx->inputs[0]->time_base, (AVRational){1, 2}); ++ link->w = ctx->inputs[0]->w; ++ link->h = ctx->inputs[0]->h; + + if(y->mode & 1) + link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate, diff --git a/debian/patches/series b/debian/patches/series index 791665e7b4f..8c3e62c5e1e 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -17,3 +17,4 @@ 0017-add-fixes-for-webvttenc-when-using-segement-muxer.patch 0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch 0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch +0020-add-miscellaneous-fixes-for-NV-from-upstream.patch From aeb8ea2dc001f6400c119d7920d5ee616f5edd94 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Wed, 10 Nov 2021 21:45:24 +0800 Subject: [PATCH 30/41] add qsv dec support for yuv444 8/10bit --- ...d-qsv-dec-support-for-yuv444-8-10bit.patch | 369 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 370 insertions(+) create mode 100644 debian/patches/0021-add-qsv-dec-support-for-yuv444-8-10bit.patch diff --git a/debian/patches/0021-add-qsv-dec-support-for-yuv444-8-10bit.patch b/debian/patches/0021-add-qsv-dec-support-for-yuv444-8-10bit.patch new file mode 100644 index 00000000000..4978f80d5e9 --- /dev/null +++ b/debian/patches/0021-add-qsv-dec-support-for-yuv444-8-10bit.patch @@ -0,0 +1,369 @@ +Index: jellyfin-ffmpeg/libavcodec/qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsv.c ++++ jellyfin-ffmpeg/libavcodec/qsv.c +@@ -201,10 +201,14 @@ enum AVPixelFormat ff_qsv_map_fourcc(uin + case MFX_FOURCC_NV12: return AV_PIX_FMT_NV12; + case MFX_FOURCC_P010: return AV_PIX_FMT_P010; + case MFX_FOURCC_P8: return AV_PIX_FMT_PAL8; +-#if CONFIG_VAAPI ++#if CONFIG_VAAPI || CONFIG_D3D11VA + case MFX_FOURCC_YUY2: return AV_PIX_FMT_YUYV422; ++#if QSV_VERSION_ATLEAST(1, 17) ++ case MFX_FOURCC_AYUV: return AV_PIX_FMT_0YUV; ++#endif + #if QSV_VERSION_ATLEAST(1, 27) + case MFX_FOURCC_Y210: return AV_PIX_FMT_Y210; ++ case MFX_FOURCC_Y410: return AV_PIX_FMT_Y410; + #endif + #endif + } +@@ -223,16 +227,26 @@ int ff_qsv_map_pixfmt(enum AVPixelFormat + case AV_PIX_FMT_P010: + *fourcc = MFX_FOURCC_P010; + return AV_PIX_FMT_P010; +-#if CONFIG_VAAPI ++#if CONFIG_VAAPI || CONFIG_D3D11VA + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUYV422: + *fourcc = MFX_FOURCC_YUY2; + return AV_PIX_FMT_YUYV422; ++#if QSV_VERSION_ATLEAST(1, 17) ++ case AV_PIX_FMT_0YUV: ++ case AV_PIX_FMT_YUV444P: ++ *fourcc = MFX_FOURCC_AYUV; ++ return AV_PIX_FMT_0YUV; ++#endif + #if QSV_VERSION_ATLEAST(1, 27) + case AV_PIX_FMT_YUV422P10: + case AV_PIX_FMT_Y210: + *fourcc = MFX_FOURCC_Y210; + return AV_PIX_FMT_Y210; ++ case AV_PIX_FMT_Y410: ++ case AV_PIX_FMT_YUV444P10: ++ *fourcc = MFX_FOURCC_Y410; ++ return AV_PIX_FMT_Y410; + #endif + #endif + default: +Index: jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_d3d11va.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_d3d11va.c +@@ 
-85,6 +85,10 @@ static const struct { + } supported_formats[] = { + { DXGI_FORMAT_NV12, AV_PIX_FMT_NV12 }, + { DXGI_FORMAT_P010, AV_PIX_FMT_P010 }, ++ { DXGI_FORMAT_AYUV, AV_PIX_FMT_0YUV }, ++ { DXGI_FORMAT_YUY2, AV_PIX_FMT_YUYV422 }, ++ { DXGI_FORMAT_Y210, AV_PIX_FMT_Y210 }, ++ { DXGI_FORMAT_Y410, AV_PIX_FMT_Y410 }, + { DXGI_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA }, + // Special opaque formats. The pix_fmt is merely a place holder, as the + // opaque format cannot be accessed directly. +Index: jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_qsv.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +@@ -100,12 +100,18 @@ static const struct { + { AV_PIX_FMT_BGRA, MFX_FOURCC_RGB4 }, + { AV_PIX_FMT_P010, MFX_FOURCC_P010 }, + { AV_PIX_FMT_PAL8, MFX_FOURCC_P8 }, +-#if CONFIG_VAAPI ++#if CONFIG_VAAPI || CONFIG_D3D11VA + { AV_PIX_FMT_YUYV422, + MFX_FOURCC_YUY2 }, ++#if QSV_VERSION_ATLEAST(1, 17) ++ { AV_PIX_FMT_0YUV, ++ MFX_FOURCC_AYUV }, ++#endif + #if QSV_VERSION_ATLEAST(1, 27) + { AV_PIX_FMT_Y210, + MFX_FOURCC_Y210 }, ++ { AV_PIX_FMT_Y410, ++ MFX_FOURCC_Y410 }, + #endif + #endif + }; +@@ -919,7 +925,7 @@ static int map_frame_to_surface(const AV + surface->Data.R = frame->data[0] + 2; + surface->Data.A = frame->data[0] + 3; + break; +-#if CONFIG_VAAPI ++#if CONFIG_VAAPI || CONFIG_D3D11VA + case AV_PIX_FMT_YUYV422: + surface->Data.Y = frame->data[0]; + surface->Data.U = frame->data[0] + 1; +@@ -931,6 +937,15 @@ static int map_frame_to_surface(const AV + surface->Data.U16 = (mfxU16 *)frame->data[0] + 1; + surface->Data.V16 = (mfxU16 *)frame->data[0] + 3; + break; ++ case AV_PIX_FMT_0YUV: ++ surface->Data.V = frame->data[0]; ++ surface->Data.U = frame->data[0] + 1; ++ surface->Data.Y = frame->data[0] + 2; ++ surface->Data.A = frame->data[0] + 3; ++ break; ++ case AV_PIX_FMT_Y410: ++ surface->Data.U = frame->data[0]; ++ break; + #endif + default: + return MFX_ERR_UNSUPPORTED; +Index: jellyfin-ffmpeg/libavutil/pixdesc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/pixdesc.c ++++ jellyfin-ffmpeg/libavutil/pixdesc.c +@@ -228,6 +228,41 @@ static const AVPixFmtDescriptor av_pix_f + }, + .flags = AV_PIX_FMT_FLAG_BE, + }, ++ [AV_PIX_FMT_0YUV] = { ++ .name = "0yuv", ++ .nb_components = 3, ++ .log2_chroma_w = 0, ++ .log2_chroma_h = 0, ++ .comp = { ++ { 0, 4, 1, 0, 8 }, /* Y */ ++ { 0, 4, 2, 0, 8 }, /* U */ ++ { 0, 4, 3, 0, 8 }, /* V */ ++ }, ++ }, ++ [AV_PIX_FMT_Y410LE] = { ++ .name = "y410le", ++ .nb_components = 3, ++ .log2_chroma_w = 0, ++ .log2_chroma_h = 0, ++ .comp = { ++ { 0, 32, 10, 0, 10 }, /* Y */ ++ { 0, 32, 0, 0, 10 }, /* U */ ++ { 0, 32, 20, 0, 10 }, /* V */ ++ }, ++ .flags = AV_PIX_FMT_FLAG_ALPHA | AV_PIX_FMT_FLAG_BITSTREAM, ++ }, ++ [AV_PIX_FMT_Y410BE] = { ++ .name = "y410be", ++ .nb_components = 3, ++ .log2_chroma_w = 0, ++ .log2_chroma_h = 0, ++ .comp = { ++ { 0, 32, 10, 0, 10 }, /* Y */ ++ { 0, 32, 0, 0, 10 }, /* U */ ++ { 0, 32, 20, 0, 10 }, /* V */ ++ }, ++ .flags = AV_PIX_FMT_FLAG_ALPHA | AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_BE, ++ }, + [AV_PIX_FMT_RGB24] = { + .name = "rgb24", + .nb_components = 3, +Index: jellyfin-ffmpeg/libavutil/pixfmt.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/pixfmt.h ++++ jellyfin-ffmpeg/libavutil/pixfmt.h +@@ -358,6 +358,10 @@ enum AVPixelFormat { + AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high 
bits, big-endian
+     AV_PIX_FMT_Y210LE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian
+ 
++    AV_PIX_FMT_0YUV,      ///< packed YUV 4:4:4, 32bpp, X Y Cb Cr, X=unused/undefined
++    AV_PIX_FMT_Y410LE,    ///< packed YUV 4:4:4, 32bpp, Cr Y Cb A, little-endian
++    AV_PIX_FMT_Y410BE,    ///< packed YUV 4:4:4, 32bpp, Cr Y Cb A, big-endian
++
+     AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined
+     AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined
+     AV_PIX_FMT_NB         ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
+@@ -449,6 +453,7 @@ enum AVPixelFormat {
+ #define AV_PIX_FMT_P016    AV_PIX_FMT_NE(P016BE,  P016LE)
+ 
+ #define AV_PIX_FMT_Y210    AV_PIX_FMT_NE(Y210BE,  Y210LE)
++#define AV_PIX_FMT_Y410    AV_PIX_FMT_NE(Y410BE,  Y410LE)
+ #define AV_PIX_FMT_X2RGB10 AV_PIX_FMT_NE(X2RGB10BE, X2RGB10LE)
+ 
+ /**
+Index: jellyfin-ffmpeg/libswscale/input.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libswscale/input.c
++++ jellyfin-ffmpeg/libswscale/input.c
+@@ -573,6 +573,25 @@ static void y210le_Y_c(uint8_t *dst, con
+         AV_WN16(dst + i * 2, AV_RL16(src + i * 4) >> 6);
+ }
+ 
++static void XyuvToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
++                      uint32_t *unused)
++{
++    int i;
++    for (i = 0; i < width; i++)
++        dst[i] = src[4 * i + 2];
++}
++
++static void XyuvToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
++                       const uint8_t *src2, int width, uint32_t *unused)
++{
++    int i;
++    for (i = 0; i < width; i++) {
++        dstV[i] = src1[4 * i];
++        dstU[i] = src1[4 * i + 1];
++    }
++    av_assert1(src1 == src2);
++}
++
+ static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width,
+                        uint32_t *unused)
+ {
+@@ -1257,6 +1276,9 @@ av_cold void ff_sws_init_input_funcs(Sws
+     case AV_PIX_FMT_Y210LE:
+         c->chrToYV12 = y210le_UV_c;
+         break;
++    case AV_PIX_FMT_0YUV:
++        c->chrToYV12 = XyuvToUV_c;
++        break;
+     }
+     if (c->chrSrcHSubSample) {
+         switch (srcFormat) {
+@@ -1708,6 +1730,9 @@ av_cold void ff_sws_init_input_funcs(Sws
+     case AV_PIX_FMT_Y210LE:
+         c->lumToYV12 = y210le_Y_c;
+         break;
++    case AV_PIX_FMT_0YUV:
++        c->lumToYV12 = XyuvToY_c;
++        break;
+     case AV_PIX_FMT_X2RGB10LE:
+         c->lumToYV12 =rgb30leToY_c;
+         break;
+Index: jellyfin-ffmpeg/libswscale/output.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libswscale/output.c
++++ jellyfin-ffmpeg/libswscale/output.c
+@@ -2492,6 +2492,53 @@ yuv2ya8_X_c(SwsContext *c, const int16_t
+ }
+ 
+ static void
++yuv2Xyuv_X_c(SwsContext *c, const int16_t *lumFilter,
++             const int16_t **lumSrc, int lumFilterSize,
++             const int16_t *chrFilter, const int16_t **chrUSrc,
++             const int16_t **chrVSrc, int chrFilterSize,
++             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
++{
++    int hasAlpha = !!alpSrc;
++    int i;
++
++    for (i = 0; i < dstW; i++) {
++        int j;
++        int A = 1 << 18;
++        int Y = 1 << 18;
++        int U = 1 << 18;
++        int V = 1 << 18;
++
++        for (j = 0; j < lumFilterSize; j++) {
++            Y += lumSrc[j][i] * lumFilter[j];
++        }
++        for (j = 0; j < chrFilterSize; j++) {
++            U += chrUSrc[j][i] * chrFilter[j];
++            V += chrVSrc[j][i] * chrFilter[j];
++        }
++        if (hasAlpha)
++            for (j = 0; j < lumFilterSize; j++)
++                A += alpSrc[j][i] * lumFilter[j];
++        A >>= 19;
++        Y >>= 19;
++        U >>= 19;
++        V >>= 19;
++        A = hasAlpha ? A : 255;
++
++        if ((A | Y | U | V) & 0x100) {
++            A = av_clip_uint8(A);
++            Y = av_clip_uint8(Y);
++            U = av_clip_uint8(U);
++            V = av_clip_uint8(V);
++        }
++
++        dest[4*i]     = V;
++        dest[4*i + 1] = U;
++        dest[4*i + 2] = Y;
++        dest[4*i + 3] = A;
++    }
++}
++
++static void
+ yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter,
+                  const int16_t **_lumSrc, int lumFilterSize,
+                  const int16_t *chrFilter, const int16_t **_chrUSrc,
+@@ -3033,6 +3080,9 @@ av_cold void ff_sws_init_output_funcs(Sw
+         *yuv2packed2 = yuv2ya16be_2_c;
+         *yuv2packedX = yuv2ya16be_X_c;
+         break;
++    case AV_PIX_FMT_0YUV:
++        *yuv2packedX = yuv2Xyuv_X_c;
++        break;
+     case AV_PIX_FMT_AYUV64LE:
+         *yuv2packedX = yuv2ayuv64le_X_c;
+         break;
+Index: jellyfin-ffmpeg/libswscale/swscale_unscaled.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libswscale/swscale_unscaled.c
++++ jellyfin-ffmpeg/libswscale/swscale_unscaled.c
+@@ -403,6 +403,41 @@ static int yuyvToYuv422Wrapper(SwsContex
+     return srcSliceH;
+ }
+ 
++static void yuv444pTo0yuv(const uint8_t *src[], int srcStride[],
++                          uint8_t *dst, int dstStride, int srcSliceH, int width)
++{
++    int x, h, i;
++    for (h = 0; h < srcSliceH; h++) {
++        uint8_t *dest = dst + dstStride * h;
++
++        for (x = 0; x < width; x++) {
++            *dest++ = src[2][x];
++            *dest++ = src[1][x];
++            *dest++ = src[0][x];
++            *dest++ = 0xFF;
++        }
++
++        for (i = 0; i < 3; i++)
++            src[i] += srcStride[i];
++    }
++}
++
++
++static int yuv444pTo0yuvWrapper(SwsContext *c, const uint8_t *src[],
++                                int srcStride[], int srcSliceY, int srcSliceH,
++                                uint8_t *dstParam[], int dstStride[])
++{
++    uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY;
++
++    const uint8_t *source[] = { src[0], src[1], src[2] };
++    int stride[] = { srcStride[0], srcStride[1], srcStride[2] };
++
++    yuv444pTo0yuv(source, stride, dst, dstStride[0],
++                  srcSliceH, c->srcW);
++
++    return srcSliceH;
++}
++
+ static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t *src[],
+                                int srcStride[], int srcSliceY, int srcSliceH,
+                                uint8_t *dstParam[], int dstStride[])
+@@ -2170,6 +2205,11 @@ void ff_get_unscaled_swscale(SwsContext
+         c->swscale = yuv422pToUyvyWrapper;
+     }
+ 
++    if (srcFormat == AV_PIX_FMT_YUV444P) {
++        if (dstFormat == AV_PIX_FMT_0YUV)
++            c->swscale = yuv444pTo0yuvWrapper;
++    }
++
+     /* uint Y to float Y */
+     if (srcFormat == AV_PIX_FMT_GRAY8 && dstFormat == AV_PIX_FMT_GRAYF32){
+         c->swscale = uint_y_to_float_y_wrapper;
+Index: jellyfin-ffmpeg/libswscale/utils.c
+===================================================================
+--- jellyfin-ffmpeg.orig/libswscale/utils.c
++++ jellyfin-ffmpeg/libswscale/utils.c
+@@ -271,6 +271,7 @@ static const FormatEntry format_entries[
+     [AV_PIX_FMT_NV24]        = { 1, 1 },
+     [AV_PIX_FMT_NV42]        = { 1, 1 },
+     [AV_PIX_FMT_Y210LE]      = { 1, 0 },
++    [AV_PIX_FMT_0YUV]        = { 1, 1 },
+     [AV_PIX_FMT_X2RGB10LE]   = { 1, 1 },
+ };
+ 
diff --git a/debian/patches/series b/debian/patches/series
index 8c3e62c5e1e..6806f4027a4 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -18,3 +18,4 @@
 0018-add-fixes-for-nvdec-exceed-32-surfaces-error.patch
 0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch
 0020-add-miscellaneous-fixes-for-NV-from-upstream.patch
+0021-add-qsv-dec-support-for-yuv444-8-10bit.patch

From e19e1e95b267d9b2aaf8f8d404490787f728821a Mon Sep 17 00:00:00 2001
From: nyanmisaka
Date: Sun, 7 Nov 2021 15:47:00 +0800
Subject: [PATCH 31/41] add fixes for warning on overlay filters

---
...dd-fixes-for-warning-on-overlay-filters.patch | 16 ++++++++++++++++ debian/patches/series | 1 + 2 files changed, 17 insertions(+) create mode 100644 debian/patches/0022-add-fixes-for-warning-on-overlay-filters.patch diff --git a/debian/patches/0022-add-fixes-for-warning-on-overlay-filters.patch b/debian/patches/0022-add-fixes-for-warning-on-overlay-filters.patch new file mode 100644 index 00000000000..4133e07a5e9 --- /dev/null +++ b/debian/patches/0022-add-fixes-for-warning-on-overlay-filters.patch @@ -0,0 +1,16 @@ +Index: jellyfin-ffmpeg/libavfilter/buffersrc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/buffersrc.c ++++ jellyfin-ffmpeg/libavfilter/buffersrc.c +@@ -69,9 +69,9 @@ typedef struct BufferSourceContext { + + #define CHECK_VIDEO_PARAM_CHANGE(s, c, width, height, format, pts)\ + if (c->w != width || c->h != height || c->pix_fmt != format) {\ +- av_log(s, AV_LOG_INFO, "filter context - w: %d h: %d fmt: %d, incoming frame - w: %d h: %d fmt: %d pts_time: %s\n",\ ++ av_log(s, AV_LOG_DEBUG, "filter context - w: %d h: %d fmt: %d, incoming frame - w: %d h: %d fmt: %d pts_time: %s\n",\ + c->w, c->h, c->pix_fmt, width, height, format, av_ts2timestr(pts, &s->outputs[0]->time_base));\ +- av_log(s, AV_LOG_WARNING, "Changing video frame properties on the fly is not supported by all filters.\n");\ ++ av_log(s, AV_LOG_DEBUG, "Changing video frame properties on the fly is not supported by all filters.\n");\ + } + + #define CHECK_AUDIO_PARAM_CHANGE(s, c, srate, ch_layout, ch_count, format, pts)\ diff --git a/debian/patches/series b/debian/patches/series index 6806f4027a4..971e851876f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -19,3 +19,4 @@ 0019-add-miscellaneous-fixes-for-QSV-from-upstream.patch 0020-add-miscellaneous-fixes-for-NV-from-upstream.patch 0021-add-qsv-dec-support-for-yuv444-8-10bit.patch +0022-add-fixes-for-warning-on-overlay-filters.patch From 3f558dd3b2fc5c2ee7ae6f038dcc0205a326c365 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:47:19 +0800 Subject: [PATCH 32/41] add fixes for HEVC 10bit HDR decoding in bsf --- ...-for-HEVC-10-bit-HDR-decoding-in-bsf.patch | 28 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 29 insertions(+) create mode 100644 debian/patches/0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch diff --git a/debian/patches/0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch b/debian/patches/0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch new file mode 100644 index 00000000000..3e8f4a2b2bf --- /dev/null +++ b/debian/patches/0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch @@ -0,0 +1,28 @@ +Index: jellyfin-ffmpeg/libavcodec/hevc_mp4toannexb_bsf.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/hevc_mp4toannexb_bsf.c ++++ jellyfin-ffmpeg/libavcodec/hevc_mp4toannexb_bsf.c +@@ -121,7 +121,7 @@ static int hevc_mp4toannexb_filter(AVBSF + HEVCBSFContext *s = ctx->priv_data; + AVPacket *in; + GetByteContext gb; +- ++ int has_sps = 0, has_pps = 0; + int got_irap = 0; + int i, ret = 0; + +@@ -155,10 +155,13 @@ static int hevc_mp4toannexb_filter(AVBSF + } + + nalu_type = (bytestream2_peek_byte(&gb) >> 1) & 0x3f; ++ has_sps = (has_sps || nalu_type == HEVC_NAL_SPS); ++ has_pps = (has_pps || nalu_type == HEVC_NAL_PPS); + + /* prepend extradata to IRAP frames */ + is_irap = nalu_type >= 16 && nalu_type <= 23; +- add_extradata = is_irap && !got_irap; ++ /* ignore the extradata 
if IRAP frame has sps and pps */ ++ add_extradata = is_irap && !got_irap && !(has_sps && has_pps); + extra_size = add_extradata * ctx->par_out->extradata_size; + got_irap |= is_irap; + diff --git a/debian/patches/series b/debian/patches/series index 971e851876f..fc58f4b6987 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -20,3 +20,4 @@ 0020-add-miscellaneous-fixes-for-NV-from-upstream.patch 0021-add-qsv-dec-support-for-yuv444-8-10bit.patch 0022-add-fixes-for-warning-on-overlay-filters.patch +0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch From b6a47d815974b0e8714b201170765a7fb5096d34 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:47:37 +0800 Subject: [PATCH 33/41] add sub2video option to subtitles filter --- ...sub2video-option-to-subtitles-filter.patch | 111 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 112 insertions(+) create mode 100644 debian/patches/0024-add-sub2video-option-to-subtitles-filter.patch diff --git a/debian/patches/0024-add-sub2video-option-to-subtitles-filter.patch b/debian/patches/0024-add-sub2video-option-to-subtitles-filter.patch new file mode 100644 index 00000000000..87dfc2e4490 --- /dev/null +++ b/debian/patches/0024-add-sub2video-option-to-subtitles-filter.patch @@ -0,0 +1,111 @@ +Index: jellyfin-ffmpeg/libavfilter/vf_subtitles.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_subtitles.c ++++ jellyfin-ffmpeg/libavfilter/vf_subtitles.c +@@ -55,10 +55,13 @@ typedef struct AssContext { + char *force_style; + int stream_index; + int alpha; ++ int sub2video; ++ int last_image; + uint8_t rgba_map[4]; + int pix_step[4]; ///< steps per pixel for each plane of the main output + int original_w, original_h; + int shaping; ++ int64_t max_pts, max_ts_ms; + FFDrawContext draw; + } AssContext; + +@@ -70,7 +73,8 @@ typedef struct AssContext { + {"f", "set the filename of file to read", OFFSET(filename), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, \ + {"original_size", "set the size of the original video (used to scale fonts)", OFFSET(original_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, FLAGS }, \ + {"fontsdir", "set the directory containing the fonts to read", OFFSET(fontsdir), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, \ +- {"alpha", "enable processing of alpha channel", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, \ ++ {"alpha", "enable processing of alpha channel", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, \ ++ {"sub2video", "enable textual subtitle to video mode", OFFSET(sub2video), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, \ + + /* libass supports a log level ranging from 0 to 7 */ + static const int ass_libavfilter_log_level_map[] = { +@@ -151,6 +155,8 @@ static int config_input(AVFilterLink *in + if (ass->shaping != -1) + ass_set_shaper(ass->renderer, ass->shaping); + ++ ass->max_pts = ass->max_ts_ms / (av_q2d(inlink->time_base) * 1000); ++ + return 0; + } + +@@ -181,18 +187,41 @@ static int filter_frame(AVFilterLink *in + AVFilterLink *outlink = ctx->outputs[0]; + AssContext *ass = ctx->priv; + int detect_change = 0; +- double time_ms = picref->pts * av_q2d(inlink->time_base) * 1000; ++ int64_t time_ms = picref->pts * av_q2d(inlink->time_base) * 1000; + ASS_Image *image = ass_render_frame(ass->renderer, ass->track, + time_ms, &detect_change); + ++ if (ass->sub2video) { ++ if (!image && !ass->last_image && picref->pts <= ass->max_pts && outlink->current_pts != AV_NOPTS_VALUE) { ++ 
av_log(ctx, AV_LOG_DEBUG, "sub2video skip pts:%"PRId64"\n", picref->pts); ++ av_frame_free(&picref); ++ return 0; ++ } ++ ass->last_image = image != NULL; ++ } ++ + if (detect_change) +- av_log(ctx, AV_LOG_DEBUG, "Change happened at time ms:%f\n", time_ms); ++ av_log(ctx, AV_LOG_DEBUG, "Change happened at time ms:%"PRId64"\n", time_ms); + + overlay_ass_image(ass, picref, image); + + return ff_filter_frame(outlink, picref); + } + ++static void get_max_timestamp(AVFilterContext *ctx) ++{ ++ AssContext *ass = ctx->priv; ++ int i; ++ ++ ass->max_ts_ms = 0; ++ if (ass->track) { ++ for (i = 0; i < ass->track->n_events; i++) { ++ ASS_Event *event = ass->track->events + i; ++ ass->max_ts_ms = FFMAX(event->Start + event->Duration, ass->max_ts_ms); ++ } ++ } ++} ++ + static const AVFilterPad ass_inputs[] = { + { + .name = "default", +@@ -243,6 +272,9 @@ static av_cold int init_ass(AVFilterCont + ass->filename); + return AVERROR(EINVAL); + } ++ ++ get_max_timestamp(ctx); ++ + return 0; + } + +@@ -264,8 +296,8 @@ AVFilter ff_vf_ass = { + static const AVOption subtitles_options[] = { + COMMON_OPTIONS + {"charenc", "set input character encoding", OFFSET(charenc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, +- {"stream_index", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, +- {"si", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, ++ {"stream_index", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, ++ {"si", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS}, + {"force_style", "force subtitle style", OFFSET(force_style), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, + {NULL}, + }; +@@ -473,6 +505,8 @@ static av_cold int init_subtitles(AVFilt + avsubtitle_free(&sub); + } + ++ get_max_timestamp(ctx); ++ + end: + av_dict_free(&codec_opts); + avcodec_free_context(&dec_ctx); diff --git a/debian/patches/series b/debian/patches/series index fc58f4b6987..757669c5560 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -21,3 +21,4 @@ 0021-add-qsv-dec-support-for-yuv444-8-10bit.patch 0022-add-fixes-for-warning-on-overlay-filters.patch 0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch +0024-add-sub2video-option-to-subtitles-filter.patch From e966e53a735af39d894b6a7033a961412569d1c2 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sun, 7 Nov 2021 15:47:52 +0800 Subject: [PATCH 34/41] add alphasrc source video filter --- ...025-add-alphasrc-source-video-filter.patch | 194 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 195 insertions(+) create mode 100644 debian/patches/0025-add-alphasrc-source-video-filter.patch diff --git a/debian/patches/0025-add-alphasrc-source-video-filter.patch b/debian/patches/0025-add-alphasrc-source-video-filter.patch new file mode 100644 index 00000000000..c03869c09b7 --- /dev/null +++ b/debian/patches/0025-add-alphasrc-source-video-filter.patch @@ -0,0 +1,194 @@ +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ -495,6 +495,7 @@ OBJS-$(CONFIG_ZSCALE_FILTER) + + OBJS-$(CONFIG_ALLRGB_FILTER) += vsrc_testsrc.o + OBJS-$(CONFIG_ALLYUV_FILTER) += vsrc_testsrc.o ++OBJS-$(CONFIG_ALPHASRC_FILTER) += vsrc_alphasrc.o + OBJS-$(CONFIG_CELLAUTO_FILTER) += vsrc_cellauto.o + OBJS-$(CONFIG_COLOR_FILTER) += vsrc_testsrc.o + 
OBJS-$(CONFIG_COREIMAGESRC_FILTER) += vf_coreimage.o +Index: jellyfin-ffmpeg/libavfilter/allfilters.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/allfilters.c ++++ jellyfin-ffmpeg/libavfilter/allfilters.c +@@ -472,6 +472,7 @@ extern AVFilter ff_vf_zscale; + + extern AVFilter ff_vsrc_allrgb; + extern AVFilter ff_vsrc_allyuv; ++extern AVFilter ff_vsrc_alphasrc; + extern AVFilter ff_vsrc_cellauto; + extern AVFilter ff_vsrc_color; + extern AVFilter ff_vsrc_coreimagesrc; +Index: jellyfin-ffmpeg/libavfilter/vsrc_alphasrc.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vsrc_alphasrc.c +@@ -0,0 +1,165 @@ ++/* ++ * Copyright (c) 2021 NyanMisaka ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * Provide a blank video input with alpha channel. ++ */ ++ ++#include "libavutil/avstring.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/opt.h" ++#include "libavutil/parseutils.h" ++#include "filters.h" ++#include "avfilter.h" ++#include "internal.h" ++#include "formats.h" ++#include "video.h" ++ ++typedef struct AlphaSrc { ++ const AVClass *class; ++ AVRational time_base, frame_rate; ++ int64_t pts; ++ int64_t duration, start; ++ int out_w, out_h; ++ int rgb, planar; ++} AlphaSrc; ++ ++static av_cold int alphasrc_init(AVFilterContext *ctx) ++{ ++ AlphaSrc *s = ctx->priv; ++ ++ s->time_base = av_inv_q(s->frame_rate); ++ s->pts = 0; ++ ++ if (s->start > 0) ++ s->pts += av_rescale_q(s->start, AV_TIME_BASE_Q, s->time_base); ++ ++ return 0; ++} ++ ++static int alphasrc_query_formats(AVFilterContext *ctx) ++{ ++ AVFilterLink *outlink = ctx->outputs[0]; ++ AVFilterFormats *formats; ++ int ret; ++ ++ if ((ret = ff_formats_pixdesc_filter(&formats, AV_PIX_FMT_FLAG_ALPHA, 0)) || ++ (ret = ff_formats_ref(formats, &outlink->incfg.formats))) ++ return ret; ++ ++ return 0; ++} ++ ++static int alphasrc_config_output(AVFilterLink *outlink) ++{ ++ AVFilterContext *ctx = outlink->src; ++ AlphaSrc *s = ctx->priv; ++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format); ++ ++ s->rgb = desc->flags & AV_PIX_FMT_FLAG_RGB; ++ s->planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR; ++ ++ if (!s->rgb && !s->planar) { ++ av_log(ctx, AV_LOG_ERROR, "Unsupported output format.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (s->out_w <= 0 || s->out_h <= 0) { ++ av_log(ctx, AV_LOG_ERROR, "Invalid output video size.\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ outlink->w = s->out_w; ++ outlink->h = s->out_h; ++ outlink->frame_rate = s->frame_rate; ++ outlink->time_base = s->time_base; ++ outlink->sample_aspect_ratio = (AVRational){1, 1}; ++ ++ return 0; ++} ++ ++static int alphasrc_request_frame(AVFilterLink *outlink) ++{ ++ 
AVFilterContext *ctx = outlink->src; ++ AlphaSrc *s = ctx->priv; ++ AVFrame *out; ++ int i; ++ ++ if (s->duration > 0 && ++ av_rescale_q(s->pts, s->time_base, AV_TIME_BASE_Q) >= s->duration) { ++ ff_outlink_set_status(outlink, AVERROR_EOF, s->pts); ++ return 0; ++ } ++ ++ out = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!out) ++ return AVERROR(ENOMEM); ++ ++ for (i = 0; i < AV_NUM_DATA_POINTERS; i++) { ++ if (out->buf[i]) { ++ if (s->rgb) ++ memset(out->buf[i]->data, 0, out->buf[i]->size); ++ else if (s->planar) ++ memset(out->buf[i]->data, (i == 1 || i == 2) ? 128 : 0, out->buf[i]->size); ++ } ++ } ++ ++ out->pts = s->pts++; ++ ++ return ff_filter_frame(outlink, out); ++} ++ ++#define OFFSET(x) offsetof(AlphaSrc, x) ++#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM ++static const AVOption alphasrc_options[] = { ++ {"duration", "set the duration of the video", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX, FLAGS}, ++ {"d", "set the duration of the video", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX, FLAGS}, ++ {"start", "set the start timestamp of the video", OFFSET(start), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX, FLAGS}, ++ {"rate", "set the frame rate of the video", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "15"}, 1, INT_MAX, FLAGS}, ++ {"r", "set the frame rate of the video", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "15"}, 1, INT_MAX, FLAGS}, ++ {"size", "set the size of the video", OFFSET(out_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, FLAGS}, ++ {"s", "set the size of the video", OFFSET(out_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, FLAGS}, ++ {NULL} ++}; ++ ++AVFILTER_DEFINE_CLASS(alphasrc); ++ ++static const AVFilterPad alphasrc_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = alphasrc_config_output, ++ .request_frame = alphasrc_request_frame, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vsrc_alphasrc = { ++ .name = "alphasrc", ++ .description = NULL_IF_CONFIG_SMALL("Provide a blank video input with alpha channel."), ++ .priv_size = sizeof(AlphaSrc), ++ .priv_class = &alphasrc_class, ++ .query_formats = alphasrc_query_formats, ++ .init = alphasrc_init, ++ .uninit = NULL, ++ .inputs = NULL, ++ .outputs = alphasrc_outputs, ++}; diff --git a/debian/patches/series b/debian/patches/series index 757669c5560..f3d006d95b6 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -22,3 +22,4 @@ 0022-add-fixes-for-warning-on-overlay-filters.patch 0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch 0024-add-sub2video-option-to-subtitles-filter.patch +0025-add-alphasrc-source-video-filter.patch From 31f04e80b6f31c80a57bfd8c8c08547a0865586f Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 8 Nov 2021 01:26:40 +0800 Subject: [PATCH 35/41] add fixes for armhf build with gcc 11 --- ...-add-fixes-for-armhf-build-with-gcc-11.patch | 17 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 18 insertions(+) create mode 100644 debian/patches/0026-add-fixes-for-armhf-build-with-gcc-11.patch diff --git a/debian/patches/0026-add-fixes-for-armhf-build-with-gcc-11.patch b/debian/patches/0026-add-fixes-for-armhf-build-with-gcc-11.patch new file mode 100644 index 00000000000..f823e7a19d8 --- /dev/null +++ b/debian/patches/0026-add-fixes-for-armhf-build-with-gcc-11.patch @@ -0,0 +1,17 @@ +Index: jellyfin-ffmpeg/configure +=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ 
jellyfin-ffmpeg/configure +@@ -4988,9 +4988,11 @@ elif enabled arm; then + fi + } + +- [ "$cpu" = generic ] && cpu=$(probe_arm_arch) + + case $cpu in ++ generic) ++ subarch=$(probe_arm_arch | sed 's/[^a-z0-9]//g') ++ ;; + armv*) + cpuflags="-march=$cpu" + subarch=$(echo $cpu | sed 's/[^a-z0-9]//g') diff --git a/debian/patches/series b/debian/patches/series index f3d006d95b6..5ca70657cc8 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -23,3 +23,4 @@ 0023-add-fixes-for-HEVC-10-bit-HDR-decoding-in-bsf.patch 0024-add-sub2video-option-to-subtitles-filter.patch 0025-add-alphasrc-source-video-filter.patch +0026-add-fixes-for-armhf-build-with-gcc-11.patch From 772d8499434d28a072fc635b4f689060866d7eaf Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 8 Nov 2021 16:36:36 +0800 Subject: [PATCH 36/41] add fixes to improve the performance of vaapi encode --- ...rove-the-performance-of-vaapi-encode.patch | 235 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 236 insertions(+) create mode 100644 debian/patches/0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch diff --git a/debian/patches/0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch b/debian/patches/0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch new file mode 100644 index 00000000000..53f86235586 --- /dev/null +++ b/debian/patches/0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch @@ -0,0 +1,235 @@ +Index: jellyfin-ffmpeg/libavcodec/vaapi_encode.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/vaapi_encode.c ++++ jellyfin-ffmpeg/libavcodec/vaapi_encode.c +@@ -134,7 +134,8 @@ static int vaapi_encode_make_misc_param_ + } + + static int vaapi_encode_wait(AVCodecContext *avctx, +- VAAPIEncodePicture *pic) ++ VAAPIEncodePicture *pic, ++ uint8_t wait) + { + VAAPIEncodeContext *ctx = avctx->priv_data; + VAStatus vas; +@@ -150,11 +151,43 @@ static int vaapi_encode_wait(AVCodecCont + "(input surface %#x).\n", pic->display_order, + pic->encode_order, pic->input_surface); + +- vas = vaSyncSurface(ctx->hwctx->display, pic->input_surface); +- if (vas != VA_STATUS_SUCCESS) { +- av_log(avctx, AV_LOG_ERROR, "Failed to sync to picture completion: " +- "%d (%s).\n", vas, vaErrorStr(vas)); ++#if VA_CHECK_VERSION(1, 9, 0) ++ // Try vaSyncBuffer. ++ vas = vaSyncBuffer(ctx->hwctx->display, ++ pic->output_buffer, ++ wait ? VA_TIMEOUT_INFINITE : 0); ++ if (vas == VA_STATUS_ERROR_TIMEDOUT) { ++ return AVERROR(EAGAIN); ++ } else if (vas != VA_STATUS_SUCCESS && vas != VA_STATUS_ERROR_UNIMPLEMENTED) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to sync to output buffer completion: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); + return AVERROR(EIO); ++ } else if (vas == VA_STATUS_ERROR_UNIMPLEMENTED) ++ // If vaSyncBuffer is not implemented, try old version API. 
++#endif ++ { ++ if (!wait) { ++ VASurfaceStatus surface_status; ++ vas = vaQuerySurfaceStatus(ctx->hwctx->display, ++ pic->input_surface, ++ &surface_status); ++ if (vas == VA_STATUS_SUCCESS && ++ surface_status != VASurfaceReady && ++ surface_status != VASurfaceSkipped) { ++ return AVERROR(EAGAIN); ++ } else if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to query surface status: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ return AVERROR(EIO); ++ } ++ } else { ++ vas = vaSyncSurface(ctx->hwctx->display, pic->input_surface); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to sync to picture completion: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ return AVERROR(EIO); ++ } ++ } + } + + // Input is definitely finished with now. +@@ -633,7 +666,7 @@ static int vaapi_encode_output(AVCodecCo + uint8_t *ptr; + int err; + +- err = vaapi_encode_wait(avctx, pic); ++ err = vaapi_encode_wait(avctx, pic, 1); + if (err < 0) + return err; + +@@ -695,7 +728,7 @@ fail: + static int vaapi_encode_discard(AVCodecContext *avctx, + VAAPIEncodePicture *pic) + { +- vaapi_encode_wait(avctx, pic); ++ vaapi_encode_wait(avctx, pic, 1); + + if (pic->output_buffer_ref) { + av_log(avctx, AV_LOG_DEBUG, "Discard output for pic " +@@ -951,8 +984,10 @@ static int vaapi_encode_pick_next(AVCode + if (!pic && ctx->end_of_stream) { + --b_counter; + pic = ctx->pic_end; +- if (pic->encode_issued) ++ if (pic->encode_complete) + return AVERROR_EOF; ++ else if (pic->encode_issued) ++ return AVERROR(EAGAIN); + } + + if (!pic) { +@@ -1123,7 +1158,8 @@ static int vaapi_encode_send_frame(AVCod + if (ctx->input_order == ctx->decode_delay) + ctx->dts_pts_diff = pic->pts - ctx->first_pts; + if (ctx->output_delay > 0) +- ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts; ++ ctx->ts_ring[ctx->input_order % ++ (3 * ctx->output_delay + ctx->async_depth)] = pic->pts; + + pic->display_order = ctx->input_order; + ++ctx->input_order; +@@ -1177,20 +1213,40 @@ int ff_vaapi_encode_receive_packet(AVCod + return AVERROR(EAGAIN); + } + +- pic = NULL; +- err = vaapi_encode_pick_next(avctx, &pic); +- if (err < 0) +- return err; +- av_assert0(pic); ++ while (av_fifo_size(ctx->encode_fifo) < ++ MAX_ASYNC_DEPTH * sizeof(VAAPIEncodePicture *)) { ++ pic = NULL; ++ err = vaapi_encode_pick_next(avctx, &pic); ++ if (err < 0) ++ break; ++ av_assert0(pic); + +- pic->encode_order = ctx->encode_order++; ++ pic->encode_order = ctx->encode_order + ++ (av_fifo_size(ctx->encode_fifo) / sizeof(VAAPIEncodePicture *)); + +- err = vaapi_encode_issue(avctx, pic); +- if (err < 0) { +- av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err); ++ err = vaapi_encode_issue(avctx, pic); ++ if (err < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err); ++ return err; ++ } ++ ++ av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL); ++ } ++ ++ if (!av_fifo_size(ctx->encode_fifo)) + return err; ++ ++ if (av_fifo_size(ctx->encode_fifo) < ctx->async_depth * sizeof(VAAPIEncodePicture *) && ++ !ctx->end_of_stream) { ++ av_fifo_generic_peek(ctx->encode_fifo, &pic, sizeof(pic), NULL); ++ err = vaapi_encode_wait(avctx, pic, 0); ++ if (err < 0) ++ return err; + } + ++ av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL); ++ ctx->encode_order = pic->encode_order + 1; ++ + err = vaapi_encode_output(avctx, pic, pkt); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Output failed: %d.\n", err); +@@ -1206,7 +1262,7 @@ int ff_vaapi_encode_receive_packet(AVCod + pkt->dts = 
ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff; + } else { + pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) % +- (3 * ctx->output_delay)]; ++ (3 * ctx->output_delay + ctx->async_depth)]; + } + av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts %"PRId64".\n", + pkt->pts, pkt->dts); +@@ -2520,6 +2576,11 @@ av_cold int ff_vaapi_encode_init(AVCodec + } + } + ++ ctx->encode_fifo = av_fifo_alloc(MAX_ASYNC_DEPTH * ++ sizeof(VAAPIEncodePicture *)); ++ if (!ctx->encode_fifo) ++ return AVERROR(ENOMEM); ++ + return 0; + + fail: +@@ -2552,6 +2613,7 @@ av_cold int ff_vaapi_encode_close(AVCode + + av_freep(&ctx->codec_sequence_params); + av_freep(&ctx->codec_picture_params); ++ av_fifo_freep(&ctx->encode_fifo); + + av_buffer_unref(&ctx->recon_frames_ref); + av_buffer_unref(&ctx->input_frames_ref); +Index: jellyfin-ffmpeg/libavcodec/vaapi_encode.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/vaapi_encode.h ++++ jellyfin-ffmpeg/libavcodec/vaapi_encode.h +@@ -29,6 +29,7 @@ + + #include "libavutil/hwcontext.h" + #include "libavutil/hwcontext_vaapi.h" ++#include "libavutil/fifo.h" + + #include "avcodec.h" + #include "hwconfig.h" +@@ -47,6 +48,7 @@ enum { + MAX_TILE_ROWS = 22, + // A.4.1: table A.6 allows at most 20 tile columns for any level. + MAX_TILE_COLS = 20, ++ MAX_ASYNC_DEPTH = 64, + }; + + extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[]; +@@ -297,7 +299,8 @@ typedef struct VAAPIEncodeContext { + // Timestamp handling. + int64_t first_pts; + int64_t dts_pts_diff; +- int64_t ts_ring[MAX_REORDER_DELAY * 3]; ++ int64_t ts_ring[MAX_REORDER_DELAY * 3 + ++ MAX_ASYNC_DEPTH]; + + // Slice structure. + int slice_block_rows; +@@ -345,6 +348,10 @@ typedef struct VAAPIEncodeContext { + int roi_warned; + + AVFrame *frame; ++ ++ AVFifoBuffer *encode_fifo; ++ ++ int async_depth; + } VAAPIEncodeContext; + + enum { +@@ -455,7 +462,11 @@ int ff_vaapi_encode_close(AVCodecContext + { "b_depth", \ + "Maximum B-frame reference depth", \ + OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \ +- { .i64 = 1 }, 1, INT_MAX, FLAGS } ++ { .i64 = 1 }, 1, INT_MAX, FLAGS }, \ ++ { "async_depth", "Maximum processing parallelism. 
" \ ++ "Increase this to improve single channel performance", \ ++ OFFSET(common.async_depth), AV_OPT_TYPE_INT, \ ++ { .i64 = 4 }, 0, MAX_ASYNC_DEPTH, FLAGS } + + #define VAAPI_ENCODE_RC_MODE(name, desc) \ + { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \ diff --git a/debian/patches/series b/debian/patches/series index 5ca70657cc8..ffda73fbb33 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -24,3 +24,4 @@ 0024-add-sub2video-option-to-subtitles-filter.patch 0025-add-alphasrc-source-video-filter.patch 0026-add-fixes-for-armhf-build-with-gcc-11.patch +0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch From a8d4a7992ba531e631db89af15f3a40626e10507 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 15 Nov 2021 02:44:08 +0800 Subject: [PATCH 37/41] add fixes for alignment issue when upload to qsv --- ...r-alignment-issue-when-upload-to-qsv.patch | 96 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 97 insertions(+) create mode 100644 debian/patches/0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch diff --git a/debian/patches/0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch b/debian/patches/0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch new file mode 100644 index 00000000000..fc38abd89bc --- /dev/null +++ b/debian/patches/0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch @@ -0,0 +1,96 @@ +Index: jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/hwcontext_qsv.c ++++ jellyfin-ffmpeg/libavutil/hwcontext_qsv.c +@@ -47,6 +47,7 @@ + #include "pixfmt.h" + #include "pixdesc.h" + #include "time.h" ++#include "imgutils.h" + + #define QSV_VERSION_ATLEAST(MAJOR, MINOR) \ + (MFX_VERSION_MAJOR > (MAJOR) || \ +@@ -90,6 +91,7 @@ typedef struct QSVFramesContext { + + mfxExtOpaqueSurfaceAlloc opaque_alloc; + mfxExtBuffer *ext_buffers[1]; ++ AVFrame realigned_tmp_frame; + } QSVFramesContext; + + static const struct { +@@ -226,6 +228,7 @@ static void qsv_frames_uninit(AVHWFrames + av_freep(&s->surface_ptrs); + av_freep(&s->surfaces_internal); + av_freep(&s->handle_pairs_internal); ++ av_frame_unref(&s->realigned_tmp_frame); + av_buffer_unref(&s->child_frames_ref); + } + +@@ -1036,7 +1039,7 @@ static int qsv_transfer_data_to(AVHWFram + mfxStatus err; + int ret = 0; + /* make a copy if the input is not padded as libmfx requires */ +- AVFrame tmp_frame; ++ AVFrame *tmp_frame = &s->realigned_tmp_frame; + const AVFrame *src_frame; + int realigned = 0; + +@@ -1067,22 +1070,37 @@ static int qsv_transfer_data_to(AVHWFram + + if (src->height & 15 || src->linesize[0] & 15) { + realigned = 1; +- memset(&tmp_frame, 0, sizeof(tmp_frame)); +- tmp_frame.format = src->format; +- tmp_frame.width = FFALIGN(src->width, 16); +- tmp_frame.height = FFALIGN(src->height, 16); +- ret = av_frame_get_buffer(&tmp_frame, 0); +- if (ret < 0) +- return ret; +- +- ret = av_frame_copy(&tmp_frame, src); ++ if (tmp_frame->format != src->format || ++ tmp_frame->width != FFALIGN(src->width, 16) || ++ tmp_frame->height != FFALIGN(src->height, 16)) { ++ ptrdiff_t linesize[4] = {tmp_frame->linesize[0], ++ tmp_frame->linesize[1], ++ tmp_frame->linesize[2], ++ tmp_frame->linesize[3]}; ++ av_frame_unref(tmp_frame); ++ ++ tmp_frame->format = src->format; ++ tmp_frame->width = FFALIGN(src->width, 16); ++ tmp_frame->height = FFALIGN(src->height, 16); ++ ret = av_frame_get_buffer(tmp_frame, 0); ++ if (ret < 0) ++ return ret; ++ ++ ret = 
av_image_fill_black(tmp_frame->data, linesize, tmp_frame->format, ++ 0, tmp_frame->width, tmp_frame->height); ++ if (ret < 0) { ++ av_frame_unref(tmp_frame); ++ return ret; ++ } ++ } ++ ret = av_frame_copy(tmp_frame, src); + if (ret < 0) { +- av_frame_unref(&tmp_frame); ++ av_frame_unref(tmp_frame); + return ret; + } + } + +- src_frame = realigned ? &tmp_frame : src; ++ src_frame = realigned ? tmp_frame : src; + + if (!s->session_upload) { + if (s->child_frames_ref) +@@ -1114,9 +1132,6 @@ static int qsv_transfer_data_to(AVHWFram + return AVERROR_UNKNOWN; + } + +- if (realigned) +- av_frame_unref(&tmp_frame); +- + return 0; + } + diff --git a/debian/patches/series b/debian/patches/series index ffda73fbb33..8a9ecb2e8e2 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -25,3 +25,4 @@ 0025-add-alphasrc-source-video-filter.patch 0026-add-fixes-for-armhf-build-with-gcc-11.patch 0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch +0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch From cc8a2ff0852f3fff24c65449b032492b2d4a85f3 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 29 Nov 2021 01:44:53 +0800 Subject: [PATCH 38/41] add fixes for qsv overlay to allow external pgssubs --- ...sv-overlay-to-allow-external-pgssubs.patch | 92 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 93 insertions(+) create mode 100644 debian/patches/0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch diff --git a/debian/patches/0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch b/debian/patches/0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch new file mode 100644 index 00000000000..b5d682f29e4 --- /dev/null +++ b/debian/patches/0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch @@ -0,0 +1,92 @@ +Index: jellyfin-ffmpeg/libavfilter/vf_overlay_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_overlay_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_overlay_qsv.c +@@ -230,40 +230,48 @@ static int config_overlay_input(AVFilter + + static int process_frame(FFFrameSync *fs) + { +- AVFilterContext *ctx = fs->parent; +- QSVOverlayContext *s = fs->opaque; +- AVFrame *frame = NULL; +- int ret = 0, i; +- +- for (i = 0; i < ctx->nb_inputs; i++) { +- ret = ff_framesync_get_frame(fs, i, &frame, 0); +- if (ret == 0) +- ret = ff_qsvvpp_filter_frame(s->qsv, ctx->inputs[i], frame); +- if (ret < 0 && ret != AVERROR(EAGAIN)) +- break; +- } ++ AVFilterContext *ctx = fs->parent; ++ QSVOverlayContext *s = fs->opaque; ++ AVFilterLink *in0 = ctx->inputs[0]; ++ AVFilterLink *in1 = ctx->inputs[1]; ++ AVFrame *main = NULL; ++ AVFrame *overlay = NULL; ++ int ret = 0; ++ ++ ret = ff_framesync_get_frame(fs, 0, &main, 0); ++ if (ret < 0) ++ return ret; ++ ret = ff_framesync_get_frame(fs, 1, &overlay, 0); ++ if (ret < 0) ++ return ret; + ++ if (!main) ++ return AVERROR_BUG; ++ ++ /* composite main frame */ ++ ret = ff_qsvvpp_filter_frame(s->qsv, in0, main); ++ if (ret < 0 && ret != AVERROR(EAGAIN)) ++ return ret; ++ ++ /* composite overlay frame */ ++ /* or overwrite main frame again if the overlay frame isn't ready yet */ ++ ret = ff_qsvvpp_filter_frame(s->qsv, overlay ? in1 : in0, overlay ? 
overlay : main); + return ret; + } + + static int init_framesync(AVFilterContext *ctx) + { +- QSVOverlayContext *s = ctx->priv; +- int ret, i; ++ QSVOverlayContext *s = ctx->priv; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ int ret; + +- s->fs.on_event = process_frame; +- s->fs.opaque = s; +- ret = ff_framesync_init(&s->fs, ctx, ctx->nb_inputs); ++ ret = ff_framesync_init_dualinput(&s->fs, ctx); + if (ret < 0) + return ret; + +- for (i = 0; i < ctx->nb_inputs; i++) { +- FFFrameSyncIn *in = &s->fs.in[i]; +- in->before = EXT_STOP; +- in->after = EXT_INFINITY; +- in->sync = i ? 1 : 2; +- in->time_base = ctx->inputs[i]->time_base; +- } ++ s->fs.time_base = outlink->time_base; ++ s->fs.on_event = process_frame; ++ s->fs.opaque = s; + + return ff_framesync_configure(&s->fs); + } +@@ -281,14 +289,6 @@ static int config_output(AVFilterLink *o + (in0->format != AV_PIX_FMT_QSV && in1->format == AV_PIX_FMT_QSV)) { + av_log(ctx, AV_LOG_ERROR, "Mixing hardware and software pixel formats is not supported.\n"); + return AVERROR(EINVAL); +- } else if (in0->format == AV_PIX_FMT_QSV) { +- AVHWFramesContext *hw_frame0 = (AVHWFramesContext *)in0->hw_frames_ctx->data; +- AVHWFramesContext *hw_frame1 = (AVHWFramesContext *)in1->hw_frames_ctx->data; +- +- if (hw_frame0->device_ctx != hw_frame1->device_ctx) { +- av_log(ctx, AV_LOG_ERROR, "Inputs with different underlying QSV devices are forbidden.\n"); +- return AVERROR(EINVAL); +- } + } + + outlink->w = vpp->var_values[VAR_MW]; diff --git a/debian/patches/series b/debian/patches/series index 8a9ecb2e8e2..90db6b2ec01 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -26,3 +26,4 @@ 0026-add-fixes-for-armhf-build-with-gcc-11.patch 0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch 0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch +0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch From 0ffdc6e719dcb7d9cb044f2ad6d82a28e43840f4 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sat, 27 Nov 2021 17:43:28 +0800 Subject: [PATCH 39/41] add fixes for a vaapi-qsv mapping error --- ...-fixes-for-a-vaapi-qsv-mapping-error.patch | 61 +++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 62 insertions(+) create mode 100644 debian/patches/0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch diff --git a/debian/patches/0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch b/debian/patches/0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch new file mode 100644 index 00000000000..b24b23f118d --- /dev/null +++ b/debian/patches/0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch @@ -0,0 +1,61 @@ +Index: jellyfin-ffmpeg/libavcodec/qsvenc_hevc.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsvenc_hevc.c ++++ jellyfin-ffmpeg/libavcodec/qsvenc_hevc.c +@@ -260,9 +260,9 @@ static const AVClass class = { + static const AVCodecDefault qsv_enc_defaults[] = { + { "b", "1M" }, + { "refs", "0" }, +- // same as the x264 default ++ // same as the x265 default + { "g", "248" }, +- { "bf", "8" }, ++ { "bf", "4" }, + { "qmin", "-1" }, + { "qmax", "-1" }, + { "trellis", "-1" }, +Index: jellyfin-ffmpeg/libavcodec/vaapi_decode.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/vaapi_decode.c ++++ jellyfin-ffmpeg/libavcodec/vaapi_decode.c +@@ -572,22 +572,24 @@ static int vaapi_decode_make_config(AVCo + if (err < 0) + goto fail; + +- frames->initial_pool_size = 1; +- // Add per-codec number of surfaces used for storing reference 
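
The guard being added in this hunk matters for surface sharing: a caller that creates the VAAPI frames context itself, for example to later map the decoder's surfaces to QSV, already knows the pool size it needs, and the decoder should keep it. A sketch of such a caller under that assumption (vaapi_device_ref and the pool size of 32 are illustrative):

    #include "libavcodec/avcodec.h"
    #include "libavutil/hwcontext.h"

    /* assumed to exist: a VAAPI device from av_hwdevice_ctx_create() */
    extern AVBufferRef *vaapi_device_ref;

    static enum AVPixelFormat get_format(AVCodecContext *avctx,
                                         const enum AVPixelFormat *fmts)
    {
        AVBufferRef *frames_ref = av_hwframe_ctx_alloc(vaapi_device_ref);
        AVHWFramesContext *frames;

        if (!frames_ref)
            return AV_PIX_FMT_NONE;
        frames = (AVHWFramesContext *)frames_ref->data;

        frames->format    = AV_PIX_FMT_VAAPI;
        frames->sw_format = AV_PIX_FMT_NV12;
        frames->width     = avctx->coded_width;
        frames->height    = avctx->coded_height;
        /* a fixed pool size the caller has computed (DPB plus headroom);
         * with the guard above the decoder keeps this value instead of
         * overwriting it with its own heuristic */
        frames->initial_pool_size = 32;

        if (av_hwframe_ctx_init(frames_ref) < 0) {
            av_buffer_unref(&frames_ref);
            return AV_PIX_FMT_NONE;
        }
        avctx->hw_frames_ctx = frames_ref;
        return AV_PIX_FMT_VAAPI;
    }
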
frames. +- switch (avctx->codec_id) { +- case AV_CODEC_ID_H264: +- case AV_CODEC_ID_HEVC: +- frames->initial_pool_size += 16; +- break; +- case AV_CODEC_ID_VP9: +- case AV_CODEC_ID_AV1: +- frames->initial_pool_size += 8; +- break; +- case AV_CODEC_ID_VP8: +- frames->initial_pool_size += 3; +- break; +- default: +- frames->initial_pool_size += 2; ++ if (!frames->initial_pool_size) { ++ frames->initial_pool_size = 1; ++ // Add per-codec number of surfaces used for storing reference frames. ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ case AV_CODEC_ID_HEVC: ++ frames->initial_pool_size += 16; ++ break; ++ case AV_CODEC_ID_VP9: ++ case AV_CODEC_ID_AV1: ++ frames->initial_pool_size += 8; ++ break; ++ case AV_CODEC_ID_VP8: ++ frames->initial_pool_size += 3; ++ break; ++ default: ++ frames->initial_pool_size += 2; ++ } + } + } + diff --git a/debian/patches/series b/debian/patches/series index 90db6b2ec01..1c6e70b155f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -27,3 +27,4 @@ 0027-add-fixes-to-improve-the-performance-of-vaapi-encode.patch 0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch 0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch +0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch From d9454b53cc75ac9405c18da5fb2fe7b6796cdb60 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Mon, 29 Nov 2021 01:36:37 +0800 Subject: [PATCH 40/41] add a vaapi overlay filter --- .../0031-add-a-vaapi-overlay-filter.patch | 525 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 526 insertions(+) create mode 100644 debian/patches/0031-add-a-vaapi-overlay-filter.patch diff --git a/debian/patches/0031-add-a-vaapi-overlay-filter.patch b/debian/patches/0031-add-a-vaapi-overlay-filter.patch new file mode 100644 index 00000000000..fd41b3f1737 --- /dev/null +++ b/debian/patches/0031-add-a-vaapi-overlay-filter.patch @@ -0,0 +1,525 @@ +Index: jellyfin-ffmpeg/configure +=================================================================== +--- jellyfin-ffmpeg.orig/configure ++++ jellyfin-ffmpeg/configure +@@ -3601,6 +3601,7 @@ openclsrc_filter_deps="opencl" + overlay_opencl_filter_deps="opencl" + overlay_qsv_filter_deps="libmfx" + overlay_qsv_filter_select="qsvvpp" ++overlay_vaapi_filter_deps="vaapi" + overlay_vulkan_filter_deps="vulkan libglslang" + owdenoise_filter_deps="gpl" + pad_opencl_filter_deps="opencl" +@@ -3662,6 +3663,7 @@ tonemap_vaapi_filter_deps="vaapi VAProcF + tonemap_opencl_filter_deps="opencl const_nan" + transpose_opencl_filter_deps="opencl" + transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" ++overlay_vaapi_filter_deps="vaapi VAProcPipelineCaps_blend_flags" + unsharp_opencl_filter_deps="opencl" + uspp_filter_deps="gpl avcodec" + vaguedenoiser_filter_deps="gpl" +@@ -6712,6 +6714,7 @@ if enabled vaapi; then + check_struct "va/va.h" "VADecPictureParameterBufferAV1" bit_depth_idx + check_type "va/va.h va/va_vpp.h" "VAProcFilterParameterBufferHDRToneMapping" + check_struct "va/va.h va/va_vpp.h" "VAProcPipelineCaps" rotation_flags ++ check_struct "va/va.h va/va_vpp.h" "VAProcPipelineCaps" blend_flags + check_type "va/va.h va/va_enc_hevc.h" "VAEncPictureParameterBufferHEVC" + check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG" + check_type "va/va.h va/va_enc_vp8.h" "VAEncPictureParameterBufferVP8" +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ 
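
The new configure dependency reduces to a compile probe against libva; what check_struct "va/va.h va/va_vpp.h" "VAProcPipelineCaps" blend_flags effectively verifies is sketched by:

    #include <va/va.h>
    #include <va/va_vpp.h>

    int main(void)
    {
        VAProcPipelineCaps caps = { 0 };
        (void)caps.blend_flags;   /* fails to compile on old libva */
        return 0;
    }

On a libva too old to know about blending the probe fails, overlay_vaapi_filter_deps is unmet, and the filter is quietly left out of the build instead of breaking it.
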
-353,6 +353,7 @@ OBJS-$(CONFIG_OVERLAY_CUDA_FILTER) + OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ + opencl/overlay.o framesync.o + OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o ++OBJS-$(CONFIG_OVERLAY_VAAPI_FILTER) += vf_overlay_vaapi.o framesync.o vaapi_vpp.o + OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o + OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o + OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o +Index: jellyfin-ffmpeg/libavfilter/allfilters.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/allfilters.c ++++ jellyfin-ffmpeg/libavfilter/allfilters.c +@@ -336,6 +336,7 @@ extern AVFilter ff_vf_oscilloscope; + extern AVFilter ff_vf_overlay; + extern AVFilter ff_vf_overlay_opencl; + extern AVFilter ff_vf_overlay_qsv; ++extern AVFilter ff_vf_overlay_vaapi; + extern AVFilter ff_vf_overlay_vulkan; + extern AVFilter ff_vf_overlay_cuda; + extern AVFilter ff_vf_owdenoise; +Index: jellyfin-ffmpeg/libavfilter/vf_overlay_vaapi.c +=================================================================== +--- /dev/null ++++ jellyfin-ffmpeg/libavfilter/vf_overlay_vaapi.c +@@ -0,0 +1,468 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++#include <string.h>
++
++#include "libavutil/avassert.h"
++#include "libavutil/mem.h"
++#include "libavutil/opt.h"
++#include "libavutil/pixdesc.h"
++
++#include "avfilter.h"
++#include "framesync.h"
++#include "formats.h"
++#include "internal.h"
++#include "vaapi_vpp.h"
++
++typedef struct OverlayVAAPIContext {
++    VAAPIVPPContext vpp_ctx; // must be the first field
++    FFFrameSync     fs;
++    int             global_alpha_flag;
++    int             premultiplied_alpha_flag;
++    int             pixel_alpha_enabled;
++    int             overlay_ox;
++    int             overlay_oy;
++    int             overlay_ow;
++    int             overlay_oh;
++    float           alpha;
++    int             opt_repeatlast;
++    int             opt_shortest;
++    int             opt_eof_action;
++} OverlayVAAPIContext;
++
++static int overlay_vaapi_query_formats(AVFilterContext *ctx)
++{
++    int ret;
++    enum {
++        MAIN    = 0,
++        OVERLAY = 1,
++    };
++
++    static const enum AVPixelFormat pix_fmts[] = {
++        AV_PIX_FMT_VAAPI,
++        AV_PIX_FMT_NONE
++    };
++
++    ret = ff_formats_ref(ff_make_format_list(pix_fmts), &ctx->inputs[MAIN]->outcfg.formats);
++    if (ret < 0)
++        return ret;
++
++    ret = ff_formats_ref(ff_make_format_list(pix_fmts), &ctx->inputs[OVERLAY]->outcfg.formats);
++    if (ret < 0)
++        return ret;
++
++    ret = ff_formats_ref(ff_make_format_list(pix_fmts), &ctx->outputs[0]->incfg.formats);
++    if (ret < 0)
++        return ret;
++
++    return 0;
++}
++
++static int overlay_vaapi_build_filter_params(AVFilterContext *avctx)
++{
++    VAAPIVPPContext *vpp_ctx = avctx->priv;
++    OverlayVAAPIContext *ctx = avctx->priv;
++    VAProcPipelineCaps pipeline_caps;
++    VAStatus vas;
++
++    memset(&pipeline_caps, 0, sizeof(pipeline_caps));
++    vas = vaQueryVideoProcPipelineCaps(vpp_ctx->hwctx->display,
++                                       vpp_ctx->va_context,
++                                       NULL, 0,
++                                       &pipeline_caps);
++    if (vas != VA_STATUS_SUCCESS) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to query pipeline "
++               "caps: %d (%s).\n", vas, vaErrorStr(vas));
++        return AVERROR(EIO);
++    }
++
++    if (!pipeline_caps.blend_flags) {
++        av_log(avctx, AV_LOG_ERROR, "VAAPI driver doesn't support overlay\n");
++        return AVERROR(EINVAL);
++    }
++
++    ctx->global_alpha_flag = pipeline_caps.blend_flags & VA_BLEND_GLOBAL_ALPHA;
++    if (!ctx->global_alpha_flag) {
++        av_log(avctx, AV_LOG_ERROR, "VAAPI driver doesn't support global alpha blending\n");
++        return AVERROR(EINVAL);
++    }
++
++    ctx->premultiplied_alpha_flag = pipeline_caps.blend_flags & VA_BLEND_PREMULTIPLIED_ALPHA;
++    if (!ctx->premultiplied_alpha_flag) {
++        av_log(avctx, AV_LOG_WARNING, "VAAPI driver doesn't support premultiplied alpha blending, "
++               "the alpha plane of the overlay frames will be ignored\n");
++    }
++
++    return 0;
++}
++
++
++static int overlay_vaapi_render_picture(AVFilterContext *avctx,
++                                        VAProcPipelineParameterBuffer *params,
++                                        VAProcPipelineParameterBuffer *subpic_params,
++                                        AVFrame *output_frame,
++                                        int passthrough)
++{
++    VAAPIVPPContext *ctx = avctx->priv;
++    VASurfaceID output_surface;
++    VABufferID params_id;
++    VABufferID subpic_params_id;
++    VAStatus vas;
++    int err = 0;
++
++    output_surface = (VASurfaceID)(uintptr_t)output_frame->data[3];
++
++    vas = vaBeginPicture(ctx->hwctx->display,
++                         ctx->va_context, output_surface);
++    if (vas != VA_STATUS_SUCCESS) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to attach new picture: "
++               "%d (%s).\n", vas, vaErrorStr(vas));
++        err = AVERROR(EIO);
++        goto fail;
++    }
++
++    vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
++ 
VAProcPipelineParameterBufferType, ++ sizeof(*params), 1, params, ¶ms_id); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_begin; ++ } ++ av_log(avctx, AV_LOG_DEBUG, "Pipeline parameter buffer is %#x.\n", ++ params_id); ++ ++ if (!passthrough) { ++ vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context, ++ VAProcPipelineParameterBufferType, ++ sizeof(*subpic_params), 1, subpic_params, &subpic_params_id); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_begin; ++ } ++ av_log(avctx, AV_LOG_DEBUG, "Pipeline subpic parameter buffer is %#x.\n", ++ subpic_params_id); ++ } ++ ++ vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context, ++ ¶ms_id, 1); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to render parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_begin; ++ } ++ ++ if (!passthrough) { ++ vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context, ++ &subpic_params_id, 1); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to render subpic parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_begin; ++ } ++ } ++ ++ vas = vaEndPicture(ctx->hwctx->display, ctx->va_context); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to start picture processing: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ err = AVERROR(EIO); ++ goto fail_after_render; ++ } ++ ++ if (CONFIG_VAAPI_1 || ctx->hwctx->driver_quirks & ++ AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS) { ++ vas = vaDestroyBuffer(ctx->hwctx->display, params_id); ++ if (vas != VA_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to free parameter buffer: " ++ "%d (%s).\n", vas, vaErrorStr(vas)); ++ // And ignore. ++ } ++ } ++ ++ return 0; ++ ++ // We want to make sure that if vaBeginPicture has been called, we also ++ // call vaRenderPicture and vaEndPicture. These calls may well fail or ++ // do something else nasty, but once we're in this failure case there ++ // isn't much else we can do. 
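
The subpicture parameter buffer submitted above is the one that later carries the blend description; the relevant libva structure in isolation (values illustrative):

    #include <va/va.h>
    #include <va/va_vpp.h>

    /* sketch: build the blend description attached to the overlay's
     * pipeline parameter buffer (subpic_params.blend_state = &blend) */
    static VABlendState make_blend_state(float global_alpha, int pixel_alpha)
    {
        VABlendState blend = { 0 };

        blend.flags = VA_BLEND_GLOBAL_ALPHA;    /* uniform transparency */
        if (pixel_alpha)                        /* only when the driver */
            blend.flags |= VA_BLEND_PREMULTIPLIED_ALPHA; /* caps say so */
        blend.global_alpha = global_alpha;      /* 0.0 .. 1.0           */

        return blend;
    }
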
++fail_after_begin: ++ vaRenderPicture(ctx->hwctx->display, ctx->va_context, ¶ms_id, 1); ++fail_after_render: ++ vaEndPicture(ctx->hwctx->display, ctx->va_context); ++fail: ++ return err; ++} ++ ++static int overlay_vaapi_blend(FFFrameSync *fs) ++{ ++ AVFilterContext *avctx = fs->parent; ++ AVFilterLink *outlink = avctx->outputs[0]; ++ OverlayVAAPIContext *ctx = avctx->priv; ++ VAAPIVPPContext *vpp_ctx = avctx->priv; ++ AVFrame *input_main, *input_overlay; ++ AVFrame *output; ++ VAProcPipelineParameterBuffer params, subpic_params; ++ VABlendState blend_state; ++ VARectangle overlay_region, output_region; ++ int err, passthrough = 0; ++ ++ err = ff_framesync_get_frame(fs, 0, &input_main, 0); ++ if (err < 0) ++ return err; ++ err = ff_framesync_get_frame(fs, 1, &input_overlay, 0); ++ if (err < 0) ++ return err; ++ ++ if (!input_main) ++ return AVERROR_BUG; ++ ++ if (!input_overlay) ++ passthrough = 1; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Filter main: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(input_main->format), ++ input_main->width, input_main->height, input_main->pts); ++ ++ if (input_overlay) { ++ av_log(avctx, AV_LOG_DEBUG, "Filter overlay: %s, %ux%u (%"PRId64").\n", ++ av_get_pix_fmt_name(input_overlay->format), ++ input_overlay->width, input_overlay->height, input_overlay->pts); ++ } ++ ++ if (vpp_ctx->va_context == VA_INVALID_ID) ++ return AVERROR(EINVAL); ++ ++ output = ff_get_video_buffer(outlink, outlink->w, outlink->h); ++ if (!output) { ++ err = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ err = av_frame_copy_props(output, input_main); ++ if (err < 0) ++ goto fail; ++ ++ err = ff_vaapi_vpp_init_params(avctx, ¶ms, ++ input_main, output); ++ if (err < 0) ++ goto fail; ++ ++ output_region = (VARectangle) { ++ .x = 0, ++ .y = 0, ++ .width = output->width, ++ .height = output->height, ++ }; ++ ++ params.filters = &vpp_ctx->filter_buffers[0]; ++ params.num_filters = vpp_ctx->nb_filter_buffers; ++ ++ params.output_region = &output_region; ++ params.output_background_color = VAAPI_VPP_BACKGROUND_BLACK; ++ ++ if (!passthrough) { ++ overlay_region = (VARectangle) { ++ .x = ctx->overlay_ox, ++ .y = ctx->overlay_oy, ++ .width = ctx->overlay_ow ? ctx->overlay_ow : input_overlay->width, ++ .height = ctx->overlay_oh ? 
ctx->overlay_oh : input_overlay->height,
++        };
++
++        if (overlay_region.x + overlay_region.width > input_main->width ||
++            overlay_region.y + overlay_region.height > input_main->height) {
++            av_log(ctx, AV_LOG_WARNING,
++                   "The overlay image exceeds the bounds of the main image; "
++                   "the overlay will be cropped to fit the main image.\n");
++        }
++
++        memcpy(&subpic_params, &params, sizeof(subpic_params));
++
++        blend_state.flags = VA_BLEND_GLOBAL_ALPHA;
++        if (ctx->pixel_alpha_enabled)
++            blend_state.flags |= VA_BLEND_PREMULTIPLIED_ALPHA;
++
++        blend_state.global_alpha = ctx->alpha;
++        subpic_params.blend_state = &blend_state;
++
++        subpic_params.surface = (VASurfaceID)(uintptr_t)input_overlay->data[3];
++        subpic_params.output_region = &overlay_region;
++    }
++
++    err = overlay_vaapi_render_picture(avctx, &params, &subpic_params, output, passthrough);
++    if (err < 0)
++        goto fail;
++
++    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
++           av_get_pix_fmt_name(output->format),
++           output->width, output->height, output->pts);
++
++    return ff_filter_frame(outlink, output);
++
++fail:
++    av_frame_free(&output);
++    return err;
++}
++
++static int overlay_vaapi_init_framesync(AVFilterContext *avctx)
++{
++    OverlayVAAPIContext *ctx = avctx->priv;
++    AVFilterLink *outlink = avctx->outputs[0];
++    int ret;
++
++    ret = ff_framesync_init_dualinput(&ctx->fs, avctx);
++    if (ret < 0)
++        return ret;
++
++    ctx->fs.opt_repeatlast = ctx->opt_repeatlast;
++    ctx->fs.opt_shortest   = ctx->opt_shortest;
++    ctx->fs.opt_eof_action = ctx->opt_eof_action;
++    ctx->fs.time_base      = outlink->time_base;
++    ctx->fs.on_event       = overlay_vaapi_blend;
++    ctx->fs.opaque         = ctx;
++
++    return ff_framesync_configure(&ctx->fs);
++}
++
++static int overlay_vaapi_config_output(AVFilterLink *outlink)
++{
++    AVFilterContext *avctx   = outlink->src;
++    OverlayVAAPIContext *ctx = avctx->priv;
++    VAAPIVPPContext *vpp_ctx = avctx->priv;
++    AVFilterLink *inlink0 = avctx->inputs[0];
++    AVFilterLink *inlink1 = avctx->inputs[1];
++    AVHWFramesContext *frames_ctx1 =
++        (AVHWFramesContext*)inlink1->hw_frames_ctx->data;
++    const AVPixFmtDescriptor *desc;
++    int err;
++
++    outlink->time_base = inlink0->time_base;
++    vpp_ctx->output_width = inlink0->w;
++    vpp_ctx->output_height = inlink0->h;
++
++    err = overlay_vaapi_init_framesync(avctx);
++    if (err < 0)
++        return err;
++
++    err = ff_vaapi_vpp_config_output(outlink);
++    if (err < 0)
++        return err;
++
++    desc = av_pix_fmt_desc_get(frames_ctx1->sw_format);
++    if (!desc)
++        return AVERROR(EINVAL);
++
++    ctx->pixel_alpha_enabled = (desc->flags & AV_PIX_FMT_FLAG_ALPHA)
++                               && ctx->premultiplied_alpha_flag;
++
++    return 0;
++}
++
++static av_cold int overlay_vaapi_init(AVFilterContext *avctx)
++{
++    VAAPIVPPContext *vpp_ctx = avctx->priv;
++
++    ff_vaapi_vpp_ctx_init(avctx);
++    vpp_ctx->build_filter_params = overlay_vaapi_build_filter_params;
++    vpp_ctx->pipeline_uninit     = ff_vaapi_vpp_pipeline_uninit;
++    vpp_ctx->output_format       = AV_PIX_FMT_NONE;
++
++    return 0;
++}
++
++static int overlay_vaapi_activate(AVFilterContext *avctx)
++{
++    OverlayVAAPIContext *ctx = avctx->priv;
++
++    return ff_framesync_activate(&ctx->fs);
++}
++
++static av_cold void overlay_vaapi_uninit(AVFilterContext *avctx)
++{
++    OverlayVAAPIContext *ctx = avctx->priv;
++
++    ff_framesync_uninit(&ctx->fs);
++}
++
++#define OFFSET(x) offsetof(OverlayVAAPIContext, x)
++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
++static const AVOption overlay_vaapi_options[] = {
++    { "x", "Overlay x position",
++ 
OFFSET(overlay_ox), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "y", "Overlay y position", ++ OFFSET(overlay_oy), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "w", "Overlay width", ++ OFFSET(overlay_ow), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "h", "Overlay height", ++ OFFSET(overlay_oh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, ++ { "alpha", "Overlay global alpha", ++ OFFSET(alpha), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 1.0, .flags = FLAGS }, ++ { "eof_action", "Action to take when encountering EOF from secondary input ", ++ OFFSET(opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT }, ++ EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" }, ++ { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" }, ++ { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" }, ++ { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, "eof_action" }, ++ { "shortest", "force termination when the shortest input terminates", OFFSET(opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, ++ { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, ++ { NULL }, ++}; ++ ++AVFILTER_DEFINE_CLASS(overlay_vaapi); ++ ++static const AVFilterPad overlay_vaapi_inputs[] = { ++ { ++ .name = "main", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .get_video_buffer = ff_default_get_video_buffer, ++ .config_props = &ff_vaapi_vpp_config_input, ++ }, ++ { ++ .name = "overlay", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .get_video_buffer = ff_default_get_video_buffer, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad overlay_vaapi_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = &overlay_vaapi_config_output, ++ }, ++ { NULL } ++}; ++ ++AVFilter ff_vf_overlay_vaapi = { ++ .name = "overlay_vaapi", ++ .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"), ++ .priv_size = sizeof(OverlayVAAPIContext), ++ .priv_class = &overlay_vaapi_class, ++ .init = &overlay_vaapi_init, ++ .uninit = &overlay_vaapi_uninit, ++ .query_formats = &overlay_vaapi_query_formats, ++ .activate = &overlay_vaapi_activate, ++ .inputs = overlay_vaapi_inputs, ++ .outputs = overlay_vaapi_outputs, ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++}; diff --git a/debian/patches/series b/debian/patches/series index 1c6e70b155f..ec652d1469b 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -28,3 +28,4 @@ 0028-add-fixes-for-alignment-issue-when-upload-to-qsv.patch 0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch 0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch +0031-add-a-vaapi-overlay-filter.patch From 205cbdd109d9a32d6d501f2d53b3986574b481a3 Mon Sep 17 00:00:00 2001 From: nyanmisaka Date: Sat, 18 Dec 2021 00:00:04 +0800 Subject: [PATCH 41/41] add async support for qsv vpp --- .../0032-add-async-support-for-qsv-vpp.patch | 2784 +++++++++++++++++ debian/patches/series | 1 + 2 files changed, 2785 insertions(+) create mode 100644 debian/patches/0032-add-async-support-for-qsv-vpp.patch diff --git a/debian/patches/0032-add-async-support-for-qsv-vpp.patch b/debian/patches/0032-add-async-support-for-qsv-vpp.patch new file mode 100644 index 00000000000..bd14e399eee --- /dev/null +++ 
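
With the filter wired into configure, the Makefile and allfilters.c as shown above, a VAAPI-enabled build can use it directly; an illustrative invocation (paths and option values hypothetical):

    ffmpeg -init_hw_device vaapi=va:/dev/dri/renderD128 -filter_hw_device va \
           -hwaccel vaapi -hwaccel_output_format vaapi -i main.mp4 -i logo.png \
           -filter_complex "[1:v]format=bgra,hwupload[ovl];[0:v][ovl]overlay_vaapi=x=40:y=40:alpha=0.8" \
           -c:v h264_vaapi out.mp4

The main input stays on the GPU end to end while the RGBA overlay is uploaded to the same device; eof_action, shortest and repeatlast are forwarded to the framesync layer, so end-of-stream behaviour matches the software overlay filter.
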
b/debian/patches/0032-add-async-support-for-qsv-vpp.patch @@ -0,0 +1,2784 @@ +Index: jellyfin-ffmpeg/libavcodec/qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavcodec/qsv.c ++++ jellyfin-ffmpeg/libavcodec/qsv.c +@@ -191,7 +191,7 @@ int ff_qsv_print_warning(void *log_ctx, + const char *desc; + int ret; + ret = ff_qsv_map_error(err, &desc); +- av_log(log_ctx, AV_LOG_WARNING, "%s: %s (%d)\n", warning_string, desc, err); ++ av_log(log_ctx, AV_LOG_VERBOSE, "%s: %s (%d)\n", warning_string, desc, err); + return ret; + } + +Index: jellyfin-ffmpeg/libavfilter/Makefile +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/Makefile ++++ jellyfin-ffmpeg/libavfilter/Makefile +@@ -231,7 +231,7 @@ OBJS-$(CONFIG_DECONVOLVE_FILTER) + OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o + OBJS-$(CONFIG_DEFLATE_FILTER) += vf_neighbor.o + OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o +-OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_deinterlace_qsv.o ++OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_vpp_qsv.o + OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o + OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o + OBJS-$(CONFIG_DELOGO_FILTER) += vf_delogo.o +@@ -397,7 +397,7 @@ OBJS-$(CONFIG_SCALE_FILTER) + OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o scale_eval.o + OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o + OBJS-$(CONFIG_SCALE_OPENCL_FILTER) += vf_scale_opencl.o opencl.o opencl/scale.o scale_eval.o +-OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o ++OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_vpp_qsv.o + OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o + OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o + OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale_eval.o +Index: jellyfin-ffmpeg/libavfilter/qsvvpp.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/qsvvpp.c ++++ jellyfin-ffmpeg/libavfilter/qsvvpp.c +@@ -36,38 +36,6 @@ + MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET)) + #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME) + #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY) +- +-typedef struct QSVFrame { +- AVFrame *frame; +- mfxFrameSurface1 *surface; +- mfxFrameSurface1 surface_internal; /* for system memory */ +- struct QSVFrame *next; +-} QSVFrame; +- +-/* abstract struct for all QSV filters */ +-struct QSVVPPContext { +- mfxSession session; +- int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback */ +- enum AVPixelFormat out_sw_format; /* Real output format */ +- mfxVideoParam vpp_param; +- mfxFrameInfo *frame_infos; /* frame info for each input */ +- +- /* members related to the input/output surface */ +- int in_mem_mode; +- int out_mem_mode; +- QSVFrame *in_frame_list; +- QSVFrame *out_frame_list; +- int nb_surface_ptrs_in; +- int nb_surface_ptrs_out; +- mfxFrameSurface1 **surface_ptrs_in; +- mfxFrameSurface1 **surface_ptrs_out; +- +- /* MFXVPP extern parameters */ +- mfxExtOpaqueSurfaceAlloc opaque_alloc; +- mfxExtBuffer **ext_buffers; +- int nb_ext_buffers; +-}; +- + #define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) + + static const AVRational default_tb = { 1, 90000 }; +@@ -172,7 +140,7 @@ int ff_qsvvpp_print_warning(void *log_ct + const char *desc; + int ret; + ret = qsv_map_error(err, &desc); +- av_log(log_ctx, AV_LOG_WARNING, "%s: %s (%d)\n", warning_string, desc, err); ++ 
av_log(log_ctx, AV_LOG_VERBOSE, "%s: %s (%d)\n", warning_string, desc, err); + return ret; + } + +@@ -329,6 +297,14 @@ static int fill_frameinfo_by_link(mfxFra + frameinfo->CropH = link->h; + frameinfo->FrameRateExtN = link->frame_rate.num; + frameinfo->FrameRateExtD = link->frame_rate.den; ++ ++ /* Apparently VPP in the SDK requires the frame rate to be set to some value, otherwise ++ * init will fail */ ++ if (frameinfo->FrameRateExtD == 0 || frameinfo->FrameRateExtN == 0) { ++ frameinfo->FrameRateExtN = 25; ++ frameinfo->FrameRateExtD = 1; ++ } ++ + frameinfo->AspectRatioW = link->sample_aspect_ratio.num ? link->sample_aspect_ratio.num : 1; + frameinfo->AspectRatioH = link->sample_aspect_ratio.den ? link->sample_aspect_ratio.den : 1; + +@@ -338,9 +314,11 @@ static int fill_frameinfo_by_link(mfxFra + static void clear_unused_frames(QSVFrame *list) + { + while (list) { +- if (list->surface && !list->surface->Data.Locked) { +- list->surface = NULL; ++ /* list->queued==1 means the frame is not cached in VPP ++ * process any more, it can be released to pool. */ ++ if ((list->queued == 1) && !list->surface.Data.Locked) { + av_frame_free(&list->frame); ++ list->queued = 0; + } + list = list->next; + } +@@ -363,8 +341,10 @@ static QSVFrame *get_free_frame(QSVFrame + QSVFrame *out = *list; + + for (; out; out = out->next) { +- if (!out->surface) ++ if (!out->queued) { ++ out->queued = 1; + break; ++ } + } + + if (!out) { +@@ -373,8 +353,9 @@ static QSVFrame *get_free_frame(QSVFrame + av_log(NULL, AV_LOG_ERROR, "Can't alloc new output frame.\n"); + return NULL; + } +- out->next = *list; +- *list = out; ++ out->queued = 1; ++ out->next = *list; ++ *list = out; + } + + return out; +@@ -404,7 +385,7 @@ static QSVFrame *submit_frame(QSVVPPCont + return NULL; + } + qsv_frame->frame = av_frame_clone(picref); +- qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3]; ++ qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame->data[3]; + } else { + /* make a copy if the input is not padded as libmfx requires */ + if (picref->height & 31 || picref->linesize[0] & 31) { +@@ -427,27 +408,26 @@ static QSVFrame *submit_frame(QSVVPPCont + qsv_frame->frame = av_frame_clone(picref); + + if (map_frame_to_surface(qsv_frame->frame, +- &qsv_frame->surface_internal) < 0) { ++ &qsv_frame->surface) < 0) { + av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n"); + return NULL; + } +- qsv_frame->surface = &qsv_frame->surface_internal; + } + +- qsv_frame->surface->Info = s->frame_infos[FF_INLINK_IDX(inlink)]; +- qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts, ++ qsv_frame->surface.Info = s->frame_infos[FF_INLINK_IDX(inlink)]; ++ qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts, + inlink->time_base, default_tb); + +- qsv_frame->surface->Info.PicStruct = ++ qsv_frame->surface.Info.PicStruct = + !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE : + (qsv_frame->frame->top_field_first ? 
MFX_PICSTRUCT_FIELD_TFF : + MFX_PICSTRUCT_FIELD_BFF); + if (qsv_frame->frame->repeat_pict == 1) +- qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; ++ qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; + else if (qsv_frame->frame->repeat_pict == 2) +- qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; ++ qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; + else if (qsv_frame->frame->repeat_pict == 4) +- qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; ++ qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; + + return qsv_frame; + } +@@ -478,7 +458,7 @@ static QSVFrame *query_frame(QSVVPPConte + return NULL; + } + +- out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3]; ++ out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame->data[3]; + } else { + /* Get a frame with aligned dimensions. + * Libmfx need system memory being 128x64 aligned */ +@@ -489,16 +469,14 @@ static QSVFrame *query_frame(QSVVPPConte + return NULL; + + ret = map_frame_to_surface(out_frame->frame, +- &out_frame->surface_internal); ++ &out_frame->surface); + if (ret < 0) + return NULL; +- +- out_frame->surface = &out_frame->surface_internal; + } + + out_frame->frame->width = outlink->w; + out_frame->frame->height = outlink->h; +- out_frame->surface->Info = s->vpp_param.vpp.Out; ++ out_frame->surface.Info = s->vpp_param.vpp.Out; + + return out_frame; + } +@@ -671,16 +649,23 @@ static int init_vpp_session(AVFilterCont + return 0; + } + +-int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param) ++static unsigned int qsv_fifo_item_size(void) ++{ ++ return sizeof(mfxSyncPoint) + sizeof(QSVFrame*); ++} ++ ++static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo) ++{ ++ return av_fifo_size(fifo)/qsv_fifo_item_size(); ++} ++ ++int ff_qsvvpp_init(AVFilterContext *avctx, QSVVPPParam *param) + { + int i; + int ret; +- QSVVPPContext *s; +- +- s = av_mallocz(sizeof(*s)); +- if (!s) +- return AVERROR(ENOMEM); ++ QSVVPPContext *s = avctx->priv; + ++ s->last_in_pts = AV_NOPTS_VALUE; + s->filter_frame = param->filter_frame; + if (!s->filter_frame) + s->filter_frame = ff_filter_frame; +@@ -743,7 +728,16 @@ int ff_qsvvpp_create(AVFilterContext *av + s->vpp_param.ExtParam = param->ext_buf; + } + +- s->vpp_param.AsyncDepth = 1; ++ s->got_frame = 0; ++ ++ /** keep fifo size at least 1. Even when async_depth is 0, fifo is used. 
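
Each element queued in the new fifo is really a pair, the in-flight QSVFrame pointer followed by its mfxSyncPoint, which is why the item size is the sum of the two and why reads must mirror the write order. The retire step in isolation (a sketch against the 4.4 AVFifoBuffer API; retire_one is a hypothetical name):

    #include "libavutil/fifo.h"
    #include "qsvvpp.h"   /* QSVFrame, mfxSession, mfxSyncPoint */

    /* sketch: pop and synchronize the oldest (frame, sync) pair once more
     * than async_depth pairs are queued; returns NULL when nothing is due */
    static QSVFrame *retire_one(AVFifoBuffer *fifo, mfxSession session,
                                int async_depth)
    {
        QSVFrame    *f;
        mfxSyncPoint sync;
        mfxStatus    err;

        if (av_fifo_size(fifo) / (int)(sizeof(f) + sizeof(sync)) <= async_depth)
            return NULL;                 /* keep the pipeline filled */

        /* reads must mirror the write order: frame first, then sync */
        av_fifo_generic_read(fifo, &f,    sizeof(f),    NULL);
        av_fifo_generic_read(fifo, &sync, sizeof(sync), NULL);

        do {                             /* block until the VPP is done */
            err = MFXVideoCORE_SyncOperation(session, sync, 1000);
        } while (err == MFX_WRN_IN_EXECUTION);

        return err < 0 ? NULL : f;
    }
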
*/ ++ s->async_fifo = av_fifo_alloc((s->async_depth + 1) * qsv_fifo_item_size()); ++ if (!s->async_fifo) { ++ ret = AVERROR(ENOMEM); ++ goto failed; ++ } ++ ++ s->vpp_param.AsyncDepth = s->async_depth; + + if (IS_SYSTEM_MEMORY(s->in_mem_mode)) + s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY; +@@ -770,27 +764,26 @@ int ff_qsvvpp_create(AVFilterContext *av + } else if (ret > 0) + ff_qsvvpp_print_warning(avctx, ret, "Warning When creating qsvvpp"); + +- *vpp = s; + return 0; + + failed: +- ff_qsvvpp_free(&s); ++ ff_qsvvpp_close(avctx); + + return ret; + } + +-int ff_qsvvpp_free(QSVVPPContext **vpp) ++int ff_qsvvpp_close(AVFilterContext *avctx) + { +- QSVVPPContext *s = *vpp; +- +- if (!s) +- return 0; ++ QSVVPPContext *s = avctx->priv; + + if (s->session) { + MFXVideoVPP_Close(s->session); + MFXClose(s->session); ++ s->session = NULL; + } + ++ s->last_in_pts = AV_NOPTS_VALUE; ++ + /* release all the resources */ + clear_frame_list(&s->in_frame_list); + clear_frame_list(&s->out_frame_list); +@@ -798,7 +791,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp) + av_freep(&s->surface_ptrs_out); + av_freep(&s->ext_buffers); + av_freep(&s->frame_infos); +- av_freep(vpp); ++ av_fifo_free(s->async_fifo); + + return 0; + } +@@ -808,8 +801,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = ctx->outputs[0]; + mfxSyncPoint sync; +- QSVFrame *in_frame, *out_frame; +- int ret, filter_ret; ++ QSVFrame *in_frame, *out_frame, *tmp; ++ int ret, ret1, filter_ret; ++ int64_t dpts = 0; ++ ++ while (s->eof && qsv_fifo_size(s->async_fifo)) { ++ av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL); ++ av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL); ++ if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) ++ av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); ++ ++ filter_ret = s->filter_frame(outlink, tmp->frame); ++ if (filter_ret < 0) { ++ av_frame_free(&tmp->frame); ++ ret = filter_ret; ++ break; ++ } ++ tmp->queued--; ++ s->got_frame = 1; ++ tmp->frame = NULL; ++ }; ++ ++ if (!picref) ++ return 0; + + in_frame = submit_frame(s, inlink, picref); + if (!in_frame) { +@@ -826,8 +840,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext + } + + do { +- ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface, +- out_frame->surface, NULL, &sync); ++ ret = MFXVideoVPP_RunFrameVPPAsync(s->session, &in_frame->surface, ++ &out_frame->surface, NULL, &sync); + if (ret == MFX_WRN_DEVICE_BUSY) + av_usleep(500); + } while (ret == MFX_WRN_DEVICE_BUSY); +@@ -839,20 +853,63 @@ int ff_qsvvpp_filter_frame(QSVVPPContext + break; + } + +- if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) +- av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); ++ if (in_frame->frame->color_primaries != -1) ++ out_frame->frame->color_primaries = in_frame->frame->color_primaries; ++ if (in_frame->frame->color_trc != -1) ++ out_frame->frame->color_trc = in_frame->frame->color_trc; ++ if (in_frame->frame->colorspace != -1) ++ out_frame->frame->colorspace = in_frame->frame->colorspace; ++ if (in_frame->frame->color_range != -1) ++ out_frame->frame->color_range = in_frame->frame->color_range; + +- out_frame->frame->pts = av_rescale_q(out_frame->surface->Data.TimeStamp, +- default_tb, outlink->time_base); ++ ret = av_frame_copy_side_data(out_frame->frame, in_frame->frame, 0); ++ if (ret < 0) ++ return ret; + +- filter_ret = s->filter_frame(outlink, out_frame->frame); +- if (filter_ret < 0) { +- av_frame_free(&out_frame->frame); +- ret = filter_ret; +- break; ++ /* 
TODO: calculate the PTS for other cases */ ++ if (s->deinterlace_enabled && ++ s->last_in_pts != AV_NOPTS_VALUE && ++ ret == MFX_ERR_MORE_SURFACE && ++ out_frame->surface.Data.TimeStamp == MFX_TIMESTAMP_UNKNOWN) ++ dpts = (in_frame->frame->pts - s->last_in_pts) / 2; ++ else ++ dpts = 0; ++ ++ out_frame->frame->pts = av_rescale_q(in_frame->frame->pts - dpts, ++ inlink->time_base, ++ outlink->time_base); ++ ++ out_frame->queued++; ++ av_fifo_generic_write(s->async_fifo, &out_frame, sizeof(out_frame), NULL); ++ av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), NULL); ++ ++ if (qsv_fifo_size(s->async_fifo) > s->async_depth) { ++ av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL); ++ av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL); ++ ++ do { ++ ret1 = MFXVideoCORE_SyncOperation(s->session, sync, 1000); ++ } while (ret1 == MFX_WRN_IN_EXECUTION); ++ ++ if (ret1 < 0) { ++ ret = ret1; ++ break; ++ } ++ ++ filter_ret = s->filter_frame(outlink, tmp->frame); ++ if (filter_ret < 0) { ++ av_frame_free(&tmp->frame); ++ ret = filter_ret; ++ break; ++ } ++ ++ tmp->queued--; ++ s->got_frame = 1; ++ tmp->frame = NULL; + } +- out_frame->frame = NULL; +- } while(ret == MFX_ERR_MORE_SURFACE); ++ } while (ret == MFX_ERR_MORE_SURFACE); ++ ++ s->last_in_pts = in_frame->frame->pts; + + return ret; + } +Index: jellyfin-ffmpeg/libavfilter/qsvvpp.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/qsvvpp.h ++++ jellyfin-ffmpeg/libavfilter/qsvvpp.h +@@ -27,6 +27,7 @@ + #include + + #include "avfilter.h" ++#include "libavutil/fifo.h" + + #define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst->input_pads)) + #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads)) +@@ -39,7 +40,46 @@ + ((MFX_VERSION.Major > (MAJOR)) || \ + (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR))) + +-typedef struct QSVVPPContext QSVVPPContext; ++typedef struct QSVFrame { ++ AVFrame *frame; ++ mfxFrameSurface1 surface; ++ struct QSVFrame *next; ++ int queued; ++} QSVFrame; ++ ++typedef struct QSVVPPContext { ++ const AVClass *class; ++ ++ mfxSession session; ++ int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); /**< callback */ ++ enum AVPixelFormat out_sw_format; /**< Real output format */ ++ mfxVideoParam vpp_param; ++ mfxFrameInfo *frame_infos; /**< frame info for each input */ ++ ++ /** members related to the input/output surface */ ++ int in_mem_mode; ++ int out_mem_mode; ++ QSVFrame *in_frame_list; ++ QSVFrame *out_frame_list; ++ int nb_surface_ptrs_in; ++ int nb_surface_ptrs_out; ++ mfxFrameSurface1 **surface_ptrs_in; ++ mfxFrameSurface1 **surface_ptrs_out; ++ ++ /** MFXVPP extern parameters */ ++ mfxExtOpaqueSurfaceAlloc opaque_alloc; ++ mfxExtBuffer **ext_buffers; ++ int nb_ext_buffers; ++ ++ int got_frame; ++ int async_depth; ++ int eof; ++ int deinterlace_enabled; ++ /** order with frame_out, sync */ ++ AVFifoBuffer *async_fifo; ++ ++ int64_t last_in_pts; ++} QSVVPPContext; + + typedef struct QSVVPPCrop { + int in_idx; ///< Input index +@@ -63,10 +103,10 @@ typedef struct QSVVPPParam { + } QSVVPPParam; + + /* create and initialize the QSV session */ +-int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param); ++int ff_qsvvpp_init(AVFilterContext *avctx, QSVVPPParam *param); + + /* release the resources (eg.surfaces) */ +-int ff_qsvvpp_free(QSVVPPContext **vpp); ++int ff_qsvvpp_close(AVFilterContext *avctx); + + /* vpp filter frame and call the cb if needed */ + 
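
The dpts computation above covers double-rate deinterlacing, where the SDK emits the second field-frame with an unknown timestamp: that frame belongs halfway between two inputs. As a standalone helper (sketch):

    #include "libavutil/mathematics.h"

    /* pts of the extra field-frame emitted between two input frames
     * when deinterlacing doubles the frame rate */
    static int64_t field_frame_pts(int64_t cur_in_pts, int64_t last_in_pts,
                                   AVRational in_tb, AVRational out_tb)
    {
        int64_t dpts = (cur_in_pts - last_in_pts) / 2;
        return av_rescale_q(cur_in_pts - dpts, in_tb, out_tb);
    }

With a constant input cadence this places the extra frame exactly between the two source timestamps; as the TODO notes, other rate conversions still fall back to dpts = 0.
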
int ff_qsvvpp_filter_frame(QSVVPPContext *vpp, AVFilterLink *inlink, AVFrame *frame); +Index: jellyfin-ffmpeg/libavfilter/vf_deinterlace_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_deinterlace_qsv.c ++++ /dev/null +@@ -1,625 +0,0 @@ +-/* +- * This file is part of FFmpeg. +- * +- * FFmpeg is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. +- * +- * FFmpeg is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with FFmpeg; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- */ +- +-/** +- * @file +- * deinterlace video filter - QSV +- */ +- +-#include +- +-#include +-#include +- +-#include "libavutil/avstring.h" +-#include "libavutil/common.h" +-#include "libavutil/hwcontext.h" +-#include "libavutil/hwcontext_qsv.h" +-#include "libavutil/internal.h" +-#include "libavutil/mathematics.h" +-#include "libavutil/opt.h" +-#include "libavutil/pixdesc.h" +-#include "libavutil/time.h" +-#include "libavfilter/qsvvpp.h" +- +-#include "avfilter.h" +-#include "formats.h" +-#include "internal.h" +-#include "video.h" +- +-#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) +- +-enum { +- QSVDEINT_MORE_OUTPUT = 1, +- QSVDEINT_MORE_INPUT, +-}; +- +-typedef struct QSVFrame { +- AVFrame *frame; +- mfxFrameSurface1 surface; +- int used; +- +- struct QSVFrame *next; +-} QSVFrame; +- +-typedef struct QSVDeintContext { +- const AVClass *class; +- +- AVBufferRef *hw_frames_ctx; +- /* a clone of the main session, used internally for deinterlacing */ +- mfxSession session; +- +- mfxMemId *mem_ids; +- int nb_mem_ids; +- +- mfxFrameSurface1 **surface_ptrs; +- int nb_surface_ptrs; +- +- mfxExtOpaqueSurfaceAlloc opaque_alloc; +- mfxExtVPPDeinterlacing deint_conf; +- mfxExtBuffer *ext_buffers[2]; +- int num_ext_buffers; +- +- QSVFrame *work_frames; +- +- int64_t last_pts; +- +- int eof; +- +- /* option for Deinterlacing algorithm to be used */ +- int mode; +-} QSVDeintContext; +- +-static av_cold void qsvdeint_uninit(AVFilterContext *ctx) +-{ +- QSVDeintContext *s = ctx->priv; +- QSVFrame *cur; +- +- if (s->session) { +- MFXClose(s->session); +- s->session = NULL; +- } +- av_buffer_unref(&s->hw_frames_ctx); +- +- cur = s->work_frames; +- while (cur) { +- s->work_frames = cur->next; +- av_frame_free(&cur->frame); +- av_freep(&cur); +- cur = s->work_frames; +- } +- +- av_freep(&s->mem_ids); +- s->nb_mem_ids = 0; +- +- av_freep(&s->surface_ptrs); +- s->nb_surface_ptrs = 0; +-} +- +-static int qsvdeint_query_formats(AVFilterContext *ctx) +-{ +- static const enum AVPixelFormat pixel_formats[] = { +- AV_PIX_FMT_QSV, AV_PIX_FMT_NONE, +- }; +- AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); +- int ret; +- +- if ((ret = ff_set_common_formats(ctx, pix_fmts)) < 0) +- return ret; +- +- return 0; +-} +- +-static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req, +- mfxFrameAllocResponse *resp) +-{ +- AVFilterContext *ctx = pthis; +- QSVDeintContext *s = ctx->priv; +- +- if (!(req->Type & 
MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) || +- !(req->Type & (MFX_MEMTYPE_FROM_VPPIN | MFX_MEMTYPE_FROM_VPPOUT)) || +- !(req->Type & MFX_MEMTYPE_EXTERNAL_FRAME)) +- return MFX_ERR_UNSUPPORTED; +- +- resp->mids = s->mem_ids; +- resp->NumFrameActual = s->nb_mem_ids; +- +- return MFX_ERR_NONE; +-} +- +-static mfxStatus frame_free(mfxHDL pthis, mfxFrameAllocResponse *resp) +-{ +- return MFX_ERR_NONE; +-} +- +-static mfxStatus frame_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) +-{ +- return MFX_ERR_UNSUPPORTED; +-} +- +-static mfxStatus frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) +-{ +- return MFX_ERR_UNSUPPORTED; +-} +- +-static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) +-{ +- mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; +- mfxHDLPair *pair_src = (mfxHDLPair*)mid; +- +- pair_dst->first = pair_src->first; +- +- if (pair_src->second != (mfxMemId)MFX_INFINITE) +- pair_dst->second = pair_src->second; +- return MFX_ERR_NONE; +-} +- +-static int init_out_session(AVFilterContext *ctx) +-{ +- +- QSVDeintContext *s = ctx->priv; +- AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data; +- AVQSVFramesContext *hw_frames_hwctx = hw_frames_ctx->hwctx; +- AVQSVDeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; +- +- int opaque = !!(hw_frames_hwctx->frame_type & MFX_MEMTYPE_OPAQUE_FRAME); +- +- mfxHDL handle = NULL; +- mfxHandleType handle_type; +- mfxVersion ver; +- mfxIMPL impl; +- mfxVideoParam par; +- mfxStatus err; +- int i; +- +- /* extract the properties of the "master" session given to us */ +- err = MFXQueryIMPL(device_hwctx->session, &impl); +- if (err == MFX_ERR_NONE) +- err = MFXQueryVersion(device_hwctx->session, &ver); +- if (err != MFX_ERR_NONE) { +- av_log(ctx, AV_LOG_ERROR, "Error querying the session attributes\n"); +- return AVERROR_UNKNOWN; +- } +- +- if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_VA_DISPLAY; +- } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D11_DEVICE; +- } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; +- } else { +- av_log(ctx, AV_LOG_ERROR, "Error unsupported handle type\n"); +- return AVERROR_UNKNOWN; +- } +- +- err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, "Error getting the session handle"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, "Warning in getting the session handle"); +- return AVERROR_UNKNOWN; +- } +- +- /* create a "slave" session with those same properties, to be used for +- * actual deinterlacing */ +- err = MFXInit(impl, &ver, &s->session); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, "Error initializing a session for deinterlacing"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, "Warning in session initialization"); +- return AVERROR_UNKNOWN; +- } +- +- if (handle) { +- err = MFXVideoCORE_SetHandle(s->session, handle_type, handle); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- } +- +- if (QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) { +- err = MFXJoinSession(device_hwctx->session, s->session); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- } +- +- memset(&par, 0, sizeof(par)); +- +- s->deint_conf.Header.BufferId = MFX_EXTBUFF_VPP_DEINTERLACING; +- s->deint_conf.Header.BufferSz = sizeof(s->deint_conf); +- s->deint_conf.Mode = s->mode; +- +- s->ext_buffers[s->num_ext_buffers++] = (mfxExtBuffer 
*)&s->deint_conf; +- +- if (opaque) { +- s->surface_ptrs = av_mallocz_array(hw_frames_hwctx->nb_surfaces, +- sizeof(*s->surface_ptrs)); +- if (!s->surface_ptrs) +- return AVERROR(ENOMEM); +- for (i = 0; i < hw_frames_hwctx->nb_surfaces; i++) +- s->surface_ptrs[i] = hw_frames_hwctx->surfaces + i; +- s->nb_surface_ptrs = hw_frames_hwctx->nb_surfaces; +- +- s->opaque_alloc.In.Surfaces = s->surface_ptrs; +- s->opaque_alloc.In.NumSurface = s->nb_surface_ptrs; +- s->opaque_alloc.In.Type = hw_frames_hwctx->frame_type; +- +- s->opaque_alloc.Out = s->opaque_alloc.In; +- +- s->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; +- s->opaque_alloc.Header.BufferSz = sizeof(s->opaque_alloc); +- +- s->ext_buffers[s->num_ext_buffers++] = (mfxExtBuffer *)&s->opaque_alloc; +- +- par.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY | MFX_IOPATTERN_OUT_OPAQUE_MEMORY; +- } else { +- mfxFrameAllocator frame_allocator = { +- .pthis = ctx, +- .Alloc = frame_alloc, +- .Lock = frame_lock, +- .Unlock = frame_unlock, +- .GetHDL = frame_get_hdl, +- .Free = frame_free, +- }; +- +- s->mem_ids = av_mallocz_array(hw_frames_hwctx->nb_surfaces, +- sizeof(*s->mem_ids)); +- if (!s->mem_ids) +- return AVERROR(ENOMEM); +- for (i = 0; i < hw_frames_hwctx->nb_surfaces; i++) +- s->mem_ids[i] = hw_frames_hwctx->surfaces[i].Data.MemId; +- s->nb_mem_ids = hw_frames_hwctx->nb_surfaces; +- +- err = MFXVideoCORE_SetFrameAllocator(s->session, &frame_allocator); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- +- par.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; +- } +- +- par.ExtParam = s->ext_buffers; +- par.NumExtParam = s->num_ext_buffers; +- +- par.AsyncDepth = 1; // TODO async +- +- par.vpp.In = hw_frames_hwctx->surfaces[0].Info; +- +- par.vpp.In.CropW = ctx->inputs[0]->w; +- par.vpp.In.CropH = ctx->inputs[0]->h; +- +- if (ctx->inputs[0]->frame_rate.num) { +- par.vpp.In.FrameRateExtN = ctx->inputs[0]->frame_rate.num; +- par.vpp.In.FrameRateExtD = ctx->inputs[0]->frame_rate.den; +- } else { +- par.vpp.In.FrameRateExtN = ctx->inputs[0]->time_base.num; +- par.vpp.In.FrameRateExtD = ctx->inputs[0]->time_base.den; +- } +- +- par.vpp.Out = par.vpp.In; +- +- if (ctx->outputs[0]->frame_rate.num) { +- par.vpp.Out.FrameRateExtN = ctx->outputs[0]->frame_rate.num; +- par.vpp.Out.FrameRateExtD = ctx->outputs[0]->frame_rate.den; +- } else { +- par.vpp.Out.FrameRateExtN = ctx->outputs[0]->time_base.num; +- par.vpp.Out.FrameRateExtD = ctx->outputs[0]->time_base.den; +- } +- +- /* Print input memory mode */ +- ff_qsvvpp_print_iopattern(ctx, par.IOPattern & 0x0F, "VPP"); +- /* Print output memory mode */ +- ff_qsvvpp_print_iopattern(ctx, par.IOPattern & 0xF0, "VPP"); +- err = MFXVideoVPP_Init(s->session, &par); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, +- "Error opening the VPP for deinterlacing"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, +- "Warning in VPP initialization"); +- return AVERROR_UNKNOWN; +- } +- +- return 0; +-} +- +-static int qsvdeint_config_props(AVFilterLink *outlink) +-{ +- AVFilterContext *ctx = outlink->src; +- AVFilterLink *inlink = ctx->inputs[0]; +- QSVDeintContext *s = ctx->priv; +- int ret; +- +- qsvdeint_uninit(ctx); +- +- s->last_pts = AV_NOPTS_VALUE; +- outlink->frame_rate = av_mul_q(inlink->frame_rate, +- (AVRational){ 2, 1 }); +- outlink->time_base = av_mul_q(inlink->time_base, +- (AVRational){ 1, 2 }); +- +- /* check that we have a hw context */ +- if (!inlink->hw_frames_ctx) { +- av_log(ctx, AV_LOG_ERROR, "No hw context provided on 
input\n"); +- return AVERROR(EINVAL); +- } +- +- s->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); +- if (!s->hw_frames_ctx) +- return AVERROR(ENOMEM); +- +- av_buffer_unref(&outlink->hw_frames_ctx); +- outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); +- if (!outlink->hw_frames_ctx) { +- qsvdeint_uninit(ctx); +- return AVERROR(ENOMEM); +- } +- +- ret = init_out_session(ctx); +- if (ret < 0) +- return ret; +- +- +- return 0; +-} +- +-static void clear_unused_frames(QSVDeintContext *s) +-{ +- QSVFrame *cur = s->work_frames; +- while (cur) { +- if (!cur->surface.Data.Locked) { +- av_frame_free(&cur->frame); +- cur->used = 0; +- } +- cur = cur->next; +- } +-} +- +-static int get_free_frame(QSVDeintContext *s, QSVFrame **f) +-{ +- QSVFrame *frame, **last; +- +- clear_unused_frames(s); +- +- frame = s->work_frames; +- last = &s->work_frames; +- while (frame) { +- if (!frame->used) { +- *f = frame; +- return 0; +- } +- +- last = &frame->next; +- frame = frame->next; +- } +- +- frame = av_mallocz(sizeof(*frame)); +- if (!frame) +- return AVERROR(ENOMEM); +- *last = frame; +- *f = frame; +- +- return 0; +-} +- +-static int submit_frame(AVFilterContext *ctx, AVFrame *frame, +- mfxFrameSurface1 **surface) +-{ +- QSVDeintContext *s = ctx->priv; +- QSVFrame *qf; +- int ret; +- +- ret = get_free_frame(s, &qf); +- if (ret < 0) +- return ret; +- +- qf->frame = frame; +- +- qf->surface = *(mfxFrameSurface1*)qf->frame->data[3]; +- +- qf->surface.Data.Locked = 0; +- qf->surface.Info.CropW = qf->frame->width; +- qf->surface.Info.CropH = qf->frame->height; +- +- qf->surface.Info.PicStruct = !qf->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE : +- (qf->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF : +- MFX_PICSTRUCT_FIELD_BFF); +- if (qf->frame->repeat_pict == 1) { +- qf->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; +- qf->surface.Info.PicStruct |= qf->frame->top_field_first ? 
MFX_PICSTRUCT_FIELD_TFF : +- MFX_PICSTRUCT_FIELD_BFF; +- } else if (qf->frame->repeat_pict == 2) +- qf->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; +- else if (qf->frame->repeat_pict == 4) +- qf->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; +- +- if (ctx->inputs[0]->frame_rate.num) { +- qf->surface.Info.FrameRateExtN = ctx->inputs[0]->frame_rate.num; +- qf->surface.Info.FrameRateExtD = ctx->inputs[0]->frame_rate.den; +- } else { +- qf->surface.Info.FrameRateExtN = ctx->inputs[0]->time_base.num; +- qf->surface.Info.FrameRateExtD = ctx->inputs[0]->time_base.den; +- } +- +- qf->surface.Data.TimeStamp = av_rescale_q(qf->frame->pts, +- ctx->inputs[0]->time_base, +- (AVRational){1, 90000}); +- +- *surface = &qf->surface; +- qf->used = 1; +- +- return 0; +-} +- +-static int process_frame(AVFilterContext *ctx, const AVFrame *in, +- mfxFrameSurface1 *surf_in) +-{ +- QSVDeintContext *s = ctx->priv; +- AVFilterLink *inlink = ctx->inputs[0]; +- AVFilterLink *outlink = ctx->outputs[0]; +- +- AVFrame *out; +- mfxFrameSurface1 *surf_out; +- mfxSyncPoint sync = NULL; +- mfxStatus err; +- int ret, again = 0; +- +- out = ff_get_video_buffer(outlink, outlink->w, outlink->h); +- if (!out) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } +- +- surf_out = (mfxFrameSurface1*)out->data[3]; +- surf_out->Info.CropW = outlink->w; +- surf_out->Info.CropH = outlink->h; +- surf_out->Info.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; +- +- do { +- err = MFXVideoVPP_RunFrameVPPAsync(s->session, surf_in, surf_out, +- NULL, &sync); +- if (err == MFX_WRN_DEVICE_BUSY) +- av_usleep(1); +- } while (err == MFX_WRN_DEVICE_BUSY); +- +- if (err == MFX_ERR_MORE_DATA) { +- av_frame_free(&out); +- return QSVDEINT_MORE_INPUT; +- } +- +- if (err < 0 && err != MFX_ERR_MORE_SURFACE) { +- ret = ff_qsvvpp_print_error(ctx, err, "Error during deinterlacing"); +- goto fail; +- } +- +- if (!sync) { +- av_log(ctx, AV_LOG_ERROR, "No sync during deinterlacing\n"); +- ret = AVERROR_UNKNOWN; +- goto fail; +- } +- if (err == MFX_ERR_MORE_SURFACE) +- again = 1; +- +- do { +- err = MFXVideoCORE_SyncOperation(s->session, sync, 1000); +- } while (err == MFX_WRN_IN_EXECUTION); +- if (err < 0) { +- ret = ff_qsvvpp_print_error(ctx, err, "Error synchronizing the operation"); +- goto fail; +- } +- +- ret = av_frame_copy_props(out, in); +- if (ret < 0) +- goto fail; +- +- out->width = outlink->w; +- out->height = outlink->h; +- out->interlaced_frame = 0; +- +- out->pts = av_rescale_q(out->pts, inlink->time_base, outlink->time_base); +- if (out->pts == s->last_pts) +- out->pts++; +- s->last_pts = out->pts; +- +- ret = ff_filter_frame(outlink, out); +- if (ret < 0) +- return ret; +- +- return again ? 
QSVDEINT_MORE_OUTPUT : 0; +-fail: +- av_frame_free(&out); +- return ret; +-} +- +-static int qsvdeint_filter_frame(AVFilterLink *link, AVFrame *in) +-{ +- AVFilterContext *ctx = link->dst; +- +- mfxFrameSurface1 *surf_in; +- int ret; +- +- ret = submit_frame(ctx, in, &surf_in); +- if (ret < 0) { +- av_frame_free(&in); +- return ret; +- } +- +- do { +- ret = process_frame(ctx, in, surf_in); +- if (ret < 0) +- return ret; +- } while (ret == QSVDEINT_MORE_OUTPUT); +- +- return 0; +-} +- +-static int qsvdeint_request_frame(AVFilterLink *outlink) +-{ +- AVFilterContext *ctx = outlink->src; +- +- return ff_request_frame(ctx->inputs[0]); +-} +- +-#define OFFSET(x) offsetof(QSVDeintContext, x) +-#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM +-static const AVOption options[] = { +- { "mode", "set deinterlace mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, +- { "bob", "bob algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_BOB}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, +- { "advanced", "Motion adaptive algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, +- { NULL }, +-}; +- +-static const AVClass qsvdeint_class = { +- .class_name = "deinterlace_qsv", +- .item_name = av_default_item_name, +- .option = options, +- .version = LIBAVUTIL_VERSION_INT, +-}; +- +-static const AVFilterPad qsvdeint_inputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .filter_frame = qsvdeint_filter_frame, +- }, +- { NULL } +-}; +- +-static const AVFilterPad qsvdeint_outputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .config_props = qsvdeint_config_props, +- .request_frame = qsvdeint_request_frame, +- }, +- { NULL } +-}; +- +-AVFilter ff_vf_deinterlace_qsv = { +- .name = "deinterlace_qsv", +- .description = NULL_IF_CONFIG_SMALL("QuickSync video deinterlacing"), +- +- .uninit = qsvdeint_uninit, +- .query_formats = qsvdeint_query_formats, +- +- .priv_size = sizeof(QSVDeintContext), +- .priv_class = &qsvdeint_class, +- +- .inputs = qsvdeint_inputs, +- .outputs = qsvdeint_outputs, +- +- .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +-}; +Index: jellyfin-ffmpeg/libavfilter/vf_overlay_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_overlay_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_overlay_qsv.c +@@ -58,10 +58,9 @@ enum var_name { + }; + + typedef struct QSVOverlayContext { +- const AVClass *class; ++ QSVVPPContext qsv; + + FFFrameSync fs; +- QSVVPPContext *qsv; + QSVVPPParam qsv_param; + mfxExtVPPComposite comp_conf; + double var_values[VAR_VARS_NB]; +@@ -231,7 +230,7 @@ static int config_overlay_input(AVFilter + static int process_frame(FFFrameSync *fs) + { + AVFilterContext *ctx = fs->parent; +- QSVOverlayContext *s = fs->opaque; ++ QSVVPPContext *qsv = fs->opaque; + AVFilterLink *in0 = ctx->inputs[0]; + AVFilterLink *in1 = ctx->inputs[1]; + AVFrame *main = NULL; +@@ -249,14 +248,17 @@ static int process_frame(FFFrameSync *fs + return AVERROR_BUG; + + /* composite main frame */ +- ret = ff_qsvvpp_filter_frame(s->qsv, in0, main); ++ ret = ff_qsvvpp_filter_frame(qsv, in0, main); + if (ret < 0 && ret != AVERROR(EAGAIN)) + return ret; + ++ /* remove all side data of the overlay frame*/ ++ if (overlay) ++ av_frame_remove_all_side_data(overlay); ++ + /* composite overlay frame */ + /* or 
overwrite main frame again if the overlay frame isn't ready yet */ +- ret = ff_qsvvpp_filter_frame(s->qsv, overlay ? in1 : in0, overlay ? overlay : main); +- return ret; ++ return ff_qsvvpp_filter_frame(qsv, overlay ? in1 : in0, overlay ? overlay : main); + } + + static int init_framesync(AVFilterContext *ctx) +@@ -300,7 +302,7 @@ static int config_output(AVFilterLink *o + if (ret < 0) + return ret; + +- return ff_qsvvpp_create(ctx, &vpp->qsv, &vpp->qsv_param); ++ return ff_qsvvpp_init(ctx, &vpp->qsv_param); + } + + /* +@@ -349,7 +351,7 @@ static av_cold void overlay_qsv_uninit(A + { + QSVOverlayContext *vpp = ctx->priv; + +- ff_qsvvpp_free(&vpp->qsv); ++ ff_qsvvpp_close(ctx); + ff_framesync_uninit(&vpp->fs); + av_freep(&vpp->comp_conf.InputStream); + av_freep(&vpp->qsv_param.ext_buf); +Index: jellyfin-ffmpeg/libavfilter/vf_scale_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_scale_qsv.c ++++ /dev/null +@@ -1,693 +0,0 @@ +-/* +- * This file is part of FFmpeg. +- * +- * FFmpeg is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. +- * +- * FFmpeg is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with FFmpeg; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- */ +- +-/** +- * @file +- * scale video filter - QSV +- */ +- +-#include +- +-#include +-#include +- +-#include "libavutil/avstring.h" +-#include "libavutil/common.h" +-#include "libavutil/eval.h" +-#include "libavutil/hwcontext.h" +-#include "libavutil/hwcontext_qsv.h" +-#include "libavutil/internal.h" +-#include "libavutil/mathematics.h" +-#include "libavutil/opt.h" +-#include "libavutil/pixdesc.h" +-#include "libavutil/time.h" +-#include "libavfilter/qsvvpp.h" +- +-#include "avfilter.h" +-#include "formats.h" +-#include "internal.h" +-#include "video.h" +- +-static const char *const var_names[] = { +- "PI", +- "PHI", +- "E", +- "in_w", "iw", +- "in_h", "ih", +- "out_w", "ow", +- "out_h", "oh", +- "a", "dar", +- "sar", +- NULL +-}; +- +-enum var_name { +- VAR_PI, +- VAR_PHI, +- VAR_E, +- VAR_IN_W, VAR_IW, +- VAR_IN_H, VAR_IH, +- VAR_OUT_W, VAR_OW, +- VAR_OUT_H, VAR_OH, +- VAR_A, VAR_DAR, +- VAR_SAR, +- VARS_NB +-}; +- +-#define QSV_HAVE_SCALING_CONFIG QSV_VERSION_ATLEAST(1, 19) +-#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) +- +-typedef struct QSVScaleContext { +- const AVClass *class; +- +- /* a clone of the main session, used internally for scaling */ +- mfxSession session; +- +- mfxMemId *mem_ids_in; +- int nb_mem_ids_in; +- +- mfxMemId *mem_ids_out; +- int nb_mem_ids_out; +- +- mfxFrameSurface1 **surface_ptrs_in; +- int nb_surface_ptrs_in; +- +- mfxFrameSurface1 **surface_ptrs_out; +- int nb_surface_ptrs_out; +- +- mfxExtOpaqueSurfaceAlloc opaque_alloc; +- +-#if QSV_HAVE_SCALING_CONFIG +- mfxExtVPPScaling scale_conf; +-#endif +- int mode; +- +- mfxExtBuffer *ext_buffers[1 + QSV_HAVE_SCALING_CONFIG]; +- int num_ext_buf; +- +- int shift_width, shift_height; +- +- /** +- * New dimensions. 
Special values are: +- * 0 = original width/height +- * -1 = keep original aspect +- */ +- int w, h; +- +- /** +- * Output sw format. AV_PIX_FMT_NONE for no conversion. +- */ +- enum AVPixelFormat format; +- +- char *w_expr; ///< width expression string +- char *h_expr; ///< height expression string +- char *format_str; +-} QSVScaleContext; +- +-static av_cold int qsvscale_init(AVFilterContext *ctx) +-{ +- QSVScaleContext *s = ctx->priv; +- +- if (!strcmp(s->format_str, "same")) { +- s->format = AV_PIX_FMT_NONE; +- } else { +- s->format = av_get_pix_fmt(s->format_str); +- if (s->format == AV_PIX_FMT_NONE) { +- av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); +- return AVERROR(EINVAL); +- } +- } +- +- return 0; +-} +- +-static av_cold void qsvscale_uninit(AVFilterContext *ctx) +-{ +- QSVScaleContext *s = ctx->priv; +- +- if (s->session) { +- MFXClose(s->session); +- s->session = NULL; +- } +- +- av_freep(&s->mem_ids_in); +- av_freep(&s->mem_ids_out); +- s->nb_mem_ids_in = 0; +- s->nb_mem_ids_out = 0; +- +- av_freep(&s->surface_ptrs_in); +- av_freep(&s->surface_ptrs_out); +- s->nb_surface_ptrs_in = 0; +- s->nb_surface_ptrs_out = 0; +-} +- +-static int qsvscale_query_formats(AVFilterContext *ctx) +-{ +- static const enum AVPixelFormat pixel_formats[] = { +- AV_PIX_FMT_QSV, AV_PIX_FMT_NONE, +- }; +- AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); +- int ret; +- +- if ((ret = ff_set_common_formats(ctx, pix_fmts)) < 0) +- return ret; +- +- return 0; +-} +- +-static int init_out_pool(AVFilterContext *ctx, +- int out_width, int out_height) +-{ +- QSVScaleContext *s = ctx->priv; +- AVFilterLink *outlink = ctx->outputs[0]; +- +- AVHWFramesContext *in_frames_ctx; +- AVHWFramesContext *out_frames_ctx; +- AVQSVFramesContext *in_frames_hwctx; +- AVQSVFramesContext *out_frames_hwctx; +- enum AVPixelFormat in_format; +- enum AVPixelFormat out_format; +- int i, ret; +- +- /* check that we have a hw context */ +- if (!ctx->inputs[0]->hw_frames_ctx) { +- av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); +- return AVERROR(EINVAL); +- } +- in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; +- in_frames_hwctx = in_frames_ctx->hwctx; +- +- in_format = in_frames_ctx->sw_format; +- out_format = (s->format == AV_PIX_FMT_NONE) ? 
in_format : s->format; +- +- outlink->hw_frames_ctx = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); +- if (!outlink->hw_frames_ctx) +- return AVERROR(ENOMEM); +- out_frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data; +- out_frames_hwctx = out_frames_ctx->hwctx; +- +- out_frames_ctx->format = AV_PIX_FMT_QSV; +- out_frames_ctx->width = FFALIGN(out_width, 16); +- out_frames_ctx->height = FFALIGN(out_height, 16); +- out_frames_ctx->sw_format = out_format; +- out_frames_ctx->initial_pool_size = 4; +- +- out_frames_hwctx->frame_type = in_frames_hwctx->frame_type | MFX_MEMTYPE_FROM_VPPOUT; +- +- ret = ff_filter_init_hw_frames(ctx, outlink, 32); +- if (ret < 0) +- return ret; +- +- ret = av_hwframe_ctx_init(outlink->hw_frames_ctx); +- if (ret < 0) +- return ret; +- +- for (i = 0; i < out_frames_hwctx->nb_surfaces; i++) { +- mfxFrameInfo *info = &out_frames_hwctx->surfaces[i].Info; +- info->CropW = out_width; +- info->CropH = out_height; +- } +- +- return 0; +-} +- +-static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req, +- mfxFrameAllocResponse *resp) +-{ +- AVFilterContext *ctx = pthis; +- QSVScaleContext *s = ctx->priv; +- +- if (!(req->Type & MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) || +- !(req->Type & (MFX_MEMTYPE_FROM_VPPIN | MFX_MEMTYPE_FROM_VPPOUT)) || +- !(req->Type & MFX_MEMTYPE_EXTERNAL_FRAME)) +- return MFX_ERR_UNSUPPORTED; +- +- if (req->Type & MFX_MEMTYPE_FROM_VPPIN) { +- resp->mids = s->mem_ids_in; +- resp->NumFrameActual = s->nb_mem_ids_in; +- } else { +- resp->mids = s->mem_ids_out; +- resp->NumFrameActual = s->nb_mem_ids_out; +- } +- +- return MFX_ERR_NONE; +-} +- +-static mfxStatus frame_free(mfxHDL pthis, mfxFrameAllocResponse *resp) +-{ +- return MFX_ERR_NONE; +-} +- +-static mfxStatus frame_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) +-{ +- return MFX_ERR_UNSUPPORTED; +-} +- +-static mfxStatus frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) +-{ +- return MFX_ERR_UNSUPPORTED; +-} +- +-static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) +-{ +- mfxHDLPair *pair_dst = (mfxHDLPair*)hdl; +- mfxHDLPair *pair_src = (mfxHDLPair*)mid; +- +- pair_dst->first = pair_src->first; +- +- if (pair_src->second != (mfxMemId)MFX_INFINITE) +- pair_dst->second = pair_src->second; +- return MFX_ERR_NONE; +-} +- +-static int init_out_session(AVFilterContext *ctx, int in_width, int in_height) +-{ +- +- QSVScaleContext *s = ctx->priv; +- AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; +- AVHWFramesContext *out_frames_ctx = (AVHWFramesContext*)ctx->outputs[0]->hw_frames_ctx->data; +- AVQSVFramesContext *in_frames_hwctx = in_frames_ctx->hwctx; +- AVQSVFramesContext *out_frames_hwctx = out_frames_ctx->hwctx; +- AVQSVDeviceContext *device_hwctx = in_frames_ctx->device_ctx->hwctx; +- +- int opaque = !!(in_frames_hwctx->frame_type & MFX_MEMTYPE_OPAQUE_FRAME); +- +- mfxHDL handle = NULL; +- mfxHandleType handle_type; +- mfxVersion ver; +- mfxIMPL impl; +- mfxVideoParam par; +- mfxStatus err; +- int i; +- +- s->num_ext_buf = 0; +- +- /* extract the properties of the "master" session given to us */ +- err = MFXQueryIMPL(device_hwctx->session, &impl); +- if (err == MFX_ERR_NONE) +- err = MFXQueryVersion(device_hwctx->session, &ver); +- if (err != MFX_ERR_NONE) { +- av_log(ctx, AV_LOG_ERROR, "Error querying the session attributes\n"); +- return AVERROR_UNKNOWN; +- } +- +- if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_VA_DISPLAY; +- } else if (MFX_IMPL_VIA_D3D11 == 
MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D11_DEVICE; +- } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) { +- handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER; +- } else { +- av_log(ctx, AV_LOG_ERROR, "Error unsupported handle type\n"); +- return AVERROR_UNKNOWN; +- } +- +- err = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, "Error getting the session handle"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, "Warning in getting the session handle"); +- return AVERROR_UNKNOWN; +- } +- +- /* create a "slave" session with those same properties, to be used for +- * actual scaling */ +- err = MFXInit(impl, &ver, &s->session); +- if (err != MFX_ERR_NONE) { +- av_log(ctx, AV_LOG_ERROR, "Error initializing a session for scaling\n"); +- return AVERROR_UNKNOWN; +- } +- +- if (handle) { +- err = MFXVideoCORE_SetHandle(s->session, handle_type, handle); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- } +- +- if (QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) { +- err = MFXJoinSession(device_hwctx->session, s->session); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- } +- +- memset(&par, 0, sizeof(par)); +- +- if (opaque) { +- s->surface_ptrs_in = av_mallocz_array(in_frames_hwctx->nb_surfaces, +- sizeof(*s->surface_ptrs_in)); +- if (!s->surface_ptrs_in) +- return AVERROR(ENOMEM); +- for (i = 0; i < in_frames_hwctx->nb_surfaces; i++) +- s->surface_ptrs_in[i] = in_frames_hwctx->surfaces + i; +- s->nb_surface_ptrs_in = in_frames_hwctx->nb_surfaces; +- +- s->surface_ptrs_out = av_mallocz_array(out_frames_hwctx->nb_surfaces, +- sizeof(*s->surface_ptrs_out)); +- if (!s->surface_ptrs_out) +- return AVERROR(ENOMEM); +- for (i = 0; i < out_frames_hwctx->nb_surfaces; i++) +- s->surface_ptrs_out[i] = out_frames_hwctx->surfaces + i; +- s->nb_surface_ptrs_out = out_frames_hwctx->nb_surfaces; +- +- s->opaque_alloc.In.Surfaces = s->surface_ptrs_in; +- s->opaque_alloc.In.NumSurface = s->nb_surface_ptrs_in; +- s->opaque_alloc.In.Type = in_frames_hwctx->frame_type; +- +- s->opaque_alloc.Out.Surfaces = s->surface_ptrs_out; +- s->opaque_alloc.Out.NumSurface = s->nb_surface_ptrs_out; +- s->opaque_alloc.Out.Type = out_frames_hwctx->frame_type; +- +- s->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; +- s->opaque_alloc.Header.BufferSz = sizeof(s->opaque_alloc); +- +- s->ext_buffers[s->num_ext_buf++] = (mfxExtBuffer*)&s->opaque_alloc; +- +- par.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY | MFX_IOPATTERN_OUT_OPAQUE_MEMORY; +- } else { +- mfxFrameAllocator frame_allocator = { +- .pthis = ctx, +- .Alloc = frame_alloc, +- .Lock = frame_lock, +- .Unlock = frame_unlock, +- .GetHDL = frame_get_hdl, +- .Free = frame_free, +- }; +- +- s->mem_ids_in = av_mallocz_array(in_frames_hwctx->nb_surfaces, +- sizeof(*s->mem_ids_in)); +- if (!s->mem_ids_in) +- return AVERROR(ENOMEM); +- for (i = 0; i < in_frames_hwctx->nb_surfaces; i++) { +- s->mem_ids_in[i] = in_frames_hwctx->surfaces[i].Data.MemId; +- in_frames_hwctx->surfaces[i].Info.CropW = in_width; +- in_frames_hwctx->surfaces[i].Info.CropH = in_height; +- } +- s->nb_mem_ids_in = in_frames_hwctx->nb_surfaces; +- +- s->mem_ids_out = av_mallocz_array(out_frames_hwctx->nb_surfaces, +- sizeof(*s->mem_ids_out)); +- if (!s->mem_ids_out) +- return AVERROR(ENOMEM); +- for (i = 0; i < out_frames_hwctx->nb_surfaces; i++) +- s->mem_ids_out[i] = out_frames_hwctx->surfaces[i].Data.MemId; +- s->nb_mem_ids_out = out_frames_hwctx->nb_surfaces; +- +- err = 
MFXVideoCORE_SetFrameAllocator(s->session, &frame_allocator); +- if (err != MFX_ERR_NONE) +- return AVERROR_UNKNOWN; +- +- par.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; +- } +- +-#if QSV_HAVE_SCALING_CONFIG +- memset(&s->scale_conf, 0, sizeof(mfxExtVPPScaling)); +- s->scale_conf.Header.BufferId = MFX_EXTBUFF_VPP_SCALING; +- s->scale_conf.Header.BufferSz = sizeof(mfxExtVPPScaling); +- s->scale_conf.ScalingMode = s->mode; +- s->ext_buffers[s->num_ext_buf++] = (mfxExtBuffer*)&s->scale_conf; +- av_log(ctx, AV_LOG_VERBOSE, "Scaling mode: %d\n", s->mode); +-#endif +- +- par.ExtParam = s->ext_buffers; +- par.NumExtParam = s->num_ext_buf; +- +- par.AsyncDepth = 1; // TODO async +- +- par.vpp.In = in_frames_hwctx->surfaces[0].Info; +- par.vpp.Out = out_frames_hwctx->surfaces[0].Info; +- +- /* Apparently VPP requires the frame rate to be set to some value, otherwise +- * init will fail (probably for the framerate conversion filter). Since we +- * are only doing scaling here, we just invent an arbitrary +- * value */ +- par.vpp.In.FrameRateExtN = 25; +- par.vpp.In.FrameRateExtD = 1; +- par.vpp.Out.FrameRateExtN = 25; +- par.vpp.Out.FrameRateExtD = 1; +- +- /* Print input memory mode */ +- ff_qsvvpp_print_iopattern(ctx, par.IOPattern & 0x0F, "VPP"); +- /* Print output memory mode */ +- ff_qsvvpp_print_iopattern(ctx, par.IOPattern & 0xF0, "VPP"); +- err = MFXVideoVPP_Init(s->session, &par); +- if (err < 0) +- return ff_qsvvpp_print_error(ctx, err, +- "Error opening the VPP for scaling"); +- else if (err > 0) { +- ff_qsvvpp_print_warning(ctx, err, +- "Warning in VPP initialization"); +- return AVERROR_UNKNOWN; +- } +- +- return 0; +-} +- +-static int init_scale_session(AVFilterContext *ctx, int in_width, int in_height, +- int out_width, int out_height) +-{ +- int ret; +- +- qsvscale_uninit(ctx); +- +- ret = init_out_pool(ctx, out_width, out_height); +- if (ret < 0) +- return ret; +- +- ret = init_out_session(ctx, in_width, in_height); +- if (ret < 0) +- return ret; +- +- return 0; +-} +- +-static int qsvscale_config_props(AVFilterLink *outlink) +-{ +- AVFilterContext *ctx = outlink->src; +- AVFilterLink *inlink = outlink->src->inputs[0]; +- QSVScaleContext *s = ctx->priv; +- int64_t w, h; +- double var_values[VARS_NB], res; +- char *expr; +- int ret; +- +- var_values[VAR_PI] = M_PI; +- var_values[VAR_PHI] = M_PHI; +- var_values[VAR_E] = M_E; +- var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w; +- var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h; +- var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN; +- var_values[VAR_OUT_H] = var_values[VAR_OH] = NAN; +- var_values[VAR_A] = (double) inlink->w / inlink->h; +- var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? 
+- (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1; +- var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR]; +- +- /* evaluate width and height */ +- av_expr_parse_and_eval(&res, (expr = s->w_expr), +- var_names, var_values, +- NULL, NULL, NULL, NULL, NULL, 0, ctx); +- s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res; +- if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr), +- var_names, var_values, +- NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) +- goto fail; +- s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res; +- /* evaluate again the width, as it may depend on the output height */ +- if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr), +- var_names, var_values, +- NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) +- goto fail; +- s->w = res; +- +- w = s->w; +- h = s->h; +- +- /* sanity check params */ +- if (w < -1 || h < -1) { +- av_log(ctx, AV_LOG_ERROR, "Size values less than -1 are not acceptable.\n"); +- return AVERROR(EINVAL); +- } +- if (w == -1 && h == -1) +- s->w = s->h = 0; +- +- if (!(w = s->w)) +- w = inlink->w; +- if (!(h = s->h)) +- h = inlink->h; +- if (w == -1) +- w = av_rescale(h, inlink->w, inlink->h); +- if (h == -1) +- h = av_rescale(w, inlink->h, inlink->w); +- +- if (w > INT_MAX || h > INT_MAX || +- (h * inlink->w) > INT_MAX || +- (w * inlink->h) > INT_MAX) +- av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); +- +- outlink->w = w; +- outlink->h = h; +- +- ret = init_scale_session(ctx, inlink->w, inlink->h, w, h); +- if (ret < 0) +- return ret; +- +- av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n", +- inlink->w, inlink->h, outlink->w, outlink->h); +- +- if (inlink->sample_aspect_ratio.num) +- outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w, +- outlink->w*inlink->h}, +- inlink->sample_aspect_ratio); +- else +- outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; +- +- return 0; +- +-fail: +- av_log(ctx, AV_LOG_ERROR, +- "Error when evaluating the expression '%s'\n", expr); +- return ret; +-} +- +-static int qsvscale_filter_frame(AVFilterLink *link, AVFrame *in) +-{ +- AVFilterContext *ctx = link->dst; +- QSVScaleContext *s = ctx->priv; +- AVFilterLink *outlink = ctx->outputs[0]; +- +- mfxSyncPoint sync = NULL; +- mfxStatus err; +- +- AVFrame *out = NULL; +- int ret = 0; +- +- out = ff_get_video_buffer(outlink, outlink->w, outlink->h); +- if (!out) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } +- +- do { +- err = MFXVideoVPP_RunFrameVPPAsync(s->session, +- (mfxFrameSurface1*)in->data[3], +- (mfxFrameSurface1*)out->data[3], +- NULL, &sync); +- if (err == MFX_WRN_DEVICE_BUSY) +- av_usleep(1); +- } while (err == MFX_WRN_DEVICE_BUSY); +- +- if (err < 0) { +- ret = ff_qsvvpp_print_error(ctx, err, "Error during scaling"); +- goto fail; +- } +- +- if (!sync) { +- av_log(ctx, AV_LOG_ERROR, "No sync during scaling\n"); +- ret = AVERROR_UNKNOWN; +- goto fail; +- } +- +- do { +- err = MFXVideoCORE_SyncOperation(s->session, sync, 1000); +- } while (err == MFX_WRN_IN_EXECUTION); +- if (err < 0) { +- ret = ff_qsvvpp_print_error(ctx, err, "Error synchronizing the operation"); +- goto fail; +- } +- +- ret = av_frame_copy_props(out, in); +- if (ret < 0) +- goto fail; +- +- out->width = outlink->w; +- out->height = outlink->h; +- +- av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den, +- (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w, +- (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, +- INT_MAX); +- +- 
av_frame_free(&in); +- return ff_filter_frame(outlink, out); +-fail: +- av_frame_free(&in); +- av_frame_free(&out); +- return ret; +-} +- +-#define OFFSET(x) offsetof(QSVScaleContext, x) +-#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM +-static const AVOption options[] = { +- { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, +- { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, +- { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, +- +-#if QSV_HAVE_SCALING_CONFIG +- { "mode", "set scaling mode", OFFSET(mode), AV_OPT_TYPE_INT, { .i64 = MFX_SCALING_MODE_DEFAULT}, MFX_SCALING_MODE_DEFAULT, MFX_SCALING_MODE_QUALITY, FLAGS, "mode"}, +- { "low_power", "low power mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_LOWPOWER}, INT_MIN, INT_MAX, FLAGS, "mode"}, +- { "hq", "high quality mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_QUALITY}, INT_MIN, INT_MAX, FLAGS, "mode"}, +-#else +- { "mode", "(not supported)", OFFSET(mode), AV_OPT_TYPE_INT, { .i64 = 0}, 0, INT_MAX, FLAGS, "mode"}, +- { "low_power", "", 0, AV_OPT_TYPE_CONST, { .i64 = 1}, 0, 0, FLAGS, "mode"}, +- { "hq", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2}, 0, 0, FLAGS, "mode"}, +-#endif +- +- { NULL }, +-}; +- +-static const AVClass qsvscale_class = { +- .class_name = "scale_qsv", +- .item_name = av_default_item_name, +- .option = options, +- .version = LIBAVUTIL_VERSION_INT, +-}; +- +-static const AVFilterPad qsvscale_inputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .filter_frame = qsvscale_filter_frame, +- }, +- { NULL } +-}; +- +-static const AVFilterPad qsvscale_outputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .config_props = qsvscale_config_props, +- }, +- { NULL } +-}; +- +-AVFilter ff_vf_scale_qsv = { +- .name = "scale_qsv", +- .description = NULL_IF_CONFIG_SMALL("QuickSync video scaling and format conversion"), +- +- .init = qsvscale_init, +- .uninit = qsvscale_uninit, +- .query_formats = qsvscale_query_formats, +- +- .priv_size = sizeof(QSVScaleContext), +- .priv_class = &qsvscale_class, +- +- .inputs = qsvscale_inputs, +- .outputs = qsvscale_outputs, +- +- .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +-}; +Index: jellyfin-ffmpeg/libavfilter/vf_vpp_qsv.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavfilter/vf_vpp_qsv.c ++++ jellyfin-ffmpeg/libavfilter/vf_vpp_qsv.c +@@ -32,6 +32,7 @@ + #include "formats.h" + #include "internal.h" + #include "avfilter.h" ++#include "filters.h" + #include "libavcodec/avcodec.h" + #include "libavformat/avformat.h" + +@@ -42,14 +43,13 @@ + #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM) + + /* number of video enhancement filters */ +-#define ENH_FILTERS_COUNT (7) +-#define QSV_HAVE_ROTATION QSV_VERSION_ATLEAST(1, 17) +-#define QSV_HAVE_MIRRORING QSV_VERSION_ATLEAST(1, 19) ++#define ENH_FILTERS_COUNT (8) ++#define QSV_HAVE_ROTATION QSV_VERSION_ATLEAST(1, 17) ++#define QSV_HAVE_MIRRORING QSV_VERSION_ATLEAST(1, 19) ++#define QSV_HAVE_SCALING_CONFIG QSV_VERSION_ATLEAST(1, 19) + + typedef struct VPPContext{ +- const AVClass *class; +- +- QSVVPPContext *qsv; ++ QSVVPPContext qsv; + + /* Video Enhancement Algorithms */ + mfxExtVPPDeinterlacing deinterlace_conf; +@@ -59,7 +59,15 @@ typedef struct VPPContext{ + mfxExtVPPProcAmp procamp_conf; + mfxExtVPPRotation rotation_conf; + mfxExtVPPMirroring 
mirroring_conf; ++#ifdef QSV_HAVE_SCALING_CONFIG ++ mfxExtVPPScaling scale_conf; ++#endif + ++ /** ++ * New dimensions. Special values are: ++ * 0 = original width/height ++ * -1 = keep original aspect ++ */ + int out_width; + int out_height; + /** +@@ -83,8 +91,10 @@ typedef struct VPPContext{ + int rotate; /* rotate angle : [0, 90, 180, 270] */ + int hflip; /* flip mode : 0 = off, 1 = HORIZONTAL flip */ + ++ int scale_mode; /* scale mode : 0 = auto, 1 = low power, 2 = high quality */ ++ + /* param for the procamp */ +- int procamp; /* enable procamp */ ++ int procamp; /* enable procamp */ + float hue; + float saturation; + float contrast; +@@ -93,44 +103,10 @@ typedef struct VPPContext{ + char *cx, *cy, *cw, *ch; + char *ow, *oh; + char *output_format_str; +-} VPPContext; +- +-static const AVOption options[] = { +- { "deinterlace", "deinterlace mode: 0=off, 1=bob, 2=advanced", OFFSET(deinterlace), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MFX_DEINTERLACING_ADVANCED, .flags = FLAGS, "deinterlace" }, +- { "bob", "Bob deinterlace mode.", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_DEINTERLACING_BOB }, .flags = FLAGS, "deinterlace" }, +- { "advanced", "Advanced deinterlace mode. ", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_DEINTERLACING_ADVANCED }, .flags = FLAGS, "deinterlace" }, +- +- { "denoise", "denoise level [0, 100]", OFFSET(denoise), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, .flags = FLAGS }, +- { "detail", "enhancement level [0, 100]", OFFSET(detail), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, .flags = FLAGS }, +- { "framerate", "output framerate", OFFSET(framerate), AV_OPT_TYPE_RATIONAL, { .dbl = 0.0 },0, DBL_MAX, .flags = FLAGS }, +- { "procamp", "Enable ProcAmp", OFFSET(procamp), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = FLAGS}, +- { "hue", "ProcAmp hue", OFFSET(hue), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, -180.0, 180.0, .flags = FLAGS}, +- { "saturation", "ProcAmp saturation", OFFSET(saturation), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS}, +- { "contrast", "ProcAmp contrast", OFFSET(contrast), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS}, +- { "brightness", "ProcAmp brightness", OFFSET(brightness), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, -100.0, 100.0, .flags = FLAGS}, + +- { "transpose", "set transpose direction", OFFSET(transpose), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 6, FLAGS, "transpose"}, +- { "cclock_hflip", "rotate counter-clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "transpose" }, +- { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "transpose" }, +- { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "transpose" }, +- { "clock_hflip", "rotate clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "transpose" }, +- { "reversal", "rotate by half-turn", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, .flags=FLAGS, .unit = "transpose" }, +- { "hflip", "flip horizontally", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, .flags=FLAGS, .unit = "transpose" }, +- { "vflip", "flip vertically", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, .flags=FLAGS, .unit = "transpose" }, +- +- { "cw", "set the width crop area expression", OFFSET(cw), AV_OPT_TYPE_STRING, { .str = "iw" }, 0, 0, FLAGS }, +- { "ch", "set the height crop area expression", OFFSET(ch), AV_OPT_TYPE_STRING, { .str = "ih" }, 0, 0, FLAGS }, +- { "cx", "set the x crop area 
expression", OFFSET(cx), AV_OPT_TYPE_STRING, { .str = "(in_w-out_w)/2" }, 0, 0, FLAGS }, +- { "cy", "set the y crop area expression", OFFSET(cy), AV_OPT_TYPE_STRING, { .str = "(in_h-out_h)/2" }, 0, 0, FLAGS }, +- +- { "w", "Output video width", OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS }, +- { "width", "Output video width", OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS }, +- { "h", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, +- { "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, +- { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, +- +- { NULL } +-}; ++ int has_passthrough; /* apply pass through mode if possible */ ++ int field_rate; /* deinterlace mode */ ++} VPPContext; + + static const char *const var_names[] = { + "iw", "in_w", +@@ -141,32 +117,41 @@ static const char *const var_names[] = { + "ch", + "cx", + "cy", ++ "a", "dar", ++ "sar", + NULL + }; + + enum var_name { +- VAR_iW, VAR_IN_W, +- VAR_iH, VAR_IN_H, +- VAR_oW, VAR_OUT_W, VAR_W, +- VAR_oH, VAR_OUT_H, VAR_H, ++ VAR_IW, VAR_IN_W, ++ VAR_IH, VAR_IN_H, ++ VAR_OW, VAR_OUT_W, VAR_W, ++ VAR_OH, VAR_OUT_H, VAR_H, + CW, + CH, + CX, + CY, ++ VAR_A, VAR_DAR, ++ VAR_SAR, + VAR_VARS_NB + }; + + static int eval_expr(AVFilterContext *ctx) + { + #define PASS_EXPR(e, s) {\ +- ret = av_expr_parse(&e, s, var_names, NULL, NULL, NULL, NULL, 0, ctx); \ +- if (ret < 0) {\ +- av_log(ctx, AV_LOG_ERROR, "Error when passing '%s'.\n", s);\ +- goto release;\ ++ if (s) {\ ++ ret = av_expr_parse(&e, s, var_names, NULL, NULL, NULL, NULL, 0, ctx); \ ++ if (ret < 0) { \ ++ av_log(ctx, AV_LOG_ERROR, "Error when passing '%s'.\n", s); \ ++ goto release; \ ++ } \ + }\ + } +-#define CALC_EXPR(e, v, i) {\ +- i = v = av_expr_eval(e, var_values, NULL); \ ++#define CALC_EXPR(e, v, i, d) {\ ++ if (e)\ ++ i = v = av_expr_eval(e, var_values, NULL); \ ++ else\ ++ i = v = d;\ + } + VPPContext *vpp = ctx->priv; + double var_values[VAR_VARS_NB] = { NAN }; +@@ -184,39 +169,43 @@ static int eval_expr(AVFilterContext *ct + PASS_EXPR(cx_expr, vpp->cx); + PASS_EXPR(cy_expr, vpp->cy); + +- var_values[VAR_iW] = ++ var_values[VAR_IW] = + var_values[VAR_IN_W] = ctx->inputs[0]->w; + +- var_values[VAR_iH] = ++ var_values[VAR_IH] = + var_values[VAR_IN_H] = ctx->inputs[0]->h; + ++ var_values[VAR_A] = (double)var_values[VAR_IN_W] / var_values[VAR_IN_H]; ++ var_values[VAR_SAR] = ctx->inputs[0]->sample_aspect_ratio.num ? 
++ (double)ctx->inputs[0]->sample_aspect_ratio.num / ctx->inputs[0]->sample_aspect_ratio.den : 1; ++ var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR]; ++ + /* crop params */ +- CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w); +- CALC_EXPR(ch_expr, var_values[CH], vpp->crop_h); ++ CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w, var_values[VAR_IW]); ++ CALC_EXPR(ch_expr, var_values[CH], vpp->crop_h, var_values[VAR_IH]); + + /* calc again in case cw is relative to ch */ +- CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w); ++ CALC_EXPR(cw_expr, var_values[CW], vpp->crop_w, var_values[VAR_IW]); + + CALC_EXPR(w_expr, +- var_values[VAR_OUT_W] = var_values[VAR_oW] = var_values[VAR_W], +- vpp->out_width); ++ var_values[VAR_OUT_W] = var_values[VAR_OW] = var_values[VAR_W], ++ vpp->out_width, var_values[CW]); + CALC_EXPR(h_expr, +- var_values[VAR_OUT_H] = var_values[VAR_oH] = var_values[VAR_H], +- vpp->out_height); ++ var_values[VAR_OUT_H] = var_values[VAR_OH] = var_values[VAR_H], ++ vpp->out_height, var_values[CH]); + + /* calc again in case ow is relative to oh */ + CALC_EXPR(w_expr, +- var_values[VAR_OUT_W] = var_values[VAR_oW] = var_values[VAR_W], +- vpp->out_width); ++ var_values[VAR_OUT_W] = var_values[VAR_OW] = var_values[VAR_W], ++ vpp->out_width, var_values[CW]); + +- +- CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x); +- CALC_EXPR(cy_expr, var_values[CY], vpp->crop_y); ++ CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x, (var_values[VAR_IW] - var_values[VAR_OW]) / 2); ++ CALC_EXPR(cy_expr, var_values[CY], vpp->crop_y, (var_values[VAR_IH] - var_values[VAR_OH]) / 2); + + /* calc again in case cx is relative to cy */ +- CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x); ++ CALC_EXPR(cx_expr, var_values[CX], vpp->crop_x, (var_values[VAR_IW] - var_values[VAR_OW]) / 2); + +- if ((vpp->crop_w != var_values[VAR_iW]) || (vpp->crop_h != var_values[VAR_iH])) ++ if ((vpp->crop_w != var_values[VAR_IW]) || (vpp->crop_h != var_values[VAR_IH])) + vpp->use_crop = 1; + + release: +@@ -232,11 +221,25 @@ release: + return ret; + } + ++static av_cold int vpp_preinit(AVFilterContext *ctx) ++{ ++ VPPContext *vpp = ctx->priv; ++ /* For AV_OPT_TYPE_STRING options, NULL is handled in other way so ++ * we needn't set default value here ++ */ ++ vpp->saturation = 1.0; ++ vpp->contrast = 1.0; ++ vpp->transpose = -1; ++ vpp->field_rate = 1; ++ ++ return 0; ++} ++ + static av_cold int vpp_init(AVFilterContext *ctx) + { + VPPContext *vpp = ctx->priv; + +- if (!strcmp(vpp->output_format_str, "same")) { ++ if (!vpp->output_format_str || !strcmp(vpp->output_format_str, "same")) { + vpp->out_format = AV_PIX_FMT_NONE; + } else { + vpp->out_format = av_get_pix_fmt(vpp->output_format_str); +@@ -254,11 +257,15 @@ static int config_input(AVFilterLink *in + AVFilterContext *ctx = inlink->dst; + VPPContext *vpp = ctx->priv; + int ret; ++ int64_t ow, oh; + + if (vpp->framerate.den == 0 || vpp->framerate.num == 0) + vpp->framerate = inlink->frame_rate; + +- if (av_cmp_q(vpp->framerate, inlink->frame_rate)) ++ if (vpp->field_rate == 2) ++ vpp->framerate = av_mul_q(inlink->frame_rate, ++ (AVRational){ 2, 1 }); ++ else if (av_cmp_q(vpp->framerate, inlink->frame_rate)) + vpp->use_frc = 1; + + ret = eval_expr(ctx); +@@ -267,11 +274,38 @@ static int config_input(AVFilterLink *in + return ret; + } + +- if (vpp->out_height == 0 || vpp->out_width == 0) { +- vpp->out_width = inlink->w; +- vpp->out_height = inlink->h; ++ ow = vpp->out_width; ++ oh = vpp->out_height; ++ ++ /* sanity check params */ ++ if (ow < -1 || oh < -1) { ++ 
av_log(ctx, AV_LOG_ERROR, "Size values less than -1 are not acceptable.\n"); ++ return AVERROR(EINVAL); + } + ++ if (ow == -1 && oh == -1) ++ vpp->out_width = vpp->out_height = 0; ++ ++ if (!(ow = vpp->out_width)) ++ ow = inlink->w; ++ ++ if (!(oh = vpp->out_height)) ++ oh = inlink->h; ++ ++ if (ow == -1) ++ ow = av_rescale(oh, inlink->w, inlink->h); ++ ++ if (oh == -1) ++ oh = av_rescale(ow, inlink->h, inlink->w); ++ ++ if (ow > INT_MAX || oh > INT_MAX || ++ (oh * inlink->w) > INT_MAX || ++ (ow * inlink->h) > INT_MAX) ++ av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); ++ ++ vpp->out_width = ow; ++ vpp->out_height = oh; ++ + if (vpp->use_crop) { + vpp->crop_x = FFMAX(vpp->crop_x, 0); + vpp->crop_y = FFMAX(vpp->crop_y, 0); +@@ -298,7 +332,7 @@ static int config_output(AVFilterLink *o + outlink->w = vpp->out_width; + outlink->h = vpp->out_height; + outlink->frame_rate = vpp->framerate; +- outlink->time_base = av_inv_q(vpp->framerate); ++ outlink->time_base = inlink->time_base; + + param.filter_frame = NULL; + param.num_ext_buf = 0; +@@ -327,53 +361,46 @@ static int config_output(AVFilterLink *o + param.crop = &crop; + } + +- if (vpp->deinterlace) { +- memset(&vpp->deinterlace_conf, 0, sizeof(mfxExtVPPDeinterlacing)); +- vpp->deinterlace_conf.Header.BufferId = MFX_EXTBUFF_VPP_DEINTERLACING; +- vpp->deinterlace_conf.Header.BufferSz = sizeof(mfxExtVPPDeinterlacing); +- vpp->deinterlace_conf.Mode = vpp->deinterlace == 1 ? +- MFX_DEINTERLACING_BOB : MFX_DEINTERLACING_ADVANCED; ++#define INIT_MFX_EXTBUF(extbuf, id) do { \ ++ memset(&vpp->extbuf, 0, sizeof(vpp->extbuf)); \ ++ vpp->extbuf.Header.BufferId = id; \ ++ vpp->extbuf.Header.BufferSz = sizeof(vpp->extbuf); \ ++ param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->extbuf; \ ++ } while (0) ++ ++#define SET_MFX_PARAM_FIELD(extbuf, field, value) do { \ ++ vpp->extbuf.field = value; \ ++ } while (0) + +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->deinterlace_conf; ++ vpp->qsv.deinterlace_enabled = !!vpp->deinterlace; ++ ++ if (vpp->deinterlace) { ++ INIT_MFX_EXTBUF(deinterlace_conf, MFX_EXTBUFF_VPP_DEINTERLACING); ++ SET_MFX_PARAM_FIELD(deinterlace_conf, Mode, (vpp->deinterlace == 1 ? 
++ MFX_DEINTERLACING_BOB : MFX_DEINTERLACING_ADVANCED)); + } + + if (vpp->use_frc) { +- memset(&vpp->frc_conf, 0, sizeof(mfxExtVPPFrameRateConversion)); +- vpp->frc_conf.Header.BufferId = MFX_EXTBUFF_VPP_FRAME_RATE_CONVERSION; +- vpp->frc_conf.Header.BufferSz = sizeof(mfxExtVPPFrameRateConversion); +- vpp->frc_conf.Algorithm = MFX_FRCALGM_DISTRIBUTED_TIMESTAMP; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->frc_conf; ++ INIT_MFX_EXTBUF(frc_conf, MFX_EXTBUFF_VPP_FRAME_RATE_CONVERSION); ++ SET_MFX_PARAM_FIELD(frc_conf, Algorithm, MFX_FRCALGM_DISTRIBUTED_TIMESTAMP); + } + + if (vpp->denoise) { +- memset(&vpp->denoise_conf, 0, sizeof(mfxExtVPPDenoise)); +- vpp->denoise_conf.Header.BufferId = MFX_EXTBUFF_VPP_DENOISE; +- vpp->denoise_conf.Header.BufferSz = sizeof(mfxExtVPPDenoise); +- vpp->denoise_conf.DenoiseFactor = vpp->denoise; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->denoise_conf; ++ INIT_MFX_EXTBUF(denoise_conf, MFX_EXTBUFF_VPP_DENOISE); ++ SET_MFX_PARAM_FIELD(denoise_conf, DenoiseFactor, vpp->denoise); + } + + if (vpp->detail) { +- memset(&vpp->detail_conf, 0, sizeof(mfxExtVPPDetail)); +- vpp->detail_conf.Header.BufferId = MFX_EXTBUFF_VPP_DETAIL; +- vpp->detail_conf.Header.BufferSz = sizeof(mfxExtVPPDetail); +- vpp->detail_conf.DetailFactor = vpp->detail; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->detail_conf; ++ INIT_MFX_EXTBUF(detail_conf, MFX_EXTBUFF_VPP_DETAIL); ++ SET_MFX_PARAM_FIELD(detail_conf, DetailFactor, vpp->detail); + } + + if (vpp->procamp) { +- memset(&vpp->procamp_conf, 0, sizeof(mfxExtVPPProcAmp)); +- vpp->procamp_conf.Header.BufferId = MFX_EXTBUFF_VPP_PROCAMP; +- vpp->procamp_conf.Header.BufferSz = sizeof(mfxExtVPPProcAmp); +- vpp->procamp_conf.Hue = vpp->hue; +- vpp->procamp_conf.Saturation = vpp->saturation; +- vpp->procamp_conf.Contrast = vpp->contrast; +- vpp->procamp_conf.Brightness = vpp->brightness; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->procamp_conf; ++ INIT_MFX_EXTBUF(procamp_conf, MFX_EXTBUFF_VPP_PROCAMP); ++ SET_MFX_PARAM_FIELD(procamp_conf, Hue, vpp->hue); ++ SET_MFX_PARAM_FIELD(procamp_conf, Saturation, vpp->saturation); ++ SET_MFX_PARAM_FIELD(procamp_conf, Contrast, vpp->contrast); ++ SET_MFX_PARAM_FIELD(procamp_conf, Brightness, vpp->brightness); + } + + if (vpp->transpose >= 0) { +@@ -420,18 +447,14 @@ static int config_output(AVFilterLink *o + + if (vpp->rotate) { + #ifdef QSV_HAVE_ROTATION +- memset(&vpp->rotation_conf, 0, sizeof(mfxExtVPPRotation)); +- vpp->rotation_conf.Header.BufferId = MFX_EXTBUFF_VPP_ROTATION; +- vpp->rotation_conf.Header.BufferSz = sizeof(mfxExtVPPRotation); +- vpp->rotation_conf.Angle = vpp->rotate; ++ INIT_MFX_EXTBUF(rotation_conf, MFX_EXTBUFF_VPP_ROTATION); ++ SET_MFX_PARAM_FIELD(rotation_conf, Angle, vpp->rotate); + + if (MFX_ANGLE_90 == vpp->rotate || MFX_ANGLE_270 == vpp->rotate) { + FFSWAP(int, vpp->out_width, vpp->out_height); + FFSWAP(int, outlink->w, outlink->h); + av_log(ctx, AV_LOG_DEBUG, "Swap width and height for clock/cclock rotation.\n"); + } +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->rotation_conf; + #else + av_log(ctx, AV_LOG_WARNING, "The QSV VPP rotate option is " + "not supported with this MSDK version.\n"); +@@ -441,12 +464,8 @@ static int config_output(AVFilterLink *o + + if (vpp->hflip) { + #ifdef QSV_HAVE_MIRRORING +- memset(&vpp->mirroring_conf, 0, sizeof(mfxExtVPPMirroring)); +- vpp->mirroring_conf.Header.BufferId = MFX_EXTBUFF_VPP_MIRRORING; +- vpp->mirroring_conf.Header.BufferSz = 
sizeof(mfxExtVPPMirroring); +- vpp->mirroring_conf.Type = vpp->hflip; +- +- param.ext_buf[param.num_ext_buf++] = (mfxExtBuffer*)&vpp->mirroring_conf; ++ INIT_MFX_EXTBUF(mirroring_conf, MFX_EXTBUFF_VPP_MIRRORING); ++ SET_MFX_PARAM_FIELD(mirroring_conf, Type, vpp->hflip); + #else + av_log(ctx, AV_LOG_WARNING, "The QSV VPP hflip option is " + "not supported with this MSDK version.\n"); +@@ -454,11 +473,26 @@ static int config_output(AVFilterLink *o + #endif + } + ++ if (inlink->w != outlink->w || inlink->h != outlink->h) { ++#ifdef QSV_HAVE_SCALING_CONFIG ++ INIT_MFX_EXTBUF(scale_conf, MFX_EXTBUFF_VPP_SCALING); ++ SET_MFX_PARAM_FIELD(scale_conf, ScalingMode, vpp->scale_mode); ++#else ++ av_log(ctx, AV_LOG_WARNING, "The QSV VPP Scale option is " ++ "not supported with this MSDK version.\n"); ++#endif ++ } ++ ++#undef INIT_MFX_EXTBUF ++#undef SET_MFX_PARAM_FIELD ++ + if (vpp->use_frc || vpp->use_crop || vpp->deinterlace || vpp->denoise || + vpp->detail || vpp->procamp || vpp->rotate || vpp->hflip || +- inlink->w != outlink->w || inlink->h != outlink->h || in_format != vpp->out_format) +- return ff_qsvvpp_create(ctx, &vpp->qsv, ¶m); ++ inlink->w != outlink->w || inlink->h != outlink->h || in_format != vpp->out_format || ++ !vpp->has_passthrough) ++ return ff_qsvvpp_init(ctx, ¶m); + else { ++ /* No MFX session is created in this case */ + av_log(ctx, AV_LOG_VERBOSE, "qsv vpp pass through mode.\n"); + if (inlink->hw_frames_ctx) + outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); +@@ -467,26 +501,161 @@ static int config_output(AVFilterLink *o + return 0; + } + +-static int filter_frame(AVFilterLink *inlink, AVFrame *picref) ++static int activate(AVFilterContext *ctx) + { +- int ret = 0; +- AVFilterContext *ctx = inlink->dst; +- VPPContext *vpp = inlink->dst->priv; +- AVFilterLink *outlink = ctx->outputs[0]; +- +- if (vpp->qsv) { +- ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref); +- av_frame_free(&picref); ++ AVFilterLink *inlink = ctx->inputs[0]; ++ AVFilterLink *outlink = ctx->outputs[0]; ++ QSVVPPContext *qsv = ctx->priv; ++ AVFrame *in = NULL; ++ int ret, status = 0; ++ int64_t pts = AV_NOPTS_VALUE; ++ ++ FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); ++ ++ if (!qsv->eof) { ++ ret = ff_inlink_consume_frame(inlink, &in); ++ if (ret < 0) ++ return ret; ++ ++ if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { ++ if (status == AVERROR_EOF) { ++ qsv->eof = 1; ++ } ++ } ++ } ++ ++ if (qsv->session) { ++ if (in || qsv->eof) { ++ ret = ff_qsvvpp_filter_frame(qsv, inlink, in); ++ av_frame_free(&in); ++ ++ if (qsv->eof) { ++ ff_outlink_set_status(outlink, status, pts); ++ return 0; ++ } ++ ++ if (qsv->got_frame) { ++ qsv->got_frame = 0; ++ return ret; ++ } ++ } + } else { +- if (picref->pts != AV_NOPTS_VALUE) +- picref->pts = av_rescale_q(picref->pts, inlink->time_base, outlink->time_base); +- ret = ff_filter_frame(outlink, picref); ++ /* No MFX session is created in pass-through mode */ ++ if (in) { ++ if (in->pts != AV_NOPTS_VALUE) ++ in->pts = av_rescale_q(in->pts, inlink->time_base, outlink->time_base); ++ ++ ret = ff_filter_frame(outlink, in); ++ return ret; ++ } + } + +- return ret; ++ if (qsv->eof) { ++ ff_outlink_set_status(outlink, status, pts); ++ return 0; ++ } else { ++ FF_FILTER_FORWARD_WANTED(outlink, inlink); ++ } ++ ++ return FFERROR_NOT_READY; + } + +-static int query_formats(AVFilterContext *ctx) ++static av_cold void vpp_uninit(AVFilterContext *ctx) ++{ ++ ff_qsvvpp_close(ctx); ++} ++ ++static const AVFilterPad vpp_inputs[] = { ++ { ++ .name = "default", 
++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = config_input, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad vpp_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = config_output, ++ }, ++ { NULL } ++}; ++ ++#define DEFINE_QSV_FILTER(x, sn, ln) \ ++static const AVClass x##_class = { \ ++ .class_name = #sn "_qsv", \ ++ .item_name = av_default_item_name, \ ++ .option = x##_options, \ ++ .version = LIBAVUTIL_VERSION_INT, \ ++}; \ ++const AVFilter ff_vf_##sn##_qsv = { \ ++ .name = #sn "_qsv", \ ++ .description = NULL_IF_CONFIG_SMALL("Quick Sync Video " #ln), \ ++ .preinit = x##_preinit, \ ++ .init = vpp_init, \ ++ .uninit = vpp_uninit, \ ++ .priv_size = sizeof(VPPContext), \ ++ .priv_class = &x##_class, \ ++ .inputs = vpp_inputs, \ ++ .outputs = vpp_outputs, \ ++ .query_formats = x##_query_formats, \ ++ .activate = activate, \ ++ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, \ ++}; ++ ++static const AVOption vpp_options[] = { ++ { "deinterlace", "deinterlace mode: 0=off, 1=bob, 2=advanced", OFFSET(deinterlace), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MFX_DEINTERLACING_ADVANCED, .flags = FLAGS, "deinterlace" }, ++ { "bob", "Bob deinterlace mode.", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_DEINTERLACING_BOB }, .flags = FLAGS, "deinterlace" }, ++ { "advanced", "Advanced deinterlace mode. ", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_DEINTERLACING_ADVANCED }, .flags = FLAGS, "deinterlace" }, ++ ++ { "denoise", "denoise level [0, 100]", OFFSET(denoise), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, .flags = FLAGS }, ++ { "detail", "enhancement level [0, 100]", OFFSET(detail), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, .flags = FLAGS }, ++ { "framerate", "output framerate", OFFSET(framerate), AV_OPT_TYPE_RATIONAL, { .dbl = 0.0 },0, DBL_MAX, .flags = FLAGS }, ++ { "procamp", "Enable ProcAmp", OFFSET(procamp), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = FLAGS}, ++ { "hue", "ProcAmp hue", OFFSET(hue), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, -180.0, 180.0, .flags = FLAGS}, ++ { "saturation", "ProcAmp saturation", OFFSET(saturation), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS}, ++ { "contrast", "ProcAmp contrast", OFFSET(contrast), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.0, 10.0, .flags = FLAGS}, ++ { "brightness", "ProcAmp brightness", OFFSET(brightness), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, -100.0, 100.0, .flags = FLAGS}, ++ ++ { "transpose", "set transpose direction", OFFSET(transpose), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 6, FLAGS, "transpose"}, ++ { "cclock_hflip", "rotate counter-clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "transpose" }, ++ { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "transpose" }, ++ { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "transpose" }, ++ { "clock_hflip", "rotate clockwise with horizontal flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "transpose" }, ++ { "reversal", "rotate by half-turn", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, .flags=FLAGS, .unit = "transpose" }, ++ { "hflip", "flip horizontally", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, .flags=FLAGS, .unit = "transpose" }, ++ { "vflip", "flip vertically", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, .flags=FLAGS, .unit = "transpose" }, ++ ++ { "cw", "set the width crop area expression", OFFSET(cw), AV_OPT_TYPE_STRING, { .str = "iw" }, 0, 0, 
FLAGS }, ++ { "ch", "set the height crop area expression", OFFSET(ch), AV_OPT_TYPE_STRING, { .str = "ih" }, 0, 0, FLAGS }, ++ { "cx", "set the x crop area expression", OFFSET(cx), AV_OPT_TYPE_STRING, { .str = "(in_w-out_w)/2" }, 0, 0, FLAGS }, ++ { "cy", "set the y crop area expression", OFFSET(cy), AV_OPT_TYPE_STRING, { .str = "(in_h-out_h)/2" }, 0, 0, FLAGS }, ++ ++ { "w", "Output video width(0=input video width, -1=keep input video aspect)", OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS }, ++ { "width", "Output video width(0=input video width, -1=keep input video aspect)", OFFSET(ow), AV_OPT_TYPE_STRING, { .str="cw" }, 0, 255, .flags = FLAGS }, ++ { "h", "Output video height(0=input video height, -1=keep input video aspect)", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, ++ { "height", "Output video height(0=input video height, -1=keep input video aspect)", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, ++ { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, ++ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = 4 }, 0, INT_MAX, .flags = FLAGS }, ++#if QSV_HAVE_SCALING_CONFIG ++ { "scale_mode", "scale mode", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = MFX_SCALING_MODE_DEFAULT }, MFX_SCALING_MODE_DEFAULT, MFX_SCALING_MODE_QUALITY, .flags = FLAGS, "scale mode" }, ++ { "auto", "auto mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_DEFAULT}, INT_MIN, INT_MAX, FLAGS, "scale mode"}, ++ { "low_power", "low power mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_LOWPOWER}, INT_MIN, INT_MAX, FLAGS, "scale mode"}, ++ { "hq", "high quality mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_QUALITY}, INT_MIN, INT_MAX, FLAGS, "scale mode"}, ++#else ++ { "scale_mode", "(not supported)", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS, "scale mode" }, ++ { "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = 0}, 0, 0, FLAGS, "scale mode"}, ++ { "low_power", "", 0, AV_OPT_TYPE_CONST, { .i64 = 1}, 0, 0, FLAGS, "scale mode"}, ++ { "hq", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2}, 0, 0, FLAGS, "scale mode"}, ++#endif ++ { "passthrough", "Apply pass through mode if possible.", OFFSET(has_passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, .flags = FLAGS }, ++ ++ { NULL } ++}; ++ ++static int vpp_query_formats(AVFilterContext *ctx) + { + int ret; + static const enum AVPixelFormat in_pix_fmts[] = { +@@ -512,48 +681,92 @@ static int query_formats(AVFilterContext + &ctx->outputs[0]->incfg.formats); + } + +-static av_cold void vpp_uninit(AVFilterContext *ctx) ++DEFINE_QSV_FILTER(vpp, vpp, VPP); ++ ++static int default_query_formats(AVFilterContext *ctx) + { +- VPPContext *vpp = ctx->priv; ++ static const enum AVPixelFormat pixel_formats[] = { ++ AV_PIX_FMT_QSV, AV_PIX_FMT_NONE, ++ }; ++ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); ++ int ret; ++ ++ if ((ret = ff_set_common_formats(ctx, pix_fmts)) < 0) ++ return ret; + +- ff_qsvvpp_free(&vpp->qsv); ++ return 0; + } + +-static const AVClass vpp_class = { +- .class_name = "vpp_qsv", +- .item_name = av_default_item_name, +- .option = options, +- .version = LIBAVUTIL_VERSION_INT, +-}; ++static av_cold int default_preinit(AVFilterContext *ctx) ++{ ++ VPPContext *vpp = ctx->priv; + +-static const AVFilterPad vpp_inputs[] = { +- { +- .name = "default", +- .type = 
AVMEDIA_TYPE_VIDEO, +- .config_props = config_input, +- .filter_frame = filter_frame, +- }, +- { NULL } +-}; ++ vpp_preinit(ctx); ++ vpp->has_passthrough = 0; + +-static const AVFilterPad vpp_outputs[] = { +- { +- .name = "default", +- .type = AVMEDIA_TYPE_VIDEO, +- .config_props = config_output, +- }, +- { NULL } ++ return 0; ++} ++ ++static const AVOption qsvscale_options[] = { ++ { "w", "Output video width(0=input video width, -1=keep input video aspect)", OFFSET(ow), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "h", "Output video height(0=input video height, -1=keep input video aspect)", OFFSET(oh), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "cw", "set the width crop area expression", OFFSET(cw), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, ++ { "ch", "set the height crop area expression", OFFSET(ch), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, ++ { "cx", "set the x crop area expression", OFFSET(cx), AV_OPT_TYPE_STRING, { .str = "(iw-ow)/2" }, .flags = FLAGS }, ++ { "cy", "set the y crop area expression", OFFSET(cy), AV_OPT_TYPE_STRING, { .str = "(ih-oh)/2" }, .flags = FLAGS }, ++ { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, ++ ++#if QSV_HAVE_SCALING_CONFIG ++ { "mode", "set scaling mode", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = MFX_SCALING_MODE_DEFAULT}, MFX_SCALING_MODE_DEFAULT, MFX_SCALING_MODE_QUALITY, FLAGS, "mode"}, ++ { "low_power", "low power mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_LOWPOWER}, INT_MIN, INT_MAX, FLAGS, "mode"}, ++ { "hq", "high quality mode", 0, AV_OPT_TYPE_CONST, { .i64 = MFX_SCALING_MODE_QUALITY}, INT_MIN, INT_MAX, FLAGS, "mode"}, ++#else ++ { "mode", "(not supported)", OFFSET(scale_mode), AV_OPT_TYPE_INT, { .i64 = 0}, 0, INT_MAX, FLAGS, "mode"}, ++ { "low_power", "", 0, AV_OPT_TYPE_CONST, { .i64 = 1}, 0, 0, FLAGS, "mode"}, ++ { "hq", "", 0, AV_OPT_TYPE_CONST, { .i64 = 2}, 0, 0, FLAGS, "mode"}, ++#endif ++ ++ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = 4 }, 0, INT_MAX, .flags = FLAGS }, ++ ++ { NULL }, + }; + +-AVFilter ff_vf_vpp_qsv = { +- .name = "vpp_qsv", +- .description = NULL_IF_CONFIG_SMALL("Quick Sync Video VPP."), +- .priv_size = sizeof(VPPContext), +- .query_formats = query_formats, +- .init = vpp_init, +- .uninit = vpp_uninit, +- .inputs = vpp_inputs, +- .outputs = vpp_outputs, +- .priv_class = &vpp_class, +- .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, ++static int qsvscale_query_formats(AVFilterContext *ctx) ++{ ++ return default_query_formats(ctx); ++} ++ ++static av_cold int qsvscale_preinit(AVFilterContext *ctx) ++{ ++ return default_preinit(ctx); ++} ++ ++DEFINE_QSV_FILTER(qsvscale, scale, scaling and format conversion); ++ ++static const AVOption qsvdeint_options[] = { ++ { "mode", "set deinterlace mode", OFFSET(deinterlace), AV_OPT_TYPE_INT, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, ++ { "bob", "bob algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_BOB}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, ++ { "advanced", "Motion adaptive algorithm", 0, AV_OPT_TYPE_CONST, {.i64 = MFX_DEINTERLACING_ADVANCED}, MFX_DEINTERLACING_BOB, MFX_DEINTERLACING_ADVANCED, FLAGS, "mode"}, ++ ++ { "rate", "Generate output at frame rate or field rate", ++ OFFSET(field_rate), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 2, FLAGS, 
"rate" }, ++ { "frame", "Output at frame rate (one frame of output for each field-pair)", ++ 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "rate" }, ++ { "field", "Output at field rate (one frame of output for each field)", ++ 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, "rate" }, ++ ++ { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = 4 }, 0, INT_MAX, .flags = FLAGS }, ++ { NULL }, + }; ++ ++static int qsvdeint_query_formats(AVFilterContext *ctx) ++{ ++ return default_query_formats(ctx); ++} ++ ++static av_cold int qsvdeint_preinit(AVFilterContext *ctx) ++{ ++ return default_preinit(ctx); ++} ++ ++DEFINE_QSV_FILTER(qsvdeint, deinterlace, deinterlacing); +Index: jellyfin-ffmpeg/libavutil/frame.c +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/frame.c ++++ jellyfin-ffmpeg/libavutil/frame.c +@@ -347,6 +347,36 @@ int av_frame_get_buffer(AVFrame *frame, + return AVERROR(EINVAL); + } + ++int av_frame_copy_side_data(AVFrame* dst, const AVFrame* src, int flags) ++{ ++ for (unsigned i = 0; i < src->nb_side_data; i++) { ++ const AVFrameSideData *sd_src = src->side_data[i]; ++ AVFrameSideData *sd_dst; ++ if ( sd_src->type == AV_FRAME_DATA_PANSCAN ++ && (src->width != dst->width || src->height != dst->height)) ++ continue; ++ if (flags & AV_FRAME_COPY_PROPS_FORCECOPY) { ++ sd_dst = av_frame_new_side_data(dst, sd_src->type, ++ sd_src->size); ++ if (!sd_dst) { ++ wipe_side_data(dst); ++ return AVERROR(ENOMEM); ++ } ++ memcpy(sd_dst->data, sd_src->data, sd_src->size); ++ } else { ++ AVBufferRef *ref = av_buffer_ref(sd_src->buf); ++ sd_dst = av_frame_new_side_data_from_buf(dst, sd_src->type, ref); ++ if (!sd_dst) { ++ av_buffer_unref(&ref); ++ wipe_side_data(dst); ++ return AVERROR(ENOMEM); ++ } ++ } ++ av_dict_copy(&sd_dst->metadata, sd_src->metadata, 0); ++ } ++ return 0; ++} ++ + static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy) + { + int ret, i; +@@ -395,31 +425,9 @@ FF_DISABLE_DEPRECATION_WARNINGS + FF_ENABLE_DEPRECATION_WARNINGS + #endif + +- for (i = 0; i < src->nb_side_data; i++) { +- const AVFrameSideData *sd_src = src->side_data[i]; +- AVFrameSideData *sd_dst; +- if ( sd_src->type == AV_FRAME_DATA_PANSCAN +- && (src->width != dst->width || src->height != dst->height)) +- continue; +- if (force_copy) { +- sd_dst = av_frame_new_side_data(dst, sd_src->type, +- sd_src->size); +- if (!sd_dst) { +- wipe_side_data(dst); +- return AVERROR(ENOMEM); +- } +- memcpy(sd_dst->data, sd_src->data, sd_src->size); +- } else { +- AVBufferRef *ref = av_buffer_ref(sd_src->buf); +- sd_dst = av_frame_new_side_data_from_buf(dst, sd_src->type, ref); +- if (!sd_dst) { +- av_buffer_unref(&ref); +- wipe_side_data(dst); +- return AVERROR(ENOMEM); +- } +- } +- av_dict_copy(&sd_dst->metadata, sd_src->metadata, 0); +- } ++ if (ret = av_frame_copy_side_data(dst, src, ++ force_copy ? 
AV_FRAME_COPY_PROPS_FORCECOPY : 0) < 0) ++ return ret; + + #if FF_API_FRAME_QP + FF_DISABLE_DEPRECATION_WARNINGS +@@ -823,6 +831,17 @@ void av_frame_remove_side_data(AVFrame * + } + } + ++void av_frame_remove_all_side_data(AVFrame *frame) ++{ ++ int i; ++ ++ for (i = frame->nb_side_data - 1; i >= 0; i--) { ++ free_side_data(&frame->side_data[i]); ++ frame->side_data[i] = frame->side_data[frame->nb_side_data - 1]; ++ frame->nb_side_data--; ++ } ++} ++ + const char *av_frame_side_data_name(enum AVFrameSideDataType type) + { + switch(type) { +Index: jellyfin-ffmpeg/libavutil/frame.h +=================================================================== +--- jellyfin-ffmpeg.orig/libavutil/frame.h ++++ jellyfin-ffmpeg/libavutil/frame.h +@@ -897,6 +897,22 @@ int av_frame_copy(AVFrame *dst, const AV + int av_frame_copy_props(AVFrame *dst, const AVFrame *src); + + /** ++ * Copy actual data buffers instead of references. ++ */ ++#define AV_FRAME_COPY_PROPS_FORCECOPY 1 ++ ++/** ++ * Copy only side-data from src to dst. ++ * ++ * @param dst a frame to which the side data should be copied. ++ * @param src a frame from which to copy the side data. ++ * @param flags flags of type AV_FRAME_COPY_PROPS_*, controlling copy behavior. ++ * ++ * @return >= 0 on success, a negative AVERROR on error. ++ */ ++int av_frame_copy_side_data(AVFrame* dst, const AVFrame* src, int flags); ++ ++/** + * Get the buffer reference a given data plane is stored in. + * + * @param plane index of the data plane of interest in frame->extended_data. +@@ -951,6 +967,10 @@ AVFrameSideData *av_frame_get_side_data( + */ + void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type); + ++/** ++ * Remove and free all side data in this frame. ++ */ ++void av_frame_remove_all_side_data(AVFrame *frame); + + /** + * Flags for frame cropping. diff --git a/debian/patches/series b/debian/patches/series index ec652d1469b..3fc8da2647e 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -29,3 +29,4 @@ 0029-add-fixes-for-qsv-overlay-to-allow-external-pgssubs.patch 0030-add-fixes-for-a-vaapi-qsv-mapping-error.patch 0031-add-a-vaapi-overlay-filter.patch +0032-add-async-support-for-qsv-vpp.patch
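
Note on the new libavutil surface introduced above: the frame.c and frame.h hunks split the side-data copy loop out of frame_copy_props() into a public av_frame_copy_side_data(), with AV_FRAME_COPY_PROPS_FORCECOPY selecting deep copies of the payload buffers over new references, and add av_frame_remove_all_side_data(), which the reworked vf_overlay_qsv uses to strip side data from the overlay input before compositing. The following is a minimal sketch of how the two entry points behave, assuming a libavutil built with this patch applied (neither symbol exists in unpatched FFmpeg 4.4); the display-matrix payload is an arbitrary choice for illustration.

/*
 * Minimal sketch of the two AVFrame side-data helpers added by the
 * libavutil hunks above. Assumes a libavutil built with this patch;
 * neither av_frame_copy_side_data() nor av_frame_remove_all_side_data()
 * exists in unpatched FFmpeg 4.4.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <libavutil/frame.h>

int main(void)
{
    AVFrame *src = av_frame_alloc();
    AVFrame *dst = av_frame_alloc();
    AVFrameSideData *sd;
    int ret = AVERROR(ENOMEM);

    if (!src || !dst)
        goto end;

    /* Attach one side-data entry to the source frame. */
    sd = av_frame_new_side_data(src, AV_FRAME_DATA_DISPLAYMATRIX,
                                9 * sizeof(int32_t));
    if (!sd)
        goto end;
    memset(sd->data, 0, sd->size);

    /* Deep-copy: FORCECOPY duplicates the payload buffers instead of
     * taking new buffer references, mirroring what frame_copy_props()
     * now does internally when its force_copy flag is set. */
    ret = av_frame_copy_side_data(dst, src, AV_FRAME_COPY_PROPS_FORCECOPY);
    if (ret < 0)
        goto end;
    printf("dst carries %d side-data entries after the copy\n", dst->nb_side_data);

    /* Strip everything again, as vf_overlay_qsv now does for the overlay
     * input frame before compositing. */
    av_frame_remove_all_side_data(dst);
    printf("dst carries %d side-data entries after removal\n", dst->nb_side_data);
    ret = 0;

end:
    av_frame_free(&src);
    av_frame_free(&dst);
    return ret < 0;
}

Against a patched tree this builds with something like gcc demo.c -lavutil. Exposing the copy loop as a public helper lets a filter duplicate or drop side data without touching the rest of the frame properties, which is exactly the capability the overlay change needs.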