diff --git a/NVEnc/NVEnc_readme.txt b/NVEnc/NVEnc_readme.txt index 8c4b11b0..abbea1ba 100644 --- a/NVEnc/NVEnc_readme.txt +++ b/NVEnc/NVEnc_readme.txt @@ -214,6 +214,10 @@ NVIDIA グラフィックドライバ 545.92 今後の更新で設定ファイルの互換性がなくなるかもしれません。 【メモ】 +2023.11.04 (7.36) +[NVEncC] +- --vpp-rffをavswにも対応。 + 2023.10.28 (7.35) [NVEncC] - qvbrをデフォルトに。 diff --git a/NVEncC_Options.en.md b/NVEncC_Options.en.md index 082d7983..0a90649a 100644 --- a/NVEncC_Options.en.md +++ b/NVEncC_Options.en.md @@ -1780,7 +1780,7 @@ Values for parameters will be copied from input file for "input". ### --vpp-rff -Reflect the Repeat Field Flag. The avsync error caused by rff could be solved. Available only when [--avhw](#--avhw-string) is used. +Reflect the Repeat Field Flag. The avsync error caused by rff could be solved. Available only when [--avhw](#--avhw-string) or [--avsw](#--avsw-string) is used. rff of 2 or more will not be supported (only supports rff = 1). Also, it can not be used with [--trim](#--trim-intintintintintint), [--vpp-deinterlace](#--vpp-deinterlace-string). 
diff --git a/NVEncC_Options.ja.md b/NVEncC_Options.ja.md index ed795cf5..f90b2087 100644 --- a/NVEncC_Options.ja.md +++ b/NVEncC_Options.ja.md @@ -1787,7 +1787,7 @@ vppフィルタの適用順は固定で、コマンドラインの順序によ avhwを使用していないがインタレ解除を行いたい場合や、24fps化(Inverse Telecine)を行いたい場合は、[--vpp-afs](#--vpp-afs-param1value1param2value2)を使用する。 ### --vpp-rff -Repeat Field Flagを反映して、フレームを再構築する。rffによる音ズレ問題が解消できる。[--avhw](#--avhw-string)使用時のみ有効。 +Repeat Field Flagを反映して、フレームを再構築する。rffによる音ズレ問題が解消できる。[--avhw](#--avhw-string)か[--avsw](#--avsw-string)使用時のみ有効。 rff=1の場合のみの対応。(rff > 1には対応しない) また、[--trim](#--trim-intintintintintint), [--vpp-deinterlace](#--vpp-deinterlace-string)とは併用できない。 diff --git a/NVEncCore/NVEncCore.cpp b/NVEncCore/NVEncCore.cpp index ff532dfd..ddac5af7 100644 --- a/NVEncCore/NVEncCore.cpp +++ b/NVEncCore/NVEncCore.cpp @@ -483,7 +483,7 @@ NVENCSTATUS NVEncCore::InitInput(InEncodeVideoParam *inputParam, const std::vect //入力モジュールの初期化 if (initReaders(m_pFileReader, m_AudioReaders, &inputParam->input, inputCspOfRawReader, m_pStatus, &inputParam->common, &inputParam->ctrl, HWDecCodecCsp, subburnTrackId, - inputParam->vpp.rff, inputParam->vpp.afs.enable, + inputParam->vpp.rff.enable, inputParam->vpp.afs.enable, m_poolPkt.get(), m_poolFrame.get(), m_qpTable.get(), m_pPerfMonitor.get(), m_pNVLog) != RGY_ERR_NONE) { PrintMes(RGY_LOG_ERROR, _T("failed to initialize file reader(s).\n")); return NV_ENC_ERR_GENERIC; @@ -532,11 +532,11 @@ NVENCSTATUS NVEncCore::InitInput(InEncodeVideoParam *inputParam, const std::vect } #if ENABLE_AVSW_READER - if ((m_nAVSyncMode & (RGY_AVSYNC_VFR | RGY_AVSYNC_FORCE_CFR)) || inputParam->vpp.rff) { + if ((m_nAVSyncMode & (RGY_AVSYNC_VFR | RGY_AVSYNC_FORCE_CFR)) || inputParam->vpp.rff.enable) { tstring err_target; if (m_nAVSyncMode & RGY_AVSYNC_VFR) err_target += _T("avsync vfr, "); if (m_nAVSyncMode & RGY_AVSYNC_FORCE_CFR) err_target += _T("avsync forcecfr, "); - if (inputParam->vpp.rff) err_target += _T("vpp-rff, "); + if (inputParam->vpp.rff.enable) err_target += 
_T("vpp-rff, "); err_target = err_target.substr(0, err_target.length()-2); if (pAVCodecReader) { @@ -566,7 +566,7 @@ NVENCSTATUS NVEncCore::InitInput(InEncodeVideoParam *inputParam, const std::vect m_nAVSyncMode |= RGY_AVSYNC_VFR; const auto timebaseStreamIn = to_rgy(pAVCodecReader->GetInputVideoStream()->time_base); if ((timebaseStreamIn.inv() * m_inputFps.inv()).d() == 1 || timebaseStreamIn.n() > 1000) { //fpsを割り切れるtimebaseなら - if (!inputParam->vpp.afs.enable && !inputParam->vpp.rff) { + if (!inputParam->vpp.afs.enable && !inputParam->vpp.rff.enable) { m_outputTimebase = m_inputFps.inv() * rgy_rational(1, 8); } } @@ -2250,13 +2250,7 @@ RGY_ERR NVEncCore::InitFilters(const InEncodeVideoParam *inputParam) { auto VuiFiltered = inputParam->input.vui; //vpp-rffの制約事項 - if (inputParam->vpp.rff) { -#if ENABLE_AVSW_READER - if (!m_cuvidDec) { - PrintMes(RGY_LOG_ERROR, _T("vpp-rff can only be used with hw decoder.\n")); - return RGY_ERR_UNSUPPORTED; - } -#endif //#if ENABLE_AVSW_READER + if (inputParam->vpp.rff.enable) { if (inputParam->vppnv.deinterlace != cudaVideoDeinterlaceMode_Weave) { PrintMes(RGY_LOG_ERROR, _T("vpp-rff cannot be used with vpp-deinterlace.\n")); return RGY_ERR_UNSUPPORTED; @@ -2290,7 +2284,7 @@ RGY_ERR NVEncCore::InitFilters(const InEncodeVideoParam *inputParam) { || inputParam->vpp.colorspace.enable || inputParam->vpp.pad.enable || inputParam->vpp.subburn.size() > 0 - || inputParam->vpp.rff + || inputParam->vpp.rff.enable || inputParam->vpp.decimate.enable || inputParam->vpp.mpdecimate.enable || inputParam->vpp.selectevery.enable @@ -2388,14 +2382,16 @@ RGY_ERR NVEncCore::InitFilters(const InEncodeVideoParam *inputParam) { m_encFps = param->baseFps; } //rff - if (inputParam->vpp.rff) { + if (inputParam->vpp.rff.enable) { unique_ptr filter(new NVEncFilterRff()); shared_ptr param(new NVEncFilterParamRff()); + param->rff = inputParam->vpp.rff; param->frameIn = inputFrame; param->frameOut = inputFrame; param->baseFps = m_encFps; param->inFps = 
m_inputFps; param->timebase = m_outputTimebase; + param->outFilename = inputParam->common.outputFilename; param->bOutOverwrite = true; NVEncCtxAutoLock(cxtlock(m_dev->vidCtxLock())); auto sts = filter->init(param, m_pNVLog); @@ -3912,8 +3908,6 @@ NVENCSTATUS NVEncCore::Encode() { const int64_t nOutFrameDuration = std::max(1, rational_rescale(1, m_inputFps.inv(), m_outputTimebase)); //固定fpsを仮定した時の1フレームのduration (スケール: m_outputTimebase) int64_t nLastPts = AV_NOPTS_VALUE; - int dec_vpp_rff_sts = 0; //rffの展開状態を示すフラグ - auto add_dec_vpp_param = [&](FrameBufferDataIn *pInputFrame, vector>& vppParams, int64_t outPts, int64_t outDuration) { if (pInputFrame->inputIsHost()) { pInputFrame->setTimeStamp(outPts); @@ -3933,28 +3927,8 @@ NVENCSTATUS NVEncCore::Encode() { case cudaVideoDeinterlaceMode_Weave: oVPP.progressive_frame = pInputFrame->getCuvidInfo()->progressive_frame; oVPP.unpaired_field = 0;// oVPP.progressive_frame; - if (vpp_rff) { - //rffを展開する場合、時間を補正する - if (frameinfo.flags & RGY_FRAME_FLAG_RFF) { - frameinfo.duration = (frameinfo.duration * 2) / 3; - } - if (dec_vpp_rff_sts) { //rff展開中の場合、ptsを1フィールド戻す - frameinfo.timestamp -= frameinfo.duration / 2; - } - } vppParams.push_back(unique_ptr(new FrameBufferDataIn(pInputFrame->getCuvidInfo(), oVPP, frameinfo))); //PrintMes(RGY_LOG_INFO, _T("pts: %lld, duration %lld, progressive:%d, rff:%d\n"), (lls)frameinfo.timestamp, (lls)frameinfo.duration, oVPP.progressive_frame, (frameinfo.flags & RGY_FRAME_FLAG_RFF) ? 
1 : 0); - - if (vpp_rff && (frameinfo.flags & RGY_FRAME_FLAG_RFF)) { - if (dec_vpp_rff_sts) { - frameinfo.flags |= RGY_FRAME_FLAG_RFF_COPY; - //rffを展開する場合、時間を補正する - frameinfo.timestamp += frameinfo.duration; - vppParams.push_back(unique_ptr(new FrameBufferDataIn(pInputFrame->getCuvidInfo(), oVPP, frameinfo))); - //PrintMes(RGY_LOG_INFO, _T("pts: %lld, duration %lld\n"), (lls)frameinfo.timestamp, (lls)frameinfo.duration); - } - dec_vpp_rff_sts ^= 1; //反転 - } PrintMes(RGY_LOG_TRACE, _T("add_dec_vpp_param[dev](%d): outPtsSource %lld, outDuration %d, progressive %d\n"), pInputFrame->getFrameInfo().inputFrameId, frameinfo.timestamp, frameinfo.duration, oVPP.progressive_frame); break; case cudaVideoDeinterlaceMode_Bob: @@ -4011,6 +3985,9 @@ NVENCSTATUS NVEncCore::Encode() { } else { //CFR仮定ではなく、オリジナルの時間を見る outPtsSource = rational_rescale(pInputFrame->getTimeStamp(), srcTimebase, m_outputTimebase); + if (pInputFrame->getDuration() > 0) { + pInputFrame->setDuration(rational_rescale(pInputFrame->getDuration(), srcTimebase, m_outputTimebase)); + } } } PrintMes(RGY_LOG_TRACE, _T("check_pts(%d): nOutEstimatedPts %lld, outPtsSource %lld, outDuration %d\n"), pInputFrame->getFrameInfo().inputFrameId, nOutEstimatedPts, outPtsSource, outDuration); @@ -4187,10 +4164,14 @@ NVENCSTATUS NVEncCore::Encode() { while (filterframes.size() > 0 || bDrain) { //フィルタリングするならここ for (uint32_t ifilter = filterframes.front().second; ifilter < m_vpFilters.size() - 1; ifilter++) { + // コピーを作ってそれをfilter関数に渡す + // vpp-rffなどoverwriteするフィルタのときに、filterframes.pop_front -> push がうまく動作しない + RGYFrameInfo input = filterframes.front().first; + NVEncCtxAutoLock(ctxlock(m_dev->vidCtxLock())); int nOutFrames = 0; RGYFrameInfo *outInfo[16] = { 0 }; - auto sts_filter = m_vpFilters[ifilter]->filter(&filterframes.front().first, (RGYFrameInfo **)&outInfo, &nOutFrames, cudaStreamDefault); + auto sts_filter = m_vpFilters[ifilter]->filter(&input, (RGYFrameInfo **)&outInfo, &nOutFrames, cudaStreamDefault); if (sts_filter != 
RGY_ERR_NONE) { PrintMes(RGY_LOG_ERROR, _T("Error while running filter \"%s\".\n"), m_vpFilters[ifilter]->name().c_str()); return NV_ENC_ERR_GENERIC; @@ -4208,22 +4189,10 @@ NVENCSTATUS NVEncCore::Encode() { } bDrain = false; //途中でフレームが出てきたら、drain完了していない - // 上書きするタイプのフィルタの場合、pop_front -> push_front は不要 - if (m_vpFilters[ifilter]->GetFilterParam()->bOutOverwrite - && filterframes.front().first.ptr - && filterframes.front().first.ptr == outInfo[0]->ptr) { - // 上書きするタイプのフィルタが複数のフレームを返すのはサポートしない - if (nOutFrames > 1) { - PrintMes(RGY_LOG_ERROR, _T("bOutOverwrite = true but nOutFrames = %d at filter[%d][%s].\n"), - nOutFrames, ifilter, m_vpFilters[ifilter]->name().c_str()); - return NV_ENC_ERR_GENERIC; - } - } else { - filterframes.pop_front(); - //最初に出てきたフレームは先頭に追加する - for (int jframe = nOutFrames - 1; jframe >= 0; jframe--) { - filterframes.push_front(std::make_pair(*outInfo[jframe], ifilter + 1)); - } + filterframes.pop_front(); + //最初に出てきたフレームは先頭に追加する + for (int jframe = nOutFrames - 1; jframe >= 0; jframe--) { + filterframes.push_front(std::make_pair(*outInfo[jframe], ifilter + 1)); } } if (bDrain) { diff --git a/NVEncCore/NVEncFilterRff.cpp b/NVEncCore/NVEncFilterRff.cpp index 1ea0cd77..22f9289e 100644 --- a/NVEncCore/NVEncFilterRff.cpp +++ b/NVEncCore/NVEncFilterRff.cpp @@ -33,12 +33,19 @@ #include "NVEncParam.h" #pragma warning (push) +static const int FRAME_OUT_INDEX = FRAME_BUF_SIZE; + +tstring NVEncFilterParamRff::print() const { + return rff.print(); +} NVEncFilterRff::NVEncFilterRff() : - m_nStatus(), - m_fieldBuf(), - m_nFieldBufUsed(-1), - m_nFieldBufPicStruct(RGY_FRAME_FLAG_NONE) { + m_nFieldBufUsed(0), + m_nFieldBufPicStruct({ RGY_FRAME_FLAG_NONE, RGY_FRAME_FLAG_NONE }), + m_ptsOffset(0), + m_prevInputTimestamp(-1), + m_prevInputFlags(RGY_FRAME_FLAG_NONE), + m_fpLog() { m_sFilterName = _T("rff"); } @@ -46,36 +53,102 @@ NVEncFilterRff::~NVEncFilterRff() { close(); } +RGY_ERR NVEncFilterRff::checkParam(const NVEncFilterParam *param) { + auto prm = 
dynamic_cast(param); + if (!prm) { + AddMessage(RGY_LOG_ERROR, _T("Invalid parameter type.\n")); + return RGY_ERR_INVALID_PARAM; + } + //パラメータチェック + if (prm->frameOut.height <= 0 || prm->frameOut.width <= 0) { + AddMessage(RGY_LOG_ERROR, _T("Invalid parameter.\n")); + return RGY_ERR_INVALID_PARAM; + } + return RGY_ERR_NONE; +} + RGY_ERR NVEncFilterRff::init(shared_ptr pParam, shared_ptr pPrintMes) { RGY_ERR sts = RGY_ERR_NONE; m_pPrintMes = pPrintMes; - auto pRffParam = std::dynamic_pointer_cast(pParam); - if (!pRffParam) { + auto prm = std::dynamic_pointer_cast(pParam); + if (!prm) { AddMessage(RGY_LOG_ERROR, _T("Invalid parameter type.\n")); return RGY_ERR_INVALID_PARAM; } - pRffParam->frameOut.pitch = pRffParam->frameIn.pitch; + prm->frameOut.pitch = prm->frameIn.pitch; - if (cmpFrameInfoCspResolution(&m_fieldBuf.frame, &pRffParam->frameOut)) { - m_fieldBuf.frame = pRffParam->frameOut; - auto cudaerr = m_fieldBuf.alloc(); + if (!m_pParam || cmpFrameInfoCspResolution(&m_pParam->frameOut, &prm->frameOut)) { + auto cudaerr = AllocFrameBuf(prm->frameOut, FRAME_BUF_SIZE + 1); if (cudaerr != cudaSuccess) { AddMessage(RGY_LOG_ERROR, _T("failed to allocate memory: %s.\n"), char_to_tstring(cudaGetErrorName(cudaerr)).c_str()); return RGY_ERR_MEMORY_ALLOC; } + if (prm->rff.log) { + m_fpLog = std::unique_ptr(_tfopen((prm->outFilename + _T(".rff.log")).c_str(), _T("w")), fp_deleter()); + } + m_nFieldBufUsed = 0; + m_nFieldBufPicStruct.fill(RGY_FRAME_FLAG_NONE); + m_ptsOffset = 0; + m_prevInputTimestamp = -1; + m_prevInputFlags = RGY_FRAME_FLAG_NONE; } - m_nFieldBufUsed = -1; - m_nPathThrough &= (~(FILTER_PATHTHROUGH_PICSTRUCT)); + m_nPathThrough &= (~(FILTER_PATHTHROUGH_PICSTRUCT | FILTER_PATHTHROUGH_FLAGS | FILTER_PATHTHROUGH_TIMESTAMP)); setFilterInfo(pParam->print()); m_pParam = pParam; return sts; } -tstring NVEncFilterParamRff::print() const { - return _T("rff"); +std::tuple NVEncFilterRff::copyFieldFromBuffer(RGYFrameInfo *dst, const int idx, cudaStream_t& stream) { + 
const int targetIdx = idx % FRAME_BUF_SIZE; + const bool copyTopField = (m_nFieldBufPicStruct[targetIdx] & RGY_FRAME_FLAG_RFF_TFF) != 0; + const int inputFrameId = m_pFrameBuf[targetIdx]->frame.inputFrameId; + // m_cl->copyFrameはframe情報をsrcからコピーする + // dst側の情報を維持するため、あらかじめdstの情報をコピーしておく + copyFrameProp(&m_pFrameBuf[targetIdx]->frame, dst); + auto err = err_to_rgy(copyFrameFieldAsync(dst, &m_pFrameBuf[targetIdx]->frame, copyTopField, copyTopField, stream)); + m_nFieldBufPicStruct[targetIdx] = RGY_FRAME_FLAG_NONE; + return { err, inputFrameId, copyTopField }; +} + +RGY_ERR NVEncFilterRff::copyFieldToBuffer(const RGYFrameInfo *src, const bool copyTopField, cudaStream_t& stream) { + const int targetIdx = (m_nFieldBufUsed++) % m_nFieldBufPicStruct.size(); + if (copyTopField) { + m_nFieldBufPicStruct[targetIdx] = RGY_FRAME_FLAG_RFF | RGY_FRAME_FLAG_RFF_TFF; + } else { + m_nFieldBufPicStruct[targetIdx] = RGY_FRAME_FLAG_RFF | RGY_FRAME_FLAG_RFF_BFF; + } + auto err = err_to_rgy(copyFrameFieldAsync(&m_pFrameBuf[targetIdx]->frame, src, copyTopField, copyTopField, stream)); + copyFrameProp(&m_pFrameBuf[targetIdx]->frame, src); + return err; +} + +int64_t NVEncFilterRff::getInputDuration(const RGYFrameInfo *pInputFrame) { + if (pInputFrame->duration) return pInputFrame->duration; + // durationがない場合は、前のフレームから推定する + if (m_prevInputTimestamp >= 0) { + auto est_duration = rgy_rational(pInputFrame->timestamp - m_prevInputTimestamp, 1); + if (pInputFrame->flags & RGY_FRAME_FLAG_RFF) { + est_duration *= rgy_rational(3, 2); + } + if (m_prevInputFlags & RGY_FRAME_FLAG_RFF) { + est_duration *= rgy_rational(2, 3); + } + return est_duration.round(); + } + // わからない場合はfpsから推定する + auto prm = std::dynamic_pointer_cast(m_pParam); + if (!prm) { + AddMessage(RGY_LOG_ERROR, _T("Invalid parameter type.\n")); + return -1; + } + return (prm->timebase.inv() / prm->inFps).round(); +} + +RGY_FRAME_FLAGS NVEncFilterRff::getPrevBufFlags() const { + return m_nFieldBufPicStruct[(m_nFieldBufUsed - 1) % 
m_nFieldBufPicStruct.size()]; } RGY_ERR NVEncFilterRff::run_filter(const RGYFrameInfo *pInputFrame, RGYFrameInfo **ppOutputFrames, int *pOutputFrameNum, cudaStream_t stream) { @@ -85,61 +158,113 @@ RGY_ERR NVEncFilterRff::run_filter(const RGYFrameInfo *pInputFrame, RGYFrameInfo return sts; } - auto pRffParam = std::dynamic_pointer_cast(m_pParam); - if (!pRffParam) { + auto prm = std::dynamic_pointer_cast(m_pParam); + if (!prm) { AddMessage(RGY_LOG_ERROR, _T("Invalid parameter type.\n")); return RGY_ERR_INVALID_PARAM; } - //出力先のフレーム - auto *pOutFrame = ppOutputFrames[0]; + const int64_t input_duration = getInputDuration(pInputFrame); - if ((pInputFrame->flags & (RGY_FRAME_FLAG_RFF | RGY_FRAME_FLAG_RFF_COPY)) == RGY_FRAME_FLAG_RFF) { - //RGY_FRAME_FLAG_RFFフラグのみ立っているとき、状態を反転する - m_nStatus ^= 1; - } + const auto prevBufFieldPicStruct = getPrevBufFlags(); - //コピー先 - RGY_FRAME_FLAGS bufPicStruct = RGY_FRAME_FLAG_NONE; - int bufDst = -1; //コピーしない - if (m_nStatus == 1) { - //コピー先の決定 - bufDst = (m_nFieldBufUsed < 0) ? 0 : m_nFieldBufUsed ^ 1; - - //フィールドをバッファにコピー - bufPicStruct = pInputFrame->flags & (RGY_FRAME_FLAG_RFF_TFF | RGY_FRAME_FLAG_RFF_BFF); - auto cudaerr = copyFrameFieldAsync(&m_fieldBuf.frame, pInputFrame, - bufDst ? false : true, - (bufPicStruct & RGY_FRAME_FLAG_RFF_BFF) ? false : true, - stream); - if (cudaerr != cudaSuccess) { - AddMessage(RGY_LOG_ERROR, _T("failed to copy frame to field buffer: %s.\n"), char_to_tstring(cudaGetErrorName(cudaerr)).c_str()); - return RGY_ERR_CUDA; + const int inputTFF = (pInputFrame->flags & RGY_FRAME_FLAG_RFF_TFF) ? 1 : 0; + const int inputRFF = (pInputFrame->flags & RGY_FRAME_FLAG_RFF) ? 1 : 0; + const int prevFieldCached = (prevBufFieldPicStruct & RGY_FRAME_FLAG_RFF) ? 1 : 0; + const auto outputPicstruct = ((inputTFF + prevFieldCached) & 1) ? 
RGY_PICSTRUCT_FRAME_TFF : RGY_PICSTRUCT_FRAME_BFF; + const auto prevInputFlags = m_prevInputFlags; + + auto log_mes = strsprintf(_T("%6d, %12lld: %12s %s %s"), + pInputFrame->inputFrameId, pInputFrame->timestamp, picstrcut_to_str(pInputFrame->picstruct), + inputTFF ? _T("TFF") : _T(" "), + inputRFF ? _T("RFF") : _T(" ")); + + m_prevInputTimestamp = pInputFrame->timestamp; + m_prevInputFlags = pInputFrame->flags; + + const RGY_FRAME_FLAGS rff_flags = RGY_FRAME_FLAG_RFF | RGY_FRAME_FLAG_RFF_COPY | RGY_FRAME_FLAG_RFF_TFF | RGY_FRAME_FLAG_RFF_BFF; + if (!(prevBufFieldPicStruct & RGY_FRAME_FLAG_RFF)) { //バッファが使われていない場合 + // 入力フレームはそのまま (入力フレームと出力フレームは同じなので、コピーの必要はない) + if (!ppOutputFrames[0]->duration) { + ppOutputFrames[0]->duration = input_duration; } - } - if (m_nFieldBufUsed >= 0) { - //バッファからフィールドをコピー - auto cudaerr = copyFrameFieldAsync(pOutFrame, &m_fieldBuf.frame, - (m_nFieldBufPicStruct & RGY_FRAME_FLAG_RFF_BFF) ? false : true, - m_nFieldBufUsed ? false : true, - stream); - if (cudaerr != cudaSuccess) { - AddMessage(RGY_LOG_ERROR, _T("failed to copy frame to field buffer: %s.\n"), char_to_tstring(cudaGetErrorName(cudaerr)).c_str()); - return RGY_ERR_CUDA; + // RFF_TFFかRFF_BFFがあれば、RFFフラグの適用区間なので、RFF_XXXに合わせてpicstructを変更する + if (((pInputFrame->flags | prevInputFlags) & (RGY_FRAME_FLAG_RFF_TFF | RGY_FRAME_FLAG_RFF_BFF)) + || (m_prevInputPicStruct & RGY_PICSTRUCT_INTERLACED)) { + ppOutputFrames[0]->picstruct = outputPicstruct; } - } - m_nFieldBufUsed = bufDst; - m_nFieldBufPicStruct = bufPicStruct; + if ((pInputFrame->flags & RGY_FRAME_FLAG_RFF) != 0) { + // RFFがある場合、対応するフィールドをコピー + const auto copyTopField = (pInputFrame->flags & RGY_FRAME_FLAG_RFF_TFF) != 0; + if ((sts = copyFieldToBuffer(pInputFrame, copyTopField, stream)) != RGY_ERR_NONE) { return sts; } + //rffを展開する場合、時間を補正する + m_ptsOffset = -1 * input_duration / 3; + ppOutputFrames[0]->duration += m_ptsOffset; + } + log_mes += strsprintf(_T(" -> %12lld: [%6d/%6d]: %12s\n"), ppOutputFrames[0]->timestamp, + 
ppOutputFrames[0]->inputFrameId, ppOutputFrames[0]->inputFrameId, picstrcut_to_str(ppOutputFrames[0]->picstruct)); + } else { //バッファが使われている場合 + if (pInputFrame->flags & RGY_FRAME_FLAG_RFF) { + // RFFがある場合、自分をコピー + *pOutputFrameNum = 2; + ppOutputFrames[1] = &m_pFrameBuf[FRAME_OUT_INDEX]->frame; + sts = err_to_rgy(copyFrameAsync(ppOutputFrames[1], pInputFrame, stream)); + if (sts != RGY_ERR_NONE) { return sts; } + + // m_nFieldBufPicStruct側をバッファからコピー + auto [err, bufInputFrameId, copiedTopField] = copyFieldFromBuffer(ppOutputFrames[0], m_nFieldBufUsed - 1, stream); + if (err != RGY_ERR_NONE) { return err; } + + ppOutputFrames[0]->picstruct = outputPicstruct; + ppOutputFrames[0]->duration = input_duration * 2 / 3; + ppOutputFrames[0]->timestamp += m_ptsOffset; + + ppOutputFrames[1]->picstruct = outputPicstruct; + ppOutputFrames[1]->duration = input_duration - m_ptsOffset - ppOutputFrames[0]->duration; + ppOutputFrames[1]->timestamp = ppOutputFrames[0]->timestamp + ppOutputFrames[0]->duration; + ppOutputFrames[1]->inputFrameId = ppOutputFrames[0]->inputFrameId; + m_ptsOffset = 0; + + const auto log_mes_len = log_mes.length(); - const int input_tff = (pInputFrame->flags & RGY_FRAME_FLAG_RFF_TFF) ? 1 : 0; - const int input_rff = (pInputFrame->flags & RGY_FRAME_FLAG_RFF) ? 1 : 0; - const int output_tff = (input_tff + (input_rff ^ m_nStatus)) & 1; + log_mes += strsprintf(_T(" -> %12lld: [%6d/%6d]: %12s\n"), ppOutputFrames[0]->timestamp, + (copiedTopField) ? bufInputFrameId : ppOutputFrames[0]->inputFrameId, + (copiedTopField) ? 
ppOutputFrames[0]->inputFrameId : bufInputFrameId, + picstrcut_to_str(ppOutputFrames[0]->picstruct)); + log_mes += decltype(log_mes)(log_mes_len, _T(' ')); + log_mes += strsprintf(_T(" + %12lld: [%6d/%6d]: %12s\n"), ppOutputFrames[1]->timestamp, + ppOutputFrames[1]->inputFrameId, ppOutputFrames[1]->inputFrameId, + picstrcut_to_str(ppOutputFrames[1]->picstruct)); + } else { + const auto copyTopField = (prevBufFieldPicStruct & RGY_FRAME_FLAG_RFF_TFF) != 0; + if ((sts = copyFieldToBuffer(pInputFrame, copyTopField, stream)) != RGY_ERR_NONE) { return sts; } + + // m_nFieldBufPicStruct側をバッファからコピー + auto [err, bufInputFrameId, copiedTopField] = copyFieldFromBuffer(ppOutputFrames[0], m_nFieldBufUsed - 2, stream); + if (err != RGY_ERR_NONE) { return sts; } + ppOutputFrames[0]->picstruct = outputPicstruct; + ppOutputFrames[0]->timestamp += m_ptsOffset; + if (!ppOutputFrames[0]->duration) { + ppOutputFrames[0]->duration = input_duration; + } + + log_mes += strsprintf(_T(" -> %12lld: [%6d/%6d]: %12s\n"), ppOutputFrames[0]->timestamp, + (copiedTopField) ? bufInputFrameId : ppOutputFrames[0]->inputFrameId, + (copiedTopField) ? ppOutputFrames[0]->inputFrameId : bufInputFrameId, + picstrcut_to_str(ppOutputFrames[0]->picstruct)); + } + } + ppOutputFrames[0]->flags &= ~(rff_flags); + m_prevInputPicStruct = outputPicstruct; + if (m_fpLog) { + fprintf(m_fpLog.get(), "%s", tchar_to_string(log_mes).c_str()); + } + //AddMessage(RGY_LOG_WARN, _T("%s"), log_mes.c_str()); - pOutFrame->picstruct = (output_tff) ? 
RGY_PICSTRUCT_FRAME_TFF : RGY_PICSTRUCT_FRAME_BFF; return sts; } void NVEncFilterRff::close() { m_pFrameBuf.clear(); - m_fieldBuf.clear(); + m_fpLog.reset(); } diff --git a/NVEncCore/NVEncFilterRff.h b/NVEncCore/NVEncFilterRff.h index 67276c27..bd0163a9 100644 --- a/NVEncCore/NVEncFilterRff.h +++ b/NVEncCore/NVEncFilterRff.h @@ -31,13 +31,16 @@ #include "NVEncFilter.h" #include "NVEncParam.h" +static const int FRAME_BUF_SIZE = 2; class NVEncFilterParamRff : public NVEncFilterParam { public: + VppRff rff; rgy_rational inFps; rgy_rational timebase; + tstring outFilename; - NVEncFilterParamRff() : inFps(), timebase() { + NVEncFilterParamRff() : rff(), inFps(), timebase(), outFilename() { }; virtual ~NVEncFilterParamRff() {}; @@ -53,8 +56,18 @@ class NVEncFilterRff : public NVEncFilter { virtual RGY_ERR run_filter(const RGYFrameInfo *pInputFrame, RGYFrameInfo **ppOutputFrames, int *pOutputFrameNum, cudaStream_t stream) override; virtual void close() override; - int m_nStatus; - CUFrameBuf m_fieldBuf; + RGY_ERR checkParam(const NVEncFilterParam *param); + int64_t getInputDuration(const RGYFrameInfo *pInputFrame); + RGY_FRAME_FLAGS getPrevBufFlags() const; + + std::tuple copyFieldFromBuffer(RGYFrameInfo *dst, const int idx, cudaStream_t& stream); + RGY_ERR copyFieldToBuffer(const RGYFrameInfo *src, const bool copyTopField, cudaStream_t& stream); + int m_nFieldBufUsed; - RGY_FRAME_FLAGS m_nFieldBufPicStruct; + std::array m_nFieldBufPicStruct; + int64_t m_ptsOffset; + int64_t m_prevInputTimestamp; + RGY_FRAME_FLAGS m_prevInputFlags; + RGY_PICSTRUCT m_prevInputPicStruct; + std::unique_ptr m_fpLog; }; diff --git a/NVEncCore/rgy_cmd.cpp b/NVEncCore/rgy_cmd.cpp index 414651d8..235ec494 100644 --- a/NVEncCore/rgy_cmd.cpp +++ b/NVEncCore/rgy_cmd.cpp @@ -1019,7 +1019,7 @@ int parse_one_vpp_option(const TCHAR *option_name, const TCHAR *strInput[], int "top", "bottom", "left", "right", "method_switch", "coeff_shift", "thre_shift", "thre_deint", "thre_motion_y", "thre_motion_c", 
"level", "shift", "drop", "smooth", "24fps", "tune", "timecode", "ini", "preset", -#if ENCODER_NVENC +#if ENABLE_VPP_FILTER_AFS_RFF "rff", #endif "log" @@ -1220,7 +1220,7 @@ int parse_one_vpp_option(const TCHAR *option_name, const TCHAR *strInput[], int } continue; } - if (param_arg == _T("rff")) { + if (param_arg == _T("rff") && ENABLE_VPP_FILTER_AFS_RFF) { bool b = false; if (!cmd_string_to_bool(&b, param_val)) { vpp->afs.rff = b; @@ -1439,10 +1439,52 @@ int parse_one_vpp_option(const TCHAR *option_name, const TCHAR *strInput[], int } return 0; } + if (IS_OPTION("vpp-rff") && ENABLE_VPP_FILTER_RFF) { - vpp->rff = true; + vpp->rff.enable = true; + if (i + 1 >= nArgNum || strInput[i + 1][0] == _T('-')) { + return 0; + } + i++; + + const auto paramList = std::vector{ "log" }; + + for (const auto& param : split(strInput[i], _T(","))) { + auto pos = param.find_first_of(_T("=")); + if (pos != std::string::npos) { + auto param_arg = param.substr(0, pos); + auto param_val = param.substr(pos + 1); + param_arg = tolowercase(param_arg); + if (param_arg == _T("enable")) { + bool b = false; + if (!cmd_string_to_bool(&b, param_val)) { + vpp->rff.enable = b; + } else { + print_cmd_error_invalid_value(tstring(option_name) + _T(" ") + param_arg + _T("="), param_val); + return 1; + } + continue; + } + if (param_arg == _T("log")) { + bool b = false; + if (!cmd_string_to_bool(&b, param_val)) { + vpp->rff.log = b; + } else { + print_cmd_error_invalid_value(tstring(option_name) + _T(" ") + param_arg + _T("="), param_val); + return 1; + } + continue; + } + print_cmd_error_unknown_opt_param(option_name, param_arg, paramList); + return 1; + } else { + print_cmd_error_unknown_opt_param(option_name, param, paramList); + return 1; + } + } return 0; } + if (IS_OPTION("vpp-select-every") && ENABLE_VPP_FILTER_SELECT_EVERY) { vpp->selectevery.enable = true; if (i + 1 >= nArgNum || strInput[i + 1][0] == _T('-')) { @@ -3325,13 +3367,9 @@ int parse_one_input_option(const TCHAR *option_name, const 
TCHAR *strInput[], in i++; int value = 0; if (get_list_value(list_interlaced, strInput[i], &value)) { - if (ENCODER_QSV && value == (int)RGY_PICSTRUCT_AUTO) { //qsvではinterlace autoは未サポート - print_cmd_error_invalid_value(option_name, strInput[i], _T(""), list_interlaced, _countof(list_interlaced) - (ENCODER_QSV ? 2 : 1)); - return 1; - } input->picstruct = (RGY_PICSTRUCT)value; } else { - print_cmd_error_invalid_value(option_name, strInput[i], _T(""), list_interlaced, _countof(list_interlaced) - (ENCODER_QSV ? 2 : 1)); + print_cmd_error_invalid_value(option_name, strInput[i], _T(""), list_interlaced, _countof(list_interlaced)); return 1; } return 0; @@ -5827,7 +5865,19 @@ tstring gen_cmd(const RGYParamVpp *param, const RGYParamVpp *defaultPrm, bool sa cmd << _T(" --vpp-yadif"); } } - OPT_BOOL(_T("--vpp-rff"), _T(""), rff); + if (param->rff != defaultPrm->rff) { + tmp.str(tstring()); + if (!param->rff.enable && save_disabled_prm) { + tmp << _T(",enable=false"); + } + if (param->rff.enable || save_disabled_prm) { + ADD_BOOL(_T("log"), rff.log); + } + if (!tmp.str().empty()) { + cmd << _T(" --vpp-rff ") << tmp.str().substr(1); + } + } + if (param->decimate != defaultPrm->decimate) { tmp.str(tstring()); if (!param->decimate.enable && save_disabled_prm) { @@ -7035,7 +7085,7 @@ tstring gen_cmd_help_vpp() { _T(" smooth= (スムージング) enable smoothing (default=%s)\n") _T(" 24fps= (24fps化) force 30fps->24fps (default=%s)\n") _T(" tune= (調整モード) show scan result (default=%s)\n") -#if ENCODER_NVENC +#if ENABLE_VPP_FILTER_AFS_RFF _T(" rff= rff flag aware (default=%s)\n") #endif _T(" timecode= output timecode (default=%s)\n") @@ -7100,7 +7150,8 @@ tstring gen_cmd_help_vpp() { #endif #if ENABLE_VPP_FILTER_RFF str += strsprintf(_T("\n") - _T(" --vpp-rff apply rff flag, with avhw reader only.\n")); + _T(" --vpp-rff apply rff flag, with %savsw reader only.\n"), + ENABLE_VPP_FILTER_RFF_AVHW ? 
_T("avhw/") : _T("")); #endif #if ENABLE_VPP_FILTER_SELECT_EVERY str += strsprintf(_T("\n") diff --git a/NVEncCore/rgy_input_avcodec.cpp b/NVEncCore/rgy_input_avcodec.cpp index 1cec31e2..38b3c21f 100644 --- a/NVEncCore/rgy_input_avcodec.cpp +++ b/NVEncCore/rgy_input_avcodec.cpp @@ -1720,6 +1720,7 @@ RGY_ERR RGYInputAvcodec::Init(const TCHAR *strFileName, VideoInfo *inputInfo, co m_Demux.video.stream->time_base.num, m_Demux.video.stream->time_base.den, av_stream_get_codec_timebase(m_Demux.video.stream).num, av_stream_get_codec_timebase(m_Demux.video.stream).den); + m_Demux.video.decRFFStatus = 0; m_Demux.video.findPosLastIdx = 0; m_logFramePosList.clear(); if (input_prm->logFramePosList.length() > 0) { @@ -2992,15 +2993,20 @@ RGY_ERR RGYInputAvcodec::LoadNextFrameInternal(RGYFrame *pSurface) { } auto flags = RGY_FRAME_FLAG_NONE; const auto findPos = m_Demux.frames.findpts(m_Demux.video.frame->pts, &m_Demux.video.findPosLastIdx); - if (findPos.poc != FRAMEPOS_POC_INVALID - && (findPos.pic_struct & RGY_PICSTRUCT_INTERLACED) == 0 - && findPos.repeat_pict > 1) { - flags |= RGY_FRAME_FLAG_RFF; + if (findPos.poc != FRAMEPOS_POC_INVALID) { + if (findPos.repeat_pict > 1) { + flags |= RGY_FRAME_FLAG_RFF; + m_Demux.video.decRFFStatus ^= 1; // 反転させる + } + if (m_Demux.video.frame->top_field_first || findPos.repeat_pict > 1 || m_Demux.video.decRFFStatus) { + // RFF用のTFF/BFFを示すフラグを設定 (picstructとは別) + flags |= (m_Demux.video.frame->top_field_first) ? 
RGY_FRAME_FLAG_RFF_TFF : RGY_FRAME_FLAG_RFF_BFF; + } } pSurface->setFlags(flags); pSurface->setTimestamp(m_Demux.video.frame->pts); pSurface->setDuration(m_Demux.video.frame->pkt_duration); - if (pSurface->picstruct() == RGY_PICSTRUCT_AUTO) { //autoの時は、frameのインタレ情報をセットする + if (m_inputVideoInfo.picstruct == RGY_PICSTRUCT_AUTO) { //autoの時は、frameのインタレ情報をセットする pSurface->setPicstruct(picstruct_avframe_to_rgy(m_Demux.video.frame)); } pSurface->dataList().clear(); diff --git a/NVEncCore/rgy_input_avcodec.h b/NVEncCore/rgy_input_avcodec.h index 7d013c14..ca1ddfea 100644 --- a/NVEncCore/rgy_input_avcodec.h +++ b/NVEncCore/rgy_input_avcodec.h @@ -728,6 +728,7 @@ typedef struct AVDemuxVideo { uint32_t findPosLastIdx; //findpos用のindex int nSampleGetCount; //sampleをGetNextBitstreamで取得した数 + int decRFFStatus; //swデコード時にRFF展開中かどうか AVCodecParserContext *pParserCtx; //動画ストリームのParser AVCodecContext *pCodecCtxParser; //動画ストリームのParser用 diff --git a/NVEncCore/rgy_prm.cpp b/NVEncCore/rgy_prm.cpp index 1d23d103..6bdce697 100644 --- a/NVEncCore/rgy_prm.cpp +++ b/NVEncCore/rgy_prm.cpp @@ -269,6 +269,27 @@ bool VppColorspace::operator!=(const VppColorspace &x) const { return !(*this == x); } +VppRff::VppRff() : + enable(false), + log(false) { + +} + +bool VppRff::operator==(const VppRff &x) const { + if ( enable != x.enable + || log != x.log) { + return false; + } + return true; +} +bool VppRff::operator!=(const VppRff &x) const { + return !(*this == x); +} + +tstring VppRff::print() const { + return strsprintf(_T("rff: log %s"), (log) ? 
_T("on") : _T("off")); +} + VppDelogo::VppDelogo() : enable(false), logoFilePath(), @@ -1289,7 +1310,7 @@ RGYParamVpp::RGYParamVpp() : afs(), nnedi(), yadif(), - rff(false), + rff(), selectevery(), decimate(), mpdecimate(), diff --git a/NVEncCore/rgy_prm.h b/NVEncCore/rgy_prm.h index 7d388435..5a6cae96 100644 --- a/NVEncCore/rgy_prm.h +++ b/NVEncCore/rgy_prm.h @@ -49,9 +49,11 @@ static const int DEFAULT_VIDEO_IGNORE_TIMESTAMP_ERROR = 10; #define ENABLE_VPP_FILTER_COLORSPACE (ENCODER_QSV || ENCODER_VCEENC || ENCODER_MPP || CLFILTERS_AUF) #endif #define ENABLE_VPP_FILTER_AFS (ENCODER_QSV || ENCODER_NVENC || ENCODER_VCEENC || ENCODER_MPP) +#define ENABLE_VPP_FILTER_AFS_RFF (ENCODER_QSV || ENCODER_NVENC || ENCODER_VCEENC || ENCODER_MPP) #define ENABLE_VPP_FILTER_NNEDI (ENCODER_QSV || ENCODER_NVENC || ENCODER_VCEENC || ENCODER_MPP || CLFILTERS_AUF) #define ENABLE_VPP_FILTER_YADIF (ENCODER_QSV || ENCODER_NVENC || ENCODER_VCEENC || ENCODER_MPP) -#define ENABLE_VPP_FILTER_RFF (ENCODER_NVENC) +#define ENABLE_VPP_FILTER_RFF (ENCODER_QSV || ENCODER_NVENC || ENCODER_VCEENC || ENCODER_MPP) +#define ENABLE_VPP_FILTER_RFF_AVHW (ENCODER_QSV || ENCODER_NVENC) #define ENABLE_VPP_FILTER_SELECT_EVERY (ENCODER_NVENC) #define ENABLE_VPP_FILTER_DECIMATE (ENCODER_QSV || ENCODER_NVENC || ENCODER_VCEENC || ENCODER_MPP) #define ENABLE_VPP_FILTER_MPDECIMATE (ENCODER_QSV || ENCODER_NVENC || ENCODER_VCEENC || ENCODER_MPP) @@ -784,6 +786,16 @@ struct VppColorspace { bool operator!=(const VppColorspace &x) const; }; +struct VppRff { + bool enable; + bool log; + + VppRff(); + bool operator==(const VppRff& x) const; + bool operator!=(const VppRff& x) const; + tstring print() const; +}; + struct VppDelogo { bool enable; tstring logoFilePath; //ロゴファイル名 @@ -1284,7 +1296,7 @@ struct RGYParamVpp { VppAfs afs; VppNnedi nnedi; VppYadif yadif; - bool rff; + VppRff rff; VppSelectEvery selectevery; VppDecimate decimate; VppMpdecimate mpdecimate; diff --git a/NVEncCore/rgy_util.h b/NVEncCore/rgy_util.h index 
0e32a794..bcba2d9c 100644 --- a/NVEncCore/rgy_util.h +++ b/NVEncCore/rgy_util.h @@ -398,6 +398,13 @@ class rgy_rational { return tmp; } + T round() const { + if (den == 1) { + return num; + } + return (T)(qdouble() + 0.5); + } + rgy_rational operator+ () { return *this; } diff --git a/NVEncCore/rgy_version.h b/NVEncCore/rgy_version.h index 63d45890..233ed079 100644 --- a/NVEncCore/rgy_version.h +++ b/NVEncCore/rgy_version.h @@ -29,9 +29,9 @@ #ifndef __RGY_CONFIG_H__ #define __RGY_CONFIG_H__ -#define VER_FILEVERSION 0,7,35,0 -#define VER_STR_FILEVERSION "7.35" -#define VER_STR_FILEVERSION_TCHAR _T("7.35") +#define VER_FILEVERSION 0,7,36,0 +#define VER_STR_FILEVERSION "7.36" +#define VER_STR_FILEVERSION_TCHAR _T("7.36") #ifdef _M_IX86 #define BUILD_ARCH_STR _T("x86")