From 7a7117f2f70b715df6b3cc0e1829de3e9247b571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 1 Mar 2023 22:38:46 +0100 Subject: [PATCH 1/3] Cache framebuffer copies (for self-texturing) until the next TexFlush instruction. Fixes #17030 , or at least improves on it - for optimal performance that big framebuffer used for bloom should be split like in Killzone, but it's not trivial. The regression in 1.14 is fixed with this, at least. I tried it with a few other games with no issues - it seems games are using TexFlush when needed. But let's see if it really is safe to rely on that... There might also be other places we should call DiscardFramebufferCopy in. --- GPU/Common/FramebufferManagerCommon.cpp | 11 +++++++++++ GPU/Common/FramebufferManagerCommon.h | 6 ++++++ GPU/GPUCommonHW.cpp | 9 ++++++++- GPU/GPUCommonHW.h | 2 ++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 91005fd71ca7..cf96977c42bb 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -546,6 +546,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer // TODO: Is it worth trying to upload the depth buffer (only if it wasn't copied above..?) } + DiscardFramebufferCopy(); + // We already have it! } else if (vfb != currentRenderVfb_) { // Use it as a render target. @@ -562,6 +564,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(Framebuffer NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth); CopyToColorFromOverlappingFramebuffers(vfb); gstate_c.usingDepth = false; // reset depth buffer tracking + + DiscardFramebufferCopy(); } else { // Something changed, but we still got the same framebuffer we were already rendering to. // Might not be a lot to do here, we check in NotifyRenderFramebufferUpdated @@ -1233,6 +1237,12 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF // Self-texturing, need a copy currently (some backends can potentially support it though). WARN_LOG_ONCE(selfTextureCopy, G3D, "Attempting to texture from current render target (src=%08x / target=%08x / flags=%d), making a copy", framebuffer->fb_address, currentRenderVfb_->fb_address, flags); // TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size. + if (currentFramebufferCopy_) { + // We have a copy already that hasn't been invalidated, let's keep using it. + draw_->BindFramebufferAsTexture(currentFramebufferCopy_, stage, Draw::FB_COLOR_BIT, layer); + return true; + } + Draw::Framebuffer *renderCopy = GetTempFBO(TempFBO::COPY, framebuffer->renderWidth, framebuffer->renderHeight); if (renderCopy) { VirtualFramebuffer copyInfo = *framebuffer; @@ -1240,6 +1250,7 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF CopyFramebufferForColorTexture(©Info, framebuffer, flags, layer); RebindFramebuffer("After BindFramebufferAsColorTexture"); draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, layer); + currentFramebufferCopy_ = renderCopy; gpuStats.numCopiesForSelfTex++; } else { // Failed to get temp FBO? Weird. diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 672130f7aa01..bda9bdc3893b 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -466,6 +466,10 @@ class FramebufferManagerCommon { return msaaLevel_; } + void DiscardFramebufferCopy() { + currentFramebufferCopy_ = nullptr; + } + protected: virtual void ReadbackFramebuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel, Draw::ReadbackMode mode); // Used for when a shader is required, such as GLES. @@ -552,6 +556,8 @@ class FramebufferManagerCommon { VirtualFramebuffer *currentRenderVfb_ = nullptr; + Draw::Framebuffer *currentFramebufferCopy_ = nullptr; + // The range of PSP memory that may contain FBOs. So we can skip iterating. u32 framebufRangeEnd_ = 0; diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index 5c51cf5e8a67..01d6dd02c2ce 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -287,7 +287,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 }, // Ignored commands - { GE_CMD_TEXFLUSH, 0 }, + { GE_CMD_TEXFLUSH, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_TexFlush }, { GE_CMD_TEXSYNC, 0 }, // These are just nop or part of other later commands. @@ -1619,6 +1619,13 @@ void GPUCommonHW::Execute_BoneMtxData(u32 op, u32 diff) { gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24; } +void GPUCommonHW::Execute_TexFlush(u32 op, u32 diff) { + // Games call this when they need the effect of drawing to be visible to texturing. + // And for a bunch of other reasons, but either way, this is what we need to do. + // It's possible we could also use this as a hint for the texture cache somehow. + framebufferManager_->DiscardFramebufferCopy(); +} + size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; return snprintf(buffer, size, diff --git a/GPU/GPUCommonHW.h b/GPU/GPUCommonHW.h index 2e13a1173845..000993be79fc 100644 --- a/GPU/GPUCommonHW.h +++ b/GPU/GPUCommonHW.h @@ -60,6 +60,8 @@ class GPUCommonHW : public GPUCommon { void Execute_BoneMtxNum(u32 op, u32 diff); void Execute_BoneMtxData(u32 op, u32 diff); + void Execute_TexFlush(u32 op, u32 diff); + typedef void (GPUCommonHW::*CmdFunc)(u32 op, u32 diff); void FastRunLoop(DisplayList &list) override; From 4d0fbdb96f34affa43af8070fd3c9cadc44be6c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 1 Mar 2023 22:51:23 +0100 Subject: [PATCH 2/3] Add a safety check - don't cache partial copies. --- GPU/Common/FramebufferManagerCommon.cpp | 19 ++++++++++++++++--- GPU/Common/FramebufferManagerCommon.h | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index cf96977c42bb..e4f3bc0d94c5 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1247,10 +1247,17 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF if (renderCopy) { VirtualFramebuffer copyInfo = *framebuffer; copyInfo.fbo = renderCopy; - CopyFramebufferForColorTexture(©Info, framebuffer, flags, layer); + + bool partial = false; + CopyFramebufferForColorTexture(©Info, framebuffer, flags, layer, &partial); RebindFramebuffer("After BindFramebufferAsColorTexture"); draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, layer); - currentFramebufferCopy_ = renderCopy; + + // Only cache the copy if it wasn't a partial copy. + // TODO: Improve on this. + if (!partial) { + currentFramebufferCopy_ = renderCopy; + } gpuStats.numCopiesForSelfTex++; } else { // Failed to get temp FBO? Weird. @@ -1273,14 +1280,17 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF } } -void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer) { +void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer, bool *partial) { int x = 0; int y = 0; int w = src->drawnWidth; int h = src->drawnHeight; + *partial = false; + // If max is not > min, we probably could not detect it. Skip. // See the vertex decoder, where this is updated. + // TODO: We're currently not hitting this path in Dante. See #17032 if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) == BINDFBCOLOR_MAY_COPY_WITH_UV && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) { x = std::max(gstate_c.vertBounds.minU, (u16)0); y = std::max(gstate_c.vertBounds.minV, (u16)0); @@ -1298,6 +1308,9 @@ void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer } if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) { + if (x != 0 || y != 0 || w < src->drawnWidth || h < src->drawnHeight) { + *partial = true; + } BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "CopyFBForColorTexture"); } } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index bda9bdc3893b..394e5d0593a1 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -490,7 +490,7 @@ class FramebufferManagerCommon { // Used by ReadFramebufferToMemory and later framebuffer block copies void BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, RasterChannel channel, const char *tag); - void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer); + void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer, bool *partial); void EstimateDrawingSize(u32 fb_address, int fb_stride, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int &drawing_width, int &drawing_height); From 92a18eed01988db7ea6a55f1549fc919476f0524 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 1 Mar 2023 22:03:56 -0800 Subject: [PATCH 3/3] GPU: Discard framebuffer copy when clearing. This avoids retaining the framebuffer copy any longer than the current framebuffer target. --- GPU/Common/DepthBufferCommon.cpp | 2 +- GPU/Common/FramebufferManagerCommon.cpp | 6 ++++++ GPU/Common/FramebufferManagerCommon.h | 2 ++ GPU/GLES/StencilBufferGLES.cpp | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/GPU/Common/DepthBufferCommon.cpp b/GPU/Common/DepthBufferCommon.cpp index 46a2ecfb6fcb..a29c99577575 100644 --- a/GPU/Common/DepthBufferCommon.cpp +++ b/GPU/Common/DepthBufferCommon.cpp @@ -204,7 +204,7 @@ bool FramebufferManagerCommon::ReadbackDepthbuffer(Draw::Framebuffer *fbo, int x } shaderManager_->DirtyLastShader(); - auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width() * scaleX, fbo->Height() * scaleY); + auto *blitFBO = GetTempFBO(TempFBO::Z_COPY, fbo->Width() * scaleX, fbo->Height() * scaleY); draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackDepthbufferSync"); Draw::Viewport viewport = { 0.0f, 0.0f, (float)destW, (float)destH, 0.0f, 1.0f }; draw_->SetViewport(viewport); diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index e4f3bc0d94c5..e21bc35e9378 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -932,6 +932,7 @@ void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) { } // Wipe some pointers + DiscardFramebufferCopy(); if (currentRenderVfb_ == v) currentRenderVfb_ = nullptr; if (displayFramebuf_ == v) @@ -1450,6 +1451,7 @@ void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, int // PresentationCommon sets all kinds of state, we can't rely on anything. gstate_c.Dirty(DIRTY_ALL); + DiscardFramebufferCopy(); currentRenderVfb_ = nullptr; } @@ -1607,10 +1609,12 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) { // This may get called mid-draw if the game uses an immediate flip. // PresentationCommon sets all kinds of state, we can't rely on anything. gstate_c.Dirty(DIRTY_ALL); + DiscardFramebufferCopy(); currentRenderVfb_ = nullptr; } void FramebufferManagerCommon::DecimateFBOs() { + DiscardFramebufferCopy(); currentRenderVfb_ = nullptr; for (auto iter : fbosToDelete_) { @@ -1767,6 +1771,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, } else { draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO"); } + DiscardFramebufferCopy(); currentRenderVfb_ = vfb; if (!vfb->fbo) { @@ -2568,6 +2573,7 @@ void FramebufferManagerCommon::NotifyConfigChanged() { } void FramebufferManagerCommon::DestroyAllFBOs() { + DiscardFramebufferCopy(); currentRenderVfb_ = nullptr; displayFramebuf_ = nullptr; prevDisplayFramebuf_ = nullptr; diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 394e5d0593a1..d0c83b4d6c11 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -217,6 +217,8 @@ enum class TempFBO { BLIT, // For copies of framebuffers (e.g. shader blending.) COPY, + // Used for copies when setting color to depth. + Z_COPY, // Used to copy stencil data, means we need a stencil backing. STENCIL, }; diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp index 763e5a95fe76..845cd2b3ac76 100644 --- a/GPU/GLES/StencilBufferGLES.cpp +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -130,7 +130,7 @@ bool FramebufferManagerGLES::ReadbackStencilbuffer(Draw::Framebuffer *fbo, int x } shaderManager_->DirtyLastShader(); - auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height()); + auto *blitFBO = GetTempFBO(TempFBO::Z_COPY, fbo->Width(), fbo->Height()); draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackStencilbufferSync"); Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f }; draw_->SetViewport(viewport);