From 80e47b7bd3f4f31c08986c23c5cb0b3ea5b2af9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 25 May 2023 14:46:33 +0200 Subject: [PATCH] Only dirty the uniform UVSCALEOFFSET when really needed Broken out from #17479 With OpenGL, greatly reduces the number of glUniform4fv calls in many games (and similar in the other backends). --- GPU/Common/DrawEngineCommon.cpp | 4 ++-- GPU/Common/ShaderId.cpp | 2 +- GPU/Common/TextureCacheCommon.cpp | 4 ++++ GPU/Common/VertexDecoderCommon.cpp | 2 +- GPU/Common/VertexDecoderCommon.h | 2 +- GPU/GPUCommonHW.cpp | 20 +++++++------------- GPU/GPUState.h | 13 ++++++++++--- GPU/Vulkan/DrawEngineVulkan.cpp | 4 ++-- 8 files changed, 28 insertions(+), 23 deletions(-) diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 37d4b9d2560a..234d2af22378 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -236,7 +236,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex // Code checks this reg directly, not just the vtype ID. 
if (!prevThrough) { gstate.vertType |= GE_VTYPE_THROUGH; - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE); } int bytesRead; @@ -246,7 +246,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex if (!prevThrough) { gstate.vertType &= ~GE_VTYPE_THROUGH; - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE); + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE); } } diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 864075e9e35a..a9e1e3485e05 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -377,7 +377,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask); // All framebuffers are array textures in Vulkan now. 
- if (gstate_c.arrayTexture && g_Config.iGPUBackend == (int)GPUBackend::VULKAN) { + if (gstate_c.textureIsArray && g_Config.iGPUBackend == (int)GPUBackend::VULKAN) { id.SetBit(FS_BIT_SAMPLE_ARRAY_TEXTURE); } diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 1ec6378c722e..c6193c19995e 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -406,6 +406,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { Unbind(); gstate_c.SetTextureIs3D(false); gstate_c.SetTextureIsArray(false); + gstate_c.SetTextureIsFramebuffer(false); return nullptr; } @@ -573,6 +574,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); gstate_c.SetTextureIsArray(false); gstate_c.SetTextureIsBGRA((entry->status & TexCacheEntry::STATUS_BGRA) != 0); + gstate_c.SetTextureIsFramebuffer(false); if (rehash) { // Update in case any of these changed. @@ -681,6 +683,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { gstate_c.curTextureHeight = h; gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); gstate_c.SetTextureIsArray(false); // Ordinary 2D textures still aren't used by array view in VK. We probably might as well, though, at this point.. 
+ gstate_c.SetTextureIsFramebuffer(false); failedTexture_ = false; nextTexture_ = entry; @@ -1154,6 +1157,7 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate) gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE); } gstate_c.SetTextureIsBGRA(false); + gstate_c.SetTextureIsFramebuffer(true); gstate_c.curTextureXOffset = fbInfo.xOffset; gstate_c.curTextureYOffset = fbInfo.yOffset; u32 texW = (u32)gstate.getTextureWidth(0); diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index 21b3e76a6ffa..915b89e5494f 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -1176,7 +1176,7 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, steps_[numSteps_++] = morphcount == 1 ? colstep[col] : colstep_morph[col]; - // All color formats decode to DEC_U8_4 currently. + // All color formats decode to DEC_U8_4. // They can become floats later during transform though. decFmt.c0fmt = DEC_U8_4; decFmt.c0off = decOff; diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index 423dbd918d48..7af518f0227c 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -336,7 +336,7 @@ class VertexDecoder { u32 VertexType() const { return fmt_; } - const DecVtxFormat &GetDecVtxFmt() { return decFmt; } + const DecVtxFormat &GetDecVtxFmt() const { return decFmt; } void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const; diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index 809f8402d3ed..9587fab0a612 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -149,7 +149,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_TEXFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE }, { GE_CMD_TEXLEVEL, FLAG_EXECUTEONCHANGE, DIRTY_TEXTURE_PARAMS, &GPUCommonHW::Execute_TexLevel }, { GE_CMD_TEXLODSLOPE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, - { 
GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE | DIRTY_UVSCALEOFFSET }, + { GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE }, { GE_CMD_TEXADDR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXADDR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXADDR3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, @@ -827,13 +827,14 @@ void GPUCommonHW::FastRunLoop(DisplayList &list) { } void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) { - if (diff) + if (diff) { + // TODO: We only need to dirty vshader-state here if the output format will be different. gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); - if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) { - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); - // Switching between through and non-through, we need to invalidate a bunch of stuff. - if (diff & GE_VTYPE_THROUGH_MASK) + + if (diff & GE_VTYPE_THROUGH_MASK) { + // Switching between through and non-through, we need to invalidate a bunch of stuff. gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE); + } } } @@ -844,8 +845,6 @@ void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) { gstate.vertType ^= diff; Flush(); gstate.vertType ^= diff; - if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); // In this case, we may be doing weights and morphs. // Update any bone matrix uniforms so it uses them correctly. if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) { @@ -1125,8 +1124,6 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) { void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) { // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); - gstate_c.framebufFormat = gstate.FrameBufFormat(); // This also make skipping drawing very effective. 
@@ -1198,8 +1195,6 @@ void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) { void GPUCommonHW::Execute_Spline(u32 op, u32 diff) { // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); - gstate_c.framebufFormat = gstate.FrameBufFormat(); // This also make skipping drawing very effective. @@ -1289,7 +1284,6 @@ void GPUCommonHW::Execute_TexSize0(u32 op, u32 diff) { if (diff || gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS)) { gstate_c.curTextureWidth = gstate.getTextureWidth(0); gstate_c.curTextureHeight = gstate.getTextureHeight(0); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); // We will need to reset the texture now. gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 632fa1e86dc9..9e2c31b0d9ed 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -569,8 +569,8 @@ struct GPUStateCache { } } void SetTextureIsArray(bool isArrayTexture) { // VK only - if (arrayTexture != isArrayTexture) { - arrayTexture = isArrayTexture; + if (textureIsArray != isArrayTexture) { + textureIsArray = isArrayTexture; Dirty(DIRTY_FRAGMENTSHADER_STATE); } } @@ -580,6 +580,12 @@ struct GPUStateCache { Dirty(DIRTY_FRAGMENTSHADER_STATE); } } + void SetTextureIsFramebuffer(bool isFramebuffer) { + if (textureIsFramebuffer != isFramebuffer) { + textureIsFramebuffer = isFramebuffer; + Dirty(DIRTY_UVSCALEOFFSET); + } + } void SetUseFlags(u32 newFlags) { if (newFlags != useFlags_) { if (useFlags_ != 0) @@ -614,7 +620,8 @@ struct GPUStateCache { bool bgraTexture; bool needShaderTexClamp; - bool arrayTexture; + bool textureIsArray; + bool textureIsFramebuffer; bool useFlagsChanged; float morphWeights[8]; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 49278b356280..ad719867627c 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -775,7 +775,7 @@ void DrawEngineVulkan::DoFlush() { textureCache_->ApplyTexture(); 
textureCache_->GetVulkanHandles(imageView, sampler); if (imageView == VK_NULL_HANDLE) - imageView = (VkImageView)draw_->GetNativeObject(gstate_c.arrayTexture ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW); + imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW); if (sampler == VK_NULL_HANDLE) sampler = nullSampler_; } @@ -910,7 +910,7 @@ void DrawEngineVulkan::DoFlush() { textureCache_->ApplyTexture(); textureCache_->GetVulkanHandles(imageView, sampler); if (imageView == VK_NULL_HANDLE) - imageView = (VkImageView)draw_->GetNativeObject(gstate_c.arrayTexture ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW); + imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW); if (sampler == VK_NULL_HANDLE) sampler = nullSampler_; }