From d4ce134292dd8d34250401a6f22ebccc829b0d42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Jan 2023 11:19:50 +0100 Subject: [PATCH 1/5] Shader generator: Move FS_TEX_ALPHA to a uniform bool. Part of #16567 --- Common/GPU/OpenGL/GLRenderManager.h | 13 ++++ GPU/Common/FragmentShaderGenerator.cpp | 94 ++++++++++---------------- GPU/Common/FragmentShaderGenerator.h | 3 +- GPU/Common/ShaderCommon.h | 4 +- GPU/Common/ShaderId.cpp | 9 --- GPU/Common/ShaderId.h | 2 +- GPU/Common/ShaderUniforms.cpp | 6 +- GPU/Common/ShaderUniforms.h | 6 +- GPU/Directx9/ShaderManagerDX9.cpp | 9 ++- GPU/Directx9/ShaderManagerDX9.h | 1 + GPU/GLES/ShaderManagerGLES.cpp | 10 ++- GPU/GLES/ShaderManagerGLES.h | 1 + GPU/GPUCommon.cpp | 2 +- GPU/Vulkan/ShaderManagerVulkan.cpp | 2 +- 14 files changed, 83 insertions(+), 79 deletions(-) diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index e2d132726f90..0acd2b579341 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -760,6 +760,19 @@ class GLRenderManager { curRenderStep_->commands.push_back(data); } + void SetUniformB(const GLint *loc, bool value) { + _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); +#ifdef _DEBUG + _dbg_assert_(curProgram_); +#endif + GLRRenderData data{ GLRRenderCommand::UNIFORM4I }; + data.uniform4.loc = loc; + data.uniform4.count = 1; + u32 udata = value ? 1 : 0; + memcpy(data.uniform4.v, &udata, sizeof(u32)); + curRenderStep_->commands.push_back(data); + } + void SetUniformM4x4(const GLint *loc, const float *udata) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); #ifdef _DEBUG diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 64568fff86ec..15f4067b8bf6 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -105,7 +105,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO); bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE); bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ); - bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA); if (texture3D && arrayTexture) { *errorString = "Invalid combination of 3D texture and array texture, shouldn't happen"; @@ -257,8 +256,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) { WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE); } - if (doTexture && texFunc == GE_TEXFUNC_BLEND) { - WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); + if (doTexture) { + if (texFunc == GE_TEXFUNC_BLEND) { + WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); + } + WRITE(p, "bool u_texAlpha : register(c%i);\n", CONST_PS_TEXALPHA); } WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR); if (texture3D) { @@ -351,6 +353,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } else { WRITE(p, "uniform sampler2D tex;\n"); } + *uniformMask |= DIRTY_TEXALPHA; + WRITE(p, "uniform bool u_texAlpha;\n"); } if (readFramebufferTex) { @@ -817,64 +821,36 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu break; } - if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha) - WRITE(p, " vec4 p = v_color0;\n"); - - if (doTextureAlpha) { // texfmt == RGBA - switch (texFunc) { - case GE_TEXFUNC_MODULATE: - WRITE(p, " vec4 v = p * t + s\n;"); - break; - - case GE_TEXFUNC_DECAL: - WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n"); - break; - - case GE_TEXFUNC_BLEND: - WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n"); - break; - - case GE_TEXFUNC_REPLACE: - WRITE(p, " vec4 v = t + s;\n"); - break; - - case GE_TEXFUNC_ADD: - case GE_TEXFUNC_UNKNOWN1: - case GE_TEXFUNC_UNKNOWN2: - case GE_TEXFUNC_UNKNOWN3: - WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n"); - break; - default: - WRITE(p, " vec4 v = p;\n"); break; - } - } else { // texfmt == RGB - switch (texFunc) { - case GE_TEXFUNC_MODULATE: - WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a) + s;\n"); - break; - - case GE_TEXFUNC_DECAL: - WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n"); - break; - - case GE_TEXFUNC_BLEND: - WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a) + s;\n"); - break; - - case GE_TEXFUNC_REPLACE: - WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n"); - break; - - case GE_TEXFUNC_ADD: - case GE_TEXFUNC_UNKNOWN1: - case GE_TEXFUNC_UNKNOWN2: - case GE_TEXFUNC_UNKNOWN3: - WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a) + s;\n"); break; - default: - WRITE(p, " vec4 v = p;\n"); break; - } + WRITE(p, " vec4 p = v_color0;\n"); + + if (texFunc != GE_TEXFUNC_REPLACE) { + WRITE(p, " if (!u_texAlpha) { t.a = 1.0; }\n"); } + switch (texFunc) { + case GE_TEXFUNC_MODULATE: + WRITE(p, " vec4 v = p * t + s;\n"); + break; + case GE_TEXFUNC_DECAL: + WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n"); + break; + case GE_TEXFUNC_BLEND: + WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n"); + break; + case GE_TEXFUNC_REPLACE: + WRITE(p, " vec4 v = (u_texAlpha ? t : vec4(t.rgb, p.a)) + s;\n"); + break; + case GE_TEXFUNC_ADD: + case GE_TEXFUNC_UNKNOWN1: + case GE_TEXFUNC_UNKNOWN2: + case GE_TEXFUNC_UNKNOWN3: + WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n"); + break; + default: + // Doesn't happen + WRITE(p, " vec4 v = p + s;\n"); break; + break; + } if (enableColorDoubling) { // This happens before fog is applied. WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n"); diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h index 88c2c3f9d6cc..d69583452c40 100644 --- a/GPU/Common/FragmentShaderGenerator.h +++ b/GPU/Common/FragmentShaderGenerator.h @@ -36,9 +36,10 @@ struct FShaderID; #define CONST_PS_TEXCLAMP 8 #define CONST_PS_TEXCLAMPOFF 9 #define CONST_PS_MIPBIAS 10 +#define CONST_PS_TEXALPHA 11 // For stencil upload -#define CONST_PS_STENCILVALUE 11 +#define CONST_PS_STENCILVALUE 12 // Can technically be deduced from the fragment shader ID, but this is safer. enum class FragmentShaderFlags : u32 { diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index e715936ddde3..7743dac933f9 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -90,11 +90,11 @@ enum : uint64_t { DIRTY_MIPBIAS = 1ULL << 37, DIRTY_LIGHT_CONTROL = 1ULL << 38, - // space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. + DIRTY_TEXALPHA = 1ULL << 39, DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL, + DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, // Other dirty elements that aren't uniforms! diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index d7da61c29099..1ec1012f75f2 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -192,7 +192,6 @@ std::string FragmentShaderDesc(const FShaderID &id) { if (id.Bit(FS_BIT_CLEARMODE)) desc << "Clear "; if (id.Bit(FS_BIT_DO_TEXTURE)) desc << (id.Bit(FS_BIT_3D_TEXTURE) ? "Tex3D " : "Tex "); if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj "; - if (id.Bit(FS_BIT_TEXALPHA)) desc << "TexAlpha "; if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) desc << "TexOffs "; if (id.Bit(FS_BIT_COLOR_DOUBLE)) desc << "2x "; if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat "; @@ -291,7 +290,6 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue(); bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled(); bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix, gstate.getUVProjMode())); - bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; ShaderDepalMode shaderDepalMode = gstate_c.shaderDepalMode; @@ -303,16 +301,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip SimulateLogicOpType simulateLogicOpType = pipelineState.blendState.simulateLogicOpType; ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil; - // All texfuncs except replace are the same for RGB as for RGBA with full alpha. - // Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes. - if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) { - doTextureAlpha = false; - } - if (gstate.isTextureMapEnabled()) { id.SetBit(FS_BIT_DO_TEXTURE); id.SetBits(FS_BIT_TEXFUNC, 3, gstate.getTextureFunction()); - id.SetBit(FS_BIT_TEXALPHA, doTextureAlpha & 1); // rgb or rgba if (gstate_c.needShaderTexClamp) { bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0; // 4 bits total. diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index e03b27f21492..beab743309e4 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -68,7 +68,7 @@ enum FShaderBit : uint8_t { FS_BIT_CLEARMODE = 0, FS_BIT_DO_TEXTURE = 1, FS_BIT_TEXFUNC = 2, // 3 bits - FS_BIT_TEXALPHA = 5, + // 1 bit free at position 5 FS_BIT_3D_TEXTURE = 6, FS_BIT_SHADER_TEX_CLAMP = 7, FS_BIT_CLAMP_S = 8, diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 92c22be0fe38..5112efb8e664 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -198,8 +198,12 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView } } + if (dirtyUniforms & DIRTY_TEXALPHA) { + ub->texAlpha = gstate.isTextureAlphaUsed() ? 1 : 0; + } + if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) { - ub->stencil = (float)gstate.getStencilTestRef() * (1.0 / 255.0); + ub->stencilReplaceValue = (float)gstate.getStencilTestRef() * (1.0 / 255.0); } // Note - this one is not in lighting but in transformCommon as it has uses beyond lighting diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index c09c231b866d..7bf61dc00531 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -9,7 +9,7 @@ enum : uint64_t { DIRTY_BASE_UNIFORMS = DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF | - DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | + DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_STENCILREPLACEVALUE | DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_COLORWRITEMASK | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA | DIRTY_BEZIERSPLINE | DIRTY_DEPAL, DIRTY_LIGHT_UNIFORMS = @@ -35,10 +35,11 @@ struct alignas(16) UB_VS_FS_Base { // Fragment data float fogColor[3]; uint32_t alphaColorRef; float texEnvColor[3]; uint32_t colorTestMask; - float blendFixA[3]; float stencil; + float blendFixA[3]; float stencilReplaceValue; float blendFixB[3]; float rotation; float texClamp[4]; float texClampOffset[2]; float fogCoef[2]; + uint32_t texAlpha; float pad[3]; // VR stuff is to go here, later. For normal drawing, we can then get away // with just uploading the first 448 bytes of the struct (up to and including fogCoef). }; @@ -65,6 +66,7 @@ R"( mat4 u_proj; vec4 u_texclamp; vec2 u_texclampoff; vec2 u_fogcoef; + bool u_texAlpha; float pad0; float pad1; float pad2; )"; // 512 bytes. Would like to shrink more. Some colors only have 8-bit precision and we expand diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 1f820310bd07..729e21c0c9cd 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -213,6 +213,11 @@ void ShaderManagerDX9::VSSetColorUniform3Alpha(int creg, u32 color, u8 alpha) { device_->SetVertexShaderConstantF(creg, f, 1); } +void ShaderManagerDX9::PSSetBool(int creg, bool value) { + BOOL b = value ? 1 : 0; + HRESULT retval = device_->SetPixelShaderConstantB(creg, &b, 1); +} + void ShaderManagerDX9::VSSetColorUniform3ExtraFloat(int creg, u32 color, float extra) { const float col[4] = { ((color & 0xFF)) / 255.0f, @@ -279,7 +284,9 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) { PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f)); } - + if (dirtyUniforms & DIRTY_TEXALPHA) { + PSSetBool(CONST_PS_TEXALPHA, gstate.isTextureAlphaUsed()); + } if (dirtyUniforms & DIRTY_SHADERBLEND) { PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA()); PSSetColorUniform3(CONST_PS_BLENDFIXB, gstate.getFixB()); diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h index 372fbf9659af..eb62216fa1b4 100644 --- a/GPU/Directx9/ShaderManagerDX9.h +++ b/GPU/Directx9/ShaderManagerDX9.h @@ -95,6 +95,7 @@ class ShaderManagerDX9 : public ShaderManagerCommon { inline void PSSetColorUniform3(int creg, u32 color); inline void PSSetFloat(int creg, float value); inline void PSSetFloatArray(int creg, const float *value, int count); + void PSSetBool(int creg, bool value); void VSSetMatrix4x3_3(int creg, const float *m4x3); inline void VSSetColorUniform3(int creg, u32 color); diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 8a728d7db416..3ebae535bf2b 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -152,6 +152,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" }); queries.push_back({ &u_texclamp, "u_texclamp" }); queries.push_back({ &u_texclampoff, "u_texclampoff" }); + queries.push_back({ &u_texAlpha, "u_texAlpha" }); queries.push_back({ &u_lightControl, "u_lightControl" }); for (int i = 0; i < 4; i++) { @@ -229,6 +230,10 @@ static inline void SetColorUniform3(GLRenderManager *render, GLint *uniform, u32 render->SetUniformF(uniform, 3, f); } +static inline void SetBoolUniform(GLRenderManager *render, GLint *uniform, bool value) { + render->SetUniformB(uniform, value); +} + static void SetColorUniform3Alpha(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) { float f[4]; Uint8x3ToFloat4_AlphaUint8(f, color, alpha); @@ -440,6 +445,9 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin if (dirty & DIRTY_TEXENV) { SetColorUniform3(render_, &u_texenv, gstate.texenvcolor); } + if (dirty & DIRTY_TEXALPHA) { + SetBoolUniform(render_, &u_texAlpha, gstate.isTextureAlphaUsed()); + } if (dirty & DIRTY_ALPHACOLORREF) { if (shaderLanguage.bitwiseOps) { render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24)); @@ -945,7 +953,7 @@ enum class CacheDetectFlags { }; #define CACHE_HEADER_MAGIC 0x83277592 -#define CACHE_VERSION 21 +#define CACHE_VERSION 22 struct CacheHeader { uint32_t magic; uint32_t version; diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 6520b1e4ade4..15a783084c95 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -101,6 +101,7 @@ class LinkedShader { int u_uvscaleoffset; int u_texclamp; int u_texclampoff; + int u_texAlpha; // Lighting int u_lightControl; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 0718862eb023..f7a8dfa7e660 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -94,7 +94,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, // Raster state for Direct3D 9, uncommon. { GE_CMD_SHADEMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE }, - { GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, + { GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXALPHA }, { GE_CMD_COLORTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_ALPHATESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_COLORTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 955932550b66..df2495c088a7 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -516,7 +516,7 @@ enum class VulkanCacheDetectFlags { }; #define CACHE_HEADER_MAGIC 0xff51f420 -#define CACHE_VERSION 35 +#define CACHE_VERSION 36 struct VulkanCacheHeader { uint32_t magic; uint32_t version; From 5022ddc4fca68dcf86f54943bd4b9f6504a74425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Jan 2023 12:21:33 +0100 Subject: [PATCH 2/5] D3D9 bool constants have a separate register space, oops. --- GPU/Common/FragmentShaderGenerator.cpp | 2 +- GPU/Common/FragmentShaderGenerator.h | 9 ++++++--- GPU/Common/ShaderId.h | 5 ++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 15f4067b8bf6..7722e769fd18 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -260,7 +260,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (texFunc == GE_TEXFUNC_BLEND) { WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); } - WRITE(p, "bool u_texAlpha : register(c%i);\n", CONST_PS_TEXALPHA); + WRITE(p, "bool u_texAlpha : register(b%i);\n", CONST_PS_TEXALPHA); // NOTE! "b" register, not "c"! } WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR); if (texture3D) { diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h index d69583452c40..c2842f7ae43c 100644 --- a/GPU/Common/FragmentShaderGenerator.h +++ b/GPU/Common/FragmentShaderGenerator.h @@ -23,7 +23,7 @@ struct FShaderID; -// D3D9 constants +// D3D9 float constants #define CONST_PS_TEXENV 0 #define CONST_PS_ALPHACOLORREF 1 @@ -36,10 +36,13 @@ struct FShaderID; #define CONST_PS_TEXCLAMP 8 #define CONST_PS_TEXCLAMPOFF 9 #define CONST_PS_MIPBIAS 10 -#define CONST_PS_TEXALPHA 11 // For stencil upload -#define CONST_PS_STENCILVALUE 12 +#define BCONST_PS_STENCILVALUE 11 + +// D3D9 bool constants, they have their own register space. +#define CONST_PS_TEXALPHA 0 + // Can technically be deduced from the fragment shader ID, but this is safer. enum class FragmentShaderFlags : u32 { diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index beab743309e4..62adaaf38ad4 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -6,9 +6,8 @@ #include "Common/CommonFuncs.h" -// TODO: There will be additional bits, indicating that groups of these will be -// sent to the shader and processed there. This will cut down the number of shaders ("ubershader approach") -// This is probably only really worth doing for lighting and bones. +// VS_BIT_LIGHT_UBERSHADER indicates that some groups of these will be +// sent to the shader and processed there. This cuts down the number of shaders ("ubershader approach"). enum VShaderBit : uint8_t { VS_BIT_LMODE = 0, VS_BIT_IS_THROUGH = 1, From 7df95451954df8697667b277d59a2bb43ff543fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Jan 2023 13:29:14 +0100 Subject: [PATCH 3/5] Fix D3D9 uniform update --- GPU/Directx9/ShaderManagerDX9.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 729e21c0c9cd..d46d6f2eeb47 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -266,7 +266,7 @@ static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) { in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(1.0f, 1.0f, 0.5f)); } -const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS; +const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS; void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { if (dirtyUniforms & DIRTY_TEXENV) { From 00c44ea7994435a7b4ff615b4a00a74217057a18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 10 Jan 2023 10:08:34 +0100 Subject: [PATCH 4/5] Get rid of the bool, not worth it. --- Core/Util/PPGeDraw.cpp | 2 +- GPU/Common/FragmentShaderGenerator.cpp | 10 ++++++---- GPU/Common/FragmentShaderGenerator.h | 4 ++-- GPU/Common/ShaderUniforms.cpp | 2 +- GPU/Common/ShaderUniforms.h | 6 +++--- GPU/Directx9/ShaderManagerDX9.cpp | 3 ++- GPU/GLES/ShaderManagerGLES.cpp | 4 ++-- GPU/GLES/ShaderManagerGLES.h | 2 +- 8 files changed, 18 insertions(+), 15 deletions(-) diff --git a/Core/Util/PPGeDraw.cpp b/Core/Util/PPGeDraw.cpp index 495e4d124945..525937a5128f 100644 --- a/Core/Util/PPGeDraw.cpp +++ b/Core/Util/PPGeDraw.cpp @@ -1327,7 +1327,7 @@ bool PPGeImage::Load() { unsigned char *textureData; int success; if (filename_.empty()) { - success = pngLoadPtr(Memory::GetPointerRange(png_, size_), size_, &width_, &height_, &textureData); + success = pngLoadPtr(Memory::GetPointerRange(png_, (u32)size_), size_, &width_, &height_, &textureData); } else { std::vector pngData; if (pspFileSystem.ReadEntireFile(filename_, pngData) < 0) { diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 7722e769fd18..0219a2391d95 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -260,7 +260,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (texFunc == GE_TEXFUNC_BLEND) { WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); } - WRITE(p, "bool u_texAlpha : register(b%i);\n", CONST_PS_TEXALPHA); // NOTE! "b" register, not "c"! + WRITE(p, "float u_texNoAlpha : register(c%i);\n", CONST_PS_TEX_NO_ALPHA); } WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR); if (texture3D) { @@ -354,7 +354,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, "uniform sampler2D tex;\n"); } *uniformMask |= DIRTY_TEXALPHA; - WRITE(p, "uniform bool u_texAlpha;\n"); + WRITE(p, "uniform float u_texNoAlpha;\n"); } if (readFramebufferTex) { @@ -824,7 +824,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " vec4 p = v_color0;\n"); if (texFunc != GE_TEXFUNC_REPLACE) { - WRITE(p, " if (!u_texAlpha) { t.a = 1.0; }\n"); + WRITE(p, " t.a = max(t.a, u_texNoAlpha);\n"); } switch (texFunc) { @@ -838,7 +838,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n"); break; case GE_TEXFUNC_REPLACE: - WRITE(p, " vec4 v = (u_texAlpha ? t : vec4(t.rgb, p.a)) + s;\n"); + WRITE(p, " vec4 r = t;\n"); + WRITE(p, " r.a = mix(r.a, p.a, u_texNoAlpha);\n"); + WRITE(p, " vec4 v = r + s;\n"); break; case GE_TEXFUNC_ADD: case GE_TEXFUNC_UNKNOWN1: diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h index c2842f7ae43c..bd96f6dcff58 100644 --- a/GPU/Common/FragmentShaderGenerator.h +++ b/GPU/Common/FragmentShaderGenerator.h @@ -36,12 +36,12 @@ struct FShaderID; #define CONST_PS_TEXCLAMP 8 #define CONST_PS_TEXCLAMPOFF 9 #define CONST_PS_MIPBIAS 10 +#define CONST_PS_TEX_NO_ALPHA 11 // For stencil upload -#define BCONST_PS_STENCILVALUE 11 +#define BCONST_PS_STENCILVALUE 12 // D3D9 bool constants, they have their own register space. -#define CONST_PS_TEXALPHA 0 // Can technically be deduced from the fragment shader ID, but this is safer. diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 5112efb8e664..e2baeabd1d18 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -199,7 +199,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView } if (dirtyUniforms & DIRTY_TEXALPHA) { - ub->texAlpha = gstate.isTextureAlphaUsed() ? 1 : 0; + ub->texNoAlpha = gstate.isTextureAlphaUsed() ? 0.0f : 1.0f; } if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) { diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index 7bf61dc00531..4e8a560065b8 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -17,7 +17,7 @@ enum : uint64_t { DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT, }; -// Currently 448 bytes. +// Currently 496 bytes. // Every line here is a 4-float. struct alignas(16) UB_VS_FS_Base { float proj[16]; @@ -39,7 +39,7 @@ struct alignas(16) UB_VS_FS_Base { float blendFixB[3]; float rotation; float texClamp[4]; float texClampOffset[2]; float fogCoef[2]; - uint32_t texAlpha; float pad[3]; + float texNoAlpha; float pad[3]; // VR stuff is to go here, later. For normal drawing, we can then get away // with just uploading the first 448 bytes of the struct (up to and including fogCoef). }; @@ -66,7 +66,7 @@ R"( mat4 u_proj; vec4 u_texclamp; vec2 u_texclampoff; vec2 u_fogcoef; - bool u_texAlpha; float pad0; float pad1; float pad2; + float u_texNoAlpha; float pad0; float pad1; float pad2; )"; // 512 bytes. Would like to shrink more. Some colors only have 8-bit precision and we expand diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index d46d6f2eeb47..726d19fc4c12 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -285,7 +285,8 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f)); } if (dirtyUniforms & DIRTY_TEXALPHA) { - PSSetBool(CONST_PS_TEXALPHA, gstate.isTextureAlphaUsed()); + // NOTE: Reversed value, more efficient in shader. + PSSetFloat(CONST_PS_TEX_NO_ALPHA, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f); } if (dirtyUniforms & DIRTY_SHADERBLEND) { PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA()); diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 3ebae535bf2b..e5d0ad7f3373 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -152,7 +152,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" }); queries.push_back({ &u_texclamp, "u_texclamp" }); queries.push_back({ &u_texclampoff, "u_texclampoff" }); - queries.push_back({ &u_texAlpha, "u_texAlpha" }); + queries.push_back({ &u_texNoAlpha, "u_texNoAlpha" }); queries.push_back({ &u_lightControl, "u_lightControl" }); for (int i = 0; i < 4; i++) { @@ -446,7 +446,7 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin SetColorUniform3(render_, &u_texenv, gstate.texenvcolor); } if (dirty & DIRTY_TEXALPHA) { - SetBoolUniform(render_, &u_texAlpha, gstate.isTextureAlphaUsed()); + render_->SetUniformF1(&u_texNoAlpha, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f); } if (dirty & DIRTY_ALPHACOLORREF) { if (shaderLanguage.bitwiseOps) { diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 15a783084c95..c5665c533a63 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -101,7 +101,7 @@ class LinkedShader { int u_uvscaleoffset; int u_texclamp; int u_texclampoff; - int u_texAlpha; + int u_texNoAlpha; // Lighting int u_lightControl; From 0a6d226ddc8a850a79cabacb9581c95167356763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 10 Jan 2023 10:25:04 +0100 Subject: [PATCH 5/5] Remove the unused bool utilities --- Common/GPU/OpenGL/GLRenderManager.h | 13 ------------- GPU/Directx9/ShaderManagerDX9.cpp | 5 ----- GPU/Directx9/ShaderManagerDX9.h | 1 - GPU/GLES/ShaderManagerGLES.cpp | 4 ---- 4 files changed, 23 deletions(-) diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index 0acd2b579341..e2d132726f90 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -760,19 +760,6 @@ class GLRenderManager { curRenderStep_->commands.push_back(data); } - void SetUniformB(const GLint *loc, bool value) { - _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); -#ifdef _DEBUG - _dbg_assert_(curProgram_); -#endif - GLRRenderData data{ GLRRenderCommand::UNIFORM4I }; - data.uniform4.loc = loc; - data.uniform4.count = 1; - u32 udata = value ? 1 : 0; - memcpy(data.uniform4.v, &udata, sizeof(u32)); - curRenderStep_->commands.push_back(data); - } - void SetUniformM4x4(const GLint *loc, const float *udata) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); #ifdef _DEBUG diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 726d19fc4c12..4bb5d6b9430d 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -213,11 +213,6 @@ void ShaderManagerDX9::VSSetColorUniform3Alpha(int creg, u32 color, u8 alpha) { device_->SetVertexShaderConstantF(creg, f, 1); } -void ShaderManagerDX9::PSSetBool(int creg, bool value) { - BOOL b = value ? 1 : 0; - HRESULT retval = device_->SetPixelShaderConstantB(creg, &b, 1); -} - void ShaderManagerDX9::VSSetColorUniform3ExtraFloat(int creg, u32 color, float extra) { const float col[4] = { ((color & 0xFF)) / 255.0f, diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h index eb62216fa1b4..372fbf9659af 100644 --- a/GPU/Directx9/ShaderManagerDX9.h +++ b/GPU/Directx9/ShaderManagerDX9.h @@ -95,7 +95,6 @@ class ShaderManagerDX9 : public ShaderManagerCommon { inline void PSSetColorUniform3(int creg, u32 color); inline void PSSetFloat(int creg, float value); inline void PSSetFloatArray(int creg, const float *value, int count); - void PSSetBool(int creg, bool value); void VSSetMatrix4x3_3(int creg, const float *m4x3); inline void VSSetColorUniform3(int creg, u32 color); diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index e5d0ad7f3373..7bd7af82869c 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -230,10 +230,6 @@ static inline void SetColorUniform3(GLRenderManager *render, GLint *uniform, u32 render->SetUniformF(uniform, 3, f); } -static inline void SetBoolUniform(GLRenderManager *render, GLint *uniform, bool value) { - render->SetUniformB(uniform, value); -} - static void SetColorUniform3Alpha(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) { float f[4]; Uint8x3ToFloat4_AlphaUint8(f, color, alpha);