Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shader generator: Move FS_TEX_ALPHA to a uniform bool. #16763

Merged
merged 5 commits into from
Jan 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Core/Util/PPGeDraw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1327,7 +1327,7 @@ bool PPGeImage::Load() {
unsigned char *textureData;
int success;
if (filename_.empty()) {
success = pngLoadPtr(Memory::GetPointerRange(png_, size_), size_, &width_, &height_, &textureData);
success = pngLoadPtr(Memory::GetPointerRange(png_, (u32)size_), size_, &width_, &height_, &textureData);
} else {
std::vector<u8> pngData;
if (pspFileSystem.ReadEntireFile(filename_, pngData) < 0) {
Expand Down
96 changes: 37 additions & 59 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO);
bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE);
bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);
bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA);

if (texture3D && arrayTexture) {
*errorString = "Invalid combination of 3D texture and array texture, shouldn't happen";
Expand Down Expand Up @@ -257,8 +256,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE);
}
if (doTexture && texFunc == GE_TEXFUNC_BLEND) {
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
if (doTexture) {
if (texFunc == GE_TEXFUNC_BLEND) {
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
}
WRITE(p, "float u_texNoAlpha : register(c%i);\n", CONST_PS_TEX_NO_ALPHA);
}
WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR);
if (texture3D) {
Expand Down Expand Up @@ -351,6 +353,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
} else {
WRITE(p, "uniform sampler2D tex;\n");
}
*uniformMask |= DIRTY_TEXALPHA;
WRITE(p, "uniform float u_texNoAlpha;\n");
}

if (readFramebufferTex) {
Expand Down Expand Up @@ -817,64 +821,38 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
break;
}

if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha)
WRITE(p, " vec4 p = v_color0;\n");

if (doTextureAlpha) { // texfmt == RGBA
switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = p * t + s\n;");
break;

case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n");
break;

case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n");
break;

case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = t + s;\n");
break;

case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n");
break;
default:
WRITE(p, " vec4 v = p;\n"); break;
}
} else { // texfmt == RGB
switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a) + s;\n");
break;

case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n");
break;

case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a) + s;\n");
break;

case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n");
break;

case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a) + s;\n"); break;
default:
WRITE(p, " vec4 v = p;\n"); break;
}
WRITE(p, " vec4 p = v_color0;\n");

if (texFunc != GE_TEXFUNC_REPLACE) {
WRITE(p, " t.a = max(t.a, u_texNoAlpha);\n");
}

switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = p * t + s;\n");
break;
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n");
break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n");
break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 r = t;\n");
WRITE(p, " r.a = mix(r.a, p.a, u_texNoAlpha);\n");
WRITE(p, " vec4 v = r + s;\n");
break;
case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n");
break;
default:
// Doesn't happen
WRITE(p, " vec4 v = p + s;\n"); break;
break;
}
if (enableColorDoubling) {
// This happens before fog is applied.
WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
Expand Down
8 changes: 6 additions & 2 deletions GPU/Common/FragmentShaderGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

struct FShaderID;

// D3D9 constants
// D3D9 float constants

#define CONST_PS_TEXENV 0
#define CONST_PS_ALPHACOLORREF 1
Expand All @@ -36,9 +36,13 @@ struct FShaderID;
#define CONST_PS_TEXCLAMP 8
#define CONST_PS_TEXCLAMPOFF 9
#define CONST_PS_MIPBIAS 10
#define CONST_PS_TEX_NO_ALPHA 11

// For stencil upload
#define CONST_PS_STENCILVALUE 11
#define BCONST_PS_STENCILVALUE 12

// D3D9 bool constants, they have their own register space.


// Can technically be deduced from the fragment shader ID, but this is safer.
enum class FragmentShaderFlags : u32 {
Expand Down
4 changes: 2 additions & 2 deletions GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ enum : uint64_t {
DIRTY_MIPBIAS = 1ULL << 37,
DIRTY_LIGHT_CONTROL = 1ULL << 38,

// space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.
DIRTY_TEXALPHA = 1ULL << 39,

DIRTY_BONE_UNIFORMS = 0xFF000000ULL,

DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL,
DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFFULL,
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,

// Other dirty elements that aren't uniforms!
Expand Down
9 changes: 0 additions & 9 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,6 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_CLEARMODE)) desc << "Clear ";
if (id.Bit(FS_BIT_DO_TEXTURE)) desc << (id.Bit(FS_BIT_3D_TEXTURE) ? "Tex3D " : "Tex ");
if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj ";
if (id.Bit(FS_BIT_TEXALPHA)) desc << "TexAlpha ";
if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) desc << "TexOffs ";
if (id.Bit(FS_BIT_COLOR_DOUBLE)) desc << "2x ";
if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat ";
Expand Down Expand Up @@ -291,7 +290,6 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix, gstate.getUVProjMode()));
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;

ShaderDepalMode shaderDepalMode = gstate_c.shaderDepalMode;
Expand All @@ -303,16 +301,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
SimulateLogicOpType simulateLogicOpType = pipelineState.blendState.simulateLogicOpType;
ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil;

// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
// Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes.
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) {
doTextureAlpha = false;
}

if (gstate.isTextureMapEnabled()) {
id.SetBit(FS_BIT_DO_TEXTURE);
id.SetBits(FS_BIT_TEXFUNC, 3, gstate.getTextureFunction());
id.SetBit(FS_BIT_TEXALPHA, doTextureAlpha & 1); // rgb or rgba
if (gstate_c.needShaderTexClamp) {
bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0;
// 4 bits total.
Expand Down
7 changes: 3 additions & 4 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@

#include "Common/CommonFuncs.h"

// TODO: There will be additional bits, indicating that groups of these will be
// sent to the shader and processed there. This will cut down the number of shaders ("ubershader approach")
// This is probably only really worth doing for lighting and bones.
// VS_BIT_LIGHT_UBERSHADER indicates that some groups of these will be
// sent to the shader and processed there. This cuts down the number of shaders ("ubershader approach").
enum VShaderBit : uint8_t {
VS_BIT_LMODE = 0,
VS_BIT_IS_THROUGH = 1,
Expand Down Expand Up @@ -68,7 +67,7 @@ enum FShaderBit : uint8_t {
FS_BIT_CLEARMODE = 0,
FS_BIT_DO_TEXTURE = 1,
FS_BIT_TEXFUNC = 2, // 3 bits
FS_BIT_TEXALPHA = 5,
// 1 bit free at position 5
FS_BIT_3D_TEXTURE = 6,
FS_BIT_SHADER_TEX_CLAMP = 7,
FS_BIT_CLAMP_S = 8,
Expand Down
6 changes: 5 additions & 1 deletion GPU/Common/ShaderUniforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,12 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
}
}

if (dirtyUniforms & DIRTY_TEXALPHA) {
ub->texNoAlpha = gstate.isTextureAlphaUsed() ? 0.0f : 1.0f;
}

if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
ub->stencil = (float)gstate.getStencilTestRef() * (1.0 / 255.0);
ub->stencilReplaceValue = (float)gstate.getStencilTestRef() * (1.0 / 255.0);
}

// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
Expand Down
8 changes: 5 additions & 3 deletions GPU/Common/ShaderUniforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
enum : uint64_t {
DIRTY_BASE_UNIFORMS =
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_STENCILREPLACEVALUE |
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_COLORWRITEMASK | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA |
DIRTY_BEZIERSPLINE | DIRTY_DEPAL,
DIRTY_LIGHT_UNIFORMS =
DIRTY_LIGHT_CONTROL | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
};

// Currently 448 bytes.
// Currently 496 bytes.
// Every line here is a 4-float.
struct alignas(16) UB_VS_FS_Base {
float proj[16];
Expand All @@ -35,10 +35,11 @@ struct alignas(16) UB_VS_FS_Base {
// Fragment data
float fogColor[3]; uint32_t alphaColorRef;
float texEnvColor[3]; uint32_t colorTestMask;
float blendFixA[3]; float stencil;
float blendFixA[3]; float stencilReplaceValue;
float blendFixB[3]; float rotation;
float texClamp[4];
float texClampOffset[2]; float fogCoef[2];
float texNoAlpha; float pad[3];
// VR stuff is to go here, later. For normal drawing, we can then get away
// with just uploading the first 448 bytes of the struct (up to and including fogCoef).
};
Expand All @@ -65,6 +66,7 @@ R"( mat4 u_proj;
vec4 u_texclamp;
vec2 u_texclampoff;
vec2 u_fogcoef;
float u_texNoAlpha; float pad0; float pad1; float pad2;
)";

// 512 bytes. Would like to shrink more. Some colors only have 8-bit precision and we expand
Expand Down
7 changes: 5 additions & 2 deletions GPU/Directx9/ShaderManagerDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) {
in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(1.0f, 1.0f, 0.5f));
}

const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS;
const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS;

void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
if (dirtyUniforms & DIRTY_TEXENV) {
Expand All @@ -279,7 +279,10 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f));
}

if (dirtyUniforms & DIRTY_TEXALPHA) {
// NOTE: Reversed value, more efficient in shader.
PSSetFloat(CONST_PS_TEX_NO_ALPHA, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f);
}
if (dirtyUniforms & DIRTY_SHADERBLEND) {
PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA());
PSSetColorUniform3(CONST_PS_BLENDFIXB, gstate.getFixB());
Expand Down
6 changes: 5 additions & 1 deletion GPU/GLES/ShaderManagerGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" });
queries.push_back({ &u_texclamp, "u_texclamp" });
queries.push_back({ &u_texclampoff, "u_texclampoff" });
queries.push_back({ &u_texNoAlpha, "u_texNoAlpha" });
queries.push_back({ &u_lightControl, "u_lightControl" });

for (int i = 0; i < 4; i++) {
Expand Down Expand Up @@ -440,6 +441,9 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin
if (dirty & DIRTY_TEXENV) {
SetColorUniform3(render_, &u_texenv, gstate.texenvcolor);
}
if (dirty & DIRTY_TEXALPHA) {
render_->SetUniformF1(&u_texNoAlpha, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f);
}
if (dirty & DIRTY_ALPHACOLORREF) {
if (shaderLanguage.bitwiseOps) {
render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24));
Expand Down Expand Up @@ -945,7 +949,7 @@ enum class CacheDetectFlags {
};

#define CACHE_HEADER_MAGIC 0x83277592
#define CACHE_VERSION 21
#define CACHE_VERSION 22
struct CacheHeader {
uint32_t magic;
uint32_t version;
Expand Down
1 change: 1 addition & 0 deletions GPU/GLES/ShaderManagerGLES.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class LinkedShader {
int u_uvscaleoffset;
int u_texclamp;
int u_texclampoff;
int u_texNoAlpha;

// Lighting
int u_lightControl;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
// Raster state for Direct3D 9, uncommon.
{ GE_CMD_SHADEMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE },
{ GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXALPHA },
{ GE_CMD_COLORTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_ALPHATESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_COLORTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/ShaderManagerVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ enum class VulkanCacheDetectFlags {
};

#define CACHE_HEADER_MAGIC 0xff51f420
#define CACHE_VERSION 35
#define CACHE_VERSION 36
struct VulkanCacheHeader {
uint32_t magic;
uint32_t version;
Expand Down