Skip to content

Commit

Permalink
Merge pull request #16763 from hrydgard/texalpha-shaderflag
Browse files (browse the repository at this point in the history)
Shader generator: Move FS_TEX_ALPHA to a uniform bool.
  • Loading branch information
hrydgard authored Jan 10, 2023
2 parents 44bcfd3 + 0a6d226 commit bef50f9
Show file tree
Hide file tree
Showing 13 changed files with 72 additions and 86 deletions.
2 changes: 1 addition & 1 deletion Core/Util/PPGeDraw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1327,7 +1327,7 @@ bool PPGeImage::Load() {
unsigned char *textureData;
int success;
if (filename_.empty()) {
success = pngLoadPtr(Memory::GetPointerRange(png_, size_), size_, &width_, &height_, &textureData);
success = pngLoadPtr(Memory::GetPointerRange(png_, (u32)size_), size_, &width_, &height_, &textureData);
} else {
std::vector<u8> pngData;
if (pspFileSystem.ReadEntireFile(filename_, pngData) < 0) {
Expand Down
96 changes: 37 additions & 59 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO);
bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE);
bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);
bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA);

if (texture3D && arrayTexture) {
*errorString = "Invalid combination of 3D texture and array texture, shouldn't happen";
Expand Down Expand Up @@ -257,8 +256,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE);
}
if (doTexture && texFunc == GE_TEXFUNC_BLEND) {
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
if (doTexture) {
if (texFunc == GE_TEXFUNC_BLEND) {
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
}
WRITE(p, "float u_texNoAlpha : register(c%i);\n", CONST_PS_TEX_NO_ALPHA);
}
WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR);
if (texture3D) {
Expand Down Expand Up @@ -351,6 +353,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
} else {
WRITE(p, "uniform sampler2D tex;\n");
}
*uniformMask |= DIRTY_TEXALPHA;
WRITE(p, "uniform float u_texNoAlpha;\n");
}

if (readFramebufferTex) {
Expand Down Expand Up @@ -817,64 +821,38 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
break;
}

if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha)
WRITE(p, " vec4 p = v_color0;\n");

if (doTextureAlpha) { // texfmt == RGBA
switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = p * t + s\n;");
break;

case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n");
break;

case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n");
break;

case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = t + s;\n");
break;

case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n");
break;
default:
WRITE(p, " vec4 v = p;\n"); break;
}
} else { // texfmt == RGB
switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a) + s;\n");
break;

case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n");
break;

case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a) + s;\n");
break;

case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n");
break;

case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a) + s;\n"); break;
default:
WRITE(p, " vec4 v = p;\n"); break;
}
WRITE(p, " vec4 p = v_color0;\n");

if (texFunc != GE_TEXFUNC_REPLACE) {
WRITE(p, " t.a = max(t.a, u_texNoAlpha);\n");
}

switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = p * t + s;\n");
break;
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n");
break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n");
break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 r = t;\n");
WRITE(p, " r.a = mix(r.a, p.a, u_texNoAlpha);\n");
WRITE(p, " vec4 v = r + s;\n");
break;
case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n");
break;
default:
// Doesn't happen
WRITE(p, " vec4 v = p + s;\n"); break;
break;
}
if (enableColorDoubling) {
// This happens before fog is applied.
WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
Expand Down
8 changes: 6 additions & 2 deletions GPU/Common/FragmentShaderGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

struct FShaderID;

// D3D9 constants
// D3D9 float constants

#define CONST_PS_TEXENV 0
#define CONST_PS_ALPHACOLORREF 1
Expand All @@ -36,9 +36,13 @@ struct FShaderID;
#define CONST_PS_TEXCLAMP 8
#define CONST_PS_TEXCLAMPOFF 9
#define CONST_PS_MIPBIAS 10
#define CONST_PS_TEX_NO_ALPHA 11

// For stencil upload
#define CONST_PS_STENCILVALUE 11
#define BCONST_PS_STENCILVALUE 12

// D3D9 bool constants, they have their own register space.


// Can technically be deduced from the fragment shader ID, but this is safer.
enum class FragmentShaderFlags : u32 {
Expand Down
4 changes: 2 additions & 2 deletions GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ enum : uint64_t {
DIRTY_MIPBIAS = 1ULL << 37,
DIRTY_LIGHT_CONTROL = 1ULL << 38,

// space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.
DIRTY_TEXALPHA = 1ULL << 39,

DIRTY_BONE_UNIFORMS = 0xFF000000ULL,

DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL,
DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFFULL,
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,

// Other dirty elements that aren't uniforms!
Expand Down
9 changes: 0 additions & 9 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,6 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_CLEARMODE)) desc << "Clear ";
if (id.Bit(FS_BIT_DO_TEXTURE)) desc << (id.Bit(FS_BIT_3D_TEXTURE) ? "Tex3D " : "Tex ");
if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj ";
if (id.Bit(FS_BIT_TEXALPHA)) desc << "TexAlpha ";
if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) desc << "TexOffs ";
if (id.Bit(FS_BIT_COLOR_DOUBLE)) desc << "2x ";
if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat ";
Expand Down Expand Up @@ -291,7 +290,6 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix, gstate.getUVProjMode()));
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;

ShaderDepalMode shaderDepalMode = gstate_c.shaderDepalMode;
Expand All @@ -303,16 +301,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
SimulateLogicOpType simulateLogicOpType = pipelineState.blendState.simulateLogicOpType;
ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil;

// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
// Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes.
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) {
doTextureAlpha = false;
}

if (gstate.isTextureMapEnabled()) {
id.SetBit(FS_BIT_DO_TEXTURE);
id.SetBits(FS_BIT_TEXFUNC, 3, gstate.getTextureFunction());
id.SetBit(FS_BIT_TEXALPHA, doTextureAlpha & 1); // rgb or rgba
if (gstate_c.needShaderTexClamp) {
bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0;
// 4 bits total.
Expand Down
7 changes: 3 additions & 4 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@

#include "Common/CommonFuncs.h"

// TODO: There will be additional bits, indicating that groups of these will be
// sent to the shader and processed there. This will cut down the number of shaders ("ubershader approach")
// This is probably only really worth doing for lighting and bones.
// VS_BIT_LIGHT_UBERSHADER indicates that some groups of these will be
// sent to the shader and processed there. This cuts down the number of shaders ("ubershader approach").
enum VShaderBit : uint8_t {
VS_BIT_LMODE = 0,
VS_BIT_IS_THROUGH = 1,
Expand Down Expand Up @@ -68,7 +67,7 @@ enum FShaderBit : uint8_t {
FS_BIT_CLEARMODE = 0,
FS_BIT_DO_TEXTURE = 1,
FS_BIT_TEXFUNC = 2, // 3 bits
FS_BIT_TEXALPHA = 5,
// 1 bit free at position 5
FS_BIT_3D_TEXTURE = 6,
FS_BIT_SHADER_TEX_CLAMP = 7,
FS_BIT_CLAMP_S = 8,
Expand Down
6 changes: 5 additions & 1 deletion GPU/Common/ShaderUniforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,12 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
}
}

if (dirtyUniforms & DIRTY_TEXALPHA) {
ub->texNoAlpha = gstate.isTextureAlphaUsed() ? 0.0f : 1.0f;
}

if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
ub->stencil = (float)gstate.getStencilTestRef() * (1.0 / 255.0);
ub->stencilReplaceValue = (float)gstate.getStencilTestRef() * (1.0 / 255.0);
}

// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
Expand Down
8 changes: 5 additions & 3 deletions GPU/Common/ShaderUniforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
enum : uint64_t {
DIRTY_BASE_UNIFORMS =
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_STENCILREPLACEVALUE |
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_COLORWRITEMASK | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA |
DIRTY_BEZIERSPLINE | DIRTY_DEPAL,
DIRTY_LIGHT_UNIFORMS =
DIRTY_LIGHT_CONTROL | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
};

// Currently 448 bytes.
// Currently 496 bytes.
// Every line here is a 4-float.
struct alignas(16) UB_VS_FS_Base {
float proj[16];
Expand All @@ -35,10 +35,11 @@ struct alignas(16) UB_VS_FS_Base {
// Fragment data
float fogColor[3]; uint32_t alphaColorRef;
float texEnvColor[3]; uint32_t colorTestMask;
float blendFixA[3]; float stencil;
float blendFixA[3]; float stencilReplaceValue;
float blendFixB[3]; float rotation;
float texClamp[4];
float texClampOffset[2]; float fogCoef[2];
float texNoAlpha; float pad[3];
// VR stuff is to go here, later. For normal drawing, we can then get away
// with just uploading the leading part of the struct (up to and including texNoAlpha and its padding).
};
Expand All @@ -65,6 +66,7 @@ R"( mat4 u_proj;
vec4 u_texclamp;
vec2 u_texclampoff;
vec2 u_fogcoef;
float u_texNoAlpha; float pad0; float pad1; float pad2;
)";

// 512 bytes. Would like to shrink more. Some colors only have 8-bit precision and we expand
Expand Down
7 changes: 5 additions & 2 deletions GPU/Directx9/ShaderManagerDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) {
in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(1.0f, 1.0f, 0.5f));
}

const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS;
const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS;

void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
if (dirtyUniforms & DIRTY_TEXENV) {
Expand All @@ -279,7 +279,10 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f));
}

if (dirtyUniforms & DIRTY_TEXALPHA) {
// NOTE: Reversed value, more efficient in shader.
PSSetFloat(CONST_PS_TEX_NO_ALPHA, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f);
}
if (dirtyUniforms & DIRTY_SHADERBLEND) {
PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA());
PSSetColorUniform3(CONST_PS_BLENDFIXB, gstate.getFixB());
Expand Down
6 changes: 5 additions & 1 deletion GPU/GLES/ShaderManagerGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" });
queries.push_back({ &u_texclamp, "u_texclamp" });
queries.push_back({ &u_texclampoff, "u_texclampoff" });
queries.push_back({ &u_texNoAlpha, "u_texNoAlpha" });
queries.push_back({ &u_lightControl, "u_lightControl" });

for (int i = 0; i < 4; i++) {
Expand Down Expand Up @@ -440,6 +441,9 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin
if (dirty & DIRTY_TEXENV) {
SetColorUniform3(render_, &u_texenv, gstate.texenvcolor);
}
if (dirty & DIRTY_TEXALPHA) {
render_->SetUniformF1(&u_texNoAlpha, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f);
}
if (dirty & DIRTY_ALPHACOLORREF) {
if (shaderLanguage.bitwiseOps) {
render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24));
Expand Down Expand Up @@ -945,7 +949,7 @@ enum class CacheDetectFlags {
};

#define CACHE_HEADER_MAGIC 0x83277592
#define CACHE_VERSION 21
#define CACHE_VERSION 22
struct CacheHeader {
uint32_t magic;
uint32_t version;
Expand Down
1 change: 1 addition & 0 deletions GPU/GLES/ShaderManagerGLES.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class LinkedShader {
int u_uvscaleoffset;
int u_texclamp;
int u_texclampoff;
int u_texNoAlpha;

// Lighting
int u_lightControl;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
// Raster state for Direct3D 9, uncommon.
{ GE_CMD_SHADEMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE },
{ GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXALPHA },
{ GE_CMD_COLORTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_ALPHATESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_COLORTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/ShaderManagerVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ enum class VulkanCacheDetectFlags {
};

#define CACHE_HEADER_MAGIC 0xff51f420
#define CACHE_VERSION 35
#define CACHE_VERSION 36
struct VulkanCacheHeader {
uint32_t magic;
uint32_t version;
Expand Down

0 comments on commit bef50f9

Please sign in to comment.