Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove "hardware" skinning #16345

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,6 @@ static ConfigSetting graphicsSettings[] = {
ConfigSetting("SoftwareRenderer", &g_Config.bSoftwareRendering, false, true, true),
ConfigSetting("SoftwareRendererJit", &g_Config.bSoftwareRenderingJit, true, true, true),
ReportedConfigSetting("HardwareTransform", &g_Config.bHardwareTransform, true, true, true),
ReportedConfigSetting("SoftwareSkinning", &g_Config.bSoftwareSkinning, true, true, true),
ReportedConfigSetting("TextureFiltering", &g_Config.iTexFiltering, 1, true, true),
ReportedConfigSetting("BufferFiltering", &g_Config.iBufFilter, SCALE_LINEAR, true, true),
ReportedConfigSetting("InternalResolution", &g_Config.iInternalResolution, &DefaultInternalResolution, true, true),
Expand Down
1 change: 0 additions & 1 deletion Core/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ struct Config {
bool bSoftwareRendering;
bool bSoftwareRenderingJit;
bool bHardwareTransform; // only used in the GLES backend
bool bSoftwareSkinning;
bool bVendorBugChecksEnabled;
bool bUseGeometryShader;

Expand Down
106 changes: 25 additions & 81 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,10 @@ void DrawEngineCommon::NotifyConfigChanged() {

useHWTransform_ = g_Config.bHardwareTransform;
useHWTessellation_ = UpdateUseHWTessellation(g_Config.bHardwareTessellation);
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
}

u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize) {
const u32 vertTypeID = GetVertTypeID(vertType, gstate.getUVGenMode(), decOptions_.applySkinInDecode);
const u32 vertTypeID = GetVertTypeID(vertType, gstate.getUVGenMode(), true);
VertexDecoder *dec = GetVertexDecoder(vertTypeID);
if (vertexSize)
*vertexSize = dec->VertexSize();
Expand Down Expand Up @@ -232,7 +231,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
}

int bytesRead;
uint32_t vertTypeID = GetVertTypeID(vtype, 0, decOptions_.applySkinInDecode);
uint32_t vertTypeID = GetVertTypeID(vtype, 0, true);
SubmitPrim(&temp[0], nullptr, prim, vertexCount, vertTypeID, cullMode, &bytesRead);
DispatchFlush();

Expand Down Expand Up @@ -281,10 +280,7 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
}

// Force software skinning.
bool wasApplyingSkinInDecode = decOptions_.applySkinInDecode;
decOptions_.applySkinInDecode = true;
NormalizeVertices((u8 *)corners, temp_buffer, (const u8 *)control_points, indexLowerBound, indexUpperBound, vertType);
decOptions_.applySkinInDecode = wasApplyingSkinInDecode;

IndexConverter conv(vertType, inds);
for (int i = 0; i < vertexCount; i++) {
Expand Down Expand Up @@ -499,8 +495,7 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
// implementation of the vertex decoder.
dec->DecodeVerts(bufPtr, inPtr, lowerBound, upperBound);

// OK, morphing eliminated but bones still remain to be taken care of.
// Let's do a partial software transform where we only do skinning.
// Morph and skin are both removed during decode now.

VertexReader reader(bufPtr, dec->GetDecVtxFmt(), vertType);

Expand All @@ -513,80 +508,29 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
(u8)gstate.getMaterialAmbientA(),
};

// Let's have two separate loops, one for non skinning and one for skinning.
if (!dec->skinInDecode && (vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) {
int numBoneWeights = vertTypeGetNumBoneWeights(vertType);
for (int i = lowerBound; i <= upperBound; i++) {
reader.Goto(i - lowerBound);
SimpleVertex &sv = sverts[i];
if (vertType & GE_VTYPE_TC_MASK) {
reader.ReadUV(sv.uv);
}

if (vertType & GE_VTYPE_COL_MASK) {
reader.ReadColor0_8888(sv.color);
} else {
memcpy(sv.color, defaultColor, 4);
}

float nrm[3], pos[3];
float bnrm[3], bpos[3];

if (vertType & GE_VTYPE_NRM_MASK) {
// Normals are generated during tessellation anyway, not sure if any need to supply
reader.ReadNrm(nrm);
} else {
nrm[0] = 0;
nrm[1] = 0;
nrm[2] = 1.0f;
}
reader.ReadPos(pos);

// Apply skinning transform directly
float weights[8];
reader.ReadWeights(weights);
// Skinning
Vec3Packedf psum(0, 0, 0);
Vec3Packedf nsum(0, 0, 0);
for (int w = 0; w < numBoneWeights; w++) {
if (weights[w] != 0.0f) {
Vec3ByMatrix43(bpos, pos, gstate.boneMatrix + w * 12);
Vec3Packedf tpos(bpos);
psum += tpos * weights[w];

Norm3ByMatrix43(bnrm, nrm, gstate.boneMatrix + w * 12);
Vec3Packedf tnorm(bnrm);
nsum += tnorm * weights[w];
}
}
sv.pos = psum;
sv.nrm = nsum;
for (int i = lowerBound; i <= upperBound; i++) {
reader.Goto(i - lowerBound);
SimpleVertex &sv = sverts[i];
if (vertType & GE_VTYPE_TC_MASK) {
reader.ReadUV(sv.uv);
} else {
sv.uv[0] = 0.0f; // This will get filled in during tessellation
sv.uv[1] = 0.0f;
}
} else {
for (int i = lowerBound; i <= upperBound; i++) {
reader.Goto(i - lowerBound);
SimpleVertex &sv = sverts[i];
if (vertType & GE_VTYPE_TC_MASK) {
reader.ReadUV(sv.uv);
} else {
sv.uv[0] = 0.0f; // This will get filled in during tessellation
sv.uv[1] = 0.0f;
}
if (vertType & GE_VTYPE_COL_MASK) {
reader.ReadColor0_8888(sv.color);
} else {
memcpy(sv.color, defaultColor, 4);
}
if (vertType & GE_VTYPE_NRM_MASK) {
// Normals are generated during tessellation anyway, not sure if any need to supply
reader.ReadNrm((float *)&sv.nrm);
} else {
sv.nrm.x = 0.0f;
sv.nrm.y = 0.0f;
sv.nrm.z = 1.0f;
}
reader.ReadPos((float *)&sv.pos);
if (vertType & GE_VTYPE_COL_MASK) {
reader.ReadColor0_8888(sv.color);
} else {
memcpy(sv.color, defaultColor, 4);
}
if (vertType & GE_VTYPE_NRM_MASK) {
// Normals are generated during tessellation anyway, not sure if any need to supply
reader.ReadNrm((float *)&sv.nrm);
} else {
sv.nrm.x = 0.0f;
sv.nrm.y = 0.0f;
sv.nrm.z = 1.0f;
}
reader.ReadPos((float *)&sv.pos);
}

// Okay, there we are! Return the new type (but keep the index bits)
Expand Down Expand Up @@ -836,7 +780,7 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
numDrawCalls++;
vertexCountInDrawCalls_ += vertexCount;

if (decOptions_.applySkinInDecode && (vertTypeID & GE_VTYPE_WEIGHT_MASK)) {
if (vertTypeID & GE_VTYPE_WEIGHT_MASK) {
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
decodeCounter_++;
}
Expand Down
11 changes: 6 additions & 5 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/Common/FragmentShaderGenerator.h"
#include "GPU/Vulkan/DrawEngineVulkan.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"

Expand Down Expand Up @@ -185,23 +186,23 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
}

WRITE(p, "layout (std140, set = 1, binding = 3) uniform baseUBO {\n%s};\n", ub_baseStr);
WRITE(p, "layout (std140, set = 1, binding = %d) uniform baseUBO {\n%s};\n", DRAW_BINDING_DYNUBO_BASE, ub_baseStr);
if (doTexture) {
WRITE(p, "layout (set = 1, binding = 0) uniform %s%s tex;\n", texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : "");
WRITE(p, "layout (set = 1, binding = %d) uniform %s%s tex;\n", DRAW_BINDING_TEXTURE, texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : "");
}

if (readFramebufferTex) {
// The framebuffer texture is always bound as an array.
p.C("layout (set = 1, binding = 1) uniform sampler2DArray fbotex;\n");
p.F("layout (set = 1, binding = %d) uniform sampler2DArray fbotex;\n", DRAW_BINDING_2ND_TEXTURE);
} else if (fetchFramebuffer) {
p.C("layout (input_attachment_index = 0, set = 1, binding = 9) uniform subpassInput inputColor;\n");
p.F("layout (input_attachment_index = 0, set = 1, binding = %d) uniform subpassInput inputColor;\n", DRAW_BINDING_INPUT_ATTACHMENT);
if (fragmentShaderFlags) {
*fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT;
}
}

if (shaderDepalMode != ShaderDepalMode::OFF) {
WRITE(p, "layout (set = 1, binding = 2) uniform sampler2D pal;\n");
WRITE(p, "layout (set = 1, binding = %d) uniform sampler2D pal;\n", DRAW_BINDING_DEPAL_TEXTURE);
}

// Note: the precision qualifiers must match the vertex shader!
Expand Down
6 changes: 1 addition & 5 deletions GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,7 @@ enum : uint64_t {
DIRTY_MIPBIAS = 1ULL << 37,
DIRTY_LIGHT_CONTROL = 1ULL << 38,

// space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.

DIRTY_BONE_UNIFORMS = 0xFF000000ULL,

DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL,
DIRTY_ALL_UNIFORMS = 0x7F00FFFFFFULL, // 00 is where bone uniforms used to be.
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,

// Other dirty elements that aren't uniforms!
Expand Down
12 changes: 0 additions & 12 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ std::string VertexShaderDesc(const VShaderID &id) {
int ls1 = id.Bits(VS_BIT_LS1, 2);

if (uvgMode) desc << uvgModes[uvgMode];
if (id.Bit(VS_BIT_ENABLE_BONES)) desc << "Bones:" << (id.Bits(VS_BIT_BONES, 3) + 1) << " ";
// Lights
if (id.Bit(VS_BIT_LIGHTING_ENABLE)) {
desc << "Light: ";
Expand All @@ -51,7 +50,6 @@ std::string VertexShaderDesc(const VShaderID &id) {
}
}
if (id.Bits(VS_BIT_MATERIAL_UPDATE, 3)) desc << "MatUp:" << id.Bits(VS_BIT_MATERIAL_UPDATE, 3) << " ";
if (id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2)) desc << "WScale " << id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2) << " ";
if (id.Bit(VS_BIT_FLATSHADE)) desc << "Flat ";

if (id.Bit(VS_BIT_BEZIER)) desc << "Bezier ";
Expand Down Expand Up @@ -117,16 +115,6 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform,
id.SetBits(VS_BIT_LS1, 2, gstate.getUVLS1());
}

// Bones.
bool enableBones = !useSkinInDecode && vertTypeIsSkinningEnabled(vertType);
id.SetBit(VS_BIT_ENABLE_BONES, enableBones);
if (enableBones) {
id.SetBits(VS_BIT_BONES, 3, TranslateNumBones(vertTypeGetNumBoneWeights(vertType)) - 1);
// 2 bits. We should probably send in the weight scalefactor as a uniform instead,
// or simply preconvert all weights to floats.
id.SetBits(VS_BIT_WEIGHT_FMTSCALE, 2, weightsAsFloat ? 0 : (vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT);
}

if (gstate.isLightingEnabled()) {
// doShadeMapping is stored as UVGenMode, and light type doesn't matter for shade mapping.
id.SetBit(VS_BIT_LIGHTING_ENABLE);
Expand Down
7 changes: 2 additions & 5 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ enum VShaderBit : uint8_t {
VS_BIT_UVPROJ_MODE = 18, // 2, can overlap with LS0
VS_BIT_LS0 = 18, // 2
VS_BIT_LS1 = 20, // 2
VS_BIT_BONES = 22, // 3 should be enough, not 8
// 25 - 29 are free.
VS_BIT_ENABLE_BONES = 30,
// 21 - 30 are free.

// If this is set along with LIGHTING_ENABLE, all other lighting bits below
// are passed to the shader directly instead.
Expand All @@ -52,8 +50,7 @@ enum VShaderBit : uint8_t {
VS_BIT_LIGHT2_ENABLE = 54,
VS_BIT_LIGHT3_ENABLE = 55,
VS_BIT_LIGHTING_ENABLE = 56,
VS_BIT_WEIGHT_FMTSCALE = 57, // only two bits
// 59 - 61 are free.
// 57 - 61 are free.
VS_BIT_FLATSHADE = 62, // 1 bit
VS_BIT_BEZIER = 63, // 1 bit
// No more free
Expand Down
8 changes: 0 additions & 8 deletions GPU/Common/ShaderUniforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,11 +337,3 @@ void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
}
}
}

void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
for (int i = 0; i < 8; i++) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To3x4Transposed(ub->bones[i], gstate.boneMatrix + 12 * i);
}
}
}
12 changes: 0 additions & 12 deletions GPU/Common/ShaderUniforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,6 @@ R"( vec4 u_ambient;
vec3 u_lightspecular3;
)";

// With some cleverness, we could get away with uploading just half this when only the four or five first
// bones are being used. This is 384b.
struct alignas(16) UB_VS_Bones {
float bones[8][12];
};

static const char * const ub_vs_bonesStr =
R"( mat3x4 u_bone0; mat3x4 u_bone1; mat3x4 u_bone2; mat3x4 u_bone3; mat3x4 u_bone4; mat3x4 u_bone5; mat3x4 u_bone6; mat3x4 u_bone7; mat3x4 u_bone8;
)";


static const char * const ub_frameStr =
R"(
float u_rotation;
Expand All @@ -145,7 +134,6 @@ void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bo

void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport, bool useBufferedRendering);
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms);
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms);
void FrameUpdateUniforms(UB_Frame *ub, bool useBufferedRendering);

uint32_t PackLightControlBits();
4 changes: 2 additions & 2 deletions GPU/Common/SplineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ void DrawEngineCommon::SubmitCurve(const void *control_points, const void *indic
if (indices)
GetIndexBounds(indices, num_points, vertType, &index_lower_bound, &index_upper_bound);

VertexDecoder *origVDecoder = GetVertexDecoder(GetVertTypeID(vertType, gstate.getUVGenMode(), decOptions_.applySkinInDecode));
VertexDecoder *origVDecoder = GetVertexDecoder(GetVertTypeID(vertType, gstate.getUVGenMode(), true));
*bytesRead = num_points * origVDecoder->VertexSize();

// Simplify away bones and morph before proceeding
Expand Down Expand Up @@ -572,7 +572,7 @@ void DrawEngineCommon::SubmitCurve(const void *control_points, const void *indic
gstate_c.uv.vOff = 0;
}

uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode(), decOptions_.applySkinInDecode);
uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode(), true);
int generatedBytesRead;
if (output.count)
DispatchSubmitPrim(output.vertices, output.indices, PatchPrimToPrim(surface.primType), output.count, vertTypeID, gstate.getCullMode(), &generatedBytesRead);
Expand Down
52 changes: 0 additions & 52 deletions GPU/Common/VertexDecoderArm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,6 @@ static const ARMReg srcNEON = Q2;
static const ARMReg accNEON = Q3;

static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
Expand Down Expand Up @@ -296,55 +293,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
return (JittedVertexDecoder)start;
}

void VertexDecoderJitCache::Jit_WeightsU8() {
// Basic implementation - a byte at a time. TODO: Optimize
int j;
for (j = 0; j < dec_->nweights; j++) {
LDRB(tempReg1, srcReg, dec_->weightoff + j);
STRB(tempReg1, dstReg, dec_->decFmt.w0off + j);
}
if (j & 3) {
// Create a zero register. Might want to make a fixed one.
EOR(scratchReg, scratchReg, scratchReg);
}
while (j & 3) {
STRB(scratchReg, dstReg, dec_->decFmt.w0off + j);
j++;
}
}

void VertexDecoderJitCache::Jit_WeightsU16() {
// Basic implementation - a short at a time. TODO: Optimize
int j;
for (j = 0; j < dec_->nweights; j++) {
LDRH(tempReg1, srcReg, dec_->weightoff + j * 2);
STRH(tempReg1, dstReg, dec_->decFmt.w0off + j * 2);
}
if (j & 3) {
// Create a zero register. Might want to make a fixed one.
EOR(scratchReg, scratchReg, scratchReg);
}
while (j & 3) {
STRH(scratchReg, dstReg, dec_->decFmt.w0off + j * 2);
j++;
}
}

void VertexDecoderJitCache::Jit_WeightsFloat() {
int j;
for (j = 0; j < dec_->nweights; j++) {
LDR(tempReg1, srcReg, dec_->weightoff + j * 4);
STR(tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
}
if (j & 3) {
EOR(tempReg1, tempReg1, tempReg1);
}
while (j & 3) { // Zero additional weights rounding up to 4.
STR(tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
j++;
}
}

static const ARMReg weightRegs[8] = { S8, S9, S10, S11, S12, S13, S14, S15 };
static const ARMReg neonWeightRegsD[4] = { D4, D5, D6, D7 };
static const ARMReg neonWeightRegsQ[2] = { Q2, Q3 };
Expand Down
Loading