hrydgard · Nov 6, 2022
diff --git a/Core/Config.cpp b/Core/Config.cpp
@@ -879,7 +879,6 @@ static ConfigSetting graphicsSettings[] = {
 	ConfigSetting("SoftwareRenderer", &g_Config.bSoftwareRendering, false, true, true),
 	ConfigSetting("SoftwareRendererJit", &g_Config.bSoftwareRenderingJit, true, true, true),
 	ReportedConfigSetting("HardwareTransform", &g_Config.bHardwareTransform, true, true, true),
-	ReportedConfigSetting("SoftwareSkinning", &g_Config.bSoftwareSkinning, true, true, true),
 	ReportedConfigSetting("TextureFiltering", &g_Config.iTexFiltering, 1, true, true),
 	ReportedConfigSetting("BufferFiltering", &g_Config.iBufFilter, SCALE_LINEAR, true, true),
 	ReportedConfigSetting("InternalResolution", &g_Config.iInternalResolution, &DefaultInternalResolution, true, true),

diff --git a/Core/Config.h b/Core/Config.h
@@ -162,7 +162,6 @@ struct Config {
 	bool bSoftwareRendering;
 	bool bSoftwareRenderingJit;
 	bool bHardwareTransform; // only used in the GLES backend
-	bool bSoftwareSkinning;
 	bool bVendorBugChecksEnabled;
 	bool bUseGeometryShader;
 

diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp
@@ -180,11 +180,10 @@ void DrawEngineCommon::NotifyConfigChanged() {
 
 	useHWTransform_ = g_Config.bHardwareTransform;
 	useHWTessellation_ = UpdateUseHWTessellation(g_Config.bHardwareTessellation);
-	decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
 }
 
 u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize) {
-	const u32 vertTypeID = GetVertTypeID(vertType, gstate.getUVGenMode(), decOptions_.applySkinInDecode);
+	const u32 vertTypeID = GetVertTypeID(vertType, gstate.getUVGenMode(), true);
 	VertexDecoder *dec = GetVertexDecoder(vertTypeID);
 	if (vertexSize)
 		*vertexSize = dec->VertexSize();
@@ -232,7 +231,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
 	}
 
 	int bytesRead;
-	uint32_t vertTypeID = GetVertTypeID(vtype, 0, decOptions_.applySkinInDecode);
+	uint32_t vertTypeID = GetVertTypeID(vtype, 0, true);
 	SubmitPrim(&temp[0], nullptr, prim, vertexCount, vertTypeID, cullMode, &bytesRead);
 	DispatchFlush();
 
@@ -281,10 +280,7 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
 		}
 
 		// Force software skinning.
-		bool wasApplyingSkinInDecode = decOptions_.applySkinInDecode;
-		decOptions_.applySkinInDecode = true;
 		NormalizeVertices((u8 *)corners, temp_buffer, (const u8 *)control_points, indexLowerBound, indexUpperBound, vertType);
-		decOptions_.applySkinInDecode = wasApplyingSkinInDecode;
 
 		IndexConverter conv(vertType, inds);
 		for (int i = 0; i < vertexCount; i++) {
@@ -499,8 +495,7 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
 	// implementation of the vertex decoder.
 	dec->DecodeVerts(bufPtr, inPtr, lowerBound, upperBound);
 
-	// OK, morphing eliminated but bones still remain to be taken care of.
-	// Let's do a partial software transform where we only do skinning.
+	// Morph and skin are both removed during decode now.
 
 	VertexReader reader(bufPtr, dec->GetDecVtxFmt(), vertType);
 
@@ -513,80 +508,29 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
 		(u8)gstate.getMaterialAmbientA(),
 	};
 
-	// Let's have two separate loops, one for non skinning and one for skinning.
-	if (!dec->skinInDecode && (vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) {
-		int numBoneWeights = vertTypeGetNumBoneWeights(vertType);
-		for (int i = lowerBound; i <= upperBound; i++) {
-			reader.Goto(i - lowerBound);
-			SimpleVertex &sv = sverts[i];
-			if (vertType & GE_VTYPE_TC_MASK) {
-				reader.ReadUV(sv.uv);
-			}
-
-			if (vertType & GE_VTYPE_COL_MASK) {
-				reader.ReadColor0_8888(sv.color);
-			} else {
-				memcpy(sv.color, defaultColor, 4);
-			}
-
-			float nrm[3], pos[3];
-			float bnrm[3], bpos[3];
-
-			if (vertType & GE_VTYPE_NRM_MASK) {
-				// Normals are generated during tessellation anyway, not sure if any need to supply
-				reader.ReadNrm(nrm);
-			} else {
-				nrm[0] = 0;
-				nrm[1] = 0;
-				nrm[2] = 1.0f;
-			}
-			reader.ReadPos(pos);
-
-			// Apply skinning transform directly
-			float weights[8];
-			reader.ReadWeights(weights);
-			// Skinning
-			Vec3Packedf psum(0, 0, 0);
-			Vec3Packedf nsum(0, 0, 0);
-			for (int w = 0; w < numBoneWeights; w++) {
-				if (weights[w] != 0.0f) {
-					Vec3ByMatrix43(bpos, pos, gstate.boneMatrix + w * 12);
-					Vec3Packedf tpos(bpos);
-					psum += tpos * weights[w];
-
-					Norm3ByMatrix43(bnrm, nrm, gstate.boneMatrix + w * 12);
-					Vec3Packedf tnorm(bnrm);
-					nsum += tnorm * weights[w];
-				}
-			}
-			sv.pos = psum;
-			sv.nrm = nsum;
+	for (int i = lowerBound; i <= upperBound; i++) {
+		reader.Goto(i - lowerBound);
+		SimpleVertex &sv = sverts[i];
+		if (vertType & GE_VTYPE_TC_MASK) {
+			reader.ReadUV(sv.uv);
+		} else {
+			sv.uv[0] = 0.0f;  // This will get filled in during tessellation
+			sv.uv[1] = 0.0f;
 		}
-	} else {
-		for (int i = lowerBound; i <= upperBound; i++) {
-			reader.Goto(i - lowerBound);
-			SimpleVertex &sv = sverts[i];
-			if (vertType & GE_VTYPE_TC_MASK) {
-				reader.ReadUV(sv.uv);
-			} else {
-				sv.uv[0] = 0.0f;  // This will get filled in during tessellation
-				sv.uv[1] = 0.0f;
-			}
-			if (vertType & GE_VTYPE_COL_MASK) {
-				reader.ReadColor0_8888(sv.color);
-			} else {
-				memcpy(sv.color, defaultColor, 4);
-			}
-			if (vertType & GE_VTYPE_NRM_MASK) {
-				// Normals are generated during tessellation anyway, not sure if any need to supply
-				reader.ReadNrm((float *)&sv.nrm);
-			} else {
-				sv.nrm.x = 0.0f;
-				sv.nrm.y = 0.0f;
-				sv.nrm.z = 1.0f;
-			}
-			reader.ReadPos((float *)&sv.pos);
+		if (vertType & GE_VTYPE_COL_MASK) {
+			reader.ReadColor0_8888(sv.color);
+		} else {
+			memcpy(sv.color, defaultColor, 4);
+		}
+		if (vertType & GE_VTYPE_NRM_MASK) {
+			// Normals are generated during tessellation anyway, not sure if any need to supply
+			reader.ReadNrm((float *)&sv.nrm);
+		} else {
+			sv.nrm.x = 0.0f;
+			sv.nrm.y = 0.0f;
+			sv.nrm.z = 1.0f;
 		}
+		reader.ReadPos((float *)&sv.pos);
 	}
 
 	// Okay, there we are! Return the new type (but keep the index bits)
@@ -836,7 +780,7 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
 	numDrawCalls++;
 	vertexCountInDrawCalls_ += vertexCount;
 
-	if (decOptions_.applySkinInDecode && (vertTypeID & GE_VTYPE_WEIGHT_MASK)) {
+	if (vertTypeID & GE_VTYPE_WEIGHT_MASK) {
 		DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
 		decodeCounter_++;
 	}

diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp
@@ -31,6 +31,7 @@
 #include "GPU/Common/ShaderId.h"
 #include "GPU/Common/ShaderUniforms.h"
 #include "GPU/Common/FragmentShaderGenerator.h"
+#include "GPU/Vulkan/DrawEngineVulkan.h"
 #include "GPU/ge_constants.h"
 #include "GPU/GPUState.h"
 
@@ -185,23 +186,23 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
 			WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
 		}
 
-		WRITE(p, "layout (std140, set = 1, binding = 3) uniform baseUBO {\n%s};\n", ub_baseStr);
+		WRITE(p, "layout (std140, set = 1, binding = %d) uniform baseUBO {\n%s};\n", DRAW_BINDING_DYNUBO_BASE, ub_baseStr);
 		if (doTexture) {
-			WRITE(p, "layout (set = 1, binding = 0) uniform %s%s tex;\n", texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : "");
+			WRITE(p, "layout (set = 1, binding = %d) uniform %s%s tex;\n", DRAW_BINDING_TEXTURE, texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : "");
 		}
 
 		if (readFramebufferTex) {
 			// The framebuffer texture is always bound as an array.
-			p.C("layout (set = 1, binding = 1) uniform sampler2DArray fbotex;\n");
+			p.F("layout (set = 1, binding = %d) uniform sampler2DArray fbotex;\n", DRAW_BINDING_2ND_TEXTURE);
 		} else if (fetchFramebuffer) {
-			p.C("layout (input_attachment_index = 0, set = 1, binding = 9) uniform subpassInput inputColor;\n");
+			p.F("layout (input_attachment_index = 0, set = 1, binding = %d) uniform subpassInput inputColor;\n", DRAW_BINDING_INPUT_ATTACHMENT);
 			if (fragmentShaderFlags) {
 				*fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT;
 			}
 		}
 
 		if (shaderDepalMode != ShaderDepalMode::OFF) {
-			WRITE(p, "layout (set = 1, binding = 2) uniform sampler2D pal;\n");
+			WRITE(p, "layout (set = 1, binding = %d) uniform sampler2D pal;\n", DRAW_BINDING_DEPAL_TEXTURE);
 		}
 
 		// Note: the precision qualifiers must match the vertex shader!

diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h
@@ -90,11 +90,7 @@ enum : uint64_t {
 	DIRTY_MIPBIAS = 1ULL << 37,
 	DIRTY_LIGHT_CONTROL = 1ULL << 38,
 
-	// space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.
-
-	DIRTY_BONE_UNIFORMS = 0xFF000000ULL,
-
-	DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL,
+	DIRTY_ALL_UNIFORMS = 0x7F00FFFFFFULL,  // 00 is where bone uniforms used to be.
 	DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,
 
 	// Other dirty elements that aren't uniforms!

diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp
@@ -35,7 +35,6 @@ std::string VertexShaderDesc(const VShaderID &id) {
 	int ls1 = id.Bits(VS_BIT_LS1, 2);
 
 	if (uvgMode) desc << uvgModes[uvgMode];
-	if (id.Bit(VS_BIT_ENABLE_BONES)) desc << "Bones:" << (id.Bits(VS_BIT_BONES, 3) + 1) << " ";
 	// Lights
 	if (id.Bit(VS_BIT_LIGHTING_ENABLE)) {
 		desc << "Light: ";
@@ -51,7 +50,6 @@ std::string VertexShaderDesc(const VShaderID &id) {
 		}
 	}
 	if (id.Bits(VS_BIT_MATERIAL_UPDATE, 3)) desc << "MatUp:" << id.Bits(VS_BIT_MATERIAL_UPDATE, 3) << " ";
-	if (id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2)) desc << "WScale " << id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2) << " ";
 	if (id.Bit(VS_BIT_FLATSHADE)) desc << "Flat ";
 
 	if (id.Bit(VS_BIT_BEZIER)) desc << "Bezier ";
@@ -117,16 +115,6 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform,
 			id.SetBits(VS_BIT_LS1, 2, gstate.getUVLS1());
 		}
 
-		// Bones.
-		bool enableBones = !useSkinInDecode && vertTypeIsSkinningEnabled(vertType);
-		id.SetBit(VS_BIT_ENABLE_BONES, enableBones);
-		if (enableBones) {
-			id.SetBits(VS_BIT_BONES, 3, TranslateNumBones(vertTypeGetNumBoneWeights(vertType)) - 1);
-			// 2 bits. We should probably send in the weight scalefactor as a uniform instead,
-			// or simply preconvert all weights to floats.
-			id.SetBits(VS_BIT_WEIGHT_FMTSCALE, 2, weightsAsFloat ? 0 : (vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT);
-		}
-
 		if (gstate.isLightingEnabled()) {
 			// doShadeMapping is stored as UVGenMode, and light type doesn't matter for shade mapping.
 			id.SetBit(VS_BIT_LIGHTING_ENABLE);

diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h
@@ -29,9 +29,7 @@ enum VShaderBit : uint8_t {
 	VS_BIT_UVPROJ_MODE = 18,  // 2, can overlap with LS0
 	VS_BIT_LS0 = 18,  // 2
 	VS_BIT_LS1 = 20,  // 2
-	VS_BIT_BONES = 22,  // 3 should be enough, not 8
-	// 25 - 29 are free.
-	VS_BIT_ENABLE_BONES = 30,
+	// 22 - 30 are free (VS_BIT_LS1 occupies 20 - 21).
 
 	// If this is set along with LIGHTING_ENABLE, all other lighting bits below
 	// are passed to the shader directly instead.
@@ -52,8 +50,7 @@ enum VShaderBit : uint8_t {
 	VS_BIT_LIGHT2_ENABLE = 54,
 	VS_BIT_LIGHT3_ENABLE = 55,
 	VS_BIT_LIGHTING_ENABLE = 56,
-	VS_BIT_WEIGHT_FMTSCALE = 57,  // only two bits
-	// 59 - 61 are free.
+	// 57 - 61 are free.
 	VS_BIT_FLATSHADE = 62, // 1 bit
 	VS_BIT_BEZIER = 63, // 1 bit
 	// No more free

diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp
@@ -337,11 +337,3 @@ void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
 		}
 	}
 }
-
-void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
-	for (int i = 0; i < 8; i++) {
-		if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
-			ConvertMatrix4x3To3x4Transposed(ub->bones[i], gstate.boneMatrix + 12 * i);
-		}
-	}
-}
diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h
@@ -120,17 +120,6 @@ R"(	vec4 u_ambient;
 	vec3 u_lightspecular3;
 )";
 
-// With some cleverness, we could get away with uploading just half this when only the four or five first
-// bones are being used. This is 384b.
-struct alignas(16) UB_VS_Bones {
-	float bones[8][12];
-};
-
-static const char * const ub_vs_bonesStr =
-R"(	mat3x4 u_bone0; mat3x4 u_bone1; mat3x4 u_bone2; mat3x4 u_bone3; mat3x4 u_bone4; mat3x4 u_bone5; mat3x4 u_bone6; mat3x4 u_bone7; mat3x4 u_bone8;
-)";
-
-
 static const char * const ub_frameStr =
 R"(
     float u_rotation;
@@ -145,7 +134,6 @@ void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bo
 
 void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport, bool useBufferedRendering);
 void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms);
-void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms);
 void FrameUpdateUniforms(UB_Frame *ub, bool useBufferedRendering);
 
 uint32_t PackLightControlBits();
diff --git a/GPU/Common/SplineCommon.cpp b/GPU/Common/SplineCommon.cpp
@@ -507,7 +507,7 @@ void DrawEngineCommon::SubmitCurve(const void *control_points, const void *indic
 	if (indices)
 		GetIndexBounds(indices, num_points, vertType, &index_lower_bound, &index_upper_bound);
 
-	VertexDecoder *origVDecoder = GetVertexDecoder(GetVertTypeID(vertType, gstate.getUVGenMode(), decOptions_.applySkinInDecode));
+	VertexDecoder *origVDecoder = GetVertexDecoder(GetVertTypeID(vertType, gstate.getUVGenMode(), true));
 	*bytesRead = num_points * origVDecoder->VertexSize();
 
 	// Simplify away bones and morph before proceeding
@@ -572,7 +572,7 @@ void DrawEngineCommon::SubmitCurve(const void *control_points, const void *indic
 		gstate_c.uv.vOff = 0;
 	}
 
-	uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode(), decOptions_.applySkinInDecode);
+	uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode(), true);
 	int generatedBytesRead;
 	if (output.count)
 		DispatchSubmitPrim(output.vertices, output.indices, PatchPrimToPrim(surface.primType), output.count, vertTypeID, gstate.getCullMode(), &generatedBytesRead);

diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp
@@ -103,9 +103,6 @@ static const ARMReg srcNEON = Q2;
 static const ARMReg accNEON = Q3;
 
 static const JitLookup jitLookup[] = {
-	{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
-	{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
-	{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
 	{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
 	{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
 	{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
@@ -296,55 +293,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	return (JittedVertexDecoder)start;
 }
 
-void VertexDecoderJitCache::Jit_WeightsU8() {
-	// Basic implementation - a byte at a time. TODO: Optimize
-	int j;
-	for (j = 0; j < dec_->nweights; j++) {
-		LDRB(tempReg1, srcReg, dec_->weightoff + j);
-		STRB(tempReg1, dstReg, dec_->decFmt.w0off + j);
-	}
-	if (j & 3) {
-		// Create a zero register. Might want to make a fixed one.
-		EOR(scratchReg, scratchReg, scratchReg);
-	}
-	while (j & 3) {
-		STRB(scratchReg, dstReg, dec_->decFmt.w0off + j);
-		j++;
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsU16() {
-	// Basic implementation - a short at a time. TODO: Optimize
-	int j;
-	for (j = 0; j < dec_->nweights; j++) {
-		LDRH(tempReg1, srcReg, dec_->weightoff + j * 2);
-		STRH(tempReg1, dstReg, dec_->decFmt.w0off + j * 2);
-	}
-	if (j & 3) {
-		// Create a zero register. Might want to make a fixed one.
-		EOR(scratchReg, scratchReg, scratchReg);
-	}
-	while (j & 3) {
-		STRH(scratchReg, dstReg, dec_->decFmt.w0off + j * 2);
-		j++;
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsFloat() {
-	int j;
-	for (j = 0; j < dec_->nweights; j++) {
-		LDR(tempReg1, srcReg, dec_->weightoff + j * 4);
-		STR(tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
-	}
-	if (j & 3) {
-		EOR(tempReg1, tempReg1, tempReg1);
-	}
-	while (j & 3) {  // Zero additional weights rounding up to 4.
-		STR(tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
-		j++;
-	}
-}
-
 static const ARMReg weightRegs[8] = { S8, S9, S10, S11, S12, S13, S14, S15 };
 static const ARMReg neonWeightRegsD[4] = { D4, D5, D6, D7 };
 static const ARMReg neonWeightRegsQ[2] = { Q2, Q3 };