RPCS3 · kd-11 · Jan 21, 2024 · Jan 19, 2024 · Jan 19, 2024 · Jan 21, 2024
diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp
@@ -188,6 +188,7 @@ void GLGSRender::update_draw_state()
 	if (m_rtts.get_color_surface_count())
 	{
 		// Color buffer is active
+		const auto host_write_mask = rsx::get_write_output_mask(rsx::method_registers.surface_color());
 		for (int index = 0; index < m_rtts.get_color_surface_count(); ++index)
 		{
 			bool color_mask_b = rsx::method_registers.color_mask_b(index);
@@ -207,7 +208,12 @@ void GLGSRender::update_draw_state()
 				break;
 			}
 
-			gl_state.color_maski(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a);
+			gl_state.color_maski(
+				index,
+				color_mask_r && host_write_mask[0],
+				color_mask_g && host_write_mask[1],
+				color_mask_b && host_write_mask[2],
+				color_mask_a && host_write_mask[3]);
 		}
 
 		// LogicOp and Blend are mutually exclusive. If both are enabled, LogicOp takes precedence.

diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp
@@ -14,7 +14,14 @@ namespace program_common
 	{
 		for (const auto& e : enums)
 		{
-			OS << "#define " << e.first << " " << static_cast<int>(e.second) << "\n";
+			if constexpr (std::is_enum_v<T> || std::is_integral_v<T>)
+			{
+				OS << "#define " << e.first << " " << static_cast<int>(e.second) << "\n";
+			}
+			else
+			{
+				OS << "#define " << e.first << " " << e.second << "\n";
+			}
 		}
 
 		OS << "\n";
@@ -137,99 +144,25 @@ namespace glsl
 		;
 	}
 
-	void insert_rop_init(std::ostream& OS)
+	// Streams the programmable blending GLSL helpers into OS.
+	// The snippet file is a raw string literal (it opens with R"( and closes with )"), so the
+	// #include below pastes it in as the right-hand operand of the stream insertion.
+	void insert_blend_prologue(std::ostream& OS)
 	{
 		OS <<
-		"	if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))\n"
-		"	{\n"
-		"		// Convert x,y to linear address\n"
-		"		const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);\n"
-		"		const uint address = stipple_coord.y * 32u + stipple_coord.x;\n"
-		"		const uint bit_offset = (address & 31u);\n"
-		"		const uint word_index = _get_bits(address, 7, 3);\n"
-		"		const uint sub_index = _get_bits(address, 5, 2);\n\n"
-
-		"		if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset)))\n"
-		"		{\n"
-		"			_kill();\n"
-		"		}\n"
-		"	}\n\n";
+			#include "GLSLSnippets/RSXProg/RSXProgrammableBlendPrologue.glsl"
+			;
 	}
 
-	void insert_rop(std::ostream& OS, const shader_properties& props)
+	// Streams the ROP prologue GLSL snippet (RSXROPPrologue.glsl) into OS.
+	// NOTE(review): the snippet itself is not visible in this change; presumably it is a raw string
+	// literal like the other RSXProg snippets and now carries the polygon stipple test that used to
+	// be emitted inline - confirm against the snippet file.
+	void insert_rop_init(std::ostream& OS)
 	{
-		const std::string reg0 = props.fp32_outputs ? "r0" : "h0";
-		const std::string reg1 = props.fp32_outputs ? "r2" : "h4";
-		const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
-		const std::string reg3 = props.fp32_outputs ? "r4" : "h8";
-
-		if (props.disable_early_discard)
-		{
-			OS <<
-			"	if (_fragment_discard)\n"
-			"	{\n"
-			"		discard;\n"
-			"	}\n\n";
-		}
-
-		// Pre-output stages
-		if (!props.fp32_outputs)
-		{
-			// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
-			const auto vtype = (props.fp32_outputs || !props.supports_native_fp16) ? "vec4" : "f16vec4";
-			OS <<
-			"	if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT))\n"
-			"	{\n"
-			"		" << reg0 << " = " << vtype << "(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a);\n"
-			"		" << reg1 << " = " << vtype << "(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a);\n"
-			"		" << reg2 << " = " << vtype << "(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a);\n"
-			"		" << reg3 << " = " << vtype << "(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a);\n"
-			"	}\n\n";
-		}
-
-		// Output conversion
-		if (props.ROP_output_rounding)
-		{
-			OS <<
-			"	if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT))\n"
-			"	{\n"
-			"		" << reg0 << " = round_to_8bit(" << reg0 << ");\n"
-			"		" << reg1 << " = round_to_8bit(" << reg1 << ");\n"
-			"		" << reg2 << " = round_to_8bit(" << reg2 << ");\n"
-			"		" << reg3 << " = round_to_8bit(" << reg3 << ");\n"
-			"	}\n\n";
-		}
-
-		// Post-output stages
-		// TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
 		OS <<
-		// Alpha Testing
-		"	if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT))\n"
-		"	{\n"
-		"		const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);\n"
-		"		if (!comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func)) discard;\n"
-		"	}\n\n";
-
-		// ALPHA_TO_COVERAGE
-		if (props.emulate_coverage_tests)
-		{
-			OS <<
-			"	if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT))\n"
-			"	{\n"
-			"		if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) ||\n"
-			"			!coverage_test_passes(" << reg0 << "))\n"
-			"		{\n"
-			"			discard;\n"
-			"		}\n"
-			"	}\n\n";
-		}
+			#include "GLSLSnippets/RSXProg/RSXROPPrologue.glsl"
+			;
+	}
 
-		// Commit
+	// Streams the ROP epilogue GLSL snippet (RSXROPEpilogue.glsl) into OS.
+	// `props` is not referenced in this body; it is kept so the signature stays unchanged for callers.
+	// NOTE(review): the snippet is not visible in this change; presumably it is a raw string literal
+	// like the other RSXProg snippets and performs the final write to the colour outputs - confirm.
+	void insert_rop(std::ostream& OS, const shader_properties& props)
+	{
 		OS <<
-		"	ocol0 = " << reg0 << ";\n"
-		"	ocol1 = " << reg1 << ";\n"
-		"	ocol2 = " << reg2 << ";\n"
-		"	ocol3 = " << reg3 << ";\n\n";
+			#include "GLSLSnippets/RSXProg/RSXROPEpilogue.glsl"
+			;
 	}
 
 	void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props)
@@ -271,15 +204,35 @@ namespace glsl
 				{ "ROP_CMD_MASK                ", rsx::ROP_control_bits::ROP_CMD_MASK }
 			});
 
+			program_common::define_glsl_constants<const char*>(OS,
+			{
+				{ "col0", props.fp32_outputs ? "r0" : "h0" },
+				{ "col1", props.fp32_outputs ? "r2" : "h4" },
+				{ "col2", props.fp32_outputs ? "r3" : "h6" },
+				{ "col3", props.fp32_outputs ? "r4" : "h8" }
+			});
+
 			if (props.fp32_outputs || !props.supports_native_fp16)
 			{
 				enabled_options.push_back("_32_BIT_OUTPUT");
 			}
 
+			if (!props.fp32_outputs)
+			{
+				enabled_options.push_back("_ENABLE_FRAMEBUFFER_SRGB");
+			}
+
 			if (props.disable_early_discard)
 			{
 				enabled_options.push_back("_DISABLE_EARLY_DISCARD");
 			}
+
+			if (props.ROP_output_rounding)
+			{
+				enabled_options.push_back("_ENABLE_ROP_OUTPUT_ROUNDING");
+			}
+
+			enabled_options.push_back("_ENABLE_POLYGON_STIPPLE");
 		}
 
 		// Import common header

diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl
@@ -1,10 +1,27 @@
 R"(
+
 #ifdef _32_BIT_OUTPUT
-// Default. Used when we're not utilizing native fp16
-#define round_to_8bit(v4) (floor(fma(v4, vec4(255.), vec4(0.5))) / vec4(255.))
+// Everything is fp32 on output channels
+#define _mrt_color_t(expr) expr
 #else
-// FP16 version
-#define round_to_8bit(v4) (floor(fma(v4, f16vec4(255.), f16vec4(0.5))) / f16vec4(255.))
+// Mixed types. We have fp16 outputs
+#define _mrt_color_t f16vec4
+#endif
+
+#if defined(_ENABLE_ROP_OUTPUT_ROUNDING) || defined(_ENABLE_PROGRAMMABLE_BLENDING)
+// Default. Used when we're not utilizing native fp16
+vec4 round_to_8bit(const in vec4 v4)
+{
+	// Scale by 255 and bias by 0.5 so that the floor below rounds to the nearest step
+	const vec4 biased = fma(v4, vec4(255.), vec4(0.5));
+	// Snap to an unsigned integer, then rescale by 1/255
+	const uvec4 quantized = uvec4(floor(biased));
+	return vec4(quantized) / vec4(255.);
+}
+#ifndef _32_BIT_OUTPUT
+f16vec4 round_to_8bit(const in f16vec4 v4)
+{
+	// Half-precision overload: same scale, bias and floor sequence as the vec4 version
+	const f16vec4 biased = fma(v4, f16vec4(255.), f16vec4(0.5));
+	const uvec4 quantized = uvec4(floor(biased));
+	return f16vec4(quantized) / f16vec4(255.);
+}
+#endif
 #endif
 
 #ifdef _DISABLE_EARLY_DISCARD

diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXProgrammableBlendPrologue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXProgrammableBlendPrologue.glsl
@@ -0,0 +1,152 @@
+R"(
+
+/**
+ * Required register definitions from ROP config
+ struct {
+ 	vec4 blend_constants;    // fp32x4
+	uint blend_func;         // rgb16, a16
+	uint blend_factors_a;    // src16, dst16
+	uint blend_factors_rgb;  // src16, dst16
+ }
+*/
+
+#define BLEND_FACTOR_ZERO 0
+#define BLEND_FACTOR_ONE  1
+#define BLEND_FACTOR_SRC_COLOR 0x0300
+#define BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x0301
+#define BLEND_FACTOR_SRC_ALPHA 0x0302
+#define BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x0303
+#define BLEND_FACTOR_DST_ALPHA 0x0304
+#define BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x0305
+#define BLEND_FACTOR_DST_COLOR 0x0306
+#define BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x0307
+#define BLEND_FACTOR_SRC_ALPHA_SATURATE 0x0308
+#define BLEND_FACTOR_CONSTANT_COLOR 0x8001
+#define BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x8002
+#define BLEND_FACTOR_CONSTANT_ALPHA 0x8003
+#define BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x8004
+
+#define BLEND_FUNC_ADD 0x8006
+#define BLEND_MIN 0x8007
+#define BLEND_MAX 0x8008
+#define BLEND_FUNC_SUBTRACT 0x800A
+#define BLEND_FUNC_REVERSE_SUBTRACT 0x800B
+#define BLEND_FUNC_REVERSE_SUBTRACT_SIGNED 0x0000F005
+#define BLEND_FUNC_ADD_SIGNED 0x0000F006
+#define BLEND_FUNC_REVERSE_ADD_SIGNED 0x0000F007
+
+// Resolves a blend factor selector to the scalar weight applied to the alpha channel.
+//   op  - one of the BLEND_FACTOR_* values
+//   src - source colour, dst - destination colour
+// Colour-based selectors collapse to the alpha component of the colour they name.
+// Selectors that match no case yield 0.
+float get_blend_factor_a(const in uint op, const in vec4 src, const in vec4 dst)
+{
+	switch (op)
+	{
+	case BLEND_FACTOR_ZERO: return 0.;
+	case BLEND_FACTOR_ONE: return 1.;
+	case BLEND_FACTOR_SRC_COLOR:
+	case BLEND_FACTOR_SRC_ALPHA: return src.a;
+	case BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+	case BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return 1. - src.a;
+	case BLEND_FACTOR_DST_ALPHA:
+	case BLEND_FACTOR_DST_COLOR: return dst.a;
+	case BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+	case BLEND_FACTOR_ONE_MINUS_DST_COLOR: return 1. - dst.a;
+	// The saturate factor is always 1 for the alpha channel
+	case BLEND_FACTOR_SRC_ALPHA_SATURATE: return 1.;
+	// Constant factors read the blend_constants register, same as the rgb variant
+	case BLEND_FACTOR_CONSTANT_COLOR:
+	case BLEND_FACTOR_CONSTANT_ALPHA: return blend_constants.a;
+	case BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+	case BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return 1. - blend_constants.a;
+	}
+	return 0.;
+}
+
+// Resolves a blend factor selector to the per-channel weight applied to the colour channels.
+//   op  - one of the BLEND_FACTOR_* values
+//   src - source colour, dst - destination colour
+// Alpha-based selectors are broadcast to all three channels.
+// Selectors that match no case yield vec3(0).
+vec3 get_blend_factor_rgb(const in uint op, const in vec4 src, const in vec4 dst)
+{
+	switch (op)
+	{
+	case BLEND_FACTOR_ZERO: return vec3(0.);
+	case BLEND_FACTOR_ONE: return vec3(1.);
+	case BLEND_FACTOR_SRC_COLOR: return src.rgb;
+	case BLEND_FACTOR_SRC_ALPHA: return src.aaa;
+	case BLEND_FACTOR_ONE_MINUS_SRC_COLOR: return 1. - src.rgb;
+	case BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return 1. - src.aaa;
+	case BLEND_FACTOR_DST_COLOR: return dst.rgb;
+	// Broadcast destination alpha; a bare scalar does not match the vec3 return type
+	case BLEND_FACTOR_DST_ALPHA: return dst.aaa;
+	case BLEND_FACTOR_ONE_MINUS_DST_COLOR: return 1. - dst.rgb;
+	case BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return 1. - dst.aaa;
+	// Standard saturate factor: f = min(As, 1 - Ad) on every colour channel (alpha factor is 1)
+	// NOTE(review): follows the OpenGL definition of SRC_ALPHA_SATURATE - confirm RSX matches.
+	case BLEND_FACTOR_SRC_ALPHA_SATURATE: return vec3(min(src.a, 1. - dst.a));
+	case BLEND_FACTOR_CONSTANT_COLOR: return blend_constants.rgb;
+	case BLEND_FACTOR_CONSTANT_ALPHA: return blend_constants.aaa;
+	case BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: return 1. - blend_constants.rgb;
+	case BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return 1. - blend_constants.aaa;
+	}
+	return vec3(0.);
+}
+
+// Blends the alpha channel of src against dst.
+// Factor selectors are read from blend_factors_a (source in bits 0-15, destination in bits 16-31)
+// and the blend equation from bits 16-31 of blend_func.
+// Returns 0 when the equation matches none of the cases below.
+float apply_blend_func_a(const in vec4 src, const in vec4 dst)
+{
+	uint blend_factor_a_s = _get_bits(blend_factors_a, 0, 16);
+	uint blend_factor_a_d = _get_bits(blend_factors_a, 16, 16);
+	uint func = _get_bits(blend_func, 16, 16);
+
+	const float src_factor_a = get_blend_factor_a(blend_factor_a_s, src, dst);
+	const float dst_factor_a = get_blend_factor_a(blend_factor_a_d, src, dst);
+
+	// NOTE: Destination data is already saturated due to encoding.
+	const float s = src.a * src_factor_a;
+	const float d = dst.a * dst_factor_a;
+
+	// The *_SIGNED equations use the source term as-is; all others clamp it first
+	switch (func)
+	{
+	case BLEND_FUNC_ADD: return _saturate(s) + d;
+	case BLEND_MIN: return min(_saturate(s), d);
+	case BLEND_MAX: return max(_saturate(s), d);
+	case BLEND_FUNC_SUBTRACT: return _saturate(s) - d;
+	case BLEND_FUNC_REVERSE_SUBTRACT: return d - _saturate(s);
+	case BLEND_FUNC_REVERSE_SUBTRACT_SIGNED: return d - s;
+	case BLEND_FUNC_ADD_SIGNED: return s + d;
+	case BLEND_FUNC_REVERSE_ADD_SIGNED: return s + d;
+	}
+
+	// Scalar fallback; the function returns float, not vec3
+	return 0.;
+}
+
+vec3 apply_blend_func_rgb(const in vec4 src, const in vec4 dst)
+{
+	// Bits 0-15 of blend_factors_rgb select the source factor, bits 16-31 the destination factor
+	const uint src_factor_op = _get_bits(blend_factors_rgb, 0, 16);
+	const uint dst_factor_op = _get_bits(blend_factors_rgb, 16, 16);
+
+	// The colour equation is stored in bits 0-15 of blend_func
+	const uint equation = _get_bits(blend_func, 0, 16);
+
+	// Weighted terms.
+	// NOTE: Destination data is already saturated due to encoding.
+	const vec3 weighted_src = src.rgb * get_blend_factor_rgb(src_factor_op, src, dst);
+	const vec3 weighted_dst = dst.rgb * get_blend_factor_rgb(dst_factor_op, src, dst);
+
+	// Clamped source term used by every equation except the *_SIGNED ones
+	const vec3 clamped_src = _saturate(weighted_src);
+
+	switch (equation)
+	{
+	case BLEND_FUNC_ADD: return clamped_src + weighted_dst;
+	case BLEND_MIN: return min(clamped_src, weighted_dst);
+	case BLEND_MAX: return max(clamped_src, weighted_dst);
+	case BLEND_FUNC_SUBTRACT: return clamped_src - weighted_dst;
+	case BLEND_FUNC_REVERSE_SUBTRACT: return weighted_dst - clamped_src;
+	case BLEND_FUNC_REVERSE_SUBTRACT_SIGNED: return weighted_dst - weighted_src;
+	case BLEND_FUNC_ADD_SIGNED: return weighted_src + weighted_dst;
+	case BLEND_FUNC_REVERSE_ADD_SIGNED: return weighted_src + weighted_dst;
+	}
+
+	// Equation not handled above
+	return vec3(0.);
+}
+
+vec4 do_blend(const in vec4 src, const in vec4 dst)
+{
+	// Colour and alpha are blended separately, each with its own factors and equation
+	const vec3 blended_rgb = apply_blend_func_rgb(src, dst);
+	const float blended_a = apply_blend_func_a(src, dst);
+
+	// Accurate int conversion with wrapping
+	return round_to_8bit(vec4(blended_rgb, blended_a));
+}
+
+)"