Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rsx: Improved 24-bit format handling and shader refactoring #15065

Merged
merged 5 commits into from
Jan 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion rpcs3/Emu/RSX/GL/GLDraw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ void GLGSRender::update_draw_state()
if (m_rtts.get_color_surface_count())
{
// Color buffer is active
const auto host_write_mask = rsx::get_write_output_mask(rsx::method_registers.surface_color());
for (int index = 0; index < m_rtts.get_color_surface_count(); ++index)
{
bool color_mask_b = rsx::method_registers.color_mask_b(index);
Expand All @@ -207,7 +208,12 @@ void GLGSRender::update_draw_state()
break;
}

gl_state.color_maski(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a);
gl_state.color_maski(
index,
color_mask_r && host_write_mask[0],
color_mask_g && host_write_mask[1],
color_mask_b && host_write_mask[2],
color_mask_a && host_write_mask[3]);
}

// LogicOp and Blend are mutually exclusive. If both are enabled, LogicOp takes precedence.
Expand Down
125 changes: 39 additions & 86 deletions rpcs3/Emu/RSX/Program/GLSLCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,14 @@ namespace program_common
{
for (const auto& e : enums)
{
OS << "#define " << e.first << " " << static_cast<int>(e.second) << "\n";
if constexpr (std::is_enum_v<T> || std::is_integral_v<T>)
{
OS << "#define " << e.first << " " << static_cast<int>(e.second) << "\n";
}
else
{
OS << "#define " << e.first << " " << e.second << "\n";
}
}

OS << "\n";
Expand Down Expand Up @@ -137,99 +144,25 @@ namespace glsl
;
}

void insert_rop_init(std::ostream& OS)
void insert_blend_prologue(std::ostream& OS)
{
OS <<
" if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))\n"
" {\n"
" // Convert x,y to linear address\n"
" const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);\n"
" const uint address = stipple_coord.y * 32u + stipple_coord.x;\n"
" const uint bit_offset = (address & 31u);\n"
" const uint word_index = _get_bits(address, 7, 3);\n"
" const uint sub_index = _get_bits(address, 5, 2);\n\n"

" if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset)))\n"
" {\n"
" _kill();\n"
" }\n"
" }\n\n";
#include "GLSLSnippets/RSXProg/RSXProgrammableBlendPrologue.glsl"
;
}

void insert_rop(std::ostream& OS, const shader_properties& props)
void insert_rop_init(std::ostream& OS)
{
const std::string reg0 = props.fp32_outputs ? "r0" : "h0";
const std::string reg1 = props.fp32_outputs ? "r2" : "h4";
const std::string reg2 = props.fp32_outputs ? "r3" : "h6";
const std::string reg3 = props.fp32_outputs ? "r4" : "h8";

if (props.disable_early_discard)
{
OS <<
" if (_fragment_discard)\n"
" {\n"
" discard;\n"
" }\n\n";
}

// Pre-output stages
if (!props.fp32_outputs)
{
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
const auto vtype = (props.fp32_outputs || !props.supports_native_fp16) ? "vec4" : "f16vec4";
OS <<
" if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT))\n"
" {\n"
" " << reg0 << " = " << vtype << "(linear_to_srgb(" << reg0 << ").rgb, " << reg0 << ".a);\n"
" " << reg1 << " = " << vtype << "(linear_to_srgb(" << reg1 << ").rgb, " << reg1 << ".a);\n"
" " << reg2 << " = " << vtype << "(linear_to_srgb(" << reg2 << ").rgb, " << reg2 << ".a);\n"
" " << reg3 << " = " << vtype << "(linear_to_srgb(" << reg3 << ").rgb, " << reg3 << ".a);\n"
" }\n\n";
}

// Output conversion
if (props.ROP_output_rounding)
{
OS <<
" if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT))\n"
" {\n"
" " << reg0 << " = round_to_8bit(" << reg0 << ");\n"
" " << reg1 << " = round_to_8bit(" << reg1 << ");\n"
" " << reg2 << " = round_to_8bit(" << reg2 << ");\n"
" " << reg3 << " = round_to_8bit(" << reg3 << ");\n"
" }\n\n";
}

// Post-output stages
// TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
OS <<
// Alpha Testing
" if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT))\n"
" {\n"
" const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);\n"
" if (!comparison_passes(" << reg0 << ".a, alpha_ref, alpha_func)) discard;\n"
" }\n\n";

// ALPHA_TO_COVERAGE
if (props.emulate_coverage_tests)
{
OS <<
" if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT))\n"
" {\n"
" if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) ||\n"
" !coverage_test_passes(" << reg0 << "))\n"
" {\n"
" discard;\n"
" }\n"
" }\n\n";
}
#include "GLSLSnippets/RSXProg/RSXROPPrologue.glsl"
;
}

// Commit
void insert_rop(std::ostream& OS, const shader_properties& props)
{
OS <<
" ocol0 = " << reg0 << ";\n"
" ocol1 = " << reg1 << ";\n"
" ocol2 = " << reg2 << ";\n"
" ocol3 = " << reg3 << ";\n\n";
#include "GLSLSnippets//RSXProg/RSXROPEpilogue.glsl"
;
}

void insert_glsl_legacy_function(std::ostream& OS, const shader_properties& props)
Expand Down Expand Up @@ -271,15 +204,35 @@ namespace glsl
{ "ROP_CMD_MASK ", rsx::ROP_control_bits::ROP_CMD_MASK }
});

program_common::define_glsl_constants<const char*>(OS,
{
{ "col0", props.fp32_outputs ? "r0" : "h0" },
{ "col1", props.fp32_outputs ? "r2" : "h4" },
{ "col2", props.fp32_outputs ? "r3" : "h6" },
{ "col3", props.fp32_outputs ? "r4" : "h8" }
});

if (props.fp32_outputs || !props.supports_native_fp16)
{
enabled_options.push_back("_32_BIT_OUTPUT");
}

if (!props.fp32_outputs)
{
enabled_options.push_back("_ENABLE_FRAMEBUFFER_SRGB");
}

if (props.disable_early_discard)
{
enabled_options.push_back("_DISABLE_EARLY_DISCARD");
}

if (props.ROP_output_rounding)
{
enabled_options.push_back("_ENABLE_ROP_OUTPUT_ROUNDING");
}

enabled_options.push_back("_ENABLE_POLYGON_STIPPLE");
}

// Import common header
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
R"(

#ifdef _32_BIT_OUTPUT
// Default. Used when we're not utilizing native fp16
#define round_to_8bit(v4) (floor(fma(v4, vec4(255.), vec4(0.5))) / vec4(255.))
// Everything is fp32 on output channels
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Everything is fp32 on ouput channels
// Everything is fp32 on output channels

#define _mrt_color_t(expr) expr
#else
// FP16 version
#define round_to_8bit(v4) (floor(fma(v4, f16vec4(255.), f16vec4(0.5))) / f16vec4(255.))
// Mixed types. We have fp16 outputs
#define _mrt_color_t f16vec4
#endif

#if defined(_ENABLE_ROP_OUTPUT_ROUNDING) || defined(_ENABLE_PROGRAMMABLE_BLENDING)
// Default. Used when we're not utilizing native fp16
// Snaps each channel of v4 to the nearest multiple of 1/255 (fp32 path).
vec4 round_to_8bit(const in vec4 v4)
{
	// Scale by 255 and add 0.5 so that floor() rounds to nearest
	const vec4 scaled = fma(v4, vec4(255.), vec4(0.5));
	// Go through an unsigned integer vector before normalizing back
	const uvec4 quantized = uvec4(floor(scaled));
	return vec4(quantized) / vec4(255.);
}
#ifndef _32_BIT_OUTPUT
// Snaps each channel of v4 to the nearest multiple of 1/255 (native fp16 path).
f16vec4 round_to_8bit(const in f16vec4 v4)
{
	// Scale by 255 and add 0.5 so that floor() rounds to nearest
	const f16vec4 scaled = fma(v4, f16vec4(255.), f16vec4(0.5));
	// Go through an unsigned integer vector before normalizing back
	const uvec4 quantized = uvec4(floor(scaled));
	return f16vec4(quantized) / f16vec4(255.);
}
#endif
#endif

#ifdef _DISABLE_EARLY_DISCARD
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
R"(

/**
* Required register definitions from ROP config
struct {
vec4 blend_constants; // fp32x4
uint blend_func; // rgb16, a16
uint blend_factors_a; // src16, dst16
uint blend_factors_rgb; // src16, dst16
}
*/

#define BLEND_FACTOR_ZERO 0
#define BLEND_FACTOR_ONE 1
#define BLEND_FACTOR_SRC_COLOR 0x0300
#define BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x0301
#define BLEND_FACTOR_SRC_ALPHA 0x0302
#define BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x0303
#define BLEND_FACTOR_DST_ALPHA 0x0304
#define BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x0305
#define BLEND_FACTOR_DST_COLOR 0x0306
#define BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x0307
#define BLEND_FACTOR_SRC_ALPHA_SATURATE 0x0308
#define BLEND_FACTOR_CONSTANT_COLOR 0x8001
#define BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x8002
#define BLEND_FACTOR_CONSTANT_ALPHA 0x8003
#define BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x8004

#define BLEND_FUNC_ADD 0x8006
#define BLEND_MIN 0x8007
#define BLEND_MAX 0x8008
#define BLEND_FUNC_SUBTRACT 0x800A
#define BLEND_FUNC_REVERSE_SUBTRACT 0x800B
#define BLEND_FUNC_REVERSE_SUBTRACT_SIGNED 0x0000F005
#define BLEND_FUNC_ADD_SIGNED 0x0000F006
#define BLEND_FUNC_REVERSE_ADD_SIGNED 0x0000F007

/**
 * Resolves the alpha-channel blend factor selected by 'op'.
 *
 *   op  - one of the BLEND_FACTOR_* codes
 *   src - source color
 *   dst - destination color
 *
 * For the alpha channel every *_COLOR selector resolves to the same value as
 * its *_ALPHA counterpart. Unrecognized codes yield 0.
 */
float get_blend_factor_a(const in uint op, const in vec4 src, const in vec4 dst)
{
	switch (op)
	{
	case BLEND_FACTOR_ZERO: return 0.;
	case BLEND_FACTOR_ONE: return 1.;
	case BLEND_FACTOR_SRC_COLOR:
	case BLEND_FACTOR_SRC_ALPHA: return src.a;
	case BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
	case BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return 1. - src.a;
	case BLEND_FACTOR_DST_ALPHA:
	case BLEND_FACTOR_DST_COLOR: return dst.a;
	case BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
	case BLEND_FACTOR_ONE_MINUS_DST_COLOR: return 1. - dst.a;
	// The alpha component of the saturate factor is always 1
	case BLEND_FACTOR_SRC_ALPHA_SATURATE: return 1.;
	// Use the blend_constants register, the same one the RGB path reads
	case BLEND_FACTOR_CONSTANT_COLOR:
	case BLEND_FACTOR_CONSTANT_ALPHA: return blend_constants.a;
	case BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
	case BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return 1. - blend_constants.a;
	}
	return 0.;
}

/**
 * Resolves the RGB blend factor selected by 'op'.
 *
 *   op  - one of the BLEND_FACTOR_* codes
 *   src - source color
 *   dst - destination color
 *
 * Alpha-based selectors are broadcast to all three channels so that every
 * path returns a vec3. Unrecognized codes yield vec3(0).
 */
vec3 get_blend_factor_rgb(const in uint op, const in vec4 src, const in vec4 dst)
{
	switch (op)
	{
	case BLEND_FACTOR_ZERO: return vec3(0.);
	case BLEND_FACTOR_ONE: return vec3(1.);
	case BLEND_FACTOR_SRC_COLOR: return src.rgb;
	case BLEND_FACTOR_SRC_ALPHA: return src.aaa;
	case BLEND_FACTOR_ONE_MINUS_SRC_COLOR: return 1. - src.rgb;
	case BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return 1. - src.aaa;
	case BLEND_FACTOR_DST_COLOR: return dst.rgb;
	// Swizzle to vec3; a scalar is not implicitly converted to a vector on return
	case BLEND_FACTOR_DST_ALPHA: return dst.aaa;
	case BLEND_FACTOR_ONE_MINUS_DST_COLOR: return 1. - dst.rgb;
	case BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return 1. - dst.aaa;
	// Saturate factor for the color channels is min(As, 1 - Ad) on every channel
	case BLEND_FACTOR_SRC_ALPHA_SATURATE: return vec3(min(src.a, 1. - dst.a));
	case BLEND_FACTOR_CONSTANT_COLOR: return blend_constants.rgb;
	case BLEND_FACTOR_CONSTANT_ALPHA: return blend_constants.aaa;
	case BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: return 1. - blend_constants.rgb;
	case BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return 1. - blend_constants.aaa;
	}
	return vec3(0.);
}

/**
 * Computes the blended alpha value for the given source and destination colors.
 *
 * Register layout read here:
 *   blend_factors_a - bits 0..15 hold the source factor code, bits 16..31 the destination factor code
 *   blend_func      - bits 16..31 hold the alpha blend equation
 *
 * Returns 0 when the blend equation code is not recognized.
 */
float apply_blend_func_a(const in vec4 src, const in vec4 dst)
{
	uint blend_factor_a_s = _get_bits(blend_factors_a, 0, 16);
	uint blend_factor_a_d = _get_bits(blend_factors_a, 16, 16);
	uint func = _get_bits(blend_func, 16, 16);

	const float src_factor_a = get_blend_factor_a(blend_factor_a_s, src, dst);
	const float dst_factor_a = get_blend_factor_a(blend_factor_a_d, src, dst);

	// NOTE: Destination data is already saturated due to encoding.
	const float s = src.a * src_factor_a;
	const float d = dst.a * dst_factor_a;

	switch (func)
	{
	case BLEND_FUNC_ADD: return _saturate(s) + d;
	case BLEND_MIN: return min(_saturate(s), d);
	case BLEND_MAX: return max(_saturate(s), d);
	case BLEND_FUNC_SUBTRACT: return _saturate(s) - d;
	case BLEND_FUNC_REVERSE_SUBTRACT: return d - _saturate(s);
	// The signed variants skip the source saturation step
	case BLEND_FUNC_REVERSE_SUBTRACT_SIGNED: return d - s;
	case BLEND_FUNC_ADD_SIGNED: return s + d;
	case BLEND_FUNC_REVERSE_ADD_SIGNED: return s + d;
	}

	// Scalar fallback; the function returns float, so a vec3 here does not compile
	return 0.;
}

/**
 * Computes the blended RGB value for the given source and destination colors.
 *
 * Register layout read here:
 *   blend_factors_rgb - bits 0..15 hold the source factor code, bits 16..31 the destination factor code
 *   blend_func        - bits 0..15 hold the RGB blend equation
 *
 * Returns vec3(0) when the blend equation code is not recognized.
 */
vec3 apply_blend_func_rgb(const in vec4 src, const in vec4 dst)
{
	const uint src_selector = _get_bits(blend_factors_rgb, 0, 16);
	const uint dst_selector = _get_bits(blend_factors_rgb, 16, 16);
	const uint equation = _get_bits(blend_func, 0, 16);

	// Weighted terms. Destination data is already saturated due to encoding.
	const vec3 src_term = src.rgb * get_blend_factor_rgb(src_selector, src, dst);
	const vec3 dst_term = dst.rgb * get_blend_factor_rgb(dst_selector, src, dst);

	switch (equation)
	{
	// Unsigned equations saturate the source term first
	case BLEND_FUNC_ADD:
		return _saturate(src_term) + dst_term;
	case BLEND_MIN:
		return min(_saturate(src_term), dst_term);
	case BLEND_MAX:
		return max(_saturate(src_term), dst_term);
	case BLEND_FUNC_SUBTRACT:
		return _saturate(src_term) - dst_term;
	case BLEND_FUNC_REVERSE_SUBTRACT:
		return dst_term - _saturate(src_term);

	// Signed equations use the raw source term
	case BLEND_FUNC_REVERSE_SUBTRACT_SIGNED:
		return dst_term - src_term;
	case BLEND_FUNC_ADD_SIGNED:
	case BLEND_FUNC_REVERSE_ADD_SIGNED:
		return src_term + dst_term;
	}

	return vec3(0.);
}

/**
 * Blends 'src' over 'dst' using the configured blend state.
 * RGB and alpha are evaluated separately, then the combined color is
 * passed through round_to_8bit before being returned.
 */
vec4 do_blend(const in vec4 src, const in vec4 dst)
{
	const vec3 blended_rgb = apply_blend_func_rgb(src, dst);
	const float blended_a = apply_blend_func_a(src, dst);

	// Integer conversion of the blended color
	return round_to_8bit(vec4(blended_rgb, blended_a));
}

)"
Loading