Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MSL: Add option to force depth write in fragment shaders #2331

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,7 @@ struct CLIArguments
bool msl_replace_recursive_inputs = false;
bool msl_readwrite_texture_fences = true;
bool msl_agx_manual_cube_grad_fixup = false;
bool msl_input_attachment_is_ds_attachment = false;
const char *msl_combined_sampler_suffix = nullptr;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
Expand Down Expand Up @@ -873,6 +874,10 @@ static void print_help_msl()
"\t[--msl-runtime-array-rich-descriptor]:\n\t\tWhen declaring a runtime array of SSBOs, declare an array of {ptr, len} pairs to support OpArrayLength.\n"
"\t[--msl-replace-recursive-inputs]:\n\t\tWorks around a Metal 3.1 regression bug, which causes an infinite recursion crash during Metal's analysis of an entry point input structure that itself contains internal recursion.\n"
"\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n"
"\t[--msl-input-attachment-is-ds-attachment]:\n\t\tAdds a simple depth passthrough in fragment shaders when they do not modify the depth value.\n"
"\t\tRequired to force Metal to write to the depth/stencil attachment post fragment execution.\n"
"\t\tOtherwise, Metal may optimize the write to pre fragment execution which goes against the Vulkan spec.\n"
"\t\tOnly required if an input attachment and depth/stencil attachment reference the same resource.\n"
"\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n"
"\t\tEmits [[color(N)]] inputs in fragment stage.\n"
"\t\tRequires an Apple GPU.\n"
Expand Down Expand Up @@ -1257,6 +1262,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.ios_support_base_vertex_instance = true;
msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor;
msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs;
msl_opts.input_attachment_is_ds_attachment = args.msl_input_attachment_is_ds_attachment;
msl_opts.readwrite_texture_fences = args.msl_readwrite_texture_fences;
msl_opts.agx_manual_cube_grad_fixup = args.msl_agx_manual_cube_grad_fixup;
msl_comp->set_msl_options(msl_opts);
Expand Down Expand Up @@ -1823,6 +1829,7 @@ static int main_inner(int argc, char *argv[])
[&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; });
cbs.add("--msl-replace-recursive-inputs",
[&args](CLIParser &) { args.msl_replace_recursive_inputs = true; });
cbs.add("--msl-input-attachment-is-ds-attachment", [&args](CLIParser &) { args.msl_input_attachment_is_ds_attachment = true; });
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct main0_out
{
float4 color [[color(0)]];
};

[[ early_fragment_tests ]] fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct main0_out
{
float4 color [[color(0)]];
float gl_FragDepth [[depth(any)]];
};

fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
out.gl_FragDepth = 1.0;
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct main0_out
{
float4 color [[color(0)]];
float gl_FragDepth [[depth(any)]];
};

fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
out.gl_FragDepth = gl_FragCoord.z;
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct main0_out
{
float4 color [[color(0)]];
};

[[ early_fragment_tests ]] fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct main0_out
{
float4 color [[color(0)]];
float gl_FragDepth [[depth(any)]];
};

fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
out.gl_FragDepth = 1.0;
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct main0_out
{
float4 color [[color(0)]];
float gl_FragDepth [[depth(any)]];
};

fragment main0_out main0(texture2d<float> inputDepth [[texture(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
out.color = inputDepth.read(uint2(gl_FragCoord.xy));
out.gl_FragDepth = gl_FragCoord.z;
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#version 450

layout(early_fragment_tests) in;

layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth;

layout (location = 0) out vec4 color;

void main()
{
color = subpassLoad(inputDepth);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#version 450

layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth;

layout (location = 0) out vec4 color;

void main()
{
color = subpassLoad(inputDepth);
gl_FragDepth = 1.0f;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#version 450

layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth;

layout (location = 0) out vec4 color;

void main()
{
color = subpassLoad(inputDepth);
}
85 changes: 75 additions & 10 deletions spirv_msl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,14 @@ void CompilerMSL::build_implicit_builtins()
active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance));
bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId);
bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups);
bool force_frag_depth_passthrough =
get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input &&
msl_options.enable_frag_depth_builtin && msl_options.input_attachment_is_ds_attachment;

if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params ||
needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation ||
has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size)
has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough)
{
bool has_frag_coord = false;
bool has_sample_id = false;
Expand All @@ -292,6 +295,7 @@ void CompilerMSL::build_implicit_builtins()
bool has_helper_invocation = false;
bool has_local_invocation_index = false;
bool has_workgroup_size = false;
bool has_frag_depth = false;
uint32_t workgroup_id_type = 0;

ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
Expand All @@ -312,6 +316,13 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self);
does_shader_write_sample_mask = true;
}

if (force_frag_depth_passthrough && builtin == BuiltInFragDepth)
{
builtin_frag_depth_id = var.self;
mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var.self);
has_frag_depth = true;
}
}

if (var.storage != StorageClassInput)
Expand Down Expand Up @@ -902,6 +913,36 @@ void CompilerMSL::build_implicit_builtins()
builtin_workgroup_size_id = var_id;
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
}

if (!has_frag_depth && force_frag_depth_passthrough)
{
uint32_t offset = ir.increase_bound_by(3);
uint32_t type_id = offset;
uint32_t type_ptr_id = offset + 1;
uint32_t var_id = offset + 2;

// Create gl_FragDepth
SPIRType float_type { OpTypeFloat };
float_type.basetype = SPIRType::Float;
float_type.width = 32;
float_type.vecsize = 1;
set<SPIRType>(type_id, float_type);

SPIRType float_type_ptr_in = float_type;
float_type_ptr_in.op = spv::OpTypePointer;
float_type_ptr_in.pointer = true;
float_type_ptr_in.pointer_depth++;
float_type_ptr_in.parent_type = type_id;
float_type_ptr_in.storage = StorageClassOutput;

auto &ptr_in_type = set<SPIRType>(type_ptr_id, float_type_ptr_in);
ptr_in_type.self = type_id;
set<SPIRVariable>(var_id, type_ptr_id, StorageClassOutput);
set_decoration(var_id, DecorationBuiltIn, BuiltInFragDepth);
builtin_frag_depth_id = var_id;
mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var_id);
active_output_builtins.set(BuiltInFragDepth);
}
}

if (needs_swizzle_buffer_def)
Expand Down Expand Up @@ -1571,6 +1612,8 @@ string CompilerMSL::compile()
add_active_interface_variable(builtin_dispatch_base_id);
if (builtin_sample_mask_id)
add_active_interface_variable(builtin_sample_mask_id);
if (builtin_frag_depth_id)
add_active_interface_variable(builtin_frag_depth_id);

// Create structs to hold input, output and uniform variables.
// Do output first to ensure out. is declared at top of entry function.
Expand Down Expand Up @@ -1869,8 +1912,13 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
{
uint32_t base_id = ops[0];
if (global_var_ids.find(base_id) != global_var_ids.end())
{
added_arg_ids.insert(base_id);

if (msl_options.input_attachment_is_ds_attachment && base_id == builtin_frag_depth_id)
writes_to_depth = true;
}

uint32_t rvalue_id = ops[1];
if (global_var_ids.find(rvalue_id) != global_var_ids.end())
added_arg_ids.insert(rvalue_id);
Expand Down Expand Up @@ -14513,16 +14561,33 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
}
}
else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment &&
is_builtin_variable(var) && active_output_builtins.get(bi_type) &&
bi_type == BuiltInSampleMask && has_additional_fixed_sample_mask())
is_builtin_variable(var) && active_output_builtins.get(bi_type))
{
// If the additional fixed sample mask was set, we need to adjust the sample_mask
// output to reflect that. If the shader outputs the sample_mask itself too, we need
// to AND the two masks to get the final one.
string op_str = does_shader_write_sample_mask ? " &= " : " = ";
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
});
switch (bi_type)
{
case BuiltInSampleMask:
if (has_additional_fixed_sample_mask())
{
// If the additional fixed sample mask was set, we need to adjust the sample_mask
// output to reflect that. If the shader outputs the sample_mask itself too, we need
// to AND the two masks to get the final one.
string op_str = does_shader_write_sample_mask ? " &= " : " = ";
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
});
}
break;
case BuiltInFragDepth:
if (msl_options.input_attachment_is_ds_attachment && !writes_to_depth)
{
entry_func.fixup_hooks_out.push_back([=]() {
statement(to_expression(builtin_frag_depth_id), " = ", to_expression(builtin_frag_coord_id), ".z;");
});
}
break;
default:
break;
}
}
});
}
Expand Down
9 changes: 9 additions & 0 deletions spirv_msl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,13 @@ class CompilerMSL : public CompilerGLSL
// with side effects. Provided as an option hoping Metal will fix this issue in the future.
bool force_fragment_with_side_effects_execution = false;

// If set, adds a depth pass through statement to circumvent the following issue:
// When the same depth/stencil is used as input and depth/stencil attachment, we need to
// force Metal to perform the depth/stencil write after fragment execution. Otherwise,
// Metal will write to the depth attachment before fragment execution. This happens
// if the fragment does not modify the depth value.
bool input_attachment_is_ds_attachment = false;
HansKristian-Work marked this conversation as resolved.
Show resolved Hide resolved

bool is_ios() const
{
return platform == iOS;
Expand Down Expand Up @@ -1094,6 +1101,7 @@ class CompilerMSL : public CompilerGLSL
uint32_t builtin_stage_input_size_id = 0;
uint32_t builtin_local_invocation_index_id = 0;
uint32_t builtin_workgroup_size_id = 0;
uint32_t builtin_frag_depth_id = 0;
uint32_t swizzle_buffer_id = 0;
uint32_t buffer_size_buffer_id = 0;
uint32_t view_mask_buffer_id = 0;
Expand Down Expand Up @@ -1190,6 +1198,7 @@ class CompilerMSL : public CompilerGLSL
bool needs_subgroup_size = false;
bool needs_sample_id = false;
bool needs_helper_invocation = false;
bool writes_to_depth = false;
std::string qual_pos_var_name;
std::string stage_in_var_name = "in";
std::string stage_out_var_name = "out";
Expand Down
2 changes: 2 additions & 0 deletions test_shaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
msl_args.append('--msl-runtime-array-rich-descriptor')
if '.replace-recursive-inputs.' in shader:
msl_args.append('--msl-replace-recursive-inputs')
if '.input-attachment-is-ds-attachment.' in shader:
msl_args.append('--msl-input-attachment-is-ds-attachment')
if '.mask-location-0.' in shader:
msl_args.append('--mask-stage-output-location')
msl_args.append('0')
Expand Down
Loading