Skip to content

Commit

Permalink
[spv-in]: Cull unused builtins inside structs (#1469)
Browse files Browse the repository at this point in the history
Some compilers like shaderc introduce a full gl_PerVertex struct, this
includes gl_ClipDistance. Normally this isn't a problem since most
drivers optimize it away, but naga zero inits globals if they weren't
previously initialized. This causes gl_ClipDistance to be initialized to
zero which can be really bad for performance.
  • Loading branch information
JCapucho authored Oct 20, 2021
1 parent c4ab9a3 commit 70ad084
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 45 deletions.
11 changes: 9 additions & 2 deletions src/front/spv/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,9 +376,16 @@ impl<I: Iterator<Item = u32>> super::Parser<I> {
..
} => {
for (index, sm) in sub_members.iter().enumerate() {
if sm.binding.is_none() {
match sm.binding {
Some(crate::Binding::BuiltIn(builtin)) => {
// Cull unused builtins to preserve performances
if !self.builtin_usage.contains(&builtin) {
continue;
}
}
// unrecognized binding, skip
continue;
None => continue,
_ => {}
}
members.push(sm.clone());
components.push(function.expressions.append(
Expand Down
17 changes: 15 additions & 2 deletions src/front/spv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use function::*;
use crate::{
arena::{Arena, Handle, UniqueArena},
proc::Layouter,
FastHashMap,
FastHashMap, FastHashSet,
};

use num_traits::cast::FromPrimitive;
Expand Down Expand Up @@ -549,6 +549,10 @@ pub struct Parser<I> {
(BodyIndex, Vec<i32>),
std::hash::BuildHasherDefault<fxhash::FxHasher>,
>,

/// Tracks usage of builtins, used to cull unused builtins since they can
/// have serious performance implications.
builtin_usage: FastHashSet<crate::BuiltIn>,
}

impl<I: Iterator<Item = u32>> Parser<I> {
Expand Down Expand Up @@ -582,6 +586,7 @@ impl<I: Iterator<Item = u32>> Parser<I> {
index_constants: Vec::new(),
index_constant_expressions: Vec::new(),
switch_cases: indexmap::IndexMap::default(),
builtin_usage: FastHashSet::default(),
}
}

Expand Down Expand Up @@ -1294,9 +1299,10 @@ impl<I: Iterator<Item = u32>> Parser<I> {
let type_lookup = self.lookup_type.lookup(acex.type_id)?;
acex = match ctx.type_arena[type_lookup.handle].inner {
// can only index a struct with a constant
crate::TypeInner::Struct { .. } => {
crate::TypeInner::Struct { ref members, .. } => {
let index = index_maybe
.ok_or_else(|| Error::InvalidAccess(index_expr_data.clone()))?;

let lookup_member = self
.lookup_member
.get(&(type_lookup.handle, index))
Expand All @@ -1308,6 +1314,13 @@ impl<I: Iterator<Item = u32>> Parser<I> {
},
span,
);

if let Some(crate::Binding::BuiltIn(builtin)) =
members[index as usize].binding
{
self.builtin_usage.insert(builtin);
}

AccessExpression {
base_handle,
type_id: lookup_member.type_id,
Expand Down
12 changes: 3 additions & 9 deletions tests/out/glsl/quad-vert.main.Vertex.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ precision highp int;
struct type9 {
vec2 member;
vec4 gen_gl_Position;
float gen_gl_PointSize;
float gen_gl_ClipDistance[1];
float gen_gl_CullDistance[1];
};

vec2 v_uv = vec2(0.0, 0.0);
Expand Down Expand Up @@ -42,12 +39,9 @@ void main() {
a_uv1 = a_uv;
a_pos1 = a_pos;
main2();
vec2 _e10 = v_uv;
vec4 _e11 = perVertexStruct.gen_gl_Position;
float _e12 = perVertexStruct.gen_gl_PointSize;
float _e13[1] = perVertexStruct.gen_gl_ClipDistance;
float _e14[1] = perVertexStruct.gen_gl_CullDistance;
type9 _tmp_return = type9(_e10, _e11, _e12, _e13, _e14);
vec2 _e7 = v_uv;
vec4 _e8 = perVertexStruct.gen_gl_Position;
type9 _tmp_return = type9(_e7, _e8);
_vs2fs_location0 = _tmp_return.member;
gl_Position = _tmp_return.gen_gl_Position;
gl_Position.yz = vec2(-gl_Position.y, gl_Position.z * 2.0 - gl_Position.w);
Expand Down
22 changes: 5 additions & 17 deletions tests/out/hlsl/quad-vert.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ struct gl_PerVertex {
struct type9 {
linear float2 member : LOC0;
float4 gl_Position : SV_Position;
float gl_PointSize : PSIZE;
float gl_ClipDistance[1] : SV_ClipDistance;
float gl_CullDistance[1] : SV_CullDistance;
};

static float2 v_uv = (float2)0;
Expand All @@ -22,9 +19,6 @@ static float2 a_pos1 = (float2)0;
struct VertexOutput_main {
float2 member : LOC0;
float4 gl_Position : SV_Position;
float gl_ClipDistance : SV_ClipDistance;
float gl_CullDistance : SV_CullDistance;
float gl_PointSize : PSIZE;
};

void main1()
Expand All @@ -36,13 +30,10 @@ void main1()
return;
}

type9 Constructtype9(float2 arg0, float4 arg1, float arg2, float arg3[1], float arg4[1]) {
type9 Constructtype9(float2 arg0, float4 arg1) {
type9 ret;
ret.member = arg0;
ret.gl_Position = arg1;
ret.gl_PointSize = arg2;
ret.gl_ClipDistance = arg3;
ret.gl_CullDistance = arg4;
return ret;
}

Expand All @@ -51,12 +42,9 @@ VertexOutput_main main(float2 a_uv : LOC1, float2 a_pos : LOC0)
a_uv1 = a_uv;
a_pos1 = a_pos;
main1();
float2 _expr10 = v_uv;
float4 _expr11 = perVertexStruct.gl_Position;
float _expr12 = perVertexStruct.gl_PointSize;
float _expr13[1] = perVertexStruct.gl_ClipDistance;
float _expr14[1] = perVertexStruct.gl_CullDistance;
const type9 type9 = Constructtype9(_expr10, _expr11, _expr12, _expr13, _expr14);
const VertexOutput_main type9_1 = { type9.member, type9.gl_Position, type9.gl_ClipDistance, type9.gl_CullDistance, type9.gl_PointSize };
float2 _expr7 = v_uv;
float4 _expr8 = perVertexStruct.gl_Position;
const type9 type9 = Constructtype9(_expr7, _expr8);
const VertexOutput_main type9_1 = { type9.member, type9.gl_Position };
return type9_1;
}
16 changes: 4 additions & 12 deletions tests/out/msl/quad-vert.msl
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ struct gl_PerVertex {
struct type9 {
metal::float2 member;
metal::float4 gl_Position;
float gl_PointSize;
type5 gl_ClipDistance;
type5 gl_CullDistance;
};
constant metal::float4 const_type3 = {0.0, 0.0, 0.0, 1.0};
constant type5 const_type5 = {0.0};
Expand All @@ -42,8 +39,6 @@ struct main1Input {
struct main1Output {
metal::float2 member [[user(loc0), center_perspective]];
metal::float4 gl_Position [[position]];
float gl_PointSize [[point_size]];
float gl_ClipDistance [[clip_distance]] [1];
};
vertex main1Output main1(
main1Input varyings [[stage_in]]
Expand All @@ -57,11 +52,8 @@ vertex main1Output main1(
a_uv1 = a_uv;
a_pos1 = a_pos;
main2(v_uv, a_uv1, perVertexStruct, a_pos1);
metal::float2 _e10 = v_uv;
metal::float4 _e11 = perVertexStruct.gl_Position;
float _e12 = perVertexStruct.gl_PointSize;
type5 _e13 = perVertexStruct.gl_ClipDistance;
type5 _e14 = perVertexStruct.gl_CullDistance;
const auto _tmp = type9 {_e10, _e11, _e12, _e13, _e14};
return main1Output { _tmp.member, _tmp.gl_Position, _tmp.gl_PointSize, {_tmp.gl_ClipDistance.inner[0]} };
metal::float2 _e7 = v_uv;
metal::float4 _e8 = perVertexStruct.gl_Position;
const auto _tmp = type9 {_e7, _e8};
return main1Output { _tmp.member, _tmp.gl_Position };
}
6 changes: 3 additions & 3 deletions tests/out/wgsl/quad-vert.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ fn main([[location(1)]] a_uv: vec2<f32>, [[location(0)]] a_pos: vec2<f32>) -> Ve
a_uv1 = a_uv;
a_pos1 = a_pos;
main1();
let e10: vec2<f32> = v_uv;
let e11: vec4<f32> = perVertexStruct.gl_Position;
return VertexOutput(e10, e11);
let e7: vec2<f32> = v_uv;
let e8: vec4<f32> = perVertexStruct.gl_Position;
return VertexOutput(e7, e8);
}

0 comments on commit 70ad084

Please sign in to comment.