Skip to content

Commit

Permalink
Reduce the size of MeshUniform
Browse files Browse the repository at this point in the history
Before:
mat4x4 x3
u32
= 196 bytes, but in an array, practically 208 bytes for 16-byte alignment.

After:
mat3x4 x2
mat2x4
f32
u32
= 136 bytes, but in an array, practically 144 bytes for 16-byte alignment.

That is a reduction of over 30% in VRAM space and bandwidth usage, plus less
data to serialize.
  • Loading branch information
superdump committed Aug 11, 2023
1 parent 37915e1 commit b7845f4
Show file tree
Hide file tree
Showing 16 changed files with 211 additions and 89 deletions.
4 changes: 2 additions & 2 deletions assets/shaders/custom_vertex_attribute.wgsl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#import bevy_pbr::mesh_bindings mesh
#import bevy_pbr::mesh_functions mesh_position_local_to_clip
#import bevy_pbr::mesh_functions affine_to_square, mesh_position_local_to_clip
#import bevy_render::instance_index

struct CustomMaterial {
Expand All @@ -23,7 +23,7 @@ struct VertexOutput {
fn vertex(vertex: Vertex) -> VertexOutput {
var out: VertexOutput;
out.clip_position = mesh_position_local_to_clip(
mesh[bevy_render::instance_index::get_instance_index(vertex.instance_index)].model,
affine_to_square(mesh[bevy_render::instance_index::get_instance_index(vertex.instance_index)].model),
vec4<f32>(vertex.position, 1.0),
);
out.blend_color = vertex.blend_color;
Expand Down
4 changes: 2 additions & 2 deletions assets/shaders/instancing.wgsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#import bevy_pbr::mesh_functions mesh_position_local_to_clip
#import bevy_pbr::mesh_functions affine_to_square, mesh_position_local_to_clip
#import bevy_pbr::mesh_bindings mesh

struct Vertex {
Expand All @@ -24,7 +24,7 @@ fn vertex(vertex: Vertex) -> VertexOutput {
// This index could be passed in via another uniform instead but it's
// unnecessary for the example.
out.clip_position = mesh_position_local_to_clip(
mesh[0].model,
affine_to_square(mesh[0].model),
vec4<f32>(position, 1.0)
);
out.color = vertex.i_color;
Expand Down
6 changes: 3 additions & 3 deletions crates/bevy_pbr/src/light.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2025,7 +2025,7 @@ pub fn check_light_mesh_visibility(
view_frusta.iter().zip(view_visible_entities)
{
// Disable near-plane culling, as a shadow caster could lie before the near plane.
if !frustum.intersects_obb(aabb, &transform.compute_matrix(), false, true) {
if !frustum.intersects_obb(aabb, &transform.affine(), false, true) {
continue;
}

Expand Down Expand Up @@ -2098,7 +2098,7 @@ pub fn check_light_mesh_visibility(

// If we have an aabb and transform, do frustum culling
if let (Some(aabb), Some(transform)) = (maybe_aabb, maybe_transform) {
let model_to_world = transform.compute_matrix();
let model_to_world = transform.affine();
// Do a cheap sphere vs obb test to prune out most meshes outside the sphere of the light
if !light_sphere.intersects_obb(aabb, &model_to_world) {
continue;
Expand Down Expand Up @@ -2162,7 +2162,7 @@ pub fn check_light_mesh_visibility(

// If we have an aabb and transform, do frustum culling
if let (Some(aabb), Some(transform)) = (maybe_aabb, maybe_transform) {
let model_to_world = transform.compute_matrix();
let model_to_world = transform.affine();
// Do a cheap sphere vs obb test to prune out most meshes outside the sphere of the light
if !light_sphere.intersects_obb(aabb, &model_to_world) {
continue;
Expand Down
8 changes: 4 additions & 4 deletions crates/bevy_pbr/src/material.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
render, AlphaMode, DrawMesh, DrawPrepass, EnvironmentMapLight, MeshPipeline, MeshPipelineKey,
MeshUniform, PrepassPipelinePlugin, PrepassPlugin, RenderLightSystems,
MeshTransforms, MeshUniform, PrepassPipelinePlugin, PrepassPlugin, RenderLightSystems,
ScreenSpaceAmbientOcclusionSettings, SetMeshBindGroup, SetMeshViewBindGroup, Shadow,
};
use bevy_app::{App, Plugin};
Expand Down Expand Up @@ -382,7 +382,7 @@ pub fn queue_material_meshes<M: Material>(
material_meshes: Query<(
&Handle<M>,
&Handle<Mesh>,
&MeshUniform,
&MeshTransforms,
&GpuArrayBufferIndex<MeshUniform>,
)>,
images: Res<RenderAssets<Image>>,
Expand Down Expand Up @@ -468,7 +468,7 @@ pub fn queue_material_meshes<M: Material>(

let rangefinder = view.rangefinder3d();
for visible_entity in &visible_entities.entities {
if let Ok((material_handle, mesh_handle, mesh_uniform, batch_indices)) =
if let Ok((material_handle, mesh_handle, mesh_transforms, batch_indices)) =
material_meshes.get(*visible_entity)
{
if let (Some(mesh), Some(material)) = (
Expand Down Expand Up @@ -516,7 +516,7 @@ pub fn queue_material_meshes<M: Material>(
}
};

let distance = rangefinder.distance(&mesh_uniform.transform)
let distance = rangefinder.distance_affine(&mesh_transforms.transform)
+ material.properties.depth_bias;
match material.properties.alpha_mode {
AlphaMode::Opaque => {
Expand Down
18 changes: 9 additions & 9 deletions crates/bevy_pbr/src/prepass/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use bevy_ecs::{
SystemParamItem,
},
};
use bevy_math::Mat4;
use bevy_math::{Affine3A, Mat4};
use bevy_reflect::TypeUuid;
use bevy_render::{
globals::{GlobalsBuffer, GlobalsUniform},
Expand Down Expand Up @@ -46,8 +46,8 @@ use bevy_utils::tracing::error;

use crate::{
prepare_lights, setup_morph_and_skinning_defs, AlphaMode, DrawMesh, Material, MaterialPipeline,
MaterialPipelineKey, MeshLayouts, MeshPipeline, MeshPipelineKey, MeshUniform, RenderMaterials,
SetMaterialBindGroup, SetMeshBindGroup,
MaterialPipelineKey, MeshLayouts, MeshPipeline, MeshPipelineKey, MeshTransforms, MeshUniform,
RenderMaterials, SetMaterialBindGroup, SetMeshBindGroup,
};

use std::{hash::Hash, marker::PhantomData};
Expand Down Expand Up @@ -203,7 +203,7 @@ pub fn update_previous_view_projections(
}

#[derive(Component)]
pub struct PreviousGlobalTransform(pub Mat4);
pub struct PreviousGlobalTransform(pub Affine3A);

pub fn update_mesh_previous_global_transforms(
mut commands: Commands,
Expand All @@ -216,7 +216,7 @@ pub fn update_mesh_previous_global_transforms(
for (entity, transform) in &meshes {
commands
.entity(entity)
.insert(PreviousGlobalTransform(transform.compute_matrix()));
.insert(PreviousGlobalTransform(transform.affine()));
}
}
}
Expand Down Expand Up @@ -762,7 +762,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
material_meshes: Query<(
&Handle<M>,
&Handle<Mesh>,
&MeshUniform,
&MeshTransforms,
&GpuArrayBufferIndex<MeshUniform>,
)>,
mut views: Query<(
Expand Down Expand Up @@ -809,7 +809,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
let rangefinder = view.rangefinder3d();

for visible_entity in &visible_entities.entities {
let Ok((material_handle, mesh_handle, mesh_uniform, batch_indices)) = material_meshes.get(*visible_entity) else {
let Ok((material_handle, mesh_handle, mesh_transforms, batch_indices)) = material_meshes.get(*visible_entity) else {
continue;
};

Expand Down Expand Up @@ -852,8 +852,8 @@ pub fn queue_prepass_material_meshes<M: Material>(
}
};

let distance =
rangefinder.distance(&mesh_uniform.transform) + material.properties.depth_bias;
let distance = rangefinder.distance_affine(&mesh_transforms.transform)
+ material.properties.depth_bias;
match alpha_mode {
AlphaMode::Opaque => {
opaque_phase.add(Opaque3dPrepass {
Expand Down
16 changes: 11 additions & 5 deletions crates/bevy_pbr/src/prepass/prepass.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#import bevy_pbr::skinning
#import bevy_pbr::morph
#import bevy_pbr::mesh_bindings mesh
#import bevy_render::instance_index
#import bevy_render::instance_index get_instance_index

// Most of these attributes are not used in the default prepass fragment shader, but they are still needed so we can
// pass them to custom prepass shaders like pbr_prepass.wgsl.
Expand Down Expand Up @@ -92,7 +92,11 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
#else // SKINNED
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
var model = mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].model;
var model = bevy_pbr::mesh_functions::affine_to_square(
mesh[get_instance_index(
vertex_no_morph.instance_index
)].model
);
#endif // SKINNED

out.clip_position = bevy_pbr::mesh_functions::mesh_position_local_to_clip(model, vec4(vertex.position, 1.0));
Expand All @@ -113,7 +117,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
vertex.normal,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)
get_instance_index(vertex_no_morph.instance_index)
);
#endif // SKINNED

Expand All @@ -123,7 +127,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
vertex.tangent,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)
get_instance_index(vertex_no_morph.instance_index)
);
#endif // VERTEX_TANGENTS
#endif // NORMAL_PREPASS
Expand All @@ -133,7 +137,9 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
out.previous_world_position = bevy_pbr::mesh_functions::mesh_position_local_to_world(
mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].previous_model,
bevy_pbr::mesh_functions::affine_to_square(
mesh[get_instance_index(vertex_no_morph.instance_index)].previous_model
),
vec4<f32>(vertex.position, 1.0)
);
#endif // MOTION_VECTOR_PREPASS
Expand Down
113 changes: 98 additions & 15 deletions crates/bevy_pbr/src/render/mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@ use bevy_ecs::{
query::ROQueryItem,
system::{lifetimeless::*, SystemParamItem, SystemState},
};
use bevy_math::{Mat3A, Mat4, Vec2};
use bevy_math::{Affine3A, Mat4, Vec2, Vec3Swizzles, Vec4};
use bevy_reflect::TypeUuid;
use bevy_render::{
globals::{GlobalsBuffer, GlobalsUniform},
gpu_component_array_buffer::GpuComponentArrayBufferPlugin,
mesh::{
skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
GpuBufferInfo, InnerMeshVertexBufferLayout, Mesh, MeshVertexBufferLayout,
Expand Down Expand Up @@ -115,8 +114,6 @@ impl Plugin for MeshRenderPlugin {
load_internal_asset!(app, SKINNING_HANDLE, "skinning.wgsl", Shader::from_wgsl);
load_internal_asset!(app, MORPH_HANDLE, "morph.wgsl", Shader::from_wgsl);

app.add_plugins(GpuComponentArrayBufferPlugin::<MeshUniform>::default());

if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
render_app
.init_resource::<SkinnedMeshUniform>()
Expand All @@ -129,6 +126,7 @@ impl Plugin for MeshRenderPlugin {
.add_systems(
Render,
(
prepare_mesh_uniforms.in_set(RenderSet::Prepare),
prepare_skinned_meshes.in_set(RenderSet::Prepare),
prepare_morphs.in_set(RenderSet::Prepare),
queue_mesh_bind_group.in_set(RenderSet::Queue),
Expand All @@ -151,7 +149,11 @@ impl Plugin for MeshRenderPlugin {
));
}

render_app.init_resource::<MeshPipeline>();
render_app
.insert_resource(GpuArrayBuffer::<MeshUniform>::new(
render_app.world.resource::<RenderDevice>(),
))
.init_resource::<MeshPipeline>();
}

// Load the mesh_bindings shader module here as it depends on runtime information about
Expand All @@ -166,14 +168,73 @@ impl Plugin for MeshRenderPlugin {
}
}

#[derive(Component, ShaderType, Clone)]
/// CPU-side per-mesh transform data, stored as a component on extracted mesh entities.
///
/// `extract_meshes` builds one of these per visible mesh. It is converted into the more
/// compact [`MeshUniform`] through the `From<&MeshTransforms>` implementation, while queue
/// systems read `transform` directly for view-distance calculations.
#[derive(Component)]
pub struct MeshTransforms {
/// Model transform of the mesh, taken from the entity's global transform as an affine.
pub transform: Affine3A,
/// Transform recorded in `PreviousGlobalTransform`; `extract_meshes` falls back to
/// `transform` when the entity has no such component.
pub previous_transform: Affine3A,
/// Raw bits of the `MeshFlags` computed for this mesh.
pub flags: u32,
}

#[derive(ShaderType, Clone)]
pub struct MeshUniform {
pub transform: Mat4,
pub previous_transform: Mat4,
pub inverse_transpose_model: Mat4,
// Affine 4x3 matrices transposed to 3x4
pub transform: [Vec4; 3],
pub previous_transform: [Vec4; 3],
// 3x3 matrix packed in mat2x4 and f32 as:
// [0].xyz, [1].x,
// [1].yz, [2].xy
// [2].z
pub inverse_transpose_model_a: [Vec4; 2],
pub inverse_transpose_model_b: f32,
pub flags: u32,
}

impl From<&MeshTransforms> for MeshUniform {
    /// Packs the affine transforms of a [`MeshTransforms`] into the compact [`MeshUniform`]
    /// layout.
    fn from(mesh_transforms: &MeshTransforms) -> Self {
        let model = &mesh_transforms.transform;
        let previous_model = &mesh_transforms.previous_transform;

        // Transposing the 3x3 part exposes its rows as `x_axis`/`y_axis`/`z_axis`, so every
        // `Vec4` built below is one row of the affine matrix with the matching translation
        // component stored in `w`.
        let model_rows = model.matrix3.transpose();
        let previous_model_rows = previous_model.matrix3.transpose();
        // 3x3 part of the inverse model transform, transposed.
        let inverse_transpose_3x3 = model.inverse().matrix3.transpose();

        Self {
            transform: [
                model_rows.x_axis.extend(model.translation.x),
                model_rows.y_axis.extend(model.translation.y),
                model_rows.z_axis.extend(model.translation.z),
            ],
            previous_transform: [
                previous_model_rows
                    .x_axis
                    .extend(previous_model.translation.x),
                previous_model_rows
                    .y_axis
                    .extend(previous_model.translation.y),
                previous_model_rows
                    .z_axis
                    .extend(previous_model.translation.z),
            ],
            // The nine 3x3 values are packed into two `Vec4`s plus one trailing `f32`:
            //   a[0] = ([0].xyz, [1].x)
            //   a[1] = ([1].yz, [2].xy)
            //   b    = [2].z
            inverse_transpose_model_a: [
                (inverse_transpose_3x3.x_axis, inverse_transpose_3x3.y_axis.x).into(),
                (
                    inverse_transpose_3x3.y_axis.yz(),
                    inverse_transpose_3x3.z_axis.xy(),
                )
                    .into(),
            ],
            inverse_transpose_model_b: inverse_transpose_3x3.z_axis.z,
            flags: mesh_transforms.flags,
        }
    }
}

// NOTE: These must match the bit flags in bevy_pbr/src/render/mesh_types.wgsl!
bitflags::bitflags! {
#[repr(transparent)]
Expand Down Expand Up @@ -210,26 +271,25 @@ pub fn extract_meshes(
for (entity, _, transform, previous_transform, handle, not_receiver, not_caster) in
visible_meshes
{
let transform = transform.compute_matrix();
let transform = transform.affine();
let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform);
let mut flags = if not_receiver.is_some() {
MeshFlags::empty()
} else {
MeshFlags::SHADOW_RECEIVER
};
if Mat3A::from_mat4(transform).determinant().is_sign_positive() {
if transform.matrix3.determinant().is_sign_positive() {
flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3;
}
let uniform = MeshUniform {
let transforms = MeshTransforms {
flags: flags.bits(),
transform,
previous_transform,
inverse_transpose_model: transform.inverse().transpose(),
};
if not_caster.is_some() {
not_caster_commands.push((entity, (handle.clone_weak(), uniform, NotShadowCaster)));
not_caster_commands.push((entity, (handle.clone_weak(), transforms, NotShadowCaster)));
} else {
caster_commands.push((entity, (handle.clone_weak(), uniform)));
caster_commands.push((entity, (handle.clone_weak(), transforms)));
}
}
*prev_caster_commands_len = caster_commands.len();
Expand Down Expand Up @@ -317,6 +377,29 @@ pub fn extract_skinned_meshes(
commands.insert_or_spawn_batch(values);
}

/// Rebuilds the [`MeshUniform`] GPU array from every entity's [`MeshTransforms`].
///
/// The buffer is cleared, one uniform is pushed per entity, the value returned by each push
/// is inserted on that entity through a batched command, and finally the buffer contents
/// are written using the render device and queue.
fn prepare_mesh_uniforms(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut gpu_array_buffer: ResMut<GpuArrayBuffer<MeshUniform>>,
    components: Query<(Entity, &MeshTransforms)>,
) {
    // Start from an empty buffer on every run.
    gpu_array_buffer.clear();

    // Pair each entity with whatever `push` returns for its uniform.
    let mut batch = Vec::new();
    for (entity, mesh_transforms) in components.iter() {
        let uniform = MeshUniform::from(mesh_transforms);
        let pushed = gpu_array_buffer.push(uniform);
        batch.push((entity, pushed));
    }
    commands.insert_or_spawn_batch(batch);

    // Write only after all uniforms for this run have been pushed.
    gpu_array_buffer.write_buffer(&render_device, &render_queue);
}

#[derive(Resource, Clone)]
pub struct MeshPipeline {
pub view_layout: BindGroupLayout,
Expand Down
Loading

0 comments on commit b7845f4

Please sign in to comment.