From 644d9ef1597e29c02ef6fcb06e237b7d733d969d Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Thu, 6 Feb 2025 16:42:02 -0800 Subject: [PATCH 01/11] Fix motion vector computation after #17688. PR #17688 broke motion vector computation, and therefore motion blur, because it enabled retention of `MeshInputUniform`s, and `MeshInputUniform`s contain the indices of the previous frame's transform and the previous frame's skinned mesh joint matrices. On frame N, if a `MeshInputUniform` is retained on GPU from the previous frame, the `previous_input_index` and `previous_skin_index` would refer to the indices for frame N - 2, not the index for frame N - 1. This patch fixes the problems. It solves these issues in two different ways, one for transforms and one for skins: 1. To fix transforms, this patch supplies the *frame index* to the shader as part of the view uniforms, and specifies which frame index each mesh's previous transform refers to. So, in the situation described above, the frame index would be N, the previous frame index would be N - 1, and the `previous_input_frame_number` would be N - 2. The shader can now detect this situation and infer that the mesh has been retained, and can therefore conclude that the mesh's transform hasn't changed. 2. To fix skins, this patch replaces the explicit `previous_skin_index` with an invariant that the index of the joints for the current frame and the index of the joints for the previous frame are the same. This means that the `MeshInputUniform` never has to be updated even if the skin is animated. The downside is that we have to copy joint matrices from the previous frame's buffer to the current frame's buffer in `extract_skins`. The rationale behind (2) is that we currently have no mechanism to detect when joints that affect a skin have been updated, short of comparing all the transforms and setting a flag for `extract_meshes_for_gpu_building` to consume, which would regress performance as we want `extract_skins` and `extract_meshes_for_gpu_building` to be able to run in parallel. To test this change, use `cargo run --example motion_blur`. --- crates/bevy_pbr/src/render/gpu_preprocess.rs | 23 +-- crates/bevy_pbr/src/render/mesh.rs | 47 +++--- .../bevy_pbr/src/render/mesh_preprocess.wgsl | 9 +- crates/bevy_pbr/src/render/mesh_types.wgsl | 2 +- crates/bevy_pbr/src/render/skin.rs | 140 +++++++++++------- crates/bevy_pbr/src/render/skinning.wgsl | 2 +- .../mesh_preprocess_types.wgsl | 2 +- .../src/render_resource/buffer_vec.rs | 5 + crates/bevy_render/src/view/mod.rs | 17 ++- crates/bevy_render/src/view/view.wgsl | 1 + 10 files changed, 147 insertions(+), 101 deletions(-) diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs index 71ccfa09582b7..e8205114b7fc7 100644 --- a/crates/bevy_pbr/src/render/gpu_preprocess.rs +++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs @@ -626,12 +626,10 @@ impl Node for EarlyGpuPreprocessNode { continue; }; - // If we're drawing indirectly, make sure the mesh preprocessing - // shader has access to the view info it needs to do culling. - let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![]; - if !no_indirect_drawing { - dynamic_offsets.push(view_uniform_offset.offset); - } + // Make sure the mesh preprocessing shader has access to the + // view info it needs to do culling and motion vector + // computation. + let dynamic_offsets = [view_uniform_offset.offset]; // Are we drawing directly or indirectly? match *phase_bind_groups { @@ -1317,6 +1315,11 @@ fn preprocess_direct_bind_group_layout_entries() -> DynamicBindGroupLayoutEntrie DynamicBindGroupLayoutEntries::new_with_indices( ShaderStages::COMPUTE, ( + // `view` + ( + 0, + uniform_buffer::(/* has_dynamic_offset= */ true), + ), // `current_input` (3, storage_buffer_read_only::(false)), // `previous_input` @@ -1361,11 +1364,6 @@ fn gpu_culling_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { 8, storage_buffer_read_only::(/* has_dynamic_offset= */ false), ), - // `view` - ( - 0, - uniform_buffer::(/* has_dynamic_offset= */ true), - ), )) } @@ -1802,11 +1800,14 @@ impl<'a> PreprocessBindGroupBuilder<'a> { ) .ok(); + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + Some(PhasePreprocessBindGroups::Direct( self.render_device.create_bind_group( "preprocess_direct_bind_group", &self.pipelines.direct_preprocess.bind_group_layout, &BindGroupEntries::with_indices(( + (0, view_uniforms_binding), (3, self.current_input_buffer.as_entire_binding()), (4, self.previous_input_buffer.as_entire_binding()), ( diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index fc24b5e116cc5..742d39a0e4811 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -478,13 +478,13 @@ pub struct MeshUniform { pub first_vertex_index: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, - /// The previous skin index, or `u32::MAX` if there's no previous skin. - pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. /// High 16 bits: index of the lightmap in the binding array. pub material_and_lightmap_bind_group_slot: u32, + /// Padding. + pub pad: u32, } /// Information that has to be transferred from CPU to GPU in order to produce @@ -515,6 +515,11 @@ pub struct MeshInputUniform { /// /// This is used for TAA. If not present, this will be `u32::MAX`. pub previous_input_index: u32, + /// The frame number of the previous frame. + /// + /// The shader compared this field to the current frame number in order to + /// determine whether [`Self::previous_input_index`] is valid. + pub previous_input_frame_number: u32, /// The index of this mesh's first vertex in the vertex buffer. /// /// Multiple meshes can be packed into a single vertex buffer (see @@ -534,8 +539,6 @@ pub struct MeshInputUniform { pub index_count: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, - /// The previous skin index, or `u32::MAX` if there's no previous skin. - pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. @@ -577,7 +580,6 @@ impl MeshUniform { material_bind_group_slot: MaterialBindGroupSlot, maybe_lightmap: Option<(LightmapSlotIndex, Rect)>, current_skin_index: Option, - previous_skin_index: Option, ) -> Self { let (local_from_world_transpose_a, local_from_world_transpose_b) = mesh_transforms.world_from_local.inverse_transpose_3x3(); @@ -595,9 +597,9 @@ impl MeshUniform { flags: mesh_transforms.flags, first_vertex_index, current_skin_index: current_skin_index.unwrap_or(u32::MAX), - previous_skin_index: previous_skin_index.unwrap_or(u32::MAX), material_and_lightmap_bind_group_slot: u32::from(material_bind_group_slot) | ((lightmap_bind_group_slot as u32) << 16), + pad: 0, } } } @@ -1115,11 +1117,7 @@ impl RenderMeshInstanceGpuBuilder { None => (false, 0, 0), }; - let current_skin_index = match skin_indices.current.get(&entity) { - Some(skin_indices) => skin_indices.index(), - None => u32::MAX, - }; - let previous_skin_index = match skin_indices.prev.get(&entity) { + let current_skin_index = match skin_indices.get(&entity) { Some(skin_indices) => skin_indices.index(), None => u32::MAX, }; @@ -1148,6 +1146,7 @@ impl RenderMeshInstanceGpuBuilder { lightmap_uv_rect: self.lightmap_uv_rect, flags: self.mesh_flags.bits(), previous_input_index: u32::MAX, + previous_input_frame_number: u32::MAX, first_vertex_index, first_index_index, index_count: if mesh_is_indexed { @@ -1156,7 +1155,6 @@ impl RenderMeshInstanceGpuBuilder { vertex_count }, current_skin_index, - previous_skin_index, material_and_lightmap_bind_group_slot: u32::from( self.shared.material_bindings_index.slot, ) | ((lightmap_slot as u32) << 16), @@ -1557,7 +1555,7 @@ fn set_mesh_motion_vector_flags( skin_indices: Res, morph_indices: Res, ) { - for &entity in skin_indices.prev.keys() { + for &entity in skin_indices.keys() { render_mesh_instances .insert_mesh_instance_flags(entity, RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN); } @@ -1827,9 +1825,7 @@ impl GetBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); - let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); - + let current_skin_index = skin_indices.get(&main_entity).map(SkinIndex::index); let material_bind_group_index = mesh_instance.material_bindings_index; Some(( @@ -1839,7 +1835,6 @@ impl GetBatchData for MeshPipeline { material_bind_group_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, - previous_skin_index, ), mesh_instance.should_batch().then_some(( material_bind_group_index.group, @@ -1897,8 +1892,7 @@ impl GetFullBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); - let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); + let current_skin_index = skin_indices.get(&main_entity).map(SkinIndex::index); Some(MeshUniform::new( &mesh_instance.transforms, @@ -1906,7 +1900,6 @@ impl GetFullBatchData for MeshPipeline { mesh_instance.material_bindings_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, - previous_skin_index, )) } @@ -2793,8 +2786,7 @@ impl RenderCommand

for SetMeshBindGroup { return RenderCommandResult::Success; }; - let current_skin_index = skin_indices.current.get(entity); - let prev_skin_index = skin_indices.prev.get(entity); + let current_skin_index = skin_indices.get(entity); let current_morph_index = morph_indices.current.get(entity); let prev_morph_index = morph_indices.prev.get(entity); @@ -2841,14 +2833,11 @@ impl RenderCommand

for SetMeshBindGroup { if has_motion_vector_prepass { // Attach the previous skin index for motion vector computation. If // there isn't one, just use zero as the shader will ignore it. - if current_skin_index.is_some() && skin::skins_use_uniform_buffers(&render_device) { - match prev_skin_index { - Some(prev_skin_index) => { - dynamic_offsets[offset_count] = prev_skin_index.byte_offset; - } - None => dynamic_offsets[offset_count] = 0, + if skin::skins_use_uniform_buffers(&render_device) { + if let Some(current_skin_index) = current_skin_index { + dynamic_offsets[offset_count] = current_skin_index.byte_offset; + offset_count += 1; } - offset_count += 1; } // Attach the previous morph index for motion vector computation. If diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index 315dd13d3fdd6..45544ad61becf 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -193,13 +193,15 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { #endif // Look up the previous model matrix. + let previous_input_frame_number = current_input[input_index].previous_input_frame_number; let previous_input_index = current_input[input_index].previous_input_index; + let previous_input_is_valid = previous_input_frame_number + 1 == view.frame_number; var previous_world_from_local_affine_transpose: mat3x4; - if (previous_input_index == 0xffffffff) { - previous_world_from_local_affine_transpose = world_from_local_affine_transpose; - } else { + if (previous_input_is_valid && previous_input_index != 0xffffffffu) { previous_world_from_local_affine_transpose = previous_input[previous_input_index].world_from_local; + } else { + previous_world_from_local_affine_transpose = world_from_local_affine_transpose; } let previous_world_from_local = maths::affine3_to_square(previous_world_from_local_affine_transpose); @@ -342,7 +344,6 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index; output[mesh_output_index].current_skin_index = current_input[input_index].current_skin_index; - output[mesh_output_index].previous_skin_index = current_input[input_index].previous_skin_index; output[mesh_output_index].material_and_lightmap_bind_group_slot = current_input[input_index].material_and_lightmap_bind_group_slot; } diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl index f0258770c6da0..580c220d7af35 100644 --- a/crates/bevy_pbr/src/render/mesh_types.wgsl +++ b/crates/bevy_pbr/src/render/mesh_types.wgsl @@ -18,10 +18,10 @@ struct Mesh { // The index of the mesh's first vertex in the vertex buffer. first_vertex_index: u32, current_skin_index: u32, - previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, + pad: u32, }; #ifdef SKINNED diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs index c248821ccafd3..40fc3c6bd0fda 100644 --- a/crates/bevy_pbr/src/render/skin.rs +++ b/crates/bevy_pbr/src/render/skin.rs @@ -2,9 +2,10 @@ use core::mem::{self, size_of}; use std::sync::OnceLock; use bevy_asset::Assets; +use bevy_derive::{Deref, DerefMut}; use bevy_ecs::prelude::*; use bevy_math::Mat4; -use bevy_render::sync_world::MainEntityHashMap; +use bevy_render::sync_world::{MainEntity, MainEntityHashMap}; use bevy_render::{ batching::NoAutomaticBatching, mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes}, @@ -25,7 +26,7 @@ use bevy_transform::prelude::GlobalTransform; pub const MAX_JOINTS: usize = 256; /// The location of the first joint matrix in the skin uniform buffer. -#[derive(Component)] +#[derive(Component, Clone, Copy)] pub struct SkinIndex { /// The byte offset of the first joint matrix. pub byte_offset: u32, @@ -52,16 +53,8 @@ impl SkinIndex { /// /// We store both the current frame's joint matrices and the previous frame's /// joint matrices for the purposes of motion vector calculation. -#[derive(Default, Resource)] -pub struct SkinIndices { - /// Maps each skinned mesh to the applicable offset within - /// [`SkinUniforms::current_buffer`]. - pub current: MainEntityHashMap, - - /// Maps each skinned mesh to the applicable offset within - /// [`SkinUniforms::prev_buffer`]. - pub prev: MainEntityHashMap, -} +#[derive(Default, Resource, Deref, DerefMut)] +pub struct SkinIndices(pub MainEntityHashMap); /// The GPU buffers containing joint matrices for all skinned meshes. /// @@ -109,18 +102,21 @@ pub fn prepare_skins( render_queue: Res, mut uniform: ResMut, ) { - if uniform.current_buffer.is_empty() { - return; + if !uniform.current_buffer.is_empty() { + let len = uniform.current_buffer.len(); + uniform.current_buffer.reserve(len, &render_device); + uniform + .current_buffer + .write_buffer(&render_device, &render_queue); } - let len = uniform.current_buffer.len(); - uniform.current_buffer.reserve(len, &render_device); - uniform - .current_buffer - .write_buffer(&render_device, &render_queue); - - // We don't need to write `uniform.prev_buffer` because we already wrote it - // last frame, and the data should still be on the GPU. + if !uniform.prev_buffer.is_empty() { + let len = uniform.prev_buffer.len(); + uniform.prev_buffer.reserve(len, &render_device); + uniform + .prev_buffer + .write_buffer(&render_device, &render_queue); + } } // Notes on implementation: @@ -150,24 +146,35 @@ pub fn prepare_skins( // which normally only support fixed size arrays. You just have to make sure // in the shader that you only read the values that are valid for that binding. pub fn extract_skins( - skin_indices: ResMut, - uniform: ResMut, + mut skin_indices: ResMut, + mut skin_uniforms: ResMut, query: Extract>, inverse_bindposes: Extract>>, joints: Extract>, render_device: Res, ) { + // We're going to make a new buffer, so figure out which we need. let skins_use_uniform_buffers = skins_use_uniform_buffers(&render_device); + let buffer_usages = if skins_use_uniform_buffers { + BufferUsages::UNIFORM + } else { + BufferUsages::STORAGE + }; - // Borrow check workaround. - let (skin_indices, uniform) = (skin_indices.into_inner(), uniform.into_inner()); + // Grab the previous frame's buffer. We're going to copy matrices from it to + // the `prev_buffer`. + let old_skin_indices = mem::take(&mut **skin_indices); + let old_buffer = mem::replace( + &mut skin_uniforms.current_buffer, + RawBufferVec::new(buffer_usages), + ); - // Swap buffers. We need to keep the previous frame's buffer around for the - // purposes of motion vector computation. - mem::swap(&mut skin_indices.current, &mut skin_indices.prev); - mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer); - skin_indices.current.clear(); - uniform.current_buffer.clear(); + let skin_uniforms = skin_uniforms.into_inner(); + let current_buffer = &mut skin_uniforms.current_buffer; + let prev_buffer = &mut skin_uniforms.prev_buffer; + + // Clear out the previous buffer so we can push onto it. + prev_buffer.clear(); let mut last_start = 0; @@ -176,44 +183,71 @@ pub fn extract_skins( if !view_visibility.get() { continue; } - let buffer = &mut uniform.current_buffer; + let Some(inverse_bindposes) = inverse_bindposes.get(&skin.inverse_bindposes) else { continue; }; - let start = buffer.len(); - - let target = start + skin.joints.len().min(MAX_JOINTS); - buffer.extend( - joints - .iter_many(&skin.joints) - .zip(inverse_bindposes.iter()) - .take(MAX_JOINTS) - .map(|(joint, bindpose)| joint.affine() * *bindpose), - ); + + let main_entity = MainEntity::from(entity); + + // Determine the boundaries of this skin in the joints buffer. + let start = current_buffer.len(); + let joint_count = skin.joints.len().min(MAX_JOINTS); + let end = start + joint_count; + + let first_old_skin_index = old_skin_indices.get(&main_entity).copied(); + + // Reserve space for the joints so we don't have to allocate. + current_buffer.reserve_internal(end); + prev_buffer.reserve_internal(end); + + for (joint_index, joint) in joints.iter_many(&skin.joints).take(joint_count).enumerate() { + // Push the joint matrix for the current frame. + let joint_matrix = match inverse_bindposes.get(joint_index) { + Some(inverse_bindpose) => joint.affine() * *inverse_bindpose, + None => joint.compute_matrix(), + }; + current_buffer.push(joint_matrix); + + // Grab the joint matrix from the previous frame, if there is one, + // and copy it to the previous frame's joint matrix buffer. + let prev_joint_matrix = first_old_skin_index + .and_then(|first_old_skin_index| { + old_buffer.get(first_old_skin_index.index() + (joint_index as u32)) + }) + .copied() + .unwrap_or(joint_matrix); + prev_buffer.push(prev_joint_matrix); + } + // iter_many will skip any failed fetches. This will cause it to assign the wrong bones, // so just bail by truncating to the start. - if buffer.len() != target { - buffer.truncate(start); + if current_buffer.len() != end { + current_buffer.truncate(start); + prev_buffer.truncate(start); continue; } + last_start = last_start.max(start); + debug_assert_eq!(current_buffer.len(), prev_buffer.len()); + // Pad to 256 byte alignment if we're using a uniform buffer. // There's no need to do this if we're using storage buffers, though. if skins_use_uniform_buffers { - while buffer.len() % 4 != 0 { - buffer.push(Mat4::ZERO); + while current_buffer.len() % 4 != 0 { + current_buffer.push(Mat4::ZERO); + prev_buffer.push(Mat4::ZERO); } } - skin_indices - .current - .insert(entity.into(), SkinIndex::new(start)); + skin_indices.insert(entity.into(), SkinIndex::new(start)); } - // Pad out the buffer to ensure that there's enough space for bindings - while uniform.current_buffer.len() - last_start < MAX_JOINTS { - uniform.current_buffer.push(Mat4::ZERO); + // Pad out the buffers to ensure that there's enough space for bindings + while current_buffer.len() - last_start < MAX_JOINTS { + current_buffer.push(Mat4::ZERO); + prev_buffer.push(Mat4::ZERO); } } diff --git a/crates/bevy_pbr/src/render/skinning.wgsl b/crates/bevy_pbr/src/render/skinning.wgsl index 92e977aeb1b92..96fba57e1b3f6 100644 --- a/crates/bevy_pbr/src/render/skinning.wgsl +++ b/crates/bevy_pbr/src/render/skinning.wgsl @@ -57,7 +57,7 @@ fn skin_prev_model( + weights.z * prev_joint_matrices.data[indexes.z] + weights.w * prev_joint_matrices.data[indexes.w]; #else // SKINS_USE_UNIFORM_BUFFERS - let skin_index = mesh[instance_index].previous_skin_index; + let skin_index = mesh[instance_index].current_skin_index; return weights.x * prev_joint_matrices[skin_index + indexes.x] + weights.y * prev_joint_matrices[skin_index + indexes.y] + weights.z * prev_joint_matrices[skin_index + indexes.z] diff --git a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl index 7f4dd71f610f0..b7631cf1a0076 100644 --- a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl +++ b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl @@ -11,11 +11,11 @@ struct MeshInput { // Various flags. flags: u32, previous_input_index: u32, + previous_input_frame_number: u32, first_vertex_index: u32, first_index_index: u32, index_count: u32, current_skin_index: u32, - previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, diff --git a/crates/bevy_render/src/render_resource/buffer_vec.rs b/crates/bevy_render/src/render_resource/buffer_vec.rs index 0feaa8487885b..32d06ab397578 100644 --- a/crates/bevy_render/src/render_resource/buffer_vec.rs +++ b/crates/bevy_render/src/render_resource/buffer_vec.rs @@ -103,6 +103,11 @@ impl RawBufferVec { self.values.append(&mut other.values); } + /// Returns the value at the given index. + pub fn get(&self, index: u32) -> Option<&T> { + self.values.get(index as usize) + } + /// Sets the value at the given index. /// /// The index must be less than [`RawBufferVec::len`]. diff --git a/crates/bevy_render/src/view/mod.rs b/crates/bevy_render/src/view/mod.rs index b0fab01d01b4e..0eb86bc6a5eea 100644 --- a/crates/bevy_render/src/view/mod.rs +++ b/crates/bevy_render/src/view/mod.rs @@ -120,7 +120,7 @@ impl Plugin for ViewPlugin { )); if let Some(render_app) = app.get_sub_app_mut(RenderApp) { - render_app.add_systems( + render_app.init_resource::().add_systems( Render, ( // `TextureView`s need to be dropped before reconfiguring window surfaces. @@ -137,6 +137,7 @@ impl Plugin for ViewPlugin { .after(crate::render_asset::prepare_assets::) .ambiguous_with(crate::camera::sort_cameras), // doesn't use `sorted_camera_index_for_target` prepare_view_uniforms.in_set(RenderSet::PrepareResources), + advance_frame.in_set(RenderSet::Cleanup), ), ); } @@ -568,6 +569,7 @@ pub struct ViewUniform { pub frustum: [Vec4; 6], pub color_grading: ColorGradingUniform, pub mip_bias: f32, + pub frame_number: u32, } #[derive(Resource)] @@ -876,6 +878,12 @@ impl ViewDepthTexture { } } +/// A number that starts at 0 and increments by 1 every frame. +/// +/// This is useful for determining which frame cached data belongs to. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct FrameNumber(pub u32); + pub fn prepare_view_uniforms( mut commands: Commands, render_device: Res, @@ -889,6 +897,7 @@ pub fn prepare_view_uniforms( Option<&TemporalJitter>, Option<&MipBias>, )>, + frame_number: Res, ) { let view_iter = views.iter(); let view_count = view_iter.len(); @@ -942,6 +951,7 @@ pub fn prepare_view_uniforms( frustum, color_grading: extracted_view.color_grading.clone().into(), mip_bias: mip_bias.unwrap_or(&MipBias(0.0)).0, + frame_number: **frame_number, }), }; @@ -1107,3 +1117,8 @@ pub fn prepare_view_targets( }); } } + +/// Updates the [`FrameNumber`] at frame end. +pub fn advance_frame(mut frame_number: ResMut) { + **frame_number += 1; +} diff --git a/crates/bevy_render/src/view/view.wgsl b/crates/bevy_render/src/view/view.wgsl index ed08599758a07..2c901c3280de3 100644 --- a/crates/bevy_render/src/view/view.wgsl +++ b/crates/bevy_render/src/view/view.wgsl @@ -60,4 +60,5 @@ struct View { frustum: array, 6>, color_grading: ColorGrading, mip_bias: f32, + frame_number: u32, }; From 636e58b62413b95b7fa5cef944e84704911be157 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Fri, 7 Feb 2025 15:22:42 -0800 Subject: [PATCH 02/11] Fix meshlets --- crates/bevy_pbr/src/meshlet/instance_manager.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/crates/bevy_pbr/src/meshlet/instance_manager.rs b/crates/bevy_pbr/src/meshlet/instance_manager.rs index 26f6432a1fdf7..b7370e9928c44 100644 --- a/crates/bevy_pbr/src/meshlet/instance_manager.rs +++ b/crates/bevy_pbr/src/meshlet/instance_manager.rs @@ -118,14 +118,8 @@ impl InstanceManager { .cloned() .unwrap_or_default(); - let mesh_uniform = MeshUniform::new( - &transforms, - 0, - mesh_material_binding_id.slot, - None, - None, - None, - ); + let mesh_uniform = + MeshUniform::new(&transforms, 0, mesh_material_binding_id.slot, None, None); // Append instance data self.instances.push(( From fa60b0a98a4d4a080441a3b74c00c4a7c6706626 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Fri, 7 Feb 2025 17:56:13 -0800 Subject: [PATCH 03/11] Remove `FrameNumber` in favor of `FrameCount` --- crates/bevy_pbr/src/render/mesh.rs | 4 ++-- .../bevy_pbr/src/render/mesh_preprocess.wgsl | 4 ++-- .../mesh_preprocess_types.wgsl | 2 +- crates/bevy_render/src/view/mod.rs | 21 +++++-------------- crates/bevy_render/src/view/view.wgsl | 2 +- 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 742d39a0e4811..e0f3d262513c3 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -519,7 +519,7 @@ pub struct MeshInputUniform { /// /// The shader compared this field to the current frame number in order to /// determine whether [`Self::previous_input_index`] is valid. - pub previous_input_frame_number: u32, + pub previous_input_frame_count: u32, /// The index of this mesh's first vertex in the vertex buffer. /// /// Multiple meshes can be packed into a single vertex buffer (see @@ -1146,7 +1146,7 @@ impl RenderMeshInstanceGpuBuilder { lightmap_uv_rect: self.lightmap_uv_rect, flags: self.mesh_flags.bits(), previous_input_index: u32::MAX, - previous_input_frame_number: u32::MAX, + previous_input_frame_count: u32::MAX, first_vertex_index, first_index_index, index_count: if mesh_is_indexed { diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index 45544ad61becf..1c40c2c37e793 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -193,9 +193,9 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { #endif // Look up the previous model matrix. - let previous_input_frame_number = current_input[input_index].previous_input_frame_number; + let previous_input_frame_count = current_input[input_index].previous_input_frame_count; let previous_input_index = current_input[input_index].previous_input_index; - let previous_input_is_valid = previous_input_frame_number + 1 == view.frame_number; + let previous_input_is_valid = previous_input_frame_count + 1 == view.frame_count; var previous_world_from_local_affine_transpose: mat3x4; if (previous_input_is_valid && previous_input_index != 0xffffffffu) { previous_world_from_local_affine_transpose = diff --git a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl index b7631cf1a0076..e17d45897b32a 100644 --- a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl +++ b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl @@ -11,7 +11,7 @@ struct MeshInput { // Various flags. flags: u32, previous_input_index: u32, - previous_input_frame_number: u32, + previous_input_frame_count: u32, first_vertex_index: u32, first_index_index: u32, index_count: u32, diff --git a/crates/bevy_render/src/view/mod.rs b/crates/bevy_render/src/view/mod.rs index 0eb86bc6a5eea..ba0f8cca62ced 100644 --- a/crates/bevy_render/src/view/mod.rs +++ b/crates/bevy_render/src/view/mod.rs @@ -2,6 +2,7 @@ pub mod visibility; pub mod window; use bevy_asset::{load_internal_asset, weak_handle, Handle}; +use bevy_diagnostic::FrameCount; pub use visibility::*; pub use window::*; @@ -120,7 +121,7 @@ impl Plugin for ViewPlugin { )); if let Some(render_app) = app.get_sub_app_mut(RenderApp) { - render_app.init_resource::().add_systems( + render_app.add_systems( Render, ( // `TextureView`s need to be dropped before reconfiguring window surfaces. @@ -137,7 +138,6 @@ impl Plugin for ViewPlugin { .after(crate::render_asset::prepare_assets::) .ambiguous_with(crate::camera::sort_cameras), // doesn't use `sorted_camera_index_for_target` prepare_view_uniforms.in_set(RenderSet::PrepareResources), - advance_frame.in_set(RenderSet::Cleanup), ), ); } @@ -569,7 +569,7 @@ pub struct ViewUniform { pub frustum: [Vec4; 6], pub color_grading: ColorGradingUniform, pub mip_bias: f32, - pub frame_number: u32, + pub frame_count: u32, } #[derive(Resource)] @@ -878,12 +878,6 @@ impl ViewDepthTexture { } } -/// A number that starts at 0 and increments by 1 every frame. -/// -/// This is useful for determining which frame cached data belongs to. -#[derive(Resource, Default, Deref, DerefMut)] -pub struct FrameNumber(pub u32); - pub fn prepare_view_uniforms( mut commands: Commands, render_device: Res, @@ -897,7 +891,7 @@ pub fn prepare_view_uniforms( Option<&TemporalJitter>, Option<&MipBias>, )>, - frame_number: Res, + frame_count: Res, ) { let view_iter = views.iter(); let view_count = view_iter.len(); @@ -951,7 +945,7 @@ pub fn prepare_view_uniforms( frustum, color_grading: extracted_view.color_grading.clone().into(), mip_bias: mip_bias.unwrap_or(&MipBias(0.0)).0, - frame_number: **frame_number, + frame_count: frame_count.0, }), }; @@ -1117,8 +1111,3 @@ pub fn prepare_view_targets( }); } } - -/// Updates the [`FrameNumber`] at frame end. -pub fn advance_frame(mut frame_number: ResMut) { - **frame_number += 1; -} diff --git a/crates/bevy_render/src/view/view.wgsl b/crates/bevy_render/src/view/view.wgsl index 2c901c3280de3..317de2eb88073 100644 --- a/crates/bevy_render/src/view/view.wgsl +++ b/crates/bevy_render/src/view/view.wgsl @@ -60,5 +60,5 @@ struct View { frustum: array, 6>, color_grading: ColorGrading, mip_bias: f32, - frame_number: u32, + frame_count: u32, }; From ea2029d9ac5b22c2819ae9b90e601645d662402d Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 11 Feb 2025 00:15:44 -0800 Subject: [PATCH 04/11] Try to fix motion blur properly. --- crates/bevy_pbr/Cargo.toml | 1 + crates/bevy_pbr/src/render/mesh.rs | 66 +++++--- .../bevy_pbr/src/render/mesh_preprocess.wgsl | 15 +- crates/bevy_pbr/src/render/mesh_types.wgsl | 1 + crates/bevy_pbr/src/render/skin.rs | 154 ++++++++---------- crates/bevy_pbr/src/render/skinning.wgsl | 16 +- .../mesh_preprocess_types.wgsl | 4 +- 7 files changed, 139 insertions(+), 118 deletions(-) diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml index bf568885e7fef..5031d7c91444f 100644 --- a/crates/bevy_pbr/Cargo.toml +++ b/crates/bevy_pbr/Cargo.toml @@ -37,6 +37,7 @@ bevy_asset = { path = "../bevy_asset", version = "0.16.0-dev" } bevy_color = { path = "../bevy_color", version = "0.16.0-dev" } bevy_core_pipeline = { path = "../bevy_core_pipeline", version = "0.16.0-dev" } bevy_derive = { path = "../bevy_derive", version = "0.16.0-dev" } +bevy_diagnostic = { path = "../bevy_diagnostic", version = "0.16.0-dev" } bevy_ecs = { path = "../bevy_ecs", version = "0.16.0-dev" } bevy_image = { path = "../bevy_image", version = "0.16.0-dev" } bevy_math = { path = "../bevy_math", version = "0.16.0-dev" } diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index e50f4a5918946..34d0debbb7c6b 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -1,4 +1,7 @@ -use crate::material_bind_groups::{MaterialBindGroupIndex, MaterialBindGroupSlot}; +use crate::{ + material_bind_groups::{MaterialBindGroupIndex, MaterialBindGroupSlot}, + skin::mark_meshes_as_changed_if_their_skins_changed, +}; use allocator::MeshAllocator; use bevy_asset::{load_internal_asset, AssetId, UntypedAssetId}; use bevy_core_pipeline::{ @@ -8,6 +11,7 @@ use bevy_core_pipeline::{ prepass::MotionVectorPrepass, }; use bevy_derive::{Deref, DerefMut}; +use bevy_diagnostic::FrameCount; use bevy_ecs::{ prelude::*, query::ROQueryItem, @@ -163,7 +167,13 @@ impl Plugin for MeshRenderPlugin { app.add_systems( PostUpdate, - (no_automatic_skin_batching, no_automatic_morph_batching), + ( + no_automatic_skin_batching, + no_automatic_morph_batching, + mark_meshes_as_changed_if_their_skins_changed + .ambiguous_with_all() + .after(mark_3d_meshes_as_changed_if_their_assets_changed), + ), ) .add_plugins(( BinnedRenderPhasePlugin::::default(), @@ -478,6 +488,8 @@ pub struct MeshUniform { pub first_vertex_index: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, + /// The previous skin index, or `u32::MAX` if there's no previous skin. + pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. @@ -515,11 +527,6 @@ pub struct MeshInputUniform { /// /// This is used for TAA. If not present, this will be `u32::MAX`. pub previous_input_index: u32, - /// The frame number of the previous frame. - /// - /// The shader compared this field to the current frame number in order to - /// determine whether [`Self::previous_input_index`] is valid. - pub previous_input_frame_count: u32, /// The index of this mesh's first vertex in the vertex buffer. /// /// Multiple meshes can be packed into a single vertex buffer (see @@ -539,15 +546,15 @@ pub struct MeshInputUniform { pub index_count: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, + pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. /// High 16 bits: index of the lightmap in the binding array. pub material_and_lightmap_bind_group_slot: u32, + pub timestamp: u32, /// User supplied tag to identify this mesh instance. pub tag: u32, - /// Padding. - pub pad: u32, } /// Information about each mesh instance needed to cull it on GPU. @@ -580,6 +587,7 @@ impl MeshUniform { material_bind_group_slot: MaterialBindGroupSlot, maybe_lightmap: Option<(LightmapSlotIndex, Rect)>, current_skin_index: Option, + previous_skin_index: Option, tag: Option, ) -> Self { let (local_from_world_transpose_a, local_from_world_transpose_b) = @@ -598,6 +606,7 @@ impl MeshUniform { flags: mesh_transforms.flags, first_vertex_index, current_skin_index: current_skin_index.unwrap_or(u32::MAX), + previous_skin_index: previous_skin_index.unwrap_or(u32::MAX), material_and_lightmap_bind_group_slot: u32::from(material_bind_group_slot) | ((lightmap_bind_group_slot as u32) << 16), tag: tag.unwrap_or(0), @@ -1103,6 +1112,7 @@ impl RenderMeshInstanceGpuBuilder { render_material_bindings: &RenderMaterialBindings, render_lightmaps: &RenderLightmaps, skin_indices: &SkinIndices, + timestamp: FrameCount, ) -> u32 { let (first_vertex_index, vertex_count) = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) { @@ -1121,8 +1131,11 @@ impl RenderMeshInstanceGpuBuilder { ), None => (false, 0, 0), }; - - let current_skin_index = match skin_indices.get(&entity) { + let current_skin_index = match skin_indices.current.get(&entity) { + Some(skin_indices) => skin_indices.index(), + None => u32::MAX, + }; + let previous_skin_index = match skin_indices.prev.get(&entity) { Some(skin_indices) => skin_indices.index(), None => u32::MAX, }; @@ -1151,7 +1164,7 @@ impl RenderMeshInstanceGpuBuilder { lightmap_uv_rect: self.lightmap_uv_rect, flags: self.mesh_flags.bits(), previous_input_index: u32::MAX, - previous_input_frame_count: u32::MAX, + timestamp: timestamp.0, first_vertex_index, first_index_index, index_count: if mesh_is_indexed { @@ -1160,11 +1173,11 @@ impl RenderMeshInstanceGpuBuilder { vertex_count }, current_skin_index, + previous_skin_index, material_and_lightmap_bind_group_slot: u32::from( self.shared.material_bindings_index.slot, ) | ((lightmap_slot as u32) << 16), tag: self.shared.tag, - pad: 0, }; // Did the last frame contain this entity as well? @@ -1566,7 +1579,7 @@ fn set_mesh_motion_vector_flags( skin_indices: Res, morph_indices: Res, ) { - for &entity in skin_indices.keys() { + for &entity in skin_indices.prev.keys() { render_mesh_instances .insert_mesh_instance_flags(entity, RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN); } @@ -1590,6 +1603,7 @@ pub fn collect_meshes_for_gpu_building( render_material_bindings: Res, render_lightmaps: Res, skin_indices: Res, + frame_count: Res, ) { let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = render_mesh_instances.into_inner() @@ -1629,6 +1643,7 @@ pub fn collect_meshes_for_gpu_building( &render_material_bindings, &render_lightmaps, &skin_indices, + *frame_count, ); } @@ -1656,6 +1671,7 @@ pub fn collect_meshes_for_gpu_building( &render_material_bindings, &render_lightmaps, &skin_indices, + *frame_count, ); mesh_culling_builder .update(&mut mesh_culling_data_buffer, instance_data_index as usize); @@ -1836,7 +1852,8 @@ impl GetBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.get(&main_entity).map(SkinIndex::index); + let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); + let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); let material_bind_group_index = mesh_instance.material_bindings_index; Some(( @@ -1846,6 +1863,7 @@ impl GetBatchData for MeshPipeline { material_bind_group_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, + previous_skin_index, Some(mesh_instance.tag), ), mesh_instance.should_batch().then_some(( @@ -1904,7 +1922,8 @@ impl GetFullBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.get(&main_entity).map(SkinIndex::index); + let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); + let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); Some(MeshUniform::new( &mesh_instance.transforms, @@ -1912,6 +1931,7 @@ impl GetFullBatchData for MeshPipeline { mesh_instance.material_bindings_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, + previous_skin_index, Some(mesh_instance.tag), )) } @@ -2799,7 +2819,8 @@ impl RenderCommand

for SetMeshBindGroup { return RenderCommandResult::Success; }; - let current_skin_index = skin_indices.get(entity); + let current_skin_index = skin_indices.current.get(entity); + let prev_skin_index = skin_indices.prev.get(entity); let current_morph_index = morph_indices.current.get(entity); let prev_morph_index = morph_indices.prev.get(entity); @@ -2846,11 +2867,14 @@ impl RenderCommand

for SetMeshBindGroup { if has_motion_vector_prepass { // Attach the previous skin index for motion vector computation. If // there isn't one, just use zero as the shader will ignore it. - if skin::skins_use_uniform_buffers(&render_device) { - if let Some(current_skin_index) = current_skin_index { - dynamic_offsets[offset_count] = current_skin_index.byte_offset; - offset_count += 1; + if current_skin_index.is_some() && skin::skins_use_uniform_buffers(&render_device) { + match prev_skin_index { + Some(prev_skin_index) => { + dynamic_offsets[offset_count] = prev_skin_index.byte_offset; + } + None => dynamic_offsets[offset_count] = 0, } + offset_count += 1; } // Attach the previous morph index for motion vector computation. If diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index ccdad7b48e189..932bd58a1aaab 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -192,12 +192,14 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { } #endif + // Was the mesh transform updated this frame? + let timestamp = current_input[input_index].timestamp; + let mesh_changed_this_frame = timestamp == view.frame_count; + // Look up the previous model matrix. - let previous_input_frame_count = current_input[input_index].previous_input_frame_count; let previous_input_index = current_input[input_index].previous_input_index; - let previous_input_is_valid = previous_input_frame_count + 1 == view.frame_count; var previous_world_from_local_affine_transpose: mat3x4; - if (previous_input_is_valid && previous_input_index != 0xffffffffu) { + if (mesh_changed_this_frame && previous_input_index != 0xffffffffu) { previous_world_from_local_affine_transpose = previous_input[previous_input_index].world_from_local; } else { @@ -206,6 +208,12 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { let previous_world_from_local = maths::affine3_to_square(previous_world_from_local_affine_transpose); + let previous_skin_index = select( + 0xffffffffu, + current_input[input_index].previous_skin_index, + mesh_changed_this_frame + ); + // Occlusion cull if necessary. This is done by calculating the screen-space // axis-aligned bounding box (AABB) of the mesh and testing it against the // appropriate level of the depth pyramid (a.k.a. hierarchical Z-buffer). If @@ -344,6 +352,7 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index; output[mesh_output_index].current_skin_index = current_input[input_index].current_skin_index; + output[mesh_output_index].previous_skin_index = previous_skin_index; output[mesh_output_index].material_and_lightmap_bind_group_slot = current_input[input_index].material_and_lightmap_bind_group_slot; output[mesh_output_index].tag = current_input[input_index].tag; diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl index c3fd684028974..ebac14915ea13 100644 --- a/crates/bevy_pbr/src/render/mesh_types.wgsl +++ b/crates/bevy_pbr/src/render/mesh_types.wgsl @@ -18,6 +18,7 @@ struct Mesh { // The index of the mesh's first vertex in the vertex buffer. first_vertex_index: u32, current_skin_index: u32, + previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs index 40fc3c6bd0fda..c3082caf1e490 100644 --- a/crates/bevy_pbr/src/render/skin.rs +++ b/crates/bevy_pbr/src/render/skin.rs @@ -2,10 +2,10 @@ use core::mem::{self, size_of}; use std::sync::OnceLock; use bevy_asset::Assets; -use bevy_derive::{Deref, DerefMut}; use bevy_ecs::prelude::*; use bevy_math::Mat4; -use bevy_render::sync_world::{MainEntity, MainEntityHashMap}; +use bevy_render::mesh::Mesh3d; +use bevy_render::sync_world::MainEntityHashMap; use bevy_render::{ batching::NoAutomaticBatching, mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes}, @@ -53,8 +53,16 @@ impl SkinIndex { /// /// We store both the current frame's joint matrices and the previous frame's /// joint matrices for the purposes of motion vector calculation. -#[derive(Default, Resource, Deref, DerefMut)] -pub struct SkinIndices(pub MainEntityHashMap); +#[derive(Default, Resource)] +pub struct SkinIndices { + /// Maps each skinned mesh to the applicable offset within + /// [`SkinUniforms::current_buffer`]. + pub current: MainEntityHashMap, + + /// Maps each skinned mesh to the applicable offset within + /// [`SkinUniforms::prev_buffer`]. + pub prev: MainEntityHashMap, +} /// The GPU buffers containing joint matrices for all skinned meshes. /// @@ -102,21 +110,18 @@ pub fn prepare_skins( render_queue: Res, mut uniform: ResMut, ) { - if !uniform.current_buffer.is_empty() { - let len = uniform.current_buffer.len(); - uniform.current_buffer.reserve(len, &render_device); - uniform - .current_buffer - .write_buffer(&render_device, &render_queue); + if uniform.current_buffer.is_empty() { + return; } - if !uniform.prev_buffer.is_empty() { - let len = uniform.prev_buffer.len(); - uniform.prev_buffer.reserve(len, &render_device); - uniform - .prev_buffer - .write_buffer(&render_device, &render_queue); - } + let len = uniform.current_buffer.len(); + uniform.current_buffer.reserve(len, &render_device); + uniform + .current_buffer + .write_buffer(&render_device, &render_queue); + + // We don't need to write `uniform.prev_buffer` because we already wrote it + // last frame, and the data should still be on the GPU. } // Notes on implementation: @@ -146,35 +151,24 @@ pub fn prepare_skins( // which normally only support fixed size arrays. You just have to make sure // in the shader that you only read the values that are valid for that binding. pub fn extract_skins( - mut skin_indices: ResMut, - mut skin_uniforms: ResMut, + skin_indices: ResMut, + uniform: ResMut, query: Extract>, inverse_bindposes: Extract>>, joints: Extract>, render_device: Res, ) { - // We're going to make a new buffer, so figure out which we need. let skins_use_uniform_buffers = skins_use_uniform_buffers(&render_device); - let buffer_usages = if skins_use_uniform_buffers { - BufferUsages::UNIFORM - } else { - BufferUsages::STORAGE - }; - - // Grab the previous frame's buffer. We're going to copy matrices from it to - // the `prev_buffer`. - let old_skin_indices = mem::take(&mut **skin_indices); - let old_buffer = mem::replace( - &mut skin_uniforms.current_buffer, - RawBufferVec::new(buffer_usages), - ); - let skin_uniforms = skin_uniforms.into_inner(); - let current_buffer = &mut skin_uniforms.current_buffer; - let prev_buffer = &mut skin_uniforms.prev_buffer; + // Borrow check workaround. + let (skin_indices, uniform) = (skin_indices.into_inner(), uniform.into_inner()); - // Clear out the previous buffer so we can push onto it. - prev_buffer.clear(); + // Swap buffers. We need to keep the previous frame's buffer around for the + // purposes of motion vector computation. + mem::swap(&mut skin_indices.current, &mut skin_indices.prev); + mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer); + skin_indices.current.clear(); + uniform.current_buffer.clear(); let mut last_start = 0; @@ -183,71 +177,44 @@ pub fn extract_skins( if !view_visibility.get() { continue; } - + let buffer = &mut uniform.current_buffer; let Some(inverse_bindposes) = inverse_bindposes.get(&skin.inverse_bindposes) else { continue; }; - - let main_entity = MainEntity::from(entity); - - // Determine the boundaries of this skin in the joints buffer. - let start = current_buffer.len(); - let joint_count = skin.joints.len().min(MAX_JOINTS); - let end = start + joint_count; - - let first_old_skin_index = old_skin_indices.get(&main_entity).copied(); - - // Reserve space for the joints so we don't have to allocate. - current_buffer.reserve_internal(end); - prev_buffer.reserve_internal(end); - - for (joint_index, joint) in joints.iter_many(&skin.joints).take(joint_count).enumerate() { - // Push the joint matrix for the current frame. - let joint_matrix = match inverse_bindposes.get(joint_index) { - Some(inverse_bindpose) => joint.affine() * *inverse_bindpose, - None => joint.compute_matrix(), - }; - current_buffer.push(joint_matrix); - - // Grab the joint matrix from the previous frame, if there is one, - // and copy it to the previous frame's joint matrix buffer. - let prev_joint_matrix = first_old_skin_index - .and_then(|first_old_skin_index| { - old_buffer.get(first_old_skin_index.index() + (joint_index as u32)) - }) - .copied() - .unwrap_or(joint_matrix); - prev_buffer.push(prev_joint_matrix); - } - + let start = buffer.len(); + + let target = start + skin.joints.len().min(MAX_JOINTS); + buffer.extend( + joints + .iter_many(&skin.joints) + .zip(inverse_bindposes.iter()) + .take(MAX_JOINTS) + .map(|(joint, bindpose)| joint.affine() * *bindpose), + ); // iter_many will skip any failed fetches. This will cause it to assign the wrong bones, // so just bail by truncating to the start. - if current_buffer.len() != end { - current_buffer.truncate(start); - prev_buffer.truncate(start); + if buffer.len() != target { + buffer.truncate(start); continue; } - last_start = last_start.max(start); - debug_assert_eq!(current_buffer.len(), prev_buffer.len()); - // Pad to 256 byte alignment if we're using a uniform buffer. // There's no need to do this if we're using storage buffers, though. if skins_use_uniform_buffers { - while current_buffer.len() % 4 != 0 { - current_buffer.push(Mat4::ZERO); - prev_buffer.push(Mat4::ZERO); + while buffer.len() % 4 != 0 { + buffer.push(Mat4::ZERO); } } - skin_indices.insert(entity.into(), SkinIndex::new(start)); + skin_indices + .current + .insert(entity.into(), SkinIndex::new(start)); } - // Pad out the buffers to ensure that there's enough space for bindings - while current_buffer.len() - last_start < MAX_JOINTS { - current_buffer.push(Mat4::ZERO); - prev_buffer.push(Mat4::ZERO); + // Pad out the buffer to ensure that there's enough space for bindings + while uniform.current_buffer.len() - last_start < MAX_JOINTS { + uniform.current_buffer.push(Mat4::ZERO); } } @@ -266,3 +233,18 @@ pub fn no_automatic_skin_batching( commands.entity(entity).try_insert(NoAutomaticBatching); } } + +pub fn mark_meshes_as_changed_if_their_skins_changed( + mut skinned_meshes_query: Query<(&mut Mesh3d, &SkinnedMesh)>, + changed_joints_query: Query>, +) { + for (mut mesh, skinned_mesh) in &mut skinned_meshes_query { + if skinned_mesh + .joints + .iter() + .any(|&joint| changed_joints_query.contains(joint)) + { + mesh.set_changed(); + } + } +} diff --git a/crates/bevy_pbr/src/render/skinning.wgsl b/crates/bevy_pbr/src/render/skinning.wgsl index 96fba57e1b3f6..038b293eed238 100644 --- a/crates/bevy_pbr/src/render/skinning.wgsl +++ b/crates/bevy_pbr/src/render/skinning.wgsl @@ -34,7 +34,7 @@ fn skin_model( + weights.z * joint_matrices.data[indexes.z] + weights.w * joint_matrices.data[indexes.w]; #else // SKINS_USE_UNIFORM_BUFFERS - let skin_index = mesh[instance_index].current_skin_index; + var skin_index = mesh[instance_index].current_skin_index; return weights.x * joint_matrices[skin_index + indexes.x] + weights.y * joint_matrices[skin_index + indexes.y] + weights.z * joint_matrices[skin_index + indexes.z] @@ -57,11 +57,15 @@ fn skin_prev_model( + weights.z * prev_joint_matrices.data[indexes.z] + weights.w * prev_joint_matrices.data[indexes.w]; #else // SKINS_USE_UNIFORM_BUFFERS - let skin_index = mesh[instance_index].current_skin_index; - return weights.x * prev_joint_matrices[skin_index + indexes.x] - + weights.y * prev_joint_matrices[skin_index + indexes.y] - + weights.z * prev_joint_matrices[skin_index + indexes.z] - + weights.w * prev_joint_matrices[skin_index + indexes.w]; + let prev_skin_index = mesh[instance_index].previous_skin_index; + if (prev_skin_index == 0xffffffffu) { + return skin_model(indexed, weights, instance_index); + } + + return weights.x * prev_joint_matrices[prev_skin_index + indexes.x] + + weights.y * prev_joint_matrices[prev_skin_index + indexes.y] + + weights.z * prev_joint_matrices[prev_skin_index + indexes.z] + + weights.w * prev_joint_matrices[prev_skin_index + indexes.w]; #endif // SKINS_USE_UNIFORM_BUFFERS } diff --git a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl index 957d5c037e7db..cbf9b2dc46338 100644 --- a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl +++ b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl @@ -11,17 +11,17 @@ struct MeshInput { // Various flags. flags: u32, previous_input_index: u32, - previous_input_frame_count: u32, first_vertex_index: u32, first_index_index: u32, index_count: u32, current_skin_index: u32, + previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, + timestamp: u32, // User supplied index to identify the mesh instance tag: u32, - pad: u32, } // The `wgpu` indirect parameters structure. This is a union of two structures. From b21cf8ce21415c9ea0ce3d61d47b5beb53a4b50a Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 11 Feb 2025 12:54:54 -0800 Subject: [PATCH 05/11] wip --- crates/bevy_pbr/src/render/gpu_preprocess.rs | 4 +--- crates/bevy_pbr/src/render/mesh.rs | 24 +++++++++---------- .../bevy_pbr/src/render/mesh_preprocess.wgsl | 16 ++++++------- crates/bevy_pbr/src/render/skin.rs | 16 ------------- crates/bevy_pbr/src/render/skinning.wgsl | 2 +- 5 files changed, 21 insertions(+), 41 deletions(-) diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs index 7cd757a2e5c25..57facd14d4867 100644 --- a/crates/bevy_pbr/src/render/gpu_preprocess.rs +++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs @@ -1800,14 +1800,12 @@ impl<'a> PreprocessBindGroupBuilder<'a> { ) .ok(); - let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; - Some(PhasePreprocessBindGroups::Direct( self.render_device.create_bind_group( "preprocess_direct_bind_group", &self.pipelines.direct_preprocess.bind_group_layout, &BindGroupEntries::with_indices(( - (0, view_uniforms_binding), + (0, self.view_uniforms.uniforms.binding()?), (3, self.current_input_buffer.as_entire_binding()), (4, self.previous_input_buffer.as_entire_binding()), ( diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 34d0debbb7c6b..6406ec411d140 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -1,7 +1,4 @@ -use crate::{ - material_bind_groups::{MaterialBindGroupIndex, MaterialBindGroupSlot}, - skin::mark_meshes_as_changed_if_their_skins_changed, -}; +use crate::material_bind_groups::{MaterialBindGroupIndex, MaterialBindGroupSlot}; use allocator::MeshAllocator; use bevy_asset::{load_internal_asset, AssetId, UntypedAssetId}; use bevy_core_pipeline::{ @@ -30,7 +27,7 @@ use bevy_render::{ no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching, }, camera::Camera, - mesh::*, + mesh::{skinning::SkinnedMesh, *}, primitives::Aabb, render_asset::RenderAssets, render_phase::{ @@ -167,13 +164,7 @@ impl Plugin for MeshRenderPlugin { app.add_systems( PostUpdate, - ( - no_automatic_skin_batching, - no_automatic_morph_batching, - mark_meshes_as_changed_if_their_skins_changed - .ambiguous_with_all() - .after(mark_3d_meshes_as_changed_if_their_assets_changed), - ), + (no_automatic_skin_batching, no_automatic_morph_batching), ) .add_plugins(( BinnedRenderPhasePlugin::::default(), @@ -546,12 +537,20 @@ pub struct MeshInputUniform { pub index_count: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, + /// The previous skin index, or `u32::MAX` if there's no previous skin. pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. /// High 16 bits: index of the lightmap in the binding array. pub material_and_lightmap_bind_group_slot: u32, + /// The number of the frame on which this [`MeshInputUniform`] was built. + /// + /// This is used to validate the previous transform and skin. If this + /// [`MeshInputUniform`] wasn't updated on this frame, then we know that + /// neither this mesh's transform nor that of its joints have been updated + /// on this frame, and therefore the transforms of both this mesh and its + /// joints must be identical to those for the previous frame. pub timestamp: u32, /// User supplied tag to identify this mesh instance. pub tag: u32, @@ -1442,6 +1441,7 @@ pub fn extract_meshes_for_gpu_building( Changed, Changed, Changed, + With, )>, >, >, diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index 932bd58a1aaab..32f55daaf1ccf 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -192,11 +192,15 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { } #endif - // Was the mesh transform updated this frame? + // See whether the `MeshInputUniform` was updated on this frame. If it + // wasn't, then we know the transforms of this mesh must be identical to + // those on the previous frame, and therefore we don't need to access the + // `previous_input_index` (in fact, we can't; that index are only valid for + // one frame and will be invalid). let timestamp = current_input[input_index].timestamp; let mesh_changed_this_frame = timestamp == view.frame_count; - // Look up the previous model matrix. + // Look up the previous model matrix, if it could have been. let previous_input_index = current_input[input_index].previous_input_index; var previous_world_from_local_affine_transpose: mat3x4; if (mesh_changed_this_frame && previous_input_index != 0xffffffffu) { @@ -208,12 +212,6 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { let previous_world_from_local = maths::affine3_to_square(previous_world_from_local_affine_transpose); - let previous_skin_index = select( - 0xffffffffu, - current_input[input_index].previous_skin_index, - mesh_changed_this_frame - ); - // Occlusion cull if necessary. This is done by calculating the screen-space // axis-aligned bounding box (AABB) of the mesh and testing it against the // appropriate level of the depth pyramid (a.k.a. hierarchical Z-buffer). If @@ -352,7 +350,7 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index; output[mesh_output_index].current_skin_index = current_input[input_index].current_skin_index; - output[mesh_output_index].previous_skin_index = previous_skin_index; + output[mesh_output_index].previous_skin_index = current_input[input_index].previous_skin_index; output[mesh_output_index].material_and_lightmap_bind_group_slot = current_input[input_index].material_and_lightmap_bind_group_slot; output[mesh_output_index].tag = current_input[input_index].tag; diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs index c3082caf1e490..09e7d0465a7e3 100644 --- a/crates/bevy_pbr/src/render/skin.rs +++ b/crates/bevy_pbr/src/render/skin.rs @@ -4,7 +4,6 @@ use std::sync::OnceLock; use bevy_asset::Assets; use bevy_ecs::prelude::*; use bevy_math::Mat4; -use bevy_render::mesh::Mesh3d; use bevy_render::sync_world::MainEntityHashMap; use bevy_render::{ batching::NoAutomaticBatching, @@ -233,18 +232,3 @@ pub fn no_automatic_skin_batching( commands.entity(entity).try_insert(NoAutomaticBatching); } } - -pub fn mark_meshes_as_changed_if_their_skins_changed( - mut skinned_meshes_query: Query<(&mut Mesh3d, &SkinnedMesh)>, - changed_joints_query: Query>, -) { - for (mut mesh, skinned_mesh) in &mut skinned_meshes_query { - if skinned_mesh - .joints - .iter() - .any(|&joint| changed_joints_query.contains(joint)) - { - mesh.set_changed(); - } - } -} diff --git a/crates/bevy_pbr/src/render/skinning.wgsl b/crates/bevy_pbr/src/render/skinning.wgsl index 038b293eed238..c240cc99cfb71 100644 --- a/crates/bevy_pbr/src/render/skinning.wgsl +++ b/crates/bevy_pbr/src/render/skinning.wgsl @@ -59,7 +59,7 @@ fn skin_prev_model( #else // SKINS_USE_UNIFORM_BUFFERS let prev_skin_index = mesh[instance_index].previous_skin_index; if (prev_skin_index == 0xffffffffu) { - return skin_model(indexed, weights, instance_index); + return skin_model(indexes, weights, instance_index); } return weights.x * prev_joint_matrices[prev_skin_index + indexes.x] From 8ab8e82250c221bc745a46d898890a15eda0b74a Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 11 Feb 2025 14:37:07 -0800 Subject: [PATCH 06/11] Retain skins from frame to frame. Currently, Bevy rebuilds the buffer containing all the transforms for joints every frame, during the extraction phase. This is inefficient in cases in which many skins are present in the scene and their joints don't move, such as the Caldera test scene. To address this problem, this commit switches skin extraction to use a set of retained GPU buffers with allocations managed by the offset allocator. I use fine-grained change detection in order to determine which skins need updating. Note that the granularity is on the level of an entire skin, not individual joints. Using the change detection at that level would yield poor performance in common cases in which an entire skin is animated at once. Also, this patch yields additional performance from the fact that changing joint transforms no longer requires the skinned mesh to be re-extracted. Note that this optimization can be a double-edged sword. In `many_foxes`, fine-grained change detection regressed the performance of `extract_skins` by 3.4x. This is because every joint is updated every frame in that example, so change detection is pointless and is pure overhead. Because the `many_foxes` workload is actually representative of animated scenes, this patch includes a heuristic that disables fine-grained change detection if the number of transformed entities in the frame exceeds a certain fraction of the total number of joints. Currently, this threshold is set to 25%. Note that this is a crude heuristic, because it doesn't distinguish between the number of transformed *joints* and the number of transformed *entities*; however, it should be good enough to yield the optimum code path most of the time. Finally, this patch fixes a bug whereby skinned meshes are actually being incorrectly retained if the buffer offsets of the joints of those skinned meshes changes from frame to frame. To fix this without retaining skins, we would have to re-extract every skinned mesh every frame. Doing this was a significant regression on Caldera. With this PR, by contrast, mesh joints stay at the same buffer offset, so we don't have to update the `MeshInputUniform` containing the buffer offset every frame. This also makes PR #17717 easier to implement, because that PR uses the buffer offset from the previous frame, and the logic for calculating that is simplified if the previous frame's buffer offset is guaranteed to be identical to that of the current frame. On Caldera, this patch reduces the time spent in `extract_skins` from 1.79 ms to near zero. On `many_foxes`, this patch regresses the performance of `extract_skins` by approximately 10%-25%, depending on the number of foxes. This has only a small impact on frame rate. --- crates/bevy_pbr/Cargo.toml | 1 + crates/bevy_pbr/src/render/mesh.rs | 156 +++-- .../bevy_pbr/src/render/mesh_preprocess.wgsl | 1 - crates/bevy_pbr/src/render/mesh_types.wgsl | 2 +- crates/bevy_pbr/src/render/mod.rs | 2 +- crates/bevy_pbr/src/render/skin.rs | 554 +++++++++++++++--- crates/bevy_pbr/src/render/skinning.wgsl | 2 +- .../mesh_preprocess_types.wgsl | 4 +- examples/shader/custom_render_phase.rs | 3 +- 9 files changed, 552 insertions(+), 173 deletions(-) diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml index 5ed71192897d4..72c0aff8edb2a 100644 --- a/crates/bevy_pbr/Cargo.toml +++ b/crates/bevy_pbr/Cargo.toml @@ -73,6 +73,7 @@ smallvec = "1.6" nonmax = "0.5" static_assertions = "1" tracing = { version = "0.1", default-features = false, features = ["std"] } +offset-allocator = "0.2" [lints] workspace = true diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index d89ee2a785a9a..43855617fa8c4 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -1,4 +1,7 @@ -use crate::material_bind_groups::{MaterialBindGroupIndex, MaterialBindGroupSlot}; +use crate::{ + material_bind_groups::{MaterialBindGroupIndex, MaterialBindGroupSlot}, + skin::mark_skins_as_changed_if_their_inverse_bindposes_changed, +}; use allocator::MeshAllocator; use bevy_asset::{load_internal_asset, AssetId, UntypedAssetId}; use bevy_core_pipeline::{ @@ -26,7 +29,7 @@ use bevy_render::{ no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching, }, camera::Camera, - mesh::*, + mesh::{skinning::SkinnedMesh, *}, primitives::Aabb, render_asset::RenderAssets, render_phase::{ @@ -46,7 +49,7 @@ use bevy_transform::components::GlobalTransform; use bevy_utils::{default, Parallel}; use core::mem::size_of; use material_bind_groups::MaterialBindingId; -use render::skin::{self, SkinIndex}; +use render::skin; use tracing::{error, warn}; use self::irradiance_volume::IRRADIANCE_VOLUMES_ARE_USABLE; @@ -163,7 +166,11 @@ impl Plugin for MeshRenderPlugin { app.add_systems( PostUpdate, - (no_automatic_skin_batching, no_automatic_morph_batching), + ( + no_automatic_skin_batching, + no_automatic_morph_batching, + mark_skins_as_changed_if_their_inverse_bindposes_changed, + ), ) .add_plugins(( BinnedRenderPhasePlugin::::default(), @@ -178,7 +185,6 @@ impl Plugin for MeshRenderPlugin { if let Some(render_app) = app.get_sub_app_mut(RenderApp) { render_app .init_resource::() - .init_resource::() .init_resource::() .init_resource::() .init_resource::() @@ -478,8 +484,6 @@ pub struct MeshUniform { pub first_vertex_index: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, - /// The previous skin index, or `u32::MAX` if there's no previous skin. - pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. @@ -487,6 +491,8 @@ pub struct MeshUniform { pub material_and_lightmap_bind_group_slot: u32, /// User supplied tag to identify this mesh instance. pub tag: u32, + /// Padding. + pub pad: u32, } /// Information that has to be transferred from CPU to GPU in order to produce @@ -536,8 +542,6 @@ pub struct MeshInputUniform { pub index_count: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, - /// The previous skin index, or `u32::MAX` if there's no previous skin. - pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. @@ -546,7 +550,9 @@ pub struct MeshInputUniform { /// User supplied tag to identify this mesh instance. pub tag: u32, /// Padding. - pub pad: u32, + pub pad_a: u32, + /// Padding. + pub pad_b: u32, } /// Information about each mesh instance needed to cull it on GPU. @@ -579,7 +585,6 @@ impl MeshUniform { material_bind_group_slot: MaterialBindGroupSlot, maybe_lightmap: Option<(LightmapSlotIndex, Rect)>, current_skin_index: Option, - previous_skin_index: Option, tag: Option, ) -> Self { let (local_from_world_transpose_a, local_from_world_transpose_b) = @@ -598,10 +603,10 @@ impl MeshUniform { flags: mesh_transforms.flags, first_vertex_index, current_skin_index: current_skin_index.unwrap_or(u32::MAX), - previous_skin_index: previous_skin_index.unwrap_or(u32::MAX), material_and_lightmap_bind_group_slot: u32::from(material_bind_group_slot) | ((lightmap_bind_group_slot as u32) << 16), tag: tag.unwrap_or(0), + pad: 0, } } } @@ -1107,7 +1112,7 @@ impl RenderMeshInstanceGpuBuilder { mesh_material_ids: &RenderMeshMaterialIds, render_material_bindings: &RenderMaterialBindings, render_lightmaps: &RenderLightmaps, - skin_indices: &SkinIndices, + skin_uniforms: &SkinUniforms, ) -> u32 { let (first_vertex_index, vertex_count) = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) { @@ -1126,13 +1131,8 @@ impl RenderMeshInstanceGpuBuilder { ), None => (false, 0, 0), }; - - let current_skin_index = match skin_indices.current.get(&entity) { - Some(skin_indices) => skin_indices.index(), - None => u32::MAX, - }; - let previous_skin_index = match skin_indices.prev.get(&entity) { - Some(skin_indices) => skin_indices.index(), + let current_skin_index = match skin_uniforms.skin_byte_offset(entity) { + Some(skin_index) => skin_index.index(), None => u32::MAX, }; @@ -1168,12 +1168,12 @@ impl RenderMeshInstanceGpuBuilder { vertex_count }, current_skin_index, - previous_skin_index, material_and_lightmap_bind_group_slot: u32::from( self.shared.material_bindings_index.slot, ) | ((lightmap_slot as u32) << 16), tag: self.shared.tag, - pad: 0, + pad_a: 0, + pad_b: 0, }; // Did the last frame contain this entity as well? @@ -1441,6 +1441,7 @@ pub fn extract_meshes_for_gpu_building( Changed, Changed, Changed, + Changed, )>, >, >, @@ -1575,10 +1576,10 @@ pub fn extract_meshes_for_gpu_building( /// loop. fn set_mesh_motion_vector_flags( mut render_mesh_instances: ResMut, - skin_indices: Res, + skin_uniforms: Res, morph_indices: Res, ) { - for &entity in skin_indices.prev.keys() { + for &entity in skin_uniforms.all_skins() { render_mesh_instances .insert_mesh_instance_flags(entity, RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN); } @@ -1601,7 +1602,7 @@ pub fn collect_meshes_for_gpu_building( mesh_material_ids: Res, render_material_bindings: Res, render_lightmaps: Res, - skin_indices: Res, + skin_uniforms: Res, ) { let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = render_mesh_instances.into_inner() @@ -1640,7 +1641,7 @@ pub fn collect_meshes_for_gpu_building( &mesh_material_ids, &render_material_bindings, &render_lightmaps, - &skin_indices, + &skin_uniforms, ); } @@ -1667,7 +1668,7 @@ pub fn collect_meshes_for_gpu_building( &mesh_material_ids, &render_material_bindings, &render_lightmaps, - &skin_indices, + &skin_uniforms, ); mesh_culling_builder .update(&mut mesh_culling_data_buffer, instance_data_index as usize); @@ -1817,7 +1818,7 @@ impl GetBatchData for MeshPipeline { SRes, SRes>, SRes, - SRes, + SRes, ); // The material bind group ID, the mesh ID, and the lightmap ID, // respectively. @@ -1830,7 +1831,9 @@ impl GetBatchData for MeshPipeline { type BufferData = MeshUniform; fn get_batch_data( - (mesh_instances, lightmaps, _, mesh_allocator, skin_indices): &SystemParamItem, + (mesh_instances, lightmaps, _, mesh_allocator, skin_uniforms): &SystemParamItem< + Self::Param, + >, (_entity, main_entity): (Entity, MainEntity), ) -> Option<(Self::BufferData, Option)> { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { @@ -1848,9 +1851,7 @@ impl GetBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); - let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); - + let current_skin_index = skin_uniforms.skin_index(main_entity); let material_bind_group_index = mesh_instance.material_bindings_index; Some(( @@ -1860,7 +1861,6 @@ impl GetBatchData for MeshPipeline { material_bind_group_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, - previous_skin_index, Some(mesh_instance.tag), ), mesh_instance.should_batch().then_some(( @@ -1902,7 +1902,9 @@ impl GetFullBatchData for MeshPipeline { } fn get_binned_batch_data( - (mesh_instances, lightmaps, _, mesh_allocator, skin_indices): &SystemParamItem, + (mesh_instances, lightmaps, _, mesh_allocator, skin_uniforms): &SystemParamItem< + Self::Param, + >, main_entity: MainEntity, ) -> Option { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { @@ -1919,8 +1921,7 @@ impl GetFullBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); - let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); + let current_skin_index = skin_uniforms.skin_index(main_entity); Some(MeshUniform::new( &mesh_instance.transforms, @@ -1928,7 +1929,6 @@ impl GetFullBatchData for MeshPipeline { mesh_instance.material_bindings_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, - previous_skin_index, Some(mesh_instance.tag), )) } @@ -2660,14 +2660,11 @@ pub fn prepare_mesh_bind_group( // Create the skinned mesh bind group with the current and previous buffers // (the latter being for motion vector computation). If there's no previous // buffer, just use the current one as the shader will ignore it. - let skin = skins_uniform.current_buffer.buffer(); - if let Some(skin) = skin { - let prev_skin = skins_uniform.prev_buffer.buffer().unwrap_or(skin); - groups.skinned = Some(MeshBindGroupPair { - motion_vectors: layouts.skinned_motion(&render_device, &model, skin, prev_skin), - no_motion_vectors: layouts.skinned(&render_device, &model, skin), - }); - } + let (skin, prev_skin) = (&skins_uniform.current_buffer, &skins_uniform.prev_buffer); + groups.skinned = Some(MeshBindGroupPair { + motion_vectors: layouts.skinned_motion(&render_device, &model, skin, prev_skin), + no_motion_vectors: layouts.skinned(&render_device, &model, skin), + }); // Create the morphed bind groups just like we did for the skinned bind // group. @@ -2675,29 +2672,28 @@ pub fn prepare_mesh_bind_group( let prev_weights = weights_uniform.prev_buffer.buffer().unwrap_or(weights); for (id, gpu_mesh) in meshes.iter() { if let Some(targets) = gpu_mesh.morph_targets.as_ref() { - let bind_group_pair = match skin.filter(|_| is_skinned(&gpu_mesh.layout)) { - Some(skin) => { - let prev_skin = skins_uniform.prev_buffer.buffer().unwrap_or(skin); - MeshBindGroupPair { - motion_vectors: layouts.morphed_skinned_motion( - &render_device, - &model, - skin, - weights, - targets, - prev_skin, - prev_weights, - ), - no_motion_vectors: layouts.morphed_skinned( - &render_device, - &model, - skin, - weights, - targets, - ), - } + let bind_group_pair = if is_skinned(&gpu_mesh.layout) { + let prev_skin = &skins_uniform.prev_buffer; + MeshBindGroupPair { + motion_vectors: layouts.morphed_skinned_motion( + &render_device, + &model, + skin, + weights, + targets, + prev_skin, + prev_weights, + ), + no_motion_vectors: layouts.morphed_skinned( + &render_device, + &model, + skin, + weights, + targets, + ), } - None => MeshBindGroupPair { + } else { + MeshBindGroupPair { motion_vectors: layouts.morphed_motion( &render_device, &model, @@ -2711,7 +2707,7 @@ pub fn prepare_mesh_bind_group( weights, targets, ), - }, + } }; groups.morph_targets.insert(id, bind_group_pair); } @@ -2783,7 +2779,7 @@ impl RenderCommand

for SetMeshBindGroup { SRes, SRes, SRes, - SRes, + SRes, SRes, SRes, ); @@ -2799,7 +2795,7 @@ impl RenderCommand

for SetMeshBindGroup { render_device, bind_groups, mesh_instances, - skin_indices, + skin_uniforms, morph_indices, lightmaps, ): SystemParamItem<'w, '_, Self::Param>, @@ -2807,7 +2803,7 @@ impl RenderCommand

for SetMeshBindGroup { ) -> RenderCommandResult { let bind_groups = bind_groups.into_inner(); let mesh_instances = mesh_instances.into_inner(); - let skin_indices = skin_indices.into_inner(); + let skin_uniforms = skin_uniforms.into_inner(); let morph_indices = morph_indices.into_inner(); let entity = &item.main_entity(); @@ -2816,12 +2812,11 @@ impl RenderCommand

for SetMeshBindGroup { return RenderCommandResult::Success; }; - let current_skin_index = skin_indices.current.get(entity); - let prev_skin_index = skin_indices.prev.get(entity); + let current_skin_byte_offset = skin_uniforms.skin_byte_offset(*entity); let current_morph_index = morph_indices.current.get(entity); let prev_morph_index = morph_indices.prev.get(entity); - let is_skinned = current_skin_index.is_some(); + let is_skinned = current_skin_byte_offset.is_some(); let is_morphed = current_morph_index.is_some(); let lightmap_slab_index = lightmaps @@ -2849,7 +2844,7 @@ impl RenderCommand

for SetMeshBindGroup { dynamic_offsets[offset_count] = dynamic_offset; offset_count += 1; } - if let Some(current_skin_index) = current_skin_index { + if let Some(current_skin_index) = current_skin_byte_offset { if skin::skins_use_uniform_buffers(&render_device) { dynamic_offsets[offset_count] = current_skin_index.byte_offset; offset_count += 1; @@ -2864,14 +2859,11 @@ impl RenderCommand

for SetMeshBindGroup { if has_motion_vector_prepass { // Attach the previous skin index for motion vector computation. If // there isn't one, just use zero as the shader will ignore it. - if current_skin_index.is_some() && skin::skins_use_uniform_buffers(&render_device) { - match prev_skin_index { - Some(prev_skin_index) => { - dynamic_offsets[offset_count] = prev_skin_index.byte_offset; - } - None => dynamic_offsets[offset_count] = 0, + if skin::skins_use_uniform_buffers(&render_device) { + if let Some(current_skin_byte_offset) = current_skin_byte_offset { + dynamic_offsets[offset_count] = current_skin_byte_offset.byte_offset; + offset_count += 1; } - offset_count += 1; } // Attach the previous morph index for motion vector computation. If diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index 9bbe96e80d633..e219faa9ba547 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -342,7 +342,6 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index; output[mesh_output_index].current_skin_index = current_input[input_index].current_skin_index; - output[mesh_output_index].previous_skin_index = current_input[input_index].previous_skin_index; output[mesh_output_index].material_and_lightmap_bind_group_slot = current_input[input_index].material_and_lightmap_bind_group_slot; output[mesh_output_index].tag = current_input[input_index].tag; diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl index ebac14915ea13..502b91b427d7f 100644 --- a/crates/bevy_pbr/src/render/mesh_types.wgsl +++ b/crates/bevy_pbr/src/render/mesh_types.wgsl @@ -18,12 +18,12 @@ struct Mesh { // The index of the mesh's first vertex in the vertex buffer. first_vertex_index: u32, current_skin_index: u32, - previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, // User supplied index to identify the mesh instance tag: u32, + pad: u32, }; #ifdef SKINNED diff --git a/crates/bevy_pbr/src/render/mod.rs b/crates/bevy_pbr/src/render/mod.rs index 8e26e869a1c96..4b0a20ebecff5 100644 --- a/crates/bevy_pbr/src/render/mod.rs +++ b/crates/bevy_pbr/src/render/mod.rs @@ -13,4 +13,4 @@ pub use light::*; pub use mesh::*; pub use mesh_bindings::MeshLayouts; pub use mesh_view_bindings::*; -pub use skin::{extract_skins, prepare_skins, SkinIndices, SkinUniforms, MAX_JOINTS}; +pub use skin::{extract_skins, prepare_skins, SkinUniforms, MAX_JOINTS}; diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs index c248821ccafd3..ec29f6f75efb0 100644 --- a/crates/bevy_pbr/src/render/skin.rs +++ b/crates/bevy_pbr/src/render/skin.rs @@ -1,19 +1,25 @@ use core::mem::{self, size_of}; use std::sync::OnceLock; -use bevy_asset::Assets; +use bevy_asset::{AssetEvent, Assets}; use bevy_ecs::prelude::*; use bevy_math::Mat4; -use bevy_render::sync_world::MainEntityHashMap; +use bevy_platform_support::collections::hash_map::Entry; +use bevy_platform_support::collections::HashSet; +use bevy_render::render_resource::{Buffer, BufferDescriptor}; +use bevy_render::sync_world::{MainEntity, MainEntityHashMap, MainEntityHashSet}; use bevy_render::{ batching::NoAutomaticBatching, mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes}, - render_resource::{BufferUsages, RawBufferVec}, + render_resource::BufferUsages, renderer::{RenderDevice, RenderQueue}, view::ViewVisibility, Extract, }; use bevy_transform::prelude::GlobalTransform; +use offset_allocator::{Allocation, Allocator}; +use smallvec::SmallVec; +use tracing::error; /// Maximum number of joints supported for skinned meshes. /// @@ -24,17 +30,39 @@ use bevy_transform::prelude::GlobalTransform; /// of the GPU at runtime, which would mean not using consts anymore. pub const MAX_JOINTS: usize = 256; +/// The total number of joints we support. +/// +/// This is 256 GiB worth of joint matrices, which we will never hit under any +/// reasonable circumstances. +const MAX_TOTAL_JOINTS: u32 = 1024 * 1024 * 1024; + +/// The number of joints that we allocate at a time. +/// +/// Some hardware requires that uniforms be allocated on 256-byte boundaries, so +/// we need to allocate 4 64-byte matrices at a time to satisfy alignment +/// requirements. +const JOINTS_PER_ALLOCATION_UNIT: u32 = (256 / size_of::()) as u32; + +/// The maximum ratio of the number of entities whose transforms changed to the +/// total number of joints before we re-extract all joints. +/// +/// We use this as a heuristic to decide whether it's worth switching over to +/// fine-grained detection to determine which skins need extraction. If the +/// number of changed entities is over this threshold, we skip change detection +/// and simply re-extract the transforms of all joints. +const JOINT_EXTRACTION_THRESHOLD_FACTOR: f64 = 0.25; + /// The location of the first joint matrix in the skin uniform buffer. -#[derive(Component)] -pub struct SkinIndex { +#[derive(Component, Clone, Copy)] +pub struct SkinByteOffset { /// The byte offset of the first joint matrix. pub byte_offset: u32, } -impl SkinIndex { +impl SkinByteOffset { /// Index to be in address space based on the size of a skin uniform. const fn new(start: usize) -> Self { - SkinIndex { + SkinByteOffset { byte_offset: (start * size_of::()) as u32, } } @@ -47,22 +75,6 @@ impl SkinIndex { } } -/// Maps each skinned mesh to the applicable offset within the [`SkinUniforms`] -/// buffer. -/// -/// We store both the current frame's joint matrices and the previous frame's -/// joint matrices for the purposes of motion vector calculation. -#[derive(Default, Resource)] -pub struct SkinIndices { - /// Maps each skinned mesh to the applicable offset within - /// [`SkinUniforms::current_buffer`]. - pub current: MainEntityHashMap, - - /// Maps each skinned mesh to the applicable offset within - /// [`SkinUniforms::prev_buffer`]. - pub prev: MainEntityHashMap, -} - /// The GPU buffers containing joint matrices for all skinned meshes. /// /// This is double-buffered: we store the joint matrices of each mesh for the @@ -74,28 +86,109 @@ pub struct SkinIndices { /// Notes on implementation: see comment on top of the `extract_skins` system. #[derive(Resource)] pub struct SkinUniforms { - /// Stores all the joint matrices for skinned meshes in the current frame. - pub current_buffer: RawBufferVec, - /// Stores all the joint matrices for skinned meshes in the previous frame. - pub prev_buffer: RawBufferVec, + /// The CPU-side buffer that stores the joint matrices for skinned meshes in + /// the current frame. + pub current_staging_buffer: Vec, + /// The GPU-side buffer that stores the joint matrices for skinned meshes in + /// the current frame. + pub current_buffer: Buffer, + /// The GPU-side buffer that stores the joint matrices for skinned meshes in + /// the previous frame. + pub prev_buffer: Buffer, + /// The offset allocator that manages the placement of the joints within the + /// [`Self::current_buffer`]. + allocator: Allocator, + /// Allocation information that we keep about each skin. + skin_uniform_info: MainEntityHashMap, + /// Maps each joint entity to the skins it's associated with. + /// + /// We use this in conjunction with change detection to only update the + /// skins that need updating each frame. + /// + /// Note that conceptually this is a hash map of sets, but we use a + /// [`SmallVec`] to avoid allocations for the vast majority of the cases in + /// which each bone belongs to exactly one skin. + joint_to_skins: MainEntityHashMap>, + /// The total number of joints in the scene. + /// + /// We use this as part of our heuristic to decide whether to use + /// fine-grained change detection. + total_joints: usize, } impl FromWorld for SkinUniforms { fn from_world(world: &mut World) -> Self { let device = world.resource::(); - let buffer_usages = if skins_use_uniform_buffers(device) { + let buffer_usages = (if skins_use_uniform_buffers(device) { BufferUsages::UNIFORM } else { BufferUsages::STORAGE - }; + }) | BufferUsages::COPY_DST; + + // Create the current and previous buffer with the minimum sizes. + // + // These will be swapped every frame. + let current_buffer = device.create_buffer(&BufferDescriptor { + label: Some("skin uniform buffer"), + size: MAX_JOINTS as u64 * size_of::() as u64, + usage: buffer_usages, + mapped_at_creation: false, + }); + let prev_buffer = device.create_buffer(&BufferDescriptor { + label: Some("skin uniform buffer"), + size: MAX_JOINTS as u64 * size_of::() as u64, + usage: buffer_usages, + mapped_at_creation: false, + }); Self { - current_buffer: RawBufferVec::new(buffer_usages), - prev_buffer: RawBufferVec::new(buffer_usages), + current_staging_buffer: vec![], + current_buffer, + prev_buffer, + allocator: Allocator::new(MAX_TOTAL_JOINTS), + skin_uniform_info: MainEntityHashMap::default(), + joint_to_skins: MainEntityHashMap::default(), + total_joints: 0, } } } +impl SkinUniforms { + /// Returns the current offset in joints of the skin in the buffer. + pub fn skin_index(&self, skin: MainEntity) -> Option { + self.skin_uniform_info + .get(&skin) + .map(SkinUniformInfo::offset) + } + + /// Returns the current offset in bytes of the skin in the buffer. + pub fn skin_byte_offset(&self, skin: MainEntity) -> Option { + self.skin_uniform_info + .get(&skin) + .map(|skin_uniform_info| SkinByteOffset::new(skin_uniform_info.offset() as usize)) + } + + /// Returns an iterator over all skins in the scene. + pub fn all_skins(&self) -> impl Iterator { + self.skin_uniform_info.keys() + } +} + +/// Allocation information about each skin. +struct SkinUniformInfo { + /// The allocation of the joints within the [`SkinUniforms::current_buffer`]. + allocation: Allocation, + /// The entities that comprise the joints. + joints: Vec, +} + +impl SkinUniformInfo { + /// The offset in joints within the [`SkinUniforms::current_staging_buffer`]. + fn offset(&self) -> u32 { + self.allocation.offset * JOINTS_PER_ALLOCATION_UNIT + } +} + /// Returns true if skinning must use uniforms (and dynamic offsets) because /// storage buffers aren't supported on the current platform. pub fn skins_use_uniform_buffers(render_device: &RenderDevice) -> bool { @@ -104,20 +197,51 @@ pub fn skins_use_uniform_buffers(render_device: &RenderDevice) -> bool { .get_or_init(|| render_device.limits().max_storage_buffers_per_shader_stage == 0) } +/// Uploads the buffers containing the joints to the GPU. pub fn prepare_skins( render_device: Res, render_queue: Res, - mut uniform: ResMut, + uniform: ResMut, ) { - if uniform.current_buffer.is_empty() { + let uniform = uniform.into_inner(); + + if uniform.current_staging_buffer.is_empty() { return; } - let len = uniform.current_buffer.len(); - uniform.current_buffer.reserve(len, &render_device); - uniform - .current_buffer - .write_buffer(&render_device, &render_queue); + // Swap current and previous buffers. + mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer); + + // Resize the buffer if necessary. + let needed_size = uniform.current_staging_buffer.len() as u64 * size_of::() as u64; + if uniform.current_buffer.size() < needed_size { + let mut new_size = uniform.current_buffer.size(); + while new_size < needed_size { + // 1.5× growth factor. + new_size += new_size / 2; + } + + // Create a new buffer. + let buffer_usages = if skins_use_uniform_buffers(&render_device) { + BufferUsages::UNIFORM + } else { + BufferUsages::STORAGE + } | BufferUsages::COPY_DST; + uniform.current_buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("skin uniform buffer"), + usage: buffer_usages, + size: new_size, + mapped_at_creation: false, + }); + } + + // Write the data from `uniform.current_staging_buffer` into + // `uniform.current_buffer`. + render_queue.write_buffer( + &uniform.current_buffer, + 0, + bytemuck::must_cast_slice(&uniform.current_staging_buffer[..]), + ); // We don't need to write `uniform.prev_buffer` because we already wrote it // last frame, and the data should still be on the GPU. @@ -150,70 +274,334 @@ pub fn prepare_skins( // which normally only support fixed size arrays. You just have to make sure // in the shader that you only read the values that are valid for that binding. pub fn extract_skins( - skin_indices: ResMut, - uniform: ResMut, - query: Extract>, - inverse_bindposes: Extract>>, + skin_uniforms: ResMut, + skinned_meshes: Extract>, + changed_skinned_meshes: Extract< + Query< + (Entity, &ViewVisibility, &SkinnedMesh), + Or<(Changed, Changed)>, + >, + >, + skinned_mesh_inverse_bindposes: Extract>>, + changed_transforms: Extract>>, joints: Extract>, - render_device: Res, + mut removed_visibilities_query: Extract>, + mut removed_skinned_meshes_query: Extract>, ) { - let skins_use_uniform_buffers = skins_use_uniform_buffers(&render_device); + let skin_uniforms = skin_uniforms.into_inner(); - // Borrow check workaround. - let (skin_indices, uniform) = (skin_indices.into_inner(), uniform.into_inner()); + // Find skins that have become visible or invisible on this frame. Allocate, + // reallocate, or free space for them as necessary. + add_or_delete_skins( + skin_uniforms, + &changed_skinned_meshes, + &skinned_mesh_inverse_bindposes, + &joints, + ); - // Swap buffers. We need to keep the previous frame's buffer around for the - // purposes of motion vector computation. - mem::swap(&mut skin_indices.current, &mut skin_indices.prev); - mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer); - skin_indices.current.clear(); - uniform.current_buffer.clear(); + // Extract the transforms for all joints from the scene, and write them into + // the staging buffer at the appropriate spot. + extract_joints( + skin_uniforms, + &skinned_meshes, + &changed_skinned_meshes, + &skinned_mesh_inverse_bindposes, + &changed_transforms, + &joints, + ); + + // Delete skins that became invisible. + for skinned_mesh_entity in removed_visibilities_query + .read() + .chain(removed_skinned_meshes_query.read()) + { + // Only remove a skin if we didn't pick it up in `add_or_delete_skins`. + // It's possible that a necessary component was removed and re-added in + // the same frame. + if !changed_skinned_meshes.contains(skinned_mesh_entity) { + remove_skin(skin_uniforms, skinned_mesh_entity.into()); + } + } +} - let mut last_start = 0; +/// Searches for all skins that have become visible or invisible this frame and +/// allocations for them as necessary. +fn add_or_delete_skins( + skin_uniforms: &mut SkinUniforms, + changed_skinned_meshes: &Query< + (Entity, &ViewVisibility, &SkinnedMesh), + Or<(Changed, Changed)>, + >, + skinned_mesh_inverse_bindposes: &Assets, + joints: &Query<&GlobalTransform>, +) { + // Find every skinned mesh that changed one of (1) visibility; (2) joint + // entities (part of `SkinnedMesh`); (3) the associated + // `SkinnedMeshInverseBindposes` asset. + for (skinned_mesh_entity, skinned_mesh_view_visibility, skinned_mesh) in changed_skinned_meshes + { + // Remove the skin if it existed last frame. + let skinned_mesh_entity = MainEntity::from(skinned_mesh_entity); + remove_skin(skin_uniforms, skinned_mesh_entity); - // PERF: This can be expensive, can we move this to prepare? - for (entity, view_visibility, skin) in &query { - if !view_visibility.get() { + // If the skin is invisible, we're done. + if !(*skinned_mesh_view_visibility).get() { continue; } - let buffer = &mut uniform.current_buffer; - let Some(inverse_bindposes) = inverse_bindposes.get(&skin.inverse_bindposes) else { + + // Initialize the skin. + add_skin( + skinned_mesh_entity, + skinned_mesh, + skin_uniforms, + skinned_mesh_inverse_bindposes, + joints, + ); + } +} + +/// Extracts the global transforms of all joints and updates the staging buffer +/// as necessary. +fn extract_joints( + skin_uniforms: &mut SkinUniforms, + skinned_meshes: &Query<(Entity, &SkinnedMesh)>, + changed_skinned_meshes: &Query< + (Entity, &ViewVisibility, &SkinnedMesh), + Or<(Changed, Changed)>, + >, + skinned_mesh_inverse_bindposes: &Assets, + changed_transforms: &Query<(Entity, &GlobalTransform), Changed>, + joints: &Query<&GlobalTransform>, +) { + // If the number of entities that changed transforms exceeds a certain + // fraction (currently 25%) of the total joints in the scene, then skip + // fine-grained change detection. + // + // Note that this is a crude heuristic, for performance reasons. It doesn't + // consider the ratio of modified *joints* to total joints, only the ratio + // of modified *entities* to total joints. Thus in the worst case we might + // end up re-extracting all skins even though none of the joints changed. + // But making the heuristic finer-grained would make it slower to evaluate, + // and we don't want to lose performance. + let threshold = + (skin_uniforms.total_joints as f64 * JOINT_EXTRACTION_THRESHOLD_FACTOR).floor() as usize; + + if changed_transforms.iter().nth(threshold).is_some() { + // Go ahead and re-extract all skins in the scene. + for (skin_entity, skin) in skinned_meshes { + extract_joints_for_skin( + skin_entity.into(), + skin, + skin_uniforms, + changed_skinned_meshes, + skinned_mesh_inverse_bindposes, + joints, + ); + } + return; + } + + // Use fine-grained change detection to figure out only the skins that need + // to have their joints re-extracted. + let dirty_skins: MainEntityHashSet = changed_transforms + .iter() + .flat_map(|(joint, _)| skin_uniforms.joint_to_skins.get(&MainEntity::from(joint))) + .flat_map(|skin_joint_mappings| skin_joint_mappings.iter()) + .copied() + .collect(); + + // Re-extract the joints for only those skins. + for skin_entity in dirty_skins { + let Ok((_, skin)) = skinned_meshes.get(*skin_entity) else { continue; }; - let start = buffer.len(); - - let target = start + skin.joints.len().min(MAX_JOINTS); - buffer.extend( - joints - .iter_many(&skin.joints) - .zip(inverse_bindposes.iter()) - .take(MAX_JOINTS) - .map(|(joint, bindpose)| joint.affine() * *bindpose), + extract_joints_for_skin( + skin_entity, + skin, + skin_uniforms, + changed_skinned_meshes, + skinned_mesh_inverse_bindposes, + joints, ); - // iter_many will skip any failed fetches. This will cause it to assign the wrong bones, - // so just bail by truncating to the start. - if buffer.len() != target { - buffer.truncate(start); + } +} + +/// Extracts all joints for a single skin and writes their transforms into the +/// CPU staging buffer. +fn extract_joints_for_skin( + skin_entity: MainEntity, + skin: &SkinnedMesh, + skin_uniforms: &mut SkinUniforms, + changed_skinned_meshes: &Query< + (Entity, &ViewVisibility, &SkinnedMesh), + Or<(Changed, Changed)>, + >, + skinned_mesh_inverse_bindposes: &Assets, + joints: &Query<&GlobalTransform>, +) { + // If we initialized the skin this frame, we already populated all + // the joints, so there's no need to populate them again. + if changed_skinned_meshes.contains(*skin_entity) { + return; + } + + // Fetch information about the skin. + let Some(skin_uniform_info) = skin_uniforms.skin_uniform_info.get(&skin_entity) else { + return; + }; + let Some(skinned_mesh_inverse_bindposes) = + skinned_mesh_inverse_bindposes.get(&skin.inverse_bindposes) + else { + return; + }; + + // Calculate and write in the new joint matrices. + for (joint_index, (&joint, skinned_mesh_inverse_bindpose)) in skin + .joints + .iter() + .zip(skinned_mesh_inverse_bindposes.iter()) + .enumerate() + { + let Ok(joint_transform) = joints.get(joint) else { continue; + }; + + let joint_matrix = joint_transform.affine() * *skinned_mesh_inverse_bindpose; + skin_uniforms.current_staging_buffer[skin_uniform_info.offset() as usize + joint_index] = + joint_matrix; + } +} + +/// Allocates space for a new skin in the buffers, and populates its joints. +fn add_skin( + skinned_mesh_entity: MainEntity, + skinned_mesh: &SkinnedMesh, + skin_uniforms: &mut SkinUniforms, + skinned_mesh_inverse_bindposes: &Assets, + joints: &Query<&GlobalTransform>, +) { + // Allocate space for the joints. + let Some(allocation) = skin_uniforms.allocator.allocate( + skinned_mesh + .joints + .len() + .div_ceil(JOINTS_PER_ALLOCATION_UNIT as usize) as u32, + ) else { + error!( + "Out of space for skin: {:?}. Tried to allocate space for {:?} joints.", + skinned_mesh_entity, + skinned_mesh.joints.len() + ); + return; + }; + + // Store that allocation. + let skin_uniform_info = SkinUniformInfo { + allocation, + joints: skinned_mesh + .joints + .iter() + .map(|entity| MainEntity::from(*entity)) + .collect(), + }; + + let skinned_mesh_inverse_bindposes = + skinned_mesh_inverse_bindposes.get(&skinned_mesh.inverse_bindposes); + + for (joint_index, &joint) in skinned_mesh.joints.iter().enumerate() { + // Calculate the initial joint matrix. + let skinned_mesh_inverse_bindpose = + skinned_mesh_inverse_bindposes.and_then(|skinned_mesh_inverse_bindposes| { + skinned_mesh_inverse_bindposes.get(joint_index) + }); + let joint_matrix = match (skinned_mesh_inverse_bindpose, joints.get(joint)) { + (Some(skinned_mesh_inverse_bindpose), Ok(transform)) => { + transform.affine() * *skinned_mesh_inverse_bindpose + } + _ => Mat4::IDENTITY, + }; + + // Write in the new joint matrix, growing the staging buffer if + // necessary. + let buffer_index = skin_uniform_info.offset() as usize + joint_index; + if skin_uniforms.current_staging_buffer.len() < buffer_index + 1 { + skin_uniforms + .current_staging_buffer + .resize(buffer_index + 1, Mat4::IDENTITY); } - last_start = last_start.max(start); + skin_uniforms.current_staging_buffer[buffer_index] = joint_matrix; + + // Record the inverse mapping from the joint back to the skin. We use + // this in order to perform fine-grained joint extraction. + skin_uniforms + .joint_to_skins + .entry(MainEntity::from(joint)) + .or_default() + .push(skinned_mesh_entity); + } + + // Record the number of joints. + skin_uniforms.total_joints += skinned_mesh.joints.len(); + + skin_uniforms + .skin_uniform_info + .insert(skinned_mesh_entity, skin_uniform_info); +} + +/// Deallocates a skin and removes it from the [`SkinUniforms`]. +fn remove_skin(skin_uniforms: &mut SkinUniforms, skinned_mesh_entity: MainEntity) { + let Some(old_skin_uniform_info) = skin_uniforms.skin_uniform_info.remove(&skinned_mesh_entity) + else { + return; + }; - // Pad to 256 byte alignment if we're using a uniform buffer. - // There's no need to do this if we're using storage buffers, though. - if skins_use_uniform_buffers { - while buffer.len() % 4 != 0 { - buffer.push(Mat4::ZERO); + // Free the allocation. + skin_uniforms + .allocator + .free(old_skin_uniform_info.allocation); + + // Remove the inverse mapping from each joint back to the skin. + for &joint in &old_skin_uniform_info.joints { + if let Entry::Occupied(mut entry) = skin_uniforms.joint_to_skins.entry(joint) { + entry.get_mut().retain(|skin| *skin != skinned_mesh_entity); + if entry.get_mut().is_empty() { + entry.remove(); } } + } + + // Update the total number of joints. + skin_uniforms.total_joints -= old_skin_uniform_info.joints.len(); +} + +/// Marks [`SkinnedMesh`] components as changed if the +/// [`SkinnedMeshInverseBindposes`] assets that they refer to changed. +/// +/// This is needed to ensure that we pick up the new inverse bindposes in +/// [`extract_skins`]. +pub fn mark_skins_as_changed_if_their_inverse_bindposes_changed( + mut events: EventReader>, + mut skins: Query>, +) { + let dirty_skinned_mesh_inverse_bindposes: HashSet<_> = events + .read() + .map(|event| match *event { + AssetEvent::Added { id } + | AssetEvent::Modified { id } + | AssetEvent::Removed { id } + | AssetEvent::Unused { id } + | AssetEvent::LoadedWithDependencies { id } => id, + }) + .collect(); - skin_indices - .current - .insert(entity.into(), SkinIndex::new(start)); + if dirty_skinned_mesh_inverse_bindposes.is_empty() { + return; } - // Pad out the buffer to ensure that there's enough space for bindings - while uniform.current_buffer.len() - last_start < MAX_JOINTS { - uniform.current_buffer.push(Mat4::ZERO); + for mut skin in &mut skins { + if dirty_skinned_mesh_inverse_bindposes.contains(&skin.inverse_bindposes.id()) { + skin.set_changed(); + } } } diff --git a/crates/bevy_pbr/src/render/skinning.wgsl b/crates/bevy_pbr/src/render/skinning.wgsl index 92e977aeb1b92..96fba57e1b3f6 100644 --- a/crates/bevy_pbr/src/render/skinning.wgsl +++ b/crates/bevy_pbr/src/render/skinning.wgsl @@ -57,7 +57,7 @@ fn skin_prev_model( + weights.z * prev_joint_matrices.data[indexes.z] + weights.w * prev_joint_matrices.data[indexes.w]; #else // SKINS_USE_UNIFORM_BUFFERS - let skin_index = mesh[instance_index].previous_skin_index; + let skin_index = mesh[instance_index].current_skin_index; return weights.x * prev_joint_matrices[skin_index + indexes.x] + weights.y * prev_joint_matrices[skin_index + indexes.y] + weights.z * prev_joint_matrices[skin_index + indexes.z] diff --git a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl index 6d1199adb4282..d12dc81a15f73 100644 --- a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl +++ b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl @@ -15,13 +15,13 @@ struct MeshInput { first_index_index: u32, index_count: u32, current_skin_index: u32, - previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, // User supplied index to identify the mesh instance tag: u32, - pad: u32, + pad_a: u32, + pad_b: u32, } // The `wgpu` indirect parameters structure. This is a union of two structures. diff --git a/examples/shader/custom_render_phase.rs b/examples/shader/custom_render_phase.rs index 12a9c55f2ff77..dae1c4d98d2f8 100644 --- a/examples/shader/custom_render_phase.rs +++ b/examples/shader/custom_render_phase.rs @@ -370,9 +370,9 @@ impl GetBatchData for StencilPipeline { flags: mesh_transforms.flags, first_vertex_index, current_skin_index: u32::MAX, - previous_skin_index: u32::MAX, material_and_lightmap_bind_group_slot: 0, tag: 0, + pad: 0, } }; Some((mesh_uniform, None)) @@ -426,7 +426,6 @@ impl GetFullBatchData for StencilPipeline { None, None, None, - None, )) } From 3b9c765f698b692cc69d8eba6f2cfa9bc2ac6954 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 11 Feb 2025 22:28:50 -0800 Subject: [PATCH 07/11] Fix meshlets --- crates/bevy_pbr/src/meshlet/instance_manager.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/bevy_pbr/src/meshlet/instance_manager.rs b/crates/bevy_pbr/src/meshlet/instance_manager.rs index f615114d417a1..26f6432a1fdf7 100644 --- a/crates/bevy_pbr/src/meshlet/instance_manager.rs +++ b/crates/bevy_pbr/src/meshlet/instance_manager.rs @@ -125,7 +125,6 @@ impl InstanceManager { None, None, None, - None, ); // Append instance data From d58aae4672f234a6ecc012cd57b711e792c8f824 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 11 Feb 2025 23:46:01 -0800 Subject: [PATCH 08/11] Fix ambiguity --- crates/bevy_pbr/src/render/mesh.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 43855617fa8c4..ed02b1bd1bc2b 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -3,7 +3,7 @@ use crate::{ skin::mark_skins_as_changed_if_their_inverse_bindposes_changed, }; use allocator::MeshAllocator; -use bevy_asset::{load_internal_asset, AssetId, UntypedAssetId}; +use bevy_asset::{load_internal_asset, AssetEvents, AssetId, UntypedAssetId}; use bevy_core_pipeline::{ core_3d::{AlphaMask3d, Opaque3d, Transmissive3d, Transparent3d, CORE_3D_DEPTH_FORMAT}, deferred::{AlphaMask3dDeferred, Opaque3dDeferred}, @@ -169,7 +169,7 @@ impl Plugin for MeshRenderPlugin { ( no_automatic_skin_batching, no_automatic_morph_batching, - mark_skins_as_changed_if_their_inverse_bindposes_changed, + mark_skins_as_changed_if_their_inverse_bindposes_changed.after(AssetEvents), ), ) .add_plugins(( From f043ffbffa025170f29ac94261ca1e392deff5a5 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Wed, 12 Feb 2025 11:35:53 -0800 Subject: [PATCH 09/11] Make `SkinByteOffset` not derive `Component` --- crates/bevy_pbr/src/render/skin.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs index ec29f6f75efb0..a60db63138fac 100644 --- a/crates/bevy_pbr/src/render/skin.rs +++ b/crates/bevy_pbr/src/render/skin.rs @@ -53,7 +53,7 @@ const JOINTS_PER_ALLOCATION_UNIT: u32 = (256 / size_of::()) as u32; const JOINT_EXTRACTION_THRESHOLD_FACTOR: f64 = 0.25; /// The location of the first joint matrix in the skin uniform buffer. -#[derive(Component, Clone, Copy)] +#[derive(Clone, Copy)] pub struct SkinByteOffset { /// The byte offset of the first joint matrix. pub byte_offset: u32, From c048c79c257f357161ad83ab3b8dae4536af1d33 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Wed, 12 Feb 2025 12:43:26 -0800 Subject: [PATCH 10/11] Leave enough space at the end of the buffer if we use a uniform buffer. --- crates/bevy_pbr/src/render/skin.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs index a60db63138fac..dbcbe008d709e 100644 --- a/crates/bevy_pbr/src/render/skin.rs +++ b/crates/bevy_pbr/src/render/skin.rs @@ -212,8 +212,11 @@ pub fn prepare_skins( // Swap current and previous buffers. mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer); - // Resize the buffer if necessary. - let needed_size = uniform.current_staging_buffer.len() as u64 * size_of::() as u64; + // Resize the buffer if necessary. Include extra space equal to `MAX_JOINTS` + // because we need to be able to bind a full uniform buffer's worth of data + // if skins use uniform buffers on this platform. + let needed_size = (uniform.current_staging_buffer.len() as u64 + MAX_JOINTS as u64) + * size_of::() as u64; if uniform.current_buffer.size() < needed_size { let mut new_size = uniform.current_buffer.size(); while new_size < needed_size { From 124ac322310cee31f5da4777cb3d061536a66dde Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 18 Feb 2025 00:57:40 -0800 Subject: [PATCH 11/11] Warning police --- crates/bevy_pbr/src/render/mesh.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index c824839a1f1ff..459a629707dac 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -2762,7 +2762,7 @@ fn prepare_mesh_bind_groups_for_phase( let prev_skin = &skins_uniform.prev_buffer; MeshBindGroupPair { motion_vectors: layouts.morphed_skinned_motion( - &render_device, + render_device, &model, skin, weights, @@ -2771,7 +2771,7 @@ fn prepare_mesh_bind_groups_for_phase( prev_weights, ), no_motion_vectors: layouts.morphed_skinned( - &render_device, + render_device, &model, skin, weights, @@ -2787,12 +2787,7 @@ fn prepare_mesh_bind_groups_for_phase( targets, prev_weights, ), - no_motion_vectors: layouts.morphed( - render_device, - &model, - weights, - targets, - ), + no_motion_vectors: layouts.morphed(render_device, &model, weights, targets), } }; groups.morph_targets.insert(id, bind_group_pair);