diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs
index dfc8739ca4779..5ab700a7301f3 100644
--- a/crates/bevy_pbr/src/render/gpu_preprocess.rs
+++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs
@@ -48,7 +48,7 @@ use bevy_render::{
     },
     renderer::{RenderContext, RenderDevice, RenderQueue},
     settings::WgpuFeatures,
-    view::{NoIndirectDrawing, ViewUniform, ViewUniformOffset, ViewUniforms},
+    view::{ExtractedView, NoIndirectDrawing, ViewUniform, ViewUniformOffset, ViewUniforms},
     Render, RenderApp, RenderSet,
 };
 use bevy_utils::TypeIdMap;
@@ -100,7 +100,7 @@ pub struct GpuMeshPreprocessPlugin {
 pub struct EarlyGpuPreprocessNode {
     view_query: QueryState<
         (
-            Entity,
+            Read<ExtractedView>,
             Option<Read<PreprocessBindGroups>>,
             Option<Read<ViewUniformOffset>>,
             Has<NoIndirectDrawing>,
@@ -120,7 +120,11 @@ pub struct EarlyGpuPreprocessNode {
 /// metadata for the subsequent [`LatePrepassBuildIndirectParametersNode`].
 pub struct LateGpuPreprocessNode {
     view_query: QueryState<
-        (Entity, Read<PreprocessBindGroups>, Read<ViewUniformOffset>),
+        (
+            Read<ExtractedView>,
+            Read<PreprocessBindGroups>,
+            Read<ViewUniformOffset>,
+        ),
         (
             Without<SkipGpuPreprocess>,
             Without<NoIndirectDrawing>,
@@ -580,7 +584,8 @@ impl Node for EarlyGpuPreprocessNode {
             };
 
             // Grab the work item buffers for this view.
-            let Some(phase_work_item_buffers) = index_buffers.get(&view) else {
+            let Some(phase_work_item_buffers) = index_buffers.get(&view.retained_view_entity)
+            else {
                 warn!("The preprocessing index buffer wasn't present");
                 continue;
             };
@@ -791,7 +796,8 @@ impl Node for LateGpuPreprocessNode {
         // Run the compute passes.
         for (view, bind_groups, view_uniform_offset) in self.view_query.iter_manual(world) {
             // Grab the work item buffers for this view.
-            let Some(phase_work_item_buffers) = work_item_buffers.get(&view) else {
+            let Some(phase_work_item_buffers) = work_item_buffers.get(&view.retained_view_entity)
+            else {
                 warn!("The preprocessing index buffer wasn't present");
                 continue;
             };
@@ -1619,6 +1625,7 @@ impl BuildIndirectParametersPipeline {
 )]
 pub fn prepare_preprocess_bind_groups(
     mut commands: Commands,
+    views: Query<(Entity, &ExtractedView)>,
     view_depth_pyramids: Query<(&ViewDepthPyramid, &PreviousViewUniformOffset)>,
     render_device: Res<RenderDevice>,
     batched_instance_buffers: Res<BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>,
@@ -1651,14 +1658,19 @@ pub fn prepare_preprocess_bind_groups(
     let mut any_indirect = false;
 
     // Loop over each view.
-    for (view, phase_work_item_buffers) in work_item_buffers {
+    for (view_entity, view) in &views {
+        let Some(phase_work_item_buffers) = work_item_buffers.get(&view.retained_view_entity)
+        else {
+            continue;
+        };
+
         let mut bind_groups = TypeIdMap::default();
 
         // Loop over each phase.
         for (&phase_id, work_item_buffers) in phase_work_item_buffers {
             // Create the `PreprocessBindGroupBuilder`.
             let preprocess_bind_group_builder = PreprocessBindGroupBuilder {
-                view: *view,
+                view: view_entity,
                 late_indexed_indirect_parameters_buffer,
                 late_non_indexed_indirect_parameters_buffer,
                 render_device: &render_device,
@@ -1719,7 +1731,7 @@ pub fn prepare_preprocess_bind_groups(
         // Save the bind groups.
         commands
-            .entity(*view)
+            .entity(view_entity)
             .insert(PreprocessBindGroups(bind_groups));
     }
 
diff --git a/crates/bevy_render/src/batching/gpu_preprocessing.rs b/crates/bevy_render/src/batching/gpu_preprocessing.rs
index ff5f79f43dd6d..69e759da2498d 100644
--- a/crates/bevy_render/src/batching/gpu_preprocessing.rs
+++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs
@@ -4,7 +4,6 @@ use core::any::TypeId;
 
 use bevy_app::{App, Plugin};
 use bevy_ecs::{
-    entity::{hash_map::EntityHashMap, Entity},
     query::{Has, With},
     resource::Resource,
     schedule::IntoSystemConfigs as _,
@@ -13,7 +12,7 @@ use bevy_ecs::{
 };
 use bevy_encase_derive::ShaderType;
 use bevy_math::UVec4;
-use bevy_platform_support::collections::hash_map::Entry;
+use bevy_platform_support::collections::{hash_map::Entry, HashMap, HashSet};
 use bevy_utils::{default, TypeIdMap};
 use bytemuck::{Pod, Zeroable};
 use nonmax::NonMaxU32;
@@ -30,7 +29,7 @@ use crate::{
     },
     render_resource::{Buffer, BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
     renderer::{RenderAdapter, RenderDevice, RenderQueue},
-    view::{ExtractedView, NoIndirectDrawing},
+    view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity},
     Render, RenderApp, RenderSet,
 };
 
@@ -157,7 +156,7 @@ where
     /// corresponds to each instance.
     ///
     /// This is keyed off each view. Each view has a separate buffer.
-    pub work_item_buffers: EntityHashMap<TypeIdMap<PreprocessWorkItemBuffers>>,
+    pub work_item_buffers: HashMap<RetainedViewEntity, TypeIdMap<PreprocessWorkItemBuffers>>,
 
     /// The uniform data inputs for the current frame.
     ///
@@ -409,8 +408,8 @@ impl Default for LatePreprocessWorkItemIndirectParameters {
 /// You may need to call this function if you're implementing your own custom
 /// render phases. See the `specialized_mesh_pipeline` example.
 pub fn get_or_create_work_item_buffer<'a, I>(
-    work_item_buffers: &'a mut EntityHashMap<TypeIdMap<PreprocessWorkItemBuffers>>,
-    view: Entity,
+    work_item_buffers: &'a mut HashMap<RetainedViewEntity, TypeIdMap<PreprocessWorkItemBuffers>>,
+    view: RetainedViewEntity,
     no_indirect_drawing: bool,
     gpu_occlusion_culling: bool,
     late_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
@@ -493,6 +492,28 @@ impl PreprocessWorkItemBuffers {
             }
         }
     }
+
+    /// Clears out the GPU work item buffers in preparation for a new frame.
+    pub fn clear(&mut self) {
+        match *self {
+            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
+                buffer.clear();
+            }
+            PreprocessWorkItemBuffers::Indirect {
+                indexed: ref mut indexed_buffer,
+                non_indexed: ref mut non_indexed_buffer,
+                ref mut gpu_occlusion_culling,
+            } => {
+                indexed_buffer.clear();
+                non_indexed_buffer.clear();
+
+                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
+                    gpu_occlusion_culling.late_indexed.clear();
+                    gpu_occlusion_culling.late_non_indexed.clear();
+                }
+            }
+        }
+    }
 }
 
 /// One invocation of the preprocessing shader: i.e. one mesh instance in a
@@ -942,7 +963,7 @@ where
     pub fn new() -> Self {
         BatchedInstanceBuffers {
             data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
-            work_item_buffers: EntityHashMap::default(),
+            work_item_buffers: HashMap::default(),
             current_input_buffer: InstanceInputUniformBuffer::new(),
             previous_input_buffer: InstanceInputUniformBuffer::new(),
             late_indexed_indirect_parameters_buffer: RawBufferVec::new(
@@ -968,7 +989,14 @@ where
         self.data_buffer.clear();
         self.late_indexed_indirect_parameters_buffer.clear();
         self.late_non_indexed_indirect_parameters_buffer.clear();
-        self.work_item_buffers.clear();
+
+        // Clear each individual set of buffers, but don't depopulate the hash
+        // table. We want to avoid reallocating these vectors every frame.
+        for view_work_item_buffers in self.work_item_buffers.values_mut() {
+            for phase_work_item_buffers in view_work_item_buffers.values_mut() {
+                phase_work_item_buffers.clear();
+            }
+        }
     }
 }
 
@@ -1074,13 +1102,17 @@ pub fn delete_old_work_item_buffers<GFBD>(
     mut gpu_batched_instance_buffers: ResMut<
         BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
     >,
-    extracted_views: Query<Entity, With<ExtractedView>>,
+    extracted_views: Query<&ExtractedView>,
 ) where
     GFBD: GetFullBatchData,
 {
+    let retained_view_entities: HashSet<_> = extracted_views
+        .iter()
+        .map(|extracted_view| extracted_view.retained_view_entity)
+        .collect();
     gpu_batched_instance_buffers
         .work_item_buffers
-        .retain(|entity, _| extracted_views.contains(*entity));
+        .retain(|retained_view_entity, _| retained_view_entities.contains(retained_view_entity));
 }
 
 /// Batch the items in a sorted render phase, when GPU instance buffer building
@@ -1091,7 +1123,6 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
     mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
     mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
     mut views: Query<(
-        Entity,
         &ExtractedView,
         Has<NoIndirectDrawing>,
         Has<OcclusionCulling>,
@@ -1110,7 +1141,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
         ..
     } = gpu_array_buffer.into_inner();
 
-    for (view, extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
+    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
         let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else {
             continue;
         };
@@ -1118,7 +1149,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
         // Create the work item buffer if necessary.
         let work_item_buffer = get_or_create_work_item_buffer::<I>(
             work_item_buffers,
-            view,
+            extracted_view.retained_view_entity,
             no_indirect_drawing,
             gpu_occlusion_culling,
             late_indexed_indirect_parameters_buffer,
@@ -1248,7 +1279,6 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
     mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
     mut views: Query<
         (
-            Entity,
             &ExtractedView,
             Has<NoIndirectDrawing>,
             Has<OcclusionCulling>,
@@ -1270,7 +1300,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
         ..
     } = gpu_array_buffer.into_inner();
 
-    for (view, extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
+    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
         let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else {
             continue;
         };
@@ -1279,7 +1309,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
         // used this frame.
        let work_item_buffer = get_or_create_work_item_buffer::<BPI>(
             work_item_buffers,
-            view,
+            extracted_view.retained_view_entity,
             no_indirect_drawing,
             gpu_occlusion_culling,
             late_indexed_indirect_parameters_buffer,
diff --git a/examples/shader/specialized_mesh_pipeline.rs b/examples/shader/specialized_mesh_pipeline.rs
index 9cc414e26a392..060799160179b 100644
--- a/examples/shader/specialized_mesh_pipeline.rs
+++ b/examples/shader/specialized_mesh_pipeline.rs
@@ -280,7 +280,6 @@ fn queue_custom_mesh_pipeline(
     ),
     mut specialized_mesh_pipelines: ResMut<SpecializedMeshPipelines<CustomMeshPipeline>>,
     views: Query<(
-        Entity,
         &RenderVisibleEntities,
         &ExtractedView,
         &Msaa,
@@ -318,14 +317,8 @@ fn queue_custom_mesh_pipeline(
     // Render phases are per-view, so we need to iterate over all views so that
     // the entity appears in them. (In this example, we have only one view, but
     // it's good practice to loop over all views anyway.)
-    for (
-        view_entity,
-        view_visible_entities,
-        view,
-        msaa,
-        no_indirect_drawing,
-        gpu_occlusion_culling,
-    ) in views.iter()
+    for (view_visible_entities, view, msaa, no_indirect_drawing, gpu_occlusion_culling) in
+        views.iter()
     {
         let Some(opaque_phase) = opaque_render_phases.get_mut(&view.retained_view_entity) else {
             continue;
         };
@@ -336,7 +329,7 @@ fn queue_custom_mesh_pipeline(
         // enabled.
         let work_item_buffer = gpu_preprocessing::get_or_create_work_item_buffer::<Opaque3d>(
             work_item_buffers,
-            view_entity,
+            view.retained_view_entity,
             no_indirect_drawing,
             gpu_occlusion_culling,
             late_indexed_indirect_parameters_buffer,
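
For downstream code that implements its own render phases, the practical effect of this diff is that preprocessing work item buffers are now keyed off `RetainedViewEntity` rather than the render-world `Entity`. Below is a minimal sketch (not part of this diff) of the new call shape, assuming the `get_or_create_work_item_buffer` signature shown above; the trailing non-indexed buffer parameter is inferred from the surrounding code, and `MyPhaseItem` plus `queue_view` are hypothetical names used only for illustration.

```rust
use bevy_platform_support::collections::HashMap;
use bevy_render::{
    batching::gpu_preprocessing::{
        get_or_create_work_item_buffer, LatePreprocessWorkItemIndirectParameters,
        PreprocessWorkItemBuffers,
    },
    render_resource::RawBufferVec,
    view::{ExtractedView, RetainedViewEntity},
};
use bevy_utils::TypeIdMap;

/// Hypothetical phase item type; it only selects the per-phase work item buffer.
struct MyPhaseItem;

/// Sketch of the per-view queuing step for a custom phase after this change.
fn queue_view(
    work_item_buffers: &mut HashMap<RetainedViewEntity, TypeIdMap<PreprocessWorkItemBuffers>>,
    extracted_view: &ExtractedView,
    no_indirect_drawing: bool,
    gpu_occlusion_culling: bool,
    late_indexed: &mut RawBufferVec<LatePreprocessWorkItemIndirectParameters>,
    late_non_indexed: &mut RawBufferVec<LatePreprocessWorkItemIndirectParameters>,
) {
    // The buffers are keyed off the retained (main-world) view entity instead
    // of the render-world `Entity`.
    let _work_item_buffer = get_or_create_work_item_buffer::<MyPhaseItem>(
        work_item_buffers,
        extracted_view.retained_view_entity,
        no_indirect_drawing,
        gpu_occlusion_culling,
        late_indexed,
        late_non_indexed,
    );
}
```

This matches the intent of the new `PreprocessWorkItemBuffers::clear` method above: per-view entries persist in the map under a stable key across frames, so the per-phase vectors are cleared and reused rather than the whole table being rebuilt.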