diff --git a/crates/bevy_pbr/src/decal/forward.rs b/crates/bevy_pbr/src/decal/forward.rs index 1229a688a9cf7..7732f1d3a4ab3 100644 --- a/crates/bevy_pbr/src/decal/forward.rs +++ b/crates/bevy_pbr/src/decal/forward.rs @@ -14,6 +14,7 @@ use bevy_render::{ AsBindGroup, CompareFunction, RenderPipelineDescriptor, Shader, SpecializedMeshPipelineError, }, + RenderDebugFlags, }; const FORWARD_DECAL_MESH_HANDLE: Handle = @@ -48,6 +49,7 @@ impl Plugin for ForwardDecalPlugin { app.add_plugins(MaterialPlugin::> { prepass_enabled: false, shadows_enabled: false, + debug_flags: RenderDebugFlags::default(), ..Default::default() }); } diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs index 88403900aeedb..10997ab43eea2 100644 --- a/crates/bevy_pbr/src/lib.rs +++ b/crates/bevy_pbr/src/lib.rs @@ -125,7 +125,7 @@ use bevy_render::{ sync_component::SyncComponentPlugin, texture::GpuImage, view::VisibilitySystems, - ExtractSchedule, Render, RenderApp, RenderSet, + ExtractSchedule, Render, RenderApp, RenderDebugFlags, RenderSet, }; use bevy_transform::TransformSystem; @@ -182,6 +182,8 @@ pub struct PbrPlugin { /// This requires compute shader support and so will be forcibly disabled if /// the platform doesn't support those. pub use_gpu_instance_buffer_builder: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, } impl Default for PbrPlugin { @@ -190,6 +192,7 @@ impl Default for PbrPlugin { prepass_enabled: true, add_default_deferred_lighting_plugin: true, use_gpu_instance_buffer_builder: true, + debug_flags: RenderDebugFlags::default(), } } } @@ -333,9 +336,11 @@ impl Plugin for PbrPlugin { .add_plugins(( MeshRenderPlugin { use_gpu_instance_buffer_builder: self.use_gpu_instance_buffer_builder, + debug_flags: self.debug_flags, }, MaterialPlugin:: { prepass_enabled: self.prepass_enabled, + debug_flags: self.debug_flags, ..Default::default() }, ScreenSpaceAmbientOcclusionPlugin, diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index e4ee53e0b7b73..7c6d93ec32c2e 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -252,6 +252,8 @@ pub struct MaterialPlugin { pub prepass_enabled: bool, /// Controls if shadows are enabled for the Material. pub shadows_enabled: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, pub _marker: PhantomData, } @@ -260,6 +262,7 @@ impl Default for MaterialPlugin { Self { prepass_enabled: true, shadows_enabled: true, + debug_flags: RenderDebugFlags::default(), _marker: Default::default(), } } @@ -374,7 +377,7 @@ where } if self.prepass_enabled { - app.add_plugins(PrepassPlugin::::default()); + app.add_plugins(PrepassPlugin::::new(self.debug_flags)); } } diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 1da0eb4f1c336..4885238e10d04 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -19,7 +19,7 @@ use bevy_render::{ renderer::RenderAdapter, sync_world::RenderEntity, view::{RenderVisibilityRanges, VISIBILITY_RANGES_STORAGE_BUFFER_COUNT}, - ExtractSchedule, Render, RenderApp, RenderSet, + ExtractSchedule, Render, RenderApp, RenderDebugFlags, RenderSet, }; pub use prepass_bindings::*; @@ -146,11 +146,19 @@ where /// Sets up the prepasses for a [`Material`]. /// /// This depends on the [`PrepassPipelinePlugin`]. 
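(For reference, a minimal sketch of how an app opts into the new `debug_flags` plumbing added above. The `App`/`DefaultPlugins` scaffolding is illustrative and not part of this patch; `RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS` is the flag this patch introduces in `bevy_render`.)

use bevy::pbr::PbrPlugin;
use bevy::prelude::*;
use bevy::render::RenderDebugFlags;

fn main() {
    App::new()
        .add_plugins(DefaultPlugins.set(PbrPlugin {
            // Sets `COPY_SRC` on indirect draw parameters so they can be read
            // back on the CPU; a debugging feature that may reduce performance.
            debug_flags: RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS,
            ..Default::default()
        }))
        .run();
}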
-pub struct PrepassPlugin(PhantomData); +pub struct PrepassPlugin { + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, + pub phantom: PhantomData, +} -impl Default for PrepassPlugin { - fn default() -> Self { - Self(Default::default()) +impl PrepassPlugin { + /// Creates a new [`PrepassPlugin`] with the given debug flags. + pub fn new(debug_flags: RenderDebugFlags) -> Self { + PrepassPlugin { + debug_flags, + phantom: PhantomData, + } } } @@ -176,8 +184,10 @@ where ), ) .add_plugins(( - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new( + self.debug_flags, + ), )); } diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs index 26559f9223dd5..495b6b4112f00 100644 --- a/crates/bevy_pbr/src/render/gpu_preprocess.rs +++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs @@ -29,12 +29,14 @@ use bevy_ecs::{ system::{lifetimeless::Read, Commands, Query, Res, ResMut}, world::{FromWorld, World}, }; +use bevy_render::batching::gpu_preprocessing::UntypedPhaseIndirectParametersBuffers; use bevy_render::{ batching::gpu_preprocessing::{ BatchedInstanceBuffers, GpuOcclusionCullingWorkItemBuffers, GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers, IndirectParametersIndexed, IndirectParametersMetadata, IndirectParametersNonIndexed, LatePreprocessWorkItemIndirectParameters, PreprocessWorkItem, PreprocessWorkItemBuffers, + UntypedPhaseBatchedInstanceBuffers, }, experimental::occlusion_culling::OcclusionCulling, render_graph::{Node, NodeRunError, RenderGraphApp, RenderGraphContext}, @@ -393,8 +395,22 @@ pub enum PhasePreprocessBindGroups { /// The bind groups for the compute shaders that reset indirect draw counts and /// build indirect parameters. -#[derive(Resource)] -pub struct BuildIndirectParametersBindGroups { +/// +/// There's one set of bind group for each phase. Phases are keyed off their +/// [`core::any::TypeId`]. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct BuildIndirectParametersBindGroups(pub TypeIdMap); + +impl BuildIndirectParametersBindGroups { + /// Creates a new, empty [`BuildIndirectParametersBindGroups`] table. + pub fn new() -> BuildIndirectParametersBindGroups { + Self::default() + } +} + +/// The per-phase set of bind groups for the compute shaders that reset indirect +/// draw counts and build indirect parameters. +pub struct PhaseBuildIndirectParametersBindGroups { /// The bind group for the `reset_indirect_batch_sets.wgsl` shader, for /// indexed meshes. reset_indexed_indirect_batch_sets: Option, @@ -470,9 +486,10 @@ impl Plugin for GpuMeshPreprocessPlugin { ( prepare_preprocess_pipelines.in_set(RenderSet::Prepare), prepare_preprocess_bind_groups - .run_if( - resource_exists::>, - ) + .run_if(resource_exists::>) .in_set(RenderSet::PrepareBindGroups), write_mesh_culling_data_buffer.in_set(RenderSet::PrepareResourcesFlush), ), @@ -511,7 +528,7 @@ impl Plugin for GpuMeshPreprocessPlugin { .add_render_graph_edge( Core3d, NodePbr::MainBuildIndirectParameters, - Node3d::DeferredPrepass + Node3d::DeferredPrepass, ); } } @@ -538,10 +555,8 @@ impl Node for EarlyGpuPreprocessNode { world: &'w World, ) -> Result<(), NodeRunError> { // Grab the [`BatchedInstanceBuffers`]. - let BatchedInstanceBuffers { - work_item_buffers: ref index_buffers, - .. 
- } = world.resource::>(); + let batched_instance_buffers = + world.resource::>(); let pipeline_cache = world.resource::(); let preprocess_pipelines = world.resource::(); @@ -583,13 +598,6 @@ impl Node for EarlyGpuPreprocessNode { continue; }; - // Grab the work item buffers for this view. - let Some(phase_work_item_buffers) = index_buffers.get(&view.retained_view_entity) - else { - warn!("The preprocessing index buffer wasn't present"); - continue; - }; - // Select the right pipeline, depending on whether GPU culling is in // use. let maybe_pipeline_id = if no_indirect_drawing { @@ -620,7 +628,17 @@ impl Node for EarlyGpuPreprocessNode { compute_pass.set_pipeline(preprocess_pipeline); // Loop over each render phase. - for (phase_type_id, work_item_buffers) in phase_work_item_buffers { + for (phase_type_id, batched_phase_instance_buffers) in + &batched_instance_buffers.phase_instance_buffers + { + // Grab the work item buffers for this view. + let Some(work_item_buffers) = batched_phase_instance_buffers + .work_item_buffers + .get(&view.retained_view_entity) + else { + continue; + }; + // Fetch the bind group for the render phase. let Some(phase_bind_groups) = bind_groups.get(phase_type_id) else { continue; @@ -775,12 +793,8 @@ impl Node for LateGpuPreprocessNode { world: &'w World, ) -> Result<(), NodeRunError> { // Grab the [`BatchedInstanceBuffers`]. - let BatchedInstanceBuffers { - ref work_item_buffers, - ref late_indexed_indirect_parameters_buffer, - ref late_non_indexed_indirect_parameters_buffer, - .. - } = world.resource::>(); + let batched_instance_buffers = + world.resource::>(); let pipeline_cache = world.resource::(); let preprocess_pipelines = world.resource::(); @@ -795,13 +809,6 @@ impl Node for LateGpuPreprocessNode { // Run the compute passes. for (view, bind_groups, view_uniform_offset) in self.view_query.iter_manual(world) { - // Grab the work item buffers for this view. - let Some(phase_work_item_buffers) = work_item_buffers.get(&view.retained_view_entity) - else { - warn!("The preprocessing index buffer wasn't present"); - continue; - }; - let maybe_pipeline_id = preprocess_pipelines .late_gpu_occlusion_culling_preprocess .pipeline_id; @@ -821,7 +828,25 @@ impl Node for LateGpuPreprocessNode { compute_pass.set_pipeline(preprocess_pipeline); - for (phase_type_id, work_item_buffers) in phase_work_item_buffers { + // Loop over each phase. Because we built the phases in parallel, + // each phase has a separate set of instance buffers. + for (phase_type_id, batched_phase_instance_buffers) in + &batched_instance_buffers.phase_instance_buffers + { + let UntypedPhaseBatchedInstanceBuffers { + ref work_item_buffers, + ref late_indexed_indirect_parameters_buffer, + ref late_non_indexed_indirect_parameters_buffer, + .. + } = *batched_phase_instance_buffers; + + // Grab the work item buffers for this view. + let Some(phase_work_item_buffers) = + work_item_buffers.get(&view.retained_view_entity) + else { + continue; + }; + let ( PreprocessWorkItemBuffers::Indirect { gpu_occlusion_culling: @@ -840,7 +865,7 @@ impl Node for LateGpuPreprocessNode { Some(late_indexed_indirect_parameters_buffer), Some(late_non_indexed_indirect_parameters_buffer), ) = ( - work_item_buffers, + phase_work_item_buffers, bind_groups.get(phase_type_id), late_indexed_indirect_parameters_buffer.buffer(), late_non_indexed_indirect_parameters_buffer.buffer(), @@ -1029,57 +1054,69 @@ fn run_build_indirect_parameters_node( return Ok(()); }; - // Build indexed indirect parameters. 
- if let ( - Some(reset_indexed_indirect_batch_sets_bind_group), - Some(build_indirect_indexed_params_bind_group), - ) = ( - &build_indirect_params_bind_groups.reset_indexed_indirect_batch_sets, - &build_indirect_params_bind_groups.build_indexed_indirect, - ) { - compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); - compute_pass.set_bind_group(0, reset_indexed_indirect_batch_sets_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .batch_set_count(true) - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); - } + // Loop over each phase. As each has a separate set of buffers, we need to + // build indirect parameters individually for each phase. + for (phase_type_id, phase_build_indirect_params_bind_groups) in + build_indirect_params_bind_groups.iter() + { + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(phase_type_id) + else { + continue; + }; - compute_pass.set_pipeline(build_indexed_indirect_params_pipeline); - compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .indexed_batch_count() - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); - } - } + // Build indexed indirect parameters. + if let ( + Some(reset_indexed_indirect_batch_sets_bind_group), + Some(build_indirect_indexed_params_bind_group), + ) = ( + &phase_build_indirect_params_bind_groups.reset_indexed_indirect_batch_sets, + &phase_build_indirect_params_bind_groups.build_indexed_indirect, + ) { + compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); + compute_pass.set_bind_group(0, reset_indexed_indirect_batch_sets_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .batch_set_count(true) + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } - // Build non-indexed indirect parameters. - if let ( - Some(reset_non_indexed_indirect_batch_sets_bind_group), - Some(build_indirect_non_indexed_params_bind_group), - ) = ( - &build_indirect_params_bind_groups.reset_non_indexed_indirect_batch_sets, - &build_indirect_params_bind_groups.build_non_indexed_indirect, - ) { - compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); - compute_pass.set_bind_group(0, reset_non_indexed_indirect_batch_sets_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .batch_set_count(false) - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + compute_pass.set_pipeline(build_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .indexed_batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } } - compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline); - compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .non_indexed_batch_count() - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + // Build non-indexed indirect parameters.
+ if let ( + Some(reset_non_indexed_indirect_batch_sets_bind_group), + Some(build_indirect_non_indexed_params_bind_group), + ) = ( + &phase_build_indirect_params_bind_groups.reset_non_indexed_indirect_batch_sets, + &phase_build_indirect_params_bind_groups.build_non_indexed_indirect, + ) { + compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); + compute_pass.set_bind_group(0, reset_non_indexed_indirect_batch_sets_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .batch_set_count(false) + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + + compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .non_indexed_batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } } } @@ -1637,18 +1674,14 @@ pub fn prepare_preprocess_bind_groups( ) { // Grab the `BatchedInstanceBuffers`. let BatchedInstanceBuffers { - data_buffer: ref data_buffer_vec, - ref work_item_buffers, current_input_buffer: ref current_input_buffer_vec, previous_input_buffer: ref previous_input_buffer_vec, - ref late_indexed_indirect_parameters_buffer, - ref late_non_indexed_indirect_parameters_buffer, + ref phase_instance_buffers, } = batched_instance_buffers.into_inner(); - let (Some(current_input_buffer), Some(previous_input_buffer), Some(data_buffer)) = ( + let (Some(current_input_buffer), Some(previous_input_buffer)) = ( current_input_buffer_vec.buffer().buffer(), previous_input_buffer_vec.buffer().buffer(), - data_buffer_vec.buffer(), ) else { return; }; @@ -1659,22 +1692,39 @@ pub fn prepare_preprocess_bind_groups( // Loop over each view. for (view_entity, view) in &views { - let Some(phase_work_item_buffers) = work_item_buffers.get(&view.retained_view_entity) - else { - continue; - }; - let mut bind_groups = TypeIdMap::default(); // Loop over each phase. - for (&phase_id, work_item_buffers) in phase_work_item_buffers { + for (phase_type_id, phase_instance_buffers) in phase_instance_buffers { + let UntypedPhaseBatchedInstanceBuffers { + data_buffer: ref data_buffer_vec, + ref work_item_buffers, + ref late_indexed_indirect_parameters_buffer, + ref late_non_indexed_indirect_parameters_buffer, + } = *phase_instance_buffers; + + let Some(data_buffer) = data_buffer_vec.buffer() else { + continue; + }; + + // Grab the indirect parameters buffers for this phase. + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(phase_type_id) + else { + continue; + }; + + let Some(work_item_buffers) = work_item_buffers.get(&view.retained_view_entity) else { + continue; + }; + // Create the `PreprocessBindGroupBuilder`. let preprocess_bind_group_builder = PreprocessBindGroupBuilder { view: view_entity, late_indexed_indirect_parameters_buffer, late_non_indexed_indirect_parameters_buffer, render_device: &render_device, - indirect_parameters_buffers: &indirect_parameters_buffers, + phase_indirect_parameters_buffers, mesh_culling_data_buffer: &mesh_culling_data_buffer, view_uniforms: &view_uniforms, previous_view_uniforms: &previous_view_uniforms, @@ -1725,7 +1775,7 @@ pub fn prepare_preprocess_bind_groups( // Write that bind group in. 
if let Some(bind_group) = bind_group { any_indirect = any_indirect || was_indirect; - bind_groups.insert(phase_id, bind_group); + bind_groups.insert(*phase_type_id, bind_group); } } @@ -1764,7 +1814,7 @@ struct PreprocessBindGroupBuilder<'a> { /// The device. render_device: &'a RenderDevice, /// The buffers that store indirect draw parameters. - indirect_parameters_buffers: &'a IndirectParametersBuffers, + phase_indirect_parameters_buffers: &'a UntypedPhaseIndirectParametersBuffers, /// The GPU buffer that stores the information needed to cull each mesh. mesh_culling_data_buffer: &'a MeshCullingDataBuffer, /// The GPU buffer that stores information about the view. @@ -1884,7 +1934,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; match ( - self.indirect_parameters_buffers.indexed_metadata_buffer(), + self.phase_indirect_parameters_buffers + .indexed_metadata_buffer(), indexed_work_item_buffer.buffer(), late_indexed_work_item_buffer.buffer(), self.late_indexed_indirect_parameters_buffer.buffer(), @@ -1975,7 +2026,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; match ( - self.indirect_parameters_buffers + self.phase_indirect_parameters_buffers .non_indexed_metadata_buffer(), non_indexed_work_item_buffer.buffer(), late_non_indexed_work_item_buffer.buffer(), @@ -2066,7 +2117,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; match ( - self.indirect_parameters_buffers.indexed_metadata_buffer(), + self.phase_indirect_parameters_buffers + .indexed_metadata_buffer(), late_indexed_work_item_buffer.buffer(), self.late_indexed_indirect_parameters_buffer.buffer(), ) { @@ -2146,7 +2198,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; match ( - self.indirect_parameters_buffers + self.phase_indirect_parameters_buffers .non_indexed_metadata_buffer(), late_non_indexed_work_item_buffer.buffer(), self.late_non_indexed_indirect_parameters_buffer.buffer(), @@ -2240,7 +2292,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; match ( - self.indirect_parameters_buffers.indexed_metadata_buffer(), + self.phase_indirect_parameters_buffers + .indexed_metadata_buffer(), indexed_work_item_buffer.buffer(), ) { (Some(indexed_metadata_buffer), Some(indexed_work_item_gpu_buffer)) => { @@ -2293,7 +2346,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; match ( - self.indirect_parameters_buffers + self.phase_indirect_parameters_buffers .non_indexed_metadata_buffer(), non_indexed_work_item_buffer.buffer(), ) { @@ -2346,121 +2399,134 @@ fn create_build_indirect_parameters_bind_groups( render_device: &RenderDevice, pipelines: &PreprocessPipelines, current_input_buffer: &Buffer, - indirect_parameters_buffer: &IndirectParametersBuffers, + indirect_parameters_buffers: &IndirectParametersBuffers, ) { - commands.insert_resource(BuildIndirectParametersBindGroups { - reset_indexed_indirect_batch_sets: match ( - indirect_parameters_buffer.indexed_batch_sets_buffer(), - ) { - (Some(indexed_batch_sets_buffer),) => Some( - render_device.create_bind_group( - "reset_indexed_indirect_batch_sets_bind_group", - // The early bind group is good for the main phase and late - // phase too. They bind the same buffers. 
- &pipelines - .early_phase - .reset_indirect_batch_sets - .bind_group_layout, - &BindGroupEntries::sequential((indexed_batch_sets_buffer.as_entire_binding(),)), - ), - ), - _ => None, - }, + let mut build_indirect_parameters_bind_groups = BuildIndirectParametersBindGroups::new(); + + for (phase_type_id, phase_indirect_parameters_buffer) in indirect_parameters_buffers.iter() { + build_indirect_parameters_bind_groups.insert( + *phase_type_id, + PhaseBuildIndirectParametersBindGroups { + reset_indexed_indirect_batch_sets: match ( + phase_indirect_parameters_buffer.indexed_batch_sets_buffer(), + ) { + (Some(indexed_batch_sets_buffer),) => Some( + render_device.create_bind_group( + "reset_indexed_indirect_batch_sets_bind_group", + // The early bind group is good for the main phase and late + // phase too. They bind the same buffers. + &pipelines + .early_phase + .reset_indirect_batch_sets + .bind_group_layout, + &BindGroupEntries::sequential(( + indexed_batch_sets_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, - reset_non_indexed_indirect_batch_sets: match ( - indirect_parameters_buffer.non_indexed_batch_sets_buffer(), - ) { - (Some(non_indexed_batch_sets_buffer),) => Some( - render_device.create_bind_group( - "reset_non_indexed_indirect_batch_sets_bind_group", - // The early bind group is good for the main phase and late - // phase too. They bind the same buffers. - &pipelines - .early_phase - .reset_indirect_batch_sets - .bind_group_layout, - &BindGroupEntries::sequential(( - non_indexed_batch_sets_buffer.as_entire_binding(), - )), - ), - ), - _ => None, - }, + reset_non_indexed_indirect_batch_sets: match ( + phase_indirect_parameters_buffer.non_indexed_batch_sets_buffer(), + ) { + (Some(non_indexed_batch_sets_buffer),) => Some( + render_device.create_bind_group( + "reset_non_indexed_indirect_batch_sets_bind_group", + // The early bind group is good for the main phase and late + // phase too. They bind the same buffers. + &pipelines + .early_phase + .reset_indirect_batch_sets + .bind_group_layout, + &BindGroupEntries::sequential(( + non_indexed_batch_sets_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, - build_indexed_indirect: match ( - indirect_parameters_buffer.indexed_metadata_buffer(), - indirect_parameters_buffer.indexed_data_buffer(), - indirect_parameters_buffer.indexed_batch_sets_buffer(), - ) { - ( - Some(indexed_indirect_parameters_metadata_buffer), - Some(indexed_indirect_parameters_data_buffer), - Some(indexed_batch_sets_buffer), - ) => Some( - render_device.create_bind_group( - "build_indexed_indirect_parameters_bind_group", - // The frustum culling bind group is good for occlusion culling - // too. They bind the same buffers. - &pipelines - .gpu_frustum_culling_build_indexed_indirect_params - .bind_group_layout, - &BindGroupEntries::sequential(( - current_input_buffer.as_entire_binding(), - // Don't use `as_entire_binding` here; the shader reads - // the length and `RawBufferVec` overallocates. 
- BufferBinding { - buffer: indexed_indirect_parameters_metadata_buffer, - offset: 0, - size: NonZeroU64::new( - indirect_parameters_buffer.indexed_batch_count() as u64 - * size_of::() as u64, - ), - }, - indexed_batch_sets_buffer.as_entire_binding(), - indexed_indirect_parameters_data_buffer.as_entire_binding(), - )), - ), - ), - _ => None, - }, + build_indexed_indirect: match ( + phase_indirect_parameters_buffer.indexed_metadata_buffer(), + phase_indirect_parameters_buffer.indexed_data_buffer(), + phase_indirect_parameters_buffer.indexed_batch_sets_buffer(), + ) { + ( + Some(indexed_indirect_parameters_metadata_buffer), + Some(indexed_indirect_parameters_data_buffer), + Some(indexed_batch_sets_buffer), + ) => Some( + render_device.create_bind_group( + "build_indexed_indirect_parameters_bind_group", + // The frustum culling bind group is good for occlusion culling + // too. They bind the same buffers. + &pipelines + .gpu_frustum_culling_build_indexed_indirect_params + .bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + // Don't use `as_entire_binding` here; the shader reads + // the length and `RawBufferVec` overallocates. + BufferBinding { + buffer: indexed_indirect_parameters_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + phase_indirect_parameters_buffer.indexed_batch_count() + as u64 + * size_of::() as u64, + ), + }, + indexed_batch_sets_buffer.as_entire_binding(), + indexed_indirect_parameters_data_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, - build_non_indexed_indirect: match ( - indirect_parameters_buffer.non_indexed_metadata_buffer(), - indirect_parameters_buffer.non_indexed_data_buffer(), - indirect_parameters_buffer.non_indexed_batch_sets_buffer(), - ) { - ( - Some(non_indexed_indirect_parameters_metadata_buffer), - Some(non_indexed_indirect_parameters_data_buffer), - Some(non_indexed_batch_sets_buffer), - ) => Some( - render_device.create_bind_group( - "build_non_indexed_indirect_parameters_bind_group", - // The frustum culling bind group is good for occlusion culling - // too. They bind the same buffers. - &pipelines - .gpu_frustum_culling_build_non_indexed_indirect_params - .bind_group_layout, - &BindGroupEntries::sequential(( - current_input_buffer.as_entire_binding(), - // Don't use `as_entire_binding` here; the shader reads - // the length and `RawBufferVec` overallocates. - BufferBinding { - buffer: non_indexed_indirect_parameters_metadata_buffer, - offset: 0, - size: NonZeroU64::new( - indirect_parameters_buffer.non_indexed_batch_count() as u64 - * size_of::() as u64, - ), - }, - non_indexed_batch_sets_buffer.as_entire_binding(), - non_indexed_indirect_parameters_data_buffer.as_entire_binding(), - )), - ), - ), - _ => None, - }, - }); + build_non_indexed_indirect: match ( + phase_indirect_parameters_buffer.non_indexed_metadata_buffer(), + phase_indirect_parameters_buffer.non_indexed_data_buffer(), + phase_indirect_parameters_buffer.non_indexed_batch_sets_buffer(), + ) { + ( + Some(non_indexed_indirect_parameters_metadata_buffer), + Some(non_indexed_indirect_parameters_data_buffer), + Some(non_indexed_batch_sets_buffer), + ) => Some( + render_device.create_bind_group( + "build_non_indexed_indirect_parameters_bind_group", + // The frustum culling bind group is good for occlusion culling + // too. They bind the same buffers. 
+ &pipelines + .gpu_frustum_culling_build_non_indexed_indirect_params + .bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + // Don't use `as_entire_binding` here; the shader reads + // the length and `RawBufferVec` overallocates. + BufferBinding { + buffer: non_indexed_indirect_parameters_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + phase_indirect_parameters_buffer.non_indexed_batch_count() + as u64 + * size_of::() as u64, + ), + }, + non_indexed_batch_sets_buffer.as_entire_binding(), + non_indexed_indirect_parameters_data_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, + }, + ); + } + + commands.insert_resource(build_indirect_parameters_bind_groups); } /// Writes the information needed to do GPU mesh culling to the GPU. diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index d89ee2a785a9a..6690724fb0e6d 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -21,7 +21,7 @@ use bevy_render::{ gpu_preprocessing::{ self, GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers, IndirectParametersIndexed, IndirectParametersMetadata, IndirectParametersNonIndexed, - InstanceInputUniformBuffer, + InstanceInputUniformBuffer, UntypedPhaseIndirectParametersBuffers, }, no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching, }, @@ -43,7 +43,8 @@ use bevy_render::{ Extract, }; use bevy_transform::components::GlobalTransform; -use bevy_utils::{default, Parallel}; +use bevy_utils::{default, Parallel, TypeIdMap}; +use core::any::TypeId; use core::mem::size_of; use material_bind_groups::MaterialBindingId; use render::skin::{self, SkinIndex}; @@ -79,13 +80,24 @@ use smallvec::{smallvec, SmallVec}; use static_assertions::const_assert_eq; /// Provides support for rendering 3D meshes. -#[derive(Default)] pub struct MeshRenderPlugin { /// Whether we're building [`MeshUniform`]s on GPU. /// /// This requires compute shader support and so will be forcibly disabled if /// the platform doesn't support those. pub use_gpu_instance_buffer_builder: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, +} + +impl MeshRenderPlugin { + /// Creates a new [`MeshRenderPlugin`] with the given debug flags. 
+ pub fn new(debug_flags: RenderDebugFlags) -> MeshRenderPlugin { + MeshRenderPlugin { + use_gpu_instance_buffer_builder: false, + debug_flags, + } + } } pub const FORWARD_IO_HANDLE: Handle = weak_handle!("38111de1-6e35-4dbb-877b-7b6f9334baf6"); @@ -166,18 +178,17 @@ impl Plugin for MeshRenderPlugin { (no_automatic_skin_batching, no_automatic_morph_batching), ) .add_plugins(( - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), - SortedRenderPhasePlugin::::default(), - SortedRenderPhasePlugin::::default(), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + SortedRenderPhasePlugin::::new(self.debug_flags), + SortedRenderPhasePlugin::::new(self.debug_flags), )); if let Some(render_app) = app.get_sub_app_mut(RenderApp) { render_app - .init_resource::() .init_resource::() .init_resource::() .init_resource::() @@ -202,7 +213,7 @@ impl Plugin for MeshRenderPlugin { set_mesh_motion_vector_flags.in_set(RenderSet::PrepareMeshes), prepare_skins.in_set(RenderSet::PrepareResources), prepare_morphs.in_set(RenderSet::PrepareResources), - prepare_mesh_bind_group.in_set(RenderSet::PrepareBindGroups), + prepare_mesh_bind_groups.in_set(RenderSet::PrepareBindGroups), prepare_mesh_view_bind_groups .in_set(RenderSet::PrepareBindGroups) .after(prepare_oit_buffers), @@ -238,12 +249,14 @@ impl Plugin for MeshRenderPlugin { if use_gpu_instance_buffer_builder { render_app - .init_resource::>() + .init_resource::>() .init_resource::() .add_systems( ExtractSchedule, - extract_meshes_for_gpu_building - .in_set(ExtractMeshesSet), + extract_meshes_for_gpu_building.in_set(ExtractMeshesSet), ) .add_systems( Render, @@ -1956,7 +1969,7 @@ impl GetFullBatchData for MeshPipeline { indexed: bool, base_output_index: u32, batch_set_index: Option, - indirect_parameters_buffer: &mut IndirectParametersBuffers, + phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, indirect_parameters_offset: u32, ) { let indirect_parameters = IndirectParametersMetadata { @@ -1971,9 +1984,10 @@ impl GetFullBatchData for MeshPipeline { }; if indexed { - indirect_parameters_buffer.set_indexed(indirect_parameters_offset, indirect_parameters); + phase_indirect_parameters_buffers + .set_indexed(indirect_parameters_offset, indirect_parameters); } else { - indirect_parameters_buffer + phase_indirect_parameters_buffers .set_non_indexed(indirect_parameters_offset, indirect_parameters); } } @@ -2567,9 +2581,12 @@ impl SpecializedMeshPipeline for MeshPipeline { } } -/// Bind groups for meshes currently loaded. -#[derive(Resource, Default)] -pub struct MeshBindGroups { +/// The bind groups for meshes currently loaded. +/// +/// If GPU mesh preprocessing isn't in use, these are global to the scene. If +/// GPU mesh preprocessing is in use, these are specific to a single phase. +#[derive(Default)] +pub struct MeshPhaseBindGroups { model_only: Option, skinned: Option, morph_targets: HashMap, MeshBindGroupPair>, @@ -2581,7 +2598,18 @@ pub struct MeshBindGroupPair { no_motion_vectors: BindGroup, } -impl MeshBindGroups { +/// All bind groups for meshes currently loaded. 
+#[derive(Resource)] +pub enum MeshBindGroups { + /// The bind groups for the meshes for the entire scene, if GPU mesh + /// preprocessing isn't in use. + CpuPreprocessing(MeshPhaseBindGroups), + /// A mapping from the type ID of a phase (e.g. [`Opaque3d`]) to the mesh + /// bind groups for that phase. + GpuPreprocessing(TypeIdMap), +} + +impl MeshPhaseBindGroups { pub fn reset(&mut self) { self.model_only = None; self.skinned = None; @@ -2623,9 +2651,10 @@ impl MeshBindGroupPair { } } -pub fn prepare_mesh_bind_group( +/// Creates the per-mesh bind groups for each type of mesh and each phase. +pub fn prepare_mesh_bind_groups( + mut commands: Commands, meshes: Res>, - mut groups: ResMut, mesh_pipeline: Res, render_device: Res, cpu_batched_instance_buffer: Option< @@ -2638,24 +2667,80 @@ pub fn prepare_mesh_bind_group( weights_uniform: Res, mut render_lightmaps: ResMut, ) { - groups.reset(); + // CPU mesh preprocessing path. + if let Some(cpu_batched_instance_buffer) = cpu_batched_instance_buffer { + if let Some(instance_data_binding) = cpu_batched_instance_buffer + .into_inner() + .instance_data_binding() + { + // In this path, we only have a single set of bind groups for all phases. + let cpu_preprocessing_mesh_bind_groups = prepare_mesh_bind_groups_for_phase( + instance_data_binding, + &meshes, + &mesh_pipeline, + &render_device, + &skins_uniform, + &weights_uniform, + &mut render_lightmaps, + ); + + commands.insert_resource(MeshBindGroups::CpuPreprocessing( + cpu_preprocessing_mesh_bind_groups, + )); + return; + } + } + + // GPU mesh preprocessing path. + if let Some(gpu_batched_instance_buffers) = gpu_batched_instance_buffers { + let mut gpu_preprocessing_mesh_bind_groups = TypeIdMap::default(); + + // Loop over each phase. + for (phase_type_id, batched_phase_instance_buffers) in + &gpu_batched_instance_buffers.phase_instance_buffers + { + let Some(instance_data_binding) = + batched_phase_instance_buffers.instance_data_binding() + else { + continue; + }; + + let mesh_phase_bind_groups = prepare_mesh_bind_groups_for_phase( + instance_data_binding, + &meshes, + &mesh_pipeline, + &render_device, + &skins_uniform, + &weights_uniform, + &mut render_lightmaps, + ); + + gpu_preprocessing_mesh_bind_groups.insert(*phase_type_id, mesh_phase_bind_groups); + } + commands.insert_resource(MeshBindGroups::GpuPreprocessing( + gpu_preprocessing_mesh_bind_groups, + )); + } +} + +/// Creates the per-mesh bind groups for each type of mesh, for a single phase. +fn prepare_mesh_bind_groups_for_phase( + model: BindingResource, + meshes: &RenderAssets, + mesh_pipeline: &MeshPipeline, + render_device: &RenderDevice, + skins_uniform: &SkinUniforms, + weights_uniform: &MorphUniforms, + render_lightmaps: &mut RenderLightmaps, +) -> MeshPhaseBindGroups { let layouts = &mesh_pipeline.mesh_layouts; - let model = if let Some(cpu_batched_instance_buffer) = cpu_batched_instance_buffer { - cpu_batched_instance_buffer - .into_inner() - .instance_data_binding() - } else if let Some(gpu_batched_instance_buffers) = gpu_batched_instance_buffers { - gpu_batched_instance_buffers - .into_inner() - .instance_data_binding() - } else { - return; + // TODO: Reuse allocations. 
+ let mut groups = MeshPhaseBindGroups { + model_only: Some(layouts.model_only(render_device, &model)), + ..default() }; - let Some(model) = model else { return }; - - groups.model_only = Some(layouts.model_only(&render_device, &model)); // Create the skinned mesh bind group with the current and previous buffers // (the latter being for motion vector computation). If there's no previous @@ -2664,8 +2749,8 @@ pub fn prepare_mesh_bind_group( if let Some(skin) = skin { let prev_skin = skins_uniform.prev_buffer.buffer().unwrap_or(skin); groups.skinned = Some(MeshBindGroupPair { - motion_vectors: layouts.skinned_motion(&render_device, &model, skin, prev_skin), - no_motion_vectors: layouts.skinned(&render_device, &model, skin), + motion_vectors: layouts.skinned_motion(render_device, &model, skin, prev_skin), + no_motion_vectors: layouts.skinned(render_device, &model, skin), }); } @@ -2680,7 +2765,7 @@ pub fn prepare_mesh_bind_group( let prev_skin = skins_uniform.prev_buffer.buffer().unwrap_or(skin); MeshBindGroupPair { motion_vectors: layouts.morphed_skinned_motion( - &render_device, + render_device, &model, skin, weights, @@ -2689,7 +2774,7 @@ pub fn prepare_mesh_bind_group( prev_weights, ), no_motion_vectors: layouts.morphed_skinned( - &render_device, + render_device, &model, skin, weights, @@ -2699,18 +2784,13 @@ pub fn prepare_mesh_bind_group( } None => MeshBindGroupPair { motion_vectors: layouts.morphed_motion( - &render_device, + render_device, &model, weights, targets, prev_weights, ), - no_motion_vectors: layouts.morphed( - &render_device, - &model, - weights, - targets, - ), + no_motion_vectors: layouts.morphed(render_device, &model, weights, targets), }, }; groups.morph_targets.insert(id, bind_group_pair); @@ -2723,9 +2803,11 @@ pub fn prepare_mesh_bind_group( for (lightmap_slab_id, lightmap_slab) in render_lightmaps.slabs.iter_mut().enumerate() { groups.lightmaps.insert( LightmapSlabIndex(NonMaxU32::new(lightmap_slab_id as u32).unwrap()), - layouts.lightmapped(&render_device, &model, lightmap_slab, bindless_supported), + layouts.lightmapped(render_device, &model, lightmap_slab, bindless_supported), ); } + + groups } pub struct SetMeshViewBindGroup; @@ -2829,7 +2911,20 @@ impl RenderCommand
<P>
for SetMeshBindGroup { .get(entity) .map(|render_lightmap| render_lightmap.slab_index); - let Some(bind_group) = bind_groups.get( + let Some(mesh_phase_bind_groups) = (match *bind_groups { + MeshBindGroups::CpuPreprocessing(ref mesh_phase_bind_groups) => { + Some(mesh_phase_bind_groups) + } + MeshBindGroups::GpuPreprocessing(ref mesh_phase_bind_groups) => { + mesh_phase_bind_groups.get(&TypeId::of::
<P>
()) + } + }) else { + // This is harmless if e.g. we're rendering the `Shadow` phase and + // there weren't any shadows. + return RenderCommandResult::Success; + }; + + let Some(bind_group) = mesh_phase_bind_groups.get( mesh_asset_id, lightmap_slab_index, is_skinned, @@ -2981,9 +3076,18 @@ impl RenderCommand
<P>
for DrawMesh { // Look up the indirect parameters buffer, as well as // the buffer we're going to use for // `multi_draw_indexed_indirect_count` (if available). + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffer.get(&TypeId::of::
<P>
()) + else { + warn!( + "Not rendering mesh because indexed indirect parameters buffer \ + wasn't present for this phase", + ); + return RenderCommandResult::Skip; + }; let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = ( - indirect_parameters_buffer.indexed_data_buffer(), - indirect_parameters_buffer.indexed_batch_sets_buffer(), + phase_indirect_parameters_buffers.indexed_data_buffer(), + phase_indirect_parameters_buffers.indexed_batch_sets_buffer(), ) else { warn!( "Not rendering mesh because indexed indirect parameters buffer \ @@ -3038,9 +3142,18 @@ impl RenderCommand
<P>
for DrawMesh { // Look up the indirect parameters buffer, as well as the // buffer we're going to use for // `multi_draw_indirect_count` (if available). + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffer.get(&TypeId::of::
<P>
()) + else { + warn!( + "Not rendering mesh because indexed indirect parameters buffer \ + wasn't present for this phase", + ); + return RenderCommandResult::Skip; + }; let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = ( - indirect_parameters_buffer.non_indexed_data_buffer(), - indirect_parameters_buffer.non_indexed_batch_sets_buffer(), + phase_indirect_parameters_buffers.non_indexed_data_buffer(), + phase_indirect_parameters_buffers.non_indexed_batch_sets_buffer(), ) else { warn!( "Not rendering mesh because non-indexed indirect parameters buffer \ diff --git a/crates/bevy_render/Cargo.toml b/crates/bevy_render/Cargo.toml index 33fc2aa856f4d..4167185632fa3 100644 --- a/crates/bevy_render/Cargo.toml +++ b/crates/bevy_render/Cargo.toml @@ -101,6 +101,7 @@ variadics_please = "1.1" tracing = { version = "0.1", default-features = false, features = ["std"] } indexmap = { version = "2" } fixedbitset = { version = "0.5" } +bitflags = "2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] # Omit the `glsl` feature in non-WebAssembly by default. diff --git a/crates/bevy_render/src/batching/gpu_preprocessing.rs b/crates/bevy_render/src/batching/gpu_preprocessing.rs index ccfd2729707b5..6637638f389be 100644 --- a/crates/bevy_render/src/batching/gpu_preprocessing.rs +++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs @@ -1,8 +1,9 @@ //! Batching functionality when GPU preprocessing is in use. -use core::any::TypeId; +use core::{any::TypeId, marker::PhantomData, mem}; use bevy_app::{App, Plugin}; +use bevy_derive::{Deref, DerefMut}; use bevy_ecs::{ prelude::Entity, query::{Has, With}, @@ -24,26 +25,22 @@ use crate::{ experimental::occlusion_culling::OcclusionCulling, render_phase::{ BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet, - BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, InputUniformIndex, + BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, InputUniformIndex, PhaseItem, PhaseItemBatchSetKey as _, PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, ViewSortedRenderPhases, }, render_resource::{Buffer, BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec}, renderer::{RenderAdapter, RenderDevice, RenderQueue}, view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity}, - Render, RenderApp, RenderSet, + Render, RenderApp, RenderDebugFlags, RenderSet, }; use super::{BatchMeta, GetBatchData, GetFullBatchData}; #[derive(Default)] pub struct BatchingPlugin { - /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so - /// that they can be read back to CPU. - /// - /// This is a debugging feature that may reduce performance. It primarily - /// exists for the `occlusion_culling` example. - pub allow_copies_from_indirect_parameters: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, } impl Plugin for BatchingPlugin { @@ -54,7 +51,8 @@ impl Plugin for BatchingPlugin { render_app .insert_resource(IndirectParametersBuffers::new( - self.allow_copies_from_indirect_parameters, + self.debug_flags + .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS), )) .add_systems( Render, @@ -147,18 +145,6 @@ where BD: GpuArrayBufferable + Sync + Send + 'static, BDI: Pod + Default, { - /// A storage area for the buffer data that the GPU compute shader is - /// expected to write to. - /// - /// There will be one entry for each index. 
- pub data_buffer: UninitBufferVec, - - /// The index of the buffer data in the current input buffer that - /// corresponds to each instance. - /// - /// This is keyed off each view. Each view has a separate buffer. - pub work_item_buffers: HashMap>, - /// The uniform data inputs for the current frame. /// /// These are uploaded during the extraction phase. @@ -173,6 +159,81 @@ where /// corresponding buffer data input uniform in this list. pub previous_input_buffer: InstanceInputUniformBuffer, + /// The data needed to render buffers for each phase. + /// + /// The keys of this map are the type IDs of each phase: e.g. `Opaque3d`, + /// `AlphaMask3d`, etc. + pub phase_instance_buffers: TypeIdMap>, +} + +impl Default for BatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, + BDI: Pod + Sync + Send + Default + 'static, +{ + fn default() -> Self { + BatchedInstanceBuffers { + current_input_buffer: InstanceInputUniformBuffer::new(), + previous_input_buffer: InstanceInputUniformBuffer::new(), + phase_instance_buffers: HashMap::default(), + } + } +} + +/// The GPU buffers holding the data needed to render batches for a single +/// phase. +/// +/// These are split out per phase so that we can run the phases in parallel. +/// This is the version of the structure that has a type parameter, which +/// enables Bevy's scheduler to run the batching operations for the different +/// phases in parallel. +/// +/// See the documentation for [`BatchedInstanceBuffers`] for more information. +#[derive(Resource)] +pub struct PhaseBatchedInstanceBuffers +where + PI: PhaseItem, + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + /// The buffers for this phase. + pub buffers: UntypedPhaseBatchedInstanceBuffers, + phantom: PhantomData, +} + +impl Default for PhaseBatchedInstanceBuffers +where + PI: PhaseItem, + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + fn default() -> Self { + PhaseBatchedInstanceBuffers { + buffers: UntypedPhaseBatchedInstanceBuffers::default(), + phantom: PhantomData, + } + } +} + +/// The GPU buffers holding the data needed to render batches for a single +/// phase, without a type parameter for that phase. +/// +/// Since this structure doesn't have a type parameter, it can be placed in +/// [`BatchedInstanceBuffers::phase_instance_buffers`]. +pub struct UntypedPhaseBatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + /// A storage area for the buffer data that the GPU compute shader is + /// expected to write to. + /// + /// There will be one entry for each index. + pub data_buffer: UninitBufferVec, + + /// The index of the buffer data in the current input buffer that + /// corresponds to each instance. + /// + /// This is keyed off each view. Each view has a separate buffer. + pub work_item_buffers: HashMap, + /// A buffer that holds the number of indexed meshes that weren't visible in /// the previous frame, when GPU occlusion culling is in use. /// @@ -351,11 +412,11 @@ pub struct GpuOcclusionCullingWorkItemBuffers { /// The buffer of work items corresponding to non-indexed meshes. pub late_non_indexed: UninitBufferVec, /// The offset into the - /// [`BatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`] + /// [`UntypedPhaseBatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`] /// where this view's indirect dispatch counts for indexed meshes live. 
pub late_indirect_parameters_indexed_offset: u32, /// The offset into the - /// [`BatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`] + /// [`UntypedPhaseBatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`] /// where this view's indirect dispatch counts for non-indexed meshes live. pub late_indirect_parameters_non_indexed_offset: u32, } @@ -409,7 +470,7 @@ impl Default for LatePreprocessWorkItemIndirectParameters { /// You may need to call this function if you're implementing your own custom /// render phases. See the `specialized_mesh_pipeline` example. pub fn get_or_create_work_item_buffer<'a, I>( - work_item_buffers: &'a mut HashMap>, + work_item_buffers: &'a mut HashMap, view: RetainedViewEntity, no_indirect_drawing: bool, enable_gpu_occlusion_culling: bool, @@ -417,11 +478,7 @@ pub fn get_or_create_work_item_buffer<'a, I>( where I: 'static, { - let preprocess_work_item_buffers = match work_item_buffers - .entry(view) - .or_default() - .entry(TypeId::of::()) - { + let preprocess_work_item_buffers = match work_item_buffers.entry(view) { Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), Entry::Vacant(vacant_entry) => { if no_indirect_drawing { @@ -700,8 +757,71 @@ pub struct IndirectBatchSet { /// pass can determine how many meshes are actually to be drawn. /// /// These buffers will remain empty if indirect drawing isn't in use. -#[derive(Resource)] +#[derive(Resource, Deref, DerefMut)] pub struct IndirectParametersBuffers { + /// A mapping from a phase type ID to the indirect parameters buffers for + /// that phase. + /// + /// Examples of phase type IDs are `Opaque3d` and `AlphaMask3d`. + #[deref] + pub buffers: TypeIdMap, + /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so + /// that they can be read back to CPU. + /// + /// This is a debugging feature that may reduce performance. It primarily + /// exists for the `occlusion_culling` example. + pub allow_copies_from_indirect_parameter_buffers: bool, +} + +impl IndirectParametersBuffers { + /// Initializes a new [`IndirectParametersBuffers`] resource. + pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers { + IndirectParametersBuffers { + buffers: TypeIdMap::default(), + allow_copies_from_indirect_parameter_buffers, + } + } +} + +/// The buffers containing all the information that indirect draw commands use +/// to draw the scene, for a single phase. +/// +/// This is the version of the structure that has a type parameter, so that the +/// batching for different phases can run in parallel. +/// +/// See the [`IndirectParametersBuffers`] documentation for more information. +#[derive(Resource)] +pub struct PhaseIndirectParametersBuffers +where + PI: PhaseItem, +{ + /// The indirect draw buffers for the phase. + pub buffers: UntypedPhaseIndirectParametersBuffers, + phantom: PhantomData, +} + +impl PhaseIndirectParametersBuffers +where + PI: PhaseItem, +{ + pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> Self { + PhaseIndirectParametersBuffers { + buffers: UntypedPhaseIndirectParametersBuffers::new( + allow_copies_from_indirect_parameter_buffers, + ), + phantom: PhantomData, + } + } +} + +/// The buffers containing all the information that indirect draw commands use +/// to draw the scene, for a single phase. 
+/// +/// This is the version of the structure that doesn't have a type parameter, so +/// that it can be inserted into [`IndirectParametersBuffers::buffers`] +/// +/// See the [`IndirectParametersBuffers`] documentation for more information. +pub struct UntypedPhaseIndirectParametersBuffers { /// The GPU buffer that stores the indirect draw parameters for non-indexed /// meshes. /// @@ -751,15 +871,17 @@ pub struct IndirectParametersBuffers { indexed_batch_sets: RawBufferVec, } -impl IndirectParametersBuffers { +impl UntypedPhaseIndirectParametersBuffers { /// Creates the indirect parameters buffers. - pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers { + pub fn new( + allow_copies_from_indirect_parameter_buffers: bool, + ) -> UntypedPhaseIndirectParametersBuffers { let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT; if allow_copies_from_indirect_parameter_buffers { indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC; } - IndirectParametersBuffers { + UntypedPhaseIndirectParametersBuffers { non_indexed_data: UninitBufferVec::new(indirect_parameter_buffer_usages), non_indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE), non_indexed_batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages), @@ -952,6 +1074,15 @@ impl IndirectParametersBuffers { pub fn get_next_batch_set_index(&self, indexed: bool) -> Option { NonMaxU32::new(self.batch_set_count(indexed) as u32) } + + pub fn clear(&mut self) { + self.indexed_data.clear(); + self.indexed_metadata.clear(); + self.indexed_batch_sets.clear(); + self.non_indexed_data.clear(); + self.non_indexed_metadata.clear(); + self.non_indexed_batch_sets.clear(); + } } impl Default for IndirectParametersBuffers { @@ -1007,11 +1138,24 @@ where { /// Creates new buffers. pub fn new() -> Self { - BatchedInstanceBuffers { + Self::default() + } + + /// Clears out the buffers in preparation for a new frame. + pub fn clear(&mut self) { + // TODO: Don't do this. + self.phase_instance_buffers.clear(); + } +} + +impl UntypedPhaseBatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + pub fn new() -> Self { + UntypedPhaseBatchedInstanceBuffers { data_buffer: UninitBufferVec::new(BufferUsages::STORAGE), work_item_buffers: HashMap::default(), - current_input_buffer: InstanceInputUniformBuffer::new(), - previous_input_buffer: InstanceInputUniformBuffer::new(), late_indexed_indirect_parameters_buffer: RawBufferVec::new( BufferUsages::STORAGE | BufferUsages::INDIRECT, ), @@ -1039,17 +1183,14 @@ where // Clear each individual set of buffers, but don't depopulate the hash // table. We want to avoid reallocating these vectors every frame. 
for view_work_item_buffers in self.work_item_buffers.values_mut() { - for phase_work_item_buffers in view_work_item_buffers.values_mut() { - phase_work_item_buffers.clear(); - } + view_work_item_buffers.clear(); } } } -impl Default for BatchedInstanceBuffers +impl Default for UntypedPhaseBatchedInstanceBuffers where BD: GpuArrayBufferable + Sync + Send + 'static, - BDI: Pod + Default + Sync + Send + 'static, { fn default() -> Self { Self::new() @@ -1098,7 +1239,7 @@ where self, instance_end_index: u32, phase: &mut SortedRenderPhase, - indirect_parameters_buffers: &mut IndirectParametersBuffers, + phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, ) where I: CachedRenderPipelinePhaseItem + SortedPhaseItem, { @@ -1114,7 +1255,7 @@ where None => PhaseItemExtraIndex::None, }; if let Some(indirect_parameters_index) = self.indirect_parameters_index { - indirect_parameters_buffers + phase_indirect_parameters_buffers .add_batch_set(self.indexed, indirect_parameters_index.into()); } } @@ -1156,17 +1297,23 @@ pub fn delete_old_work_item_buffers( .iter() .map(|extracted_view| extracted_view.retained_view_entity) .collect(); - gpu_batched_instance_buffers - .work_item_buffers - .retain(|retained_view_entity, _| retained_view_entities.contains(retained_view_entity)); + for phase_instance_buffers in gpu_batched_instance_buffers + .phase_instance_buffers + .values_mut() + { + phase_instance_buffers + .work_item_buffers + .retain(|retained_view_entity, _| { + retained_view_entities.contains(retained_view_entity) + }); + } } /// Batch the items in a sorted render phase, when GPU instance buffer building /// is in use. This means comparing metadata needed to draw each phase item and /// trying to combine the draws into a batch. pub fn batch_and_prepare_sorted_render_phase( - gpu_array_buffer: ResMut>, - mut indirect_parameters_buffers: ResMut, + indirect_parameters_buffers: Res, mut sorted_render_phases: ResMut>, mut views: Query<( &ExtractedView, @@ -1178,14 +1325,19 @@ pub fn batch_and_prepare_sorted_render_phase( I: CachedRenderPipelinePhaseItem + SortedPhaseItem, GFBD: GetFullBatchData, { + let mut phase_batched_instance_buffers = + UntypedPhaseBatchedInstanceBuffers::::new(); + let mut phase_indirect_parameters_buffers = UntypedPhaseIndirectParametersBuffers::new( + indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers, + ); + // We only process GPU-built batch data in this function. - let BatchedInstanceBuffers { + let UntypedPhaseBatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, ref mut late_indexed_indirect_parameters_buffer, ref mut late_non_indexed_indirect_parameters_buffer, - .. - } = gpu_array_buffer.into_inner(); + } = phase_batched_instance_buffers; for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views { let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else { @@ -1231,7 +1383,7 @@ pub fn batch_and_prepare_sorted_render_phase( batch.flush( data_buffer.len() as u32, phase, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers, ); } @@ -1257,15 +1409,15 @@ pub fn batch_and_prepare_sorted_render_phase( if !can_batch { // Break a batch if we need to. 
if let Some(batch) = batch.take() { - batch.flush(output_index, phase, &mut indirect_parameters_buffers); + batch.flush(output_index, phase, &mut phase_indirect_parameters_buffers); } let indirect_parameters_index = if no_indirect_drawing { None } else if item_is_indexed { - Some(indirect_parameters_buffers.allocate_indexed(1)) + Some(phase_indirect_parameters_buffers.allocate_indexed(1)) } else { - Some(indirect_parameters_buffers.allocate_non_indexed(1)) + Some(phase_indirect_parameters_buffers.allocate_non_indexed(1)) }; // Start a new batch. @@ -1275,7 +1427,7 @@ pub fn batch_and_prepare_sorted_render_phase( item_is_indexed, output_index, None, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers, indirect_parameters_index, ); }; @@ -1317,7 +1469,7 @@ pub fn batch_and_prepare_sorted_render_phase( batch.flush( data_buffer.len() as u32, phase, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers, ); } } @@ -1325,8 +1477,8 @@ pub fn batch_and_prepare_sorted_render_phase( /// Creates batches for a render phase that uses bins. pub fn batch_and_prepare_binned_render_phase( - gpu_array_buffer: ResMut>, - mut indirect_parameters_buffers: ResMut, + mut phase_batched_instance_buffers: ResMut>, + mut phase_indirect_parameters_buffers: ResMut>, mut binned_render_phases: ResMut>, mut views: Query< ( @@ -1343,13 +1495,12 @@ pub fn batch_and_prepare_binned_render_phase( { let system_param_item = param.into_inner(); - let BatchedInstanceBuffers { + let UntypedPhaseBatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, ref mut late_indexed_indirect_parameters_buffer, ref mut late_non_indexed_indirect_parameters_buffer, - .. - } = gpu_array_buffer.into_inner(); + } = phase_batched_instance_buffers.buffers; for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views { let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else { @@ -1376,8 +1527,10 @@ pub fn batch_and_prepare_binned_render_phase( for (batch_set_key, bins) in &phase.multidrawable_meshes { let mut batch_set = None; - let indirect_parameters_base = - indirect_parameters_buffers.batch_count(batch_set_key.indexed()) as u32; + let indirect_parameters_base = phase_indirect_parameters_buffers + .buffers + .batch_count(batch_set_key.indexed()) + as u32; for (bin_key, bin) in bins { let first_output_index = data_buffer.len() as u32; let mut batch: Option = None; @@ -1408,9 +1561,11 @@ pub fn batch_and_prepare_binned_render_phase( None => { // Start a new batch, in indirect mode. 
- let indirect_parameters_index = - indirect_parameters_buffers.allocate(batch_set_key.indexed(), 1); - let batch_set_index = indirect_parameters_buffers + let indirect_parameters_index = phase_indirect_parameters_buffers + .buffers + .allocate(batch_set_key.indexed(), 1); + let batch_set_index = phase_indirect_parameters_buffers + .buffers .get_next_batch_set_index(batch_set_key.indexed()); GFBD::write_batch_indirect_parameters_metadata( @@ -1418,7 +1573,7 @@ pub fn batch_and_prepare_binned_render_phase( batch_set_key.indexed(), output_index, batch_set_index, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers.buffers, indirect_parameters_index, ); work_item_buffer.push( @@ -1447,7 +1602,8 @@ pub fn batch_and_prepare_binned_render_phase( first_batch: batch, batch_count: 1, bin_key: bin_key.clone(), - index: indirect_parameters_buffers + index: phase_indirect_parameters_buffers + .buffers .batch_set_count(batch_set_key.indexed()) as u32, }); @@ -1464,7 +1620,8 @@ pub fn batch_and_prepare_binned_render_phase( { if let Some(batch_set) = batch_set { batch_sets.push(batch_set); - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .add_batch_set(batch_set_key.indexed(), indirect_parameters_base); } } @@ -1513,17 +1670,19 @@ pub fn batch_and_prepare_binned_render_phase( None if !no_indirect_drawing => { // Start a new batch, in indirect mode. - let indirect_parameters_index = - indirect_parameters_buffers.allocate(key.0.indexed(), 1); - let batch_set_index = - indirect_parameters_buffers.get_next_batch_set_index(key.0.indexed()); + let indirect_parameters_index = phase_indirect_parameters_buffers + .buffers + .allocate(key.0.indexed(), 1); + let batch_set_index = phase_indirect_parameters_buffers + .buffers + .get_next_batch_set_index(key.0.indexed()); GFBD::write_batch_indirect_parameters_metadata( input_index, key.0.indexed(), output_index, batch_set_index, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers.buffers, indirect_parameters_index, ); work_item_buffer.push( @@ -1580,7 +1739,9 @@ pub fn batch_and_prepare_binned_render_phase( first_batch: batch, batch_count: 1, bin_key: key.1.clone(), - index: indirect_parameters_buffers.batch_set_count(key.0.indexed()) + index: phase_indirect_parameters_buffers + .buffers + .batch_set_count(key.0.indexed()) as u32, }); } @@ -1595,12 +1756,14 @@ pub fn batch_and_prepare_binned_render_phase( None } else if key.0.indexed() { Some( - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .allocate_indexed(unbatchables.entities.len() as u32), ) } else { Some( - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .allocate_non_indexed(unbatchables.entities.len() as u32), ) }; @@ -1620,7 +1783,7 @@ pub fn batch_and_prepare_binned_render_phase( key.0.indexed(), output_index, None, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers.buffers, *indirect_parameters_index, ); work_item_buffer.push( @@ -1640,7 +1803,8 @@ pub fn batch_and_prepare_binned_render_phase( batch_set_index: None, }, }); - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .add_batch_set(key.0.indexed(), *indirect_parameters_index); *indirect_parameters_index += 1; } else { @@ -1664,6 +1828,64 @@ pub fn batch_and_prepare_binned_render_phase( } } +/// A system that gathers up the per-phase GPU buffers and inserts them into the +/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] tables. 
+///
+/// This runs after the [`batch_and_prepare_binned_render_phase`] or
+/// [`batch_and_prepare_sorted_render_phase`] systems. It takes the per-phase
+/// [`PhaseBatchedInstanceBuffers`] and [`PhaseIndirectParametersBuffers`]
+/// resources and inserts them into the global [`BatchedInstanceBuffers`] and
+/// [`IndirectParametersBuffers`] tables.
+///
+/// This system exists so that the [`batch_and_prepare_binned_render_phase`]
+/// and [`batch_and_prepare_sorted_render_phase`] systems can run in parallel
+/// with one another. If those two systems manipulated
+/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] directly, then
+/// they wouldn't be able to run in parallel.
+pub fn collect_buffers_for_phase<PI, GFBD>(
+    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<PI, GFBD::BufferData>>,
+    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<PI>>,
+    mut batched_instance_buffers: ResMut<
+        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
+    >,
+    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
+) where
+    PI: PhaseItem,
+    GFBD: GetFullBatchData + Send + Sync + 'static,
+{
+    // Insert the `PhaseBatchedInstanceBuffers` into the global table. Replace
+    // the contents of the per-phase resource with the old batched instance
+    // buffers in order to reuse allocations.
+    let untyped_phase_batched_instance_buffers =
+        mem::take(&mut phase_batched_instance_buffers.buffers);
+    if let Some(mut old_untyped_phase_batched_instance_buffers) = batched_instance_buffers
+        .phase_instance_buffers
+        .insert(TypeId::of::<PI>(), untyped_phase_batched_instance_buffers)
+    {
+        old_untyped_phase_batched_instance_buffers.clear();
+        phase_batched_instance_buffers.buffers = old_untyped_phase_batched_instance_buffers;
+    }
+
+    // Insert the `PhaseIndirectParametersBuffers` into the global table.
+    // Replace the contents of the per-phase resource with the old indirect
+    // parameters buffers in order to reuse allocations.
+    let untyped_phase_indirect_parameters_buffers = mem::replace(
+        &mut phase_indirect_parameters_buffers.buffers,
+        UntypedPhaseIndirectParametersBuffers::new(
+            indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers,
+        ),
+    );
+    if let Some(mut old_untyped_phase_indirect_parameters_buffers) = indirect_parameters_buffers
+        .insert(
+            TypeId::of::<PI>(),
+            untyped_phase_indirect_parameters_buffers,
+        )
+    {
+        old_untyped_phase_indirect_parameters_buffers.clear();
+        phase_indirect_parameters_buffers.buffers = old_untyped_phase_indirect_parameters_buffers;
+    }
+}
+
 /// A system that writes all instance buffers to the GPU.
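+///
+/// With the move to per-phase batching, this now iterates over every phase's
+/// [`UntypedPhaseBatchedInstanceBuffers`], writing each phase's data buffer,
+/// late indirect parameters buffers, and work item buffers in addition to the
+/// global current and previous input buffers.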
 pub fn write_batched_instance_buffers<GFBD>(
     render_device: Res<RenderDevice>,
@@ -1673,26 +1895,31 @@
     GFBD: GetFullBatchData,
 {
     let BatchedInstanceBuffers {
-        ref mut data_buffer,
-        ref mut work_item_buffers,
         ref mut current_input_buffer,
         ref mut previous_input_buffer,
-        ref mut late_indexed_indirect_parameters_buffer,
-        ref mut late_non_indexed_indirect_parameters_buffer,
+        ref mut phase_instance_buffers,
     } = gpu_array_buffer.into_inner();
 
-    data_buffer.write_buffer(&render_device);
     current_input_buffer
         .buffer
         .write_buffer(&render_device, &render_queue);
     previous_input_buffer
         .buffer
         .write_buffer(&render_device, &render_queue);
-    late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
-    late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
 
-    for view_work_item_buffers in work_item_buffers.values_mut() {
-        for phase_work_item_buffers in view_work_item_buffers.values_mut() {
+    for phase_instance_buffers in phase_instance_buffers.values_mut() {
+        let UntypedPhaseBatchedInstanceBuffers {
+            ref mut data_buffer,
+            ref mut work_item_buffers,
+            ref mut late_indexed_indirect_parameters_buffer,
+            ref mut late_non_indexed_indirect_parameters_buffer,
+        } = *phase_instance_buffers;
+
+        data_buffer.write_buffer(&render_device);
+        late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
+        late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
+
+        for phase_work_item_buffers in work_item_buffers.values_mut() {
             match *phase_work_item_buffers {
                 PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => {
                     buffer_vec.write_buffer(&render_device, &render_queue);
@@ -1728,12 +1955,9 @@
 pub fn clear_indirect_parameters_buffers(
     mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
 ) {
-    indirect_parameters_buffers.indexed_data.clear();
-    indirect_parameters_buffers.indexed_metadata.clear();
-    indirect_parameters_buffers.indexed_batch_sets.clear();
-    indirect_parameters_buffers.non_indexed_data.clear();
-    indirect_parameters_buffers.non_indexed_metadata.clear();
-    indirect_parameters_buffers.non_indexed_batch_sets.clear();
+    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
+        phase_indirect_parameters_buffers.clear();
+    }
 }
 
 pub fn write_indirect_parameters_buffers(
@@ -1741,26 +1965,28 @@
     render_queue: Res<RenderQueue>,
     mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
 ) {
-    indirect_parameters_buffers
-        .indexed_data
-        .write_buffer(&render_device);
-    indirect_parameters_buffers
-        .non_indexed_data
-        .write_buffer(&render_device);
-
-    indirect_parameters_buffers
-        .indexed_metadata
-        .write_buffer(&render_device, &render_queue);
-    indirect_parameters_buffers
-        .non_indexed_metadata
-        .write_buffer(&render_device, &render_queue);
-
-    indirect_parameters_buffers
-        .indexed_batch_sets
-        .write_buffer(&render_device, &render_queue);
-    indirect_parameters_buffers
-        .non_indexed_batch_sets
-        .write_buffer(&render_device, &render_queue);
+    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
+        phase_indirect_parameters_buffers
+            .indexed_data
+            .write_buffer(&render_device);
+        phase_indirect_parameters_buffers
+            .non_indexed_data
+            .write_buffer(&render_device);
+
+        phase_indirect_parameters_buffers
+            .indexed_metadata
+            .write_buffer(&render_device, &render_queue);
+        phase_indirect_parameters_buffers
+            .non_indexed_metadata
+            .write_buffer(&render_device,
&render_queue); + + phase_indirect_parameters_buffers + .indexed_batch_sets + .write_buffer(&render_device, &render_queue); + phase_indirect_parameters_buffers + .non_indexed_batch_sets + .write_buffer(&render_device, &render_queue); + } } #[cfg(test)] diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs index ddafb6f5162f7..ad866c357a698 100644 --- a/crates/bevy_render/src/batching/mod.rs +++ b/crates/bevy_render/src/batching/mod.rs @@ -4,18 +4,15 @@ use bevy_ecs::{ system::{ResMut, SystemParam, SystemParamItem}, }; use bytemuck::Pod; +use gpu_preprocessing::UntypedPhaseIndirectParametersBuffers; use nonmax::NonMaxU32; -use self::gpu_preprocessing::IndirectParametersBuffers; use crate::{ render_phase::{ - BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, SortedPhaseItem, - SortedRenderPhase, ViewBinnedRenderPhases, + BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, InputUniformIndex, + PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, ViewBinnedRenderPhases, }, render_resource::{CachedRenderPipelineId, GpuArrayBufferable}, -}; -use crate::{ - render_phase::{InputUniformIndex, PhaseItemExtraIndex}, sync_world::MainEntity, }; @@ -179,7 +176,7 @@ pub trait GetFullBatchData: GetBatchData { indexed: bool, base_output_index: u32, batch_set_index: Option, - indirect_parameters_buffers: &mut IndirectParametersBuffers, + indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, indirect_parameters_offset: u32, ); } diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs index 0b3e57fac1b11..76e1f1f0b7619 100644 --- a/crates/bevy_render/src/lib.rs +++ b/crates/bevy_render/src/lib.rs @@ -102,6 +102,7 @@ use alloc::sync::Arc; use bevy_app::{App, AppLabel, Plugin, SubApp}; use bevy_asset::{load_internal_asset, weak_handle, AssetApp, AssetServer, Handle}; use bevy_ecs::{prelude::*, schedule::ScheduleLabel}; +use bitflags::bitflags; use core::ops::{Deref, DerefMut}; use std::sync::Mutex; use tracing::debug; @@ -120,12 +121,21 @@ pub struct RenderPlugin { /// If `true`, disables asynchronous pipeline compilation. /// This has no effect on macOS, Wasm, iOS, or without the `multi_threaded` feature. pub synchronous_pipeline_compilation: bool, - /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so - /// that they can be read back to CPU. - /// - /// This is a debugging feature that may reduce performance. It primarily - /// exists for the `occlusion_culling` example. - pub allow_copies_from_indirect_parameters: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, +} + +bitflags! { + /// Debugging flags that can optionally be set when constructing the renderer. + #[derive(Clone, Copy, PartialEq, Default, Debug)] + pub struct RenderDebugFlags: u8 { + /// If true, this sets the `COPY_SRC` flag on indirect draw parameters + /// so that they can be read back to CPU. + /// + /// This is a debugging feature that may reduce performance. It + /// primarily exists for the `occlusion_culling` example. + const ALLOW_COPIES_FROM_INDIRECT_PARAMETERS = 1; + } } /// The systems sets of the default [`App`] rendering schedule. @@ -159,6 +169,9 @@ pub enum RenderSet { Prepare, /// A sub-set within [`Prepare`](RenderSet::Prepare) for initializing buffers, textures and uniforms for use in bind groups. PrepareResources, + /// Collect phase buffers after + /// [`PrepareResources`](RenderSet::PrepareResources) has run. 
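+    ///
+    /// Each phase's buffer-collection system runs here, merging the per-phase
+    /// buffers into the global tables before they're flushed to the GPU in
+    /// [`PrepareResourcesFlush`](RenderSet::PrepareResourcesFlush).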
+ PrepareResourcesCollectPhaseBuffers, /// Flush buffers after [`PrepareResources`](RenderSet::PrepareResources), but before [`PrepareBindGroups`](RenderSet::PrepareBindGroups). PrepareResourcesFlush, /// A sub-set within [`Prepare`](RenderSet::Prepare) for constructing bind groups, or other data that relies on render resources prepared in [`PrepareResources`](RenderSet::PrepareResources). @@ -210,7 +223,12 @@ impl Render { .after(prepare_assets::), ); schedule.configure_sets( - (PrepareResources, PrepareResourcesFlush, PrepareBindGroups) + ( + PrepareResources, + PrepareResourcesCollectPhaseBuffers, + PrepareResourcesFlush, + PrepareBindGroups, + ) .chain() .in_set(Prepare), ); @@ -380,7 +398,7 @@ impl Plugin for RenderPlugin { GlobalsPlugin, MorphPlugin, BatchingPlugin { - allow_copies_from_indirect_parameters: self.allow_copies_from_indirect_parameters, + debug_flags: self.debug_flags, }, SyncWorldPlugin, StoragePlugin, diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs index 4ddd7a86abf65..ede05bb2fc618 100644 --- a/crates/bevy_render/src/render_phase/mod.rs +++ b/crates/bevy_render/src/render_phase/mod.rs @@ -43,10 +43,14 @@ use nonmax::NonMaxU32; pub use rangefinder::*; use wgpu::Features; -use crate::batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport}; +use crate::batching::gpu_preprocessing::{ + GpuPreprocessingMode, GpuPreprocessingSupport, PhaseBatchedInstanceBuffers, + PhaseIndirectParametersBuffers, +}; use crate::renderer::RenderDevice; use crate::sync_world::{MainEntity, MainEntityHashMap}; use crate::view::RetainedViewEntity; +use crate::RenderDebugFlags; use crate::{ batching::{ self, @@ -1011,18 +1015,26 @@ impl UnbatchableBinnedEntityIndexSet { /// /// This is the version used when the pipeline supports GPU preprocessing: e.g. /// 3D PBR meshes. -pub struct BinnedRenderPhasePlugin(PhantomData<(BPI, GFBD)>) +pub struct BinnedRenderPhasePlugin where BPI: BinnedPhaseItem, - GFBD: GetFullBatchData; + GFBD: GetFullBatchData, +{ + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, + phantom: PhantomData<(BPI, GFBD)>, +} -impl Default for BinnedRenderPhasePlugin +impl BinnedRenderPhasePlugin where BPI: BinnedPhaseItem, GFBD: GetFullBatchData, { - fn default() -> Self { - Self(PhantomData) + pub fn new(debug_flags: RenderDebugFlags) -> Self { + Self { + debug_flags, + phantom: PhantomData, + } } } @@ -1038,6 +1050,11 @@ where render_app .init_resource::>() + .init_resource::>() + .insert_resource(PhaseIndirectParametersBuffers::::new( + self.debug_flags + .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS), + )) .add_systems( Render, ( @@ -1054,6 +1071,13 @@ where ) .in_set(RenderSet::PrepareResources), sweep_old_entities::.in_set(RenderSet::QueueSweep), + gpu_preprocessing::collect_buffers_for_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ) + .in_set(RenderSet::PrepareResourcesCollectPhaseBuffers), ), ); } @@ -1097,18 +1121,26 @@ where /// /// This is the version used when the pipeline supports GPU preprocessing: e.g. /// 3D PBR meshes. -pub struct SortedRenderPhasePlugin(PhantomData<(SPI, GFBD)>) +pub struct SortedRenderPhasePlugin where SPI: SortedPhaseItem, - GFBD: GetFullBatchData; + GFBD: GetFullBatchData, +{ + /// Debugging flags that can optionally be set when constructing the renderer. 
+ pub debug_flags: RenderDebugFlags, + phantom: PhantomData<(SPI, GFBD)>, +} -impl Default for SortedRenderPhasePlugin +impl SortedRenderPhasePlugin where SPI: SortedPhaseItem, GFBD: GetFullBatchData, { - fn default() -> Self { - Self(PhantomData) + pub fn new(debug_flags: RenderDebugFlags) -> Self { + Self { + debug_flags, + phantom: PhantomData, + } } } @@ -1124,18 +1156,33 @@ where render_app .init_resource::>() + .init_resource::>() + .insert_resource(PhaseIndirectParametersBuffers::::new( + self.debug_flags + .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS), + )) .add_systems( Render, ( - no_gpu_preprocessing::batch_and_prepare_sorted_render_phase:: - .run_if(resource_exists::>), - gpu_preprocessing::batch_and_prepare_sorted_render_phase::.run_if( - resource_exists::< - BatchedInstanceBuffers, - >, - ), - ) - .in_set(RenderSet::PrepareResources), + ( + no_gpu_preprocessing::batch_and_prepare_sorted_render_phase:: + .run_if(resource_exists::>), + gpu_preprocessing::batch_and_prepare_sorted_render_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ), + ) + .in_set(RenderSet::PrepareResources), + gpu_preprocessing::collect_buffers_for_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ) + .in_set(RenderSet::PrepareResourcesCollectPhaseBuffers), + ), ); } } diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs index 8cb2cbb16db8a..5d61b879076e0 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh.rs +++ b/crates/bevy_sprite/src/mesh2d/mesh.rs @@ -479,7 +479,7 @@ impl GetFullBatchData for Mesh2dPipeline { indexed: bool, base_output_index: u32, batch_set_index: Option, - indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffers, + indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::UntypedPhaseIndirectParametersBuffers, indirect_parameters_offset: u32, ) { // Note that `IndirectParameters` covers both of these structures, even diff --git a/examples/3d/occlusion_culling.rs b/examples/3d/occlusion_culling.rs index 11bdde698a0bd..767875e86108d 100644 --- a/examples/3d/occlusion_culling.rs +++ b/examples/3d/occlusion_culling.rs @@ -6,6 +6,7 @@ //! the effects of occlusion culling can be seen. 
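 //!
 //! Reading the indirect draw parameters back on the CPU requires constructing
 //! the renderer with `RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS`,
 //! which `main` below sets on both `RenderPlugin` and `PbrPlugin`.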
use std::{ + any::TypeId, f32::consts::PI, fmt::Write as _, result::Result, @@ -15,9 +16,13 @@ use std::{ use bevy::{ color::palettes::css::{SILVER, WHITE}, core_pipeline::{ - core_3d::graph::{Core3d, Node3d}, + core_3d::{ + graph::{Core3d, Node3d}, + Opaque3d, + }, prepass::DepthPrepass, }, + pbr::PbrPlugin, prelude::*, render::{ batching::gpu_preprocessing::{ @@ -29,7 +34,7 @@ use bevy::{ render_resource::{Buffer, BufferDescriptor, BufferUsages, MapMode}, renderer::{RenderAdapter, RenderContext, RenderDevice}, settings::WgpuFeatures, - Render, RenderApp, RenderPlugin, RenderSet, + Render, RenderApp, RenderDebugFlags, RenderPlugin, RenderSet, }, }; use bytemuck::Pod; @@ -172,6 +177,8 @@ impl Default for AppStatus { } fn main() { + let render_debug_flags = RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS; + App::new() .add_plugins( DefaultPlugins @@ -183,7 +190,11 @@ fn main() { ..default() }) .set(RenderPlugin { - allow_copies_from_indirect_parameters: true, + debug_flags: render_debug_flags, + ..default() + }) + .set(PbrPlugin { + debug_flags: render_debug_flags, ..default() }), ) @@ -421,6 +432,14 @@ impl render_graph::Node for ReadbackIndirectParametersNode { return Ok(()); }; + // Get the indirect parameters buffers corresponding to the opaque 3D + // phase, since all our meshes are in that phase. + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(&TypeId::of::()) + else { + return Ok(()); + }; + // Grab both the buffers we're copying from and the staging buffers // we're copying to. Remember that we can't map the indirect parameters // buffers directly, so we have to copy their contents to a staging @@ -431,8 +450,8 @@ impl render_graph::Node for ReadbackIndirectParametersNode { Some(indirect_parameters_staging_data_buffer), Some(indirect_parameters_staging_batch_sets_buffer), ) = ( - indirect_parameters_buffers.indexed_data_buffer(), - indirect_parameters_buffers.indexed_batch_sets_buffer(), + phase_indirect_parameters_buffers.indexed_data_buffer(), + phase_indirect_parameters_buffers.indexed_batch_sets_buffer(), indirect_parameters_mapping_buffers.data.as_ref(), indirect_parameters_mapping_buffers.batch_sets.as_ref(), ) @@ -474,10 +493,16 @@ fn create_indirect_parameters_staging_buffers( indirect_parameters_buffers: Res, render_device: Res, ) { + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(&TypeId::of::()) + else { + return; + }; + // Fetch the indirect parameters buffers that we're going to copy from. 
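     // These live in the per-phase table now, so they might not exist yet if
     // the `Opaque3d` phase hasn't been built; in that case we simply return.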
let (Some(indexed_data_buffer), Some(indexed_batch_set_buffer)) = ( - indirect_parameters_buffers.indexed_data_buffer(), - indirect_parameters_buffers.indexed_batch_sets_buffer(), + phase_indirect_parameters_buffers.indexed_data_buffer(), + phase_indirect_parameters_buffers.indexed_batch_sets_buffer(), ) else { return; }; diff --git a/examples/shader/custom_render_phase.rs b/examples/shader/custom_render_phase.rs index 12a9c55f2ff77..b11028abf9a04 100644 --- a/examples/shader/custom_render_phase.rs +++ b/examples/shader/custom_render_phase.rs @@ -29,6 +29,7 @@ use bevy::{ batching::{ gpu_preprocessing::{ batch_and_prepare_sorted_render_phase, IndirectParametersMetadata, + UntypedPhaseIndirectParametersBuffers, }, GetBatchData, GetFullBatchData, }, @@ -435,7 +436,7 @@ impl GetFullBatchData for StencilPipeline { indexed: bool, base_output_index: u32, batch_set_index: Option, - indirect_parameters_buffers: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffers, + indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, indirect_parameters_offset: u32, ) { // Note that `IndirectParameters` covers both of these structures, even diff --git a/examples/shader/specialized_mesh_pipeline.rs b/examples/shader/specialized_mesh_pipeline.rs index 780ca3c36ee5d..e228d776456f0 100644 --- a/examples/shader/specialized_mesh_pipeline.rs +++ b/examples/shader/specialized_mesh_pipeline.rs @@ -16,12 +16,12 @@ use bevy::{ }, prelude::*, render::{ - batching::GetFullBatchData, batching::{ gpu_preprocessing::{ - self, BatchedInstanceBuffers, IndirectParametersBuffers, PreprocessWorkItem, + self, PhaseBatchedInstanceBuffers, PhaseIndirectParametersBuffers, + PreprocessWorkItem, UntypedPhaseBatchedInstanceBuffers, }, - GetBatchData, + GetBatchData, GetFullBatchData, }, experimental::occlusion_culling::OcclusionCulling, extract_component::{ExtractComponent, ExtractComponentPlugin}, @@ -291,24 +291,21 @@ fn queue_custom_mesh_pipeline( Res, ), param: StaticSystemParam<::Param>, - gpu_array_buffer: ResMut< - BatchedInstanceBuffers< - ::BufferData, - ::BufferInputData, - >, + mut phase_batched_instance_buffers: ResMut< + PhaseBatchedInstanceBuffers::BufferData>, >, - mut indirect_parameters_buffers: ResMut, + mut phase_indirect_parameters_buffers: ResMut>, mut change_tick: Local, ) { let system_param_item = param.into_inner(); - let BatchedInstanceBuffers { + let UntypedPhaseBatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, ref mut late_indexed_indirect_parameters_buffer, ref mut late_non_indexed_indirect_parameters_buffer, .. - } = gpu_array_buffer.into_inner(); + } = phase_batched_instance_buffers.buffers; // Get the id for our custom draw function let draw_function_id = opaque_draw_functions @@ -378,7 +375,8 @@ fn queue_custom_mesh_pipeline( // batch set. if mesh_batch_set_info.is_none() { mesh_batch_set_info = Some(MeshBatchSetInfo { - indirect_parameters_index: indirect_parameters_buffers + indirect_parameters_index: phase_indirect_parameters_buffers + .buffers .allocate(mesh.indexed(), 1), is_indexed: mesh.indexed(), }); @@ -450,7 +448,8 @@ fn queue_custom_mesh_pipeline( // indirect parameters buffer, so that the renderer will end up // enqueuing a command to draw the mesh. if let Some(mesh_info) = mesh_batch_set_info { - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .add_batch_set(mesh_info.is_indexed, mesh_info.indirect_parameters_index); } }
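With these changes, code outside the batching module reaches a phase's GPU buffers through `TypeId`-keyed tables instead of through fields on a single global resource. Below is a minimal sketch of that lookup pattern, modeled on the occlusion-culling example above (the system name is hypothetical, and its registration in the render app is omitted):

```rust
use core::any::TypeId;

use bevy::{
    core_pipeline::core_3d::Opaque3d,
    prelude::*,
    render::batching::gpu_preprocessing::IndirectParametersBuffers,
};

/// A render-world system that inspects the GPU-built indirect draw data for
/// the `Opaque3d` phase.
fn inspect_opaque_3d_indirect_parameters(
    indirect_parameters_buffers: Res<IndirectParametersBuffers>,
) {
    // The global resource is now a map keyed by the phase item's `TypeId`,
    // so look up the entry for the phase of interest first.
    let Some(phase_buffers) = indirect_parameters_buffers.get(&TypeId::of::<Opaque3d>()) else {
        return;
    };

    // The per-phase buffers expose the accessors that previously lived on
    // the global resource, such as the indexed indirect parameters buffer.
    if phase_buffers.indexed_data_buffer().is_some() {
        // With `ALLOW_COPIES_FROM_INDIRECT_PARAMETERS` set, this buffer is
        // created with `COPY_SRC` and can be copied to a staging buffer for
        // CPU readback, as the occlusion-culling example does.
    }
}
```

The same pattern applies to `BatchedInstanceBuffers`, whose `phase_instance_buffers` table is likewise keyed by the phase's `TypeId`.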