diff --git a/crates/bevy_render/src/batching/gpu_preprocessing.rs b/crates/bevy_render/src/batching/gpu_preprocessing.rs index 21780b222797d..71ba6a82872d8 100644 --- a/crates/bevy_render/src/batching/gpu_preprocessing.rs +++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs @@ -15,6 +15,7 @@ use bevy_ecs::{ use bevy_encase_derive::ShaderType; use bevy_math::UVec4; use bevy_platform::collections::{hash_map::Entry, HashMap, HashSet}; +use bevy_tasks::ComputeTaskPool; use bevy_utils::{default, TypeIdMap}; use bytemuck::{Pod, Zeroable}; use encase::{internal::WriteInto, ShaderSize}; @@ -2017,56 +2018,74 @@ pub fn write_batched_instance_buffers( phase_instance_buffers, } = gpu_array_buffer.into_inner(); - current_input_buffer - .buffer - .write_buffer(&render_device, &render_queue); - previous_input_buffer - .buffer - .write_buffer(&render_device, &render_queue); - - for phase_instance_buffers in phase_instance_buffers.values_mut() { - let UntypedPhaseBatchedInstanceBuffers { - ref mut data_buffer, - ref mut work_item_buffers, - ref mut late_indexed_indirect_parameters_buffer, - ref mut late_non_indexed_indirect_parameters_buffer, - } = *phase_instance_buffers; - - data_buffer.write_buffer(&render_device); - late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue); - late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue); - - for phase_work_item_buffers in work_item_buffers.values_mut() { - match *phase_work_item_buffers { - PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => { - buffer_vec.write_buffer(&render_device, &render_queue); - } - PreprocessWorkItemBuffers::Indirect { - ref mut indexed, - ref mut non_indexed, - ref mut gpu_occlusion_culling, - } => { - indexed.write_buffer(&render_device, &render_queue); - non_indexed.write_buffer(&render_device, &render_queue); - - if let Some(GpuOcclusionCullingWorkItemBuffers { - ref mut late_indexed, - ref mut late_non_indexed, - late_indirect_parameters_indexed_offset: _, - late_indirect_parameters_non_indexed_offset: _, - }) = *gpu_occlusion_culling - { - if !late_indexed.is_empty() { - late_indexed.write_buffer(&render_device); + let render_device = &*render_device; + let render_queue = &*render_queue; + + ComputeTaskPool::get().scope(|scope| { + scope.spawn(async { + let _span = tracing::info_span!("write_current_input_buffers").entered(); + current_input_buffer + .buffer + .write_buffer(render_device, render_queue); + }); + scope.spawn(async { + let _span = tracing::info_span!("write_previous_input_buffers").entered(); + previous_input_buffer + .buffer + .write_buffer(render_device, render_queue); + }); + + for phase_instance_buffers in phase_instance_buffers.values_mut() { + let UntypedPhaseBatchedInstanceBuffers { + ref mut data_buffer, + ref mut work_item_buffers, + ref mut late_indexed_indirect_parameters_buffer, + ref mut late_non_indexed_indirect_parameters_buffer, + } = *phase_instance_buffers; + + scope.spawn(async { + let _span = tracing::info_span!("write_phase_instance_buffers").entered(); + data_buffer.write_buffer(render_device); + late_indexed_indirect_parameters_buffer.write_buffer(render_device, render_queue); + late_non_indexed_indirect_parameters_buffer + .write_buffer(render_device, render_queue); + }); + + for phase_work_item_buffers in work_item_buffers.values_mut() { + scope.spawn(async { + let _span = tracing::info_span!("write_work_item_buffers").entered(); + match *phase_work_item_buffers { + PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => { + buffer_vec.write_buffer(render_device, render_queue); } - if !late_non_indexed.is_empty() { - late_non_indexed.write_buffer(&render_device); + PreprocessWorkItemBuffers::Indirect { + ref mut indexed, + ref mut non_indexed, + ref mut gpu_occlusion_culling, + } => { + indexed.write_buffer(render_device, render_queue); + non_indexed.write_buffer(render_device, render_queue); + + if let Some(GpuOcclusionCullingWorkItemBuffers { + ref mut late_indexed, + ref mut late_non_indexed, + late_indirect_parameters_indexed_offset: _, + late_indirect_parameters_non_indexed_offset: _, + }) = *gpu_occlusion_culling + { + if !late_indexed.is_empty() { + late_indexed.write_buffer(render_device); + } + if !late_non_indexed.is_empty() { + late_non_indexed.write_buffer(render_device); + } + } } } - } + }); } } - } + }); } pub fn clear_indirect_parameters_buffers( @@ -2082,43 +2101,71 @@ pub fn write_indirect_parameters_buffers( render_queue: Res, mut indirect_parameters_buffers: ResMut, ) { - for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() { - phase_indirect_parameters_buffers - .indexed - .data - .write_buffer(&render_device); - phase_indirect_parameters_buffers - .non_indexed - .data - .write_buffer(&render_device); - - phase_indirect_parameters_buffers - .indexed - .cpu_metadata - .write_buffer(&render_device, &render_queue); - phase_indirect_parameters_buffers - .non_indexed - .cpu_metadata - .write_buffer(&render_device, &render_queue); - - phase_indirect_parameters_buffers - .non_indexed - .gpu_metadata - .write_buffer(&render_device); - phase_indirect_parameters_buffers - .indexed - .gpu_metadata - .write_buffer(&render_device); - - phase_indirect_parameters_buffers - .indexed - .batch_sets - .write_buffer(&render_device, &render_queue); - phase_indirect_parameters_buffers - .non_indexed - .batch_sets - .write_buffer(&render_device, &render_queue); - } + let render_device = &*render_device; + let render_queue = &*render_queue; + ComputeTaskPool::get().scope(|scope| { + for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() { + scope.spawn(async { + let _span = tracing::info_span!("indexed_data").entered(); + phase_indirect_parameters_buffers + .indexed + .data + .write_buffer(render_device); + }); + scope.spawn(async { + let _span = tracing::info_span!("non_indexed_data").entered(); + phase_indirect_parameters_buffers + .non_indexed + .data + .write_buffer(render_device); + }); + + scope.spawn(async { + let _span = tracing::info_span!("indexed_cpu_metadata").entered(); + phase_indirect_parameters_buffers + .indexed + .cpu_metadata + .write_buffer(render_device, render_queue); + }); + scope.spawn(async { + let _span = tracing::info_span!("non_indexed_cpu_metadata").entered(); + phase_indirect_parameters_buffers + .non_indexed + .cpu_metadata + .write_buffer(render_device, render_queue); + }); + + scope.spawn(async { + let _span = tracing::info_span!("non_indexed_gpu_metadata").entered(); + phase_indirect_parameters_buffers + .non_indexed + .gpu_metadata + .write_buffer(render_device); + }); + scope.spawn(async { + let _span = tracing::info_span!("indexed_gpu_metadata").entered(); + phase_indirect_parameters_buffers + .indexed + .gpu_metadata + .write_buffer(render_device); + }); + + scope.spawn(async { + let _span = tracing::info_span!("indexed_batch_sets").entered(); + phase_indirect_parameters_buffers + .indexed + .batch_sets + .write_buffer(render_device, render_queue); + }); + scope.spawn(async { + let _span = tracing::info_span!("non_indexed_batch_sets").entered(); + phase_indirect_parameters_buffers + .non_indexed + .batch_sets + .write_buffer(render_device, render_queue); + }); + } + }); } #[cfg(test)]