diff --git a/crates/bevy_pbr/src/decal/forward.rs b/crates/bevy_pbr/src/decal/forward.rs index 1229a688a9cf7..7732f1d3a4ab3 100644 --- a/crates/bevy_pbr/src/decal/forward.rs +++ b/crates/bevy_pbr/src/decal/forward.rs @@ -14,6 +14,7 @@ use bevy_render::{ AsBindGroup, CompareFunction, RenderPipelineDescriptor, Shader, SpecializedMeshPipelineError, }, + RenderDebugFlags, }; const FORWARD_DECAL_MESH_HANDLE: Handle = @@ -48,6 +49,7 @@ impl Plugin for ForwardDecalPlugin { app.add_plugins(MaterialPlugin::> { prepass_enabled: false, shadows_enabled: false, + debug_flags: RenderDebugFlags::default(), ..Default::default() }); } diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs index 88403900aeedb..10997ab43eea2 100644 --- a/crates/bevy_pbr/src/lib.rs +++ b/crates/bevy_pbr/src/lib.rs @@ -125,7 +125,7 @@ use bevy_render::{ sync_component::SyncComponentPlugin, texture::GpuImage, view::VisibilitySystems, - ExtractSchedule, Render, RenderApp, RenderSet, + ExtractSchedule, Render, RenderApp, RenderDebugFlags, RenderSet, }; use bevy_transform::TransformSystem; @@ -182,6 +182,8 @@ pub struct PbrPlugin { /// This requires compute shader support and so will be forcibly disabled if /// the platform doesn't support those. pub use_gpu_instance_buffer_builder: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, } impl Default for PbrPlugin { @@ -190,6 +192,7 @@ impl Default for PbrPlugin { prepass_enabled: true, add_default_deferred_lighting_plugin: true, use_gpu_instance_buffer_builder: true, + debug_flags: RenderDebugFlags::default(), } } } @@ -333,9 +336,11 @@ impl Plugin for PbrPlugin { .add_plugins(( MeshRenderPlugin { use_gpu_instance_buffer_builder: self.use_gpu_instance_buffer_builder, + debug_flags: self.debug_flags, }, MaterialPlugin:: { prepass_enabled: self.prepass_enabled, + debug_flags: self.debug_flags, ..Default::default() }, ScreenSpaceAmbientOcclusionPlugin, diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index e4ee53e0b7b73..7c6d93ec32c2e 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -252,6 +252,8 @@ pub struct MaterialPlugin { pub prepass_enabled: bool, /// Controls if shadows are enabled for the Material. pub shadows_enabled: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, pub _marker: PhantomData, } @@ -260,6 +262,7 @@ impl Default for MaterialPlugin { Self { prepass_enabled: true, shadows_enabled: true, + debug_flags: RenderDebugFlags::default(), _marker: Default::default(), } } @@ -374,7 +377,7 @@ where } if self.prepass_enabled { - app.add_plugins(PrepassPlugin::::default()); + app.add_plugins(PrepassPlugin::::new(self.debug_flags)); } } diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 1da0eb4f1c336..4885238e10d04 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -19,7 +19,7 @@ use bevy_render::{ renderer::RenderAdapter, sync_world::RenderEntity, view::{RenderVisibilityRanges, VISIBILITY_RANGES_STORAGE_BUFFER_COUNT}, - ExtractSchedule, Render, RenderApp, RenderSet, + ExtractSchedule, Render, RenderApp, RenderDebugFlags, RenderSet, }; pub use prepass_bindings::*; @@ -146,11 +146,19 @@ where /// Sets up the prepasses for a [`Material`]. /// /// This depends on the [`PrepassPipelinePlugin`]. 
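(For reference, a minimal sketch of how an app opts into the new `debug_flags` plumbing added above. The `App`/`DefaultPlugins` scaffolding is illustrative and not part of this patch; `RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS` is the flag this patch introduces in `bevy_render`.)

use bevy::pbr::PbrPlugin;
use bevy::prelude::*;
use bevy::render::RenderDebugFlags;

fn main() {
    App::new()
        .add_plugins(DefaultPlugins.set(PbrPlugin {
            // Sets `COPY_SRC` on indirect draw parameters so they can be read
            // back on the CPU; a debugging feature that may reduce performance.
            debug_flags: RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS,
            ..Default::default()
        }))
        .run();
}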
-pub struct PrepassPlugin(PhantomData); +pub struct PrepassPlugin { + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, + pub phantom: PhantomData, +} -impl Default for PrepassPlugin { - fn default() -> Self { - Self(Default::default()) +impl PrepassPlugin { + /// Creates a new [`PrepassPlugin`] with the given debug flags. + pub fn new(debug_flags: RenderDebugFlags) -> Self { + PrepassPlugin { + debug_flags, + phantom: PhantomData, + } } } @@ -176,8 +184,10 @@ where ), ) .add_plugins(( - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new( + self.debug_flags, + ), )); } diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs index 26559f9223dd5..495b6b4112f00 100644 --- a/crates/bevy_pbr/src/render/gpu_preprocess.rs +++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs @@ -29,12 +29,14 @@ use bevy_ecs::{ system::{lifetimeless::Read, Commands, Query, Res, ResMut}, world::{FromWorld, World}, }; +use bevy_render::batching::gpu_preprocessing::UntypedPhaseIndirectParametersBuffers; use bevy_render::{ batching::gpu_preprocessing::{ BatchedInstanceBuffers, GpuOcclusionCullingWorkItemBuffers, GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers, IndirectParametersIndexed, IndirectParametersMetadata, IndirectParametersNonIndexed, LatePreprocessWorkItemIndirectParameters, PreprocessWorkItem, PreprocessWorkItemBuffers, + UntypedPhaseBatchedInstanceBuffers, }, experimental::occlusion_culling::OcclusionCulling, render_graph::{Node, NodeRunError, RenderGraphApp, RenderGraphContext}, @@ -393,8 +395,22 @@ pub enum PhasePreprocessBindGroups { /// The bind groups for the compute shaders that reset indirect draw counts and /// build indirect parameters. -#[derive(Resource)] -pub struct BuildIndirectParametersBindGroups { +/// +/// There's one set of bind group for each phase. Phases are keyed off their +/// [`core::any::TypeId`]. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct BuildIndirectParametersBindGroups(pub TypeIdMap); + +impl BuildIndirectParametersBindGroups { + /// Creates a new, empty [`BuildIndirectParametersBindGroups`] table. + pub fn new() -> BuildIndirectParametersBindGroups { + Self::default() + } +} + +/// The per-phase set of bind groups for the compute shaders that reset indirect +/// draw counts and build indirect parameters. +pub struct PhaseBuildIndirectParametersBindGroups { /// The bind group for the `reset_indirect_batch_sets.wgsl` shader, for /// indexed meshes. reset_indexed_indirect_batch_sets: Option, @@ -470,9 +486,10 @@ impl Plugin for GpuMeshPreprocessPlugin { ( prepare_preprocess_pipelines.in_set(RenderSet::Prepare), prepare_preprocess_bind_groups - .run_if( - resource_exists::>, - ) + .run_if(resource_exists::>) .in_set(RenderSet::PrepareBindGroups), write_mesh_culling_data_buffer.in_set(RenderSet::PrepareResourcesFlush), ), @@ -511,7 +528,7 @@ impl Plugin for GpuMeshPreprocessPlugin { .add_render_graph_edge( Core3d, NodePbr::MainBuildIndirectParameters, - Node3d::DeferredPrepass + Node3d::DeferredPrepass, ); } } @@ -538,10 +555,8 @@ impl Node for EarlyGpuPreprocessNode { world: &'w World, ) -> Result<(), NodeRunError> { // Grab the [`BatchedInstanceBuffers`]. - let BatchedInstanceBuffers { - work_item_buffers: ref index_buffers, - .. 
- } = world.resource::>(); + let batched_instance_buffers = + world.resource::>(); let pipeline_cache = world.resource::(); let preprocess_pipelines = world.resource::(); @@ -583,13 +598,6 @@ impl Node for EarlyGpuPreprocessNode { continue; }; - // Grab the work item buffers for this view. - let Some(phase_work_item_buffers) = index_buffers.get(&view.retained_view_entity) - else { - warn!("The preprocessing index buffer wasn't present"); - continue; - }; - // Select the right pipeline, depending on whether GPU culling is in // use. let maybe_pipeline_id = if no_indirect_drawing { @@ -620,7 +628,17 @@ impl Node for EarlyGpuPreprocessNode { compute_pass.set_pipeline(preprocess_pipeline); // Loop over each render phase. - for (phase_type_id, work_item_buffers) in phase_work_item_buffers { + for (phase_type_id, batched_phase_instance_buffers) in + &batched_instance_buffers.phase_instance_buffers + { + // Grab the work item buffers for this view. + let Some(work_item_buffers) = batched_phase_instance_buffers + .work_item_buffers + .get(&view.retained_view_entity) + else { + continue; + }; + // Fetch the bind group for the render phase. let Some(phase_bind_groups) = bind_groups.get(phase_type_id) else { continue; @@ -775,12 +793,8 @@ impl Node for LateGpuPreprocessNode { world: &'w World, ) -> Result<(), NodeRunError> { // Grab the [`BatchedInstanceBuffers`]. - let BatchedInstanceBuffers { - ref work_item_buffers, - ref late_indexed_indirect_parameters_buffer, - ref late_non_indexed_indirect_parameters_buffer, - .. - } = world.resource::>(); + let batched_instance_buffers = + world.resource::>(); let pipeline_cache = world.resource::(); let preprocess_pipelines = world.resource::(); @@ -795,13 +809,6 @@ impl Node for LateGpuPreprocessNode { // Run the compute passes. for (view, bind_groups, view_uniform_offset) in self.view_query.iter_manual(world) { - // Grab the work item buffers for this view. - let Some(phase_work_item_buffers) = work_item_buffers.get(&view.retained_view_entity) - else { - warn!("The preprocessing index buffer wasn't present"); - continue; - }; - let maybe_pipeline_id = preprocess_pipelines .late_gpu_occlusion_culling_preprocess .pipeline_id; @@ -821,7 +828,25 @@ impl Node for LateGpuPreprocessNode { compute_pass.set_pipeline(preprocess_pipeline); - for (phase_type_id, work_item_buffers) in phase_work_item_buffers { + // Loop over each phase. Because we built the phases in parallel, + // each phase has a separate set of instance buffers. + for (phase_type_id, batched_phase_instance_buffers) in + &batched_instance_buffers.phase_instance_buffers + { + let UntypedPhaseBatchedInstanceBuffers { + ref work_item_buffers, + ref late_indexed_indirect_parameters_buffer, + ref late_non_indexed_indirect_parameters_buffer, + .. + } = *batched_phase_instance_buffers; + + // Grab the work item buffers for this view. + let Some(phase_work_item_buffers) = + work_item_buffers.get(&view.retained_view_entity) + else { + continue; + }; + let ( PreprocessWorkItemBuffers::Indirect { gpu_occlusion_culling: @@ -840,7 +865,7 @@ impl Node for LateGpuPreprocessNode { Some(late_indexed_indirect_parameters_buffer), Some(late_non_indexed_indirect_parameters_buffer), ) = ( - work_item_buffers, + phase_work_item_buffers, bind_groups.get(phase_type_id), late_indexed_indirect_parameters_buffer.buffer(), late_non_indexed_indirect_parameters_buffer.buffer(), @@ -1029,57 +1054,69 @@ fn run_build_indirect_parameters_node( return Ok(()); }; - // Build indexed indirect parameters. 
- if let ( - Some(reset_indexed_indirect_batch_sets_bind_group), - Some(build_indirect_indexed_params_bind_group), - ) = ( - &build_indirect_params_bind_groups.reset_indexed_indirect_batch_sets, - &build_indirect_params_bind_groups.build_indexed_indirect, - ) { - compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); - compute_pass.set_bind_group(0, reset_indexed_indirect_batch_sets_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .batch_set_count(true) - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); - } + // Loop over each phase. As each has a separate set of buffers, we need to + // build indirect parameters individually for each phase. + for (phase_type_id, phase_build_indirect_params_bind_groups) in + build_indirect_params_bind_groups.iter() + { + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(phase_type_id) + else { + continue; + }; - compute_pass.set_pipeline(build_indexed_indirect_params_pipeline); - compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .indexed_batch_count() - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); - } - } + // Build indexed indirect parameters. + if let ( + Some(reset_indexed_indirect_batch_sets_bind_group), + Some(build_indirect_indexed_params_bind_group), + ) = ( + &phase_build_indirect_params_bind_groups.reset_indexed_indirect_batch_sets, + &phase_build_indirect_params_bind_groups.build_indexed_indirect, + ) { + compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); + compute_pass.set_bind_group(0, reset_indexed_indirect_batch_sets_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .batch_set_count(true) + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } - // Build non-indexed indirect parameters. - if let ( - Some(reset_non_indexed_indirect_batch_sets_bind_group), - Some(build_indirect_non_indexed_params_bind_group), - ) = ( - &build_indirect_params_bind_groups.reset_non_indexed_indirect_batch_sets, - &build_indirect_params_bind_groups.build_non_indexed_indirect, - ) { - compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); - compute_pass.set_bind_group(0, reset_non_indexed_indirect_batch_sets_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .batch_set_count(false) - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + compute_pass.set_pipeline(build_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .indexed_batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } } - compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline); - compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .non_indexed_batch_count() - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + // Build non-indexed indirect parameters.
+ if let ( + Some(reset_non_indexed_indirect_batch_sets_bind_group), + Some(build_indirect_non_indexed_params_bind_group), + ) = ( + &phase_build_indirect_params_bind_groups.reset_non_indexed_indirect_batch_sets, + &phase_build_indirect_params_bind_groups.build_non_indexed_indirect, + ) { + compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); + compute_pass.set_bind_group(0, reset_non_indexed_indirect_batch_sets_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .batch_set_count(false) + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + + compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]); + let workgroup_count = phase_indirect_parameters_buffers + .non_indexed_batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } } } @@ -1637,18 +1674,14 @@ pub fn prepare_preprocess_bind_groups( ) { // Grab the `BatchedInstanceBuffers`. let BatchedInstanceBuffers { - data_buffer: ref data_buffer_vec, - ref work_item_buffers, current_input_buffer: ref current_input_buffer_vec, previous_input_buffer: ref previous_input_buffer_vec, - ref late_indexed_indirect_parameters_buffer, - ref late_non_indexed_indirect_parameters_buffer, + ref phase_instance_buffers, } = batched_instance_buffers.into_inner(); - let (Some(current_input_buffer), Some(previous_input_buffer), Some(data_buffer)) = ( + let (Some(current_input_buffer), Some(previous_input_buffer)) = ( current_input_buffer_vec.buffer().buffer(), previous_input_buffer_vec.buffer().buffer(), - data_buffer_vec.buffer(), ) else { return; }; @@ -1659,22 +1692,39 @@ pub fn prepare_preprocess_bind_groups( // Loop over each view. for (view_entity, view) in &views { - let Some(phase_work_item_buffers) = work_item_buffers.get(&view.retained_view_entity) - else { - continue; - }; - let mut bind_groups = TypeIdMap::default(); // Loop over each phase. - for (&phase_id, work_item_buffers) in phase_work_item_buffers { + for (phase_type_id, phase_instance_buffers) in phase_instance_buffers { + let UntypedPhaseBatchedInstanceBuffers { + data_buffer: ref data_buffer_vec, + ref work_item_buffers, + ref late_indexed_indirect_parameters_buffer, + ref late_non_indexed_indirect_parameters_buffer, + } = *phase_instance_buffers; + + let Some(data_buffer) = data_buffer_vec.buffer() else { + continue; + }; + + // Grab the indirect parameters buffers for this phase. + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(phase_type_id) + else { + continue; + }; + + let Some(work_item_buffers) = work_item_buffers.get(&view.retained_view_entity) else { + continue; + }; + // Create the `PreprocessBindGroupBuilder`. let preprocess_bind_group_builder = PreprocessBindGroupBuilder { view: view_entity, late_indexed_indirect_parameters_buffer, late_non_indexed_indirect_parameters_buffer, render_device: &render_device, - indirect_parameters_buffers: &indirect_parameters_buffers, + phase_indirect_parameters_buffers, mesh_culling_data_buffer: &mesh_culling_data_buffer, view_uniforms: &view_uniforms, previous_view_uniforms: &previous_view_uniforms, @@ -1725,7 +1775,7 @@ pub fn prepare_preprocess_bind_groups( // Write that bind group in. 
if let Some(bind_group) = bind_group { any_indirect = any_indirect || was_indirect; - bind_groups.insert(phase_id, bind_group); + bind_groups.insert(*phase_type_id, bind_group); } } @@ -1764,7 +1814,7 @@ struct PreprocessBindGroupBuilder<'a> { /// The device. render_device: &'a RenderDevice, /// The buffers that store indirect draw parameters. - indirect_parameters_buffers: &'a IndirectParametersBuffers, + phase_indirect_parameters_buffers: &'a UntypedPhaseIndirectParametersBuffers, /// The GPU buffer that stores the information needed to cull each mesh. mesh_culling_data_buffer: &'a MeshCullingDataBuffer, /// The GPU buffer that stores information about the view. @@ -1884,7 +1934,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; match ( - self.indirect_parameters_buffers.indexed_metadata_buffer(), + self.phase_indirect_parameters_buffers + .indexed_metadata_buffer(), indexed_work_item_buffer.buffer(), late_indexed_work_item_buffer.buffer(), self.late_indexed_indirect_parameters_buffer.buffer(), @@ -1975,7 +2026,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; match ( - self.indirect_parameters_buffers + self.phase_indirect_parameters_buffers .non_indexed_metadata_buffer(), non_indexed_work_item_buffer.buffer(), late_non_indexed_work_item_buffer.buffer(), @@ -2066,7 +2117,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; match ( - self.indirect_parameters_buffers.indexed_metadata_buffer(), + self.phase_indirect_parameters_buffers + .indexed_metadata_buffer(), late_indexed_work_item_buffer.buffer(), self.late_indexed_indirect_parameters_buffer.buffer(), ) { @@ -2146,7 +2198,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; match ( - self.indirect_parameters_buffers + self.phase_indirect_parameters_buffers .non_indexed_metadata_buffer(), late_non_indexed_work_item_buffer.buffer(), self.late_non_indexed_indirect_parameters_buffer.buffer(), @@ -2240,7 +2292,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; match ( - self.indirect_parameters_buffers.indexed_metadata_buffer(), + self.phase_indirect_parameters_buffers + .indexed_metadata_buffer(), indexed_work_item_buffer.buffer(), ) { (Some(indexed_metadata_buffer), Some(indexed_work_item_gpu_buffer)) => { @@ -2293,7 +2346,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> { let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; match ( - self.indirect_parameters_buffers + self.phase_indirect_parameters_buffers .non_indexed_metadata_buffer(), non_indexed_work_item_buffer.buffer(), ) { @@ -2346,121 +2399,134 @@ fn create_build_indirect_parameters_bind_groups( render_device: &RenderDevice, pipelines: &PreprocessPipelines, current_input_buffer: &Buffer, - indirect_parameters_buffer: &IndirectParametersBuffers, + indirect_parameters_buffers: &IndirectParametersBuffers, ) { - commands.insert_resource(BuildIndirectParametersBindGroups { - reset_indexed_indirect_batch_sets: match ( - indirect_parameters_buffer.indexed_batch_sets_buffer(), - ) { - (Some(indexed_batch_sets_buffer),) => Some( - render_device.create_bind_group( - "reset_indexed_indirect_batch_sets_bind_group", - // The early bind group is good for the main phase and late - // phase too. They bind the same buffers. 
- &pipelines - .early_phase - .reset_indirect_batch_sets - .bind_group_layout, - &BindGroupEntries::sequential((indexed_batch_sets_buffer.as_entire_binding(),)), - ), - ), - _ => None, - }, + let mut build_indirect_parameters_bind_groups = BuildIndirectParametersBindGroups::new(); + + for (phase_type_id, phase_indirect_parameters_buffer) in indirect_parameters_buffers.iter() { + build_indirect_parameters_bind_groups.insert( + *phase_type_id, + PhaseBuildIndirectParametersBindGroups { + reset_indexed_indirect_batch_sets: match ( + phase_indirect_parameters_buffer.indexed_batch_sets_buffer(), + ) { + (Some(indexed_batch_sets_buffer),) => Some( + render_device.create_bind_group( + "reset_indexed_indirect_batch_sets_bind_group", + // The early bind group is good for the main phase and late + // phase too. They bind the same buffers. + &pipelines + .early_phase + .reset_indirect_batch_sets + .bind_group_layout, + &BindGroupEntries::sequential(( + indexed_batch_sets_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, - reset_non_indexed_indirect_batch_sets: match ( - indirect_parameters_buffer.non_indexed_batch_sets_buffer(), - ) { - (Some(non_indexed_batch_sets_buffer),) => Some( - render_device.create_bind_group( - "reset_non_indexed_indirect_batch_sets_bind_group", - // The early bind group is good for the main phase and late - // phase too. They bind the same buffers. - &pipelines - .early_phase - .reset_indirect_batch_sets - .bind_group_layout, - &BindGroupEntries::sequential(( - non_indexed_batch_sets_buffer.as_entire_binding(), - )), - ), - ), - _ => None, - }, + reset_non_indexed_indirect_batch_sets: match ( + phase_indirect_parameters_buffer.non_indexed_batch_sets_buffer(), + ) { + (Some(non_indexed_batch_sets_buffer),) => Some( + render_device.create_bind_group( + "reset_non_indexed_indirect_batch_sets_bind_group", + // The early bind group is good for the main phase and late + // phase too. They bind the same buffers. + &pipelines + .early_phase + .reset_indirect_batch_sets + .bind_group_layout, + &BindGroupEntries::sequential(( + non_indexed_batch_sets_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, - build_indexed_indirect: match ( - indirect_parameters_buffer.indexed_metadata_buffer(), - indirect_parameters_buffer.indexed_data_buffer(), - indirect_parameters_buffer.indexed_batch_sets_buffer(), - ) { - ( - Some(indexed_indirect_parameters_metadata_buffer), - Some(indexed_indirect_parameters_data_buffer), - Some(indexed_batch_sets_buffer), - ) => Some( - render_device.create_bind_group( - "build_indexed_indirect_parameters_bind_group", - // The frustum culling bind group is good for occlusion culling - // too. They bind the same buffers. - &pipelines - .gpu_frustum_culling_build_indexed_indirect_params - .bind_group_layout, - &BindGroupEntries::sequential(( - current_input_buffer.as_entire_binding(), - // Don't use `as_entire_binding` here; the shader reads - // the length and `RawBufferVec` overallocates. 
- BufferBinding { - buffer: indexed_indirect_parameters_metadata_buffer, - offset: 0, - size: NonZeroU64::new( - indirect_parameters_buffer.indexed_batch_count() as u64 - * size_of::() as u64, - ), - }, - indexed_batch_sets_buffer.as_entire_binding(), - indexed_indirect_parameters_data_buffer.as_entire_binding(), - )), - ), - ), - _ => None, - }, + build_indexed_indirect: match ( + phase_indirect_parameters_buffer.indexed_metadata_buffer(), + phase_indirect_parameters_buffer.indexed_data_buffer(), + phase_indirect_parameters_buffer.indexed_batch_sets_buffer(), + ) { + ( + Some(indexed_indirect_parameters_metadata_buffer), + Some(indexed_indirect_parameters_data_buffer), + Some(indexed_batch_sets_buffer), + ) => Some( + render_device.create_bind_group( + "build_indexed_indirect_parameters_bind_group", + // The frustum culling bind group is good for occlusion culling + // too. They bind the same buffers. + &pipelines + .gpu_frustum_culling_build_indexed_indirect_params + .bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + // Don't use `as_entire_binding` here; the shader reads + // the length and `RawBufferVec` overallocates. + BufferBinding { + buffer: indexed_indirect_parameters_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + phase_indirect_parameters_buffer.indexed_batch_count() + as u64 + * size_of::() as u64, + ), + }, + indexed_batch_sets_buffer.as_entire_binding(), + indexed_indirect_parameters_data_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, - build_non_indexed_indirect: match ( - indirect_parameters_buffer.non_indexed_metadata_buffer(), - indirect_parameters_buffer.non_indexed_data_buffer(), - indirect_parameters_buffer.non_indexed_batch_sets_buffer(), - ) { - ( - Some(non_indexed_indirect_parameters_metadata_buffer), - Some(non_indexed_indirect_parameters_data_buffer), - Some(non_indexed_batch_sets_buffer), - ) => Some( - render_device.create_bind_group( - "build_non_indexed_indirect_parameters_bind_group", - // The frustum culling bind group is good for occlusion culling - // too. They bind the same buffers. - &pipelines - .gpu_frustum_culling_build_non_indexed_indirect_params - .bind_group_layout, - &BindGroupEntries::sequential(( - current_input_buffer.as_entire_binding(), - // Don't use `as_entire_binding` here; the shader reads - // the length and `RawBufferVec` overallocates. - BufferBinding { - buffer: non_indexed_indirect_parameters_metadata_buffer, - offset: 0, - size: NonZeroU64::new( - indirect_parameters_buffer.non_indexed_batch_count() as u64 - * size_of::() as u64, - ), - }, - non_indexed_batch_sets_buffer.as_entire_binding(), - non_indexed_indirect_parameters_data_buffer.as_entire_binding(), - )), - ), - ), - _ => None, - }, - }); + build_non_indexed_indirect: match ( + phase_indirect_parameters_buffer.non_indexed_metadata_buffer(), + phase_indirect_parameters_buffer.non_indexed_data_buffer(), + phase_indirect_parameters_buffer.non_indexed_batch_sets_buffer(), + ) { + ( + Some(non_indexed_indirect_parameters_metadata_buffer), + Some(non_indexed_indirect_parameters_data_buffer), + Some(non_indexed_batch_sets_buffer), + ) => Some( + render_device.create_bind_group( + "build_non_indexed_indirect_parameters_bind_group", + // The frustum culling bind group is good for occlusion culling + // too. They bind the same buffers. 
+ &pipelines + .gpu_frustum_culling_build_non_indexed_indirect_params + .bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + // Don't use `as_entire_binding` here; the shader reads + // the length and `RawBufferVec` overallocates. + BufferBinding { + buffer: non_indexed_indirect_parameters_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + phase_indirect_parameters_buffer.non_indexed_batch_count() + as u64 + * size_of::() as u64, + ), + }, + non_indexed_batch_sets_buffer.as_entire_binding(), + non_indexed_indirect_parameters_data_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, + }, + ); + } + + commands.insert_resource(build_indirect_parameters_bind_groups); } /// Writes the information needed to do GPU mesh culling to the GPU. diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index d89ee2a785a9a..6690724fb0e6d 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -21,7 +21,7 @@ use bevy_render::{ gpu_preprocessing::{ self, GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers, IndirectParametersIndexed, IndirectParametersMetadata, IndirectParametersNonIndexed, - InstanceInputUniformBuffer, + InstanceInputUniformBuffer, UntypedPhaseIndirectParametersBuffers, }, no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching, }, @@ -43,7 +43,8 @@ use bevy_render::{ Extract, }; use bevy_transform::components::GlobalTransform; -use bevy_utils::{default, Parallel}; +use bevy_utils::{default, Parallel, TypeIdMap}; +use core::any::TypeId; use core::mem::size_of; use material_bind_groups::MaterialBindingId; use render::skin::{self, SkinIndex}; @@ -79,13 +80,24 @@ use smallvec::{smallvec, SmallVec}; use static_assertions::const_assert_eq; /// Provides support for rendering 3D meshes. -#[derive(Default)] pub struct MeshRenderPlugin { /// Whether we're building [`MeshUniform`]s on GPU. /// /// This requires compute shader support and so will be forcibly disabled if /// the platform doesn't support those. pub use_gpu_instance_buffer_builder: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, +} + +impl MeshRenderPlugin { + /// Creates a new [`MeshRenderPlugin`] with the given debug flags. 
+ pub fn new(debug_flags: RenderDebugFlags) -> MeshRenderPlugin { + MeshRenderPlugin { + use_gpu_instance_buffer_builder: false, + debug_flags, + } + } } pub const FORWARD_IO_HANDLE: Handle = weak_handle!("38111de1-6e35-4dbb-877b-7b6f9334baf6"); @@ -166,18 +178,17 @@ impl Plugin for MeshRenderPlugin { (no_automatic_skin_batching, no_automatic_morph_batching), ) .add_plugins(( - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), - BinnedRenderPhasePlugin::::default(), - SortedRenderPhasePlugin::::default(), - SortedRenderPhasePlugin::::default(), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + BinnedRenderPhasePlugin::::new(self.debug_flags), + SortedRenderPhasePlugin::::new(self.debug_flags), + SortedRenderPhasePlugin::::new(self.debug_flags), )); if let Some(render_app) = app.get_sub_app_mut(RenderApp) { render_app - .init_resource::() .init_resource::() .init_resource::() .init_resource::() @@ -202,7 +213,7 @@ impl Plugin for MeshRenderPlugin { set_mesh_motion_vector_flags.in_set(RenderSet::PrepareMeshes), prepare_skins.in_set(RenderSet::PrepareResources), prepare_morphs.in_set(RenderSet::PrepareResources), - prepare_mesh_bind_group.in_set(RenderSet::PrepareBindGroups), + prepare_mesh_bind_groups.in_set(RenderSet::PrepareBindGroups), prepare_mesh_view_bind_groups .in_set(RenderSet::PrepareBindGroups) .after(prepare_oit_buffers), @@ -238,12 +249,14 @@ impl Plugin for MeshRenderPlugin { if use_gpu_instance_buffer_builder { render_app - .init_resource::>() + .init_resource::>() .init_resource::() .add_systems( ExtractSchedule, - extract_meshes_for_gpu_building - .in_set(ExtractMeshesSet), + extract_meshes_for_gpu_building.in_set(ExtractMeshesSet), ) .add_systems( Render, @@ -1956,7 +1969,7 @@ impl GetFullBatchData for MeshPipeline { indexed: bool, base_output_index: u32, batch_set_index: Option, - indirect_parameters_buffer: &mut IndirectParametersBuffers, + phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, indirect_parameters_offset: u32, ) { let indirect_parameters = IndirectParametersMetadata { @@ -1971,9 +1984,10 @@ impl GetFullBatchData for MeshPipeline { }; if indexed { - indirect_parameters_buffer.set_indexed(indirect_parameters_offset, indirect_parameters); + phase_indirect_parameters_buffers + .set_indexed(indirect_parameters_offset, indirect_parameters); } else { - indirect_parameters_buffer + phase_indirect_parameters_buffers .set_non_indexed(indirect_parameters_offset, indirect_parameters); } } @@ -2567,9 +2581,12 @@ impl SpecializedMeshPipeline for MeshPipeline { } } -/// Bind groups for meshes currently loaded. -#[derive(Resource, Default)] -pub struct MeshBindGroups { +/// The bind groups for meshes currently loaded. +/// +/// If GPU mesh preprocessing isn't in use, these are global to the scene. If +/// GPU mesh preprocessing is in use, these are specific to a single phase. +#[derive(Default)] +pub struct MeshPhaseBindGroups { model_only: Option, skinned: Option, morph_targets: HashMap, MeshBindGroupPair>, @@ -2581,7 +2598,18 @@ pub struct MeshBindGroupPair { no_motion_vectors: BindGroup, } -impl MeshBindGroups { +/// All bind groups for meshes currently loaded. 
+#[derive(Resource)] +pub enum MeshBindGroups { + /// The bind groups for the meshes for the entire scene, if GPU mesh + /// preprocessing isn't in use. + CpuPreprocessing(MeshPhaseBindGroups), + /// A mapping from the type ID of a phase (e.g. [`Opaque3d`]) to the mesh + /// bind groups for that phase. + GpuPreprocessing(TypeIdMap), +} + +impl MeshPhaseBindGroups { pub fn reset(&mut self) { self.model_only = None; self.skinned = None; @@ -2623,9 +2651,10 @@ impl MeshBindGroupPair { } } -pub fn prepare_mesh_bind_group( +/// Creates the per-mesh bind groups for each type of mesh and each phase. +pub fn prepare_mesh_bind_groups( + mut commands: Commands, meshes: Res>, - mut groups: ResMut, mesh_pipeline: Res, render_device: Res, cpu_batched_instance_buffer: Option< @@ -2638,24 +2667,80 @@ pub fn prepare_mesh_bind_group( weights_uniform: Res, mut render_lightmaps: ResMut, ) { - groups.reset(); + // CPU mesh preprocessing path. + if let Some(cpu_batched_instance_buffer) = cpu_batched_instance_buffer { + if let Some(instance_data_binding) = cpu_batched_instance_buffer + .into_inner() + .instance_data_binding() + { + // In this path, we only have a single set of bind groups for all phases. + let cpu_preprocessing_mesh_bind_groups = prepare_mesh_bind_groups_for_phase( + instance_data_binding, + &meshes, + &mesh_pipeline, + &render_device, + &skins_uniform, + &weights_uniform, + &mut render_lightmaps, + ); + + commands.insert_resource(MeshBindGroups::CpuPreprocessing( + cpu_preprocessing_mesh_bind_groups, + )); + return; + } + } + + // GPU mesh preprocessing path. + if let Some(gpu_batched_instance_buffers) = gpu_batched_instance_buffers { + let mut gpu_preprocessing_mesh_bind_groups = TypeIdMap::default(); + + // Loop over each phase. + for (phase_type_id, batched_phase_instance_buffers) in + &gpu_batched_instance_buffers.phase_instance_buffers + { + let Some(instance_data_binding) = + batched_phase_instance_buffers.instance_data_binding() + else { + continue; + }; + + let mesh_phase_bind_groups = prepare_mesh_bind_groups_for_phase( + instance_data_binding, + &meshes, + &mesh_pipeline, + &render_device, + &skins_uniform, + &weights_uniform, + &mut render_lightmaps, + ); + + gpu_preprocessing_mesh_bind_groups.insert(*phase_type_id, mesh_phase_bind_groups); + } + commands.insert_resource(MeshBindGroups::GpuPreprocessing( + gpu_preprocessing_mesh_bind_groups, + )); + } +} + +/// Creates the per-mesh bind groups for each type of mesh, for a single phase. +fn prepare_mesh_bind_groups_for_phase( + model: BindingResource, + meshes: &RenderAssets, + mesh_pipeline: &MeshPipeline, + render_device: &RenderDevice, + skins_uniform: &SkinUniforms, + weights_uniform: &MorphUniforms, + render_lightmaps: &mut RenderLightmaps, +) -> MeshPhaseBindGroups { let layouts = &mesh_pipeline.mesh_layouts; - let model = if let Some(cpu_batched_instance_buffer) = cpu_batched_instance_buffer { - cpu_batched_instance_buffer - .into_inner() - .instance_data_binding() - } else if let Some(gpu_batched_instance_buffers) = gpu_batched_instance_buffers { - gpu_batched_instance_buffers - .into_inner() - .instance_data_binding() - } else { - return; + // TODO: Reuse allocations. 
+ let mut groups = MeshPhaseBindGroups { + model_only: Some(layouts.model_only(render_device, &model)), + ..default() }; - let Some(model) = model else { return }; - - groups.model_only = Some(layouts.model_only(&render_device, &model)); // Create the skinned mesh bind group with the current and previous buffers // (the latter being for motion vector computation). If there's no previous @@ -2664,8 +2749,8 @@ pub fn prepare_mesh_bind_group( if let Some(skin) = skin { let prev_skin = skins_uniform.prev_buffer.buffer().unwrap_or(skin); groups.skinned = Some(MeshBindGroupPair { - motion_vectors: layouts.skinned_motion(&render_device, &model, skin, prev_skin), - no_motion_vectors: layouts.skinned(&render_device, &model, skin), + motion_vectors: layouts.skinned_motion(render_device, &model, skin, prev_skin), + no_motion_vectors: layouts.skinned(render_device, &model, skin), }); } @@ -2680,7 +2765,7 @@ pub fn prepare_mesh_bind_group( let prev_skin = skins_uniform.prev_buffer.buffer().unwrap_or(skin); MeshBindGroupPair { motion_vectors: layouts.morphed_skinned_motion( - &render_device, + render_device, &model, skin, weights, @@ -2689,7 +2774,7 @@ pub fn prepare_mesh_bind_group( prev_weights, ), no_motion_vectors: layouts.morphed_skinned( - &render_device, + render_device, &model, skin, weights, @@ -2699,18 +2784,13 @@ pub fn prepare_mesh_bind_group( } None => MeshBindGroupPair { motion_vectors: layouts.morphed_motion( - &render_device, + render_device, &model, weights, targets, prev_weights, ), - no_motion_vectors: layouts.morphed( - &render_device, - &model, - weights, - targets, - ), + no_motion_vectors: layouts.morphed(render_device, &model, weights, targets), }, }; groups.morph_targets.insert(id, bind_group_pair); @@ -2723,9 +2803,11 @@ pub fn prepare_mesh_bind_group( for (lightmap_slab_id, lightmap_slab) in render_lightmaps.slabs.iter_mut().enumerate() { groups.lightmaps.insert( LightmapSlabIndex(NonMaxU32::new(lightmap_slab_id as u32).unwrap()), - layouts.lightmapped(&render_device, &model, lightmap_slab, bindless_supported), + layouts.lightmapped(render_device, &model, lightmap_slab, bindless_supported), ); } + + groups } pub struct SetMeshViewBindGroup; @@ -2829,7 +2911,20 @@ impl RenderCommand
<P>
for SetMeshBindGroup { .get(entity) .map(|render_lightmap| render_lightmap.slab_index); - let Some(bind_group) = bind_groups.get( + let Some(mesh_phase_bind_groups) = (match *bind_groups { + MeshBindGroups::CpuPreprocessing(ref mesh_phase_bind_groups) => { + Some(mesh_phase_bind_groups) + } + MeshBindGroups::GpuPreprocessing(ref mesh_phase_bind_groups) => { + mesh_phase_bind_groups.get(&TypeId::of::
<P>
()) + } + }) else { + // This is harmless if e.g. we're rendering the `Shadow` phase and + // there weren't any shadows. + return RenderCommandResult::Success; + }; + + let Some(bind_group) = mesh_phase_bind_groups.get( mesh_asset_id, lightmap_slab_index, is_skinned, @@ -2981,9 +3076,18 @@ impl RenderCommand
<P>
for DrawMesh { // Look up the indirect parameters buffer, as well as // the buffer we're going to use for // `multi_draw_indexed_indirect_count` (if available). + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffer.get(&TypeId::of::
<P>
()) + else { + warn!( + "Not rendering mesh because indexed indirect parameters buffer \ + wasn't present for this phase", + ); + return RenderCommandResult::Skip; + }; let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = ( - indirect_parameters_buffer.indexed_data_buffer(), - indirect_parameters_buffer.indexed_batch_sets_buffer(), + phase_indirect_parameters_buffers.indexed_data_buffer(), + phase_indirect_parameters_buffers.indexed_batch_sets_buffer(), ) else { warn!( "Not rendering mesh because indexed indirect parameters buffer \ @@ -3038,9 +3142,18 @@ impl RenderCommand
<P>
for DrawMesh { // Look up the indirect parameters buffer, as well as the // buffer we're going to use for // `multi_draw_indirect_count` (if available). + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffer.get(&TypeId::of::
<P>
()) + else { + warn!( + "Not rendering mesh because indexed indirect parameters buffer \ + wasn't present for this phase", + ); + return RenderCommandResult::Skip; + }; let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = ( - indirect_parameters_buffer.non_indexed_data_buffer(), - indirect_parameters_buffer.non_indexed_batch_sets_buffer(), + phase_indirect_parameters_buffers.non_indexed_data_buffer(), + phase_indirect_parameters_buffers.non_indexed_batch_sets_buffer(), ) else { warn!( "Not rendering mesh because non-indexed indirect parameters buffer \ diff --git a/crates/bevy_render/Cargo.toml b/crates/bevy_render/Cargo.toml index 33fc2aa856f4d..4167185632fa3 100644 --- a/crates/bevy_render/Cargo.toml +++ b/crates/bevy_render/Cargo.toml @@ -101,6 +101,7 @@ variadics_please = "1.1" tracing = { version = "0.1", default-features = false, features = ["std"] } indexmap = { version = "2" } fixedbitset = { version = "0.5" } +bitflags = "2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] # Omit the `glsl` feature in non-WebAssembly by default. diff --git a/crates/bevy_render/src/batching/gpu_preprocessing.rs b/crates/bevy_render/src/batching/gpu_preprocessing.rs index ccfd2729707b5..6637638f389be 100644 --- a/crates/bevy_render/src/batching/gpu_preprocessing.rs +++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs @@ -1,8 +1,9 @@ //! Batching functionality when GPU preprocessing is in use. -use core::any::TypeId; +use core::{any::TypeId, marker::PhantomData, mem}; use bevy_app::{App, Plugin}; +use bevy_derive::{Deref, DerefMut}; use bevy_ecs::{ prelude::Entity, query::{Has, With}, @@ -24,26 +25,22 @@ use crate::{ experimental::occlusion_culling::OcclusionCulling, render_phase::{ BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet, - BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, InputUniformIndex, + BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, InputUniformIndex, PhaseItem, PhaseItemBatchSetKey as _, PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, ViewSortedRenderPhases, }, render_resource::{Buffer, BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec}, renderer::{RenderAdapter, RenderDevice, RenderQueue}, view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity}, - Render, RenderApp, RenderSet, + Render, RenderApp, RenderDebugFlags, RenderSet, }; use super::{BatchMeta, GetBatchData, GetFullBatchData}; #[derive(Default)] pub struct BatchingPlugin { - /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so - /// that they can be read back to CPU. - /// - /// This is a debugging feature that may reduce performance. It primarily - /// exists for the `occlusion_culling` example. - pub allow_copies_from_indirect_parameters: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, } impl Plugin for BatchingPlugin { @@ -54,7 +51,8 @@ impl Plugin for BatchingPlugin { render_app .insert_resource(IndirectParametersBuffers::new( - self.allow_copies_from_indirect_parameters, + self.debug_flags + .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS), )) .add_systems( Render, @@ -147,18 +145,6 @@ where BD: GpuArrayBufferable + Sync + Send + 'static, BDI: Pod + Default, { - /// A storage area for the buffer data that the GPU compute shader is - /// expected to write to. - /// - /// There will be one entry for each index. 
- pub data_buffer: UninitBufferVec, - - /// The index of the buffer data in the current input buffer that - /// corresponds to each instance. - /// - /// This is keyed off each view. Each view has a separate buffer. - pub work_item_buffers: HashMap>, - /// The uniform data inputs for the current frame. /// /// These are uploaded during the extraction phase. @@ -173,6 +159,81 @@ where /// corresponding buffer data input uniform in this list. pub previous_input_buffer: InstanceInputUniformBuffer, + /// The data needed to render buffers for each phase. + /// + /// The keys of this map are the type IDs of each phase: e.g. `Opaque3d`, + /// `AlphaMask3d`, etc. + pub phase_instance_buffers: TypeIdMap>, +} + +impl Default for BatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, + BDI: Pod + Sync + Send + Default + 'static, +{ + fn default() -> Self { + BatchedInstanceBuffers { + current_input_buffer: InstanceInputUniformBuffer::new(), + previous_input_buffer: InstanceInputUniformBuffer::new(), + phase_instance_buffers: HashMap::default(), + } + } +} + +/// The GPU buffers holding the data needed to render batches for a single +/// phase. +/// +/// These are split out per phase so that we can run the phases in parallel. +/// This is the version of the structure that has a type parameter, which +/// enables Bevy's scheduler to run the batching operations for the different +/// phases in parallel. +/// +/// See the documentation for [`BatchedInstanceBuffers`] for more information. +#[derive(Resource)] +pub struct PhaseBatchedInstanceBuffers +where + PI: PhaseItem, + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + /// The buffers for this phase. + pub buffers: UntypedPhaseBatchedInstanceBuffers, + phantom: PhantomData, +} + +impl Default for PhaseBatchedInstanceBuffers +where + PI: PhaseItem, + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + fn default() -> Self { + PhaseBatchedInstanceBuffers { + buffers: UntypedPhaseBatchedInstanceBuffers::default(), + phantom: PhantomData, + } + } +} + +/// The GPU buffers holding the data needed to render batches for a single +/// phase, without a type parameter for that phase. +/// +/// Since this structure doesn't have a type parameter, it can be placed in +/// [`BatchedInstanceBuffers::phase_instance_buffers`]. +pub struct UntypedPhaseBatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + /// A storage area for the buffer data that the GPU compute shader is + /// expected to write to. + /// + /// There will be one entry for each index. + pub data_buffer: UninitBufferVec, + + /// The index of the buffer data in the current input buffer that + /// corresponds to each instance. + /// + /// This is keyed off each view. Each view has a separate buffer. + pub work_item_buffers: HashMap, + /// A buffer that holds the number of indexed meshes that weren't visible in /// the previous frame, when GPU occlusion culling is in use. /// @@ -351,11 +412,11 @@ pub struct GpuOcclusionCullingWorkItemBuffers { /// The buffer of work items corresponding to non-indexed meshes. pub late_non_indexed: UninitBufferVec, /// The offset into the - /// [`BatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`] + /// [`UntypedPhaseBatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`] /// where this view's indirect dispatch counts for indexed meshes live. 
pub late_indirect_parameters_indexed_offset: u32, /// The offset into the - /// [`BatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`] + /// [`UntypedPhaseBatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`] /// where this view's indirect dispatch counts for non-indexed meshes live. pub late_indirect_parameters_non_indexed_offset: u32, } @@ -409,7 +470,7 @@ impl Default for LatePreprocessWorkItemIndirectParameters { /// You may need to call this function if you're implementing your own custom /// render phases. See the `specialized_mesh_pipeline` example. pub fn get_or_create_work_item_buffer<'a, I>( - work_item_buffers: &'a mut HashMap>, + work_item_buffers: &'a mut HashMap, view: RetainedViewEntity, no_indirect_drawing: bool, enable_gpu_occlusion_culling: bool, @@ -417,11 +478,7 @@ pub fn get_or_create_work_item_buffer<'a, I>( where I: 'static, { - let preprocess_work_item_buffers = match work_item_buffers - .entry(view) - .or_default() - .entry(TypeId::of::()) - { + let preprocess_work_item_buffers = match work_item_buffers.entry(view) { Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), Entry::Vacant(vacant_entry) => { if no_indirect_drawing { @@ -700,8 +757,71 @@ pub struct IndirectBatchSet { /// pass can determine how many meshes are actually to be drawn. /// /// These buffers will remain empty if indirect drawing isn't in use. -#[derive(Resource)] +#[derive(Resource, Deref, DerefMut)] pub struct IndirectParametersBuffers { + /// A mapping from a phase type ID to the indirect parameters buffers for + /// that phase. + /// + /// Examples of phase type IDs are `Opaque3d` and `AlphaMask3d`. + #[deref] + pub buffers: TypeIdMap, + /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so + /// that they can be read back to CPU. + /// + /// This is a debugging feature that may reduce performance. It primarily + /// exists for the `occlusion_culling` example. + pub allow_copies_from_indirect_parameter_buffers: bool, +} + +impl IndirectParametersBuffers { + /// Initializes a new [`IndirectParametersBuffers`] resource. + pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers { + IndirectParametersBuffers { + buffers: TypeIdMap::default(), + allow_copies_from_indirect_parameter_buffers, + } + } +} + +/// The buffers containing all the information that indirect draw commands use +/// to draw the scene, for a single phase. +/// +/// This is the version of the structure that has a type parameter, so that the +/// batching for different phases can run in parallel. +/// +/// See the [`IndirectParametersBuffers`] documentation for more information. +#[derive(Resource)] +pub struct PhaseIndirectParametersBuffers +where + PI: PhaseItem, +{ + /// The indirect draw buffers for the phase. + pub buffers: UntypedPhaseIndirectParametersBuffers, + phantom: PhantomData, +} + +impl PhaseIndirectParametersBuffers +where + PI: PhaseItem, +{ + pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> Self { + PhaseIndirectParametersBuffers { + buffers: UntypedPhaseIndirectParametersBuffers::new( + allow_copies_from_indirect_parameter_buffers, + ), + phantom: PhantomData, + } + } +} + +/// The buffers containing all the information that indirect draw commands use +/// to draw the scene, for a single phase. 
+/// +/// This is the version of the structure that doesn't have a type parameter, so +/// that it can be inserted into [`IndirectParametersBuffers::buffers`] +/// +/// See the [`IndirectParametersBuffers`] documentation for more information. +pub struct UntypedPhaseIndirectParametersBuffers { /// The GPU buffer that stores the indirect draw parameters for non-indexed /// meshes. /// @@ -751,15 +871,17 @@ pub struct IndirectParametersBuffers { indexed_batch_sets: RawBufferVec, } -impl IndirectParametersBuffers { +impl UntypedPhaseIndirectParametersBuffers { /// Creates the indirect parameters buffers. - pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers { + pub fn new( + allow_copies_from_indirect_parameter_buffers: bool, + ) -> UntypedPhaseIndirectParametersBuffers { let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT; if allow_copies_from_indirect_parameter_buffers { indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC; } - IndirectParametersBuffers { + UntypedPhaseIndirectParametersBuffers { non_indexed_data: UninitBufferVec::new(indirect_parameter_buffer_usages), non_indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE), non_indexed_batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages), @@ -952,6 +1074,15 @@ impl IndirectParametersBuffers { pub fn get_next_batch_set_index(&self, indexed: bool) -> Option { NonMaxU32::new(self.batch_set_count(indexed) as u32) } + + pub fn clear(&mut self) { + self.indexed_data.clear(); + self.indexed_metadata.clear(); + self.indexed_batch_sets.clear(); + self.non_indexed_data.clear(); + self.non_indexed_metadata.clear(); + self.non_indexed_batch_sets.clear(); + } } impl Default for IndirectParametersBuffers { @@ -1007,11 +1138,24 @@ where { /// Creates new buffers. pub fn new() -> Self { - BatchedInstanceBuffers { + Self::default() + } + + /// Clears out the buffers in preparation for a new frame. + pub fn clear(&mut self) { + // TODO: Don't do this. + self.phase_instance_buffers.clear(); + } +} + +impl UntypedPhaseBatchedInstanceBuffers +where + BD: GpuArrayBufferable + Sync + Send + 'static, +{ + pub fn new() -> Self { + UntypedPhaseBatchedInstanceBuffers { data_buffer: UninitBufferVec::new(BufferUsages::STORAGE), work_item_buffers: HashMap::default(), - current_input_buffer: InstanceInputUniformBuffer::new(), - previous_input_buffer: InstanceInputUniformBuffer::new(), late_indexed_indirect_parameters_buffer: RawBufferVec::new( BufferUsages::STORAGE | BufferUsages::INDIRECT, ), @@ -1039,17 +1183,14 @@ where // Clear each individual set of buffers, but don't depopulate the hash // table. We want to avoid reallocating these vectors every frame. 
for view_work_item_buffers in self.work_item_buffers.values_mut() { - for phase_work_item_buffers in view_work_item_buffers.values_mut() { - phase_work_item_buffers.clear(); - } + view_work_item_buffers.clear(); } } } -impl Default for BatchedInstanceBuffers +impl Default for UntypedPhaseBatchedInstanceBuffers where BD: GpuArrayBufferable + Sync + Send + 'static, - BDI: Pod + Default + Sync + Send + 'static, { fn default() -> Self { Self::new() @@ -1098,7 +1239,7 @@ where self, instance_end_index: u32, phase: &mut SortedRenderPhase, - indirect_parameters_buffers: &mut IndirectParametersBuffers, + phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, ) where I: CachedRenderPipelinePhaseItem + SortedPhaseItem, { @@ -1114,7 +1255,7 @@ where None => PhaseItemExtraIndex::None, }; if let Some(indirect_parameters_index) = self.indirect_parameters_index { - indirect_parameters_buffers + phase_indirect_parameters_buffers .add_batch_set(self.indexed, indirect_parameters_index.into()); } } @@ -1156,17 +1297,23 @@ pub fn delete_old_work_item_buffers( .iter() .map(|extracted_view| extracted_view.retained_view_entity) .collect(); - gpu_batched_instance_buffers - .work_item_buffers - .retain(|retained_view_entity, _| retained_view_entities.contains(retained_view_entity)); + for phase_instance_buffers in gpu_batched_instance_buffers + .phase_instance_buffers + .values_mut() + { + phase_instance_buffers + .work_item_buffers + .retain(|retained_view_entity, _| { + retained_view_entities.contains(retained_view_entity) + }); + } } /// Batch the items in a sorted render phase, when GPU instance buffer building /// is in use. This means comparing metadata needed to draw each phase item and /// trying to combine the draws into a batch. pub fn batch_and_prepare_sorted_render_phase( - gpu_array_buffer: ResMut>, - mut indirect_parameters_buffers: ResMut, + indirect_parameters_buffers: Res, mut sorted_render_phases: ResMut>, mut views: Query<( &ExtractedView, @@ -1178,14 +1325,19 @@ pub fn batch_and_prepare_sorted_render_phase( I: CachedRenderPipelinePhaseItem + SortedPhaseItem, GFBD: GetFullBatchData, { + let mut phase_batched_instance_buffers = + UntypedPhaseBatchedInstanceBuffers::::new(); + let mut phase_indirect_parameters_buffers = UntypedPhaseIndirectParametersBuffers::new( + indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers, + ); + // We only process GPU-built batch data in this function. - let BatchedInstanceBuffers { + let UntypedPhaseBatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, ref mut late_indexed_indirect_parameters_buffer, ref mut late_non_indexed_indirect_parameters_buffer, - .. - } = gpu_array_buffer.into_inner(); + } = phase_batched_instance_buffers; for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views { let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else { @@ -1231,7 +1383,7 @@ pub fn batch_and_prepare_sorted_render_phase( batch.flush( data_buffer.len() as u32, phase, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers, ); } @@ -1257,15 +1409,15 @@ pub fn batch_and_prepare_sorted_render_phase( if !can_batch { // Break a batch if we need to. 
if let Some(batch) = batch.take() { - batch.flush(output_index, phase, &mut indirect_parameters_buffers); + batch.flush(output_index, phase, &mut phase_indirect_parameters_buffers); } let indirect_parameters_index = if no_indirect_drawing { None } else if item_is_indexed { - Some(indirect_parameters_buffers.allocate_indexed(1)) + Some(phase_indirect_parameters_buffers.allocate_indexed(1)) } else { - Some(indirect_parameters_buffers.allocate_non_indexed(1)) + Some(phase_indirect_parameters_buffers.allocate_non_indexed(1)) }; // Start a new batch. @@ -1275,7 +1427,7 @@ pub fn batch_and_prepare_sorted_render_phase( item_is_indexed, output_index, None, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers, indirect_parameters_index, ); }; @@ -1317,7 +1469,7 @@ pub fn batch_and_prepare_sorted_render_phase( batch.flush( data_buffer.len() as u32, phase, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers, ); } } @@ -1325,8 +1477,8 @@ pub fn batch_and_prepare_sorted_render_phase( /// Creates batches for a render phase that uses bins. pub fn batch_and_prepare_binned_render_phase( - gpu_array_buffer: ResMut>, - mut indirect_parameters_buffers: ResMut, + mut phase_batched_instance_buffers: ResMut>, + mut phase_indirect_parameters_buffers: ResMut>, mut binned_render_phases: ResMut>, mut views: Query< ( @@ -1343,13 +1495,12 @@ pub fn batch_and_prepare_binned_render_phase( { let system_param_item = param.into_inner(); - let BatchedInstanceBuffers { + let UntypedPhaseBatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, ref mut late_indexed_indirect_parameters_buffer, ref mut late_non_indexed_indirect_parameters_buffer, - .. - } = gpu_array_buffer.into_inner(); + } = phase_batched_instance_buffers.buffers; for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views { let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else { @@ -1376,8 +1527,10 @@ pub fn batch_and_prepare_binned_render_phase( for (batch_set_key, bins) in &phase.multidrawable_meshes { let mut batch_set = None; - let indirect_parameters_base = - indirect_parameters_buffers.batch_count(batch_set_key.indexed()) as u32; + let indirect_parameters_base = phase_indirect_parameters_buffers + .buffers + .batch_count(batch_set_key.indexed()) + as u32; for (bin_key, bin) in bins { let first_output_index = data_buffer.len() as u32; let mut batch: Option = None; @@ -1408,9 +1561,11 @@ pub fn batch_and_prepare_binned_render_phase( None => { // Start a new batch, in indirect mode. 
- let indirect_parameters_index = - indirect_parameters_buffers.allocate(batch_set_key.indexed(), 1); - let batch_set_index = indirect_parameters_buffers + let indirect_parameters_index = phase_indirect_parameters_buffers + .buffers + .allocate(batch_set_key.indexed(), 1); + let batch_set_index = phase_indirect_parameters_buffers + .buffers .get_next_batch_set_index(batch_set_key.indexed()); GFBD::write_batch_indirect_parameters_metadata( @@ -1418,7 +1573,7 @@ pub fn batch_and_prepare_binned_render_phase( batch_set_key.indexed(), output_index, batch_set_index, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers.buffers, indirect_parameters_index, ); work_item_buffer.push( @@ -1447,7 +1602,8 @@ pub fn batch_and_prepare_binned_render_phase( first_batch: batch, batch_count: 1, bin_key: bin_key.clone(), - index: indirect_parameters_buffers + index: phase_indirect_parameters_buffers + .buffers .batch_set_count(batch_set_key.indexed()) as u32, }); @@ -1464,7 +1620,8 @@ pub fn batch_and_prepare_binned_render_phase( { if let Some(batch_set) = batch_set { batch_sets.push(batch_set); - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .add_batch_set(batch_set_key.indexed(), indirect_parameters_base); } } @@ -1513,17 +1670,19 @@ pub fn batch_and_prepare_binned_render_phase( None if !no_indirect_drawing => { // Start a new batch, in indirect mode. - let indirect_parameters_index = - indirect_parameters_buffers.allocate(key.0.indexed(), 1); - let batch_set_index = - indirect_parameters_buffers.get_next_batch_set_index(key.0.indexed()); + let indirect_parameters_index = phase_indirect_parameters_buffers + .buffers + .allocate(key.0.indexed(), 1); + let batch_set_index = phase_indirect_parameters_buffers + .buffers + .get_next_batch_set_index(key.0.indexed()); GFBD::write_batch_indirect_parameters_metadata( input_index, key.0.indexed(), output_index, batch_set_index, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers.buffers, indirect_parameters_index, ); work_item_buffer.push( @@ -1580,7 +1739,9 @@ pub fn batch_and_prepare_binned_render_phase( first_batch: batch, batch_count: 1, bin_key: key.1.clone(), - index: indirect_parameters_buffers.batch_set_count(key.0.indexed()) + index: phase_indirect_parameters_buffers + .buffers + .batch_set_count(key.0.indexed()) as u32, }); } @@ -1595,12 +1756,14 @@ pub fn batch_and_prepare_binned_render_phase( None } else if key.0.indexed() { Some( - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .allocate_indexed(unbatchables.entities.len() as u32), ) } else { Some( - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .allocate_non_indexed(unbatchables.entities.len() as u32), ) }; @@ -1620,7 +1783,7 @@ pub fn batch_and_prepare_binned_render_phase( key.0.indexed(), output_index, None, - &mut indirect_parameters_buffers, + &mut phase_indirect_parameters_buffers.buffers, *indirect_parameters_index, ); work_item_buffer.push( @@ -1640,7 +1803,8 @@ pub fn batch_and_prepare_binned_render_phase( batch_set_index: None, }, }); - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .add_batch_set(key.0.indexed(), *indirect_parameters_index); *indirect_parameters_index += 1; } else { @@ -1664,6 +1828,64 @@ pub fn batch_and_prepare_binned_render_phase( } } +/// A system that gathers up the per-phase GPU buffers and inserts them into the +/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] tables. 
+///
+/// This runs after the [`batch_and_prepare_binned_render_phase`] or
+/// [`batch_and_prepare_sorted_render_phase`] systems. It takes the per-phase
+/// [`PhaseBatchedInstanceBuffers`] and [`PhaseIndirectParametersBuffers`]
+/// resources and inserts them into the global [`BatchedInstanceBuffers`] and
+/// [`IndirectParametersBuffers`] tables.
+///
+/// This system exists so that the [`batch_and_prepare_binned_render_phase`]
+/// and [`batch_and_prepare_sorted_render_phase`] systems can run in parallel
+/// with one another. If those two systems manipulated
+/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] directly, then
+/// they wouldn't be able to run in parallel.
+pub fn collect_buffers_for_phase<PI, GFBD>(
+    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<PI, GFBD::BufferData>>,
+    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<PI>>,
+    mut batched_instance_buffers: ResMut<
+        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
+    >,
+    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
+) where
+    PI: PhaseItem,
+    GFBD: GetFullBatchData + Send + Sync + 'static,
+{
+    // Insert the `PhaseBatchedInstanceBuffers` into the global table. Replace
+    // the contents of the per-phase resource with the old batched instance
+    // buffers in order to reuse allocations.
+    let untyped_phase_batched_instance_buffers =
+        mem::take(&mut phase_batched_instance_buffers.buffers);
+    if let Some(mut old_untyped_phase_batched_instance_buffers) = batched_instance_buffers
+        .phase_instance_buffers
+        .insert(TypeId::of::<PI>(), untyped_phase_batched_instance_buffers)
+    {
+        old_untyped_phase_batched_instance_buffers.clear();
+        phase_batched_instance_buffers.buffers = old_untyped_phase_batched_instance_buffers;
+    }
+
+    // Insert the `PhaseIndirectParametersBuffers` into the global table.
+    // Replace the contents of the per-phase resource with the old indirect
+    // parameters buffers in order to reuse allocations.
+    let untyped_phase_indirect_parameters_buffers = mem::replace(
+        &mut phase_indirect_parameters_buffers.buffers,
+        UntypedPhaseIndirectParametersBuffers::new(
+            indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers,
+        ),
+    );
+    if let Some(mut old_untyped_phase_indirect_parameters_buffers) = indirect_parameters_buffers
+        .insert(
+            TypeId::of::<PI>(),
+            untyped_phase_indirect_parameters_buffers,
+        )
+    {
+        old_untyped_phase_indirect_parameters_buffers.clear();
+        phase_indirect_parameters_buffers.buffers = old_untyped_phase_indirect_parameters_buffers;
+    }
+}
+
 /// A system that writes all instance buffers to the GPU.
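+///
+/// With the move to per-phase batching, this now iterates over every phase's
+/// [`UntypedPhaseBatchedInstanceBuffers`], writing each phase's data buffer,
+/// late indirect parameters buffers, and work item buffers in addition to the
+/// global current and previous input buffers.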
 pub fn write_batched_instance_buffers<GFBD>(
     render_device: Res<RenderDevice>,
@@ -1673,26 +1895,31 @@
     GFBD: GetFullBatchData,
 {
     let BatchedInstanceBuffers {
-        ref mut data_buffer,
-        ref mut work_item_buffers,
         ref mut current_input_buffer,
         ref mut previous_input_buffer,
-        ref mut late_indexed_indirect_parameters_buffer,
-        ref mut late_non_indexed_indirect_parameters_buffer,
+        ref mut phase_instance_buffers,
     } = gpu_array_buffer.into_inner();
 
-    data_buffer.write_buffer(&render_device);
     current_input_buffer
         .buffer
         .write_buffer(&render_device, &render_queue);
     previous_input_buffer
         .buffer
         .write_buffer(&render_device, &render_queue);
-    late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
-    late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
 
-    for view_work_item_buffers in work_item_buffers.values_mut() {
-        for phase_work_item_buffers in view_work_item_buffers.values_mut() {
+    for phase_instance_buffers in phase_instance_buffers.values_mut() {
+        let UntypedPhaseBatchedInstanceBuffers {
+            ref mut data_buffer,
+            ref mut work_item_buffers,
+            ref mut late_indexed_indirect_parameters_buffer,
+            ref mut late_non_indexed_indirect_parameters_buffer,
+        } = *phase_instance_buffers;
+
+        data_buffer.write_buffer(&render_device);
+        late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
+        late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
+
+        for phase_work_item_buffers in work_item_buffers.values_mut() {
             match *phase_work_item_buffers {
                 PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => {
                     buffer_vec.write_buffer(&render_device, &render_queue);
@@ -1728,12 +1955,9 @@
 pub fn clear_indirect_parameters_buffers(
     mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
 ) {
-    indirect_parameters_buffers.indexed_data.clear();
-    indirect_parameters_buffers.indexed_metadata.clear();
-    indirect_parameters_buffers.indexed_batch_sets.clear();
-    indirect_parameters_buffers.non_indexed_data.clear();
-    indirect_parameters_buffers.non_indexed_metadata.clear();
-    indirect_parameters_buffers.non_indexed_batch_sets.clear();
+    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
+        phase_indirect_parameters_buffers.clear();
+    }
 }
 
 pub fn write_indirect_parameters_buffers(
@@ -1741,26 +1965,28 @@
     render_queue: Res<RenderQueue>,
     mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
 ) {
-    indirect_parameters_buffers
-        .indexed_data
-        .write_buffer(&render_device);
-    indirect_parameters_buffers
-        .non_indexed_data
-        .write_buffer(&render_device);
-
-    indirect_parameters_buffers
-        .indexed_metadata
-        .write_buffer(&render_device, &render_queue);
-    indirect_parameters_buffers
-        .non_indexed_metadata
-        .write_buffer(&render_device, &render_queue);
-
-    indirect_parameters_buffers
-        .indexed_batch_sets
-        .write_buffer(&render_device, &render_queue);
-    indirect_parameters_buffers
-        .non_indexed_batch_sets
-        .write_buffer(&render_device, &render_queue);
+    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
+        phase_indirect_parameters_buffers
+            .indexed_data
+            .write_buffer(&render_device);
+        phase_indirect_parameters_buffers
+            .non_indexed_data
+            .write_buffer(&render_device);
+
+        phase_indirect_parameters_buffers
+            .indexed_metadata
+            .write_buffer(&render_device, &render_queue);
+        phase_indirect_parameters_buffers
+            .non_indexed_metadata
+            .write_buffer(&render_device,
&render_queue); + + phase_indirect_parameters_buffers + .indexed_batch_sets + .write_buffer(&render_device, &render_queue); + phase_indirect_parameters_buffers + .non_indexed_batch_sets + .write_buffer(&render_device, &render_queue); + } } #[cfg(test)] diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs index ddafb6f5162f7..ad866c357a698 100644 --- a/crates/bevy_render/src/batching/mod.rs +++ b/crates/bevy_render/src/batching/mod.rs @@ -4,18 +4,15 @@ use bevy_ecs::{ system::{ResMut, SystemParam, SystemParamItem}, }; use bytemuck::Pod; +use gpu_preprocessing::UntypedPhaseIndirectParametersBuffers; use nonmax::NonMaxU32; -use self::gpu_preprocessing::IndirectParametersBuffers; use crate::{ render_phase::{ - BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, SortedPhaseItem, - SortedRenderPhase, ViewBinnedRenderPhases, + BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, InputUniformIndex, + PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, ViewBinnedRenderPhases, }, render_resource::{CachedRenderPipelineId, GpuArrayBufferable}, -}; -use crate::{ - render_phase::{InputUniformIndex, PhaseItemExtraIndex}, sync_world::MainEntity, }; @@ -179,7 +176,7 @@ pub trait GetFullBatchData: GetBatchData { indexed: bool, base_output_index: u32, batch_set_index: Option, - indirect_parameters_buffers: &mut IndirectParametersBuffers, + indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, indirect_parameters_offset: u32, ); } diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs index 0b3e57fac1b11..76e1f1f0b7619 100644 --- a/crates/bevy_render/src/lib.rs +++ b/crates/bevy_render/src/lib.rs @@ -102,6 +102,7 @@ use alloc::sync::Arc; use bevy_app::{App, AppLabel, Plugin, SubApp}; use bevy_asset::{load_internal_asset, weak_handle, AssetApp, AssetServer, Handle}; use bevy_ecs::{prelude::*, schedule::ScheduleLabel}; +use bitflags::bitflags; use core::ops::{Deref, DerefMut}; use std::sync::Mutex; use tracing::debug; @@ -120,12 +121,21 @@ pub struct RenderPlugin { /// If `true`, disables asynchronous pipeline compilation. /// This has no effect on macOS, Wasm, iOS, or without the `multi_threaded` feature. pub synchronous_pipeline_compilation: bool, - /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so - /// that they can be read back to CPU. - /// - /// This is a debugging feature that may reduce performance. It primarily - /// exists for the `occlusion_culling` example. - pub allow_copies_from_indirect_parameters: bool, + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, +} + +bitflags! { + /// Debugging flags that can optionally be set when constructing the renderer. + #[derive(Clone, Copy, PartialEq, Default, Debug)] + pub struct RenderDebugFlags: u8 { + /// If true, this sets the `COPY_SRC` flag on indirect draw parameters + /// so that they can be read back to CPU. + /// + /// This is a debugging feature that may reduce performance. It + /// primarily exists for the `occlusion_culling` example. + const ALLOW_COPIES_FROM_INDIRECT_PARAMETERS = 1; + } } /// The systems sets of the default [`App`] rendering schedule. @@ -159,6 +169,9 @@ pub enum RenderSet { Prepare, /// A sub-set within [`Prepare`](RenderSet::Prepare) for initializing buffers, textures and uniforms for use in bind groups. PrepareResources, + /// Collect phase buffers after + /// [`PrepareResources`](RenderSet::PrepareResources) has run. 
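+    ///
+    /// Each phase's buffer-collection system runs here, merging the per-phase
+    /// buffers into the global tables before they're flushed to the GPU in
+    /// [`PrepareResourcesFlush`](RenderSet::PrepareResourcesFlush).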
+ PrepareResourcesCollectPhaseBuffers, /// Flush buffers after [`PrepareResources`](RenderSet::PrepareResources), but before [`PrepareBindGroups`](RenderSet::PrepareBindGroups). PrepareResourcesFlush, /// A sub-set within [`Prepare`](RenderSet::Prepare) for constructing bind groups, or other data that relies on render resources prepared in [`PrepareResources`](RenderSet::PrepareResources). @@ -210,7 +223,12 @@ impl Render { .after(prepare_assets::), ); schedule.configure_sets( - (PrepareResources, PrepareResourcesFlush, PrepareBindGroups) + ( + PrepareResources, + PrepareResourcesCollectPhaseBuffers, + PrepareResourcesFlush, + PrepareBindGroups, + ) .chain() .in_set(Prepare), ); @@ -380,7 +398,7 @@ impl Plugin for RenderPlugin { GlobalsPlugin, MorphPlugin, BatchingPlugin { - allow_copies_from_indirect_parameters: self.allow_copies_from_indirect_parameters, + debug_flags: self.debug_flags, }, SyncWorldPlugin, StoragePlugin, diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs index 4ddd7a86abf65..ede05bb2fc618 100644 --- a/crates/bevy_render/src/render_phase/mod.rs +++ b/crates/bevy_render/src/render_phase/mod.rs @@ -43,10 +43,14 @@ use nonmax::NonMaxU32; pub use rangefinder::*; use wgpu::Features; -use crate::batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport}; +use crate::batching::gpu_preprocessing::{ + GpuPreprocessingMode, GpuPreprocessingSupport, PhaseBatchedInstanceBuffers, + PhaseIndirectParametersBuffers, +}; use crate::renderer::RenderDevice; use crate::sync_world::{MainEntity, MainEntityHashMap}; use crate::view::RetainedViewEntity; +use crate::RenderDebugFlags; use crate::{ batching::{ self, @@ -1011,18 +1015,26 @@ impl UnbatchableBinnedEntityIndexSet { /// /// This is the version used when the pipeline supports GPU preprocessing: e.g. /// 3D PBR meshes. -pub struct BinnedRenderPhasePlugin(PhantomData<(BPI, GFBD)>) +pub struct BinnedRenderPhasePlugin where BPI: BinnedPhaseItem, - GFBD: GetFullBatchData; + GFBD: GetFullBatchData, +{ + /// Debugging flags that can optionally be set when constructing the renderer. + pub debug_flags: RenderDebugFlags, + phantom: PhantomData<(BPI, GFBD)>, +} -impl Default for BinnedRenderPhasePlugin +impl BinnedRenderPhasePlugin where BPI: BinnedPhaseItem, GFBD: GetFullBatchData, { - fn default() -> Self { - Self(PhantomData) + pub fn new(debug_flags: RenderDebugFlags) -> Self { + Self { + debug_flags, + phantom: PhantomData, + } } } @@ -1038,6 +1050,11 @@ where render_app .init_resource::>() + .init_resource::>() + .insert_resource(PhaseIndirectParametersBuffers::::new( + self.debug_flags + .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS), + )) .add_systems( Render, ( @@ -1054,6 +1071,13 @@ where ) .in_set(RenderSet::PrepareResources), sweep_old_entities::.in_set(RenderSet::QueueSweep), + gpu_preprocessing::collect_buffers_for_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ) + .in_set(RenderSet::PrepareResourcesCollectPhaseBuffers), ), ); } @@ -1097,18 +1121,26 @@ where /// /// This is the version used when the pipeline supports GPU preprocessing: e.g. /// 3D PBR meshes. -pub struct SortedRenderPhasePlugin(PhantomData<(SPI, GFBD)>) +pub struct SortedRenderPhasePlugin where SPI: SortedPhaseItem, - GFBD: GetFullBatchData; + GFBD: GetFullBatchData, +{ + /// Debugging flags that can optionally be set when constructing the renderer. 
+ pub debug_flags: RenderDebugFlags, + phantom: PhantomData<(SPI, GFBD)>, +} -impl Default for SortedRenderPhasePlugin +impl SortedRenderPhasePlugin where SPI: SortedPhaseItem, GFBD: GetFullBatchData, { - fn default() -> Self { - Self(PhantomData) + pub fn new(debug_flags: RenderDebugFlags) -> Self { + Self { + debug_flags, + phantom: PhantomData, + } } } @@ -1124,18 +1156,33 @@ where render_app .init_resource::>() + .init_resource::>() + .insert_resource(PhaseIndirectParametersBuffers::::new( + self.debug_flags + .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS), + )) .add_systems( Render, ( - no_gpu_preprocessing::batch_and_prepare_sorted_render_phase:: - .run_if(resource_exists::>), - gpu_preprocessing::batch_and_prepare_sorted_render_phase::.run_if( - resource_exists::< - BatchedInstanceBuffers, - >, - ), - ) - .in_set(RenderSet::PrepareResources), + ( + no_gpu_preprocessing::batch_and_prepare_sorted_render_phase:: + .run_if(resource_exists::>), + gpu_preprocessing::batch_and_prepare_sorted_render_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ), + ) + .in_set(RenderSet::PrepareResources), + gpu_preprocessing::collect_buffers_for_phase:: + .run_if( + resource_exists::< + BatchedInstanceBuffers, + >, + ) + .in_set(RenderSet::PrepareResourcesCollectPhaseBuffers), + ), ); } } diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs index 8cb2cbb16db8a..5d61b879076e0 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh.rs +++ b/crates/bevy_sprite/src/mesh2d/mesh.rs @@ -479,7 +479,7 @@ impl GetFullBatchData for Mesh2dPipeline { indexed: bool, base_output_index: u32, batch_set_index: Option, - indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffers, + indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::UntypedPhaseIndirectParametersBuffers, indirect_parameters_offset: u32, ) { // Note that `IndirectParameters` covers both of these structures, even diff --git a/examples/3d/occlusion_culling.rs b/examples/3d/occlusion_culling.rs index 11bdde698a0bd..767875e86108d 100644 --- a/examples/3d/occlusion_culling.rs +++ b/examples/3d/occlusion_culling.rs @@ -6,6 +6,7 @@ //! the effects of occlusion culling can be seen. 
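 //!
 //! Reading the indirect draw parameters back on the CPU requires constructing
 //! the renderer with `RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS`,
 //! which `main` below sets on both `RenderPlugin` and `PbrPlugin`.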
use std::{ + any::TypeId, f32::consts::PI, fmt::Write as _, result::Result, @@ -15,9 +16,13 @@ use std::{ use bevy::{ color::palettes::css::{SILVER, WHITE}, core_pipeline::{ - core_3d::graph::{Core3d, Node3d}, + core_3d::{ + graph::{Core3d, Node3d}, + Opaque3d, + }, prepass::DepthPrepass, }, + pbr::PbrPlugin, prelude::*, render::{ batching::gpu_preprocessing::{ @@ -29,7 +34,7 @@ use bevy::{ render_resource::{Buffer, BufferDescriptor, BufferUsages, MapMode}, renderer::{RenderAdapter, RenderContext, RenderDevice}, settings::WgpuFeatures, - Render, RenderApp, RenderPlugin, RenderSet, + Render, RenderApp, RenderDebugFlags, RenderPlugin, RenderSet, }, }; use bytemuck::Pod; @@ -172,6 +177,8 @@ impl Default for AppStatus { } fn main() { + let render_debug_flags = RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS; + App::new() .add_plugins( DefaultPlugins @@ -183,7 +190,11 @@ fn main() { ..default() }) .set(RenderPlugin { - allow_copies_from_indirect_parameters: true, + debug_flags: render_debug_flags, + ..default() + }) + .set(PbrPlugin { + debug_flags: render_debug_flags, ..default() }), ) @@ -421,6 +432,14 @@ impl render_graph::Node for ReadbackIndirectParametersNode { return Ok(()); }; + // Get the indirect parameters buffers corresponding to the opaque 3D + // phase, since all our meshes are in that phase. + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(&TypeId::of::()) + else { + return Ok(()); + }; + // Grab both the buffers we're copying from and the staging buffers // we're copying to. Remember that we can't map the indirect parameters // buffers directly, so we have to copy their contents to a staging @@ -431,8 +450,8 @@ impl render_graph::Node for ReadbackIndirectParametersNode { Some(indirect_parameters_staging_data_buffer), Some(indirect_parameters_staging_batch_sets_buffer), ) = ( - indirect_parameters_buffers.indexed_data_buffer(), - indirect_parameters_buffers.indexed_batch_sets_buffer(), + phase_indirect_parameters_buffers.indexed_data_buffer(), + phase_indirect_parameters_buffers.indexed_batch_sets_buffer(), indirect_parameters_mapping_buffers.data.as_ref(), indirect_parameters_mapping_buffers.batch_sets.as_ref(), ) @@ -474,10 +493,16 @@ fn create_indirect_parameters_staging_buffers( indirect_parameters_buffers: Res, render_device: Res, ) { + let Some(phase_indirect_parameters_buffers) = + indirect_parameters_buffers.get(&TypeId::of::()) + else { + return; + }; + // Fetch the indirect parameters buffers that we're going to copy from. 
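     // These live in the per-phase table now, so they might not exist yet if
     // the `Opaque3d` phase hasn't been built; in that case we simply return.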
let (Some(indexed_data_buffer), Some(indexed_batch_set_buffer)) = ( - indirect_parameters_buffers.indexed_data_buffer(), - indirect_parameters_buffers.indexed_batch_sets_buffer(), + phase_indirect_parameters_buffers.indexed_data_buffer(), + phase_indirect_parameters_buffers.indexed_batch_sets_buffer(), ) else { return; }; diff --git a/examples/shader/custom_render_phase.rs b/examples/shader/custom_render_phase.rs index 12a9c55f2ff77..b11028abf9a04 100644 --- a/examples/shader/custom_render_phase.rs +++ b/examples/shader/custom_render_phase.rs @@ -29,6 +29,7 @@ use bevy::{ batching::{ gpu_preprocessing::{ batch_and_prepare_sorted_render_phase, IndirectParametersMetadata, + UntypedPhaseIndirectParametersBuffers, }, GetBatchData, GetFullBatchData, }, @@ -435,7 +436,7 @@ impl GetFullBatchData for StencilPipeline { indexed: bool, base_output_index: u32, batch_set_index: Option, - indirect_parameters_buffers: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffers, + indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers, indirect_parameters_offset: u32, ) { // Note that `IndirectParameters` covers both of these structures, even diff --git a/examples/shader/specialized_mesh_pipeline.rs b/examples/shader/specialized_mesh_pipeline.rs index 780ca3c36ee5d..e228d776456f0 100644 --- a/examples/shader/specialized_mesh_pipeline.rs +++ b/examples/shader/specialized_mesh_pipeline.rs @@ -16,12 +16,12 @@ use bevy::{ }, prelude::*, render::{ - batching::GetFullBatchData, batching::{ gpu_preprocessing::{ - self, BatchedInstanceBuffers, IndirectParametersBuffers, PreprocessWorkItem, + self, PhaseBatchedInstanceBuffers, PhaseIndirectParametersBuffers, + PreprocessWorkItem, UntypedPhaseBatchedInstanceBuffers, }, - GetBatchData, + GetBatchData, GetFullBatchData, }, experimental::occlusion_culling::OcclusionCulling, extract_component::{ExtractComponent, ExtractComponentPlugin}, @@ -291,24 +291,21 @@ fn queue_custom_mesh_pipeline( Res, ), param: StaticSystemParam<::Param>, - gpu_array_buffer: ResMut< - BatchedInstanceBuffers< - ::BufferData, - ::BufferInputData, - >, + mut phase_batched_instance_buffers: ResMut< + PhaseBatchedInstanceBuffers::BufferData>, >, - mut indirect_parameters_buffers: ResMut, + mut phase_indirect_parameters_buffers: ResMut>, mut change_tick: Local, ) { let system_param_item = param.into_inner(); - let BatchedInstanceBuffers { + let UntypedPhaseBatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, ref mut late_indexed_indirect_parameters_buffer, ref mut late_non_indexed_indirect_parameters_buffer, .. - } = gpu_array_buffer.into_inner(); + } = phase_batched_instance_buffers.buffers; // Get the id for our custom draw function let draw_function_id = opaque_draw_functions @@ -378,7 +375,8 @@ fn queue_custom_mesh_pipeline( // batch set. if mesh_batch_set_info.is_none() { mesh_batch_set_info = Some(MeshBatchSetInfo { - indirect_parameters_index: indirect_parameters_buffers + indirect_parameters_index: phase_indirect_parameters_buffers + .buffers .allocate(mesh.indexed(), 1), is_indexed: mesh.indexed(), }); @@ -450,7 +448,8 @@ fn queue_custom_mesh_pipeline( // indirect parameters buffer, so that the renderer will end up // enqueuing a command to draw the mesh. if let Some(mesh_info) = mesh_batch_set_info { - indirect_parameters_buffers + phase_indirect_parameters_buffers + .buffers .add_batch_set(mesh_info.is_indexed, mesh_info.indirect_parameters_index); } }
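With these changes, code outside the batching module reaches a phase's GPU buffers through `TypeId`-keyed tables instead of through fields on a single global resource. Below is a minimal sketch of that lookup pattern, modeled on the occlusion-culling example above (the system name is hypothetical, and its registration in the render app is omitted):

```rust
use core::any::TypeId;

use bevy::{
    core_pipeline::core_3d::Opaque3d,
    prelude::*,
    render::batching::gpu_preprocessing::IndirectParametersBuffers,
};

/// A render-world system that inspects the GPU-built indirect draw data for
/// the `Opaque3d` phase.
fn inspect_opaque_3d_indirect_parameters(
    indirect_parameters_buffers: Res<IndirectParametersBuffers>,
) {
    // The global resource is now a map keyed by the phase item's `TypeId`,
    // so look up the entry for the phase of interest first.
    let Some(phase_buffers) = indirect_parameters_buffers.get(&TypeId::of::<Opaque3d>()) else {
        return;
    };

    // The per-phase buffers expose the accessors that previously lived on
    // the global resource, such as the indexed indirect parameters buffer.
    if phase_buffers.indexed_data_buffer().is_some() {
        // With `ALLOW_COPIES_FROM_INDIRECT_PARAMETERS` set, this buffer is
        // created with `COPY_SRC` and can be copied to a staging buffer for
        // CPU readback, as the occlusion-culling example does.
    }
}
```

The same pattern applies to `BatchedInstanceBuffers`, whose `phase_instance_buffers` table is likewise keyed by the phase's `TypeId`.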