From 5468edc2a5d04032c995f346a32aa0a4b991dea7 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Mon, 28 Aug 2023 22:36:14 +0200
Subject: [PATCH 01/33] Implement batching for 2D and 3D meshes

---
 assets/shaders/custom_gltf_2d.wgsl            |   6 +-
 crates/bevy_core_pipeline/Cargo.toml          |   1 +
 crates/bevy_core_pipeline/src/core_2d/mod.rs  |  25 +-
 crates/bevy_core_pipeline/src/core_3d/mod.rs  |  69 +++-
 crates/bevy_core_pipeline/src/prepass/mod.rs  |  47 ++-
 crates/bevy_gizmos/src/pipeline_2d.rs         |   3 +-
 crates/bevy_gizmos/src/pipeline_3d.rs         |   3 +-
 crates/bevy_pbr/Cargo.toml                    |   1 +
 crates/bevy_pbr/src/material.rs               |  69 +++-
 crates/bevy_pbr/src/prepass/mod.rs            |   4 +
 crates/bevy_pbr/src/render/light.rs           |  27 +-
 crates/bevy_pbr/src/render/mesh.rs            | 244 ++++++++++----
 crates/bevy_pbr/src/wireframe.rs              |   3 +-
 crates/bevy_render/Cargo.toml                 |   1 +
 crates/bevy_render/src/render_phase/mod.rs    |  35 +-
 .../render_resource/batched_uniform_buffer.rs |   3 +-
 .../src/render_resource/gpu_array_buffer.rs   |  15 +-
 crates/bevy_sprite/Cargo.toml                 |   1 +
 crates/bevy_sprite/src/mesh2d/material.rs     |  71 +++-
 crates/bevy_sprite/src/mesh2d/mesh.rs         | 311 +++++++++++++++---
 crates/bevy_sprite/src/mesh2d/mesh2d.wgsl     |   8 +-
 .../src/mesh2d/mesh2d_bindings.wgsl           |  23 +-
 .../src/mesh2d/mesh2d_functions.wgsl          |  34 +-
 .../bevy_sprite/src/mesh2d/mesh2d_types.wgsl  |  12 +-
 crates/bevy_sprite/src/render/mod.rs          |  14 +-
 crates/bevy_ui/Cargo.toml                     |   1 +
 crates/bevy_ui/src/render/mod.rs              |   8 +-
 crates/bevy_ui/src/render/render_pass.rs      |  25 +-
 examples/2d/mesh2d_manual.rs                  |  26 +-
 examples/shader/shader_instancing.rs          |   3 +-
 30 files changed, 884 insertions(+), 209 deletions(-)
diff --git a/assets/shaders/custom_gltf_2d.wgsl b/assets/shaders/custom_gltf_2d.wgsl
index 58058d9501033..5e0a908c875d4 100644
--- a/assets/shaders/custom_gltf_2d.wgsl
+++ b/assets/shaders/custom_gltf_2d.wgsl
@@ -1,8 +1,9 @@
 #import bevy_sprite::mesh2d_view_bindings   globals
 #import bevy_sprite::mesh2d_bindings        mesh
-#import bevy_sprite::mesh2d_functions       mesh2d_position_local_to_clip
+#import bevy_sprite::mesh2d_functions       get_model_matrix, mesh2d_position_local_to_clip
 
 struct Vertex {
+    @builtin(instance_index) instance_index: u32,
     @location(0) position: vec3<f32>,
     @location(1) color: vec4<f32>,
     @location(2) barycentric: vec3<f32>,
@@ -17,7 +18,8 @@ struct VertexOutput {
 @vertex
 fn vertex(vertex: Vertex) -> VertexOutput {
     var out: VertexOutput;
-    out.clip_position = mesh2d_position_local_to_clip(mesh.model, vec4<f32>(vertex.position, 1.0));
+    let model = get_model_matrix(vertex.instance_index);
+    out.clip_position = mesh2d_position_local_to_clip(model, vec4<f32>(vertex.position, 1.0));
     out.color = vertex.color;
     out.barycentric = vertex.barycentric;
     return out;
diff --git a/crates/bevy_core_pipeline/Cargo.toml b/crates/bevy_core_pipeline/Cargo.toml
index 27825880d3473..e15cdf85775ec 100644
--- a/crates/bevy_core_pipeline/Cargo.toml
+++ b/crates/bevy_core_pipeline/Cargo.toml
@@ -33,3 +33,4 @@ bevy_utils = { path = "../bevy_utils", version = "0.12.0-dev" }
 serde = { version = "1", features = ["derive"] }
 bitflags = "2.3"
 radsort = "0.1"
+nonmax = "0.5.3"
diff --git a/crates/bevy_core_pipeline/src/core_2d/mod.rs b/crates/bevy_core_pipeline/src/core_2d/mod.rs
index 49f4260b203fc..064775345e075 100644
--- a/crates/bevy_core_pipeline/src/core_2d/mod.rs
+++ b/crates/bevy_core_pipeline/src/core_2d/mod.rs
@@ -19,6 +19,8 @@ pub mod graph {
 }
 pub const CORE_2D: &str = graph::NAME;
 
+use std::ops::Range;
+
 pub use camera_2d::*;
 pub use main_pass_2d_node::*;
 
@@ -36,6 +38,7 @@ use bevy_render::{
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
 use bevy_utils::FloatOrd;
+use nonmax::NonMaxU32;
 
 use crate::{tonemapping::TonemappingNode, upscaling::UpscalingNode};
 
@@ -83,7 +86,8 @@ pub struct Transparent2d {
     pub entity: Entity,
     pub pipeline: CachedRenderPipelineId,
     pub draw_function: DrawFunctionId,
-    pub batch_size: usize,
+    pub batch_range: Range<u32>,
+    pub dynamic_offset: Option<NonMaxU32>,
 }
 
 impl PhaseItem for Transparent2d {
@@ -110,8 +114,23 @@ impl PhaseItem for Transparent2d {
     }
 
     #[inline]
-    fn batch_size(&self) -> usize {
-        self.batch_size
+    fn batch_range(&self) -> &Range<u32> {
+        &self.batch_range
+    }
+
+    #[inline]
+    fn batch_range_mut(&mut self) -> &mut Range<u32> {
+        &mut self.batch_range
+    }
+
+    #[inline]
+    fn dynamic_offset(&self) -> Option<NonMaxU32> {
+        self.dynamic_offset
+    }
+
+    #[inline]
+    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
+        &mut self.dynamic_offset
     }
 }
 
diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs
index f415751cde719..8cd4245afd3f6 100644
--- a/crates/bevy_core_pipeline/src/core_3d/mod.rs
+++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs
@@ -24,7 +24,7 @@ pub mod graph {
 }
 pub const CORE_3D: &str = graph::NAME;
 
-use std::cmp::Reverse;
+use std::{cmp::Reverse, ops::Range};
 
 pub use camera_3d::*;
 pub use main_opaque_pass_3d_node::*;
@@ -51,6 +51,7 @@ use bevy_render::{
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
 use bevy_utils::{FloatOrd, HashMap};
+use nonmax::NonMaxU32;
 
 use crate::{
     prepass::{
@@ -135,7 +136,8 @@ pub struct Opaque3d {
     pub pipeline: CachedRenderPipelineId,
     pub entity: Entity,
     pub draw_function: DrawFunctionId,
-    pub batch_size: usize,
+    pub batch_range: Range<u32>,
+    pub dynamic_offset: Option<NonMaxU32>,
 }
 
 impl PhaseItem for Opaque3d {
@@ -164,8 +166,23 @@ impl PhaseItem for Opaque3d {
     }
 
     #[inline]
-    fn batch_size(&self) -> usize {
-        self.batch_size
+    fn batch_range(&self) -> &Range<u32> {
+        &self.batch_range
+    }
+
+    #[inline]
+    fn batch_range_mut(&mut self) -> &mut Range<u32> {
+        &mut self.batch_range
+    }
+
+    #[inline]
+    fn dynamic_offset(&self) -> Option<NonMaxU32> {
+        self.dynamic_offset
+    }
+
+    #[inline]
+    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
+        &mut self.dynamic_offset
     }
 }
 
@@ -181,7 +198,8 @@ pub struct AlphaMask3d {
     pub pipeline: CachedRenderPipelineId,
     pub entity: Entity,
     pub draw_function: DrawFunctionId,
-    pub batch_size: usize,
+    pub batch_range: Range<u32>,
+    pub dynamic_offset: Option<NonMaxU32>,
 }
 
 impl PhaseItem for AlphaMask3d {
@@ -210,8 +228,23 @@ impl PhaseItem for AlphaMask3d {
     }
 
     #[inline]
-    fn batch_size(&self) -> usize {
-        self.batch_size
+    fn batch_range(&self) -> &Range<u32> {
+        &self.batch_range
+    }
+
+    #[inline]
+    fn batch_range_mut(&mut self) -> &mut Range<u32> {
+        &mut self.batch_range
+    }
+
+    #[inline]
+    fn dynamic_offset(&self) -> Option<NonMaxU32> {
+        self.dynamic_offset
+    }
+
+    #[inline]
+    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
+        &mut self.dynamic_offset
     }
 }
 
@@ -227,7 +260,8 @@ pub struct Transparent3d {
     pub pipeline: CachedRenderPipelineId,
     pub entity: Entity,
     pub draw_function: DrawFunctionId,
-    pub batch_size: usize,
+    pub batch_range: Range<u32>,
+    pub dynamic_offset: Option<NonMaxU32>,
 }
 
 impl PhaseItem for Transparent3d {
@@ -255,8 +289,23 @@ impl PhaseItem for Transparent3d {
     }
 
     #[inline]
-    fn batch_size(&self) -> usize {
-        self.batch_size
+    fn batch_range(&self) -> &Range<u32> {
+        &self.batch_range
+    }
+
+    #[inline]
+    fn batch_range_mut(&mut self) -> &mut Range<u32> {
+        &mut self.batch_range
+    }
+
+    #[inline]
+    fn dynamic_offset(&self) -> Option<NonMaxU32> {
+        self.dynamic_offset
+    }
+
+    #[inline]
+    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
+        &mut self.dynamic_offset
     }
 }
 
diff --git a/crates/bevy_core_pipeline/src/prepass/mod.rs b/crates/bevy_core_pipeline/src/prepass/mod.rs
index 38c71050a194b..7e484547fbd43 100644
--- a/crates/bevy_core_pipeline/src/prepass/mod.rs
+++ b/crates/bevy_core_pipeline/src/prepass/mod.rs
@@ -27,7 +27,7 @@
 
 pub mod node;
 
-use std::cmp::Reverse;
+use std::{cmp::Reverse, ops::Range};
 
 use bevy_ecs::prelude::*;
 use bevy_reflect::Reflect;
@@ -37,6 +37,7 @@ use bevy_render::{
     texture::CachedTexture,
 };
 use bevy_utils::FloatOrd;
+use nonmax::NonMaxU32;
 
 pub const DEPTH_PREPASS_FORMAT: TextureFormat = TextureFormat::Depth32Float;
 pub const NORMAL_PREPASS_FORMAT: TextureFormat = TextureFormat::Rgb10a2Unorm;
@@ -83,6 +84,8 @@ pub struct Opaque3dPrepass {
     pub entity: Entity,
     pub pipeline_id: CachedRenderPipelineId,
     pub draw_function: DrawFunctionId,
+    pub batch_range: Range<u32>,
+    pub dynamic_offset: Option<NonMaxU32>,
 }
 
 impl PhaseItem for Opaque3dPrepass {
@@ -109,6 +112,26 @@ impl PhaseItem for Opaque3dPrepass {
         // Key negated to match reversed SortKey ordering
         radsort::sort_by_key(items, |item| -item.distance);
     }
+
+    #[inline]
+    fn batch_range(&self) -> &Range<u32> {
+        &self.batch_range
+    }
+
+    #[inline]
+    fn batch_range_mut(&mut self) -> &mut Range<u32> {
+        &mut self.batch_range
+    }
+
+    #[inline]
+    fn dynamic_offset(&self) -> Option<NonMaxU32> {
+        self.dynamic_offset
+    }
+
+    #[inline]
+    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
+        &mut self.dynamic_offset
+    }
 }
 
 impl CachedRenderPipelinePhaseItem for Opaque3dPrepass {
@@ -128,6 +151,8 @@ pub struct AlphaMask3dPrepass {
     pub entity: Entity,
     pub pipeline_id: CachedRenderPipelineId,
     pub draw_function: DrawFunctionId,
+    pub batch_range: Range<u32>,
+    pub dynamic_offset: Option<NonMaxU32>,
 }
 
 impl PhaseItem for AlphaMask3dPrepass {
@@ -154,6 +179,26 @@ impl PhaseItem for AlphaMask3dPrepass {
         // Key negated to match reversed SortKey ordering
         radsort::sort_by_key(items, |item| -item.distance);
     }
+
+    #[inline]
+    fn batch_range(&self) -> &Range<u32> {
+        &self.batch_range
+    }
+
+    #[inline]
+    fn batch_range_mut(&mut self) -> &mut Range<u32> {
+        &mut self.batch_range
+    }
+
+    #[inline]
+    fn dynamic_offset(&self) -> Option<NonMaxU32> {
+        self.dynamic_offset
+    }
+
+    #[inline]
+    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
+        &mut self.dynamic_offset
+    }
 }
 
 impl CachedRenderPipelinePhaseItem for AlphaMask3dPrepass {
diff --git a/crates/bevy_gizmos/src/pipeline_2d.rs b/crates/bevy_gizmos/src/pipeline_2d.rs
index fa345f2bf05e1..e5472ff61a4dd 100644
--- a/crates/bevy_gizmos/src/pipeline_2d.rs
+++ b/crates/bevy_gizmos/src/pipeline_2d.rs
@@ -178,7 +178,8 @@ fn queue_line_gizmos_2d(
                 draw_function,
                 pipeline,
                 sort_key: FloatOrd(f32::INFINITY),
-                batch_size: 1,
+                batch_range: 0..1,
+                dynamic_offset: None,
             });
         }
     }
diff --git a/crates/bevy_gizmos/src/pipeline_3d.rs b/crates/bevy_gizmos/src/pipeline_3d.rs
index 33712fa020557..c15a1e404256a 100644
--- a/crates/bevy_gizmos/src/pipeline_3d.rs
+++ b/crates/bevy_gizmos/src/pipeline_3d.rs
@@ -192,7 +192,8 @@ fn queue_line_gizmos_3d(
                 draw_function,
                 pipeline,
                 distance: 0.,
-                batch_size: 1,
+                batch_range: 0..1,
+                dynamic_offset: None,
             });
         }
     }
diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml
index 5ff50b66d6644..371cb5c4a87bf 100644
--- a/crates/bevy_pbr/Cargo.toml
+++ b/crates/bevy_pbr/Cargo.toml
@@ -33,3 +33,4 @@ bytemuck = { version = "1", features = ["derive"] }
 naga_oil = "0.8"
 radsort = "0.1"
 smallvec = "1.6"
+nonmax = "0.5.3"
diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs
index 68361938dec16..4347c05629b0c 100644
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@@ -21,7 +21,6 @@ use bevy_ecs::{
 };
 use bevy_reflect::{TypePath, TypeUuid};
 use bevy_render::{
-    extract_component::ExtractComponentPlugin,
     mesh::{Mesh, MeshVertexBufferLayout},
     prelude::Image,
     render_asset::{prepare_assets, RenderAssets},
@@ -30,13 +29,13 @@ use bevy_render::{
         RenderPhase, SetItemPipeline, TrackedRenderPass,
     },
     render_resource::{
-        AsBindGroup, AsBindGroupError, BindGroup, BindGroupLayout, OwnedBindingResource,
-        PipelineCache, RenderPipelineDescriptor, Shader, ShaderRef, SpecializedMeshPipeline,
-        SpecializedMeshPipelineError, SpecializedMeshPipelines,
+        AsBindGroup, AsBindGroupError, BindGroup, BindGroupId, BindGroupLayout,
+        OwnedBindingResource, PipelineCache, RenderPipelineDescriptor, Shader, ShaderRef,
+        SpecializedMeshPipeline, SpecializedMeshPipelineError, SpecializedMeshPipelines,
     },
     renderer::RenderDevice,
     texture::FallbackImage,
-    view::{ExtractedView, Msaa, VisibleEntities},
+    view::{ExtractedView, Msaa, ViewVisibility, VisibleEntities},
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
 use bevy_utils::{tracing::error, HashMap, HashSet};
@@ -187,8 +186,7 @@ where
     M::Data: PartialEq + Eq + Hash + Clone,
 {
     fn build(&self, app: &mut App) {
-        app.add_asset::<M>()
-            .add_plugins(ExtractComponentPlugin::<Handle<M>>::extract_visible());
+        app.add_asset::<M>();
 
         if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
             render_app
@@ -200,7 +198,10 @@ where
                 .init_resource::<ExtractedMaterials<M>>()
                 .init_resource::<RenderMaterials<M>>()
                 .init_resource::<SpecializedMeshPipelines<MaterialPipeline<M>>>()
-                .add_systems(ExtractSchedule, extract_materials::<M>)
+                .add_systems(
+                    ExtractSchedule,
+                    (extract_materials::<M>, extract_material_meshes::<M>),
+                )
                 .add_systems(
                     Render,
                     (
@@ -232,6 +233,27 @@ where
     }
 }
 
+fn extract_material_meshes<M: Material>(
+    mut commands: Commands,
+    mut previous_len: Local<usize>,
+    query: Extract<Query<(Entity, &ViewVisibility, &Handle<M>)>>,
+) {
+    let mut values = Vec::with_capacity(*previous_len);
+    for (entity, view_visibility, material) in &query {
+        if view_visibility.get() {
+            // NOTE: MaterialBindGroupId is inserted here to avoid a table move. Upcoming changes
+            // to use SparseSet for render world entity storage will do this automatically.
+            values.push((
+                entity,
+                (material.clone_weak(), MaterialBindGroupId::default()),
+            ));
+        }
+    }
+    *previous_len = values.len();
+    // FIXME: Entities still have to be spawned because phases assume entities exist
+    commands.insert_or_spawn_batch(values);
+}
+
 /// A key uniquely identifying a specialized [`MaterialPipeline`].
 pub struct MaterialPipelineKey<M: Material> {
     pub mesh_key: MeshPipelineKey,
@@ -383,7 +405,12 @@ pub fn queue_material_meshes<M: Material>(
     msaa: Res<Msaa>,
     render_meshes: Res<RenderAssets<Mesh>>,
     render_materials: Res<RenderMaterials<M>>,
-    material_meshes: Query<(&Handle<M>, &Handle<Mesh>, &MeshTransforms)>,
+    mut material_meshes: Query<(
+        &Handle<M>,
+        &mut MaterialBindGroupId,
+        &Handle<Mesh>,
+        &MeshTransforms,
+    )>,
     images: Res<RenderAssets<Image>>,
     mut views: Query<(
         &ExtractedView,
@@ -467,8 +494,8 @@ pub fn queue_material_meshes<M: Material>(
 
         let rangefinder = view.rangefinder3d();
         for visible_entity in &visible_entities.entities {
-            if let Ok((material_handle, mesh_handle, mesh_transforms)) =
-                material_meshes.get(*visible_entity)
+            if let Ok((material_handle, mut material_bind_group_id, mesh_handle, mesh_transforms)) =
+                material_meshes.get_mut(*visible_entity)
             {
                 if let (Some(mesh), Some(material)) = (
                     render_meshes.get(mesh_handle),
@@ -515,6 +542,8 @@ pub fn queue_material_meshes<M: Material>(
                         }
                     };
 
+                    *material_bind_group_id = material.get_bind_group_id();
+
                     let distance = rangefinder
                         .distance_translation(&mesh_transforms.transform.translation)
                         + material.properties.depth_bias;
@@ -525,7 +554,8 @@ pub fn queue_material_meshes<M: Material>(
                                 draw_function: draw_opaque_pbr,
                                 pipeline: pipeline_id,
                                 distance,
-                                batch_size: 1,
+                                batch_range: 0..1,
+                                dynamic_offset: None,
                             });
                         }
                         AlphaMode::Mask(_) => {
@@ -534,7 +564,8 @@ pub fn queue_material_meshes<M: Material>(
                                 draw_function: draw_alpha_mask_pbr,
                                 pipeline: pipeline_id,
                                 distance,
-                                batch_size: 1,
+                                batch_range: 0..1,
+                                dynamic_offset: None,
                             });
                         }
                         AlphaMode::Blend
@@ -546,7 +577,8 @@ pub fn queue_material_meshes<M: Material>(
                                 draw_function: draw_transparent_pbr,
                                 pipeline: pipeline_id,
                                 distance,
-                                batch_size: 1,
+                                batch_range: 0..1,
+                                dynamic_offset: None,
                             });
                         }
                     }
@@ -574,6 +606,15 @@ pub struct PreparedMaterial<T: Material> {
     pub properties: MaterialProperties,
 }
 
+#[derive(Component, Default, PartialEq, Eq, Deref, DerefMut)]
+pub struct MaterialBindGroupId(Option<BindGroupId>);
+
+impl<T: Material> PreparedMaterial<T> {
+    pub fn get_bind_group_id(&self) -> MaterialBindGroupId {
+        MaterialBindGroupId(Some(self.bind_group.id()))
+    }
+}
+
 #[derive(Resource)]
 pub struct ExtractedMaterials<M: Material> {
     extracted: Vec<(Handle<M>, M)>,
diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs
index 21ea58e57f8f6..a5065df88417b 100644
--- a/crates/bevy_pbr/src/prepass/mod.rs
+++ b/crates/bevy_pbr/src/prepass/mod.rs
@@ -852,6 +852,8 @@ pub fn queue_prepass_material_meshes<M: Material>(
                         draw_function: opaque_draw_prepass,
                         pipeline_id,
                         distance,
+                        batch_range: 0..1,
+                        dynamic_offset: None,
                     });
                 }
                 AlphaMode::Mask(_) => {
@@ -860,6 +862,8 @@ pub fn queue_prepass_material_meshes<M: Material>(
                         draw_function: alpha_mask_draw_prepass,
                         pipeline_id,
                         distance,
+                        batch_range: 0..1,
+                        dynamic_offset: None,
                     });
                 }
                 AlphaMode::Blend
diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs
index e18e9d4acae4a..ddfba8710132d 100644
--- a/crates/bevy_pbr/src/render/light.rs
+++ b/crates/bevy_pbr/src/render/light.rs
@@ -30,7 +30,8 @@ use bevy_utils::{
     tracing::{error, warn},
     HashMap,
 };
-use std::{hash::Hash, num::NonZeroU64};
+use nonmax::NonMaxU32;
+use std::{hash::Hash, num::NonZeroU64, ops::Range};
 
 #[derive(Component)]
 pub struct ExtractedPointLight {
@@ -1641,6 +1642,8 @@ pub fn queue_shadows<M: Material>(
                             pipeline: pipeline_id,
                             entity,
                             distance: 0.0, // TODO: sort front-to-back
+                            batch_range: 0..1,
+                            dynamic_offset: None,
                         });
                     }
                 }
@@ -1654,6 +1657,8 @@ pub struct Shadow {
     pub entity: Entity,
     pub pipeline: CachedRenderPipelineId,
     pub draw_function: DrawFunctionId,
+    pub batch_range: Range<u32>,
+    pub dynamic_offset: Option<NonMaxU32>,
 }
 
 impl PhaseItem for Shadow {
@@ -1681,6 +1686,26 @@ impl PhaseItem for Shadow {
         // better than rebinding everything at a high rate.
         radsort::sort_by_key(items, |item| item.sort_key());
     }
+
+    #[inline]
+    fn batch_range(&self) -> &Range<u32> {
+        &self.batch_range
+    }
+
+    #[inline]
+    fn batch_range_mut(&mut self) -> &mut Range<u32> {
+        &mut self.batch_range
+    }
+
+    #[inline]
+    fn dynamic_offset(&self) -> Option<NonMaxU32> {
+        self.dynamic_offset
+    }
+
+    #[inline]
+    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
+        &mut self.dynamic_offset
+    }
 }
 
 impl CachedRenderPipelinePhaseItem for Shadow {
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index e9abd14609851..e10a4a1dab164 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -1,15 +1,15 @@
 use crate::{
     environment_map, prepass, EnvironmentMapLight, FogMeta, GlobalLightMeta, GpuFog, GpuLights,
-    GpuPointLights, LightMeta, NotShadowCaster, NotShadowReceiver, PreviousGlobalTransform,
-    ScreenSpaceAmbientOcclusionTextures, Shadow, ShadowSamplers, ViewClusterBindings,
-    ViewFogUniformOffset, ViewLightsUniformOffset, ViewShadowBindings,
+    GpuPointLights, LightMeta, MaterialBindGroupId, NotShadowCaster, NotShadowReceiver,
+    PreviousGlobalTransform, ScreenSpaceAmbientOcclusionTextures, Shadow, ShadowSamplers,
+    ViewClusterBindings, ViewFogUniformOffset, ViewLightsUniformOffset, ViewShadowBindings,
     CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, MAX_CASCADES_PER_LIGHT, MAX_DIRECTIONAL_LIGHTS,
 };
 use bevy_app::Plugin;
 use bevy_asset::{load_internal_asset, Assets, Handle, HandleId, HandleUntyped};
 use bevy_core_pipeline::{
     core_3d::{AlphaMask3d, Opaque3d, Transparent3d},
-    prepass::ViewPrepassTextures,
+    prepass::{AlphaMask3dPrepass, Opaque3dPrepass, ViewPrepassTextures},
     tonemapping::{
         get_lut_bind_group_layout_entries, get_lut_bindings, Tonemapping, TonemappingLuts,
     },
@@ -30,7 +30,10 @@ use bevy_render::{
     },
     prelude::Msaa,
     render_asset::RenderAssets,
-    render_phase::{PhaseItem, RenderCommand, RenderCommandResult, RenderPhase, TrackedRenderPass},
+    render_phase::{
+        CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem, RenderCommand,
+        RenderCommandResult, RenderPhase, TrackedRenderPass,
+    },
     render_resource::*,
     renderer::{RenderDevice, RenderQueue},
     texture::{
@@ -42,7 +45,7 @@ use bevy_render::{
 };
 use bevy_transform::components::GlobalTransform;
 use bevy_utils::{tracing::error, HashMap, Hashed};
-use fixedbitset::FixedBitSet;
+use nonmax::NonMaxU32;
 
 use crate::render::{
     morph::{extract_morphs, prepare_morphs, MorphIndex, MorphUniform},
@@ -384,60 +387,180 @@ pub fn extract_skinned_meshes(
     commands.insert_or_spawn_batch(values);
 }
 
+/// Data that must be equal for two draw commands to be mergeable.
+///
+/// This is based on the following assumptions:
+/// - Only entities with prepared assets (pipelines, materials, meshes) are
+///   queued to phases
+/// - View bindings are constant across a phase for a given draw function as
+///   phases are per-view
+/// - `prepare_mesh_uniforms` is the only system that performs this batching
+///   and has sole responsibility for preparing the per-object data. As such
+///   the mesh binding and dynamic offsets are assumed to only be variable as a
+///   result of the `prepare_mesh_uniforms` system, e.g. due to having to split
+///   data across separate uniform bindings within the same buffer due to the
+///   maximum uniform buffer binding size.
+#[derive(Default, PartialEq, Eq)]
+struct BatchMeta<'mat, 'mesh> {
+    /// The pipeline id encompasses all pipeline configuration including vertex
+    /// buffers and layouts, shaders and their specializations, bind group
+    /// layouts, etc.
+    pipeline_id: Option<CachedRenderPipelineId>,
+    /// The draw function id defines the `RenderCommand`s that are called to
+    /// set the pipeline and bindings, and to issue the draw command.
+    draw_function_id: Option<DrawFunctionId>,
+    /// The material binding meta identifies the material's bind group; it is
+    /// `None` for entities that have no `MaterialBindGroupId` component.
+    material_binding_meta: Option<&'mat MaterialBindGroupId>,
+    mesh_handle: Option<&'mesh Handle<Mesh>>,
+    dynamic_offset: Option<NonMaxU32>,
+}
+
+impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> {
+    #[inline]
+    fn matches(&self, other: &BatchMeta<'mat, 'mesh>, consider_material: bool) -> bool {
+        self.pipeline_id == other.pipeline_id
+            && self.draw_function_id == other.draw_function_id
+            && self.mesh_handle == other.mesh_handle
+            && self.dynamic_offset == other.dynamic_offset
+            && (!consider_material || self.material_binding_meta == other.material_binding_meta)
+    }
+}
+
+#[derive(Default)]
+struct BatchState<'mat, 'mesh> {
+    meta: BatchMeta<'mat, 'mesh>,
+    /// The base index in the object data binding's array
+    gpu_array_buffer_index: GpuArrayBufferIndex<MeshUniform>,
+    /// The number of entities in the batch
+    count: u32,
+    item_index: usize,
+}
+
+fn update_batch_data<I: PhaseItem>(item: &mut I, batch: &BatchState) {
+    let BatchState {
+        count,
+        gpu_array_buffer_index,
+        ..
+    } = batch;
+    *item.batch_range_mut() = gpu_array_buffer_index.index..(gpu_array_buffer_index.index + *count);
+    *item.dynamic_offset_mut() = gpu_array_buffer_index.dynamic_offset;
+}
+
+fn process_phase<I: CachedRenderPipelinePhaseItem>(
+    object_data_buffer: &mut GpuArrayBuffer<MeshUniform>,
+    object_query: &ObjectQuery,
+    phase: &mut RenderPhase<I>,
+    consider_material: bool,
+) {
+    let mut batch = BatchState::default();
+    for i in 0..phase.items.len() {
+        let item = &mut phase.items[i];
+        let Ok((material_binding_meta, mesh_handle, mesh_transforms)) =
+            object_query.get(item.entity())
+        else {
+            // It is necessary to start a new batch if an entity not matching the query is
+            // encountered. This can be achieved by resetting the pipeline id.
+            batch.meta.pipeline_id = None;
+            continue;
+        };
+        let gpu_array_buffer_index = object_data_buffer.push(MeshUniform::from(mesh_transforms));
+        let batch_meta = BatchMeta {
+            pipeline_id: Some(item.cached_pipeline()),
+            draw_function_id: Some(item.draw_function()),
+            material_binding_meta,
+            mesh_handle: Some(mesh_handle),
+            dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+        };
+        if !batch_meta.matches(&batch.meta, consider_material) {
+            if batch.count > 0 {
+                update_batch_data(&mut phase.items[batch.item_index], &batch);
+            }
+
+            batch.meta = batch_meta;
+            batch.gpu_array_buffer_index = gpu_array_buffer_index;
+            batch.count = 0;
+            batch.item_index = i;
+        }
+        batch.count += 1;
+    }
+    if !phase.items.is_empty() && batch.count > 0 {
+        update_batch_data(&mut phase.items[batch.item_index], &batch);
+    }
+}
+
+type ObjectQuery<'w, 's, 'mat, 'mesh, 'data> = Query<
+    'w,
+    's,
+    (
+        Option<&'mat MaterialBindGroupId>,
+        &'mesh Handle<Mesh>,
+        &'data MeshTransforms,
+    ),
+>;
+
 #[allow(clippy::too_many_arguments)]
 pub fn prepare_mesh_uniforms(
-    mut seen: Local<FixedBitSet>,
-    mut commands: Commands,
-    mut previous_len: Local<usize>,
     render_device: Res<RenderDevice>,
     render_queue: Res<RenderQueue>,
-    mut gpu_array_buffer: ResMut<GpuArrayBuffer<MeshUniform>>,
-    views: Query<(
-        &RenderPhase<Opaque3d>,
-        &RenderPhase<Transparent3d>,
-        &RenderPhase<AlphaMask3d>,
+    gpu_array_buffer: ResMut<GpuArrayBuffer<MeshUniform>>,
+    mut views: Query<(
+        Option<&mut RenderPhase<Opaque3dPrepass>>,
+        Option<&mut RenderPhase<AlphaMask3dPrepass>>,
+        &mut RenderPhase<Opaque3d>,
+        &mut RenderPhase<AlphaMask3d>,
+        &mut RenderPhase<Transparent3d>,
     )>,
-    shadow_views: Query<&RenderPhase<Shadow>>,
-    meshes: Query<(Entity, &MeshTransforms)>,
+    mut shadow_views: Query<&mut RenderPhase<Shadow>>,
+    meshes: ObjectQuery,
 ) {
-    gpu_array_buffer.clear();
-    seen.clear();
-
-    let mut indices = Vec::with_capacity(*previous_len);
-    let mut push_indices = |(mesh, mesh_uniform): (Entity, &MeshTransforms)| {
-        let index = mesh.index() as usize;
-        if !seen.contains(index) {
-            if index >= seen.len() {
-                seen.grow(index + 1);
-            }
-            seen.insert(index);
-            indices.push((mesh, gpu_array_buffer.push(mesh_uniform.into())));
-        }
-    };
+    let gpu_array_buffer = gpu_array_buffer.into_inner();
 
-    for (opaque_phase, transparent_phase, alpha_phase) in &views {
-        meshes
-            .iter_many(opaque_phase.iter_entities())
-            .for_each(&mut push_indices);
-
-        meshes
-            .iter_many(transparent_phase.iter_entities())
-            .for_each(&mut push_indices);
+    gpu_array_buffer.clear();
 
-        meshes
-            .iter_many(alpha_phase.iter_entities())
-            .for_each(&mut push_indices);
+    for (
+        opaque_prepass_phase,
+        alpha_mask_prepass_phase,
+        opaque_phase,
+        alpha_mask_phase,
+        transparent_phase,
+    ) in &mut views
+    {
+        if let Some(opaque_prepass_phase) = opaque_prepass_phase {
+            process_phase(
+                gpu_array_buffer,
+                &meshes,
+                opaque_prepass_phase.into_inner(),
+                false,
+            );
+        }
+        if let Some(alpha_mask_prepass_phase) = alpha_mask_prepass_phase {
+            process_phase(
+                gpu_array_buffer,
+                &meshes,
+                alpha_mask_prepass_phase.into_inner(),
+                true,
+            );
+        }
+        process_phase(gpu_array_buffer, &meshes, opaque_phase.into_inner(), true);
+        process_phase(
+            gpu_array_buffer,
+            &meshes,
+            alpha_mask_phase.into_inner(),
+            true,
+        );
+        process_phase(
+            gpu_array_buffer,
+            &meshes,
+            transparent_phase.into_inner(),
+            true,
+        );
     }
 
-    for shadow_phase in &shadow_views {
-        meshes
-            .iter_many(shadow_phase.iter_entities())
-            .for_each(&mut push_indices);
+    for shadow_phase in &mut shadow_views {
+        process_phase(gpu_array_buffer, &meshes, shadow_phase.into_inner(), false);
     }
 
-    *previous_len = indices.len();
-    commands.insert_or_spawn_batch(indices);
-
     gpu_array_buffer.write_buffer(&render_device, &render_queue);
 }
 
@@ -1371,16 +1494,15 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
     type ViewWorldQuery = ();
     type ItemWorldQuery = (
         Read<Handle<Mesh>>,
-        Read<GpuArrayBufferIndex<MeshUniform>>,
         Option<Read<SkinnedMeshJoints>>,
         Option<Read<MorphIndex>>,
     );
 
     #[inline]
     fn render<'w>(
-        _item: &P,
+        item: &P,
         _view: (),
-        (mesh, batch_indices, skin_index, morph_index): ROQueryItem<Self::ItemWorldQuery>,
+        (mesh, skin_index, morph_index): ROQueryItem<Self::ItemWorldQuery>,
         bind_groups: SystemParamItem<'w, '_, Self::Param>,
         pass: &mut TrackedRenderPass<'w>,
     ) -> RenderCommandResult {
@@ -1399,8 +1521,8 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
 
         let mut dynamic_offsets: [u32; 3] = Default::default();
         let mut index_count = 0;
-        if let Some(mesh_index) = batch_indices.dynamic_offset {
-            dynamic_offsets[index_count] = mesh_index;
+        if let Some(mesh_index) = item.dynamic_offset() {
+            dynamic_offsets[index_count] = mesh_index.get();
             index_count += 1;
         }
         if let Some(skin_index) = skin_index {
@@ -1421,22 +1543,23 @@ pub struct DrawMesh;
 impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
     type Param = SRes<RenderAssets<Mesh>>;
     type ViewWorldQuery = ();
-    type ItemWorldQuery = (Read<GpuArrayBufferIndex<MeshUniform>>, Read<Handle<Mesh>>);
+    type ItemWorldQuery = Read<Handle<Mesh>>;
     #[inline]
     fn render<'w>(
-        _item: &P,
+        item: &P,
         _view: (),
-        (batch_indices, mesh_handle): ROQueryItem<'_, Self::ItemWorldQuery>,
+        mesh_handle: ROQueryItem<'_, Self::ItemWorldQuery>,
         meshes: SystemParamItem<'w, '_, Self::Param>,
         pass: &mut TrackedRenderPass<'w>,
     ) -> RenderCommandResult {
         if let Some(gpu_mesh) = meshes.into_inner().get(mesh_handle) {
+            let batch_range = item.batch_range();
             pass.set_vertex_buffer(0, gpu_mesh.vertex_buffer.slice(..));
             #[cfg(all(feature = "webgl", target_arch = "wasm32"))]
             pass.set_push_constants(
                 ShaderStages::VERTEX,
                 0,
-                &(batch_indices.index as i32).to_le_bytes(),
+                &(batch_range.start as i32).to_le_bytes(),
             );
             match &gpu_mesh.buffer_info {
                 GpuBufferInfo::Indexed {
@@ -1445,13 +1568,10 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
                     count,
                 } => {
                     pass.set_index_buffer(buffer.slice(..), 0, *index_format);
-                    pass.draw_indexed(0..*count, 0, batch_indices.index..batch_indices.index + 1);
+                    pass.draw_indexed(0..*count, 0, batch_range.clone());
                 }
                 GpuBufferInfo::NonIndexed => {
-                    pass.draw(
-                        0..gpu_mesh.vertex_count,
-                        batch_indices.index..batch_indices.index + 1,
-                    );
+                    pass.draw(0..gpu_mesh.vertex_count, batch_range.clone());
                 }
             }
             RenderCommandResult::Success
diff --git a/crates/bevy_pbr/src/wireframe.rs b/crates/bevy_pbr/src/wireframe.rs
index e227bfdb7d4d6..c9c7de18ae6c6 100644
--- a/crates/bevy_pbr/src/wireframe.rs
+++ b/crates/bevy_pbr/src/wireframe.rs
@@ -152,7 +152,8 @@ fn queue_wireframes(
                         draw_function: draw_custom,
                         distance: rangefinder
                             .distance_translation(&mesh_transforms.transform.translation),
-                        batch_size: 1,
+                        batch_range: 0..1,
+                        dynamic_offset: None,
                     });
                 }
             };
diff --git a/crates/bevy_render/Cargo.toml b/crates/bevy_render/Cargo.toml
index dd2f90aef4683..fe945ab00b6bc 100644
--- a/crates/bevy_render/Cargo.toml
+++ b/crates/bevy_render/Cargo.toml
@@ -83,6 +83,7 @@ encase = { version = "0.6.1", features = ["glam"] }
 # For wgpu profiling using tracing. Use `RUST_LOG=info` to also capture the wgpu spans.
 profiling = { version = "1", features = ["profile-with-tracing"], optional = true }
 async-channel = "1.8"
+nonmax = "0.5.3"
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 js-sys = "0.3"
diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs
index 54870cfc260b7..dec5ddf77621d 100644
--- a/crates/bevy_render/src/render_phase/mod.rs
+++ b/crates/bevy_render/src/render_phase/mod.rs
@@ -31,6 +31,7 @@ mod rangefinder;
 
 pub use draw::*;
 pub use draw_state::*;
+use nonmax::NonMaxU32;
 pub use rangefinder::*;
 
 use crate::render_resource::{CachedRenderPipelineId, PipelineCache};
@@ -93,13 +94,13 @@ impl<I: PhaseItem> RenderPhase<I> {
         let mut index = 0;
         while index < self.items.len() {
             let item = &self.items[index];
-            let batch_size = item.batch_size();
-            if batch_size > 0 {
+            let batch_range = item.batch_range();
+            if batch_range.is_empty() {
+                index += 1;
+            } else {
                 let draw_function = draw_functions.get_mut(item.draw_function()).unwrap();
                 draw_function.draw(world, render_pass, view, item);
-                index += batch_size;
-            } else {
-                index += 1;
+                index += batch_range.len();
             }
         }
     }
@@ -124,13 +125,13 @@ impl<I: PhaseItem> RenderPhase<I> {
         let mut index = 0;
         while index < items.len() {
             let item = &items[index];
-            let batch_size = item.batch_size();
-            if batch_size > 0 {
+            let batch_range = item.batch_range();
+            if batch_range.is_empty() {
+                index += 1;
+            } else {
                 let draw_function = draw_functions.get_mut(item.draw_function()).unwrap();
                 draw_function.draw(world, render_pass, view, item);
-                index += batch_size;
-            } else {
-                index += 1;
+                index += batch_range.len();
             }
         }
     }
@@ -182,12 +183,14 @@ pub trait PhaseItem: Sized + Send + Sync + 'static {
         items.sort_unstable_by_key(|item| item.sort_key());
     }
 
-    /// The number of items to skip after rendering this [`PhaseItem`].
-    ///
-    /// Items with a `batch_size` of 0 will not be rendered.
-    fn batch_size(&self) -> usize {
-        1
-    }
+    /// The range of instances that the batch covers. After doing a batched draw, as many phase
+    /// items as the batch range is long will be skipped. This design is to avoid having to
+    /// restructure the render phase unnecessarily.
+    fn batch_range(&self) -> &Range<u32>;
+    fn batch_range_mut(&mut self) -> &mut Range<u32>;
+
+    fn dynamic_offset(&self) -> Option<NonMaxU32>;
+    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32>;
 }
 
 /// A [`PhaseItem`] item, that automatically sets the appropriate render pipeline,
diff --git a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
index a9fba2ac7fb42..8a850ff9bd001 100644
--- a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
+++ b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
@@ -7,6 +7,7 @@ use encase::{
     private::{ArrayMetadata, BufferMut, Metadata, RuntimeSizedArray, WriteInto, Writer},
     ShaderType,
 };
+use nonmax::NonMaxU32;
 use std::{marker::PhantomData, num::NonZeroU64};
 use wgpu::{BindingResource, Limits};
 
@@ -77,7 +78,7 @@ impl<T: GpuArrayBufferable> BatchedUniformBuffer<T> {
     pub fn push(&mut self, component: T) -> GpuArrayBufferIndex<T> {
         let result = GpuArrayBufferIndex {
             index: self.temp.0.len() as u32,
-            dynamic_offset: Some(self.current_offset),
+            dynamic_offset: NonMaxU32::new(self.current_offset),
             element_type: PhantomData,
         };
         self.temp.0.push(component);
diff --git a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
index 45eaba4f73246..92fbab4fb1216 100644
--- a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
+++ b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
@@ -5,6 +5,7 @@ use crate::{
 };
 use bevy_ecs::{prelude::Component, system::Resource};
 use encase::{private::WriteInto, ShaderSize, ShaderType};
+use nonmax::NonMaxU32;
 use std::{marker::PhantomData, mem};
 use wgpu::{BindGroupLayoutEntry, BindingResource, BindingType, BufferBindingType, ShaderStages};
 
@@ -118,12 +119,22 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
 }
 
 /// An index into a [`GpuArrayBuffer`] for a given element.
-#[derive(Component)]
+#[derive(Component, Clone)]
 pub struct GpuArrayBufferIndex<T: GpuArrayBufferable> {
     /// The index to use in a shader into the array.
     pub index: u32,
     /// The dynamic offset to use when setting the bind group in a pass.
     /// Only used on platforms that don't support storage buffers.
-    pub dynamic_offset: Option<u32>,
+    pub dynamic_offset: Option<NonMaxU32>,
     pub element_type: PhantomData<T>,
 }
+
+impl<T: GpuArrayBufferable> Default for GpuArrayBufferIndex<T> {
+    fn default() -> Self {
+        Self {
+            index: u32::MAX,
+            dynamic_offset: None,
+            element_type: Default::default(),
+        }
+    }
+}
diff --git a/crates/bevy_sprite/Cargo.toml b/crates/bevy_sprite/Cargo.toml
index 37db1b9eb2a20..69494d9da180b 100644
--- a/crates/bevy_sprite/Cargo.toml
+++ b/crates/bevy_sprite/Cargo.toml
@@ -31,3 +31,4 @@ guillotiere = "0.6.0"
 thiserror = "1.0"
 rectangle-pack = "0.4"
 bitflags = "2.3"
+nonmax = "0.5.3"
diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs
index 98fd7791935df..49043e7c7b98f 100644
--- a/crates/bevy_sprite/src/mesh2d/material.rs
+++ b/crates/bevy_sprite/src/mesh2d/material.rs
@@ -16,7 +16,6 @@ use bevy_ecs::{
 use bevy_log::error;
 use bevy_reflect::{TypePath, TypeUuid};
 use bevy_render::{
-    extract_component::ExtractComponentPlugin,
     mesh::{Mesh, MeshVertexBufferLayout},
     prelude::Image,
     render_asset::{prepare_assets, RenderAssets},
@@ -25,9 +24,9 @@ use bevy_render::{
         RenderPhase, SetItemPipeline, TrackedRenderPass,
     },
     render_resource::{
-        AsBindGroup, AsBindGroupError, BindGroup, BindGroupLayout, OwnedBindingResource,
-        PipelineCache, RenderPipelineDescriptor, Shader, ShaderRef, SpecializedMeshPipeline,
-        SpecializedMeshPipelineError, SpecializedMeshPipelines,
+        AsBindGroup, AsBindGroupError, BindGroup, BindGroupId, BindGroupLayout,
+        OwnedBindingResource, PipelineCache, RenderPipelineDescriptor, Shader, ShaderRef,
+        SpecializedMeshPipeline, SpecializedMeshPipelineError, SpecializedMeshPipelines,
     },
     renderer::RenderDevice,
     texture::FallbackImage,
@@ -40,8 +39,8 @@ use std::hash::Hash;
 use std::marker::PhantomData;
 
 use crate::{
-    DrawMesh2d, Mesh2dHandle, Mesh2dPipeline, Mesh2dPipelineKey, Mesh2dUniform, SetMesh2dBindGroup,
-    SetMesh2dViewBindGroup,
+    DrawMesh2d, Mesh2dHandle, Mesh2dPipeline, Mesh2dPipelineKey, Mesh2dTransforms,
+    SetMesh2dBindGroup, SetMesh2dViewBindGroup,
 };
 
 /// Materials are used alongside [`Material2dPlugin`] and [`MaterialMesh2dBundle`]
@@ -151,8 +150,7 @@ where
     M::Data: PartialEq + Eq + Hash + Clone,
 {
     fn build(&self, app: &mut App) {
-        app.add_asset::<M>()
-            .add_plugins(ExtractComponentPlugin::<Handle<M>>::extract_visible());
+        app.add_asset::<M>();
 
         if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
             render_app
@@ -160,7 +158,10 @@ where
                 .init_resource::<ExtractedMaterials2d<M>>()
                 .init_resource::<RenderMaterials2d<M>>()
                 .init_resource::<SpecializedMeshPipelines<Material2dPipeline<M>>>()
-                .add_systems(ExtractSchedule, extract_materials_2d::<M>)
+                .add_systems(
+                    ExtractSchedule,
+                    (extract_materials_2d::<M>, extract_material_meshes_2d::<M>),
+                )
                 .add_systems(
                     Render,
                     (
@@ -182,6 +183,26 @@ where
     }
 }
 
+fn extract_material_meshes_2d<M: Material2d>(
+    mut commands: Commands,
+    mut previous_len: Local<usize>,
+    query: Extract<Query<(Entity, &ViewVisibility, &Handle<M>)>>,
+) {
+    let mut values = Vec::with_capacity(*previous_len);
+    for (entity, view_visibility, material) in &query {
+        if view_visibility.get() {
+            // NOTE: Material2dBindGroupId is inserted here to avoid a table move. Upcoming changes
+            // to use SparseSet for render world entity storage will do this automatically.
+            values.push((
+                entity,
+                (material.clone_weak(), Material2dBindGroupId::default()),
+            ));
+        }
+    }
+    *previous_len = values.len();
+    commands.insert_or_spawn_batch(values);
+}
+
 /// Render pipeline data for a given [`Material2d`]
 #[derive(Resource)]
 pub struct Material2dPipeline<M: Material2d> {
@@ -335,7 +356,12 @@ pub fn queue_material2d_meshes<M: Material2d>(
     msaa: Res<Msaa>,
     render_meshes: Res<RenderAssets<Mesh>>,
     render_materials: Res<RenderMaterials2d<M>>,
-    material2d_meshes: Query<(&Handle<M>, &Mesh2dHandle, &Mesh2dUniform)>,
+    mut material2d_meshes: Query<(
+        &Handle<M>,
+        &mut Material2dBindGroupId,
+        &Mesh2dHandle,
+        &Mesh2dTransforms,
+    )>,
     mut views: Query<(
         &ExtractedView,
         &VisibleEntities,
@@ -380,8 +406,12 @@ pub fn queue_material2d_meshes<M: Material2d>(
         }
 
         for visible_entity in &visible_entities.entities {
-            if let Ok((material2d_handle, mesh2d_handle, mesh2d_uniform)) =
-                material2d_meshes.get(*visible_entity)
+            if let Ok((
+                material2d_handle,
+                mut material2d_bind_group_id,
+                mesh2d_handle,
+                mesh2d_uniform,
+            )) = material2d_meshes.get_mut(*visible_entity)
             {
                 if let Some(material2d) = render_materials.get(material2d_handle) {
                     if let Some(mesh) = render_meshes.get(&mesh2d_handle.0) {
@@ -406,7 +436,8 @@ pub fn queue_material2d_meshes<M: Material2d>(
                             }
                         };
 
-                        let mesh_z = mesh2d_uniform.transform.w_axis.z;
+                        *material2d_bind_group_id = material2d.get_bind_group_id();
+                        let mesh_z = mesh2d_uniform.transform.translation.z;
                         transparent_phase.add(Transparent2d {
                             entity: *visible_entity,
                             draw_function: draw_transparent_pbr,
@@ -416,8 +447,9 @@ pub fn queue_material2d_meshes<M: Material2d>(
                             // -z in front of the camera, the largest distance is -far with values increasing toward the
                             // camera. As such we can just use mesh_z as the distance
                             sort_key: FloatOrd(mesh_z),
-                            // This material is not batched
-                            batch_size: 1,
+                            // Batching is done in prepare_mesh2d_uniforms
+                            batch_range: 0..1,
+                            dynamic_offset: None,
                         });
                     }
                 }
@@ -426,6 +458,9 @@ pub fn queue_material2d_meshes<M: Material2d>(
     }
 }
 
+#[derive(Component, Default, PartialEq, Eq, Deref, DerefMut)]
+pub struct Material2dBindGroupId(Option<BindGroupId>);
+
 /// Data prepared for a [`Material2d`] instance.
 pub struct PreparedMaterial2d<T: Material2d> {
     pub bindings: Vec<OwnedBindingResource>,
@@ -433,6 +468,12 @@ pub struct PreparedMaterial2d<T: Material2d> {
     pub key: T::Data,
 }
 
+impl<T: Material2d> PreparedMaterial2d<T> {
+    pub fn get_bind_group_id(&self) -> Material2dBindGroupId {
+        Material2dBindGroupId(Some(self.bind_group.id()))
+    }
+}
+
 #[derive(Resource)]
 pub struct ExtractedMaterials2d<M: Material2d> {
     extracted: Vec<(Handle<M>, M)>,
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 8489d152fa427..6bf23b7d7411c 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -1,19 +1,22 @@
 use bevy_app::Plugin;
 use bevy_asset::{load_internal_asset, Handle, HandleUntyped};
 
+use bevy_core_pipeline::core_2d::Transparent2d;
 use bevy_ecs::{
     prelude::*,
     query::ROQueryItem,
     system::{lifetimeless::*, SystemParamItem, SystemState},
 };
-use bevy_math::{Mat4, Vec2};
+use bevy_math::{Affine3, Affine3A, Vec2, Vec3Swizzles, Vec4};
 use bevy_reflect::{Reflect, TypeUuid};
 use bevy_render::{
-    extract_component::{ComponentUniforms, DynamicUniformIndex, UniformComponentPlugin},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{GpuBufferInfo, Mesh, MeshVertexBufferLayout},
     render_asset::RenderAssets,
-    render_phase::{PhaseItem, RenderCommand, RenderCommandResult, TrackedRenderPass},
+    render_phase::{
+        CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem, RenderCommand,
+        RenderCommandResult, RenderPhase, TrackedRenderPass,
+    },
     render_resource::*,
     renderer::{RenderDevice, RenderQueue},
     texture::{
@@ -25,11 +28,14 @@ use bevy_render::{
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
 use bevy_transform::components::GlobalTransform;
+use nonmax::NonMaxU32;
+
+use crate::Material2dBindGroupId;
 
 /// Component for rendering with meshes in the 2d pipeline, usually with a [2d material](crate::Material2d) such as [`ColorMaterial`](crate::ColorMaterial).
 ///
 /// It wraps a [`Handle<Mesh>`] to differentiate from the 3d pipelines which use the handles directly as components
-#[derive(Default, Clone, Component, Debug, Reflect)]
+#[derive(Default, Clone, Component, Debug, Reflect, PartialEq, Eq)]
 #[reflect(Component)]
 pub struct Mesh2dHandle(pub Handle<Mesh>);
 
@@ -83,12 +89,6 @@ impl Plugin for Mesh2dRenderPlugin {
             "mesh2d_types.wgsl",
             Shader::from_wgsl
         );
-        load_internal_asset!(
-            app,
-            MESH2D_BINDINGS_HANDLE,
-            "mesh2d_bindings.wgsl",
-            Shader::from_wgsl
-        );
         load_internal_asset!(
             app,
             MESH2D_FUNCTIONS_HANDLE,
@@ -97,8 +97,6 @@ impl Plugin for Mesh2dRenderPlugin {
         );
         load_internal_asset!(app, MESH2D_SHADER_HANDLE, "mesh2d.wgsl", Shader::from_wgsl);
 
-        app.add_plugins(UniformComponentPlugin::<Mesh2dUniform>::default());
-
         if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
             render_app
                 .init_resource::<SpecializedMeshPipelines<Mesh2dPipeline>>()
@@ -106,6 +104,7 @@ impl Plugin for Mesh2dRenderPlugin {
                 .add_systems(
                     Render,
                     (
+                        prepare_mesh2d_uniforms.in_set(RenderSet::PrepareResources),
                         prepare_mesh2d_bind_group.in_set(RenderSet::PrepareBindGroups),
                         prepare_mesh2d_view_bind_groups.in_set(RenderSet::PrepareBindGroups),
                     ),
@@ -114,19 +113,93 @@ impl Plugin for Mesh2dRenderPlugin {
     }
 
     fn finish(&self, app: &mut bevy_app::App) {
+        let mut mesh_bindings_shader_defs = Vec::with_capacity(1);
+
         if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
-            render_app.init_resource::<Mesh2dPipeline>();
+            if let Some(per_object_buffer_batch_size) = GpuArrayBuffer::<Mesh2dUniform>::batch_size(
+                render_app.world.resource::<RenderDevice>(),
+            ) {
+                mesh_bindings_shader_defs.push(ShaderDefVal::UInt(
+                    "PER_OBJECT_BUFFER_BATCH_SIZE".into(),
+                    per_object_buffer_batch_size,
+                ));
+            }
+
+            render_app
+                .insert_resource(GpuArrayBuffer::<Mesh2dUniform>::new(
+                    render_app.world.resource::<RenderDevice>(),
+                ))
+                .init_resource::<Mesh2dPipeline>();
         }
+
+        // Load the mesh_bindings shader module here as it depends on runtime information about
+        // whether storage buffers are supported, or the maximum uniform buffer binding size.
+        load_internal_asset!(
+            app,
+            MESH2D_BINDINGS_HANDLE,
+            "mesh2d_bindings.wgsl",
+            Shader::from_wgsl_with_defs,
+            mesh_bindings_shader_defs
+        );
     }
 }
 
-#[derive(Component, ShaderType, Clone)]
+#[derive(Component)]
+pub struct Mesh2dTransforms {
+    pub transform: Affine3,
+    pub flags: u32,
+}
+
+#[derive(ShaderType, Clone)]
 pub struct Mesh2dUniform {
-    pub transform: Mat4,
-    pub inverse_transpose_model: Mat4,
+    // Affine 4x3 matrix transposed to 3x4
+    pub transform: [Vec4; 3],
+    // 3x3 matrix packed in mat2x4 and f32 as:
+    //   [0].xyz, [1].x,
+    //   [1].yz, [2].xy
+    //   [2].z
+    pub inverse_transpose_model_a: [Vec4; 2],
+    pub inverse_transpose_model_b: f32,
     pub flags: u32,
 }
 
+impl From<&Mesh2dTransforms> for Mesh2dUniform {
+    fn from(mesh_transforms: &Mesh2dTransforms) -> Self {
+        let transpose_model_3x3 = mesh_transforms.transform.matrix3.transpose();
+        let inverse_transpose_model_3x3 = Affine3A::from(&mesh_transforms.transform)
+            .inverse()
+            .matrix3
+            .transpose();
+        Self {
+            transform: [
+                transpose_model_3x3
+                    .x_axis
+                    .extend(mesh_transforms.transform.translation.x),
+                transpose_model_3x3
+                    .y_axis
+                    .extend(mesh_transforms.transform.translation.y),
+                transpose_model_3x3
+                    .z_axis
+                    .extend(mesh_transforms.transform.translation.z),
+            ],
+            inverse_transpose_model_a: [
+                (
+                    inverse_transpose_model_3x3.x_axis,
+                    inverse_transpose_model_3x3.y_axis.x,
+                )
+                    .into(),
+                (
+                    inverse_transpose_model_3x3.y_axis.yz(),
+                    inverse_transpose_model_3x3.z_axis.xy(),
+                )
+                    .into(),
+            ],
+            inverse_transpose_model_b: inverse_transpose_model_3x3.z_axis.z,
+            flags: mesh_transforms.flags,
+        }
+    }
+}
+
 // NOTE: These must match the bit flags in bevy_sprite/src/mesh2d/mesh2d.wgsl!
 bitflags::bitflags! {
     #[repr(transparent)]
@@ -146,15 +219,13 @@ pub fn extract_mesh2d(
         if !view_visibility.get() {
             continue;
         }
-        let transform = transform.compute_matrix();
         values.push((
             entity,
             (
                 Mesh2dHandle(handle.0.clone_weak()),
-                Mesh2dUniform {
+                Mesh2dTransforms {
+                    transform: (&transform.affine()).into(),
                     flags: MeshFlags::empty().bits(),
-                    transform,
-                    inverse_transpose_model: transform.inverse().transpose(),
                 },
             ),
         ));
@@ -163,19 +234,163 @@ pub fn extract_mesh2d(
     commands.insert_or_spawn_batch(values);
 }
 
+/// Data necessary to be equal for two draw commands to be mergeable
+///
+/// This is based on the following assumptions:
+/// - Only entities with prepared assets (pipelines, materials, meshes) are
+///   queued to phases
+/// - View bindings are constant across a phase for a given draw function as
+///   phases are per-view
+/// - `prepare_mesh2d_uniforms` is the only system that performs this batching
+///   and has sole responsibility for preparing the per-object data. As such
+///   the mesh binding and dynamic offsets are assumed to only be variable as a
+///   result of the `prepare_mesh2d_uniforms` system, e.g. due to having to split
+///   data across separate uniform bindings within the same buffer due to the
+///   maximum uniform buffer binding size.
+#[derive(Default, PartialEq, Eq)]
+struct BatchMeta<'mat, 'mesh> {
+    /// The pipeline id encompasses all pipeline configuration including vertex
+    /// buffers and layouts, shaders and their specializations, bind group
+    /// layouts, etc.
+    pipeline_id: Option<CachedRenderPipelineId>,
+    /// The draw function id defines the RenderCommands that are called to
+    /// set the pipeline and bindings, and make the draw command
+    draw_function_id: Option<DrawFunctionId>,
+    /// The material binding meta includes the material bind group id and
+    /// dynamic offsets.
+    material2d_bind_group: Option<&'mat Material2dBindGroupId>,
+    mesh_handle: Option<&'mesh Mesh2dHandle>,
+    dynamic_offset: Option<NonMaxU32>,
+}
+
+impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> {
+    fn matches(&self, other: &BatchMeta<'mat, 'mesh>, consider_material: bool) -> bool {
+        self.pipeline_id == other.pipeline_id
+            && self.draw_function_id == other.draw_function_id
+            && self.mesh_handle == other.mesh_handle
+            && self.dynamic_offset == other.dynamic_offset
+            && (!consider_material || self.material2d_bind_group == other.material2d_bind_group)
+    }
+}
+
+#[derive(Default)]
+struct BatchState<'mat, 'mesh> {
+    meta: BatchMeta<'mat, 'mesh>,
+    /// The base index in the object data binding's array
+    gpu_array_buffer_index: GpuArrayBufferIndex<Mesh2dUniform>,
+    /// The number of entities in the batch
+    count: u32,
+    item_index: usize,
+}
+
+fn update_batch_data<I: PhaseItem>(item: &mut I, batch: &BatchState) {
+    let BatchState {
+        count,
+        gpu_array_buffer_index,
+        ..
+    } = batch;
+    *item.batch_range_mut() = gpu_array_buffer_index.index..(gpu_array_buffer_index.index + *count);
+    *item.dynamic_offset_mut() = gpu_array_buffer_index.dynamic_offset;
+}
+
+fn process_phase<I: CachedRenderPipelinePhaseItem>(
+    object_data_buffer: &mut GpuArrayBuffer<Mesh2dUniform>,
+    object_query: &ObjectQuery,
+    phase: &mut RenderPhase<I>,
+    consider_material: bool,
+) {
+    let mut batch = BatchState::default();
+    for i in 0..phase.items.len() {
+        let item = &mut phase.items[i];
+        let Ok((material2d_bind_group, mesh_handle, mesh_transforms)) =
+            object_query.get(item.entity())
+        else {
+            // It is necessary to start a new batch if an entity not matching the query is
+            // encountered. This can be achieved by resetting the pipeline id.
+            batch.meta.pipeline_id = None;
+            continue;
+        };
+        let gpu_array_buffer_index = object_data_buffer.push(Mesh2dUniform::from(mesh_transforms));
+        let batch_meta = BatchMeta {
+            pipeline_id: Some(item.cached_pipeline()),
+            draw_function_id: Some(item.draw_function()),
+            material2d_bind_group,
+            mesh_handle: Some(mesh_handle),
+            dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+        };
+        if !batch_meta.matches(&batch.meta, consider_material) {
+            if batch.count > 0 {
+                update_batch_data(&mut phase.items[batch.item_index], &batch);
+            }
+
+            batch.meta = batch_meta;
+            batch.gpu_array_buffer_index = gpu_array_buffer_index;
+            batch.count = 0;
+            batch.item_index = i;
+        }
+        batch.count += 1;
+    }
+    if !phase.items.is_empty() && batch.count > 0 {
+        update_batch_data(&mut phase.items[batch.item_index], &batch);
+    }
+}
+
+type ObjectQuery<'w, 's, 'mat, 'mesh, 'data> = Query<
+    'w,
+    's,
+    (
+        Option<&'mat Material2dBindGroupId>,
+        &'mesh Mesh2dHandle,
+        &'data Mesh2dTransforms,
+    ),
+>;
+
+#[allow(clippy::too_many_arguments)]
+pub fn prepare_mesh2d_uniforms(
+    render_device: Res<RenderDevice>,
+    render_queue: Res<RenderQueue>,
+    gpu_array_buffer: ResMut<GpuArrayBuffer<Mesh2dUniform>>,
+    mut views: Query<&mut RenderPhase<Transparent2d>>,
+    meshes: ObjectQuery,
+) {
+    if meshes.is_empty() {
+        return;
+    }
+
+    let gpu_array_buffer = gpu_array_buffer.into_inner();
+
+    gpu_array_buffer.clear();
+
+    for transparent_phase in &mut views {
+        process_phase(
+            gpu_array_buffer,
+            &meshes,
+            transparent_phase.into_inner(),
+            true,
+        );
+    }
+
+    gpu_array_buffer.write_buffer(&render_device, &render_queue);
+}
+
 #[derive(Resource, Clone)]
 pub struct Mesh2dPipeline {
     pub view_layout: BindGroupLayout,
     pub mesh_layout: BindGroupLayout,
     // This dummy white texture is to be used in place of optional textures
     pub dummy_white_gpu_image: GpuImage,
+    pub per_object_buffer_batch_size: Option<u32>,
 }
 
 impl FromWorld for Mesh2dPipeline {
     fn from_world(world: &mut World) -> Self {
-        let mut system_state: SystemState<(Res<RenderDevice>, Res<DefaultImageSampler>)> =
-            SystemState::new(world);
-        let (render_device, default_sampler) = system_state.get_mut(world);
+        let mut system_state: SystemState<(
+            Res<RenderDevice>,
+            Res<RenderQueue>,
+            Res<DefaultImageSampler>,
+        )> = SystemState::new(world);
+        let (render_device, render_queue, default_sampler) = system_state.get_mut(world);
+        let render_device = render_device.into_inner();
         let view_layout = render_device.create_bind_group_layout(&BindGroupLayoutDescriptor {
             entries: &[
                 // View
@@ -204,16 +419,11 @@ impl FromWorld for Mesh2dPipeline {
         });
 
         let mesh_layout = render_device.create_bind_group_layout(&BindGroupLayoutDescriptor {
-            entries: &[BindGroupLayoutEntry {
-                binding: 0,
-                visibility: ShaderStages::VERTEX | ShaderStages::FRAGMENT,
-                ty: BindingType::Buffer {
-                    ty: BufferBindingType::Uniform,
-                    has_dynamic_offset: true,
-                    min_binding_size: Some(Mesh2dUniform::min_size()),
-                },
-                count: None,
-            }],
+            entries: &[GpuArrayBuffer::<Mesh2dUniform>::binding_layout(
+                0,
+                ShaderStages::VERTEX_FRAGMENT,
+                render_device,
+            )],
             label: Some("mesh2d_layout"),
         });
         // A 1x1x1 'all 1.0' texture to use as a dummy texture to use in place of optional StandardMaterial textures
@@ -226,7 +436,6 @@ impl FromWorld for Mesh2dPipeline {
             };
 
             let format_size = image.texture_descriptor.format.pixel_size();
-            let render_queue = world.resource_mut::<RenderQueue>();
             render_queue.write_texture(
                 ImageCopyTexture {
                     texture: &texture,
@@ -260,6 +469,9 @@ impl FromWorld for Mesh2dPipeline {
             view_layout,
             mesh_layout,
             dummy_white_gpu_image,
+            per_object_buffer_batch_size: GpuArrayBuffer::<Mesh2dUniform>::batch_size(
+                render_device,
+            ),
         }
     }
 }
@@ -484,9 +696,9 @@ pub fn prepare_mesh2d_bind_group(
     mut commands: Commands,
     mesh2d_pipeline: Res<Mesh2dPipeline>,
     render_device: Res<RenderDevice>,
-    mesh2d_uniforms: Res<ComponentUniforms<Mesh2dUniform>>,
+    mesh2d_uniforms: Res<GpuArrayBuffer<Mesh2dUniform>>,
 ) {
-    if let Some(binding) = mesh2d_uniforms.uniforms().binding() {
+    if let Some(binding) = mesh2d_uniforms.binding() {
         commands.insert_resource(Mesh2dBindGroup {
             value: render_device.create_bind_group(&BindGroupDescriptor {
                 entries: &[BindGroupEntry {
@@ -564,20 +776,26 @@ pub struct SetMesh2dBindGroup<const I: usize>;
 impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMesh2dBindGroup<I> {
     type Param = SRes<Mesh2dBindGroup>;
     type ViewWorldQuery = ();
-    type ItemWorldQuery = Read<DynamicUniformIndex<Mesh2dUniform>>;
+    type ItemWorldQuery = ();
 
     #[inline]
     fn render<'w>(
-        _item: &P,
+        item: &P,
         _view: (),
-        mesh2d_index: &'_ DynamicUniformIndex<Mesh2dUniform>,
+        _item_query: (),
         mesh2d_bind_group: SystemParamItem<'w, '_, Self::Param>,
         pass: &mut TrackedRenderPass<'w>,
     ) -> RenderCommandResult {
+        let mut dynamic_offsets: [u32; 1] = Default::default();
+        let mut index_count = 0;
+        if let Some(mesh_index) = item.dynamic_offset() {
+            dynamic_offsets[index_count] = mesh_index.get();
+            index_count += 1;
+        }
         pass.set_bind_group(
             I,
             &mesh2d_bind_group.into_inner().value,
-            &[mesh2d_index.index()],
+            &dynamic_offsets[..index_count],
         );
         RenderCommandResult::Success
     }
@@ -591,14 +809,21 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh2d {
 
     #[inline]
     fn render<'w>(
-        _item: &P,
+        item: &P,
         _view: (),
         mesh_handle: ROQueryItem<'w, Self::ItemWorldQuery>,
         meshes: SystemParamItem<'w, '_, Self::Param>,
         pass: &mut TrackedRenderPass<'w>,
     ) -> RenderCommandResult {
+        let batch_range = item.batch_range();
         if let Some(gpu_mesh) = meshes.into_inner().get(&mesh_handle.0) {
             pass.set_vertex_buffer(0, gpu_mesh.vertex_buffer.slice(..));
+            #[cfg(all(feature = "webgl", target_arch = "wasm32"))]
+            pass.set_push_constants(
+                ShaderStages::VERTEX,
+                0,
+                &(batch_range.start as i32).to_le_bytes(),
+            );
             match &gpu_mesh.buffer_info {
                 GpuBufferInfo::Indexed {
                     buffer,
@@ -606,10 +831,10 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh2d {
                     count,
                 } => {
                     pass.set_index_buffer(buffer.slice(..), 0, *index_format);
-                    pass.draw_indexed(0..*count, 0, 0..1);
+                    pass.draw_indexed(0..*count, 0, batch_range.clone());
                 }
                 GpuBufferInfo::NonIndexed => {
-                    pass.draw(0..gpu_mesh.vertex_count, 0..1);
+                    pass.draw(0..gpu_mesh.vertex_count, batch_range.clone());
                 }
             }
             RenderCommandResult::Success
diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d.wgsl
index 2b99639836d31..003f7dda13af9 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh2d.wgsl
+++ b/crates/bevy_sprite/src/mesh2d/mesh2d.wgsl
@@ -8,6 +8,7 @@
 #endif
 
 struct Vertex {
+    @builtin(instance_index) instance_index: u32,
 #ifdef VERTEX_POSITIONS
     @location(0) position: vec3<f32>,
 #endif
@@ -33,20 +34,21 @@ fn vertex(vertex: Vertex) -> MeshVertexOutput {
 #endif
 
 #ifdef VERTEX_POSITIONS
+    var model = mesh_functions::get_model_matrix(vertex.instance_index);
     out.world_position = mesh_functions::mesh2d_position_local_to_world(
-        mesh.model, 
+        model,
         vec4<f32>(vertex.position, 1.0)
     );
     out.position = mesh_functions::mesh2d_position_world_to_clip(out.world_position);
 #endif
 
 #ifdef VERTEX_NORMALS
-    out.world_normal = mesh_functions::mesh2d_normal_local_to_world(vertex.normal);
+    out.world_normal = mesh_functions::mesh2d_normal_local_to_world(vertex.normal, vertex.instance_index);
 #endif
 
 #ifdef VERTEX_TANGENTS
     out.world_tangent = mesh_functions::mesh2d_tangent_local_to_world(
-        mesh.model, 
+        model,
         vertex.tangent
     );
 #endif
diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d_bindings.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d_bindings.wgsl
index 6d51f963e083f..0ae9374a2cfc3 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh2d_bindings.wgsl
+++ b/crates/bevy_sprite/src/mesh2d/mesh2d_bindings.wgsl
@@ -1,6 +1,25 @@
 #define_import_path bevy_sprite::mesh2d_bindings
 
-#import bevy_sprite::mesh2d_types
+#import bevy_sprite::mesh2d_types Mesh2d
 
+#ifdef MESH_BINDGROUP_1
+
+#ifdef PER_OBJECT_BUFFER_BATCH_SIZE
+@group(1) @binding(0)
+var<uniform> mesh: array<Mesh2d, #{PER_OBJECT_BUFFER_BATCH_SIZE}u>;
+#else
+@group(1) @binding(0)
+var<storage> mesh: array<Mesh2d>;
+#endif // PER_OBJECT_BUFFER_BATCH_SIZE
+
+#else // MESH_BINDGROUP_1
+
+#ifdef PER_OBJECT_BUFFER_BATCH_SIZE
 @group(2) @binding(0)
-var<uniform> mesh: bevy_sprite::mesh2d_types::Mesh2d;
+var<uniform> mesh: array<Mesh2d, #{PER_OBJECT_BUFFER_BATCH_SIZE}u>;
+#else
+@group(2) @binding(0)
+var<storage> mesh: array<Mesh2d>;
+#endif // PER_OBJECT_BUFFER_BATCH_SIZE
+
+#endif // MESH_BINDGROUP_1
diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl
index cf8d6e2522068..e242b08badefd 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl
+++ b/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl
@@ -2,6 +2,31 @@
 
 #import bevy_sprite::mesh2d_view_bindings  view
 #import bevy_sprite::mesh2d_bindings       mesh
+#import bevy_render::instance_index        get_instance_index
+
+fn affine_to_square(affine: mat3x4<f32>) -> mat4x4<f32> {
+    return transpose(mat4x4<f32>(
+        affine[0],
+        affine[1],
+        affine[2],
+        vec4<f32>(0.0, 0.0, 0.0, 1.0),
+    ));
+}
+
+fn mat2x4_f32_to_mat3x3_unpack(
+    a: mat2x4<f32>,
+    b: f32,
+) -> mat3x3<f32> {
+    return mat3x3<f32>(
+        a[0].xyz,
+        vec3<f32>(a[0].w, a[1].xy),
+        vec3<f32>(a[1].zw, b),
+    );
+}
+
+fn get_model_matrix(instance_index: u32) -> mat4x4<f32> {
+    return affine_to_square(mesh[get_instance_index(instance_index)].model);
+}
 
 fn mesh2d_position_local_to_world(model: mat4x4<f32>, vertex_position: vec4<f32>) -> vec4<f32> {
     return model * vertex_position;
@@ -19,11 +44,10 @@ fn mesh2d_position_local_to_clip(model: mat4x4<f32>, vertex_position: vec4<f32>)
     return mesh2d_position_world_to_clip(world_position);
 }
 
-fn mesh2d_normal_local_to_world(vertex_normal: vec3<f32>) -> vec3<f32> {
-    return mat3x3<f32>(
-        mesh.inverse_transpose_model[0].xyz,
-        mesh.inverse_transpose_model[1].xyz,
-        mesh.inverse_transpose_model[2].xyz
+fn mesh2d_normal_local_to_world(vertex_normal: vec3<f32>, instance_index: u32) -> vec3<f32> {
+    return mat2x4_f32_to_mat3x3_unpack( // index `mesh` the same way get_model_matrix does
+        mesh[get_instance_index(instance_index)].inverse_transpose_model_a,
+        mesh[get_instance_index(instance_index)].inverse_transpose_model_b,
     ) * vertex_normal;
 }
 
diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl
index 1de0218112a47..d65b8010e667e 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl
+++ b/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl
@@ -1,8 +1,16 @@
 #define_import_path bevy_sprite::mesh2d_types
 
 struct Mesh2d {
-    model: mat4x4<f32>,
-    inverse_transpose_model: mat4x4<f32>,
+    // Affine 4x3 matrix transposed to 3x4
+    // Use bevy_sprite::mesh2d_functions::affine_to_square to unpack
+    model: mat3x4<f32>,
+    // 3x3 matrix packed in mat2x4 and f32 as:
+    // [0].xyz, [1].x,
+    // [1].yz, [2].xy
+    // [2].z
+    // Use bevy_sprite::mesh2d_functions::mat2x4_f32_to_mat3x3_unpack to unpack
+    inverse_transpose_model_a: mat2x4<f32>,
+    inverse_transpose_model_b: f32,
     // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options.
     flags: u32,
 };
diff --git a/crates/bevy_sprite/src/render/mod.rs b/crates/bevy_sprite/src/render/mod.rs
index 3949fbe10d733..6c18198dfbeeb 100644
--- a/crates/bevy_sprite/src/render/mod.rs
+++ b/crates/bevy_sprite/src/render/mod.rs
@@ -576,8 +576,9 @@ pub fn queue_sprites(
                     pipeline: colored_pipeline,
                     entity: *entity,
                     sort_key,
-                    // batch_size will be calculated in prepare_sprites
-                    batch_size: 0,
+                    // batch_range and dynamic_offset will be calculated in prepare_sprites
+                    batch_range: 0..0,
+                    dynamic_offset: None,
                 });
             } else {
                 transparent_phase.add(Transparent2d {
@@ -585,8 +586,9 @@ pub fn queue_sprites(
                     pipeline,
                     entity: *entity,
                     sort_key,
-                    // batch_size will be calculated in prepare_sprites
-                    batch_size: 0,
+                    // batch_range and dynamic_offset will be calculated in prepare_sprites
+                    batch_range: 0..0,
+                    dynamic_offset: None,
                 });
             }
         }
@@ -750,7 +752,9 @@ pub fn prepare_sprites(
                     ));
                 }
 
-                transparent_phase.items[batch_item_index].batch_size += 1;
+                transparent_phase.items[batch_item_index]
+                    .batch_range_mut()
+                    .end += 1;
                 batches.last_mut().unwrap().1.range.end += 1;
                 index += 1;
             }
diff --git a/crates/bevy_ui/Cargo.toml b/crates/bevy_ui/Cargo.toml
index 60e8d74477ac5..41d6451eef342 100644
--- a/crates/bevy_ui/Cargo.toml
+++ b/crates/bevy_ui/Cargo.toml
@@ -36,3 +36,4 @@ serde = { version = "1", features = ["derive"] }
 smallvec = { version = "1.6", features = ["union", "const_generics"] }
 bytemuck = { version = "1.5", features = ["derive"] }
 thiserror = "1.0.0"
+nonmax = "0.5.3"
diff --git a/crates/bevy_ui/src/render/mod.rs b/crates/bevy_ui/src/render/mod.rs
index 2f4229b5b83f5..53c5269b2d3a9 100644
--- a/crates/bevy_ui/src/render/mod.rs
+++ b/crates/bevy_ui/src/render/mod.rs
@@ -4,6 +4,7 @@ mod render_pass;
 use bevy_core_pipeline::{core_2d::Camera2d, core_3d::Camera3d};
 use bevy_ecs::storage::SparseSet;
 use bevy_hierarchy::Parent;
+use bevy_render::render_phase::PhaseItem;
 use bevy_render::view::ViewVisibility;
 use bevy_render::{ExtractSchedule, Render};
 use bevy_window::{PrimaryWindow, Window};
@@ -670,8 +671,9 @@ pub fn queue_uinodes(
                 pipeline,
                 entity: *entity,
                 sort_key: FloatOrd(extracted_uinode.stack_index as f32),
-                // batch_size will be calculated in prepare_uinodes
-                batch_size: 0,
+                // batch_range will be calculated in prepare_uinodes
+                batch_range: 0..0,
+                dynamic_offset: None,
             });
         }
     }
@@ -895,7 +897,7 @@ pub fn prepare_uinodes(
                     }
                     index += QUAD_INDICES.len() as u32;
                     existing_batch.unwrap().1.range.end = index;
-                    ui_phase.items[batch_item_index].batch_size += 1;
+                    ui_phase.items[batch_item_index].batch_range_mut().end += 1;
                 } else {
                     batch_image_handle = HandleId::Id(Uuid::nil(), u64::MAX);
                 }
diff --git a/crates/bevy_ui/src/render/render_pass.rs b/crates/bevy_ui/src/render/render_pass.rs
index 697aa11104c7e..66cc02155a40c 100644
--- a/crates/bevy_ui/src/render/render_pass.rs
+++ b/crates/bevy_ui/src/render/render_pass.rs
@@ -1,3 +1,5 @@
+use std::ops::Range;
+
 use super::{UiBatch, UiImageBindGroups, UiMeta};
 use crate::{prelude::UiCameraConfig, DefaultCameraView};
 use bevy_asset::Handle;
@@ -13,6 +15,7 @@ use bevy_render::{
     view::*,
 };
 use bevy_utils::FloatOrd;
+use nonmax::NonMaxU32;
 
 pub struct UiPassNode {
     ui_view_query: QueryState<
@@ -91,7 +94,8 @@ pub struct TransparentUi {
     pub entity: Entity,
     pub pipeline: CachedRenderPipelineId,
     pub draw_function: DrawFunctionId,
-    pub batch_size: usize,
+    pub batch_range: Range<u32>,
+    pub dynamic_offset: Option<NonMaxU32>,
 }
 
 impl PhaseItem for TransparentUi {
@@ -118,8 +122,23 @@ impl PhaseItem for TransparentUi {
     }
 
     #[inline]
-    fn batch_size(&self) -> usize {
-        self.batch_size
+    fn batch_range(&self) -> &Range<u32> {
+        &self.batch_range
+    }
+
+    #[inline]
+    fn batch_range_mut(&mut self) -> &mut Range<u32> {
+        &mut self.batch_range
+    }
+
+    #[inline]
+    fn dynamic_offset(&self) -> Option<NonMaxU32> {
+        self.dynamic_offset
+    }
+
+    #[inline]
+    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
+        &mut self.dynamic_offset
     }
 }
 
diff --git a/examples/2d/mesh2d_manual.rs b/examples/2d/mesh2d_manual.rs
index 7a3ee4a9d68d8..83f9aaf3ebfcf 100644
--- a/examples/2d/mesh2d_manual.rs
+++ b/examples/2d/mesh2d_manual.rs
@@ -24,7 +24,7 @@ use bevy::{
         Extract, Render, RenderApp, RenderSet,
     },
     sprite::{
-        DrawMesh2d, Mesh2dHandle, Mesh2dPipeline, Mesh2dPipelineKey, Mesh2dUniform,
+        DrawMesh2d, Mesh2dHandle, Mesh2dPipeline, Mesh2dPipelineKey, Mesh2dTransforms,
         SetMesh2dBindGroup, SetMesh2dViewBindGroup,
     },
     utils::FloatOrd,
@@ -150,19 +150,21 @@ impl SpecializedRenderPipeline for ColoredMesh2dPipeline {
             false => TextureFormat::bevy_default(),
         };
 
+        let shader_defs = vec!["MESH_BINDGROUP_1".into()];
+
         RenderPipelineDescriptor {
             vertex: VertexState {
                 // Use our custom shader
                 shader: COLORED_MESH2D_SHADER_HANDLE.typed::<Shader>(),
                 entry_point: "vertex".into(),
-                shader_defs: Vec::new(),
+                shader_defs: shader_defs.clone(),
                 // Use our custom vertex buffer
                 buffers: vec![vertex_layout],
             },
             fragment: Some(FragmentState {
                 // Use our custom shader
                 shader: COLORED_MESH2D_SHADER_HANDLE.typed::<Shader>(),
-                shader_defs: Vec::new(),
+                shader_defs,
                 entry_point: "fragment".into(),
                 targets: vec![Some(ColorTargetState {
                     format,
@@ -214,14 +216,12 @@ type DrawColoredMesh2d = (
 // using `include_str!()`, or loaded like any other asset with `asset_server.load()`.
 const COLORED_MESH2D_SHADER: &str = r"
 // Import the standard 2d mesh uniforms and set their bind groups
-#import bevy_sprite::mesh2d_types as MeshTypes
+#import bevy_sprite::mesh2d_bindings mesh
 #import bevy_sprite::mesh2d_functions as MeshFunctions
 
-@group(1) @binding(0)
-var<uniform> mesh: MeshTypes::Mesh2d;
-
 // The structure of the vertex buffer is as specified in `specialize()`
 struct Vertex {
+    @builtin(instance_index) instance_index: u32,
     @location(0) position: vec3<f32>,
     @location(1) color: u32,
 };
@@ -238,7 +238,8 @@ struct VertexOutput {
 fn vertex(vertex: Vertex) -> VertexOutput {
     var out: VertexOutput;
     // Project the world position of the mesh into screen position
-    out.clip_position = MeshFunctions::mesh2d_position_local_to_clip(mesh.model, vec4<f32>(vertex.position, 1.0));
+    let model = MeshFunctions::get_model_matrix(vertex.instance_index);
+    out.clip_position = MeshFunctions::mesh2d_position_local_to_clip(model, vec4<f32>(vertex.position, 1.0));
     // Unpack the `u32` from the vertex buffer into the `vec4<f32>` used by the fragment shader
     out.color = vec4<f32>((vec4<u32>(vertex.color) >> vec4<u32>(0u, 8u, 16u, 24u)) & vec4<u32>(255u)) / 255.0;
     return out;
@@ -318,7 +319,7 @@ pub fn queue_colored_mesh2d(
     pipeline_cache: Res<PipelineCache>,
     msaa: Res<Msaa>,
     render_meshes: Res<RenderAssets<Mesh>>,
-    colored_mesh2d: Query<(&Mesh2dHandle, &Mesh2dUniform), With<ColoredMesh2d>>,
+    colored_mesh2d: Query<(&Mesh2dHandle, &Mesh2dTransforms), With<ColoredMesh2d>>,
     mut views: Query<(
         &VisibleEntities,
         &mut RenderPhase<Transparent2d>,
@@ -337,7 +338,7 @@ pub fn queue_colored_mesh2d(
 
         // Queue all entities visible to that view
         for visible_entity in &visible_entities.entities {
-            if let Ok((mesh2d_handle, mesh2d_uniform)) = colored_mesh2d.get(*visible_entity) {
+            if let Ok((mesh2d_handle, mesh2d_transforms)) = colored_mesh2d.get(*visible_entity) {
                 // Get our specialized pipeline
                 let mut mesh2d_key = mesh_key;
                 if let Some(mesh) = render_meshes.get(&mesh2d_handle.0) {
@@ -348,7 +349,7 @@ pub fn queue_colored_mesh2d(
                 let pipeline_id =
                     pipelines.specialize(&pipeline_cache, &colored_mesh2d_pipeline, mesh2d_key);
 
-                let mesh_z = mesh2d_uniform.transform.w_axis.z;
+                let mesh_z = mesh2d_transforms.transform.translation.z;
                 transparent_phase.add(Transparent2d {
                     entity: *visible_entity,
                     draw_function: draw_colored_mesh2d,
@@ -357,7 +358,8 @@ pub fn queue_colored_mesh2d(
                     // in order to get correct transparency
                     sort_key: FloatOrd(mesh_z),
                     // This material is not batched
-                    batch_size: 1,
+                    batch_range: 0..1,
+                    dynamic_offset: None,
                 });
             }
         }
diff --git a/examples/shader/shader_instancing.rs b/examples/shader/shader_instancing.rs
index 326183d917a0b..d5e751ae0fa1d 100644
--- a/examples/shader/shader_instancing.rs
+++ b/examples/shader/shader_instancing.rs
@@ -136,7 +136,8 @@ fn queue_custom(
                     draw_function: draw_custom,
                     distance: rangefinder
                         .distance_translation(&mesh_transforms.transform.translation),
-                    batch_size: 1,
+                    batch_range: 0..1,
+                    dynamic_offset: None,
                 });
             }
         }

From 950abd79486621dc7c3677f799e40a9e5011a9cf Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Mon, 4 Sep 2023 17:00:29 +0200
Subject: [PATCH 02/33] HACK: Split batches for skinning or morph targets

---
 crates/bevy_pbr/src/material.rs            | 21 +++++++++++++++------
 crates/bevy_pbr/src/render/mesh.rs         |  9 +++++++--
 crates/bevy_pbr/src/render/mesh_types.wgsl |  4 +++-
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs
index 4347c05629b0c..76eb8a1960ca2 100644
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@@ -1,7 +1,7 @@
 use crate::{
-    render, AlphaMode, DrawMesh, DrawPrepass, EnvironmentMapLight, MeshPipeline, MeshPipelineKey,
-    MeshTransforms, PrepassPipelinePlugin, PrepassPlugin, ScreenSpaceAmbientOcclusionSettings,
-    SetMeshBindGroup, SetMeshViewBindGroup, Shadow,
+    is_skinned, render, AlphaMode, DrawMesh, DrawPrepass, EnvironmentMapLight, MeshFlags,
+    MeshPipeline, MeshPipelineKey, MeshTransforms, PrepassPipelinePlugin, PrepassPlugin,
+    ScreenSpaceAmbientOcclusionSettings, SetMeshBindGroup, SetMeshViewBindGroup, Shadow,
 };
 use bevy_app::{App, Plugin};
 use bevy_asset::{AddAsset, AssetEvent, AssetServer, Assets, Handle};
@@ -409,7 +409,7 @@ pub fn queue_material_meshes<M: Material>(
         &Handle<M>,
         &mut MaterialBindGroupId,
         &Handle<Mesh>,
-        &MeshTransforms,
+        &mut MeshTransforms,
     )>,
     images: Res<RenderAssets<Image>>,
     mut views: Query<(
@@ -494,8 +494,12 @@ pub fn queue_material_meshes<M: Material>(
 
         let rangefinder = view.rangefinder3d();
         for visible_entity in &visible_entities.entities {
-            if let Ok((material_handle, mut material_bind_group_id, mesh_handle, mesh_transforms)) =
-                material_meshes.get_mut(*visible_entity)
+            if let Ok((
+                material_handle,
+                mut material_bind_group_id,
+                mesh_handle,
+                mut mesh_transforms,
+            )) = material_meshes.get_mut(*visible_entity)
             {
                 if let (Some(mesh), Some(material)) = (
                     render_meshes.get(mesh_handle),
@@ -504,8 +508,13 @@ pub fn queue_material_meshes<M: Material>(
                     let mut mesh_key =
                         MeshPipelineKey::from_primitive_topology(mesh.primitive_topology)
                             | view_key;
+
+                    if is_skinned(&mesh.layout) {
+                        mesh_transforms.flags |= MeshFlags::SKINNED.bits();
+                    }
                     if mesh.morph_targets.is_some() {
                         mesh_key |= MeshPipelineKey::MORPH_TARGETS;
+                        mesh_transforms.flags |= MeshFlags::MORPH_TARGETS.bits();
                     }
                     match material.properties.alpha_mode {
                         AlphaMode::Blend => {
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index e10a4a1dab164..de1049850c2c3 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -247,8 +247,10 @@ impl From<&MeshTransforms> for MeshUniform {
 // NOTE: These must match the bit flags in bevy_pbr/src/render/mesh_types.wgsl!
 bitflags::bitflags! {
     #[repr(transparent)]
-    struct MeshFlags: u32 {
+    pub struct MeshFlags: u32 {
         const SHADOW_RECEIVER            = (1 << 0);
+        const SKINNED                    = (1 << 1);
+        const MORPH_TARGETS              = (1 << 2);
         // Indicates the sign of the determinant of the 3x3 model matrix. If the sign is positive,
         // then the flag should be set, else it should not be set.
         const SIGN_DETERMINANT_MODEL_3X3 = (1 << 31);
@@ -413,6 +415,7 @@ struct BatchMeta<'mat, 'mesh> {
     /// dynamic offsets.
     material_binding_meta: Option<&'mat MaterialBindGroupId>,
     mesh_handle: Option<&'mesh Handle<Mesh>>,
+    mesh_flags: u32,
     dynamic_offset: Option<NonMaxU32>,
 }
 
@@ -422,6 +425,7 @@ impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> {
         self.pipeline_id == other.pipeline_id
             && self.draw_function_id == other.draw_function_id
             && self.mesh_handle == other.mesh_handle
+            && (self.mesh_flags & (MeshFlags::SKINNED | MeshFlags::MORPH_TARGETS).bits()) == 0
             && self.dynamic_offset == other.dynamic_offset
             && (!consider_material || self.material_binding_meta == other.material_binding_meta)
     }
@@ -470,6 +474,7 @@ fn process_phase<I: CachedRenderPipelinePhaseItem>(
             draw_function_id: Some(item.draw_function()),
             material_binding_meta,
             mesh_handle: Some(mesh_handle),
+            mesh_flags: mesh_transforms.flags,
             dynamic_offset: gpu_array_buffer_index.dynamic_offset,
         };
         if !batch_meta.matches(&batch.meta, consider_material) {
@@ -939,7 +944,7 @@ impl MeshPipelineKey {
     }
 }
 
-fn is_skinned(layout: &Hashed<InnerMeshVertexBufferLayout>) -> bool {
+pub fn is_skinned(layout: &Hashed<InnerMeshVertexBufferLayout>) -> bool {
     layout.contains(Mesh::ATTRIBUTE_JOINT_INDEX) && layout.contains(Mesh::ATTRIBUTE_JOINT_WEIGHT)
 }
 pub fn setup_morph_and_skinning_defs(
diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl
index 7412de7a8a5f7..ba04c18e4a7d6 100644
--- a/crates/bevy_pbr/src/render/mesh_types.wgsl
+++ b/crates/bevy_pbr/src/render/mesh_types.wgsl
@@ -28,6 +28,8 @@ struct MorphWeights {
 };
 #endif
 
-const MESH_FLAGS_SHADOW_RECEIVER_BIT: u32 = 1u;
+const MESH_FLAGS_SHADOW_RECEIVER_BIT: u32            = 1u;
+const MESH_FLAGS_SKINNED_BIT: u32                    = 2u;
+const MESH_FLAGS_MORPH_TARGETS_BIT: u32              = 4u;
 // 2^31 - if the flag is set, the sign is positive, else it is negative
 const MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT: u32 = 2147483648u;

From d3f38df558035479b9992fdf580bbb865470d7fe Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Wed, 6 Sep 2023 13:22:05 +0200
Subject: [PATCH 03/33] Only specify major and minor versions for nonmax

---
 crates/bevy_core_pipeline/Cargo.toml | 2 +-
 crates/bevy_pbr/Cargo.toml           | 2 +-
 crates/bevy_render/Cargo.toml        | 2 +-
 crates/bevy_sprite/Cargo.toml        | 2 +-
 crates/bevy_ui/Cargo.toml            | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/crates/bevy_core_pipeline/Cargo.toml b/crates/bevy_core_pipeline/Cargo.toml
index e15cdf85775ec..128fa346ba0d2 100644
--- a/crates/bevy_core_pipeline/Cargo.toml
+++ b/crates/bevy_core_pipeline/Cargo.toml
@@ -33,4 +33,4 @@ bevy_utils = { path = "../bevy_utils", version = "0.12.0-dev" }
 serde = { version = "1", features = ["derive"] }
 bitflags = "2.3"
 radsort = "0.1"
-nonmax = "0.5.3"
+nonmax = "0.5"
diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml
index 371cb5c4a87bf..427a90eb4dfee 100644
--- a/crates/bevy_pbr/Cargo.toml
+++ b/crates/bevy_pbr/Cargo.toml
@@ -33,4 +33,4 @@ bytemuck = { version = "1", features = ["derive"] }
 naga_oil = "0.8"
 radsort = "0.1"
 smallvec = "1.6"
-nonmax = "0.5.3"
+nonmax = "0.5"
diff --git a/crates/bevy_render/Cargo.toml b/crates/bevy_render/Cargo.toml
index fe945ab00b6bc..c267524e38e1b 100644
--- a/crates/bevy_render/Cargo.toml
+++ b/crates/bevy_render/Cargo.toml
@@ -83,7 +83,7 @@ encase = { version = "0.6.1", features = ["glam"] }
 # For wgpu profiling using tracing. Use `RUST_LOG=info` to also capture the wgpu spans.
 profiling = { version = "1", features = ["profile-with-tracing"], optional = true }
 async-channel = "1.8"
-nonmax = "0.5.3"
+nonmax = "0.5"
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 js-sys = "0.3"
diff --git a/crates/bevy_sprite/Cargo.toml b/crates/bevy_sprite/Cargo.toml
index 69494d9da180b..550ea2af166c7 100644
--- a/crates/bevy_sprite/Cargo.toml
+++ b/crates/bevy_sprite/Cargo.toml
@@ -31,4 +31,4 @@ guillotiere = "0.6.0"
 thiserror = "1.0"
 rectangle-pack = "0.4"
 bitflags = "2.3"
-nonmax = "0.5.3"
+nonmax = "0.5"
diff --git a/crates/bevy_ui/Cargo.toml b/crates/bevy_ui/Cargo.toml
index 41d6451eef342..9e0c3fd6c7a84 100644
--- a/crates/bevy_ui/Cargo.toml
+++ b/crates/bevy_ui/Cargo.toml
@@ -36,4 +36,4 @@ serde = { version = "1", features = ["derive"] }
 smallvec = { version = "1.6", features = ["union", "const_generics"] }
 bytemuck = { version = "1.5", features = ["derive"] }
 thiserror = "1.0.0"
-nonmax = "0.5.3"
+nonmax = "0.5"

From 806bd82981c772c02fdb96360cc6a8627dab3215 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Wed, 6 Sep 2023 13:47:38 +0200
Subject: [PATCH 04/33] Drop consider_material from batching

---
 crates/bevy_pbr/src/render/mesh.rs    | 39 ++++++++-------------------
 crates/bevy_sprite/src/mesh2d/mesh.rs | 14 +++-------
 2 files changed, 15 insertions(+), 38 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index de1049850c2c3..844cd687b258b 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -413,7 +413,7 @@ struct BatchMeta<'mat, 'mesh> {
     draw_function_id: Option<DrawFunctionId>,
     /// The material binding meta includes the material bind group id and
     /// dynamic offsets.
-    material_binding_meta: Option<&'mat MaterialBindGroupId>,
+    material_bind_group_id: Option<&'mat MaterialBindGroupId>,
     mesh_handle: Option<&'mesh Handle<Mesh>>,
     mesh_flags: u32,
     dynamic_offset: Option<NonMaxU32>,
@@ -421,13 +421,13 @@ struct BatchMeta<'mat, 'mesh> {
 
 impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> {
     #[inline]
-    fn matches(&self, other: &BatchMeta<'mat, 'mesh>, consider_material: bool) -> bool {
+    fn matches(&self, other: &BatchMeta<'mat, 'mesh>) -> bool {
         self.pipeline_id == other.pipeline_id
             && self.draw_function_id == other.draw_function_id
             && self.mesh_handle == other.mesh_handle
             && (self.mesh_flags & (MeshFlags::SKINNED | MeshFlags::MORPH_TARGETS).bits()) == 0
             && self.dynamic_offset == other.dynamic_offset
-            && (!consider_material || self.material_binding_meta == other.material_binding_meta)
+            && self.material_bind_group_id == other.material_bind_group_id
     }
 }
 
@@ -455,12 +455,11 @@ fn process_phase<I: CachedRenderPipelinePhaseItem>(
     object_data_buffer: &mut GpuArrayBuffer<MeshUniform>,
     object_query: &ObjectQuery,
     phase: &mut RenderPhase<I>,
-    consider_material: bool,
 ) {
     let mut batch = BatchState::default();
     for i in 0..phase.items.len() {
         let item = &mut phase.items[i];
-        let Ok((material_binding_meta, mesh_handle, mesh_transforms)) =
+        let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
             object_query.get(item.entity())
         else {
             // It is necessary to start a new batch if an entity not matching the query is
@@ -472,12 +471,12 @@ fn process_phase<I: CachedRenderPipelinePhaseItem>(
         let batch_meta = BatchMeta {
             pipeline_id: Some(item.cached_pipeline()),
             draw_function_id: Some(item.draw_function()),
-            material_binding_meta,
+            material_bind_group_id,
             mesh_handle: Some(mesh_handle),
             mesh_flags: mesh_transforms.flags,
             dynamic_offset: gpu_array_buffer_index.dynamic_offset,
         };
-        if !batch_meta.matches(&batch.meta, consider_material) {
+        if !batch_meta.matches(&batch.meta) {
             if batch.count > 0 {
                 update_batch_data(&mut phase.items[batch.item_index], &batch);
             }
@@ -532,38 +531,22 @@ pub fn prepare_mesh_uniforms(
     ) in &mut views
     {
         if let Some(opaque_prepass_phase) = opaque_prepass_phase {
-            process_phase(
-                gpu_array_buffer,
-                &meshes,
-                opaque_prepass_phase.into_inner(),
-                false,
-            );
+            process_phase(gpu_array_buffer, &meshes, opaque_prepass_phase.into_inner());
         }
         if let Some(alpha_mask_prepass_phase) = alpha_mask_prepass_phase {
             process_phase(
                 gpu_array_buffer,
                 &meshes,
                 alpha_mask_prepass_phase.into_inner(),
-                true,
             );
         }
-        process_phase(gpu_array_buffer, &meshes, opaque_phase.into_inner(), true);
-        process_phase(
-            gpu_array_buffer,
-            &meshes,
-            alpha_mask_phase.into_inner(),
-            true,
-        );
-        process_phase(
-            gpu_array_buffer,
-            &meshes,
-            transparent_phase.into_inner(),
-            true,
-        );
+        process_phase(gpu_array_buffer, &meshes, opaque_phase.into_inner());
+        process_phase(gpu_array_buffer, &meshes, alpha_mask_phase.into_inner());
+        process_phase(gpu_array_buffer, &meshes, transparent_phase.into_inner());
     }
 
     for shadow_phase in &mut shadow_views {
-        process_phase(gpu_array_buffer, &meshes, shadow_phase.into_inner(), false);
+        process_phase(gpu_array_buffer, &meshes, shadow_phase.into_inner());
     }
 
     gpu_array_buffer.write_buffer(&render_device, &render_queue);
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 6bf23b7d7411c..0ac6ea39fb001 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -264,12 +264,12 @@ struct BatchMeta<'mat, 'mesh> {
 }
 
 impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> {
-    fn matches(&self, other: &BatchMeta<'mat, 'mesh>, consider_material: bool) -> bool {
+    fn matches(&self, other: &BatchMeta<'mat, 'mesh>) -> bool {
         self.pipeline_id == other.pipeline_id
             && self.draw_function_id == other.draw_function_id
             && self.mesh_handle == other.mesh_handle
             && self.dynamic_offset == other.dynamic_offset
-            && (!consider_material || self.material2d_bind_group == other.material2d_bind_group)
+            && self.material2d_bind_group == other.material2d_bind_group
     }
 }
 
@@ -297,7 +297,6 @@ fn process_phase<I: CachedRenderPipelinePhaseItem>(
     object_data_buffer: &mut GpuArrayBuffer<Mesh2dUniform>,
     object_query: &ObjectQuery,
     phase: &mut RenderPhase<I>,
-    consider_material: bool,
 ) {
     let mut batch = BatchState::default();
     for i in 0..phase.items.len() {
@@ -318,7 +317,7 @@ fn process_phase<I: CachedRenderPipelinePhaseItem>(
             mesh_handle: Some(mesh_handle),
             dynamic_offset: gpu_array_buffer_index.dynamic_offset,
         };
-        if !batch_meta.matches(&batch.meta, consider_material) {
+        if !batch_meta.matches(&batch.meta) {
             if batch.count > 0 {
                 update_batch_data(&mut phase.items[batch.item_index], &batch);
             }
@@ -362,12 +361,7 @@ pub fn prepare_mesh2d_uniforms(
     gpu_array_buffer.clear();
 
     for transparent_phase in &mut views {
-        process_phase(
-            gpu_array_buffer,
-            &meshes,
-            transparent_phase.into_inner(),
-            true,
-        );
+        process_phase(gpu_array_buffer, &meshes, transparent_phase.into_inner());
     }
 
     gpu_array_buffer.write_buffer(&render_device, &render_queue);

From 693c4f2c52e82d98aa5b549d5a1adfbdb97261f5 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Wed, 6 Sep 2023 13:51:02 +0200
Subject: [PATCH 05/33] Reuse functions from bevy_render::maths shader import

---
 .../src/mesh2d/mesh2d_functions.wgsl          | 21 +------------------
 .../bevy_sprite/src/mesh2d/mesh2d_types.wgsl  |  4 ++--
 2 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl
index e242b08badefd..b936cad10f66f 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl
+++ b/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl
@@ -3,26 +3,7 @@
 #import bevy_sprite::mesh2d_view_bindings  view
 #import bevy_sprite::mesh2d_bindings       mesh
 #import bevy_render::instance_index        get_instance_index
-
-fn affine_to_square(affine: mat3x4<f32>) -> mat4x4<f32> {
-    return transpose(mat4x4<f32>(
-        affine[0],
-        affine[1],
-        affine[2],
-        vec4<f32>(0.0, 0.0, 0.0, 1.0),
-    ));
-}
-
-fn mat2x4_f32_to_mat3x3_unpack(
-    a: mat2x4<f32>,
-    b: f32,
-) -> mat3x3<f32> {
-    return mat3x3<f32>(
-        a[0].xyz,
-        vec3<f32>(a[0].w, a[1].xy),
-        vec3<f32>(a[1].zw, b),
-    );
-}
+#import bevy_render::maths                 affine_to_square, mat2x4_f32_to_mat3x3_unpack
 
 fn get_model_matrix(instance_index: u32) -> mat4x4<f32> {
     return affine_to_square(mesh[get_instance_index(instance_index)].model);
diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl
index d65b8010e667e..f855707790001 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl
+++ b/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl
@@ -2,13 +2,13 @@
 
 struct Mesh2d {
     // Affine 4x3 matrix transposed to 3x4
-    // Use bevy_sprite::mesh2d_functions::affine_to_square to unpack
+    // Use bevy_render::maths::affine_to_square to unpack
     model: mat3x4<f32>,
     // 3x3 matrix packed in mat2x4 and f32 as:
     // [0].xyz, [1].x,
     // [1].yz, [2].xy
     // [2].z
-    // Use bevy_sprite::mesh2d_functions::mat2x4_f32_to_mat3x3_unpack to unpack
+    // Use bevy_render::maths::mat2x4_f32_to_mat3x3_unpack to unpack
     inverse_transpose_model_a: mat2x4<f32>,
     inverse_transpose_model_b: f32,
     // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options.

From a0d2a77a7b95892e59f2461daedba85d21455cc0 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Wed, 6 Sep 2023 17:50:29 +0200
Subject: [PATCH 06/33] Use Option<BatchMeta> instead of using Options inside
 BatchMeta

---
 crates/bevy_pbr/src/render/mesh.rs    | 28 +++++++++++++++------------
 crates/bevy_sprite/src/mesh2d/mesh.rs | 28 +++++++++++++++------------
 2 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 844cd687b258b..c072b749f7817 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -402,19 +402,19 @@ pub fn extract_skinned_meshes(
 ///   result of the `prepare_mesh_uniforms` system, e.g. due to having to split
 ///   data across separate uniform bindings within the same buffer due to the
 ///   maximum uniform buffer binding size.
-#[derive(Default, PartialEq, Eq)]
+#[derive(PartialEq, Eq)]
 struct BatchMeta<'mat, 'mesh> {
     /// The pipeline id encompasses all pipeline configuration including vertex
     /// buffers and layouts, shaders and their specializations, bind group
     /// layouts, etc.
-    pipeline_id: Option<CachedRenderPipelineId>,
+    pipeline_id: CachedRenderPipelineId,
     /// The draw function id defines the RenderCommands that are called to
     /// set the pipeline and bindings, and make the draw command
-    draw_function_id: Option<DrawFunctionId>,
+    draw_function_id: DrawFunctionId,
     /// The material binding meta includes the material bind group id and
     /// dynamic offsets.
     material_bind_group_id: Option<&'mat MaterialBindGroupId>,
-    mesh_handle: Option<&'mesh Handle<Mesh>>,
+    mesh_handle: &'mesh Handle<Mesh>,
     mesh_flags: u32,
     dynamic_offset: Option<NonMaxU32>,
 }
@@ -433,7 +433,7 @@ impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> {
 
 #[derive(Default)]
 struct BatchState<'mat, 'mesh> {
-    meta: BatchMeta<'mat, 'mesh>,
+    meta: Option<BatchMeta<'mat, 'mesh>>,
     /// The base index in the object data binding's array
     gpu_array_buffer_index: GpuArrayBufferIndex<MeshUniform>,
     /// The number of entities in the batch
@@ -463,25 +463,29 @@ fn process_phase<I: CachedRenderPipelinePhaseItem>(
             object_query.get(item.entity())
         else {
             // It is necessary to start a new batch if an entity not matching the query is
-            // encountered. This can be achieved by resetting the pipelined id.
-            batch.meta.pipeline_id = None;
+            // encountered. This can be achieved by resetting the batch meta.
+            batch.meta = None;
             continue;
         };
         let gpu_array_buffer_index = object_data_buffer.push(MeshUniform::from(mesh_transforms));
         let batch_meta = BatchMeta {
-            pipeline_id: Some(item.cached_pipeline()),
-            draw_function_id: Some(item.draw_function()),
+            pipeline_id: item.cached_pipeline(),
+            draw_function_id: item.draw_function(),
             material_bind_group_id,
-            mesh_handle: Some(mesh_handle),
+            mesh_handle,
             mesh_flags: mesh_transforms.flags,
             dynamic_offset: gpu_array_buffer_index.dynamic_offset,
         };
-        if !batch_meta.matches(&batch.meta) {
+        if !batch
+            .meta
+            .as_ref()
+            .map_or(false, |meta| meta.matches(&batch_meta))
+        {
             if batch.count > 0 {
                 update_batch_data(&mut phase.items[batch.item_index], &batch);
             }
 
-            batch.meta = batch_meta;
+            batch.meta = Some(batch_meta);
             batch.gpu_array_buffer_index = gpu_array_buffer_index;
             batch.count = 0;
             batch.item_index = i;
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 0ac6ea39fb001..74093b223b0cd 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -247,19 +247,19 @@ pub fn extract_mesh2d(
 ///   result of the `prepare_mesh_uniforms` system, e.g. due to having to split
 ///   data across separate uniform bindings within the same buffer due to the
 ///   maximum uniform buffer binding size.
-#[derive(Default, PartialEq, Eq)]
+#[derive(PartialEq, Eq)]
 struct BatchMeta<'mat, 'mesh> {
     /// The pipeline id encompasses all pipeline configuration including vertex
     /// buffers and layouts, shaders and their specializations, bind group
     /// layouts, etc.
-    pipeline_id: Option<CachedRenderPipelineId>,
+    pipeline_id: CachedRenderPipelineId,
     /// The draw function id defines the RenderCommands that are called to
     /// set the pipeline and bindings, and make the draw command
-    draw_function_id: Option<DrawFunctionId>,
+    draw_function_id: DrawFunctionId,
     /// The material binding meta includes the material bind group id and
     /// dynamic offsets.
     material2d_bind_group: Option<&'mat Material2dBindGroupId>,
-    mesh_handle: Option<&'mesh Mesh2dHandle>,
+    mesh_handle: &'mesh Mesh2dHandle,
     dynamic_offset: Option<NonMaxU32>,
 }
 
@@ -275,7 +275,7 @@ impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> {
 
 #[derive(Default)]
 struct BatchState<'mat, 'mesh> {
-    meta: BatchMeta<'mat, 'mesh>,
+    meta: Option<BatchMeta<'mat, 'mesh>>,
     /// The base index in the object data binding's array
     gpu_array_buffer_index: GpuArrayBufferIndex<Mesh2dUniform>,
     /// The number of entities in the batch
@@ -305,24 +305,28 @@ fn process_phase<I: CachedRenderPipelinePhaseItem>(
             object_query.get(item.entity())
         else {
             // It is necessary to start a new batch if an entity not matching the query is
-            // encountered. This can be achieved by resetting the pipelined id.
-            batch.meta.pipeline_id = None;
+            // encountered. This can be achieved by resetting the batch meta.
+            batch.meta = None;
             continue;
         };
         let gpu_array_buffer_index = object_data_buffer.push(Mesh2dUniform::from(mesh_transforms));
         let batch_meta = BatchMeta {
-            pipeline_id: Some(item.cached_pipeline()),
-            draw_function_id: Some(item.draw_function()),
+            pipeline_id: item.cached_pipeline(),
+            draw_function_id: item.draw_function(),
             material2d_bind_group,
-            mesh_handle: Some(mesh_handle),
+            mesh_handle,
             dynamic_offset: gpu_array_buffer_index.dynamic_offset,
         };
-        if !batch_meta.matches(&batch.meta) {
+        if !batch
+            .meta
+            .as_ref()
+            .map_or(false, |meta| meta.matches(&batch_meta))
+        {
             if batch.count > 0 {
                 update_batch_data(&mut phase.items[batch.item_index], &batch);
             }
 
-            batch.meta = batch_meta;
+            batch.meta = Some(batch_meta);
             batch.gpu_array_buffer_index = gpu_array_buffer_index;
             batch.count = 0;
             batch.item_index = i;

From 684d1a042f6449b7c5c527d3602124b3ec1b1d99 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Sun, 10 Sep 2023 23:38:35 +0200
Subject: [PATCH 07/33] Make batching code reusable

---
 crates/bevy_pbr/src/material.rs               |   2 +-
 crates/bevy_pbr/src/render/mesh.rs            | 221 ++++++++++--------
 crates/bevy_render/src/batching/mod.rs        |  80 +++++++
 crates/bevy_render/src/lib.rs                 |   1 +
 .../render_resource/batched_uniform_buffer.rs |   2 +-
 .../src/render_resource/gpu_array_buffer.rs   |   6 +-
 crates/bevy_sprite/src/mesh2d/material.rs     |   2 +-
 crates/bevy_sprite/src/mesh2d/mesh.rs         | 123 +++-------
 8 files changed, 245 insertions(+), 192 deletions(-)
 create mode 100644 crates/bevy_render/src/batching/mod.rs

diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs
index ffaba8246258c..29f107fa5be6f 100644
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@@ -611,7 +611,7 @@ pub struct PreparedMaterial<T: Material> {
     pub properties: MaterialProperties,
 }
 
-#[derive(Component, Default, PartialEq, Eq, Deref, DerefMut)]
+#[derive(Component, Clone, Default, PartialEq, Eq, Deref, DerefMut)]
 pub struct MaterialBindGroupId(Option<BindGroupId>);
 
 impl<T: Material> PreparedMaterial<T> {
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index f7ac0167cb797..157d16553f4b2 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -21,6 +21,7 @@ use bevy_ecs::{
 };
 use bevy_math::{Affine3, Affine3A, Mat4, Vec2, Vec3Swizzles, Vec4};
 use bevy_render::{
+    batching::{process_phase, BatchMeta},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{
         skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
@@ -122,7 +123,7 @@ impl Plugin for MeshRenderPlugin {
                 .add_systems(
                     Render,
                     (
-                        prepare_mesh_uniforms.in_set(RenderSet::PrepareResources),
+                        prepare_and_batch_meshes.in_set(RenderSet::PrepareResources),
                         prepare_skinned_meshes.in_set(RenderSet::PrepareResources),
                         prepare_morphs.in_set(RenderSet::PrepareResources),
                         prepare_mesh_bind_group.in_set(RenderSet::PrepareBindGroups),
@@ -393,7 +394,7 @@ pub fn extract_skinned_meshes(
 ///   data across separate uniform bindings within the same buffer due to the
 ///   maximum uniform buffer binding size.
 #[derive(PartialEq, Eq)]
-struct BatchMeta<'mat, 'mesh> {
+struct BatchMeta3d {
     /// The pipeline id encompasses all pipeline configuration including vertex
     /// buffers and layouts, shaders and their specializations, bind group
     /// layouts, etc.
@@ -401,104 +402,26 @@ struct BatchMeta<'mat, 'mesh> {
     /// The draw function id defines the RenderCommands that are called to
     /// set the pipeline and bindings, and make the draw command
     draw_function_id: DrawFunctionId,
-    /// The material binding meta includes the material bind group id and
-    /// dynamic offsets.
-    material_bind_group_id: Option<&'mat MaterialBindGroupId>,
-    mesh_handle: &'mesh Handle<Mesh>,
+    material_bind_group_id: Option<MaterialBindGroupId>,
+    mesh_asset_id: AssetId<Mesh>,
     mesh_flags: u32,
     dynamic_offset: Option<NonMaxU32>,
 }
 
-impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> {
+impl BatchMeta<BatchMeta3d> for BatchMeta3d {
     #[inline]
-    fn matches(&self, other: &BatchMeta<'mat, 'mesh>) -> bool {
+    fn matches(&self, other: &BatchMeta3d) -> bool {
         self.pipeline_id == other.pipeline_id
             && self.draw_function_id == other.draw_function_id
-            && self.mesh_handle == other.mesh_handle
+            && self.mesh_asset_id == other.mesh_asset_id
             && (self.mesh_flags & (MeshFlags::SKINNED | MeshFlags::MORPH_TARGETS).bits()) == 0
             && self.dynamic_offset == other.dynamic_offset
             && self.material_bind_group_id == other.material_bind_group_id
     }
 }
 
-#[derive(Default)]
-struct BatchState<'mat, 'mesh> {
-    meta: Option<BatchMeta<'mat, 'mesh>>,
-    /// The base index in the object data binding's array
-    gpu_array_buffer_index: GpuArrayBufferIndex<MeshUniform>,
-    /// The number of entities in the batch
-    count: u32,
-    item_index: usize,
-}
-
-fn update_batch_data<I: PhaseItem>(item: &mut I, batch: &BatchState) {
-    let BatchState {
-        count,
-        gpu_array_buffer_index,
-        ..
-    } = batch;
-    *item.batch_range_mut() = gpu_array_buffer_index.index..(gpu_array_buffer_index.index + *count);
-    *item.dynamic_offset_mut() = gpu_array_buffer_index.dynamic_offset;
-}
-
-fn process_phase<I: CachedRenderPipelinePhaseItem>(
-    object_data_buffer: &mut GpuArrayBuffer<MeshUniform>,
-    object_query: &ObjectQuery,
-    phase: &mut RenderPhase<I>,
-) {
-    let mut batch = BatchState::default();
-    for i in 0..phase.items.len() {
-        let item = &mut phase.items[i];
-        let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
-            object_query.get(item.entity())
-        else {
-            // It is necessary to start a new batch if an entity not matching the query is
-            // encountered. This can be achieved by resetting the batch meta.
-            batch.meta = None;
-            continue;
-        };
-        let gpu_array_buffer_index = object_data_buffer.push(MeshUniform::from(mesh_transforms));
-        let batch_meta = BatchMeta {
-            pipeline_id: item.cached_pipeline(),
-            draw_function_id: item.draw_function(),
-            material_bind_group_id,
-            mesh_handle,
-            mesh_flags: mesh_transforms.flags,
-            dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-        };
-        if !batch
-            .meta
-            .as_ref()
-            .map_or(false, |meta| meta.matches(&batch_meta))
-        {
-            if batch.count > 0 {
-                update_batch_data(&mut phase.items[batch.item_index], &batch);
-            }
-
-            batch.meta = Some(batch_meta);
-            batch.gpu_array_buffer_index = gpu_array_buffer_index;
-            batch.count = 0;
-            batch.item_index = i;
-        }
-        batch.count += 1;
-    }
-    if !phase.items.is_empty() && batch.count > 0 {
-        update_batch_data(&mut phase.items[batch.item_index], &batch);
-    }
-}
-
-type ObjectQuery<'w, 's, 'mat, 'mesh, 'data> = Query<
-    'w,
-    's,
-    (
-        Option<&'mat MaterialBindGroupId>,
-        &'mesh Handle<Mesh>,
-        &'data MeshTransforms,
-    ),
->;
-
 #[allow(clippy::too_many_arguments)]
-pub fn prepare_mesh_uniforms(
+pub fn prepare_and_batch_meshes(
     render_device: Res<RenderDevice>,
     render_queue: Res<RenderQueue>,
     gpu_array_buffer: ResMut<GpuArrayBuffer<MeshUniform>>,
@@ -510,7 +433,7 @@ pub fn prepare_mesh_uniforms(
         &mut RenderPhase<Transparent3d>,
     )>,
     mut shadow_views: Query<&mut RenderPhase<Shadow>>,
-    meshes: ObjectQuery,
+    meshes: Query<(Option<&MaterialBindGroupId>, &Handle<Mesh>, &MeshTransforms)>,
 ) {
     let gpu_array_buffer = gpu_array_buffer.into_inner();
 
@@ -525,22 +448,126 @@ pub fn prepare_mesh_uniforms(
     ) in &mut views
     {
         if let Some(opaque_prepass_phase) = opaque_prepass_phase {
-            process_phase(gpu_array_buffer, &meshes, opaque_prepass_phase.into_inner());
+            process_phase(opaque_prepass_phase.into_inner(), |item| {
+                let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
+                    meshes.get(item.entity())
+                else {
+                    return None;
+                };
+                let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
+                Some((
+                    BatchMeta3d {
+                        pipeline_id: item.cached_pipeline(),
+                        draw_function_id: item.draw_function(),
+                        material_bind_group_id: material_bind_group_id.cloned(),
+                        mesh_asset_id: mesh_handle.id(),
+                        mesh_flags: mesh_transforms.flags,
+                        dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+                    },
+                    gpu_array_buffer_index,
+                ))
+            });
         }
         if let Some(alpha_mask_prepass_phase) = alpha_mask_prepass_phase {
-            process_phase(
-                gpu_array_buffer,
-                &meshes,
-                alpha_mask_prepass_phase.into_inner(),
-            );
+            process_phase(alpha_mask_prepass_phase.into_inner(), |item| {
+                let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
+                    meshes.get(item.entity())
+                else {
+                    return None;
+                };
+                let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
+                Some((
+                    BatchMeta3d {
+                        pipeline_id: item.cached_pipeline(),
+                        draw_function_id: item.draw_function(),
+                        material_bind_group_id: material_bind_group_id.cloned(),
+                        mesh_asset_id: mesh_handle.id(),
+                        mesh_flags: mesh_transforms.flags,
+                        dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+                    },
+                    gpu_array_buffer_index,
+                ))
+            });
         }
-        process_phase(gpu_array_buffer, &meshes, opaque_phase.into_inner());
-        process_phase(gpu_array_buffer, &meshes, alpha_mask_phase.into_inner());
-        process_phase(gpu_array_buffer, &meshes, transparent_phase.into_inner());
+        process_phase(opaque_phase.into_inner(), |item| {
+            let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
+                meshes.get(item.entity())
+            else {
+                return None;
+            };
+            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
+            Some((
+                BatchMeta3d {
+                    pipeline_id: item.cached_pipeline(),
+                    draw_function_id: item.draw_function(),
+                    material_bind_group_id: material_bind_group_id.cloned(),
+                    mesh_asset_id: mesh_handle.id(),
+                    mesh_flags: mesh_transforms.flags,
+                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+                },
+                gpu_array_buffer_index,
+            ))
+        });
+        process_phase(alpha_mask_phase.into_inner(), |item| {
+            let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
+                meshes.get(item.entity())
+            else {
+                return None;
+            };
+            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
+            Some((
+                BatchMeta3d {
+                    pipeline_id: item.cached_pipeline(),
+                    draw_function_id: item.draw_function(),
+                    material_bind_group_id: material_bind_group_id.cloned(),
+                    mesh_asset_id: mesh_handle.id(),
+                    mesh_flags: mesh_transforms.flags,
+                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+                },
+                gpu_array_buffer_index,
+            ))
+        });
+        process_phase(transparent_phase.into_inner(), |item| {
+            let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
+                meshes.get(item.entity())
+            else {
+                return None;
+            };
+            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
+            Some((
+                BatchMeta3d {
+                    pipeline_id: item.cached_pipeline(),
+                    draw_function_id: item.draw_function(),
+                    material_bind_group_id: material_bind_group_id.cloned(),
+                    mesh_asset_id: mesh_handle.id(),
+                    mesh_flags: mesh_transforms.flags,
+                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+                },
+                gpu_array_buffer_index,
+            ))
+        });
     }
 
     for shadow_phase in &mut shadow_views {
-        process_phase(gpu_array_buffer, &meshes, shadow_phase.into_inner());
+        process_phase(shadow_phase.into_inner(), |item| {
+            let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
+                meshes.get(item.entity())
+            else {
+                return None;
+            };
+            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
+            Some((
+                BatchMeta3d {
+                    pipeline_id: item.cached_pipeline(),
+                    draw_function_id: item.draw_function(),
+                    material_bind_group_id: material_bind_group_id.cloned(),
+                    mesh_asset_id: mesh_handle.id(),
+                    mesh_flags: mesh_transforms.flags,
+                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+                },
+                gpu_array_buffer_index,
+            ))
+        });
     }
 
     gpu_array_buffer.write_buffer(&render_device, &render_queue);
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
new file mode 100644
index 0000000000000..9ad937252c684
--- /dev/null
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -0,0 +1,80 @@
+use crate::{
+    render_phase::{CachedRenderPipelinePhaseItem, PhaseItem, RenderPhase},
+    render_resource::{GpuArrayBufferIndex, GpuArrayBufferable},
+};
+
+struct BatchState<T: BatchMeta<T>, D: GpuArrayBufferable> {
+    meta: Option<T>,
+    /// The base index in the object data binding's array
+    gpu_array_buffer_index: GpuArrayBufferIndex<D>,
+    /// The number of entities in the batch
+    count: u32,
+    item_index: usize,
+}
+
+impl<T: BatchMeta<T>, D: GpuArrayBufferable> Default for BatchState<T, D> {
+    fn default() -> Self {
+        Self {
+            meta: Default::default(),
+            gpu_array_buffer_index: Default::default(),
+            count: Default::default(),
+            item_index: Default::default(),
+        }
+    }
+}
+
+fn update_batch_data<I: PhaseItem, T: BatchMeta<T>, D: GpuArrayBufferable>(
+    item: &mut I,
+    batch: &BatchState<T, D>,
+) {
+    let BatchState {
+        count,
+        gpu_array_buffer_index,
+        ..
+    } = batch;
+    let index = gpu_array_buffer_index.index.map_or(0, |index| index.get());
+    *item.batch_range_mut() = index..(index + *count);
+    *item.dynamic_offset_mut() = gpu_array_buffer_index.dynamic_offset;
+}
+
+pub trait BatchMeta<T: BatchMeta<T>> {
+    fn matches(&self, other: &T) -> bool;
+}
+
+pub fn process_phase<
+    I: CachedRenderPipelinePhaseItem,
+    T: BatchMeta<T>,       // Batch metadata used for distinguishing batches
+    D: GpuArrayBufferable, // Per-instance data
+>(
+    phase: &mut RenderPhase<I>,
+    mut get_batch_meta: impl FnMut(&mut I) -> Option<(T, GpuArrayBufferIndex<D>)>,
+) {
+    let mut batch = BatchState::<T, D>::default();
+    for i in 0..phase.items.len() {
+        let item = &mut phase.items[i];
+        let Some((batch_meta, gpu_array_buffer_index)) = get_batch_meta(item) else {
+            // Start a new batch whenever `get_batch_meta` returns `None` for an item (e.g. its
+            // entity did not match the caller's query). Resetting the batch meta achieves this.
+            batch.meta = None;
+            continue;
+        };
+        if !batch
+            .meta
+            .as_ref()
+            .map_or(false, |meta| meta.matches(&batch_meta))
+        {
+            if batch.count > 0 {
+                update_batch_data(&mut phase.items[batch.item_index], &batch);
+            }
+
+            batch.meta = Some(batch_meta);
+            batch.gpu_array_buffer_index = gpu_array_buffer_index;
+            batch.count = 0;
+            batch.item_index = i;
+        }
+        batch.count += 1;
+    }
+    if !phase.items.is_empty() && batch.count > 0 {
+        update_batch_data(&mut phase.items[batch.item_index], &batch);
+    }
+}
diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs
index 2f36d3a1cf8a6..cae881bb3c527 100644
--- a/crates/bevy_render/src/lib.rs
+++ b/crates/bevy_render/src/lib.rs
@@ -5,6 +5,7 @@ compile_error!("bevy_render cannot compile for a 16-bit platform.");
 
 extern crate core;
 
+pub mod batching;
 pub mod camera;
 pub mod color;
 pub mod extract_component;
diff --git a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
index 8a850ff9bd001..f6da91bd12ea9 100644
--- a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
+++ b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
@@ -77,7 +77,7 @@ impl<T: GpuArrayBufferable> BatchedUniformBuffer<T> {
 
     pub fn push(&mut self, component: T) -> GpuArrayBufferIndex<T> {
         let result = GpuArrayBufferIndex {
-            index: self.temp.0.len() as u32,
+            index: NonMaxU32::new(self.temp.0.len() as u32),
             dynamic_offset: NonMaxU32::new(self.current_offset),
             element_type: PhantomData,
         };
diff --git a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
index 92fbab4fb1216..6d9c87888a2f1 100644
--- a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
+++ b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
@@ -53,7 +53,7 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
         match self {
             GpuArrayBuffer::Uniform(buffer) => buffer.push(value),
             GpuArrayBuffer::Storage((_, buffer)) => {
-                let index = buffer.len() as u32;
+                let index = NonMaxU32::new(buffer.len() as u32);
                 buffer.push(value);
                 GpuArrayBufferIndex {
                     index,
@@ -122,7 +122,7 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
 #[derive(Component, Clone)]
 pub struct GpuArrayBufferIndex<T: GpuArrayBufferable> {
     /// The index to use in a shader into the array.
-    pub index: u32,
+    pub index: Option<NonMaxU32>,
     /// The dynamic offset to use when setting the bind group in a pass.
     /// Only used on platforms that don't support storage buffers.
     pub dynamic_offset: Option<NonMaxU32>,
@@ -132,7 +132,7 @@ pub struct GpuArrayBufferIndex<T: GpuArrayBufferable> {
 impl<T: GpuArrayBufferable> Default for GpuArrayBufferIndex<T> {
     fn default() -> Self {
         Self {
-            index: u32::MAX,
+            index: None,
             dynamic_offset: None,
             element_type: Default::default(),
         }
diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs
index 53e95954106e7..fa69b91b41970 100644
--- a/crates/bevy_sprite/src/mesh2d/material.rs
+++ b/crates/bevy_sprite/src/mesh2d/material.rs
@@ -454,7 +454,7 @@ pub fn queue_material2d_meshes<M: Material2d>(
     }
 }
 
-#[derive(Component, Default, PartialEq, Eq, Deref, DerefMut)]
+#[derive(Component, Clone, Default, PartialEq, Eq, Deref, DerefMut)]
 pub struct Material2dBindGroupId(Option<BindGroupId>);
 
 /// Data prepared for a [`Material2d`] instance.
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 377fb1ea59798..6d22a04826acf 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -1,5 +1,5 @@
 use bevy_app::Plugin;
-use bevy_asset::{load_internal_asset, Handle};
+use bevy_asset::{load_internal_asset, AssetId, Handle};
 
 use bevy_core_pipeline::core_2d::Transparent2d;
 use bevy_ecs::{
@@ -10,6 +10,7 @@ use bevy_ecs::{
 use bevy_math::{Affine3, Affine3A, Vec2, Vec3Swizzles, Vec4};
 use bevy_reflect::Reflect;
 use bevy_render::{
+    batching::{process_phase, BatchMeta},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{GpuBufferInfo, Mesh, MeshVertexBufferLayout},
     render_asset::RenderAssets,
@@ -97,7 +98,7 @@ impl Plugin for Mesh2dRenderPlugin {
                 .add_systems(
                     Render,
                     (
-                        prepare_mesh2d_uniforms.in_set(RenderSet::PrepareResources),
+                        prepare_and_batch_meshes2d.in_set(RenderSet::PrepareResources),
                         prepare_mesh2d_bind_group.in_set(RenderSet::PrepareBindGroups),
                         prepare_mesh2d_view_bind_groups.in_set(RenderSet::PrepareBindGroups),
                     ),
@@ -241,7 +242,7 @@ pub fn extract_mesh2d(
 ///   data across separate uniform bindings within the same buffer due to the
 ///   maximum uniform buffer binding size.
 #[derive(PartialEq, Eq)]
-struct BatchMeta<'mat, 'mesh> {
+struct BatchMeta2d {
     /// The pipeline id encompasses all pipeline configuration including vertex
     /// buffers and layouts, shaders and their specializations, bind group
     /// layouts, etc.
@@ -249,105 +250,32 @@ struct BatchMeta<'mat, 'mesh> {
     /// The draw function id defines the RenderCommands that are called to
     /// set the pipeline and bindings, and make the draw command
     draw_function_id: DrawFunctionId,
-    /// The material binding meta includes the material bind group id and
-    /// dynamic offsets.
-    material2d_bind_group: Option<&'mat Material2dBindGroupId>,
-    mesh_handle: &'mesh Mesh2dHandle,
+    material2d_bind_group_id: Option<Material2dBindGroupId>,
+    mesh_asset_id: AssetId<Mesh>,
     dynamic_offset: Option<NonMaxU32>,
 }
 
-impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> {
-    fn matches(&self, other: &BatchMeta<'mat, 'mesh>) -> bool {
+impl BatchMeta<BatchMeta2d> for BatchMeta2d {
+    fn matches(&self, other: &BatchMeta2d) -> bool {
         self.pipeline_id == other.pipeline_id
             && self.draw_function_id == other.draw_function_id
-            && self.mesh_handle == other.mesh_handle
+            && self.mesh_asset_id == other.mesh_asset_id
             && self.dynamic_offset == other.dynamic_offset
-            && self.material2d_bind_group == other.material2d_bind_group
+            && self.material2d_bind_group_id == other.material2d_bind_group_id
     }
 }
 
-#[derive(Default)]
-struct BatchState<'mat, 'mesh> {
-    meta: Option<BatchMeta<'mat, 'mesh>>,
-    /// The base index in the object data binding's array
-    gpu_array_buffer_index: GpuArrayBufferIndex<Mesh2dUniform>,
-    /// The number of entities in the batch
-    count: u32,
-    item_index: usize,
-}
-
-fn update_batch_data<I: PhaseItem>(item: &mut I, batch: &BatchState) {
-    let BatchState {
-        count,
-        gpu_array_buffer_index,
-        ..
-    } = batch;
-    *item.batch_range_mut() = gpu_array_buffer_index.index..(gpu_array_buffer_index.index + *count);
-    *item.dynamic_offset_mut() = gpu_array_buffer_index.dynamic_offset;
-}
-
-fn process_phase<I: CachedRenderPipelinePhaseItem>(
-    object_data_buffer: &mut GpuArrayBuffer<Mesh2dUniform>,
-    object_query: &ObjectQuery,
-    phase: &mut RenderPhase<I>,
-) {
-    let mut batch = BatchState::default();
-    for i in 0..phase.items.len() {
-        let item = &mut phase.items[i];
-        let Ok((material2d_bind_group, mesh_handle, mesh_transforms)) =
-            object_query.get(item.entity())
-        else {
-            // It is necessary to start a new batch if an entity not matching the query is
-            // encountered. This can be achieved by resetting the batch meta.
-            batch.meta = None;
-            continue;
-        };
-        let gpu_array_buffer_index = object_data_buffer.push(Mesh2dUniform::from(mesh_transforms));
-        let batch_meta = BatchMeta {
-            pipeline_id: item.cached_pipeline(),
-            draw_function_id: item.draw_function(),
-            material2d_bind_group,
-            mesh_handle,
-            dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-        };
-        if !batch
-            .meta
-            .as_ref()
-            .map_or(false, |meta| meta.matches(&batch_meta))
-        {
-            if batch.count > 0 {
-                update_batch_data(&mut phase.items[batch.item_index], &batch);
-            }
-
-            batch.meta = Some(batch_meta);
-            batch.gpu_array_buffer_index = gpu_array_buffer_index;
-            batch.count = 0;
-            batch.item_index = i;
-        }
-        batch.count += 1;
-    }
-    if !phase.items.is_empty() && batch.count > 0 {
-        update_batch_data(&mut phase.items[batch.item_index], &batch);
-    }
-}
-
-type ObjectQuery<'w, 's, 'mat, 'mesh, 'data> = Query<
-    'w,
-    's,
-    (
-        Option<&'mat Material2dBindGroupId>,
-        &'mesh Mesh2dHandle,
-        &'data Mesh2dTransforms,
-    ),
->;
-
 #[allow(clippy::too_many_arguments)]
-pub fn prepare_mesh2d_uniforms(
+pub fn prepare_and_batch_meshes2d(
     render_device: Res<RenderDevice>,
     render_queue: Res<RenderQueue>,
     gpu_array_buffer: ResMut<GpuArrayBuffer<Mesh2dUniform>>,
     mut views: Query<&mut RenderPhase<Transparent2d>>,
-    meshes: ObjectQuery,
+    meshes: Query<(
+        Option<&Material2dBindGroupId>,
+        &Mesh2dHandle,
+        &Mesh2dTransforms,
+    )>,
 ) {
     if meshes.is_empty() {
         return;
@@ -358,7 +286,24 @@ pub fn prepare_mesh2d_uniforms(
     gpu_array_buffer.clear();
 
     for transparent_phase in &mut views {
-        process_phase(gpu_array_buffer, &meshes, transparent_phase.into_inner());
+        process_phase(transparent_phase.into_inner(), |item| {
+            let Ok((material2d_bind_group_id, mesh_handle, mesh_transforms)) =
+                meshes.get(item.entity())
+            else {
+                return None;
+            };
+            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
+            Some((
+                BatchMeta2d {
+                    pipeline_id: item.cached_pipeline(),
+                    draw_function_id: item.draw_function(),
+                    material2d_bind_group_id: material2d_bind_group_id.cloned(),
+                    mesh_asset_id: mesh_handle.0.id(),
+                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+                },
+                gpu_array_buffer_index,
+            ))
+        });
     }
 
     gpu_array_buffer.write_buffer(&render_device, &render_queue);

From 756b7edff545b2c4bfcedd7c78784f3e6808fd7d Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Sun, 10 Sep 2023 23:49:06 +0200
Subject: [PATCH 08/33] Refactor closures in prepare_and_batch_meshes

---
 crates/bevy_pbr/src/render/mesh.rs | 126 ++++++-----------------------
 1 file changed, 24 insertions(+), 102 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 157d16553f4b2..c3661f016ed58 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -439,6 +439,24 @@ pub fn prepare_and_batch_meshes(
 
     gpu_array_buffer.clear();
 
+    let mut get_batch_meta = |entity, pipeline_id, draw_function_id| {
+        let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) = meshes.get(entity) else {
+            return None;
+        };
+        let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
+        Some((
+            BatchMeta3d {
+                pipeline_id,
+                draw_function_id,
+                material_bind_group_id: material_bind_group_id.cloned(),
+                mesh_asset_id: mesh_handle.id(),
+                mesh_flags: mesh_transforms.flags,
+                dynamic_offset: gpu_array_buffer_index.dynamic_offset,
+            },
+            gpu_array_buffer_index,
+        ))
+    };
+
     for (
         opaque_prepass_phase,
         alpha_mask_prepass_phase,
@@ -449,124 +467,28 @@ pub fn prepare_and_batch_meshes(
     {
         if let Some(opaque_prepass_phase) = opaque_prepass_phase {
             process_phase(opaque_prepass_phase.into_inner(), |item| {
-                let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
-                    meshes.get(item.entity())
-                else {
-                    return None;
-                };
-                let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
-                Some((
-                    BatchMeta3d {
-                        pipeline_id: item.cached_pipeline(),
-                        draw_function_id: item.draw_function(),
-                        material_bind_group_id: material_bind_group_id.cloned(),
-                        mesh_asset_id: mesh_handle.id(),
-                        mesh_flags: mesh_transforms.flags,
-                        dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-                    },
-                    gpu_array_buffer_index,
-                ))
+                get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
             });
         }
         if let Some(alpha_mask_prepass_phase) = alpha_mask_prepass_phase {
             process_phase(alpha_mask_prepass_phase.into_inner(), |item| {
-                let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
-                    meshes.get(item.entity())
-                else {
-                    return None;
-                };
-                let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
-                Some((
-                    BatchMeta3d {
-                        pipeline_id: item.cached_pipeline(),
-                        draw_function_id: item.draw_function(),
-                        material_bind_group_id: material_bind_group_id.cloned(),
-                        mesh_asset_id: mesh_handle.id(),
-                        mesh_flags: mesh_transforms.flags,
-                        dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-                    },
-                    gpu_array_buffer_index,
-                ))
+                get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
             });
         }
         process_phase(opaque_phase.into_inner(), |item| {
-            let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
-                meshes.get(item.entity())
-            else {
-                return None;
-            };
-            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
-            Some((
-                BatchMeta3d {
-                    pipeline_id: item.cached_pipeline(),
-                    draw_function_id: item.draw_function(),
-                    material_bind_group_id: material_bind_group_id.cloned(),
-                    mesh_asset_id: mesh_handle.id(),
-                    mesh_flags: mesh_transforms.flags,
-                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-                },
-                gpu_array_buffer_index,
-            ))
+            get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
         });
         process_phase(alpha_mask_phase.into_inner(), |item| {
-            let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
-                meshes.get(item.entity())
-            else {
-                return None;
-            };
-            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
-            Some((
-                BatchMeta3d {
-                    pipeline_id: item.cached_pipeline(),
-                    draw_function_id: item.draw_function(),
-                    material_bind_group_id: material_bind_group_id.cloned(),
-                    mesh_asset_id: mesh_handle.id(),
-                    mesh_flags: mesh_transforms.flags,
-                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-                },
-                gpu_array_buffer_index,
-            ))
+            get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
         });
         process_phase(transparent_phase.into_inner(), |item| {
-            let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
-                meshes.get(item.entity())
-            else {
-                return None;
-            };
-            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
-            Some((
-                BatchMeta3d {
-                    pipeline_id: item.cached_pipeline(),
-                    draw_function_id: item.draw_function(),
-                    material_bind_group_id: material_bind_group_id.cloned(),
-                    mesh_asset_id: mesh_handle.id(),
-                    mesh_flags: mesh_transforms.flags,
-                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-                },
-                gpu_array_buffer_index,
-            ))
+            get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
         });
     }
 
     for shadow_phase in &mut shadow_views {
         process_phase(shadow_phase.into_inner(), |item| {
-            let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) =
-                meshes.get(item.entity())
-            else {
-                return None;
-            };
-            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
-            Some((
-                BatchMeta3d {
-                    pipeline_id: item.cached_pipeline(),
-                    draw_function_id: item.draw_function(),
-                    material_bind_group_id: material_bind_group_id.cloned(),
-                    mesh_asset_id: mesh_handle.id(),
-                    mesh_flags: mesh_transforms.flags,
-                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-                },
-                gpu_array_buffer_index,
-            ))
+            get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
         });
     }
 

From 861808e62f91f3ce14351ee512d98158f9eea135 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Sun, 10 Sep 2023 23:56:51 +0200
Subject: [PATCH 09/33] Rename process_phase -> batch_render_phase now that it
 is API

---
 crates/bevy_pbr/src/render/mesh.rs     | 14 +++++++-------
 crates/bevy_render/src/batching/mod.rs |  8 +++++---
 crates/bevy_sprite/src/mesh2d/mesh.rs  |  4 ++--
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index c3661f016ed58..c43e896e3198a 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -21,7 +21,7 @@ use bevy_ecs::{
 };
 use bevy_math::{Affine3, Affine3A, Mat4, Vec2, Vec3Swizzles, Vec4};
 use bevy_render::{
-    batching::{process_phase, BatchMeta},
+    batching::{batch_render_phase, BatchMeta},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{
         skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
@@ -466,28 +466,28 @@ pub fn prepare_and_batch_meshes(
     ) in &mut views
     {
         if let Some(opaque_prepass_phase) = opaque_prepass_phase {
-            process_phase(opaque_prepass_phase.into_inner(), |item| {
+            batch_render_phase(opaque_prepass_phase.into_inner(), |item| {
                 get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
             });
         }
         if let Some(alpha_mask_prepass_phase) = alpha_mask_prepass_phase {
-            process_phase(alpha_mask_prepass_phase.into_inner(), |item| {
+            batch_render_phase(alpha_mask_prepass_phase.into_inner(), |item| {
                 get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
             });
         }
-        process_phase(opaque_phase.into_inner(), |item| {
+        batch_render_phase(opaque_phase.into_inner(), |item| {
             get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
         });
-        process_phase(alpha_mask_phase.into_inner(), |item| {
+        batch_render_phase(alpha_mask_phase.into_inner(), |item| {
             get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
         });
-        process_phase(transparent_phase.into_inner(), |item| {
+        batch_render_phase(transparent_phase.into_inner(), |item| {
             get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
         });
     }
 
     for shadow_phase in &mut shadow_views {
-        process_phase(shadow_phase.into_inner(), |item| {
+        batch_render_phase(shadow_phase.into_inner(), |item| {
             get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
         });
     }
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 9ad937252c684..d3ba9cb0e3d24 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -41,17 +41,19 @@ pub trait BatchMeta<T: BatchMeta<T>> {
     fn matches(&self, other: &T) -> bool;
 }
 
-pub fn process_phase<
+/// Batch the items in a render phase. This means comparing metadata needed to draw each phase item
+/// and trying to combine the draws into a batch.
+pub fn batch_render_phase<
     I: CachedRenderPipelinePhaseItem,
     T: BatchMeta<T>,       // Batch metadata used for distinguishing batches
     D: GpuArrayBufferable, // Per-instance data
 >(
     phase: &mut RenderPhase<I>,
-    mut get_batch_meta: impl FnMut(&mut I) -> Option<(T, GpuArrayBufferIndex<D>)>,
+    mut get_batch_meta: impl FnMut(&I) -> Option<(T, GpuArrayBufferIndex<D>)>,
 ) {
     let mut batch = BatchState::<T, D>::default();
     for i in 0..phase.items.len() {
-        let item = &mut phase.items[i];
+        let item = &phase.items[i];
         let Some((batch_meta, gpu_array_buffer_index)) = get_batch_meta(item) else {
             // It is necessary to start a new batch if an entity not matching the query is
             // encountered. This can be achieved by resetting the batch meta.
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 6d22a04826acf..096372051aeca 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -10,7 +10,7 @@ use bevy_ecs::{
 use bevy_math::{Affine3, Affine3A, Vec2, Vec3Swizzles, Vec4};
 use bevy_reflect::Reflect;
 use bevy_render::{
-    batching::{process_phase, BatchMeta},
+    batching::{batch_render_phase, BatchMeta},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{GpuBufferInfo, Mesh, MeshVertexBufferLayout},
     render_asset::RenderAssets,
@@ -286,7 +286,7 @@ pub fn prepare_and_batch_meshes2d(
     gpu_array_buffer.clear();
 
     for transparent_phase in &mut views {
-        process_phase(transparent_phase.into_inner(), |item| {
+        batch_render_phase(transparent_phase.into_inner(), |item| {
             let Ok((material2d_bind_group_id, mesh_handle, mesh_transforms)) =
                 meshes.get(item.entity())
             else {

From c6a52f8ed8681760224265d22d6a8f66c6357e38 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Mon, 11 Sep 2023 00:13:04 +0200
Subject: [PATCH 10/33] Remove batching dependency on GpuArrayBuffer

---
 crates/bevy_pbr/src/render/mesh.rs     |  3 +-
 crates/bevy_render/src/batching/mod.rs | 42 +++++++++++++-------------
 crates/bevy_sprite/src/mesh2d/mesh.rs  |  3 +-
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index c43e896e3198a..7533eea67e524 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -453,7 +453,8 @@ pub fn prepare_and_batch_meshes(
                 mesh_flags: mesh_transforms.flags,
                 dynamic_offset: gpu_array_buffer_index.dynamic_offset,
             },
-            gpu_array_buffer_index,
+            gpu_array_buffer_index.index,
+            gpu_array_buffer_index.dynamic_offset,
         ))
     };
 
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index d3ba9cb0e3d24..67ad14af19465 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -1,40 +1,40 @@
-use crate::{
-    render_phase::{CachedRenderPipelinePhaseItem, PhaseItem, RenderPhase},
-    render_resource::{GpuArrayBufferIndex, GpuArrayBufferable},
-};
+use nonmax::NonMaxU32;
 
-struct BatchState<T: BatchMeta<T>, D: GpuArrayBufferable> {
+use crate::render_phase::{CachedRenderPipelinePhaseItem, PhaseItem, RenderPhase};
+
+struct BatchState<T: BatchMeta<T>> {
     meta: Option<T>,
     /// The base index in the object data binding's array
-    gpu_array_buffer_index: GpuArrayBufferIndex<D>,
+    index: Option<NonMaxU32>,
+    /// The dynamic offset of the data binding
+    dynamic_offset: Option<NonMaxU32>,
     /// The number of entities in the batch
     count: u32,
     item_index: usize,
 }
 
-impl<T: BatchMeta<T>, D: GpuArrayBufferable> Default for BatchState<T, D> {
+impl<T: BatchMeta<T>> Default for BatchState<T> {
     fn default() -> Self {
         Self {
             meta: Default::default(),
-            gpu_array_buffer_index: Default::default(),
+            index: Default::default(),
+            dynamic_offset: Default::default(),
             count: Default::default(),
             item_index: Default::default(),
         }
     }
 }
 
-fn update_batch_data<I: PhaseItem, T: BatchMeta<T>, D: GpuArrayBufferable>(
-    item: &mut I,
-    batch: &BatchState<T, D>,
-) {
+fn update_batch_data<I: PhaseItem, T: BatchMeta<T>>(item: &mut I, batch: &BatchState<T>) {
     let BatchState {
         count,
-        gpu_array_buffer_index,
+        index,
+        dynamic_offset,
         ..
     } = batch;
-    let index = gpu_array_buffer_index.index.map_or(0, |index| index.get());
+    let index = index.map_or(0, |index| index.get());
     *item.batch_range_mut() = index..(index + *count);
-    *item.dynamic_offset_mut() = gpu_array_buffer_index.dynamic_offset;
+    *item.dynamic_offset_mut() = *dynamic_offset;
 }
 
 pub trait BatchMeta<T: BatchMeta<T>> {
@@ -45,16 +45,15 @@ pub trait BatchMeta<T: BatchMeta<T>> {
 /// and trying to combine the draws into a batch.
 pub fn batch_render_phase<
     I: CachedRenderPipelinePhaseItem,
-    T: BatchMeta<T>,       // Batch metadata used for distinguishing batches
-    D: GpuArrayBufferable, // Per-instance data
+    T: BatchMeta<T>, // Batch metadata used for distinguishing batches
 >(
     phase: &mut RenderPhase<I>,
-    mut get_batch_meta: impl FnMut(&I) -> Option<(T, GpuArrayBufferIndex<D>)>,
+    mut get_batch_meta: impl FnMut(&I) -> Option<(T, Option<NonMaxU32>, Option<NonMaxU32>)>,
 ) {
-    let mut batch = BatchState::<T, D>::default();
+    let mut batch = BatchState::<T>::default();
     for i in 0..phase.items.len() {
         let item = &phase.items[i];
-        let Some((batch_meta, gpu_array_buffer_index)) = get_batch_meta(item) else {
+        let Some((batch_meta, index, dynamic_offset)) = get_batch_meta(item) else {
             // It is necessary to start a new batch if an entity not matching the query is
             // encountered. This can be achieved by resetting the batch meta.
             batch.meta = None;
@@ -70,7 +69,8 @@ pub fn batch_render_phase<
             }
 
             batch.meta = Some(batch_meta);
-            batch.gpu_array_buffer_index = gpu_array_buffer_index;
+            batch.index = index;
+            batch.dynamic_offset = dynamic_offset;
             batch.count = 0;
             batch.item_index = i;
         }
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 096372051aeca..85f0453cc1bfe 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -301,7 +301,8 @@ pub fn prepare_and_batch_meshes2d(
                     mesh_asset_id: mesh_handle.0.id(),
                     dynamic_offset: gpu_array_buffer_index.dynamic_offset,
                 },
-                gpu_array_buffer_index,
+                gpu_array_buffer_index.index,
+                gpu_array_buffer_index.dynamic_offset,
             ))
         });
     }

From 739e9ad8c08477523954b7012bfd5f05e7bdca5e Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Mon, 11 Sep 2023 00:17:15 +0200
Subject: [PATCH 11/33] Add ability to disable automatic batching per-entity

---
 crates/bevy_pbr/src/render/mesh.rs     |  7 +++++--
 crates/bevy_render/src/batching/mod.rs | 13 +++++++++----
 crates/bevy_sprite/src/mesh2d/mesh.rs  | 15 +++++++++------
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 7533eea67e524..0c7cb2f356f08 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -21,7 +21,7 @@ use bevy_ecs::{
 };
 use bevy_math::{Affine3, Affine3A, Mat4, Vec2, Vec3Swizzles, Vec4};
 use bevy_render::{
-    batching::{batch_render_phase, BatchMeta},
+    batching::{batch_render_phase, BatchMeta, NoAutomaticBatching},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{
         skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
@@ -433,7 +433,10 @@ pub fn prepare_and_batch_meshes(
         &mut RenderPhase<Transparent3d>,
     )>,
     mut shadow_views: Query<&mut RenderPhase<Shadow>>,
-    meshes: Query<(Option<&MaterialBindGroupId>, &Handle<Mesh>, &MeshTransforms)>,
+    meshes: Query<
+        (Option<&MaterialBindGroupId>, &Handle<Mesh>, &MeshTransforms),
+        Without<NoAutomaticBatching>,
+    >,
 ) {
     let gpu_array_buffer = gpu_array_buffer.into_inner();
 
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 67ad14af19465..f2e09241cfc6d 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -1,7 +1,16 @@
+use bevy_ecs::component::Component;
 use nonmax::NonMaxU32;
 
 use crate::render_phase::{CachedRenderPipelinePhaseItem, PhaseItem, RenderPhase};
 
+/// Add this component to mesh entities to disable automatic batching
+#[derive(Component)]
+pub struct NoAutomaticBatching;
+
+pub trait BatchMeta<T: BatchMeta<T>> {
+    fn matches(&self, other: &T) -> bool;
+}
+
 struct BatchState<T: BatchMeta<T>> {
     meta: Option<T>,
     /// The base index in the object data binding's array
@@ -37,10 +46,6 @@ fn update_batch_data<I: PhaseItem, T: BatchMeta<T>>(item: &mut I, batch: &BatchS
     *item.dynamic_offset_mut() = *dynamic_offset;
 }
 
-pub trait BatchMeta<T: BatchMeta<T>> {
-    fn matches(&self, other: &T) -> bool;
-}
-
 /// Batch the items in a render phase. This means comparing metadata needed to draw each phase item
 /// and trying to combine the draws into a batch.
 pub fn batch_render_phase<
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 85f0453cc1bfe..c1891d7e1374f 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -10,7 +10,7 @@ use bevy_ecs::{
 use bevy_math::{Affine3, Affine3A, Vec2, Vec3Swizzles, Vec4};
 use bevy_reflect::Reflect;
 use bevy_render::{
-    batching::{batch_render_phase, BatchMeta},
+    batching::{batch_render_phase, BatchMeta, NoAutomaticBatching},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{GpuBufferInfo, Mesh, MeshVertexBufferLayout},
     render_asset::RenderAssets,
@@ -271,11 +271,14 @@ pub fn prepare_and_batch_meshes2d(
     render_queue: Res<RenderQueue>,
     gpu_array_buffer: ResMut<GpuArrayBuffer<Mesh2dUniform>>,
     mut views: Query<&mut RenderPhase<Transparent2d>>,
-    meshes: Query<(
-        Option<&Material2dBindGroupId>,
-        &Mesh2dHandle,
-        &Mesh2dTransforms,
-    )>,
+    meshes: Query<
+        (
+            Option<&Material2dBindGroupId>,
+            &Mesh2dHandle,
+            &Mesh2dTransforms,
+        ),
+        Without<NoAutomaticBatching>,
+    >,
 ) {
     if meshes.is_empty() {
         return;

From fd93970ced3ed548289b498ff64b8b7394b490bf Mon Sep 17 00:00:00 2001
From: robtfm <50659922+robtfm@users.noreply.github.com>
Date: Wed, 13 Sep 2023 02:51:27 +0100
Subject: [PATCH 12/33] Simplify batching: use PartialEq instead of the BatchMeta trait

---
 crates/bevy_pbr/src/render/mesh.rs            |  8 +-
 crates/bevy_render/src/batching/mod.rs        | 91 +++++--------------
 .../render_resource/batched_uniform_buffer.rs |  2 +-
 .../src/render_resource/gpu_array_buffer.rs   | 14 +--
 crates/bevy_sprite/src/mesh2d/mesh.rs         | 12 +--
 5 files changed, 33 insertions(+), 94 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 0c7cb2f356f08..1e057eee77249 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -21,7 +21,7 @@ use bevy_ecs::{
 };
 use bevy_math::{Affine3, Affine3A, Mat4, Vec2, Vec3Swizzles, Vec4};
 use bevy_render::{
-    batching::{batch_render_phase, BatchMeta, NoAutomaticBatching},
+    batching::{batch_render_phase, NoAutomaticBatching},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{
         skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
@@ -393,7 +393,6 @@ pub fn extract_skinned_meshes(
 ///   result of the `prepare_mesh_uniforms` system, e.g. due to having to split
 ///   data across separate uniform bindings within the same buffer due to the
 ///   maximum uniform buffer binding size.
-#[derive(PartialEq, Eq)]
 struct BatchMeta3d {
     /// The pipeline id encompasses all pipeline configuration including vertex
     /// buffers and layouts, shaders and their specializations, bind group
@@ -408,13 +407,14 @@ struct BatchMeta3d {
     dynamic_offset: Option<NonMaxU32>,
 }
 
-impl BatchMeta<BatchMeta3d> for BatchMeta3d {
+impl PartialEq for BatchMeta3d {
     #[inline]
-    fn matches(&self, other: &BatchMeta3d) -> bool {
+    fn eq(&self, other: &BatchMeta3d) -> bool {
         self.pipeline_id == other.pipeline_id
             && self.draw_function_id == other.draw_function_id
             && self.mesh_asset_id == other.mesh_asset_id
             && (self.mesh_flags & (MeshFlags::SKINNED | MeshFlags::MORPH_TARGETS).bits()) == 0
+            && (other.mesh_flags & (MeshFlags::SKINNED | MeshFlags::MORPH_TARGETS).bits()) == 0
             && self.dynamic_offset == other.dynamic_offset
             && self.material_bind_group_id == other.material_bind_group_id
     }
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index f2e09241cfc6d..beebfbf7ec5a3 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -1,87 +1,46 @@
 use bevy_ecs::component::Component;
 use nonmax::NonMaxU32;
 
-use crate::render_phase::{CachedRenderPipelinePhaseItem, PhaseItem, RenderPhase};
+use crate::render_phase::{CachedRenderPipelinePhaseItem, RenderPhase};
 
 /// Add this component to mesh entities to disable automatic batching
 #[derive(Component)]
 pub struct NoAutomaticBatching;
 
-pub trait BatchMeta<T: BatchMeta<T>> {
-    fn matches(&self, other: &T) -> bool;
-}
-
-struct BatchState<T: BatchMeta<T>> {
-    meta: Option<T>,
-    /// The base index in the object data binding's array
-    index: Option<NonMaxU32>,
-    /// The dynamic offset of the data binding
-    dynamic_offset: Option<NonMaxU32>,
-    /// The number of entities in the batch
-    count: u32,
-    item_index: usize,
-}
-
-impl<T: BatchMeta<T>> Default for BatchState<T> {
-    fn default() -> Self {
-        Self {
-            meta: Default::default(),
-            index: Default::default(),
-            dynamic_offset: Default::default(),
-            count: Default::default(),
-            item_index: Default::default(),
-        }
-    }
-}
-
-fn update_batch_data<I: PhaseItem, T: BatchMeta<T>>(item: &mut I, batch: &BatchState<T>) {
-    let BatchState {
-        count,
-        index,
-        dynamic_offset,
-        ..
-    } = batch;
-    let index = index.map_or(0, |index| index.get());
-    *item.batch_range_mut() = index..(index + *count);
-    *item.dynamic_offset_mut() = *dynamic_offset;
-}
-
 /// Batch the items in a render phase. This means comparing metadata needed to draw each phase item
 /// and trying to combine the draws into a batch.
 pub fn batch_render_phase<
     I: CachedRenderPipelinePhaseItem,
-    T: BatchMeta<T>, // Batch metadata used for distinguishing batches
+    T: PartialEq, // Batch metadata used for distinguishing batches
 >(
     phase: &mut RenderPhase<I>,
-    mut get_batch_meta: impl FnMut(&I) -> Option<(T, Option<NonMaxU32>, Option<NonMaxU32>)>,
+    mut get_batch_meta: impl FnMut(&I) -> Option<(T, NonMaxU32, Option<NonMaxU32>)>,
 ) {
-    let mut batch = BatchState::<T>::default();
-    for i in 0..phase.items.len() {
-        let item = &phase.items[i];
-        let Some((batch_meta, index, dynamic_offset)) = get_batch_meta(item) else {
-            // It is necessary to start a new batch if an entity not matching the query is
-            // encountered. This can be achieved by resetting the batch meta.
-            batch.meta = None;
+    // we iterate in reverse so that we can write to the last item of the current batch, and still skip the 
+    // right number of phase items when iterating forwards in the Render stage
+    let mut items = phase.items.iter_mut().rev().peekable();
+    let mut batch_start_index = None;
+    let mut next_batch = items.peek().and_then(|item| get_batch_meta(item));
+    while let Some(item) = items.next() {
+        // get current batch meta and update next batch meta
+        let Some((batch_meta, index, dynamic_offset)) = std::mem::replace(
+            &mut next_batch,
+            items.peek().and_then(|item| get_batch_meta(item)),
+        ) else {
+            // if the current phase item doesn't match the query, we don't modify it
             continue;
         };
-        if !batch
-            .meta
-            .as_ref()
-            .map_or(false, |meta| meta.matches(&batch_meta))
-        {
-            if batch.count > 0 {
-                update_batch_data(&mut phase.items[batch.item_index], &batch);
-            }
 
-            batch.meta = Some(batch_meta);
-            batch.index = index;
-            batch.dynamic_offset = dynamic_offset;
-            batch.count = 0;
-            batch.item_index = i;
+        // record the start index if we are beginning a new batch
+        if batch_start_index.is_none() {
+            batch_start_index = Some(index);
+        }
+
+        if Some(&batch_meta) != next_batch.as_ref().map(|(meta, ..)| meta) {
+            // next item doesn't match, update the phase item to render this batch
+            *item.batch_range_mut() = batch_start_index.take().unwrap().get()..index.get() + 1;
+            *item.dynamic_offset_mut() = dynamic_offset;
+            batch_start_index = None;
         }
-        batch.count += 1;
-    }
-    if !phase.items.is_empty() && batch.count > 0 {
-        update_batch_data(&mut phase.items[batch.item_index], &batch);
     }
 }
diff --git a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
index f6da91bd12ea9..983f241b44f63 100644
--- a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
+++ b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
@@ -77,7 +77,7 @@ impl<T: GpuArrayBufferable> BatchedUniformBuffer<T> {
 
     pub fn push(&mut self, component: T) -> GpuArrayBufferIndex<T> {
         let result = GpuArrayBufferIndex {
-            index: NonMaxU32::new(self.temp.0.len() as u32),
+            index: NonMaxU32::new(self.temp.0.len() as u32).unwrap(),
             dynamic_offset: NonMaxU32::new(self.current_offset),
             element_type: PhantomData,
         };
diff --git a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
index 6d9c87888a2f1..c004b9beeab84 100644
--- a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
+++ b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
@@ -53,7 +53,7 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
         match self {
             GpuArrayBuffer::Uniform(buffer) => buffer.push(value),
             GpuArrayBuffer::Storage((_, buffer)) => {
-                let index = NonMaxU32::new(buffer.len() as u32);
+                let index = NonMaxU32::new(buffer.len() as u32).unwrap();
                 buffer.push(value);
                 GpuArrayBufferIndex {
                     index,
@@ -122,19 +122,9 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
 #[derive(Component, Clone)]
 pub struct GpuArrayBufferIndex<T: GpuArrayBufferable> {
     /// The index to use in a shader into the array.
-    pub index: Option<NonMaxU32>,
+    pub index: NonMaxU32,
     /// The dynamic offset to use when setting the bind group in a pass.
     /// Only used on platforms that don't support storage buffers.
     pub dynamic_offset: Option<NonMaxU32>,
     pub element_type: PhantomData<T>,
 }
-
-impl<T: GpuArrayBufferable> Default for GpuArrayBufferIndex<T> {
-    fn default() -> Self {
-        Self {
-            index: None,
-            dynamic_offset: None,
-            element_type: Default::default(),
-        }
-    }
-}
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index c1891d7e1374f..916fbe4a54254 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -10,7 +10,7 @@ use bevy_ecs::{
 use bevy_math::{Affine3, Affine3A, Vec2, Vec3Swizzles, Vec4};
 use bevy_reflect::Reflect;
 use bevy_render::{
-    batching::{batch_render_phase, BatchMeta, NoAutomaticBatching},
+    batching::{batch_render_phase, NoAutomaticBatching},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{GpuBufferInfo, Mesh, MeshVertexBufferLayout},
     render_asset::RenderAssets,
@@ -255,16 +255,6 @@ struct BatchMeta2d {
     dynamic_offset: Option<NonMaxU32>,
 }
 
-impl BatchMeta<BatchMeta2d> for BatchMeta2d {
-    fn matches(&self, other: &BatchMeta2d) -> bool {
-        self.pipeline_id == other.pipeline_id
-            && self.draw_function_id == other.draw_function_id
-            && self.mesh_asset_id == other.mesh_asset_id
-            && self.dynamic_offset == other.dynamic_offset
-            && self.material2d_bind_group_id == other.material2d_bind_group_id
-    }
-}
-
 #[allow(clippy::too_many_arguments)]
 pub fn prepare_and_batch_meshes2d(
     render_device: Res<RenderDevice>,

From 667c70addb322d0d38bd00f9883c29e8a65d4029 Mon Sep 17 00:00:00 2001
From: robtfm <50659922+robtfm@users.noreply.github.com>
Date: Wed, 13 Sep 2023 23:37:48 +0100
Subject: [PATCH 13/33] don't reverse, preserve render order within batch

---
 crates/bevy_render/src/batching/mod.rs | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index beebfbf7ec5a3..4bcfc8b55eb62 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -16,10 +16,9 @@ pub fn batch_render_phase<
     phase: &mut RenderPhase<I>,
     mut get_batch_meta: impl FnMut(&I) -> Option<(T, NonMaxU32, Option<NonMaxU32>)>,
 ) {
-    // we iterate in reverse so that we can write to the last item of the current batch, and still skip the 
-    // right number of phase items when iterating forwards in the Render stage
-    let mut items = phase.items.iter_mut().rev().peekable();
-    let mut batch_start_index = None;
+    let mut items = phase.items.iter_mut().peekable();
+    let mut batch_start_item = None;
+    let mut batch_start_index = 0;
     let mut next_batch = items.peek().and_then(|item| get_batch_meta(item));
     while let Some(item) = items.next() {
         // get current batch meta and update next batch meta
@@ -31,16 +30,18 @@ pub fn batch_render_phase<
             continue;
         };
 
-        // record the start index if we are beginning a new batch
-        if batch_start_index.is_none() {
-            batch_start_index = Some(index);
+        // if we are beginning a new batch record the start item and index 
+        if batch_start_item.is_none() {
+            batch_start_item = Some(item);
+            batch_start_index = index.get();
         }
 
         if Some(&batch_meta) != next_batch.as_ref().map(|(meta, ..)| meta) {
-            // next item doesn't match, update the phase item to render this batch
-            *item.batch_range_mut() = batch_start_index.take().unwrap().get()..index.get() + 1;
-            *item.dynamic_offset_mut() = dynamic_offset;
-            batch_start_index = None;
+            // next item doesn't match the current batch (or doesn't exist), 
+            // update the phase item to render this batch
+            let batch_start_item = batch_start_item.take().unwrap();
+            *batch_start_item.batch_range_mut() = batch_start_index..index.get() + 1;
+            *batch_start_item.dynamic_offset_mut() = dynamic_offset;
         }
     }
 }

From 89dcd4ba44ea06879fc7f49511d34002ff5bd682 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Fri, 15 Sep 2023 01:16:03 +0200
Subject: [PATCH 14/33] Update examples/2d/mesh2d_manual.rs

Co-authored-by: robtfm <50659922+robtfm@users.noreply.github.com>
---
 examples/2d/mesh2d_manual.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/examples/2d/mesh2d_manual.rs b/examples/2d/mesh2d_manual.rs
index d1f31ddb83dc6..44ada047dffce 100644
--- a/examples/2d/mesh2d_manual.rs
+++ b/examples/2d/mesh2d_manual.rs
@@ -148,6 +148,9 @@ impl SpecializedRenderPipeline for ColoredMesh2dPipeline {
             false => TextureFormat::bevy_default(),
         };
 
+        // meshes typically live in bind group 2. because we are using bindgroup 1
+        // we need to add MESH_BINDGROUP_1 shader def so that the bindings are correctly
+        // linked in the shader
         let shader_defs = vec!["MESH_BINDGROUP_1".into()];
 
         RenderPipelineDescriptor {

From 245285d8f1837bdbb9b9a08ac015f2e6ea7595a0 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Fri, 15 Sep 2023 01:28:19 +0200
Subject: [PATCH 15/33] Capitalisation in comments

---
 crates/bevy_render/src/batching/mod.rs | 10 +++++-----
 examples/2d/mesh2d_manual.rs           |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 4bcfc8b55eb62..b802ae1c07d7b 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -21,24 +21,24 @@ pub fn batch_render_phase<
     let mut batch_start_index = 0;
     let mut next_batch = items.peek().and_then(|item| get_batch_meta(item));
     while let Some(item) = items.next() {
-        // get current batch meta and update next batch meta
+        // Get the current batch meta and update the next batch meta
         let Some((batch_meta, index, dynamic_offset)) = std::mem::replace(
             &mut next_batch,
             items.peek().and_then(|item| get_batch_meta(item)),
         ) else {
-            // if the current phase item doesn't match the query, we don't modify it
+            // If the current phase item doesn't match the query, we don't modify it
             continue;
         };
 
-        // if we are beginning a new batch record the start item and index 
+        // If we are beginning a new batch, record the start item and index
         if batch_start_item.is_none() {
             batch_start_item = Some(item);
             batch_start_index = index.get();
         }
 
         if Some(&batch_meta) != next_batch.as_ref().map(|(meta, ..)| meta) {
-            // next item doesn't match the current batch (or doesn't exist), 
-            // update the phase item to render this batch
+            // The next item doesn't match the current batch (or doesn't exist).
+            // Update the phase item to render this batch.
             let batch_start_item = batch_start_item.take().unwrap();
             *batch_start_item.batch_range_mut() = batch_start_index..index.get() + 1;
             *batch_start_item.dynamic_offset_mut() = dynamic_offset;
diff --git a/examples/2d/mesh2d_manual.rs b/examples/2d/mesh2d_manual.rs
index 44ada047dffce..f1047a9fb88d4 100644
--- a/examples/2d/mesh2d_manual.rs
+++ b/examples/2d/mesh2d_manual.rs
@@ -148,9 +148,9 @@ impl SpecializedRenderPipeline for ColoredMesh2dPipeline {
             false => TextureFormat::bevy_default(),
         };
 
-        // meshes typically live in bind group 2. because we are using bindgroup 1
-        // we need to add MESH_BINDGROUP_1 shader def so that the bindings are correctly
-        // linked in the shader
+        // Meshes typically live in bind group 2. Because we are using bind group 1
+        // we need to add the MESH_BINDGROUP_1 shader def so that the bindings are correctly
+        // linked in the shader.
         let shader_defs = vec!["MESH_BINDGROUP_1".into()];
 
         RenderPipelineDescriptor {

From d88f2d669808f88a004a54d7fc6fc7392816a633 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Fri, 15 Sep 2023 01:48:32 +0200
Subject: [PATCH 16/33] Consolidate conversion of Affine3 to GPU buffers

---
 crates/bevy_math/src/affine3.rs       | 32 ++++++++++++++++-
 crates/bevy_pbr/src/render/mesh.rs    | 49 ++++-----------------------
 crates/bevy_sprite/src/mesh2d/mesh.rs | 36 ++++----------------
 3 files changed, 44 insertions(+), 73 deletions(-)

diff --git a/crates/bevy_math/src/affine3.rs b/crates/bevy_math/src/affine3.rs
index 51598e4beae12..a03f12dd57e65 100644
--- a/crates/bevy_math/src/affine3.rs
+++ b/crates/bevy_math/src/affine3.rs
@@ -1,4 +1,4 @@
-use glam::{Affine3A, Mat3, Vec3};
+use glam::{Affine3A, Mat3, Vec3, Vec3Swizzles, Vec4};
 
 /// Reduced-size version of `glam::Affine3A` for use when storage has
 /// significant performance impact. Convert to `glam::Affine3A` to do
@@ -10,6 +10,36 @@ pub struct Affine3 {
     pub translation: Vec3,
 }
 
+impl Affine3 {
+    /// Transposes the affine 4x3 matrix into a 3x4 matrix and formats it as three `Vec4`s for packing into GPU buffers
+    #[inline]
+    pub fn to_transpose(&self) -> [Vec4; 3] {
+        let transpose_3x3 = self.matrix3.transpose();
+        [
+            transpose_3x3.x_axis.extend(self.translation.x),
+            transpose_3x3.y_axis.extend(self.translation.y),
+            transpose_3x3.z_axis.extend(self.translation.z),
+        ]
+    }
+
+    /// Calculates the inverse transpose of the 3x3 matrix and packs its nine elements into two `Vec4`s plus a trailing `f32` for GPU buffers
+    #[inline]
+    pub fn inverse_transpose_3x3(&self) -> ([Vec4; 2], f32) {
+        let inverse_transpose_3x3 = Affine3A::from(self).inverse().matrix3.transpose();
+        (
+            [
+                (inverse_transpose_3x3.x_axis, inverse_transpose_3x3.y_axis.x).into(),
+                (
+                    inverse_transpose_3x3.y_axis.yz(),
+                    inverse_transpose_3x3.z_axis.xy(),
+                )
+                    .into(),
+            ],
+            inverse_transpose_3x3.z_axis.z,
+        )
+    }
+}
+
 impl From<&Affine3A> for Affine3 {
     fn from(affine: &Affine3A) -> Self {
         Self {
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 1e057eee77249..626f2fb88420a 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -19,7 +19,7 @@ use bevy_ecs::{
     query::ROQueryItem,
     system::{lifetimeless::*, SystemParamItem, SystemState},
 };
-use bevy_math::{Affine3, Affine3A, Mat4, Vec2, Vec3Swizzles, Vec4};
+use bevy_math::{Affine3, Mat4, Vec2, Vec4};
 use bevy_render::{
     batching::{batch_render_phase, NoAutomaticBatching},
     globals::{GlobalsBuffer, GlobalsUniform},
@@ -188,48 +188,13 @@ pub struct MeshUniform {
 
 impl From<&MeshTransforms> for MeshUniform {
     fn from(mesh_transforms: &MeshTransforms) -> Self {
-        let transpose_model_3x3 = mesh_transforms.transform.matrix3.transpose();
-        let transpose_previous_model_3x3 = mesh_transforms.previous_transform.matrix3.transpose();
-        let inverse_transpose_model_3x3 = Affine3A::from(&mesh_transforms.transform)
-            .inverse()
-            .matrix3
-            .transpose();
+        let (inverse_transpose_model_a, inverse_transpose_model_b) =
+            mesh_transforms.transform.inverse_transpose_3x3();
         Self {
-            transform: [
-                transpose_model_3x3
-                    .x_axis
-                    .extend(mesh_transforms.transform.translation.x),
-                transpose_model_3x3
-                    .y_axis
-                    .extend(mesh_transforms.transform.translation.y),
-                transpose_model_3x3
-                    .z_axis
-                    .extend(mesh_transforms.transform.translation.z),
-            ],
-            previous_transform: [
-                transpose_previous_model_3x3
-                    .x_axis
-                    .extend(mesh_transforms.previous_transform.translation.x),
-                transpose_previous_model_3x3
-                    .y_axis
-                    .extend(mesh_transforms.previous_transform.translation.y),
-                transpose_previous_model_3x3
-                    .z_axis
-                    .extend(mesh_transforms.previous_transform.translation.z),
-            ],
-            inverse_transpose_model_a: [
-                (
-                    inverse_transpose_model_3x3.x_axis,
-                    inverse_transpose_model_3x3.y_axis.x,
-                )
-                    .into(),
-                (
-                    inverse_transpose_model_3x3.y_axis.yz(),
-                    inverse_transpose_model_3x3.z_axis.xy(),
-                )
-                    .into(),
-            ],
-            inverse_transpose_model_b: inverse_transpose_model_3x3.z_axis.z,
+            transform: mesh_transforms.transform.to_transpose(),
+            previous_transform: mesh_transforms.previous_transform.to_transpose(),
+            inverse_transpose_model_a,
+            inverse_transpose_model_b,
             flags: mesh_transforms.flags,
         }
     }
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 916fbe4a54254..e7c16a9a86fb0 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -7,7 +7,7 @@ use bevy_ecs::{
     query::ROQueryItem,
     system::{lifetimeless::*, SystemParamItem, SystemState},
 };
-use bevy_math::{Affine3, Affine3A, Vec2, Vec3Swizzles, Vec4};
+use bevy_math::{Affine3, Vec2, Vec4};
 use bevy_reflect::Reflect;
 use bevy_render::{
     batching::{batch_render_phase, NoAutomaticBatching},
@@ -159,36 +159,12 @@ pub struct Mesh2dUniform {
 
 impl From<&Mesh2dTransforms> for Mesh2dUniform {
     fn from(mesh_transforms: &Mesh2dTransforms) -> Self {
-        let transpose_model_3x3 = mesh_transforms.transform.matrix3.transpose();
-        let inverse_transpose_model_3x3 = Affine3A::from(&mesh_transforms.transform)
-            .inverse()
-            .matrix3
-            .transpose();
+        let (inverse_transpose_model_a, inverse_transpose_model_b) =
+            mesh_transforms.transform.inverse_transpose_3x3();
         Self {
-            transform: [
-                transpose_model_3x3
-                    .x_axis
-                    .extend(mesh_transforms.transform.translation.x),
-                transpose_model_3x3
-                    .y_axis
-                    .extend(mesh_transforms.transform.translation.y),
-                transpose_model_3x3
-                    .z_axis
-                    .extend(mesh_transforms.transform.translation.z),
-            ],
-            inverse_transpose_model_a: [
-                (
-                    inverse_transpose_model_3x3.x_axis,
-                    inverse_transpose_model_3x3.y_axis.x,
-                )
-                    .into(),
-                (
-                    inverse_transpose_model_3x3.y_axis.yz(),
-                    inverse_transpose_model_3x3.z_axis.xy(),
-                )
-                    .into(),
-            ],
-            inverse_transpose_model_b: inverse_transpose_model_3x3.z_axis.z,
+            transform: mesh_transforms.transform.to_transpose(),
+            inverse_transpose_model_a,
+            inverse_transpose_model_b,
             flags: mesh_transforms.flags,
         }
     }

From 68969ab1b8535d11e17915ed4e61b086e09bce16 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Fri, 15 Sep 2023 02:15:37 +0200
Subject: [PATCH 17/33] Use NoAutomaticBatching to split batches for skinning
 and morph targets

---
 crates/bevy_pbr/src/material.rs            | 20 ++++++--------------
 crates/bevy_pbr/src/render/mesh.rs         | 15 +++++++--------
 crates/bevy_pbr/src/render/mesh_types.wgsl |  2 --
 crates/bevy_pbr/src/render/morph.rs        |  5 ++++-
 4 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs
index 29f107fa5be6f..d49e268cd9adb 100644
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@@ -1,7 +1,7 @@
 use crate::{
-    is_skinned, render, AlphaMode, DrawMesh, DrawPrepass, EnvironmentMapLight, MeshFlags,
-    MeshPipeline, MeshPipelineKey, MeshTransforms, PrepassPipelinePlugin, PrepassPlugin,
-    ScreenSpaceAmbientOcclusionSettings, SetMeshBindGroup, SetMeshViewBindGroup, Shadow,
+    render, AlphaMode, DrawMesh, DrawPrepass, EnvironmentMapLight, MeshPipeline, MeshPipelineKey,
+    MeshTransforms, PrepassPipelinePlugin, PrepassPlugin, ScreenSpaceAmbientOcclusionSettings,
+    SetMeshBindGroup, SetMeshViewBindGroup, Shadow,
 };
 use bevy_app::{App, Plugin};
 use bevy_asset::{Asset, AssetApp, AssetEvent, AssetId, AssetServer, Assets, Handle};
@@ -405,7 +405,7 @@ pub fn queue_material_meshes<M: Material>(
         &Handle<M>,
         &mut MaterialBindGroupId,
         &Handle<Mesh>,
-        &mut MeshTransforms,
+        &MeshTransforms,
     )>,
     images: Res<RenderAssets<Image>>,
     mut views: Query<(
@@ -490,12 +490,8 @@ pub fn queue_material_meshes<M: Material>(
 
         let rangefinder = view.rangefinder3d();
         for visible_entity in &visible_entities.entities {
-            if let Ok((
-                material_handle,
-                mut material_bind_group_id,
-                mesh_handle,
-                mut mesh_transforms,
-            )) = material_meshes.get_mut(*visible_entity)
+            if let Ok((material_handle, mut material_bind_group_id, mesh_handle, mesh_transforms)) =
+                material_meshes.get_mut(*visible_entity)
             {
                 if let (Some(mesh), Some(material)) = (
                     render_meshes.get(mesh_handle),
@@ -505,12 +501,8 @@ pub fn queue_material_meshes<M: Material>(
                         MeshPipelineKey::from_primitive_topology(mesh.primitive_topology)
                             | view_key;
 
-                    if is_skinned(&mesh.layout) {
-                        mesh_transforms.flags |= MeshFlags::SKINNED.bits();
-                    }
                     if mesh.morph_targets.is_some() {
                         mesh_key |= MeshPipelineKey::MORPH_TARGETS;
-                        mesh_transforms.flags |= MeshFlags::MORPH_TARGETS.bits();
                     }
                     match material.properties.alpha_mode {
                         AlphaMode::Blend => {
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 626f2fb88420a..3ab8a2016b609 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -205,8 +205,6 @@ bitflags::bitflags! {
     #[repr(transparent)]
     pub struct MeshFlags: u32 {
         const SHADOW_RECEIVER            = (1 << 0);
-        const SKINNED                    = (1 << 1);
-        const MORPH_TARGETS              = (1 << 2);
         // Indicates the sign of the determinant of the 3x3 model matrix. If the sign is positive,
         // then the flag should be set, else it should not be set.
         const SIGN_DETERMINANT_MODEL_3X3 = (1 << 31);
@@ -332,7 +330,12 @@ pub fn extract_skinned_meshes(
             SkinnedMeshJoints::build(skin, &inverse_bindposes, &joint_query, &mut uniform.buffer)
         {
             last_start = last_start.max(skinned_joints.index as usize);
-            values.push((entity, skinned_joints.to_buffer_index()));
+            // NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per
+            // entity and so cannot currently be batched.
+            values.push((
+                entity,
+                (skinned_joints.to_buffer_index(), NoAutomaticBatching),
+            ));
         }
     }
 
@@ -368,7 +371,6 @@ struct BatchMeta3d {
     draw_function_id: DrawFunctionId,
     material_bind_group_id: Option<MaterialBindGroupId>,
     mesh_asset_id: AssetId<Mesh>,
-    mesh_flags: u32,
     dynamic_offset: Option<NonMaxU32>,
 }
 
@@ -378,8 +380,6 @@ impl PartialEq for BatchMeta3d {
         self.pipeline_id == other.pipeline_id
             && self.draw_function_id == other.draw_function_id
             && self.mesh_asset_id == other.mesh_asset_id
-            && (self.mesh_flags & (MeshFlags::SKINNED | MeshFlags::MORPH_TARGETS).bits()) == 0
-            && (other.mesh_flags & (MeshFlags::SKINNED | MeshFlags::MORPH_TARGETS).bits()) == 0
             && self.dynamic_offset == other.dynamic_offset
             && self.material_bind_group_id == other.material_bind_group_id
     }
@@ -418,7 +418,6 @@ pub fn prepare_and_batch_meshes(
                 draw_function_id,
                 material_bind_group_id: material_bind_group_id.cloned(),
                 mesh_asset_id: mesh_handle.id(),
-                mesh_flags: mesh_transforms.flags,
                 dynamic_offset: gpu_array_buffer_index.dynamic_offset,
             },
             gpu_array_buffer_index.index,
@@ -839,7 +838,7 @@ impl MeshPipelineKey {
     }
 }
 
-pub fn is_skinned(layout: &Hashed<InnerMeshVertexBufferLayout>) -> bool {
+fn is_skinned(layout: &Hashed<InnerMeshVertexBufferLayout>) -> bool {
     layout.contains(Mesh::ATTRIBUTE_JOINT_INDEX) && layout.contains(Mesh::ATTRIBUTE_JOINT_WEIGHT)
 }
 pub fn setup_morph_and_skinning_defs(
diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl
index ba04c18e4a7d6..eb5096e564bd9 100644
--- a/crates/bevy_pbr/src/render/mesh_types.wgsl
+++ b/crates/bevy_pbr/src/render/mesh_types.wgsl
@@ -29,7 +29,5 @@ struct MorphWeights {
 #endif
 
 const MESH_FLAGS_SHADOW_RECEIVER_BIT: u32            = 1u;
-const MESH_FLAGS_SKINNED_BIT: u32                    = 2u;
-const MESH_FLAGS_MORPH_TARGETS_BIT: u32              = 4u;
 // 2^31 - if the flag is set, the sign is positive, else it is negative
 const MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT: u32 = 2147483648u;
diff --git a/crates/bevy_pbr/src/render/morph.rs b/crates/bevy_pbr/src/render/morph.rs
index 753c366726934..5b98de2ad84d9 100644
--- a/crates/bevy_pbr/src/render/morph.rs
+++ b/crates/bevy_pbr/src/render/morph.rs
@@ -2,6 +2,7 @@ use std::{iter, mem};
 
 use bevy_ecs::prelude::*;
 use bevy_render::{
+    batching::NoAutomaticBatching,
     mesh::morph::{MeshMorphWeights, MAX_MORPH_WEIGHTS},
     render_resource::{BufferUsages, BufferVec},
     renderer::{RenderDevice, RenderQueue},
@@ -89,7 +90,9 @@ pub fn extract_morphs(
         add_to_alignment::<f32>(&mut uniform.buffer);
 
         let index = (start * mem::size_of::<f32>()) as u32;
-        values.push((entity, MorphIndex { index }));
+        // NOTE: Because morph targets require per-morph target texture bindings, they cannot
+        // currently be batched.
+        values.push((entity, (MorphIndex { index }, NoAutomaticBatching)));
     }
     *previous_len = values.len();
     commands.insert_or_spawn_batch(values);

From b3139215976b4d07d5bed4c6150a6933b3ab87f3 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Fri, 15 Sep 2023 02:25:00 +0200
Subject: [PATCH 18/33] Consolidate BatchMeta into bevy_render

---
 crates/bevy_pbr/src/render/mesh.rs     | 46 +++-----------------------
 crates/bevy_render/src/batching/mod.rs | 44 +++++++++++++++++++++++-
 crates/bevy_sprite/Cargo.toml          |  1 -
 crates/bevy_sprite/src/mesh2d/mesh.rs  | 40 ++++------------------
 4 files changed, 53 insertions(+), 78 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 3ab8a2016b609..6faff60cf82e6 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -21,7 +21,7 @@ use bevy_ecs::{
 };
 use bevy_math::{Affine3, Mat4, Vec2, Vec4};
 use bevy_render::{
-    batching::{batch_render_phase, NoAutomaticBatching},
+    batching::{batch_render_phase, BatchMeta, NoAutomaticBatching},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{
         skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
@@ -31,8 +31,8 @@ use bevy_render::{
     prelude::Msaa,
     render_asset::RenderAssets,
     render_phase::{
-        CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem, RenderCommand,
-        RenderCommandResult, RenderPhase, TrackedRenderPass,
+        CachedRenderPipelinePhaseItem, PhaseItem, RenderCommand, RenderCommandResult, RenderPhase,
+        TrackedRenderPass,
     },
     render_resource::*,
     renderer::{RenderDevice, RenderQueue},
@@ -45,7 +45,6 @@ use bevy_render::{
 };
 use bevy_transform::components::GlobalTransform;
 use bevy_utils::{tracing::error, HashMap, Hashed};
-use nonmax::NonMaxU32;
 
 use crate::render::{
     morph::{extract_morphs, prepare_morphs, MorphIndex, MorphUniform},
@@ -348,43 +347,6 @@ pub fn extract_skinned_meshes(
     commands.insert_or_spawn_batch(values);
 }
 
-/// Data necessary to be equal for two draw commands to be mergeable
-///
-/// This is based on the following assumptions:
-/// - Only entities with prepared assets (pipelines, materials, meshes) are
-///   queued to phases
-/// - View bindings are constant across a phase for a given draw function as
-///   phases are per-view
-/// - `prepare_mesh_uniforms` is the only system that performs this batching
-///   and has sole responsibility for preparing the per-object data. As such
-///   the mesh binding and dynamic offsets are assumed to only be variable as a
-///   result of the `prepare_mesh_uniforms` system, e.g. due to having to split
-///   data across separate uniform bindings within the same buffer due to the
-///   maximum uniform buffer binding size.
-struct BatchMeta3d {
-    /// The pipeline id encompasses all pipeline configuration including vertex
-    /// buffers and layouts, shaders and their specializations, bind group
-    /// layouts, etc.
-    pipeline_id: CachedRenderPipelineId,
-    /// The draw function id defines the RenderCommands that are called to
-    /// set the pipeline and bindings, and make the draw command
-    draw_function_id: DrawFunctionId,
-    material_bind_group_id: Option<MaterialBindGroupId>,
-    mesh_asset_id: AssetId<Mesh>,
-    dynamic_offset: Option<NonMaxU32>,
-}
-
-impl PartialEq for BatchMeta3d {
-    #[inline]
-    fn eq(&self, other: &BatchMeta3d) -> bool {
-        self.pipeline_id == other.pipeline_id
-            && self.draw_function_id == other.draw_function_id
-            && self.mesh_asset_id == other.mesh_asset_id
-            && self.dynamic_offset == other.dynamic_offset
-            && self.material_bind_group_id == other.material_bind_group_id
-    }
-}
-
 #[allow(clippy::too_many_arguments)]
 pub fn prepare_and_batch_meshes(
     render_device: Res<RenderDevice>,
@@ -413,7 +375,7 @@ pub fn prepare_and_batch_meshes(
         };
         let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
         Some((
-            BatchMeta3d {
+            BatchMeta::<MaterialBindGroupId> {
                 pipeline_id,
                 draw_function_id,
                 material_bind_group_id: material_bind_group_id.cloned(),
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index b802ae1c07d7b..46924f870f70c 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -1,12 +1,54 @@
+use bevy_asset::AssetId;
 use bevy_ecs::component::Component;
 use nonmax::NonMaxU32;
 
-use crate::render_phase::{CachedRenderPipelinePhaseItem, RenderPhase};
+use crate::{
+    mesh::Mesh,
+    render_phase::{CachedRenderPipelinePhaseItem, DrawFunctionId, RenderPhase},
+    render_resource::CachedRenderPipelineId,
+};
 
 /// Add this component to mesh entities to disable automatic batching
 #[derive(Component)]
 pub struct NoAutomaticBatching;
 
+/// Data necessary to be equal for two draw commands to be mergeable
+///
+/// This is based on the following assumptions:
+/// - Only entities with prepared assets (pipelines, materials, meshes) are
+///   queued to phases
+/// - View bindings are constant across a phase for a given draw function as
+///   phases are per-view
+/// - `prepare_mesh_uniforms` is the only system that performs this batching
+///   and has sole responsibility for preparing the per-object data. As such
+///   the mesh binding and dynamic offsets are assumed to only be variable as a
+///   result of the `prepare_mesh_uniforms` system, e.g. due to having to split
+///   data across separate uniform bindings within the same buffer due to the
+///   maximum uniform buffer binding size.
+pub struct BatchMeta<T: PartialEq> {
+    /// The pipeline id encompasses all pipeline configuration including vertex
+    /// buffers and layouts, shaders and their specializations, bind group
+    /// layouts, etc.
+    pub pipeline_id: CachedRenderPipelineId,
+    /// The draw function id defines the RenderCommands that are called to
+    /// set the pipeline and bindings, and make the draw command
+    pub draw_function_id: DrawFunctionId,
+    pub material_bind_group_id: Option<T>,
+    pub mesh_asset_id: AssetId<Mesh>,
+    pub dynamic_offset: Option<NonMaxU32>,
+}
+
+impl<T: PartialEq> PartialEq for BatchMeta<T> {
+    #[inline]
+    fn eq(&self, other: &BatchMeta<T>) -> bool {
+        self.pipeline_id == other.pipeline_id
+            && self.draw_function_id == other.draw_function_id
+            && self.mesh_asset_id == other.mesh_asset_id
+            && self.dynamic_offset == other.dynamic_offset
+            && self.material_bind_group_id == other.material_bind_group_id
+    }
+}
+
 /// Batch the items in a render phase. This means comparing metadata needed to draw each phase item
 /// and trying to combine the draws into a batch.
 pub fn batch_render_phase<
diff --git a/crates/bevy_sprite/Cargo.toml b/crates/bevy_sprite/Cargo.toml
index 550ea2af166c7..37db1b9eb2a20 100644
--- a/crates/bevy_sprite/Cargo.toml
+++ b/crates/bevy_sprite/Cargo.toml
@@ -31,4 +31,3 @@ guillotiere = "0.6.0"
 thiserror = "1.0"
 rectangle-pack = "0.4"
 bitflags = "2.3"
-nonmax = "0.5"
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index e7c16a9a86fb0..7a15ecec7c866 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -1,5 +1,5 @@
 use bevy_app::Plugin;
-use bevy_asset::{load_internal_asset, AssetId, Handle};
+use bevy_asset::{load_internal_asset, Handle};
 
 use bevy_core_pipeline::core_2d::Transparent2d;
 use bevy_ecs::{
@@ -10,13 +10,13 @@ use bevy_ecs::{
 use bevy_math::{Affine3, Vec2, Vec4};
 use bevy_reflect::Reflect;
 use bevy_render::{
-    batching::{batch_render_phase, NoAutomaticBatching},
+    batching::{batch_render_phase, BatchMeta, NoAutomaticBatching},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{GpuBufferInfo, Mesh, MeshVertexBufferLayout},
     render_asset::RenderAssets,
     render_phase::{
-        CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem, RenderCommand,
-        RenderCommandResult, RenderPhase, TrackedRenderPass,
+        CachedRenderPipelinePhaseItem, PhaseItem, RenderCommand, RenderCommandResult, RenderPhase,
+        TrackedRenderPass,
     },
     render_resource::*,
     renderer::{RenderDevice, RenderQueue},
@@ -29,7 +29,6 @@ use bevy_render::{
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
 use bevy_transform::components::GlobalTransform;
-use nonmax::NonMaxU32;
 
 use crate::Material2dBindGroupId;
 
@@ -204,33 +203,6 @@ pub fn extract_mesh2d(
     commands.insert_or_spawn_batch(values);
 }
 
-/// Data necessary to be equal for two draw commands to be mergeable
-///
-/// This is based on the following assumptions:
-/// - Only entities with prepared assets (pipelines, materials, meshes) are
-///   queued to phases
-/// - View bindings are constant across a phase for a given draw function as
-///   phases are per-view
-/// - `prepare_mesh_uniforms` is the only system that performs this batching
-///   and has sole responsibility for preparing the per-object data. As such
-///   the mesh binding and dynamic offsets are assumed to only be variable as a
-///   result of the `prepare_mesh_uniforms` system, e.g. due to having to split
-///   data across separate uniform bindings within the same buffer due to the
-///   maximum uniform buffer binding size.
-#[derive(PartialEq, Eq)]
-struct BatchMeta2d {
-    /// The pipeline id encompasses all pipeline configuration including vertex
-    /// buffers and layouts, shaders and their specializations, bind group
-    /// layouts, etc.
-    pipeline_id: CachedRenderPipelineId,
-    /// The draw function id defines the RenderCommands that are called to
-    /// set the pipeline and bindings, and make the draw command
-    draw_function_id: DrawFunctionId,
-    material2d_bind_group_id: Option<Material2dBindGroupId>,
-    mesh_asset_id: AssetId<Mesh>,
-    dynamic_offset: Option<NonMaxU32>,
-}
-
 #[allow(clippy::too_many_arguments)]
 pub fn prepare_and_batch_meshes2d(
     render_device: Res<RenderDevice>,
@@ -263,10 +235,10 @@ pub fn prepare_and_batch_meshes2d(
             };
             let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
             Some((
-                BatchMeta2d {
+                BatchMeta::<Material2dBindGroupId> {
                     pipeline_id: item.cached_pipeline(),
                     draw_function_id: item.draw_function(),
-                    material2d_bind_group_id: material2d_bind_group_id.cloned(),
+                    material_bind_group_id: material2d_bind_group_id.cloned(),
                     mesh_asset_id: mesh_handle.0.id(),
                     dynamic_offset: gpu_array_buffer_index.dynamic_offset,
                 },

From 54f0b2ed74d34b10fbcfca070ce25c9f250285b4 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Fri, 15 Sep 2023 02:31:23 +0200
Subject: [PATCH 19/33] Clean up outdated comments

---
 crates/bevy_pbr/src/material.rs        | 1 -
 crates/bevy_render/src/batching/mod.rs | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs
index d49e268cd9adb..e18a4d426e29f 100644
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@@ -246,7 +246,6 @@ fn extract_material_meshes<M: Material>(
         }
     }
     *previous_len = values.len();
-    // FIXME: Entities still have to be spawned because phases assume entities exist
     commands.insert_or_spawn_batch(values);
 }
 
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 46924f870f70c..7cadbee0afda5 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -19,10 +19,10 @@ pub struct NoAutomaticBatching;
 ///   queued to phases
 /// - View bindings are constant across a phase for a given draw function as
 ///   phases are per-view
-/// - `prepare_mesh_uniforms` is the only system that performs this batching
+/// - `prepare_and_batch_meshes` is the only system that performs this batching
 ///   and has sole responsibility for preparing the per-object data. As such
 ///   the mesh binding and dynamic offsets are assumed to only be variable as a
-///   result of the `prepare_mesh_uniforms` system, e.g. due to having to split
+///   result of the `prepare_and_batch_meshes` system, e.g. due to having to split
 ///   data across separate uniform bindings within the same buffer due to the
 ///   maximum uniform buffer binding size.
 pub struct BatchMeta<T: PartialEq> {

From 7e1fad6d5216b1b2c0f3f9ee40ec5d1b5e669890 Mon Sep 17 00:00:00 2001
From: robtfm <50659922+robtfm@users.noreply.github.com>
Date: Sat, 16 Sep 2023 14:21:34 +0100
Subject: [PATCH 20/33] Make batch_render_phase a generic system via GetBatchData

---
 crates/bevy_pbr/src/render/mesh.rs     | 123 ++++++++-----------------
 crates/bevy_render/src/batching/mod.rs | 120 ++++++++++++++++--------
 crates/bevy_sprite/src/mesh2d/mesh.rs  |  80 +++++-----------
 3 files changed, 146 insertions(+), 177 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 6faff60cf82e6..51ed6c09f9734 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -8,7 +8,7 @@ use crate::{
 use bevy_app::Plugin;
 use bevy_asset::{load_internal_asset, AssetId, Assets, Handle};
 use bevy_core_pipeline::{
-    core_3d::{AlphaMask3d, Opaque3d, Transparent3d},
+    core_3d::{Opaque3d, Transparent3d},
     prepass::{AlphaMask3dPrepass, Opaque3dPrepass, ViewPrepassTextures},
     tonemapping::{
         get_lut_bind_group_layout_entries, get_lut_bindings, Tonemapping, TonemappingLuts,
@@ -21,7 +21,7 @@ use bevy_ecs::{
 };
 use bevy_math::{Affine3, Mat4, Vec2, Vec4};
 use bevy_render::{
-    batching::{batch_render_phase, BatchMeta, NoAutomaticBatching},
+    batching::{batch_render_phase, flush_buffer, GetBatchData, NoAutomaticBatching},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{
         skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
@@ -30,10 +30,7 @@ use bevy_render::{
     },
     prelude::Msaa,
     render_asset::RenderAssets,
-    render_phase::{
-        CachedRenderPipelinePhaseItem, PhaseItem, RenderCommand, RenderCommandResult, RenderPhase,
-        TrackedRenderPass,
-    },
+    render_phase::{PhaseItem, RenderCommand, RenderCommandResult, TrackedRenderPass},
     render_resource::*,
     renderer::{RenderDevice, RenderQueue},
     texture::{
@@ -122,7 +119,22 @@ impl Plugin for MeshRenderPlugin {
                 .add_systems(
                     Render,
                     (
-                        prepare_and_batch_meshes.in_set(RenderSet::PrepareResources),
+                        (
+                            batch_render_phase::<Opaque3dPrepass, MeshPipeline>
+                                .in_set(RenderSet::PrepareResources),
+                            batch_render_phase::<AlphaMask3dPrepass, MeshPipeline>
+                                .in_set(RenderSet::PrepareResources),
+                            batch_render_phase::<Opaque3d, MeshPipeline>
+                                .in_set(RenderSet::PrepareResources),
+                            batch_render_phase::<Transparent3d, MeshPipeline>
+                                .in_set(RenderSet::PrepareResources),
+                            batch_render_phase::<AlphaMask3dPrepass, MeshPipeline>
+                                .in_set(RenderSet::PrepareResources),
+                            batch_render_phase::<Shadow, MeshPipeline>
+                                .in_set(RenderSet::PrepareResources),
+                        )
+                            .chain(),
+                        flush_buffer::<MeshPipeline>.in_set(RenderSet::PrepareResourcesFlush),
                         prepare_skinned_meshes.in_set(RenderSet::PrepareResources),
                         prepare_morphs.in_set(RenderSet::PrepareResources),
                         prepare_mesh_bind_group.in_set(RenderSet::PrepareBindGroups),
@@ -347,84 +359,6 @@ pub fn extract_skinned_meshes(
     commands.insert_or_spawn_batch(values);
 }
 
-#[allow(clippy::too_many_arguments)]
-pub fn prepare_and_batch_meshes(
-    render_device: Res<RenderDevice>,
-    render_queue: Res<RenderQueue>,
-    gpu_array_buffer: ResMut<GpuArrayBuffer<MeshUniform>>,
-    mut views: Query<(
-        Option<&mut RenderPhase<Opaque3dPrepass>>,
-        Option<&mut RenderPhase<AlphaMask3dPrepass>>,
-        &mut RenderPhase<Opaque3d>,
-        &mut RenderPhase<AlphaMask3d>,
-        &mut RenderPhase<Transparent3d>,
-    )>,
-    mut shadow_views: Query<&mut RenderPhase<Shadow>>,
-    meshes: Query<
-        (Option<&MaterialBindGroupId>, &Handle<Mesh>, &MeshTransforms),
-        Without<NoAutomaticBatching>,
-    >,
-) {
-    let gpu_array_buffer = gpu_array_buffer.into_inner();
-
-    gpu_array_buffer.clear();
-
-    let mut get_batch_meta = |entity, pipeline_id, draw_function_id| {
-        let Ok((material_bind_group_id, mesh_handle, mesh_transforms)) = meshes.get(entity) else {
-            return None;
-        };
-        let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
-        Some((
-            BatchMeta::<MaterialBindGroupId> {
-                pipeline_id,
-                draw_function_id,
-                material_bind_group_id: material_bind_group_id.cloned(),
-                mesh_asset_id: mesh_handle.id(),
-                dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-            },
-            gpu_array_buffer_index.index,
-            gpu_array_buffer_index.dynamic_offset,
-        ))
-    };
-
-    for (
-        opaque_prepass_phase,
-        alpha_mask_prepass_phase,
-        opaque_phase,
-        alpha_mask_phase,
-        transparent_phase,
-    ) in &mut views
-    {
-        if let Some(opaque_prepass_phase) = opaque_prepass_phase {
-            batch_render_phase(opaque_prepass_phase.into_inner(), |item| {
-                get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
-            });
-        }
-        if let Some(alpha_mask_prepass_phase) = alpha_mask_prepass_phase {
-            batch_render_phase(alpha_mask_prepass_phase.into_inner(), |item| {
-                get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
-            });
-        }
-        batch_render_phase(opaque_phase.into_inner(), |item| {
-            get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
-        });
-        batch_render_phase(alpha_mask_phase.into_inner(), |item| {
-            get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
-        });
-        batch_render_phase(transparent_phase.into_inner(), |item| {
-            get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
-        });
-    }
-
-    for shadow_phase in &mut shadow_views {
-        batch_render_phase(shadow_phase.into_inner(), |item| {
-            get_batch_meta(item.entity(), item.cached_pipeline(), item.draw_function())
-        });
-    }
-
-    gpu_array_buffer.write_buffer(&render_device, &render_queue);
-}
-
 #[derive(Resource, Clone)]
 pub struct MeshPipeline {
     pub view_layout: BindGroupLayout,
@@ -709,6 +643,25 @@ impl MeshPipeline {
     }
 }
 
+impl GetBatchData for MeshPipeline {
+    type Query = (
+        Option<&'static MaterialBindGroupId>,
+        &'static Handle<Mesh>,
+        &'static MeshTransforms,
+    );
+    type CompareData = (Option<MaterialBindGroupId>, AssetId<Mesh>);
+    type BufferData = MeshUniform;
+
+    fn get_batch_data(
+        (material_bind_group_id, mesh_handle, mesh_transforms): <Self::Query as bevy_ecs::query::WorldQuery>::Item<'_>,
+    ) -> (Self::CompareData, Self::BufferData) {
+        (
+            (material_bind_group_id.cloned(), mesh_handle.id()),
+            mesh_transforms.into(),
+        )
+    }
+}
+
 bitflags::bitflags! {
     #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
     #[repr(transparent)]
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 7cadbee0afda5..e990ae9d0d116 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -1,11 +1,15 @@
-use bevy_asset::AssetId;
-use bevy_ecs::component::Component;
+use bevy_ecs::{
+    component::Component,
+    prelude::Res,
+    query::{ReadOnlyWorldQuery, WorldQuery},
+    system::{Query, ResMut},
+};
 use nonmax::NonMaxU32;
 
 use crate::{
-    mesh::Mesh,
     render_phase::{CachedRenderPipelinePhaseItem, DrawFunctionId, RenderPhase},
-    render_resource::CachedRenderPipelineId,
+    render_resource::{CachedRenderPipelineId, GpuArrayBuffer, GpuArrayBufferable},
+    renderer::{RenderDevice, RenderQueue},
 };
 
 /// Add this component to mesh entities to disable automatic batching
@@ -25,7 +29,7 @@ pub struct NoAutomaticBatching;
 ///   result of the `prepare_and_batch_meshes` system, e.g. due to having to split
 ///   data across separate uniform bindings within the same buffer due to the
 ///   maximum uniform buffer binding size.
-pub struct BatchMeta<T: PartialEq> {
+struct BatchMeta<T: PartialEq> {
     /// The pipeline id encompasses all pipeline configuration including vertex
     /// buffers and layouts, shaders and their specializations, bind group
     /// layouts, etc.
@@ -33,9 +37,8 @@ pub struct BatchMeta<T: PartialEq> {
     /// The draw function id defines the RenderCommands that are called to
     /// set the pipeline and bindings, and make the draw command
     pub draw_function_id: DrawFunctionId,
-    pub material_bind_group_id: Option<T>,
-    pub mesh_asset_id: AssetId<Mesh>,
     pub dynamic_offset: Option<NonMaxU32>,
+    pub user_data: T,
 }
 
 impl<T: PartialEq> PartialEq for BatchMeta<T> {
@@ -43,47 +46,90 @@ impl<T: PartialEq> PartialEq for BatchMeta<T> {
     fn eq(&self, other: &BatchMeta<T>) -> bool {
         self.pipeline_id == other.pipeline_id
             && self.draw_function_id == other.draw_function_id
-            && self.mesh_asset_id == other.mesh_asset_id
             && self.dynamic_offset == other.dynamic_offset
-            && self.material_bind_group_id == other.material_bind_group_id
+            && self.user_data == other.user_data
     }
 }
 
+pub trait GetBatchData {
+    type Query: ReadOnlyWorldQuery;
+    type CompareData: PartialEq;
+    type BufferData: GpuArrayBufferable + Sync + Send + 'static;
+    fn get_batch_data(
+        batch_data: <Self::Query as WorldQuery>::Item<'_>,
+    ) -> (Self::CompareData, Self::BufferData);
+}
+
 /// Batch the items in a render phase. This means comparing metadata needed to draw each phase item
 /// and trying to combine the draws into a batch.
-pub fn batch_render_phase<
-    I: CachedRenderPipelinePhaseItem,
-    T: PartialEq, // Batch metadata used for distinguishing batches
->(
-    phase: &mut RenderPhase<I>,
-    mut get_batch_meta: impl FnMut(&I) -> Option<(T, NonMaxU32, Option<NonMaxU32>)>,
+pub fn batch_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBatchData>(
+    gpu_array_buffer: ResMut<GpuArrayBuffer<F::BufferData>>,
+    mut views: Query<&mut RenderPhase<I>>,
+    query: Query<(Option<&NoAutomaticBatching>, F::Query)>,
 ) {
-    let mut items = phase.items.iter_mut().peekable();
-    let mut batch_start_item = None;
-    let mut batch_start_index = 0;
-    let mut next_batch = items.peek().and_then(|item| get_batch_meta(item));
-    while let Some(item) = items.next() {
-        // Get the current batch meta and update the next batch meta
-        let Some((batch_meta, index, dynamic_offset)) = std::mem::replace(
-            &mut next_batch,
-            items.peek().and_then(|item| get_batch_meta(item)),
-        ) else {
-            // If the current phase item doesn't match the query, we don't modify it
-            continue;
+    let gpu_array_buffer = gpu_array_buffer.into_inner();
+
+    let mut process_item = |item: &mut I| -> Option<BatchMeta<F::CompareData>> {
+        let Ok((no_batching, batch_data)) = query.get(item.entity()) else {
+            return None;
         };
 
-        // If we are beginning a new batch, record the start item and index
-        if batch_start_item.is_none() {
-            batch_start_item = Some(item);
-            batch_start_index = index.get();
+        let (user_data, buffer_data) = F::get_batch_data(batch_data);
+
+        let buffer_index = gpu_array_buffer.push(buffer_data);
+        *item.batch_range_mut() = buffer_index.index.get()..buffer_index.index.get() + 1;
+        *item.dynamic_offset_mut() = buffer_index.dynamic_offset;
+
+        if no_batching.is_some() {
+            None
+        } else {
+            Some(BatchMeta {
+                pipeline_id: item.cached_pipeline(),
+                draw_function_id: item.draw_function(),
+                dynamic_offset: buffer_index.dynamic_offset,
+                user_data,
+            })
         }
+    };
 
-        if Some(&batch_meta) != next_batch.as_ref().map(|(meta, ..)| meta) {
-            // The next item doesn't match the current batch (or doesn't exist).
-            // Update the phase item to render this batch.
-            let batch_start_item = batch_start_item.take().unwrap();
-            *batch_start_item.batch_range_mut() = batch_start_index..index.get() + 1;
-            *batch_start_item.dynamic_offset_mut() = dynamic_offset;
+    for mut phase in &mut views {
+        let mut items = phase.items.iter_mut().peekable();
+        let mut batch_start_item = None;
+        let mut next_batch = items.peek_mut().and_then(|i| process_item(i));
+        while let Some(item) = items.next() {
+            // Get the current batch meta and update the next batch meta
+            let Some(batch_meta) = std::mem::replace(
+                &mut next_batch,
+                items.peek_mut().and_then(|i| process_item(i)),
+            ) else {
+                // If the current phase item doesn't match the query or has NoAutomaticBatching,
+                // we don't modify it any further
+                continue;
+            };
+
+            let batch_end_item = item.batch_range().end;
+
+            // If we are beginning a new batch, record the start item
+            if batch_start_item.is_none() {
+                batch_start_item = Some(item);
+            }
+
+            if Some(&batch_meta) != next_batch.as_ref() {
+                // The next item doesn't match the current batch (or doesn't exist).
+                // Update the first phase item to render the full batch.
+                let batch_start_item = batch_start_item.take().unwrap();
+                batch_start_item.batch_range_mut().end = batch_end_item;
+            }
         }
     }
 }
+
+pub fn flush_buffer<F: GetBatchData>(
+    render_device: Res<RenderDevice>,
+    render_queue: Res<RenderQueue>,
+    gpu_array_buffer: ResMut<GpuArrayBuffer<F::BufferData>>,
+) {
+    let gpu_array_buffer = gpu_array_buffer.into_inner();
+    gpu_array_buffer.write_buffer(&render_device, &render_queue);
+    gpu_array_buffer.clear();
+}
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 7a15ecec7c866..88e8ad59dd5d4 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -1,5 +1,5 @@
 use bevy_app::Plugin;
-use bevy_asset::{load_internal_asset, Handle};
+use bevy_asset::{load_internal_asset, AssetId, Handle};
 
 use bevy_core_pipeline::core_2d::Transparent2d;
 use bevy_ecs::{
@@ -10,14 +10,11 @@ use bevy_ecs::{
 use bevy_math::{Affine3, Vec2, Vec4};
 use bevy_reflect::Reflect;
 use bevy_render::{
-    batching::{batch_render_phase, BatchMeta, NoAutomaticBatching},
+    batching::{batch_render_phase, flush_buffer, GetBatchData},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{GpuBufferInfo, Mesh, MeshVertexBufferLayout},
     render_asset::RenderAssets,
-    render_phase::{
-        CachedRenderPipelinePhaseItem, PhaseItem, RenderCommand, RenderCommandResult, RenderPhase,
-        TrackedRenderPass,
-    },
+    render_phase::{PhaseItem, RenderCommand, RenderCommandResult, TrackedRenderPass},
     render_resource::*,
     renderer::{RenderDevice, RenderQueue},
     texture::{
@@ -97,7 +94,9 @@ impl Plugin for Mesh2dRenderPlugin {
                 .add_systems(
                     Render,
                     (
-                        prepare_and_batch_meshes2d.in_set(RenderSet::PrepareResources),
+                        batch_render_phase::<Transparent2d, Mesh2dPipeline>
+                            .in_set(RenderSet::PrepareResources),
+                        flush_buffer::<Mesh2dPipeline>.in_set(RenderSet::PrepareResourcesFlush),
                         prepare_mesh2d_bind_group.in_set(RenderSet::PrepareBindGroups),
                         prepare_mesh2d_view_bind_groups.in_set(RenderSet::PrepareBindGroups),
                     ),
@@ -203,54 +202,6 @@ pub fn extract_mesh2d(
     commands.insert_or_spawn_batch(values);
 }
 
-#[allow(clippy::too_many_arguments)]
-pub fn prepare_and_batch_meshes2d(
-    render_device: Res<RenderDevice>,
-    render_queue: Res<RenderQueue>,
-    gpu_array_buffer: ResMut<GpuArrayBuffer<Mesh2dUniform>>,
-    mut views: Query<&mut RenderPhase<Transparent2d>>,
-    meshes: Query<
-        (
-            Option<&Material2dBindGroupId>,
-            &Mesh2dHandle,
-            &Mesh2dTransforms,
-        ),
-        Without<NoAutomaticBatching>,
-    >,
-) {
-    if meshes.is_empty() {
-        return;
-    }
-
-    let gpu_array_buffer = gpu_array_buffer.into_inner();
-
-    gpu_array_buffer.clear();
-
-    for transparent_phase in &mut views {
-        batch_render_phase(transparent_phase.into_inner(), |item| {
-            let Ok((material2d_bind_group_id, mesh_handle, mesh_transforms)) =
-                meshes.get(item.entity())
-            else {
-                return None;
-            };
-            let gpu_array_buffer_index = gpu_array_buffer.push(mesh_transforms.into());
-            Some((
-                BatchMeta::<Material2dBindGroupId> {
-                    pipeline_id: item.cached_pipeline(),
-                    draw_function_id: item.draw_function(),
-                    material_bind_group_id: material2d_bind_group_id.cloned(),
-                    mesh_asset_id: mesh_handle.0.id(),
-                    dynamic_offset: gpu_array_buffer_index.dynamic_offset,
-                },
-                gpu_array_buffer_index.index,
-                gpu_array_buffer_index.dynamic_offset,
-            ))
-        });
-    }
-
-    gpu_array_buffer.write_buffer(&render_device, &render_queue);
-}
-
 #[derive(Resource, Clone)]
 pub struct Mesh2dPipeline {
     pub view_layout: BindGroupLayout,
@@ -372,6 +323,25 @@ impl Mesh2dPipeline {
     }
 }
 
+impl GetBatchData for Mesh2dPipeline {
+    type Query = (
+        Option<&'static Material2dBindGroupId>,
+        &'static Mesh2dHandle,
+        &'static Mesh2dTransforms,
+    );
+    type CompareData = (Option<Material2dBindGroupId>, AssetId<Mesh>);
+    type BufferData = Mesh2dUniform;
+
+    fn get_batch_data(
+        (material_bind_group_id, mesh_handle, mesh_transforms): <Self::Query as bevy_ecs::query::WorldQuery>::Item<'_>,
+    ) -> (Self::CompareData, Self::BufferData) {
+        (
+            (material_bind_group_id.cloned(), mesh_handle.0.id()),
+            mesh_transforms.into(),
+        )
+    }
+}
+
 bitflags::bitflags! {
     #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
     #[repr(transparent)]

From 315b2713c07435e5877eb2ee210127129936ce56 Mon Sep 17 00:00:00 2001
From: robtfm <50659922+robtfm@users.noreply.github.com>
Date: Sat, 16 Sep 2023 18:19:07 +0100
Subject: [PATCH 21/33] Restore accidentally removed AlphaMask3d batching

---
 crates/bevy_pbr/src/render/mesh.rs | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 51ed6c09f9734..61bf8ee470eb9 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -8,7 +8,7 @@ use crate::{
 use bevy_app::Plugin;
 use bevy_asset::{load_internal_asset, AssetId, Assets, Handle};
 use bevy_core_pipeline::{
-    core_3d::{Opaque3d, Transparent3d},
+    core_3d::{AlphaMask3d, Opaque3d, Transparent3d},
     prepass::{AlphaMask3dPrepass, Opaque3dPrepass, ViewPrepassTextures},
     tonemapping::{
         get_lut_bind_group_layout_entries, get_lut_bindings, Tonemapping, TonemappingLuts,
@@ -120,20 +120,15 @@ impl Plugin for MeshRenderPlugin {
                     Render,
                     (
                         (
-                            batch_render_phase::<Opaque3dPrepass, MeshPipeline>
-                                .in_set(RenderSet::PrepareResources),
-                            batch_render_phase::<AlphaMask3dPrepass, MeshPipeline>
-                                .in_set(RenderSet::PrepareResources),
-                            batch_render_phase::<Opaque3d, MeshPipeline>
-                                .in_set(RenderSet::PrepareResources),
-                            batch_render_phase::<Transparent3d, MeshPipeline>
-                                .in_set(RenderSet::PrepareResources),
-                            batch_render_phase::<AlphaMask3dPrepass, MeshPipeline>
-                                .in_set(RenderSet::PrepareResources),
-                            batch_render_phase::<Shadow, MeshPipeline>
-                                .in_set(RenderSet::PrepareResources),
+                            batch_render_phase::<Opaque3dPrepass, MeshPipeline>,
+                            batch_render_phase::<AlphaMask3dPrepass, MeshPipeline>,
+                            batch_render_phase::<Opaque3d, MeshPipeline>,
+                            batch_render_phase::<Transparent3d, MeshPipeline>,
+                            batch_render_phase::<AlphaMask3d, MeshPipeline>,
+                            batch_render_phase::<Shadow, MeshPipeline>,
                         )
-                            .chain(),
+                            .chain()
+                            .in_set(RenderSet::PrepareResources),
                         flush_buffer::<MeshPipeline>.in_set(RenderSet::PrepareResourcesFlush),
                         prepare_skinned_meshes.in_set(RenderSet::PrepareResources),
                         prepare_morphs.in_set(RenderSet::PrepareResources),

From 86867418b572b65d6d6d356978e5fbec4b50ba84 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Sun, 17 Sep 2023 13:01:51 +0200
Subject: [PATCH 22/33] Minor cleanup

---
 crates/bevy_pbr/src/render/mesh.rs    | 18 +++++++++---------
 crates/bevy_sprite/src/mesh2d/mesh.rs | 16 +++++++++-------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 61bf8ee470eb9..7954240be43b0 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -1334,20 +1334,20 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
         };
 
         let mut dynamic_offsets: [u32; 3] = Default::default();
-        let mut index_count = 0;
-        if let Some(mesh_index) = item.dynamic_offset() {
-            dynamic_offsets[index_count] = mesh_index.get();
-            index_count += 1;
+        let mut offset_count = 0;
+        if let Some(dynamic_offset) = item.dynamic_offset() {
+            dynamic_offsets[offset_count] = dynamic_offset.get();
+            offset_count += 1;
         }
         if let Some(skin_index) = skin_index {
-            dynamic_offsets[index_count] = skin_index.index;
-            index_count += 1;
+            dynamic_offsets[offset_count] = skin_index.index;
+            offset_count += 1;
         }
         if let Some(morph_index) = morph_index {
-            dynamic_offsets[index_count] = morph_index.index;
-            index_count += 1;
+            dynamic_offsets[offset_count] = morph_index.index;
+            offset_count += 1;
         }
-        pass.set_bind_group(I, bind_group, &dynamic_offsets[0..index_count]);
+        pass.set_bind_group(I, bind_group, &dynamic_offsets[0..offset_count]);
 
         RenderCommandResult::Success
     }
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 88e8ad59dd5d4..1d294caefd4c4 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -4,7 +4,7 @@ use bevy_asset::{load_internal_asset, AssetId, Handle};
 use bevy_core_pipeline::core_2d::Transparent2d;
 use bevy_ecs::{
     prelude::*,
-    query::ROQueryItem,
+    query::{ROQueryItem, WorldQuery},
     system::{lifetimeless::*, SystemParamItem, SystemState},
 };
 use bevy_math::{Affine3, Vec2, Vec4};
@@ -333,7 +333,9 @@ impl GetBatchData for Mesh2dPipeline {
     type BufferData = Mesh2dUniform;
 
     fn get_batch_data(
-        (material_bind_group_id, mesh_handle, mesh_transforms): <Self::Query as bevy_ecs::query::WorldQuery>::Item<'_>,
+        (material_bind_group_id, mesh_handle, mesh_transforms): <Self::Query as WorldQuery>::Item<
+            '_,
+        >,
     ) -> (Self::CompareData, Self::BufferData) {
         (
             (material_bind_group_id.cloned(), mesh_handle.0.id()),
@@ -635,15 +637,15 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMesh2dBindGroup<I> {
         pass: &mut TrackedRenderPass<'w>,
     ) -> RenderCommandResult {
         let mut dynamic_offsets: [u32; 1] = Default::default();
-        let mut index_count = 0;
-        if let Some(mesh_index) = item.dynamic_offset() {
-            dynamic_offsets[index_count] = mesh_index.get();
-            index_count += 1;
+        let mut offset_count = 0;
+        if let Some(dynamic_offset) = item.dynamic_offset() {
+            dynamic_offsets[offset_count] = dynamic_offset.get();
+            offset_count += 1;
         }
         pass.set_bind_group(
             I,
             &mesh2d_bind_group.into_inner().value,
-            &dynamic_offsets[..index_count],
+            &dynamic_offsets[..offset_count],
         );
         RenderCommandResult::Success
     }

From 64292a7894ce2c16f7bc7e547d9569249641c201 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Sun, 17 Sep 2023 13:11:54 +0200
Subject: [PATCH 23/33] Use clearer system names

---
 crates/bevy_pbr/src/render/mesh.rs     | 20 ++++++++++++--------
 crates/bevy_render/src/batching/mod.rs |  4 ++--
 crates/bevy_sprite/src/mesh2d/mesh.rs  |  7 ++++---
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 7954240be43b0..2c9d3e060f67b 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -21,7 +21,10 @@ use bevy_ecs::{
 };
 use bevy_math::{Affine3, Mat4, Vec2, Vec4};
 use bevy_render::{
-    batching::{batch_render_phase, flush_buffer, GetBatchData, NoAutomaticBatching},
+    batching::{
+        batch_and_prepare_render_phase, write_batched_instance_buffer, GetBatchData,
+        NoAutomaticBatching,
+    },
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{
         skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
@@ -120,16 +123,17 @@ impl Plugin for MeshRenderPlugin {
                     Render,
                     (
                         (
-                            batch_render_phase::<Opaque3dPrepass, MeshPipeline>,
-                            batch_render_phase::<AlphaMask3dPrepass, MeshPipeline>,
-                            batch_render_phase::<Opaque3d, MeshPipeline>,
-                            batch_render_phase::<Transparent3d, MeshPipeline>,
-                            batch_render_phase::<AlphaMask3d, MeshPipeline>,
-                            batch_render_phase::<Shadow, MeshPipeline>,
+                            batch_and_prepare_render_phase::<Opaque3dPrepass, MeshPipeline>,
+                            batch_and_prepare_render_phase::<AlphaMask3dPrepass, MeshPipeline>,
+                            batch_and_prepare_render_phase::<Opaque3d, MeshPipeline>,
+                            batch_and_prepare_render_phase::<Transparent3d, MeshPipeline>,
+                            batch_and_prepare_render_phase::<AlphaMask3d, MeshPipeline>,
+                            batch_and_prepare_render_phase::<Shadow, MeshPipeline>,
                         )
                             .chain()
                             .in_set(RenderSet::PrepareResources),
-                        flush_buffer::<MeshPipeline>.in_set(RenderSet::PrepareResourcesFlush),
+                        write_batched_instance_buffer::<MeshPipeline>
+                            .in_set(RenderSet::PrepareResourcesFlush),
                         prepare_skinned_meshes.in_set(RenderSet::PrepareResources),
                         prepare_morphs.in_set(RenderSet::PrepareResources),
                         prepare_mesh_bind_group.in_set(RenderSet::PrepareBindGroups),
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index e990ae9d0d116..3db59d5c97ed8 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -62,7 +62,7 @@ pub trait GetBatchData {
 
 /// Batch the items in a render phase. This means comparing metadata needed to draw each phase item
 /// and trying to combine the draws into a batch.
-pub fn batch_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBatchData>(
+pub fn batch_and_prepare_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBatchData>(
     gpu_array_buffer: ResMut<GpuArrayBuffer<F::BufferData>>,
     mut views: Query<&mut RenderPhase<I>>,
     query: Query<(Option<&NoAutomaticBatching>, F::Query)>,
@@ -124,7 +124,7 @@ pub fn batch_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBatchData>(
     }
 }
 
-pub fn flush_buffer<F: GetBatchData>(
+pub fn write_batched_instance_buffer<F: GetBatchData>(
     render_device: Res<RenderDevice>,
     render_queue: Res<RenderQueue>,
     gpu_array_buffer: ResMut<GpuArrayBuffer<F::BufferData>>,
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 1d294caefd4c4..6228c69971e09 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -10,7 +10,7 @@ use bevy_ecs::{
 use bevy_math::{Affine3, Vec2, Vec4};
 use bevy_reflect::Reflect;
 use bevy_render::{
-    batching::{batch_render_phase, flush_buffer, GetBatchData},
+    batching::{batch_and_prepare_render_phase, write_batched_instance_buffer, GetBatchData},
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::{GpuBufferInfo, Mesh, MeshVertexBufferLayout},
     render_asset::RenderAssets,
@@ -94,9 +94,10 @@ impl Plugin for Mesh2dRenderPlugin {
                 .add_systems(
                     Render,
                     (
-                        batch_render_phase::<Transparent2d, Mesh2dPipeline>
+                        batch_and_prepare_render_phase::<Transparent2d, Mesh2dPipeline>
                             .in_set(RenderSet::PrepareResources),
-                        flush_buffer::<Mesh2dPipeline>.in_set(RenderSet::PrepareResourcesFlush),
+                        write_batched_instance_buffer::<Mesh2dPipeline>
+                            .in_set(RenderSet::PrepareResourcesFlush),
                         prepare_mesh2d_bind_group.in_set(RenderSet::PrepareBindGroups),
                         prepare_mesh2d_view_bind_groups.in_set(RenderSet::PrepareBindGroups),
                     ),

From 892cfc42849c1ed3177c5181d4ebd5141981f383 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Sun, 17 Sep 2023 15:56:17 +0200
Subject: [PATCH 24/33] Move prepass system scheduling to prepass plugin

And remove ordering.
---
 crates/bevy_pbr/src/prepass/mod.rs | 8 +++++++-
 crates/bevy_pbr/src/render/mesh.rs | 5 +----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs
index f180d3005a20d..ca1caf5c44720 100644
--- a/crates/bevy_pbr/src/prepass/mod.rs
+++ b/crates/bevy_pbr/src/prepass/mod.rs
@@ -17,6 +17,7 @@ use bevy_ecs::{
 };
 use bevy_math::{Affine3A, Mat4};
 use bevy_render::{
+    batching::batch_and_prepare_render_phase,
     globals::{GlobalsBuffer, GlobalsUniform},
     mesh::MeshVertexBufferLayout,
     prelude::{Camera, Mesh},
@@ -158,7 +159,12 @@ where
                 .add_systems(ExtractSchedule, extract_camera_previous_view_projection)
                 .add_systems(
                     Render,
-                    prepare_previous_view_projection_uniforms.in_set(RenderSet::PrepareResources),
+                    (
+                        prepare_previous_view_projection_uniforms,
+                        batch_and_prepare_render_phase::<Opaque3dPrepass, MeshPipeline>,
+                        batch_and_prepare_render_phase::<AlphaMask3dPrepass, MeshPipeline>,
+                    )
+                        .in_set(RenderSet::PrepareResources),
                 );
         }
 
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 2c9d3e060f67b..55201d66e8733 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -9,7 +9,7 @@ use bevy_app::Plugin;
 use bevy_asset::{load_internal_asset, AssetId, Assets, Handle};
 use bevy_core_pipeline::{
     core_3d::{AlphaMask3d, Opaque3d, Transparent3d},
-    prepass::{AlphaMask3dPrepass, Opaque3dPrepass, ViewPrepassTextures},
+    prepass::ViewPrepassTextures,
     tonemapping::{
         get_lut_bind_group_layout_entries, get_lut_bindings, Tonemapping, TonemappingLuts,
     },
@@ -123,14 +123,11 @@ impl Plugin for MeshRenderPlugin {
                     Render,
                     (
                         (
-                            batch_and_prepare_render_phase::<Opaque3dPrepass, MeshPipeline>,
-                            batch_and_prepare_render_phase::<AlphaMask3dPrepass, MeshPipeline>,
                             batch_and_prepare_render_phase::<Opaque3d, MeshPipeline>,
                             batch_and_prepare_render_phase::<Transparent3d, MeshPipeline>,
                             batch_and_prepare_render_phase::<AlphaMask3d, MeshPipeline>,
                             batch_and_prepare_render_phase::<Shadow, MeshPipeline>,
                         )
-                            .chain()
                             .in_set(RenderSet::PrepareResources),
                         write_batched_instance_buffer::<MeshPipeline>
                             .in_set(RenderSet::PrepareResourcesFlush),

From 2dd8e61b6843709ca7fd056790e0654db3babb53 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Mon, 18 Sep 2023 23:29:33 +0200
Subject: [PATCH 25/33] Use Has<T> instead of Option<&T>.is_some()

---
 crates/bevy_render/src/batching/mod.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 3db59d5c97ed8..74c2c3fc42f74 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -1,7 +1,7 @@
 use bevy_ecs::{
     component::Component,
     prelude::Res,
-    query::{ReadOnlyWorldQuery, WorldQuery},
+    query::{Has, ReadOnlyWorldQuery, WorldQuery},
     system::{Query, ResMut},
 };
 use nonmax::NonMaxU32;
@@ -65,7 +65,7 @@ pub trait GetBatchData {
 pub fn batch_and_prepare_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBatchData>(
     gpu_array_buffer: ResMut<GpuArrayBuffer<F::BufferData>>,
     mut views: Query<&mut RenderPhase<I>>,
-    query: Query<(Option<&NoAutomaticBatching>, F::Query)>,
+    query: Query<(Has<NoAutomaticBatching>, F::Query)>,
 ) {
     let gpu_array_buffer = gpu_array_buffer.into_inner();
 
@@ -80,7 +80,7 @@ pub fn batch_and_prepare_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBa
         *item.batch_range_mut() = buffer_index.index.get()..buffer_index.index.get() + 1;
         *item.dynamic_offset_mut() = buffer_index.dynamic_offset;
 
-        if no_batching.is_some() {
+        if no_batching {
             None
         } else {
             Some(BatchMeta {

From e76ed4677604a366cd12fb766de2a62dc0e9de1c Mon Sep 17 00:00:00 2001
From: Nicola Papale <nico@nicopap.ch>
Date: Tue, 19 Sep 2023 14:19:31 +0200
Subject: [PATCH 26/33] Use reduce over complex loop

---
 crates/bevy_pbr/src/render/mesh.rs         |  4 +-
 crates/bevy_render/src/batching/mod.rs     | 92 ++++++++--------------
 crates/bevy_render/src/render_phase/mod.rs | 29 ++-----
 crates/bevy_sprite/src/mesh2d/mesh.rs      |  6 +-
 4 files changed, 43 insertions(+), 88 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 55201d66e8733..91e95f481a536 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -16,7 +16,7 @@ use bevy_core_pipeline::{
 };
 use bevy_ecs::{
     prelude::*,
-    query::ROQueryItem,
+    query::{QueryItem, ROQueryItem},
     system::{lifetimeless::*, SystemParamItem, SystemState},
 };
 use bevy_math::{Affine3, Mat4, Vec2, Vec4};
@@ -649,7 +649,7 @@ impl GetBatchData for MeshPipeline {
     type BufferData = MeshUniform;
 
     fn get_batch_data(
-        (material_bind_group_id, mesh_handle, mesh_transforms): <Self::Query as bevy_ecs::query::WorldQuery>::Item<'_>,
+        (material_bind_group_id, mesh_handle, mesh_transforms): QueryItem<Self::Query>,
     ) -> (Self::CompareData, Self::BufferData) {
         (
             (material_bind_group_id.cloned(), mesh_handle.id()),
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 74c2c3fc42f74..396b19d703bd1 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -1,7 +1,7 @@
 use bevy_ecs::{
     component::Component,
     prelude::Res,
-    query::{Has, ReadOnlyWorldQuery, WorldQuery},
+    query::{Has, QueryItem, ReadOnlyWorldQuery},
     system::{Query, ResMut},
 };
 use nonmax::NonMaxU32;
@@ -29,25 +29,26 @@ pub struct NoAutomaticBatching;
 ///   result of the `prepare_and_batch_meshes` system, e.g. due to having to split
 ///   data across separate uniform bindings within the same buffer due to the
 ///   maximum uniform buffer binding size.
+#[derive(PartialEq)]
 struct BatchMeta<T: PartialEq> {
     /// The pipeline id encompasses all pipeline configuration including vertex
     /// buffers and layouts, shaders and their specializations, bind group
     /// layouts, etc.
-    pub pipeline_id: CachedRenderPipelineId,
+    pipeline_id: CachedRenderPipelineId,
     /// The draw function id defines the RenderCommands that are called to
     /// set the pipeline and bindings, and make the draw command
-    pub draw_function_id: DrawFunctionId,
-    pub dynamic_offset: Option<NonMaxU32>,
-    pub user_data: T,
+    draw_function_id: DrawFunctionId,
+    dynamic_offset: Option<NonMaxU32>,
+    user_data: T,
 }
-
-impl<T: PartialEq> PartialEq for BatchMeta<T> {
-    #[inline]
-    fn eq(&self, other: &BatchMeta<T>) -> bool {
-        self.pipeline_id == other.pipeline_id
-            && self.draw_function_id == other.draw_function_id
-            && self.dynamic_offset == other.dynamic_offset
-            && self.user_data == other.user_data
+impl<T: PartialEq> BatchMeta<T> {
+    fn new(item: &impl CachedRenderPipelinePhaseItem, user_data: T) -> Self {
+        BatchMeta {
+            pipeline_id: item.cached_pipeline(),
+            draw_function_id: item.draw_function(),
+            dynamic_offset: item.dynamic_offset(),
+            user_data,
+        }
     }
 }
 
@@ -55,9 +56,7 @@ pub trait GetBatchData {
     type Query: ReadOnlyWorldQuery;
     type CompareData: PartialEq;
     type BufferData: GpuArrayBufferable + Sync + Send + 'static;
-    fn get_batch_data(
-        batch_data: <Self::Query as WorldQuery>::Item<'_>,
-    ) -> (Self::CompareData, Self::BufferData);
+    fn get_batch_data(batch_data: QueryItem<Self::Query>) -> (Self::CompareData, Self::BufferData);
 }
 
 /// Batch the items in a render phase. This means comparing metadata needed to draw each phase item
@@ -69,58 +68,31 @@ pub fn batch_and_prepare_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBa
 ) {
     let gpu_array_buffer = gpu_array_buffer.into_inner();
 
-    let mut process_item = |item: &mut I| -> Option<BatchMeta<F::CompareData>> {
-        let Ok((no_batching, batch_data)) = query.get(item.entity()) else {
-            return None;
-        };
-
-        let (user_data, buffer_data) = F::get_batch_data(batch_data);
+    let mut process_item = |item: &mut I| {
+        let (no_auto_batching, batch_query_item) = query.get(item.entity()).ok()?;
+        let (user_data, buffer_data) = F::get_batch_data(batch_query_item);
 
         let buffer_index = gpu_array_buffer.push(buffer_data);
-        *item.batch_range_mut() = buffer_index.index.get()..buffer_index.index.get() + 1;
+        let index = buffer_index.index.get();
+        *item.batch_range_mut() = index..index + 1;
         *item.dynamic_offset_mut() = buffer_index.dynamic_offset;
 
-        if no_batching {
-            None
-        } else {
-            Some(BatchMeta {
-                pipeline_id: item.cached_pipeline(),
-                draw_function_id: item.draw_function(),
-                dynamic_offset: buffer_index.dynamic_offset,
-                user_data,
-            })
-        }
+        (!no_auto_batching).then(|| BatchMeta::new(item, user_data))
     };
 
     for mut phase in &mut views {
-        let mut items = phase.items.iter_mut().peekable();
-        let mut batch_start_item = None;
-        let mut next_batch = items.peek_mut().and_then(|i| process_item(i));
-        while let Some(item) = items.next() {
-            // Get the current batch meta and update the next batch meta
-            let Some(batch_meta) = std::mem::replace(
-                &mut next_batch,
-                items.peek_mut().and_then(|i| process_item(i)),
-            ) else {
-                // If the current phase item doesn't match the query or has NoAutomaticBatching,
-                // we don't modify it any further
-                continue;
-            };
-
-            let batch_end_item = item.batch_range().end;
-
-            // If we are beginning a new batch, record the start item
-            if batch_start_item.is_none() {
-                batch_start_item = Some(item);
-            }
-
-            if Some(&batch_meta) != next_batch.as_ref() {
-                // The next item doesn't match the current batch (or doesn't exist).
-                // Update the first phase item to render the full batch.
-                let batch_start_item = batch_start_item.take().unwrap();
-                batch_start_item.batch_range_mut().end = batch_end_item;
+        let items = phase.items.iter_mut().map(|i| {
+            let batch_data = process_item(i);
+            (i.batch_range_mut(), batch_data)
+        });
+        items.reduce(|(mut start_range, old_batch_meta), (range, batch_meta)| {
+            if old_batch_meta == batch_meta && batch_meta.is_some() {
+                start_range.end = range.end;
+            } else {
+                start_range = range;
             }
-        }
+            (start_range, batch_meta)
+        });
     }
 }
 
diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs
index dec5ddf77621d..bf04d2d13f438 100644
--- a/crates/bevy_render/src/render_phase/mod.rs
+++ b/crates/bevy_render/src/render_phase/mod.rs
@@ -39,7 +39,7 @@ use bevy_ecs::{
     prelude::*,
     system::{lifetimeless::SRes, SystemParamItem},
 };
-use std::ops::Range;
+use std::{ops::Range, slice::SliceIndex};
 
 /// A collection of all rendering instructions, that will be executed by the GPU, for a
 /// single render phase for a single view.
@@ -87,22 +87,7 @@ impl<I: PhaseItem> RenderPhase<I> {
         world: &'w World,
         view: Entity,
     ) {
-        let draw_functions = world.resource::<DrawFunctions<I>>();
-        let mut draw_functions = draw_functions.write();
-        draw_functions.prepare(world);
-
-        let mut index = 0;
-        while index < self.items.len() {
-            let item = &self.items[index];
-            let batch_range = item.batch_range();
-            if batch_range.is_empty() {
-                index += 1;
-            } else {
-                let draw_function = draw_functions.get_mut(item.draw_function()).unwrap();
-                draw_function.draw(world, render_pass, view, item);
-                index += batch_range.len();
-            }
-        }
+        self.render_range(render_pass, world, view, ..);
     }
 
     /// Renders all [`PhaseItem`]s in the provided `range` (based on their index in `self.items`) using their corresponding draw functions.
@@ -111,17 +96,17 @@ impl<I: PhaseItem> RenderPhase<I> {
         render_pass: &mut TrackedRenderPass<'w>,
         world: &'w World,
         view: Entity,
-        range: Range<usize>,
+        range: impl SliceIndex<[I], Output = [I]>,
     ) {
-        let draw_functions = world.resource::<DrawFunctions<I>>();
-        let mut draw_functions = draw_functions.write();
-        draw_functions.prepare(world);
-
         let items = self
             .items
             .get(range)
             .expect("`Range` provided to `render_range()` is out of bounds");
 
+        let draw_functions = world.resource::<DrawFunctions<I>>();
+        let mut draw_functions = draw_functions.write();
+        draw_functions.prepare(world);
+
         let mut index = 0;
         while index < items.len() {
             let item = &items[index];
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 6228c69971e09..8ed10896b5980 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -4,7 +4,7 @@ use bevy_asset::{load_internal_asset, AssetId, Handle};
 use bevy_core_pipeline::core_2d::Transparent2d;
 use bevy_ecs::{
     prelude::*,
-    query::{ROQueryItem, WorldQuery},
+    query::{QueryItem, ROQueryItem},
     system::{lifetimeless::*, SystemParamItem, SystemState},
 };
 use bevy_math::{Affine3, Vec2, Vec4};
@@ -334,9 +334,7 @@ impl GetBatchData for Mesh2dPipeline {
     type BufferData = Mesh2dUniform;
 
     fn get_batch_data(
-        (material_bind_group_id, mesh_handle, mesh_transforms): <Self::Query as WorldQuery>::Item<
-            '_,
-        >,
+        (material_bind_group_id, mesh_handle, mesh_transforms): QueryItem<Self::Query>,
     ) -> (Self::CompareData, Self::BufferData) {
         (
             (material_bind_group_id.cloned(), mesh_handle.0.id()),

From c177c6eed11a3e58466401117bdcf36dad5b4396 Mon Sep 17 00:00:00 2001
From: Nicola Papale <nico@nicopap.ch>
Date: Tue, 19 Sep 2023 14:46:50 +0200
Subject: [PATCH 27/33] In reduce, do not change start_range value

---
 crates/bevy_render/src/batching/mod.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 396b19d703bd1..99b97c2254718 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -85,13 +85,13 @@ pub fn batch_and_prepare_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBa
             let batch_data = process_item(i);
             (i.batch_range_mut(), batch_data)
         });
-        items.reduce(|(mut start_range, old_batch_meta), (range, batch_meta)| {
-            if old_batch_meta == batch_meta && batch_meta.is_some() {
+        items.reduce(|(start_range, old_batch_meta), (range, batch_meta)| {
+            if batch_meta.is_some() && old_batch_meta == batch_meta {
                 start_range.end = range.end;
+                (start_range, old_batch_meta)
             } else {
-                start_range = range;
+                (range, batch_meta)
             }
-            (start_range, batch_meta)
         });
     }
 }

From 0bddc59506ae6ea1de1e9e23e72def36102a393b Mon Sep 17 00:00:00 2001
From: Nicola Papale <nico@nicopap.ch>
Date: Tue, 19 Sep 2023 15:03:54 +0200
Subject: [PATCH 28/33] prev over old

---
 crates/bevy_render/src/batching/mod.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 99b97c2254718..87fe8b2a02410 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -85,10 +85,10 @@ pub fn batch_and_prepare_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBa
             let batch_data = process_item(i);
             (i.batch_range_mut(), batch_data)
         });
-        items.reduce(|(start_range, old_batch_meta), (range, batch_meta)| {
-            if batch_meta.is_some() && old_batch_meta == batch_meta {
+        items.reduce(|(start_range, prev_batch_meta), (range, batch_meta)| {
+            if batch_meta.is_some() && prev_batch_meta == batch_meta {
                 start_range.end = range.end;
-                (start_range, old_batch_meta)
+                (start_range, prev_batch_meta)
             } else {
                 (range, batch_meta)
             }

From 7e08cfb877add24f2a8b4e71be10528b7d75f15e Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Tue, 19 Sep 2023 00:09:43 +0200
Subject: [PATCH 29/33] Split get_batch_data into two functions and document
 GetBatchData

---
 crates/bevy_pbr/src/render/mesh.rs     | 15 +++++-----
 crates/bevy_render/src/batching/mod.rs | 40 ++++++++++++++++++--------
 crates/bevy_sprite/src/mesh2d/mesh.rs  | 15 +++++-----
 3 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 91e95f481a536..1121406f5eeaa 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -648,13 +648,14 @@ impl GetBatchData for MeshPipeline {
     type CompareData = (Option<MaterialBindGroupId>, AssetId<Mesh>);
     type BufferData = MeshUniform;
 
-    fn get_batch_data(
-        (material_bind_group_id, mesh_handle, mesh_transforms): QueryItem<Self::Query>,
-    ) -> (Self::CompareData, Self::BufferData) {
-        (
-            (material_bind_group_id.cloned(), mesh_handle.id()),
-            mesh_transforms.into(),
-        )
+    fn get_buffer_data(&(.., mesh_transforms): &QueryItem<Self::Query>) -> Self::BufferData {
+        mesh_transforms.into()
+    }
+
+    fn get_compare_data(
+        &(material_bind_group_id, mesh_handle, ..): &QueryItem<Self::Query>,
+    ) -> Self::CompareData {
+        (material_bind_group_id.cloned(), mesh_handle.id())
     }
 }
 
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index 87fe8b2a02410..d1ae72e78a5fb 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -23,12 +23,12 @@ pub struct NoAutomaticBatching;
 ///   queued to phases
 /// - View bindings are constant across a phase for a given draw function as
 ///   phases are per-view
-/// - `prepare_and_batch_meshes` is the only system that performs this batching
-///   and has sole responsibility for preparing the per-object data. As such
-///   the mesh binding and dynamic offsets are assumed to only be variable as a
-///   result of the `prepare_and_batch_meshes` system, e.g. due to having to split
-///   data across separate uniform bindings within the same buffer due to the
-///   maximum uniform buffer binding size.
+/// - `batch_and_prepare_render_phase` is the only system that performs this
+///   batching and has sole responsibility for preparing the per-object data.
+///   As such the mesh binding and dynamic offsets are assumed to only be
+///   variable as a result of the `batch_and_prepare_render_phase` system, e.g.
+///   due to having to split data across separate uniform bindings within the
+///   same buffer due to the maximum uniform buffer binding size.
 #[derive(PartialEq)]
 struct BatchMeta<T: PartialEq> {
     /// The pipeline id encompasses all pipeline configuration including vertex
@@ -41,6 +41,7 @@ struct BatchMeta<T: PartialEq> {
     dynamic_offset: Option<NonMaxU32>,
     user_data: T,
 }
+
 impl<T: PartialEq> BatchMeta<T> {
     fn new(item: &impl CachedRenderPipelinePhaseItem, user_data: T) -> Self {
         BatchMeta {
@@ -52,11 +53,22 @@ impl<T: PartialEq> BatchMeta<T> {
     }
 }
 
+/// A trait to support getting data used for batching draw commands via phase
+/// items.
 pub trait GetBatchData {
     type Query: ReadOnlyWorldQuery;
+    /// Data used for comparison between phase items. If the pipeline id, draw
+    /// function id, per-instance data buffer dynamic offset and this data
+    /// match, the draws can be batched.
     type CompareData: PartialEq;
+    /// The per-instance data to be inserted into the [`GpuArrayBuffer`]
+    /// containing these data for all instances.
     type BufferData: GpuArrayBufferable + Sync + Send + 'static;
-    fn get_batch_data(batch_data: QueryItem<Self::Query>) -> (Self::CompareData, Self::BufferData);
+    /// Get the per-instance data to be inserted into the [`GpuArrayBuffer`].
+    fn get_buffer_data(query_item: &QueryItem<Self::Query>) -> Self::BufferData;
+    /// Get the data used for comparison when deciding whether draws can be
+    /// batched.
+    fn get_compare_data(query_item: &QueryItem<Self::Query>) -> Self::CompareData;
 }
 
 /// Batch the items in a render phase. This means comparing metadata needed to draw each phase item
@@ -70,20 +82,24 @@ pub fn batch_and_prepare_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBa
 
     let mut process_item = |item: &mut I| {
         let (no_auto_batching, batch_query_item) = query.get(item.entity()).ok()?;
-        let (user_data, buffer_data) = F::get_batch_data(batch_query_item);
 
+        let buffer_data = F::get_buffer_data(&batch_query_item);
         let buffer_index = gpu_array_buffer.push(buffer_data);
+
         let index = buffer_index.index.get();
         *item.batch_range_mut() = index..index + 1;
         *item.dynamic_offset_mut() = buffer_index.dynamic_offset;
 
-        (!no_auto_batching).then(|| BatchMeta::new(item, user_data))
+        (!no_auto_batching).then(|| {
+            let compare_data = F::get_compare_data(&batch_query_item);
+            BatchMeta::new(item, compare_data)
+        })
     };
 
     for mut phase in &mut views {
-        let items = phase.items.iter_mut().map(|i| {
-            let batch_data = process_item(i);
-            (i.batch_range_mut(), batch_data)
+        let items = phase.items.iter_mut().map(|item| {
+            let batch_data = process_item(item);
+            (item.batch_range_mut(), batch_data)
         });
         items.reduce(|(start_range, prev_batch_meta), (range, batch_meta)| {
             if batch_meta.is_some() && prev_batch_meta == batch_meta {
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index 8ed10896b5980..c3c1fdca3bf5b 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -333,13 +333,14 @@ impl GetBatchData for Mesh2dPipeline {
     type CompareData = (Option<Material2dBindGroupId>, AssetId<Mesh>);
     type BufferData = Mesh2dUniform;
 
-    fn get_batch_data(
-        (material_bind_group_id, mesh_handle, mesh_transforms): QueryItem<Self::Query>,
-    ) -> (Self::CompareData, Self::BufferData) {
-        (
-            (material_bind_group_id.cloned(), mesh_handle.0.id()),
-            mesh_transforms.into(),
-        )
+    fn get_buffer_data(&(.., mesh_transforms): &QueryItem<Self::Query>) -> Self::BufferData {
+        mesh_transforms.into()
+    }
+
+    fn get_compare_data(
+        &(material_bind_group_id, mesh_handle, ..): &QueryItem<Self::Query>,
+    ) -> Self::CompareData {
+        (material_bind_group_id.cloned(), mesh_handle.0.id())
     }
 }
 

From 8fccad4cb3fb1c364133f6aeee9f70988609419b Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Thu, 21 Sep 2023 20:40:52 +0200
Subject: [PATCH 30/33] Correct prepare_mesh2d_uniforms comment reference

---
 crates/bevy_sprite/src/mesh2d/material.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs
index 4907579093609..3581ef787e2a2 100644
--- a/crates/bevy_sprite/src/mesh2d/material.rs
+++ b/crates/bevy_sprite/src/mesh2d/material.rs
@@ -447,7 +447,7 @@ pub fn queue_material2d_meshes<M: Material2d>(
                 // -z in front of the camera, the largest distance is -far with values increasing toward the
                 // camera. As such we can just use mesh_z as the distance
                 sort_key: FloatOrd(mesh_z),
-                // Batching is done in prepare_mesh2d_uniforms
+                // Batching is done in batch_and_prepare_render_phase
                 batch_range: 0..1,
                 dynamic_offset: None,
             });

From 9ab2a95c5ee87f386e537c447a4998c1590c06fc Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Thu, 21 Sep 2023 23:26:05 +0200
Subject: [PATCH 31/33] Move nonmax to bevy_utils and reexport

---
 crates/bevy_core_pipeline/Cargo.toml                         | 1 -
 crates/bevy_core_pipeline/src/core_2d/mod.rs                 | 3 +--
 crates/bevy_core_pipeline/src/core_3d/mod.rs                 | 3 +--
 crates/bevy_core_pipeline/src/prepass/mod.rs                 | 3 +--
 crates/bevy_pbr/Cargo.toml                                   | 1 -
 crates/bevy_pbr/src/render/light.rs                          | 2 +-
 crates/bevy_render/Cargo.toml                                | 1 -
 crates/bevy_render/src/batching/mod.rs                       | 2 +-
 crates/bevy_render/src/render_phase/mod.rs                   | 2 +-
 .../src/render_resource/batched_uniform_buffer.rs            | 2 +-
 crates/bevy_render/src/render_resource/gpu_array_buffer.rs   | 2 +-
 crates/bevy_ui/Cargo.toml                                    | 1 -
 crates/bevy_ui/src/render/render_pass.rs                     | 3 +--
 crates/bevy_utils/Cargo.toml                                 | 1 +
 crates/bevy_utils/src/lib.rs                                 | 5 +++++
 15 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/crates/bevy_core_pipeline/Cargo.toml b/crates/bevy_core_pipeline/Cargo.toml
index 128fa346ba0d2..27825880d3473 100644
--- a/crates/bevy_core_pipeline/Cargo.toml
+++ b/crates/bevy_core_pipeline/Cargo.toml
@@ -33,4 +33,3 @@ bevy_utils = { path = "../bevy_utils", version = "0.12.0-dev" }
 serde = { version = "1", features = ["derive"] }
 bitflags = "2.3"
 radsort = "0.1"
-nonmax = "0.5"
diff --git a/crates/bevy_core_pipeline/src/core_2d/mod.rs b/crates/bevy_core_pipeline/src/core_2d/mod.rs
index d9aec69aab3da..530d48cde38a5 100644
--- a/crates/bevy_core_pipeline/src/core_2d/mod.rs
+++ b/crates/bevy_core_pipeline/src/core_2d/mod.rs
@@ -37,8 +37,7 @@ use bevy_render::{
     render_resource::CachedRenderPipelineId,
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
-use bevy_utils::FloatOrd;
-use nonmax::NonMaxU32;
+use bevy_utils::{nonmax::NonMaxU32, FloatOrd};
 
 use crate::{tonemapping::TonemappingNode, upscaling::UpscalingNode};
 
diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs
index e3e58a6055e0d..e30b20b8d5e49 100644
--- a/crates/bevy_core_pipeline/src/core_3d/mod.rs
+++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs
@@ -50,8 +50,7 @@ use bevy_render::{
     view::ViewDepthTexture,
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
-use bevy_utils::{FloatOrd, HashMap};
-use nonmax::NonMaxU32;
+use bevy_utils::{nonmax::NonMaxU32, FloatOrd, HashMap};
 
 use crate::{
     prepass::{
diff --git a/crates/bevy_core_pipeline/src/prepass/mod.rs b/crates/bevy_core_pipeline/src/prepass/mod.rs
index 7e484547fbd43..f408a168e7c7c 100644
--- a/crates/bevy_core_pipeline/src/prepass/mod.rs
+++ b/crates/bevy_core_pipeline/src/prepass/mod.rs
@@ -36,8 +36,7 @@ use bevy_render::{
     render_resource::{CachedRenderPipelineId, Extent3d, TextureFormat},
     texture::CachedTexture,
 };
-use bevy_utils::FloatOrd;
-use nonmax::NonMaxU32;
+use bevy_utils::{nonmax::NonMaxU32, FloatOrd};
 
 pub const DEPTH_PREPASS_FORMAT: TextureFormat = TextureFormat::Depth32Float;
 pub const NORMAL_PREPASS_FORMAT: TextureFormat = TextureFormat::Rgb10a2Unorm;
diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml
index 427a90eb4dfee..5ff50b66d6644 100644
--- a/crates/bevy_pbr/Cargo.toml
+++ b/crates/bevy_pbr/Cargo.toml
@@ -33,4 +33,3 @@ bytemuck = { version = "1", features = ["derive"] }
 naga_oil = "0.8"
 radsort = "0.1"
 smallvec = "1.6"
-nonmax = "0.5"
diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs
index ee089e9a13d4c..bad686b92f2f9 100644
--- a/crates/bevy_pbr/src/render/light.rs
+++ b/crates/bevy_pbr/src/render/light.rs
@@ -27,10 +27,10 @@ use bevy_render::{
 };
 use bevy_transform::{components::GlobalTransform, prelude::Transform};
 use bevy_utils::{
+    nonmax::NonMaxU32,
     tracing::{error, warn},
     HashMap,
 };
-use nonmax::NonMaxU32;
 use std::{hash::Hash, num::NonZeroU64, ops::Range};
 
 #[derive(Component)]
diff --git a/crates/bevy_render/Cargo.toml b/crates/bevy_render/Cargo.toml
index 9d4c767b12fdf..32573be7b9a11 100644
--- a/crates/bevy_render/Cargo.toml
+++ b/crates/bevy_render/Cargo.toml
@@ -82,7 +82,6 @@ encase = { version = "0.6.1", features = ["glam"] }
 # For wgpu profiling using tracing. Use `RUST_LOG=info` to also capture the wgpu spans.
 profiling = { version = "1", features = ["profile-with-tracing"], optional = true }
 async-channel = "1.8"
-nonmax = "0.5"
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 js-sys = "0.3"
diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs
index d1ae72e78a5fb..715402b2b4b16 100644
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@@ -4,7 +4,7 @@ use bevy_ecs::{
     query::{Has, QueryItem, ReadOnlyWorldQuery},
     system::{Query, ResMut},
 };
-use nonmax::NonMaxU32;
+use bevy_utils::nonmax::NonMaxU32;
 
 use crate::{
     render_phase::{CachedRenderPipelinePhaseItem, DrawFunctionId, RenderPhase},
diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs
index bf04d2d13f438..6230d2e1d9cfa 100644
--- a/crates/bevy_render/src/render_phase/mod.rs
+++ b/crates/bevy_render/src/render_phase/mod.rs
@@ -29,9 +29,9 @@ mod draw;
 mod draw_state;
 mod rangefinder;
 
+use bevy_utils::nonmax::NonMaxU32;
 pub use draw::*;
 pub use draw_state::*;
-use nonmax::NonMaxU32;
 pub use rangefinder::*;
 
 use crate::render_resource::{CachedRenderPipelineId, PipelineCache};
diff --git a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
index 983f241b44f63..08c29a8664856 100644
--- a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
+++ b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
@@ -3,11 +3,11 @@ use crate::{
     render_resource::DynamicUniformBuffer,
     renderer::{RenderDevice, RenderQueue},
 };
+use bevy_utils::nonmax::NonMaxU32;
 use encase::{
     private::{ArrayMetadata, BufferMut, Metadata, RuntimeSizedArray, WriteInto, Writer},
     ShaderType,
 };
-use nonmax::NonMaxU32;
 use std::{marker::PhantomData, num::NonZeroU64};
 use wgpu::{BindingResource, Limits};
 
diff --git a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
index c004b9beeab84..13694439ba5dc 100644
--- a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
+++ b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs
@@ -4,8 +4,8 @@ use crate::{
     renderer::{RenderDevice, RenderQueue},
 };
 use bevy_ecs::{prelude::Component, system::Resource};
+use bevy_utils::nonmax::NonMaxU32;
 use encase::{private::WriteInto, ShaderSize, ShaderType};
-use nonmax::NonMaxU32;
 use std::{marker::PhantomData, mem};
 use wgpu::{BindGroupLayoutEntry, BindingResource, BindingType, BufferBindingType, ShaderStages};
 
diff --git a/crates/bevy_ui/Cargo.toml b/crates/bevy_ui/Cargo.toml
index 9e0c3fd6c7a84..60e8d74477ac5 100644
--- a/crates/bevy_ui/Cargo.toml
+++ b/crates/bevy_ui/Cargo.toml
@@ -36,4 +36,3 @@ serde = { version = "1", features = ["derive"] }
 smallvec = { version = "1.6", features = ["union", "const_generics"] }
 bytemuck = { version = "1.5", features = ["derive"] }
 thiserror = "1.0.0"
-nonmax = "0.5"
diff --git a/crates/bevy_ui/src/render/render_pass.rs b/crates/bevy_ui/src/render/render_pass.rs
index c5b4f1a13d091..f483c8cf0bd90 100644
--- a/crates/bevy_ui/src/render/render_pass.rs
+++ b/crates/bevy_ui/src/render/render_pass.rs
@@ -13,8 +13,7 @@ use bevy_render::{
     renderer::*,
     view::*,
 };
-use bevy_utils::FloatOrd;
-use nonmax::NonMaxU32;
+use bevy_utils::{nonmax::NonMaxU32, FloatOrd};
 
 pub struct UiPassNode {
     ui_view_query: QueryState<
diff --git a/crates/bevy_utils/Cargo.toml b/crates/bevy_utils/Cargo.toml
index ba887e8057220..20a4cb32bb99c 100644
--- a/crates/bevy_utils/Cargo.toml
+++ b/crates/bevy_utils/Cargo.toml
@@ -20,6 +20,7 @@ hashbrown = { version = "0.14", features = ["serde"] }
 bevy_utils_proc_macros = {version = "0.12.0-dev", path = "macros"}
 petgraph = "0.6"
 thiserror = "1.0"
+nonmax = "0.5"
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 getrandom = {version = "0.2.0", features = ["js"]}
diff --git a/crates/bevy_utils/src/lib.rs b/crates/bevy_utils/src/lib.rs
index 7916caf769476..52f33f31d7dc6 100644
--- a/crates/bevy_utils/src/lib.rs
+++ b/crates/bevy_utils/src/lib.rs
@@ -34,6 +34,11 @@ pub use thiserror;
 pub use tracing;
 pub use uuid::Uuid;
 
+#[allow(missing_docs)]
+pub mod nonmax {
+    pub use nonmax::*;
+}
+
 use hashbrown::hash_map::RawEntryMut;
 use std::{
     fmt::Debug,

From 9334cc7f59e68d1f1808b0b9031475cb24327417 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Thu, 21 Sep 2023 23:26:52 +0200
Subject: [PATCH 32/33] Remove unnecessary whitespace

---
 crates/bevy_pbr/src/render/mesh_types.wgsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl
index eb5096e564bd9..7412de7a8a5f7 100644
--- a/crates/bevy_pbr/src/render/mesh_types.wgsl
+++ b/crates/bevy_pbr/src/render/mesh_types.wgsl
@@ -28,6 +28,6 @@ struct MorphWeights {
 };
 #endif
 
-const MESH_FLAGS_SHADOW_RECEIVER_BIT: u32            = 1u;
+const MESH_FLAGS_SHADOW_RECEIVER_BIT: u32 = 1u;
 // 2^31 - if the flag is set, the sign is positive, else it is negative
 const MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT: u32 = 2147483648u;

From 69ff8afa532d57ba7805f2ddde55825946f75426 Mon Sep 17 00:00:00 2001
From: Robert Swain <robert.swain@gmail.com>
Date: Thu, 21 Sep 2023 23:45:55 +0200
Subject: [PATCH 33/33] Make Material*BindGroupId Copy and use copied() in hot
 code

---
 crates/bevy_pbr/src/material.rs           | 2 +-
 crates/bevy_pbr/src/render/mesh.rs        | 2 +-
 crates/bevy_sprite/src/mesh2d/material.rs | 2 +-
 crates/bevy_sprite/src/mesh2d/mesh.rs     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs
index 21fe61094a231..d9c835abcaffa 100644
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@@ -591,7 +591,7 @@ pub struct PreparedMaterial<T: Material> {
     pub properties: MaterialProperties,
 }
 
-#[derive(Component, Clone, Default, PartialEq, Eq, Deref, DerefMut)]
+#[derive(Component, Clone, Copy, Default, PartialEq, Eq, Deref, DerefMut)]
 pub struct MaterialBindGroupId(Option<BindGroupId>);
 
 impl<T: Material> PreparedMaterial<T> {
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 34b271d798071..995c8bfa59f2c 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -653,7 +653,7 @@ impl GetBatchData for MeshPipeline {
     fn get_compare_data(
         &(material_bind_group_id, mesh_handle, ..): &QueryItem<Self::Query>,
     ) -> Self::CompareData {
-        (material_bind_group_id.cloned(), mesh_handle.id())
+        (material_bind_group_id.copied(), mesh_handle.id())
     }
 }
 
diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs
index 3581ef787e2a2..4b496c7242ac4 100644
--- a/crates/bevy_sprite/src/mesh2d/material.rs
+++ b/crates/bevy_sprite/src/mesh2d/material.rs
@@ -455,7 +455,7 @@ pub fn queue_material2d_meshes<M: Material2d>(
     }
 }
 
-#[derive(Component, Clone, Default, PartialEq, Eq, Deref, DerefMut)]
+#[derive(Component, Clone, Copy, Default, PartialEq, Eq, Deref, DerefMut)]
 pub struct Material2dBindGroupId(Option<BindGroupId>);
 
 /// Data prepared for a [`Material2d`] instance.
diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs
index c3c1fdca3bf5b..2717acd394d4e 100644
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@@ -340,7 +340,7 @@ impl GetBatchData for Mesh2dPipeline {
     fn get_compare_data(
         &(material_bind_group_id, mesh_handle, ..): &QueryItem<Self::Query>,
     ) -> Self::CompareData {
-        (material_bind_group_id.cloned(), mesh_handle.0.id())
+        (material_bind_group_id.copied(), mesh_handle.0.id())
     }
 }