Make indirect drawing opt-out instead of opt-in, enabling multidraw by default. (bevyengine#16757)

This patch replaces the undocumented `GpuCulling` component with a new component, `NoIndirectDrawing`, effectively turning indirect drawing on by default. Indirect mode is needed for the recently-landed multidraw feature (bevyengine#16427). Since multidraw is such a win for performance, when that feature is supported the small performance tax that indirect mode incurs is virtually always worth paying. To ensure that custom drawing code such as that in the `custom_shader_instancing` example continues to function, this commit additionally makes GPU culling take the `NoFrustumCulling` component into account. This PR is an alternative to bevyengine#16670 that doesn't break the `custom_shader_instancing` example. **PR bevyengine#16755 should land first in order to avoid breaking deferred rendering, as multidraw currently breaks it**. ## Migration Guide * Indirect drawing (GPU culling) is now enabled by default, so the `GpuCulling` component is no longer available. To disable indirect mode, which may be useful with custom render nodes, add the new `NoIndirectDrawing` component to your camera.
VitalyAnkh · Dec 13, 2024 · 00722b8 · 00722b8
1 parent 116c2b0
commit 00722b8
Show file tree

Hide file tree

Showing 11 changed files with 109 additions and 69 deletions.
diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs
@@ -69,7 +69,7 @@ use bevy_render::{
     batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport},
     mesh::allocator::SlabId,
     render_phase::PhaseItemBinKey,
-    view::GpuCulling,
+    view::NoIndirectDrawing,
 };
 pub use camera_3d::*;
 pub use main_opaque_pass_3d_node::*;
@@ -569,20 +569,20 @@ pub fn extract_core_3d_camera_phases(
     mut alpha_mask_3d_phases: ResMut<ViewBinnedRenderPhases<AlphaMask3d>>,
     mut transmissive_3d_phases: ResMut<ViewSortedRenderPhases<Transmissive3d>>,
     mut transparent_3d_phases: ResMut<ViewSortedRenderPhases<Transparent3d>>,
-    cameras_3d: Extract<Query<(RenderEntity, &Camera, Has<GpuCulling>), With<Camera3d>>>,
+    cameras_3d: Extract<Query<(RenderEntity, &Camera, Has<NoIndirectDrawing>), With<Camera3d>>>,
     mut live_entities: Local<EntityHashSet>,
     gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
 ) {
     live_entities.clear();
 
-    for (entity, camera, has_gpu_culling) in &cameras_3d {
+    for (entity, camera, no_indirect_drawing) in &cameras_3d {
         if !camera.is_active {
             continue;
         }
 
         // If GPU culling is in use, use it (and indirect mode); otherwise, just
         // preprocess the meshes.
-        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if has_gpu_culling {
+        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing {
             GpuPreprocessingMode::Culling
         } else {
             GpuPreprocessingMode::PreprocessingOnly
@@ -616,7 +616,7 @@ pub fn extract_camera_prepass_phase(
             (
                 RenderEntity,
                 &Camera,
-                Has<GpuCulling>,
+                Has<NoIndirectDrawing>,
                 Has<DepthPrepass>,
                 Has<NormalPrepass>,
                 Has<MotionVectorPrepass>,
@@ -633,7 +633,7 @@ pub fn extract_camera_prepass_phase(
     for (
         entity,
         camera,
-        gpu_culling,
+        no_indirect_drawing,
         depth_prepass,
         normal_prepass,
         motion_vector_prepass,
@@ -646,7 +646,7 @@ pub fn extract_camera_prepass_phase(
 
         // If GPU culling is in use, use it (and indirect mode); otherwise, just
         // preprocess the meshes.
-        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if gpu_culling {
+        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing {
             GpuPreprocessingMode::Culling
         } else {
             GpuPreprocessingMode::PreprocessingOnly

diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs
@@ -33,7 +33,7 @@ use bevy_render::{
         SpecializedComputePipeline, SpecializedComputePipelines,
     },
     renderer::{RenderContext, RenderDevice, RenderQueue},
-    view::{GpuCulling, ViewUniform, ViewUniformOffset, ViewUniforms},
+    view::{NoIndirectDrawing, ViewUniform, ViewUniformOffset, ViewUniforms},
     Render, RenderApp, RenderSet,
 };
 use bevy_utils::tracing::warn;
@@ -70,7 +70,7 @@ pub struct GpuPreprocessNode {
             Entity,
             Read<PreprocessBindGroup>,
             Read<ViewUniformOffset>,
-            Has<GpuCulling>,
+            Has<NoIndirectDrawing>,
         ),
         Without<SkipGpuPreprocess>,
     >,
@@ -202,7 +202,7 @@ impl Node for GpuPreprocessNode {
                 });
 
         // Run the compute passes.
-        for (view, bind_group, view_uniform_offset, gpu_culling) in
+        for (view, bind_group, view_uniform_offset, no_indirect_drawing) in
             self.view_query.iter_manual(world)
         {
             // Grab the index buffer for this view.
@@ -213,7 +213,7 @@ impl Node for GpuPreprocessNode {
 
             // Select the right pipeline, depending on whether GPU culling is in
             // use.
-            let maybe_pipeline_id = if gpu_culling {
+            let maybe_pipeline_id = if !no_indirect_drawing {
                 preprocess_pipelines.gpu_culling.pipeline_id
             } else {
                 preprocess_pipelines.direct.pipeline_id
@@ -235,7 +235,7 @@ impl Node for GpuPreprocessNode {
             compute_pass.set_pipeline(preprocess_pipeline);
 
             let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![];
-            if gpu_culling {
+            if !no_indirect_drawing {
                 dynamic_offsets.push(view_uniform_offset.offset);
             }
             compute_pass.set_bind_group(0, &bind_group.0, &dynamic_offsets);
@@ -422,7 +422,7 @@ pub fn prepare_preprocess_bind_groups(
         )
         .ok();
 
-        let bind_group = if index_buffer_vec.gpu_culling {
+        let bind_group = if !index_buffer_vec.no_indirect_drawing {
             let (
                 Some(indirect_parameters_buffer),
                 Some(mesh_culling_data_buffer),

diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs
@@ -15,7 +15,7 @@ use bevy_render::{
     batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport},
     camera::SortedCameras,
     mesh::allocator::MeshAllocator,
-    view::GpuCulling,
+    view::NoIndirectDrawing,
 };
 use bevy_render::{
     diagnostic::RecordDiagnostics,
@@ -687,7 +687,7 @@ pub fn prepare_lights(
             &ExtractedView,
             &ExtractedClusterConfig,
             Option<&RenderLayers>,
-            Has<GpuCulling>,
+            Has<NoIndirectDrawing>,
         ),
         With<Camera3d>,
     >,
@@ -1096,15 +1096,15 @@ pub fn prepare_lights(
     let mut live_views = EntityHashSet::with_capacity_and_hasher(views_count, EntityHash);
 
     // set up light data for each view
-    for (entity, extracted_view, clusters, maybe_layers, has_gpu_culling) in sorted_cameras
+    for (entity, extracted_view, clusters, maybe_layers, no_indirect_drawing) in sorted_cameras
         .0
         .iter()
         .filter_map(|sorted_camera| views.get(sorted_camera.entity).ok())
     {
         live_views.insert(entity);
         let mut view_lights = Vec::new();
 
-        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if has_gpu_culling {
+        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing {
             GpuPreprocessingMode::Culling
         } else {
             GpuPreprocessingMode::PreprocessingOnly
@@ -1237,8 +1237,8 @@ pub fn prepare_lights(
                     },
                 ));
 
-                if matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
-                    commands.entity(view_light_entity).insert(GpuCulling);
+                if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
+                    commands.entity(view_light_entity).insert(NoIndirectDrawing);
                 }
 
                 view_lights.push(view_light_entity);
@@ -1329,8 +1329,8 @@ pub fn prepare_lights(
                 LightEntity::Spot { light_entity },
             ));
 
-            if matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
-                commands.entity(view_light_entity).insert(GpuCulling);
+            if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
+                commands.entity(view_light_entity).insert(NoIndirectDrawing);
             }
 
             view_lights.push(view_light_entity);
@@ -1464,8 +1464,8 @@ pub fn prepare_lights(
                     },
                 ));
 
-                if matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
-                    commands.entity(view_light_entity).insert(GpuCulling);
+                if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
+                    commands.entity(view_light_entity).insert(NoIndirectDrawing);
                 }
 
                 view_lights.push(view_light_entity);

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
@@ -37,8 +37,8 @@ use bevy_render::{
     renderer::{RenderDevice, RenderQueue},
     texture::DefaultImageSampler,
     view::{
-        prepare_view_targets, GpuCulling, RenderVisibilityRanges, ViewTarget, ViewUniformOffset,
-        ViewVisibility, VisibilityRange,
+        prepare_view_targets, NoFrustumCulling, NoIndirectDrawing, RenderVisibilityRanges,
+        ViewTarget, ViewUniformOffset, ViewVisibility, VisibilityRange,
     },
     Extract,
 };
@@ -421,6 +421,11 @@ bitflags::bitflags! {
         ///
         /// This will be `u16::MAX` if this mesh has no LOD.
         const LOD_INDEX_MASK              = (1 << 16) - 1;
+        /// Disables frustum culling for this mesh.
+        ///
+        /// This corresponds to the
+        /// [`bevy_render::view::visibility::NoFrustumCulling`] component.
+        const NO_FRUSTUM_CULLING          = 1 << 28;
         const SHADOW_RECEIVER             = 1 << 29;
         const TRANSMITTED_SHADOW_RECEIVER = 1 << 30;
         // Indicates the sign of the determinant of the 3x3 model matrix. If the sign is positive,
@@ -435,6 +440,7 @@ impl MeshFlags {
     fn from_components(
         transform: &GlobalTransform,
         lod_index: Option<NonMaxU16>,
+        no_frustum_culling: bool,
         not_shadow_receiver: bool,
         transmitted_receiver: bool,
     ) -> MeshFlags {
@@ -443,6 +449,9 @@ impl MeshFlags {
         } else {
             MeshFlags::SHADOW_RECEIVER
         };
+        if no_frustum_culling {
+            mesh_flags |= MeshFlags::NO_FRUSTUM_CULLING;
+        }
         if transmitted_receiver {
             mesh_flags |= MeshFlags::TRANSMITTED_SHADOW_RECEIVER;
         }
@@ -1046,6 +1055,7 @@ pub fn extract_meshes_for_cpu_building(
             &GlobalTransform,
             Option<&PreviousGlobalTransform>,
             &Mesh3d,
+            Has<NoFrustumCulling>,
             Has<NotShadowReceiver>,
             Has<TransmittedShadowReceiver>,
             Has<NotShadowCaster>,
@@ -1063,6 +1073,7 @@ pub fn extract_meshes_for_cpu_building(
             transform,
             previous_transform,
             mesh,
+            no_frustum_culling,
             not_shadow_receiver,
             transmitted_receiver,
             not_shadow_caster,
@@ -1084,6 +1095,7 @@ pub fn extract_meshes_for_cpu_building(
             let mesh_flags = MeshFlags::from_components(
                 transform,
                 lod_index,
+                no_frustum_culling,
                 not_shadow_receiver,
                 transmitted_receiver,
             );
@@ -1155,6 +1167,7 @@ pub fn extract_meshes_for_gpu_building(
                 Option<&Lightmap>,
                 Option<&Aabb>,
                 &Mesh3d,
+                Has<NoFrustumCulling>,
                 Has<NotShadowReceiver>,
                 Has<TransmittedShadowReceiver>,
                 Has<NotShadowCaster>,
@@ -1168,6 +1181,7 @@ pub fn extract_meshes_for_gpu_building(
                 Changed<Lightmap>,
                 Changed<Aabb>,
                 Changed<Mesh3d>,
+                Changed<NoFrustumCulling>,
                 Changed<NotShadowReceiver>,
                 Changed<TransmittedShadowReceiver>,
                 Changed<NotShadowCaster>,
@@ -1179,7 +1193,7 @@ pub fn extract_meshes_for_gpu_building(
     mut removed_visibilities_query: Extract<RemovedComponents<ViewVisibility>>,
     mut removed_global_transforms_query: Extract<RemovedComponents<GlobalTransform>>,
     mut removed_meshes_query: Extract<RemovedComponents<Mesh3d>>,
-    cameras_query: Extract<Query<(), (With<Camera>, With<GpuCulling>)>>,
+    cameras_query: Extract<Query<(), (With<Camera>, Without<NoIndirectDrawing>)>>,
 ) {
     let any_gpu_culling = !cameras_query.is_empty();
     for render_mesh_instance_queue in render_mesh_instance_queues.iter_mut() {
@@ -1209,6 +1223,7 @@ pub fn extract_meshes_for_gpu_building(
             lightmap,
             aabb,
             mesh,
+            no_frustum_culling,
             not_shadow_receiver,
             transmitted_receiver,
             not_shadow_caster,
@@ -1231,6 +1246,7 @@ pub fn extract_meshes_for_gpu_building(
             let mesh_flags = MeshFlags::from_components(
                 transform,
                 lod_index,
+                no_frustum_culling,
                 not_shadow_receiver,
                 transmitted_receiver,
             );

diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
@@ -7,7 +7,7 @@
 // mesh's transform on the previous frame and writes it into the `MeshUniform`
 // so that TAA works.
 
-#import bevy_pbr::mesh_types::Mesh
+#import bevy_pbr::mesh_types::{Mesh, MESH_FLAGS_NO_FRUSTUM_CULLING_BIT}
 #import bevy_render::maths
 #import bevy_render::view::View
 
@@ -145,13 +145,15 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
 
     // Cull if necessary.
 #ifdef FRUSTUM_CULLING
-    let aabb_center = mesh_culling_data[input_index].aabb_center.xyz;
-    let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz;
-
-    // Do an OBB-based frustum cull.
-    let model_center = world_from_local * vec4(aabb_center, 1.0);
-    if (!view_frustum_intersects_obb(world_from_local, model_center, aabb_half_extents)) {
-        return;
+    if ((current_input[input_index].flags & MESH_FLAGS_NO_FRUSTUM_CULLING_BIT) == 0u) {
+        let aabb_center = mesh_culling_data[input_index].aabb_center.xyz;
+        let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz;
+
+        // Do an OBB-based frustum cull.
+        let model_center = world_from_local * vec4(aabb_center, 1.0);
+        if (!view_frustum_intersects_obb(world_from_local, model_center, aabb_half_extents)) {
+            return;
+        }
     }
 #endif
 

diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl
@@ -37,6 +37,8 @@ struct MorphWeights {
 
 // [2^0, 2^16)
 const MESH_FLAGS_VISIBILITY_RANGE_INDEX_BITS: u32 = 65535u;
+// 2^28
+const MESH_FLAGS_NO_FRUSTUM_CULLING_BIT: u32 = 268435456u;
 // 2^29
 const MESH_FLAGS_SHADOW_RECEIVER_BIT: u32 = 536870912u;
 // 2^30