Make indirect drawing opt-out instead of opt-in, enabling multidraw by default. (#16757)

This patch replaces the undocumented `GpuCulling` component with a new component, `NoIndirectDrawing`, effectively turning indirect drawing on by default. Indirect mode is needed for the recently-landed multidraw feature (#16427). Since multidraw is such a win for performance, when that feature is supported the small performance tax that indirect mode incurs is virtually always worth paying.

To ensure that custom drawing code such as that in the `custom_shader_instancing` example continues to function, this commit additionally makes GPU culling take the `NoFrustumCulling` component into account.

This PR is an alternative to #16670 that doesn't break the `custom_shader_instancing` example. **PR #16755 should land first in order to avoid breaking deferred rendering, as multidraw currently breaks it**.

## Migration Guide

* Indirect drawing (GPU culling) is now enabled by default, so the `GpuCulling` component is no longer available. To disable indirect mode, which may be useful with custom render nodes, add the new `NoIndirectDrawing` component to your camera.
2024-12-12 22:16:57 -08:00 · 2024-12-12 22:16:57 -08:00 · 00722b8d0f
commit 00722b8d0f
parent 116c2b02fe
11 changed files with 108 additions and 68 deletions
--- a/crates/bevy_core_pipeline/src/core_3d/mod.rs
+++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs
@ -69,7 +69,7 @@ use bevy_render::{
    batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport},
    mesh::allocator::SlabId,
    render_phase::PhaseItemBinKey,
-    view::GpuCulling,
+    view::NoIndirectDrawing,
 };
 pub use camera_3d::*;
 pub use main_opaque_pass_3d_node::*;
@ -569,20 +569,20 @@ pub fn extract_core_3d_camera_phases(
    mut alpha_mask_3d_phases: ResMut<ViewBinnedRenderPhases<AlphaMask3d>>,
    mut transmissive_3d_phases: ResMut<ViewSortedRenderPhases<Transmissive3d>>,
    mut transparent_3d_phases: ResMut<ViewSortedRenderPhases<Transparent3d>>,
-    cameras_3d: Extract<Query<(RenderEntity, &Camera, Has<GpuCulling>), With<Camera3d>>>,
+    cameras_3d: Extract<Query<(RenderEntity, &Camera, Has<NoIndirectDrawing>), With<Camera3d>>>,
    mut live_entities: Local<EntityHashSet>,
    gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
 ) {
    live_entities.clear();

-    for (entity, camera, has_gpu_culling) in &cameras_3d {
+    for (entity, camera, no_indirect_drawing) in &cameras_3d {
        if !camera.is_active {
            continue;
        }

        // If GPU culling is in use, use it (and indirect mode); otherwise, just
        // preprocess the meshes.
-        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if has_gpu_culling {
+        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing {
            GpuPreprocessingMode::Culling
        } else {
            GpuPreprocessingMode::PreprocessingOnly
@ -616,7 +616,7 @@ pub fn extract_camera_prepass_phase(
            (
                RenderEntity,
                &Camera,
-                Has<GpuCulling>,
+                Has<NoIndirectDrawing>,
                Has<DepthPrepass>,
                Has<NormalPrepass>,
                Has<MotionVectorPrepass>,
@ -633,7 +633,7 @@ pub fn extract_camera_prepass_phase(
    for (
        entity,
        camera,
-        gpu_culling,
+        no_indirect_drawing,
        depth_prepass,
        normal_prepass,
        motion_vector_prepass,
@ -646,7 +646,7 @@ pub fn extract_camera_prepass_phase(

        // If GPU culling is in use, use it (and indirect mode); otherwise, just
        // preprocess the meshes.
-        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if gpu_culling {
+        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing {
            GpuPreprocessingMode::Culling
        } else {
            GpuPreprocessingMode::PreprocessingOnly
--- a/crates/bevy_pbr/src/render/gpu_preprocess.rs
+++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs
@ -33,7 +33,7 @@ use bevy_render::{
        SpecializedComputePipeline, SpecializedComputePipelines,
    },
    renderer::{RenderContext, RenderDevice, RenderQueue},
-    view::{GpuCulling, ViewUniform, ViewUniformOffset, ViewUniforms},
+    view::{NoIndirectDrawing, ViewUniform, ViewUniformOffset, ViewUniforms},
    Render, RenderApp, RenderSet,
 };
 use bevy_utils::tracing::warn;
@ -70,7 +70,7 @@ pub struct GpuPreprocessNode {
            Entity,
            Read<PreprocessBindGroup>,
            Read<ViewUniformOffset>,
-            Has<GpuCulling>,
+            Has<NoIndirectDrawing>,
        ),
        Without<SkipGpuPreprocess>,
    >,
@ -202,7 +202,7 @@ impl Node for GpuPreprocessNode {
                });

        // Run the compute passes.
-        for (view, bind_group, view_uniform_offset, gpu_culling) in
+        for (view, bind_group, view_uniform_offset, no_indirect_drawing) in
            self.view_query.iter_manual(world)
        {
            // Grab the index buffer for this view.
@ -213,7 +213,7 @@ impl Node for GpuPreprocessNode {

            // Select the right pipeline, depending on whether GPU culling is in
            // use.
-            let maybe_pipeline_id = if gpu_culling {
+            let maybe_pipeline_id = if !no_indirect_drawing {
                preprocess_pipelines.gpu_culling.pipeline_id
            } else {
                preprocess_pipelines.direct.pipeline_id
@ -235,7 +235,7 @@ impl Node for GpuPreprocessNode {
            compute_pass.set_pipeline(preprocess_pipeline);

            let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![];
-            if gpu_culling {
+            if !no_indirect_drawing {
                dynamic_offsets.push(view_uniform_offset.offset);
            }
            compute_pass.set_bind_group(0, &bind_group.0, &dynamic_offsets);
@ -422,7 +422,7 @@ pub fn prepare_preprocess_bind_groups(
        )
        .ok();

-        let bind_group = if index_buffer_vec.gpu_culling {
+        let bind_group = if !index_buffer_vec.no_indirect_drawing {
            let (
                Some(indirect_parameters_buffer),
                Some(mesh_culling_data_buffer),
--- a/crates/bevy_pbr/src/render/light.rs
+++ b/crates/bevy_pbr/src/render/light.rs
@ -15,7 +15,7 @@ use bevy_render::{
    batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport},
    camera::SortedCameras,
    mesh::allocator::MeshAllocator,
-    view::GpuCulling,
+    view::NoIndirectDrawing,
 };
 use bevy_render::{
    diagnostic::RecordDiagnostics,
@ -687,7 +687,7 @@ pub fn prepare_lights(
            &ExtractedView,
            &ExtractedClusterConfig,
            Option<&RenderLayers>,
-            Has<GpuCulling>,
+            Has<NoIndirectDrawing>,
        ),
        With<Camera3d>,
    >,
@ -1096,7 +1096,7 @@ pub fn prepare_lights(
    let mut live_views = EntityHashSet::with_capacity_and_hasher(views_count, EntityHash);

    // set up light data for each view
-    for (entity, extracted_view, clusters, maybe_layers, has_gpu_culling) in sorted_cameras
+    for (entity, extracted_view, clusters, maybe_layers, no_indirect_drawing) in sorted_cameras
        .0
        .iter()
        .filter_map(|sorted_camera| views.get(sorted_camera.entity).ok())
@ -1104,7 +1104,7 @@ pub fn prepare_lights(
        live_views.insert(entity);
        let mut view_lights = Vec::new();

-        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if has_gpu_culling {
+        let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing {
            GpuPreprocessingMode::Culling
        } else {
            GpuPreprocessingMode::PreprocessingOnly
@ -1237,8 +1237,8 @@ pub fn prepare_lights(
                    },
                ));

-                if matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
-                    commands.entity(view_light_entity).insert(GpuCulling);
+                if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
+                    commands.entity(view_light_entity).insert(NoIndirectDrawing);
                }

                view_lights.push(view_light_entity);
@ -1329,8 +1329,8 @@ pub fn prepare_lights(
                LightEntity::Spot { light_entity },
            ));

-            if matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
-                commands.entity(view_light_entity).insert(GpuCulling);
+            if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
+                commands.entity(view_light_entity).insert(NoIndirectDrawing);
            }

            view_lights.push(view_light_entity);
@ -1464,8 +1464,8 @@ pub fn prepare_lights(
                    },
                ));

-                if matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
-                    commands.entity(view_light_entity).insert(GpuCulling);
+                if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
+                    commands.entity(view_light_entity).insert(NoIndirectDrawing);
                }

                view_lights.push(view_light_entity);
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@ -37,8 +37,8 @@ use bevy_render::{
    renderer::{RenderDevice, RenderQueue},
    texture::DefaultImageSampler,
    view::{
-        prepare_view_targets, GpuCulling, RenderVisibilityRanges, ViewTarget, ViewUniformOffset,
-        ViewVisibility, VisibilityRange,
+        prepare_view_targets, NoFrustumCulling, NoIndirectDrawing, RenderVisibilityRanges,
+        ViewTarget, ViewUniformOffset, ViewVisibility, VisibilityRange,
    },
    Extract,
 };
@ -421,6 +421,11 @@ bitflags::bitflags! {
        ///
        /// This will be `u16::MAX` if this mesh has no LOD.
        const LOD_INDEX_MASK              = (1 << 16) - 1;
+        /// Disables frustum culling for this mesh.
+        ///
+        /// This corresponds to the
+        /// [`bevy_render::view::visibility::NoFrustumCulling`] component.
+        const NO_FRUSTUM_CULLING          = 1 << 28;
        const SHADOW_RECEIVER             = 1 << 29;
        const TRANSMITTED_SHADOW_RECEIVER = 1 << 30;
        // Indicates the sign of the determinant of the 3x3 model matrix. If the sign is positive,
@ -435,6 +440,7 @@ impl MeshFlags {
    fn from_components(
        transform: &GlobalTransform,
        lod_index: Option<NonMaxU16>,
+        no_frustum_culling: bool,
        not_shadow_receiver: bool,
        transmitted_receiver: bool,
    ) -> MeshFlags {
@ -443,6 +449,9 @@ impl MeshFlags {
        } else {
            MeshFlags::SHADOW_RECEIVER
        };
+        if no_frustum_culling {
+            mesh_flags |= MeshFlags::NO_FRUSTUM_CULLING;
+        }
        if transmitted_receiver {
            mesh_flags |= MeshFlags::TRANSMITTED_SHADOW_RECEIVER;
        }
@ -1046,6 +1055,7 @@ pub fn extract_meshes_for_cpu_building(
            &GlobalTransform,
            Option<&PreviousGlobalTransform>,
            &Mesh3d,
+            Has<NoFrustumCulling>,
            Has<NotShadowReceiver>,
            Has<TransmittedShadowReceiver>,
            Has<NotShadowCaster>,
@ -1063,6 +1073,7 @@ pub fn extract_meshes_for_cpu_building(
            transform,
            previous_transform,
            mesh,
+            no_frustum_culling,
            not_shadow_receiver,
            transmitted_receiver,
            not_shadow_caster,
@ -1084,6 +1095,7 @@ pub fn extract_meshes_for_cpu_building(
            let mesh_flags = MeshFlags::from_components(
                transform,
                lod_index,
+                no_frustum_culling,
                not_shadow_receiver,
                transmitted_receiver,
            );
@ -1155,6 +1167,7 @@ pub fn extract_meshes_for_gpu_building(
                Option<&Lightmap>,
                Option<&Aabb>,
                &Mesh3d,
+                Has<NoFrustumCulling>,
                Has<NotShadowReceiver>,
                Has<TransmittedShadowReceiver>,
                Has<NotShadowCaster>,
@ -1168,6 +1181,7 @@ pub fn extract_meshes_for_gpu_building(
                Changed<Lightmap>,
                Changed<Aabb>,
                Changed<Mesh3d>,
+                Changed<NoFrustumCulling>,
                Changed<NotShadowReceiver>,
                Changed<TransmittedShadowReceiver>,
                Changed<NotShadowCaster>,
@ -1179,7 +1193,7 @@ pub fn extract_meshes_for_gpu_building(
    mut removed_visibilities_query: Extract<RemovedComponents<ViewVisibility>>,
    mut removed_global_transforms_query: Extract<RemovedComponents<GlobalTransform>>,
    mut removed_meshes_query: Extract<RemovedComponents<Mesh3d>>,
-    cameras_query: Extract<Query<(), (With<Camera>, With<GpuCulling>)>>,
+    cameras_query: Extract<Query<(), (With<Camera>, Without<NoIndirectDrawing>)>>,
 ) {
    let any_gpu_culling = !cameras_query.is_empty();
    for render_mesh_instance_queue in render_mesh_instance_queues.iter_mut() {
@ -1209,6 +1223,7 @@ pub fn extract_meshes_for_gpu_building(
            lightmap,
            aabb,
            mesh,
+            no_frustum_culling,
            not_shadow_receiver,
            transmitted_receiver,
            not_shadow_caster,
@ -1231,6 +1246,7 @@ pub fn extract_meshes_for_gpu_building(
            let mesh_flags = MeshFlags::from_components(
                transform,
                lod_index,
+                no_frustum_culling,
                not_shadow_receiver,
                transmitted_receiver,
            );
--- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
+++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
@ -7,7 +7,7 @@
 // mesh's transform on the previous frame and writes it into the `MeshUniform`
 // so that TAA works.

-#import bevy_pbr::mesh_types::Mesh
+#import bevy_pbr::mesh_types::{Mesh, MESH_FLAGS_NO_FRUSTUM_CULLING_BIT}
 #import bevy_render::maths
 #import bevy_render::view::View

@ -145,13 +145,15 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {

    // Cull if necessary.
 #ifdef FRUSTUM_CULLING
-    let aabb_center = mesh_culling_data[input_index].aabb_center.xyz;
-    let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz;
+    if ((current_input[input_index].flags & MESH_FLAGS_NO_FRUSTUM_CULLING_BIT) == 0u) {
+        let aabb_center = mesh_culling_data[input_index].aabb_center.xyz;
+        let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz;

-    // Do an OBB-based frustum cull.
-    let model_center = world_from_local * vec4(aabb_center, 1.0);
-    if (!view_frustum_intersects_obb(world_from_local, model_center, aabb_half_extents)) {
-        return;
+        // Do an OBB-based frustum cull.
+        let model_center = world_from_local * vec4(aabb_center, 1.0);
+        if (!view_frustum_intersects_obb(world_from_local, model_center, aabb_half_extents)) {
+            return;
+        }
    }
 #endif

--- a/crates/bevy_pbr/src/render/mesh_types.wgsl
+++ b/crates/bevy_pbr/src/render/mesh_types.wgsl
@ -37,6 +37,8 @@ struct MorphWeights {

 // [2^0, 2^16)
 const MESH_FLAGS_VISIBILITY_RANGE_INDEX_BITS: u32 = 65535u;
+// 2^28
+const MESH_FLAGS_NO_FRUSTUM_CULLING_BIT: u32 = 268435456u;
 // 2^29
 const MESH_FLAGS_SHADOW_RECEIVER_BIT: u32 = 536870912u;
 // 2^30
--- a/crates/bevy_render/src/batching/gpu_preprocessing.rs
+++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs
@ -24,7 +24,7 @@ use crate::{
    },
    render_resource::{BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
    renderer::{RenderAdapter, RenderDevice, RenderQueue},
-    view::{ExtractedView, GpuCulling, ViewTarget},
+    view::{ExtractedView, NoIndirectDrawing, ViewTarget},
    Render, RenderApp, RenderSet,
 };

@ -101,12 +101,13 @@ pub enum GpuPreprocessingMode {

    /// GPU preprocessing is in use, but GPU culling isn't.
    ///
-    /// This is used by default.
+    /// This is used when the [`NoIndirectDrawing`] component is present on the
+    /// camera.
    PreprocessingOnly,

    /// Both GPU preprocessing and GPU culling are in use.
    ///
-    /// This is used when the [`GpuCulling`] component is present on the camera.
+    /// This is used by default.
    Culling,
 }

@ -247,8 +248,8 @@ where
 pub struct PreprocessWorkItemBuffer {
    /// The buffer of work items.
    pub buffer: BufferVec<PreprocessWorkItem>,
-    /// True if we're using GPU culling.
-    pub gpu_culling: bool,
+    /// True if we're drawing directly instead of indirectly.
+    pub no_indirect_drawing: bool,
 }

 /// One invocation of the preprocessing shader: i.e. one mesh instance in a
@ -382,7 +383,7 @@ impl FromWorld for GpuPreprocessingSupport {
            GpuPreprocessingMode::None
        } else if !device
            .features()
-            .contains(Features::INDIRECT_FIRST_INSTANCE) ||
+            .contains(Features::INDIRECT_FIRST_INSTANCE | Features::MULTI_DRAW_INDIRECT) ||
            !adapter.get_downlevel_capabilities().flags.contains(
        DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW)
        {
@ -529,7 +530,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
    mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
-    mut views: Query<(Entity, Has<GpuCulling>), With<ExtractedView>>,
+    mut views: Query<(Entity, Has<NoIndirectDrawing>), With<ExtractedView>>,
    system_param_item: StaticSystemParam<GFBD::Param>,
 ) where
    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
@ -542,7 +543,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
        ..
    } = gpu_array_buffer.into_inner();

-    for (view, gpu_culling) in &mut views {
+    for (view, no_indirect_drawing) in &mut views {
        let Some(phase) = sorted_render_phases.get_mut(&view) else {
            continue;
        };
@ -553,7 +554,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
                .entry(view)
                .or_insert_with(|| PreprocessWorkItemBuffer {
                    buffer: BufferVec::new(BufferUsages::STORAGE),
-                    gpu_culling,
+                    no_indirect_drawing,
                });

        // Walk through the list of phase items, building up batches as we go.
@ -604,7 +605,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
                }

                // Start a new batch.
-                let indirect_parameters_index = if gpu_culling {
+                let indirect_parameters_index = if !no_indirect_drawing {
                    GFBD::get_batch_indirect_parameters_index(
                        &system_param_item,
                        &mut indirect_parameters_buffer,
@ -647,7 +648,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
    mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
-    mut views: Query<(Entity, Has<GpuCulling>), With<ExtractedView>>,
+    mut views: Query<(Entity, Has<NoIndirectDrawing>), With<ExtractedView>>,
    param: StaticSystemParam<GFBD::Param>,
 ) where
    BPI: BinnedPhaseItem,
@ -661,7 +662,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
        ..
    } = gpu_array_buffer.into_inner();

-    for (view, gpu_culling) in &mut views {
+    for (view, no_indirect_drawing) in &mut views {
        let Some(phase) = binned_render_phases.get_mut(&view) else {
            continue;
        };
@ -673,7 +674,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                .entry(view)
                .or_insert_with(|| PreprocessWorkItemBuffer {
                    buffer: BufferVec::new(BufferUsages::STORAGE),
-                    gpu_culling,
+                    no_indirect_drawing,
                });

        // Prepare batchables.
@ -697,6 +698,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(

                match batch {
                    Some(ref mut batch) => {
+                        // Append to the current batch.
                        batch.instance_range.end = output_index + 1;
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
@ -710,7 +712,8 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                        });
                    }

-                    None if gpu_culling => {
+                    None if !no_indirect_drawing => {
+                        // Start a new batch, in indirect mode.
                        let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
                            &system_param_item,
                            &mut indirect_parameters_buffer,
@ -731,6 +734,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                    }

                    None => {
+                        // Start a new batch, in direct mode.
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_index,
@ -783,7 +787,9 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                };
                let output_index = data_buffer.add() as u32;

-                if gpu_culling {
+                if !no_indirect_drawing {
+                    // We're in indirect mode, so add an indirect parameters
+                    // index.
                    let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
                        &system_param_item,
                        &mut indirect_parameters_buffer,
--- a/crates/bevy_render/src/camera/camera.rs
+++ b/crates/bevy_render/src/camera/camera.rs
@ -9,7 +9,7 @@ use crate::{
    sync_world::{RenderEntity, SyncToRenderWorld},
    texture::GpuImage,
    view::{
-        ColorGrading, ExtractedView, ExtractedWindows, GpuCulling, Msaa, RenderLayers,
+        ColorGrading, ExtractedView, ExtractedWindows, Msaa, NoIndirectDrawing, RenderLayers,
        RenderVisibleEntities, ViewUniformOffset, Visibility, VisibleEntities,
    },
    Extract,
@ -32,7 +32,7 @@ use bevy_math::{ops, vec2, Dir3, Mat4, Ray3d, Rect, URect, UVec2, UVec4, Vec2, V
 use bevy_reflect::prelude::*;
 use bevy_render_macros::ExtractComponent;
 use bevy_transform::components::{GlobalTransform, Transform};
-use bevy_utils::{tracing::warn, warn_once, HashMap, HashSet};
+use bevy_utils::{tracing::warn, HashMap, HashSet};
 use bevy_window::{
    NormalizedWindowRef, PrimaryWindow, Window, WindowCreated, WindowRef, WindowResized,
    WindowScaleFactorChanged,
@ -1033,7 +1033,7 @@ pub fn extract_cameras(
            Option<&TemporalJitter>,
            Option<&RenderLayers>,
            Option<&Projection>,
-            Has<GpuCulling>,
+            Has<NoIndirectDrawing>,
        )>,
    >,
    primary_window: Extract<Query<Entity, With<PrimaryWindow>>>,
@ -1053,7 +1053,7 @@ pub fn extract_cameras(
        temporal_jitter,
        render_layers,
        projection,
-        gpu_culling,
+        no_indirect_drawing,
    ) in query.iter()
    {
        if !camera.is_active {
@ -1064,7 +1064,7 @@ pub fn extract_cameras(
                TemporalJitter,
                RenderLayers,
                Projection,
-                GpuCulling,
+                NoIndirectDrawing,
                ViewUniformOffset,
            )>();
            continue;
@ -1156,14 +1156,13 @@ pub fn extract_cameras(
                commands.insert(perspective.clone());
            }

-            if gpu_culling {
-                if gpu_preprocessing_support.max_supported_mode == GpuPreprocessingMode::Culling {
-                    commands.insert(GpuCulling);
-                } else {
-                    warn_once!(
-                        "GPU culling isn't supported on this platform; ignoring `GpuCulling`."
-                    );
-                }
+            if no_indirect_drawing
+                || !matches!(
+                    gpu_preprocessing_support.max_supported_mode,
+                    GpuPreprocessingMode::Culling
+                )
+            {
+                commands.insert(NoIndirectDrawing);
            }
        };
    }
--- a/crates/bevy_render/src/view/mod.rs
+++ b/crates/bevy_render/src/view/mod.rs
@ -620,8 +620,19 @@ impl From<ColorGrading> for ColorGradingUniform {
    }
 }

+/// Add this component to a camera to disable *indirect mode*.
+///
+/// Indirect mode, automatically enabled on supported hardware, allows Bevy to
+/// offload transform and cull operations to the GPU, reducing CPU overhead.
+/// Doing this, however, reduces the amount of control that your app has over
+/// instancing decisions. In certain circumstances, you may want to disable
+/// indirect drawing so that your app can manually instance meshes as it sees
+/// fit. See the `custom_shader_instancing` example.
+///
+/// The vast majority of applications will not need to use this component, as it
+/// generally reduces rendering performance.
 #[derive(Component)]
-pub struct GpuCulling;
+pub struct NoIndirectDrawing;

 #[derive(Component)]
 pub struct NoCpuCulling;
--- a/examples/shader/custom_shader_instancing.rs
+++ b/examples/shader/custom_shader_instancing.rs
@ -30,7 +30,7 @@ use bevy::{
        render_resource::*,
        renderer::RenderDevice,
        sync_world::MainEntity,
-        view::{ExtractedView, NoFrustumCulling},
+        view::{ExtractedView, NoFrustumCulling, NoIndirectDrawing},
        Render, RenderApp, RenderSet,
    },
 };
@ -73,6 +73,10 @@ fn setup(mut commands: Commands, mut meshes: ResMut<Assets<Mesh>>) {
    commands.spawn((
        Camera3d::default(),
        Transform::from_xyz(0.0, 0.0, 15.0).looking_at(Vec3::ZERO, Vec3::Y),
+        // We need this component because we use `draw_indexed` and `draw`
+        // instead of `draw_indexed_indirect` and `draw_indirect` in
+        // `DrawMeshInstanced::render`.
+        NoIndirectDrawing,
    ));
 }

--- a/examples/stress_tests/many_cubes.rs
+++ b/examples/stress_tests/many_cubes.rs
@ -20,7 +20,7 @@ use bevy::{
        batching::NoAutomaticBatching,
        render_asset::RenderAssetUsages,
        render_resource::{Extent3d, TextureDimension, TextureFormat},
-        view::{GpuCulling, NoCpuCulling, NoFrustumCulling},
+        view::{NoCpuCulling, NoFrustumCulling, NoIndirectDrawing},
    },
    window::{PresentMode, WindowResolution},
    winit::{UpdateMode, WinitSettings},
@ -59,9 +59,9 @@ struct Args {
    #[argh(switch)]
    no_automatic_batching: bool,

-    /// whether to enable GPU culling.
+    /// whether to disable indirect drawing.
    #[argh(switch)]
-    gpu_culling: bool,
+    no_indirect_drawing: bool,

    /// whether to disable CPU culling.
    #[argh(switch)]
@ -176,8 +176,8 @@ fn setup(

            // camera
            let mut camera = commands.spawn(Camera3d::default());
-            if args.gpu_culling {
-                camera.insert(GpuCulling);
+            if args.no_indirect_drawing {
+                camera.insert(NoIndirectDrawing);
            }
            if args.no_cpu_culling {
                camera.insert(NoCpuCulling);