diff --git a/Cargo.toml b/Cargo.toml index 502f73a17e..7383747e7f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4132,3 +4132,14 @@ name = "Clustered Decals" description = "Demonstrates clustered decals" category = "3D Rendering" wasm = false + +[[example]] +name = "occlusion_culling" +path = "examples/3d/occlusion_culling.rs" +doc-scrape-examples = true + +[package.metadata.example.occlusion_culling] +name = "Occlusion Culling" +description = "Demonstration of Occlusion Culling" +category = "3D Rendering" +wasm = false diff --git a/crates/bevy_core_pipeline/Cargo.toml b/crates/bevy_core_pipeline/Cargo.toml index 643ae1bf18..3994040369 100644 --- a/crates/bevy_core_pipeline/Cargo.toml +++ b/crates/bevy_core_pipeline/Cargo.toml @@ -47,6 +47,7 @@ nonmax = "0.5" smallvec = "1" thiserror = { version = "2", default-features = false } tracing = { version = "0.1", default-features = false, features = ["std"] } +bytemuck = { version = "1" } [lints] workspace = true diff --git a/crates/bevy_core_pipeline/src/core_2d/mod.rs b/crates/bevy_core_pipeline/src/core_2d/mod.rs index 60b245c417..f820a245ce 100644 --- a/crates/bevy_core_pipeline/src/core_2d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_2d/mod.rs @@ -312,6 +312,8 @@ impl PhaseItem for AlphaMask2d { } impl BinnedPhaseItem for AlphaMask2d { + // Since 2D meshes presently can't be multidrawn, the batch set key is + // irrelevant. 
type BatchSetKey = BatchSetKey2d; type BinKey = AlphaMask2dBinKey; diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs index 19b691429e..0d6df5c887 100644 --- a/crates/bevy_core_pipeline/src/core_3d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs @@ -16,7 +16,9 @@ pub mod graph { #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] pub enum Node3d { MsaaWriteback, - Prepass, + EarlyPrepass, + EarlyDownsampleDepth, + LatePrepass, DeferredPrepass, CopyDeferredLightingId, EndPrepasses, @@ -25,6 +27,7 @@ pub mod graph { MainTransmissivePass, MainTransparentPass, EndMainPass, + LateDownsampleDepth, Taa, MotionBlur, Bloom, @@ -67,9 +70,10 @@ use core::ops::Range; use bevy_render::{ batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport}, + experimental::occlusion_culling::OcclusionCulling, mesh::allocator::SlabId, render_phase::PhaseItemBatchSetKey, - view::{NoIndirectDrawing, RetainedViewEntity}, + view::{prepare_view_targets, NoIndirectDrawing, RetainedViewEntity}, }; pub use camera_3d::*; pub use main_opaque_pass_3d_node::*; @@ -114,8 +118,9 @@ use crate::{ }, dof::DepthOfFieldNode, prepass::{ - node::PrepassNode, AlphaMask3dPrepass, DeferredPrepass, DepthPrepass, MotionVectorPrepass, - NormalPrepass, Opaque3dPrepass, OpaqueNoLightmap3dBatchSetKey, OpaqueNoLightmap3dBinKey, + node::{EarlyPrepassNode, LatePrepassNode}, + AlphaMask3dPrepass, DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass, + Opaque3dPrepass, OpaqueNoLightmap3dBatchSetKey, OpaqueNoLightmap3dBinKey, ViewPrepassTextures, MOTION_VECTOR_PREPASS_FORMAT, NORMAL_PREPASS_FORMAT, }, skybox::SkyboxPlugin, @@ -161,6 +166,9 @@ impl Plugin for Core3dPlugin { ( sort_phase_system::.in_set(RenderSet::PhaseSort), sort_phase_system::.in_set(RenderSet::PhaseSort), + configure_occlusion_culling_view_targets + .after(prepare_view_targets) + .in_set(RenderSet::ManageViews), 
prepare_core_3d_depth_textures.in_set(RenderSet::PrepareResources), prepare_core_3d_transmission_textures.in_set(RenderSet::PrepareResources), prepare_prepass_textures.in_set(RenderSet::PrepareResources), @@ -169,7 +177,8 @@ impl Plugin for Core3dPlugin { render_app .add_render_sub_graph(Core3d) - .add_render_graph_node::>(Core3d, Node3d::Prepass) + .add_render_graph_node::>(Core3d, Node3d::EarlyPrepass) + .add_render_graph_node::>(Core3d, Node3d::LatePrepass) .add_render_graph_node::>( Core3d, Node3d::DeferredPrepass, @@ -200,7 +209,8 @@ impl Plugin for Core3dPlugin { .add_render_graph_edges( Core3d, ( - Node3d::Prepass, + Node3d::EarlyPrepass, + Node3d::LatePrepass, Node3d::DeferredPrepass, Node3d::CopyDeferredLightingId, Node3d::EndPrepasses, @@ -898,6 +908,28 @@ pub fn prepare_core_3d_transmission_textures( } } +/// Sets the `TEXTURE_BINDING` flag on the depth texture if necessary for +/// occlusion culling. +/// +/// We need that flag to be set in order to read from the texture. +fn configure_occlusion_culling_view_targets( + mut view_targets: Query< + &mut Camera3d, + ( + With, + Without, + With, + Without, + ), + >, +) { + for mut camera_3d in &mut view_targets { + let mut depth_texture_usages = TextureUsages::from(camera_3d.depth_texture_usages); + depth_texture_usages |= TextureUsages::TEXTURE_BINDING; + camera_3d.depth_texture_usages = depth_texture_usages.into(); + } +} + // Disable MSAA and warn if using deferred rendering pub fn check_msaa(mut deferred_views: Query<&mut Msaa, (With, With)>) { for mut msaa in deferred_views.iter_mut() { diff --git a/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl b/crates/bevy_core_pipeline/src/experimental/mip_generation/downsample_depth.wgsl similarity index 92% rename from crates/bevy_pbr/src/meshlet/downsample_depth.wgsl rename to crates/bevy_core_pipeline/src/experimental/mip_generation/downsample_depth.wgsl index 80dd7d4baa..d24afa390d 100644 --- a/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl +++ 
b/crates/bevy_core_pipeline/src/experimental/mip_generation/downsample_depth.wgsl @@ -1,8 +1,16 @@ #ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT @group(0) @binding(0) var mip_0: array; // Per pixel #else +#ifdef MESHLET @group(0) @binding(0) var mip_0: array; // Per pixel -#endif +#else // MESHLET +#ifdef MULTISAMPLE +@group(0) @binding(0) var mip_0: texture_depth_multisampled_2d; +#else // MULTISAMPLE +@group(0) @binding(0) var mip_0: texture_depth_2d; +#endif // MULTISAMPLE +#endif // MESHLET +#endif // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT @group(0) @binding(1) var mip_1: texture_storage_2d; @group(0) @binding(2) var mip_2: texture_storage_2d; @group(0) @binding(3) var mip_3: texture_storage_2d; @@ -304,9 +312,25 @@ fn load_mip_0(x: u32, y: u32) -> f32 { let i = y * constants.view_width + x; #ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT return bitcast(u32(mip_0[i] >> 32u)); -#else +#else // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +#ifdef MESHLET return bitcast(mip_0[i]); -#endif +#else // MESHLET + // Downsample the top level. +#ifdef MULTISAMPLE + // The top level is multisampled, so we need to loop over all the samples + // and reduce them to 1. + var result = textureLoad(mip_0, vec2(x, y), 0); + let sample_count = i32(textureNumSamples(mip_0)); + for (var sample = 1; sample < sample_count; sample += 1) { + result = min(result, textureLoad(mip_0, vec2(x, y), sample)); + } + return result; +#else // MULTISAMPLE + return textureLoad(mip_0, vec2(x, y), 0); +#endif // MULTISAMPLE +#endif // MESHLET +#endif // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT } fn reduce_4(v: vec4f) -> f32 { diff --git a/crates/bevy_core_pipeline/src/experimental/mip_generation/mod.rs b/crates/bevy_core_pipeline/src/experimental/mip_generation/mod.rs new file mode 100644 index 0000000000..3140408886 --- /dev/null +++ b/crates/bevy_core_pipeline/src/experimental/mip_generation/mod.rs @@ -0,0 +1,708 @@ +//! Downsampling of textures to produce mipmap levels. +//! 
+//! Currently, this module only supports generation of hierarchical Z buffers +//! for occlusion culling. It's marked experimental because the shader is +//! designed only for power-of-two texture sizes and is slightly incorrect for +//! non-power-of-two depth buffer sizes. + +use core::array; + +use bevy_app::{App, Plugin}; +use bevy_asset::{load_internal_asset, Handle}; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Component, + entity::Entity, + prelude::{resource_exists, Without}, + query::{QueryItem, With}, + resource::Resource, + schedule::IntoSystemConfigs as _, + system::{lifetimeless::Read, Commands, Local, Query, Res, ResMut}, + world::{FromWorld, World}, +}; +use bevy_math::{uvec2, UVec2, Vec4Swizzles as _}; +use bevy_render::{ + experimental::occlusion_culling::OcclusionCulling, + render_graph::{NodeRunError, RenderGraphApp, RenderGraphContext, ViewNode, ViewNodeRunner}, + render_resource::{ + binding_types::{sampler, texture_2d, texture_2d_multisampled, texture_storage_2d}, + BindGroup, BindGroupEntries, BindGroupLayout, BindGroupLayoutEntries, + CachedComputePipelineId, ComputePassDescriptor, ComputePipeline, ComputePipelineDescriptor, + DownlevelFlags, Extent3d, IntoBinding, PipelineCache, PushConstantRange, Sampler, + SamplerBindingType, SamplerDescriptor, Shader, ShaderStages, SpecializedComputePipeline, + SpecializedComputePipelines, StorageTextureAccess, TextureAspect, TextureDescriptor, + TextureDimension, TextureFormat, TextureSampleType, TextureUsages, TextureView, + TextureViewDescriptor, TextureViewDimension, + }, + renderer::{RenderAdapter, RenderContext, RenderDevice}, + texture::TextureCache, + view::{ExtractedView, NoIndirectDrawing, ViewDepthTexture}, + Render, RenderApp, RenderSet, +}; +use bitflags::bitflags; + +use crate::{ + core_3d::{ + graph::{Core3d, Node3d}, + prepare_core_3d_depth_textures, + }, + prepass::{DeferredPrepass, DepthPrepass}, +}; + +/// Identifies the `downsample_depth.wgsl` shader. 
+pub const DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle = + Handle::weak_from_u128(3876351454330663524); + +/// The maximum number of mip levels that we can produce. +/// +/// 2^12 is 4096, so that's the maximum size of the depth buffer that we +/// support. +pub const DEPTH_PYRAMID_MIP_COUNT: usize = 12; + +/// A plugin that allows Bevy to repeatedly downsample textures to create +/// mipmaps. +/// +/// Currently, this is only used for hierarchical Z buffer generation for the +/// purposes of occlusion culling. +pub struct MipGenerationPlugin; + +impl Plugin for MipGenerationPlugin { + fn build(&self, app: &mut App) { + load_internal_asset!( + app, + DOWNSAMPLE_DEPTH_SHADER_HANDLE, + "downsample_depth.wgsl", + Shader::from_wgsl + ); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .init_resource::>() + .add_render_graph_node::>( + Core3d, + Node3d::EarlyDownsampleDepth, + ) + .add_render_graph_node::>( + Core3d, + Node3d::LateDownsampleDepth, + ) + .add_render_graph_edges( + Core3d, + ( + Node3d::EarlyPrepass, + Node3d::EarlyDownsampleDepth, + Node3d::LatePrepass, + Node3d::DeferredPrepass, + ), + ) + .add_render_graph_edges( + Core3d, + ( + Node3d::EndMainPass, + Node3d::LateDownsampleDepth, + Node3d::EndMainPassPostProcessing, + ), + ) + .add_systems( + Render, + create_downsample_depth_pipelines.in_set(RenderSet::Prepare), + ) + .add_systems( + Render, + ( + prepare_view_depth_pyramids, + prepare_downsample_depth_view_bind_groups, + ) + .chain() + .in_set(RenderSet::PrepareResources) + .run_if(resource_exists::) + .after(prepare_core_3d_depth_textures), + ); + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + render_app.init_resource::(); + } +} + +/// The nodes that produce a hierarchical Z-buffer, also known as a depth +/// pyramid. 
+/// +/// This runs the single-pass downsampling (SPD) shader with the *min* filter in +/// order to generate a series of mipmaps for the Z buffer. The resulting +/// hierarchical Z buffer can be used for occlusion culling. +/// +/// There are two instances of this node. The *early* downsample depth pass is +/// the first hierarchical Z-buffer stage, which runs after the early prepass +/// and before the late prepass. It prepares the Z-buffer for the bounding box +/// tests that the late mesh preprocessing stage will perform. The *late* +/// downsample depth pass runs at the end of the main phase. It prepares the +/// Z-buffer for the occlusion culling that the early mesh preprocessing phase +/// of the *next* frame will perform. +/// +/// This node won't do anything if occlusion culling isn't on. +#[derive(Default)] +pub struct DownsampleDepthNode; + +impl ViewNode for DownsampleDepthNode { + type ViewQuery = ( + Read, + Read, + Read, + ); + + fn run<'w>( + &self, + render_graph_context: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + (view_depth_pyramid, view_downsample_depth_bind_group, view_depth_texture): QueryItem< + 'w, + Self::ViewQuery, + >, + world: &'w World, + ) -> Result<(), NodeRunError> { + // Produce a depth pyramid from the current depth buffer for a single + // view. The resulting depth pyramid can be used for occlusion testing. + + let downsample_depth_pipelines = world.resource::(); + let pipeline_cache = world.resource::(); + + // Despite the name "single-pass downsampling", we actually need two + // passes because of the lack of `coherent` buffers in WGPU/WGSL. + // Between each pass, there's an implicit synchronization barrier. + + // Fetch the appropriate pipeline ID, depending on whether the depth + // buffer is multisampled or not. 
+ let (Some(first_downsample_depth_pipeline_id), Some(second_downsample_depth_pipeline_id)) = + (if view_depth_texture.texture.sample_count() > 1 { + ( + downsample_depth_pipelines.first_multisample.pipeline_id, + downsample_depth_pipelines.second_multisample.pipeline_id, + ) + } else { + ( + downsample_depth_pipelines.first.pipeline_id, + downsample_depth_pipelines.second.pipeline_id, + ) + }) + else { + return Ok(()); + }; + + // Fetch the pipelines for the two passes. + let (Some(first_downsample_depth_pipeline), Some(second_downsample_depth_pipeline)) = ( + pipeline_cache.get_compute_pipeline(first_downsample_depth_pipeline_id), + pipeline_cache.get_compute_pipeline(second_downsample_depth_pipeline_id), + ) else { + return Ok(()); + }; + + // Run the depth downsampling. + let view_size = uvec2( + view_depth_texture.texture.width(), + view_depth_texture.texture.height(), + ); + view_depth_pyramid.downsample_depth( + &format!("{:?}", render_graph_context.label()), + render_context, + view_size, + view_downsample_depth_bind_group, + first_downsample_depth_pipeline, + second_downsample_depth_pipeline, + ); + Ok(()) + } +} + +/// A single depth downsample pipeline. +#[derive(Resource)] +pub struct DownsampleDepthPipeline { + /// The bind group layout for this pipeline. + bind_group_layout: BindGroupLayout, + /// A handle that identifies the compiled shader. + pipeline_id: Option, +} + +impl DownsampleDepthPipeline { + /// Creates a new [`DownsampleDepthPipeline`] from a bind group layout. + /// + /// This doesn't actually specialize the pipeline; that must be done + /// afterward. + fn new(bind_group_layout: BindGroupLayout) -> DownsampleDepthPipeline { + DownsampleDepthPipeline { + bind_group_layout, + pipeline_id: None, + } + } +} + +/// Stores all depth buffer downsampling pipelines. +#[derive(Resource)] +pub struct DownsampleDepthPipelines { + /// The first pass of the pipeline, when the depth buffer is *not* + /// multisampled. 
+ first: DownsampleDepthPipeline, + /// The second pass of the pipeline, when the depth buffer is *not* + /// multisampled. + second: DownsampleDepthPipeline, + /// The first pass of the pipeline, when the depth buffer is multisampled. + first_multisample: DownsampleDepthPipeline, + /// The second pass of the pipeline, when the depth buffer is multisampled. + second_multisample: DownsampleDepthPipeline, + /// The sampler that the depth downsampling shader uses to sample the depth + /// buffer. + sampler: Sampler, +} + +/// Creates the [`DownsampleDepthPipelines`] if downsampling is supported on the +/// current platform. +fn create_downsample_depth_pipelines( + mut commands: Commands, + render_device: Res, + render_adapter: Res, + pipeline_cache: Res, + mut specialized_compute_pipelines: ResMut>, + mut has_run: Local, +) { + // Only run once. + // We can't use a `resource_exists` or similar run condition here because + // this function might fail to create downsample depth pipelines if the + // current platform doesn't support compute shaders. + if *has_run { + return; + } + *has_run = true; + + // If we don't have compute shaders, we can't invoke the downsample depth + // compute shader. + if !render_adapter + .get_downlevel_capabilities() + .flags + .contains(DownlevelFlags::COMPUTE_SHADERS) + { + return; + } + + // Create the bind group layouts. The bind group layouts are identical + // between the first and second passes, so the only thing we need to + // treat specially is the type of the first mip level (non-multisampled + // or multisampled). + let standard_bind_group_layout = + create_downsample_depth_bind_group_layout(&render_device, false); + let multisampled_bind_group_layout = + create_downsample_depth_bind_group_layout(&render_device, true); + + // Create the depth pyramid sampler. This is shared among all shaders. 
+ let sampler = render_device.create_sampler(&SamplerDescriptor { + label: Some("depth pyramid sampler"), + ..SamplerDescriptor::default() + }); + + // Initialize the pipelines. + let mut downsample_depth_pipelines = DownsampleDepthPipelines { + first: DownsampleDepthPipeline::new(standard_bind_group_layout.clone()), + second: DownsampleDepthPipeline::new(standard_bind_group_layout.clone()), + first_multisample: DownsampleDepthPipeline::new(multisampled_bind_group_layout.clone()), + second_multisample: DownsampleDepthPipeline::new(multisampled_bind_group_layout.clone()), + sampler, + }; + + // Specialize each pipeline with the appropriate + // `DownsampleDepthPipelineKey`. + downsample_depth_pipelines.first.pipeline_id = Some(specialized_compute_pipelines.specialize( + &pipeline_cache, + &downsample_depth_pipelines.first, + DownsampleDepthPipelineKey::empty(), + )); + downsample_depth_pipelines.second.pipeline_id = Some(specialized_compute_pipelines.specialize( + &pipeline_cache, + &downsample_depth_pipelines.second, + DownsampleDepthPipelineKey::SECOND_PHASE, + )); + downsample_depth_pipelines.first_multisample.pipeline_id = + Some(specialized_compute_pipelines.specialize( + &pipeline_cache, + &downsample_depth_pipelines.first_multisample, + DownsampleDepthPipelineKey::MULTISAMPLE, + )); + downsample_depth_pipelines.second_multisample.pipeline_id = + Some(specialized_compute_pipelines.specialize( + &pipeline_cache, + &downsample_depth_pipelines.second_multisample, + DownsampleDepthPipelineKey::SECOND_PHASE | DownsampleDepthPipelineKey::MULTISAMPLE, + )); + + commands.insert_resource(downsample_depth_pipelines); +} + +/// Creates a single bind group layout for the downsample depth pass. 
+fn create_downsample_depth_bind_group_layout( + render_device: &RenderDevice, + is_multisampled: bool, +) -> BindGroupLayout { + render_device.create_bind_group_layout( + if is_multisampled { + "downsample multisample depth bind group layout" + } else { + "downsample depth bind group layout" + }, + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + // We only care about the multisample status of the depth buffer + // for the first mip level. After the first mip level is + // sampled, we drop to a single sample. + if is_multisampled { + texture_2d_multisampled(TextureSampleType::Depth) + } else { + texture_2d(TextureSampleType::Depth) + }, + // All the mip levels follow: + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::ReadWrite), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly), + sampler(SamplerBindingType::NonFiltering), + ), + ), + ) +} + +bitflags! { + /// Uniquely identifies a configuration of the downsample depth shader. 
+ /// + /// Note that meshlets maintain their downsample depth shaders on their own + /// and don't use this infrastructure; thus there's no flag for meshlets in + /// here, even though the shader has defines for it. + #[derive(Clone, Copy, PartialEq, Eq, Hash)] + pub struct DownsampleDepthPipelineKey: u8 { + /// True if the depth buffer is multisampled. + const MULTISAMPLE = 1; + /// True if this shader is the second phase of the downsample depth + /// process; false if this shader is the first phase. + const SECOND_PHASE = 2; + } +} + +impl SpecializedComputePipeline for DownsampleDepthPipeline { + type Key = DownsampleDepthPipelineKey; + + fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor { + let mut shader_defs = vec![]; + if key.contains(DownsampleDepthPipelineKey::MULTISAMPLE) { + shader_defs.push("MULTISAMPLE".into()); + } + + let label = format!( + "downsample depth{}{} pipeline", + if key.contains(DownsampleDepthPipelineKey::MULTISAMPLE) { + " multisample" + } else { + "" + }, + if key.contains(DownsampleDepthPipelineKey::SECOND_PHASE) { + " second phase" + } else { + " first phase" + } + ) + .into(); + + ComputePipelineDescriptor { + label: Some(label), + layout: vec![self.bind_group_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..8, + }], + shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs, + entry_point: if key.contains(DownsampleDepthPipelineKey::SECOND_PHASE) { + "downsample_depth_second".into() + } else { + "downsample_depth_first".into() + }, + zero_initialize_workgroup_memory: false, + } + } +} + +/// Stores a placeholder texture that can be bound to a depth pyramid binding if +/// no depth pyramid is needed. 
+#[derive(Resource, Deref, DerefMut)] +pub struct DepthPyramidDummyTexture(TextureView); + +impl FromWorld for DepthPyramidDummyTexture { + fn from_world(world: &mut World) -> Self { + let render_device = world.resource::(); + + DepthPyramidDummyTexture(create_depth_pyramid_dummy_texture( + render_device, + "depth pyramid dummy texture", + "depth pyramid dummy texture view", + )) + } +} + +/// Creates a placeholder texture that can be bound to a depth pyramid binding +/// if no depth pyramid is needed. +pub fn create_depth_pyramid_dummy_texture( + render_device: &RenderDevice, + texture_label: &'static str, + texture_view_label: &'static str, +) -> TextureView { + render_device + .create_texture(&TextureDescriptor { + label: Some(texture_label), + size: Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::STORAGE_BINDING, + view_formats: &[], + }) + .create_view(&TextureViewDescriptor { + label: Some(texture_view_label), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + base_mip_level: 0, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: Some(1), + }) +} + +/// Stores a hierarchical Z-buffer for a view, which is a series of mipmaps +/// useful for efficient occlusion culling. +/// +/// This will only be present on a view when occlusion culling is enabled. +#[derive(Component)] +pub struct ViewDepthPyramid { + /// A texture view containing the entire depth texture. + pub all_mips: TextureView, + /// A series of texture views containing one mip level each. + pub mips: [TextureView; DEPTH_PYRAMID_MIP_COUNT], + /// The total number of mipmap levels. + /// + /// This is the base-2 logarithm of the greatest dimension of the depth + /// buffer, rounded up. 
+ pub mip_count: u32, +} + +impl ViewDepthPyramid { + /// Allocates a new depth pyramid for a depth buffer with the given size. + pub fn new( + render_device: &RenderDevice, + texture_cache: &mut TextureCache, + depth_pyramid_dummy_texture: &TextureView, + size: UVec2, + texture_label: &'static str, + texture_view_label: &'static str, + ) -> ViewDepthPyramid { + // Calculate the size of the depth pyramid. + let depth_pyramid_size = Extent3d { + width: size.x.div_ceil(2), + height: size.y.div_ceil(2), + depth_or_array_layers: 1, + }; + + // Calculate the number of mip levels we need. + let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); + + // Create the depth pyramid. + let depth_pyramid = texture_cache.get( + render_device, + TextureDescriptor { + label: Some(texture_label), + size: depth_pyramid_size, + mip_level_count: depth_pyramid_mip_count, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + // Create individual views for each level of the depth pyramid. + let depth_pyramid_mips = array::from_fn(|i| { + if (i as u32) < depth_pyramid_mip_count { + depth_pyramid.texture.create_view(&TextureViewDescriptor { + label: Some(texture_view_label), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + base_mip_level: i as u32, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: Some(1), + }) + } else { + (*depth_pyramid_dummy_texture).clone() + } + }); + + // Create the view for the depth pyramid as a whole. + let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); + + Self { + all_mips: depth_pyramid_all_mips, + mips: depth_pyramid_mips, + mip_count: depth_pyramid_mip_count, + } + } + + /// Creates a bind group that allows the depth buffer to be attached to the + /// `downsample_depth.wgsl` shader. 
+ pub fn create_bind_group<'a, R>( + &'a self, + render_device: &RenderDevice, + label: &'static str, + bind_group_layout: &BindGroupLayout, + source_image: R, + sampler: &'a Sampler, + ) -> BindGroup + where + R: IntoBinding<'a>, + { + render_device.create_bind_group( + label, + bind_group_layout, + &BindGroupEntries::sequential(( + source_image, + &self.mips[0], + &self.mips[1], + &self.mips[2], + &self.mips[3], + &self.mips[4], + &self.mips[5], + &self.mips[6], + &self.mips[7], + &self.mips[8], + &self.mips[9], + &self.mips[10], + &self.mips[11], + sampler, + )), + ) + } + + /// Invokes the shaders to generate the hierarchical Z-buffer. + /// + /// This is intended to be invoked as part of a render node. + pub fn downsample_depth( + &self, + label: &str, + render_context: &mut RenderContext, + view_size: UVec2, + downsample_depth_bind_group: &BindGroup, + downsample_depth_first_pipeline: &ComputePipeline, + downsample_depth_second_pipeline: &ComputePipeline, + ) { + let command_encoder = render_context.command_encoder(); + let mut downsample_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some(label), + timestamp_writes: None, + }); + downsample_pass.set_pipeline(downsample_depth_first_pipeline); + // Pass the mip count and the texture width as push constants, for + // simplicity. + downsample_pass.set_push_constants(0, bytemuck::cast_slice(&[self.mip_count, view_size.x])); + downsample_pass.set_bind_group(0, downsample_depth_bind_group, &[]); + downsample_pass.dispatch_workgroups(view_size.x.div_ceil(64), view_size.y.div_ceil(64), 1); + + if self.mip_count >= 7 { + downsample_pass.set_pipeline(downsample_depth_second_pipeline); + downsample_pass.dispatch_workgroups(1, 1, 1); + } + } +} + +/// Creates depth pyramids for views that have occlusion culling enabled. 
+fn prepare_view_depth_pyramids( + mut commands: Commands, + render_device: Res, + mut texture_cache: ResMut, + depth_pyramid_dummy_texture: Res, + views: Query< + (Entity, &ExtractedView), + ( + With, + Without, + With, + Without, + ), + >, +) { + for (view_entity, view) in &views { + commands.entity(view_entity).insert(ViewDepthPyramid::new( + &render_device, + &mut texture_cache, + &depth_pyramid_dummy_texture, + view.viewport.zw(), + "view depth pyramid texture", + "view depth pyramid texture view", + )); + } +} + +/// The bind group that we use to attach the depth buffer and depth pyramid for +/// a view to the `downsample_depth.wgsl` shader. +/// +/// This will only be present for a view if occlusion culling is enabled. +#[derive(Component, Deref, DerefMut)] +pub struct ViewDownsampleDepthBindGroup(BindGroup); + +/// Creates the [`ViewDownsampleDepthBindGroup`]s for all views with occlusion +/// culling enabled. +fn prepare_downsample_depth_view_bind_groups( + mut commands: Commands, + render_device: Res, + downsample_depth_pipelines: Res, + view_depth_textures: Query<(Entity, &ViewDepthPyramid, &ViewDepthTexture)>, +) { + for (view_entity, view_depth_pyramid, view_depth_texture) in &view_depth_textures { + let is_multisampled = view_depth_texture.texture.sample_count() > 1; + commands + .entity(view_entity) + .insert(ViewDownsampleDepthBindGroup( + view_depth_pyramid.create_bind_group( + &render_device, + if is_multisampled { + "downsample multisample depth bind group" + } else { + "downsample depth bind group" + }, + if is_multisampled { + &downsample_depth_pipelines + .first_multisample + .bind_group_layout + } else { + &downsample_depth_pipelines.first.bind_group_layout + }, + view_depth_texture.view(), + &downsample_depth_pipelines.sampler, + ), + )); + } +} diff --git a/crates/bevy_core_pipeline/src/experimental/mod.rs b/crates/bevy_core_pipeline/src/experimental/mod.rs new file mode 100644 index 0000000000..4f957477ea --- /dev/null +++ 
b/crates/bevy_core_pipeline/src/experimental/mod.rs @@ -0,0 +1,11 @@ +//! Experimental rendering features. +//! +//! Experimental features are features with known problems, missing features, +//! compatibility issues, low performance, and/or future breaking changes, but +//! are included nonetheless for testing purposes. + +pub mod mip_generation; + +pub mod taa { + pub use crate::taa::{TemporalAntiAliasNode, TemporalAntiAliasPlugin, TemporalAntiAliasing}; +} diff --git a/crates/bevy_core_pipeline/src/lib.rs b/crates/bevy_core_pipeline/src/lib.rs index 6c2ee5bec4..49b9b7a20b 100644 --- a/crates/bevy_core_pipeline/src/lib.rs +++ b/crates/bevy_core_pipeline/src/lib.rs @@ -14,6 +14,7 @@ pub mod core_2d; pub mod core_3d; pub mod deferred; pub mod dof; +pub mod experimental; pub mod fullscreen_vertex_shader; pub mod fxaa; pub mod motion_blur; @@ -29,17 +30,6 @@ pub mod upscaling; pub use skybox::Skybox; -/// Experimental features that are not yet finished. Please report any issues you encounter! -/// -/// Expect bugs, missing features, compatibility issues, low performance, and/or future breaking changes. -pub mod experimental { - pub mod taa { - pub use crate::taa::{ - TemporalAntiAliasNode, TemporalAntiAliasPlugin, TemporalAntiAliasing, - }; - } -} - /// The core pipeline prelude. /// /// This includes the most common types in this crate, re-exported for your convenience. 
@@ -56,6 +46,7 @@ use crate::{ core_3d::Core3dPlugin, deferred::copy_lighting_id::CopyDeferredLightingIdPlugin, dof::DepthOfFieldPlugin, + experimental::mip_generation::MipGenerationPlugin, fullscreen_vertex_shader::FULLSCREEN_SHADER_HANDLE, fxaa::FxaaPlugin, motion_blur::MotionBlurPlugin, @@ -87,10 +78,8 @@ impl Plugin for CorePipelinePlugin { .register_type::() .register_type::() .register_type::() + .add_plugins((Core2dPlugin, Core3dPlugin, CopyDeferredLightingIdPlugin)) .add_plugins(( - Core2dPlugin, - Core3dPlugin, - CopyDeferredLightingIdPlugin, BlitPlugin, MsaaWritebackPlugin, TonemappingPlugin, @@ -103,6 +92,7 @@ impl Plugin for CorePipelinePlugin { SmaaPlugin, PostProcessingPlugin, OrderIndependentTransparencyPlugin, + MipGenerationPlugin, )); } } diff --git a/crates/bevy_core_pipeline/src/prepass/mod.rs b/crates/bevy_core_pipeline/src/prepass/mod.rs index 7fb2dfcea9..1e663a79a4 100644 --- a/crates/bevy_core_pipeline/src/prepass/mod.rs +++ b/crates/bevy_core_pipeline/src/prepass/mod.rs @@ -78,6 +78,7 @@ pub struct DeferredPrepass; pub struct PreviousViewData { pub view_from_world: Mat4, pub clip_from_world: Mat4, + pub clip_from_view: Mat4, } #[derive(Resource, Default)] diff --git a/crates/bevy_core_pipeline/src/prepass/node.rs b/crates/bevy_core_pipeline/src/prepass/node.rs index 9019890d7e..942983b063 100644 --- a/crates/bevy_core_pipeline/src/prepass/node.rs +++ b/crates/bevy_core_pipeline/src/prepass/node.rs @@ -2,11 +2,12 @@ use bevy_ecs::{prelude::*, query::QueryItem}; use bevy_render::{ camera::ExtractedCamera, diagnostic::RecordDiagnostics, + experimental::occlusion_culling::OcclusionCulling, render_graph::{NodeRunError, RenderGraphContext, ViewNode}, render_phase::{TrackedRenderPass, ViewBinnedRenderPhases}, render_resource::{CommandEncoderDescriptor, PipelineCache, RenderPassDescriptor, StoreOp}, renderer::RenderContext, - view::{ExtractedView, ViewDepthTexture, ViewUniformOffset}, + view::{ExtractedView, NoIndirectDrawing, ViewDepthTexture, 
ViewUniformOffset}, }; use tracing::error; #[cfg(feature = "trace")] @@ -19,13 +20,40 @@ use super::{ ViewPrepassTextures, }; -/// Render node used by the prepass. +/// The phase of the prepass that draws meshes that were visible last frame. /// -/// By default, inserted before the main pass in the render graph. +/// If occlusion culling isn't in use, this prepass simply draws all meshes. +/// +/// Like all prepass nodes, this is inserted before the main pass in the render +/// graph. #[derive(Default)] -pub struct PrepassNode; +pub struct EarlyPrepassNode; -impl ViewNode for PrepassNode { +impl ViewNode for EarlyPrepassNode { + type ViewQuery = ::ViewQuery; + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + view_query: QueryItem<'w, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + run_prepass(graph, render_context, view_query, world, "early prepass") + } +} + +/// The phase of the prepass that runs after occlusion culling against the +/// meshes that were visible last frame. +/// +/// If occlusion culling isn't in use, this is a no-op. +/// +/// Like all prepass nodes, this is inserted before the main pass in the render +/// graph. +#[derive(Default)] +pub struct LatePrepassNode; + +impl ViewNode for LatePrepassNode { type ViewQuery = ( &'static ExtractedCamera, &'static ExtractedView, @@ -36,156 +64,176 @@ impl ViewNode for PrepassNode { Option<&'static RenderSkyboxPrepassPipeline>, Option<&'static SkyboxPrepassBindGroup>, Option<&'static PreviousViewUniformOffset>, + Has, + Has, ); fn run<'w>( &self, graph: &mut RenderGraphContext, render_context: &mut RenderContext<'w>, - ( - camera, - extracted_view, - view_depth_texture, - view_prepass_textures, - view_uniform_offset, - deferred_prepass, + query: QueryItem<'w, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + // We only need a late prepass if we have occlusion culling and indirect + // drawing. 
+ let (_, _, _, _, _, _, _, _, _, occlusion_culling, no_indirect_drawing) = query; + if !occlusion_culling || no_indirect_drawing { + return Ok(()); + } + + run_prepass(graph, render_context, query, world, "late prepass") + } +} + +/// Runs a prepass that draws all meshes to the depth buffer, and possibly +/// normal and motion vector buffers as well. +/// +/// If occlusion culling isn't in use, and a prepass is enabled, then there's +/// only one prepass. If occlusion culling is in use, then any prepass is split +/// into two: an *early* prepass and a *late* prepass. The early prepass draws +/// what was visible last frame, and the last prepass performs occlusion culling +/// against a conservative hierarchical Z buffer before drawing unoccluded +/// meshes. +fn run_prepass<'w>( + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + ( + camera, + extracted_view, + view_depth_texture, + view_prepass_textures, + view_uniform_offset, + deferred_prepass, + skybox_prepass_pipeline, + skybox_prepass_bind_group, + view_prev_uniform_offset, + _, + _, + ): QueryItem<'w, ::ViewQuery>, + world: &'w World, + label: &'static str, +) -> Result<(), NodeRunError> { + let (Some(opaque_prepass_phases), Some(alpha_mask_prepass_phases)) = ( + world.get_resource::>(), + world.get_resource::>(), + ) else { + return Ok(()); + }; + + let (Some(opaque_prepass_phase), Some(alpha_mask_prepass_phase)) = ( + opaque_prepass_phases.get(&extracted_view.retained_view_entity), + alpha_mask_prepass_phases.get(&extracted_view.retained_view_entity), + ) else { + return Ok(()); + }; + + let diagnostics = render_context.diagnostic_recorder(); + + let mut color_attachments = vec![ + view_prepass_textures + .normal + .as_ref() + .map(|normals_texture| normals_texture.get_attachment()), + view_prepass_textures + .motion_vectors + .as_ref() + .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), + // Use None in place of deferred attachments + None, + None, + ]; + + 
// If all color attachments are none: clear the color attachment list so that no fragment shader is required + if color_attachments.iter().all(Option::is_none) { + color_attachments.clear(); + } + + let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store)); + + let view_entity = graph.view_entity(); + render_context.add_command_buffer_generation_task(move |render_device| { + #[cfg(feature = "trace")] + let _prepass_span = info_span!("prepass").entered(); + + // Command encoder setup + let mut command_encoder = render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("prepass_command_encoder"), + }); + + // Render pass setup + let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { + label: Some(label), + color_attachments: &color_attachments, + depth_stencil_attachment, + timestamp_writes: None, + occlusion_query_set: None, + }); + + let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); + let pass_span = diagnostics.pass_span(&mut render_pass, label); + + if let Some(viewport) = camera.viewport.as_ref() { + render_pass.set_camera_viewport(viewport); + } + + // Opaque draws + if !opaque_prepass_phase.is_empty() { + #[cfg(feature = "trace")] + let _opaque_prepass_span = info_span!("opaque_prepass").entered(); + if let Err(err) = opaque_prepass_phase.render(&mut render_pass, world, view_entity) { + error!("Error encountered while rendering the opaque prepass phase {err:?}"); + } + } + + // Alpha masked draws + if !alpha_mask_prepass_phase.is_empty() { + #[cfg(feature = "trace")] + let _alpha_mask_prepass_span = info_span!("alpha_mask_prepass").entered(); + if let Err(err) = alpha_mask_prepass_phase.render(&mut render_pass, world, view_entity) + { + error!("Error encountered while rendering the alpha mask prepass phase {err:?}"); + } + } + + // Skybox draw using a fullscreen triangle + if let ( + Some(skybox_prepass_pipeline), + Some(skybox_prepass_bind_group), + 
Some(view_prev_uniform_offset), + ) = ( skybox_prepass_pipeline, skybox_prepass_bind_group, view_prev_uniform_offset, - ): QueryItem<'w, Self::ViewQuery>, - world: &'w World, - ) -> Result<(), NodeRunError> { - let (Some(opaque_prepass_phases), Some(alpha_mask_prepass_phases)) = ( - world.get_resource::>(), - world.get_resource::>(), - ) else { - return Ok(()); - }; - - let (Some(opaque_prepass_phase), Some(alpha_mask_prepass_phase)) = ( - opaque_prepass_phases.get(&extracted_view.retained_view_entity), - alpha_mask_prepass_phases.get(&extracted_view.retained_view_entity), - ) else { - return Ok(()); - }; - - let diagnostics = render_context.diagnostic_recorder(); - - let mut color_attachments = vec![ - view_prepass_textures - .normal - .as_ref() - .map(|normals_texture| normals_texture.get_attachment()), - view_prepass_textures - .motion_vectors - .as_ref() - .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), - // Use None in place of deferred attachments - None, - None, - ]; - - // If all color attachments are none: clear the color attachment list so that no fragment shader is required - if color_attachments.iter().all(Option::is_none) { - color_attachments.clear(); + ) { + let pipeline_cache = world.resource::(); + if let Some(pipeline) = pipeline_cache.get_render_pipeline(skybox_prepass_pipeline.0) { + render_pass.set_render_pipeline(pipeline); + render_pass.set_bind_group( + 0, + &skybox_prepass_bind_group.0, + &[view_uniform_offset.offset, view_prev_uniform_offset.offset], + ); + render_pass.draw(0..3, 0..1); + } } - let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store)); + pass_span.end(&mut render_pass); + drop(render_pass); - let view_entity = graph.view_entity(); - render_context.add_command_buffer_generation_task(move |render_device| { - #[cfg(feature = "trace")] - let _prepass_span = info_span!("prepass").entered(); - - // Command encoder setup - let mut command_encoder = - 
render_device.create_command_encoder(&CommandEncoderDescriptor { - label: Some("prepass_command_encoder"), - }); - - // Render pass setup - let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { - label: Some("prepass"), - color_attachments: &color_attachments, - depth_stencil_attachment, - timestamp_writes: None, - occlusion_query_set: None, - }); - - let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); - let pass_span = diagnostics.pass_span(&mut render_pass, "prepass"); - - if let Some(viewport) = camera.viewport.as_ref() { - render_pass.set_camera_viewport(viewport); + // After rendering to the view depth texture, copy it to the prepass depth texture if deferred isn't going to + if deferred_prepass.is_none() { + if let Some(prepass_depth_texture) = &view_prepass_textures.depth { + command_encoder.copy_texture_to_texture( + view_depth_texture.texture.as_image_copy(), + prepass_depth_texture.texture.texture.as_image_copy(), + view_prepass_textures.size, + ); } + } - // Opaque draws - if !opaque_prepass_phase.multidrawable_mesh_keys.is_empty() - || !opaque_prepass_phase.batchable_mesh_keys.is_empty() - || !opaque_prepass_phase.unbatchable_mesh_keys.is_empty() - { - #[cfg(feature = "trace")] - let _opaque_prepass_span = info_span!("opaque_prepass").entered(); - if let Err(err) = opaque_prepass_phase.render(&mut render_pass, world, view_entity) - { - error!("Error encountered while rendering the opaque prepass phase {err:?}"); - } - } + command_encoder.finish() + }); - // Alpha masked draws - if !alpha_mask_prepass_phase.is_empty() { - #[cfg(feature = "trace")] - let _alpha_mask_prepass_span = info_span!("alpha_mask_prepass").entered(); - if let Err(err) = - alpha_mask_prepass_phase.render(&mut render_pass, world, view_entity) - { - error!( - "Error encountered while rendering the alpha mask prepass phase {err:?}" - ); - } - } - - // Skybox draw using a fullscreen triangle - if let ( - Some(skybox_prepass_pipeline), - 
Some(skybox_prepass_bind_group), - Some(view_prev_uniform_offset), - ) = ( - skybox_prepass_pipeline, - skybox_prepass_bind_group, - view_prev_uniform_offset, - ) { - let pipeline_cache = world.resource::(); - if let Some(pipeline) = - pipeline_cache.get_render_pipeline(skybox_prepass_pipeline.0) - { - render_pass.set_render_pipeline(pipeline); - render_pass.set_bind_group( - 0, - &skybox_prepass_bind_group.0, - &[view_uniform_offset.offset, view_prev_uniform_offset.offset], - ); - render_pass.draw(0..3, 0..1); - } - } - - pass_span.end(&mut render_pass); - drop(render_pass); - - // After rendering to the view depth texture, copy it to the prepass depth texture if deferred isn't going to - if deferred_prepass.is_none() { - if let Some(prepass_depth_texture) = &view_prepass_textures.depth { - command_encoder.copy_texture_to_texture( - view_depth_texture.texture.as_image_copy(), - prepass_depth_texture.texture.texture.as_image_copy(), - view_prepass_textures.size, - ); - } - } - - command_encoder.finish() - }); - - Ok(()) - } + Ok(()) } diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs index 44537faf9b..089f38d47d 100644 --- a/crates/bevy_pbr/src/lib.rs +++ b/crates/bevy_pbr/src/lib.rs @@ -98,11 +98,13 @@ pub mod graph { /// Label for the volumetric lighting pass. VolumetricFog, /// Label for the compute shader instance data building pass. - GpuPreprocess, + EarlyGpuPreprocess, + LateGpuPreprocess, /// Label for the screen space reflections pass. ScreenSpaceReflections, - /// Label for the indirect parameters building pass. 
- BuildIndirectParameters, + EarlyPrepassBuildIndirectParameters, + LatePrepassBuildIndirectParameters, + MainBuildIndirectParameters, } } diff --git a/crates/bevy_pbr/src/meshlet/mod.rs b/crates/bevy_pbr/src/meshlet/mod.rs index 5db0644f97..862db413f8 100644 --- a/crates/bevy_pbr/src/meshlet/mod.rs +++ b/crates/bevy_pbr/src/meshlet/mod.rs @@ -175,12 +175,6 @@ impl Plugin for MeshletPlugin { "cull_clusters.wgsl", Shader::from_wgsl ); - load_internal_asset!( - app, - MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, - "downsample_depth.wgsl", - Shader::from_wgsl - ); load_internal_asset!( app, MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE, @@ -255,7 +249,6 @@ impl Plugin for MeshletPlugin { NodePbr::ShadowPass, // NodeMeshlet::Prepass, - Node3d::Prepass, // NodeMeshlet::DeferredPrepass, Node3d::DeferredPrepass, diff --git a/crates/bevy_pbr/src/meshlet/pipelines.rs b/crates/bevy_pbr/src/meshlet/pipelines.rs index 2d271c678d..e5e8ec44d8 100644 --- a/crates/bevy_pbr/src/meshlet/pipelines.rs +++ b/crates/bevy_pbr/src/meshlet/pipelines.rs @@ -1,7 +1,8 @@ use super::resource_manager::ResourceManager; use bevy_asset::Handle; use bevy_core_pipeline::{ - core_3d::CORE_3D_DEPTH_FORMAT, fullscreen_vertex_shader::fullscreen_shader_vertex_state, + core_3d::CORE_3D_DEPTH_FORMAT, experimental::mip_generation::DOWNSAMPLE_DEPTH_SHADER_HANDLE, + fullscreen_vertex_shader::fullscreen_shader_vertex_state, }; use bevy_ecs::{ resource::Resource, @@ -12,8 +13,6 @@ use bevy_render::render_resource::*; pub const MESHLET_FILL_CLUSTER_BUFFERS_SHADER_HANDLE: Handle = Handle::weak_from_u128(4325134235233421); pub const MESHLET_CULLING_SHADER_HANDLE: Handle = Handle::weak_from_u128(5325134235233421); -pub const MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle = - Handle::weak_from_u128(6325134235233421); pub const MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE: Handle = Handle::weak_from_u128(7325134235233421); pub const MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE: Handle = @@ 
-119,8 +118,11 @@ impl FromWorld for MeshletPipelines { stages: ShaderStages::COMPUTE, range: 0..8, }], - shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, - shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + "MESHLET".into(), + ], entry_point: "downsample_depth_first".into(), zero_initialize_workgroup_memory: false, }, @@ -134,8 +136,11 @@ impl FromWorld for MeshletPipelines { stages: ShaderStages::COMPUTE, range: 0..8, }], - shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, - shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + "MESHLET".into(), + ], entry_point: "downsample_depth_second".into(), zero_initialize_workgroup_memory: false, }, @@ -149,8 +154,8 @@ impl FromWorld for MeshletPipelines { stages: ShaderStages::COMPUTE, range: 0..8, }], - shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, - shader_defs: vec![], + shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec!["MESHLET".into()], entry_point: "downsample_depth_first".into(), zero_initialize_workgroup_memory: false, }, @@ -164,8 +169,8 @@ impl FromWorld for MeshletPipelines { stages: ShaderStages::COMPUTE, range: 0..8, }], - shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, - shader_defs: vec![], + shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec!["MESHLET".into()], entry_point: "downsample_depth_second".into(), zero_initialize_workgroup_memory: false, }, diff --git a/crates/bevy_pbr/src/meshlet/resource_manager.rs b/crates/bevy_pbr/src/meshlet/resource_manager.rs index 653c90f162..b2d6cff11d 100644 --- a/crates/bevy_pbr/src/meshlet/resource_manager.rs +++ b/crates/bevy_pbr/src/meshlet/resource_manager.rs @@ -3,6 +3,7 @@ use crate::ShadowView; use alloc::sync::Arc; use bevy_core_pipeline::{ core_3d::Camera3d, + 
experimental::mip_generation::{self, ViewDepthPyramid}, prepass::{PreviousViewData, PreviousViewUniforms}, }; use bevy_ecs::{ @@ -20,7 +21,7 @@ use bevy_render::{ view::{ExtractedView, RenderLayers, ViewUniform, ViewUniforms}, }; use binding_types::*; -use core::{array, iter, sync::atomic::AtomicBool}; +use core::{iter, sync::atomic::AtomicBool}; use encase::internal::WriteInto; /// Manages per-view and per-cluster GPU resources for [`super::MeshletPlugin`]. @@ -85,31 +86,11 @@ impl ResourceManager { label: Some("meshlet_depth_pyramid_sampler"), ..SamplerDescriptor::default() }), - depth_pyramid_dummy_texture: render_device - .create_texture(&TextureDescriptor { - label: Some("meshlet_depth_pyramid_dummy_texture"), - size: Extent3d { - width: 1, - height: 1, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R32Float, - usage: TextureUsages::STORAGE_BINDING, - view_formats: &[], - }) - .create_view(&TextureViewDescriptor { - label: Some("meshlet_depth_pyramid_dummy_texture_view"), - format: Some(TextureFormat::R32Float), - dimension: Some(TextureViewDimension::D2), - aspect: TextureAspect::All, - base_mip_level: 0, - mip_level_count: Some(1), - base_array_layer: 0, - array_layer_count: Some(1), - }), + depth_pyramid_dummy_texture: mip_generation::create_depth_pyramid_dummy_texture( + render_device, + "meshlet_depth_pyramid_dummy_texture", + "meshlet_depth_pyramid_dummy_texture_view", + ), previous_depth_pyramids: EntityHashMap::default(), @@ -258,9 +239,7 @@ pub struct MeshletViewResources { pub visibility_buffer_software_raster_indirect_args_second: Buffer, pub visibility_buffer_hardware_raster_indirect_args_first: Buffer, pub visibility_buffer_hardware_raster_indirect_args_second: Buffer, - depth_pyramid_all_mips: TextureView, - depth_pyramid_mips: [TextureView; 12], - pub depth_pyramid_mip_count: u32, + pub depth_pyramid: ViewDepthPyramid, previous_depth_pyramid: TextureView, pub 
material_depth: Option, pub view_size: UVec2, @@ -491,51 +470,23 @@ pub fn prepare_meshlet_per_frame_resources( usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, }); - let depth_pyramid_size = Extent3d { - width: view.viewport.z.div_ceil(2), - height: view.viewport.w.div_ceil(2), - depth_or_array_layers: 1, - }; - let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); - let depth_pyramid = texture_cache.get( + let depth_pyramid = ViewDepthPyramid::new( &render_device, - TextureDescriptor { - label: Some("meshlet_depth_pyramid"), - size: depth_pyramid_size, - mip_level_count: depth_pyramid_mip_count, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R32Float, - usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, - view_formats: &[], - }, + &mut texture_cache, + &resource_manager.depth_pyramid_dummy_texture, + view.viewport.zw(), + "meshlet_depth_pyramid", + "meshlet_depth_pyramid_texture_view", ); - let depth_pyramid_mips = array::from_fn(|i| { - if (i as u32) < depth_pyramid_mip_count { - depth_pyramid.texture.create_view(&TextureViewDescriptor { - label: Some("meshlet_depth_pyramid_texture_view"), - format: Some(TextureFormat::R32Float), - dimension: Some(TextureViewDimension::D2), - aspect: TextureAspect::All, - base_mip_level: i as u32, - mip_level_count: Some(1), - base_array_layer: 0, - array_layer_count: Some(1), - }) - } else { - resource_manager.depth_pyramid_dummy_texture.clone() - } - }); - let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); let previous_depth_pyramid = match resource_manager.previous_depth_pyramids.get(&view_entity) { Some(texture_view) => texture_view.clone(), - None => depth_pyramid_all_mips.clone(), + None => depth_pyramid.all_mips.clone(), }; resource_manager .previous_depth_pyramids - .insert(view_entity, depth_pyramid_all_mips.clone()); + .insert(view_entity, depth_pyramid.all_mips.clone()); let material_depth = TextureDescriptor { label: 
Some("meshlet_material_depth"), @@ -563,9 +514,7 @@ pub fn prepare_meshlet_per_frame_resources( visibility_buffer_software_raster_indirect_args_second, visibility_buffer_hardware_raster_indirect_args_first, visibility_buffer_hardware_raster_indirect_args_second, - depth_pyramid_all_mips, - depth_pyramid_mips, - depth_pyramid_mip_count, + depth_pyramid, previous_depth_pyramid, material_depth: not_shadow_view .then(|| texture_cache.get(&render_device, material_depth)), @@ -676,7 +625,7 @@ pub fn prepare_meshlet_view_bind_groups( resource_manager .visibility_buffer_raster_clusters .as_entire_binding(), - &view_resources.depth_pyramid_all_mips, + &view_resources.depth_pyramid.all_mips, view_uniforms.clone(), previous_view_uniforms.clone(), )); @@ -686,25 +635,12 @@ pub fn prepare_meshlet_view_bind_groups( &entries, ); - let downsample_depth = render_device.create_bind_group( + let downsample_depth = view_resources.depth_pyramid.create_bind_group( + &render_device, "meshlet_downsample_depth_bind_group", &resource_manager.downsample_depth_bind_group_layout, - &BindGroupEntries::sequential(( - view_resources.visibility_buffer.as_entire_binding(), - &view_resources.depth_pyramid_mips[0], - &view_resources.depth_pyramid_mips[1], - &view_resources.depth_pyramid_mips[2], - &view_resources.depth_pyramid_mips[3], - &view_resources.depth_pyramid_mips[4], - &view_resources.depth_pyramid_mips[5], - &view_resources.depth_pyramid_mips[6], - &view_resources.depth_pyramid_mips[7], - &view_resources.depth_pyramid_mips[8], - &view_resources.depth_pyramid_mips[9], - &view_resources.depth_pyramid_mips[10], - &view_resources.depth_pyramid_mips[11], - &resource_manager.depth_pyramid_sampler, - )), + view_resources.visibility_buffer.as_entire_binding(), + &resource_manager.depth_pyramid_sampler, ); let entries = BindGroupEntries::sequential(( diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs index 
9890deb4db..c4376d622a 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs @@ -149,10 +149,11 @@ impl Node for MeshletVisibilityBufferRasterPassNode { Some(camera), meshlet_view_resources.raster_cluster_rightmost_slot, ); - downsample_depth( + meshlet_view_resources.depth_pyramid.downsample_depth( + "meshlet early downsample depth", render_context, - meshlet_view_resources, - meshlet_view_bind_groups, + meshlet_view_resources.view_size, + &meshlet_view_bind_groups.downsample_depth, downsample_depth_first_pipeline, downsample_depth_second_pipeline, ); @@ -200,10 +201,11 @@ impl Node for MeshletVisibilityBufferRasterPassNode { resolve_material_depth_pipeline, camera, ); - downsample_depth( + meshlet_view_resources.depth_pyramid.downsample_depth( + "meshlet late downsample depth", render_context, - meshlet_view_resources, - meshlet_view_bind_groups, + meshlet_view_resources.view_size, + &meshlet_view_bind_groups.downsample_depth, downsample_depth_first_pipeline, downsample_depth_second_pipeline, ); @@ -267,10 +269,11 @@ impl Node for MeshletVisibilityBufferRasterPassNode { None, meshlet_view_resources.raster_cluster_rightmost_slot, ); - downsample_depth( + meshlet_view_resources.depth_pyramid.downsample_depth( + "meshlet early shadow downsample depth", render_context, - meshlet_view_resources, - meshlet_view_bind_groups, + meshlet_view_resources.view_size, + &meshlet_view_bind_groups.downsample_depth, downsample_depth_first_shadow_view_pipeline, downsample_depth_second_shadow_view_pipeline, ); @@ -311,10 +314,11 @@ impl Node for MeshletVisibilityBufferRasterPassNode { resolve_depth_shadow_view_pipeline, camera, ); - downsample_depth( + meshlet_view_resources.depth_pyramid.downsample_depth( + "meshlet late shadow downsample depth", render_context, - meshlet_view_resources, - meshlet_view_bind_groups, + meshlet_view_resources.view_size, + &meshlet_view_bind_groups.downsample_depth, 
downsample_depth_first_shadow_view_pipeline, downsample_depth_second_shadow_view_pipeline, ); @@ -471,39 +475,6 @@ fn raster_pass( hardware_pass.draw_indirect(visibility_buffer_hardware_raster_indirect_args, 0); } -fn downsample_depth( - render_context: &mut RenderContext, - meshlet_view_resources: &MeshletViewResources, - meshlet_view_bind_groups: &MeshletViewBindGroups, - downsample_depth_first_pipeline: &ComputePipeline, - downsample_depth_second_pipeline: &ComputePipeline, -) { - let command_encoder = render_context.command_encoder(); - let mut downsample_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { - label: Some("downsample_depth"), - timestamp_writes: None, - }); - downsample_pass.set_pipeline(downsample_depth_first_pipeline); - downsample_pass.set_push_constants( - 0, - bytemuck::cast_slice(&[ - meshlet_view_resources.depth_pyramid_mip_count, - meshlet_view_resources.view_size.x, - ]), - ); - downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth, &[]); - downsample_pass.dispatch_workgroups( - meshlet_view_resources.view_size.x.div_ceil(64), - meshlet_view_resources.view_size.y.div_ceil(64), - 1, - ); - - if meshlet_view_resources.depth_pyramid_mip_count >= 7 { - downsample_pass.set_pipeline(downsample_depth_second_pipeline); - downsample_pass.dispatch_workgroups(1, 1, 1); - } -} - fn resolve_depth( render_context: &mut RenderContext, depth_stencil_attachment: RenderPassDepthStencilAttachment, diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 9c9b800d06..3bc2dc51e3 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -6,7 +6,7 @@ use crate::{ setup_morph_and_skinning_defs, skin, DrawMesh, Material, MaterialPipeline, MaterialPipelineKey, MeshLayouts, MeshPipeline, MeshPipelineKey, OpaqueRendererMethod, PreparedMaterial, RenderLightmaps, RenderMaterialInstances, RenderMeshInstanceFlags, RenderMeshInstances, - SetMaterialBindGroup, 
SetMeshBindGroup, StandardMaterial, + SetMaterialBindGroup, SetMeshBindGroup, ShadowView, StandardMaterial, }; use bevy_app::{App, Plugin, PreUpdate}; use bevy_render::{ @@ -52,8 +52,6 @@ use crate::meshlet::{ prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, InstanceManager, MeshletMesh3d, }; -#[cfg(feature = "meshlet")] -use crate::ShadowView; use bevy_render::view::RenderVisibleEntities; use core::{hash::Hash, marker::PhantomData}; @@ -214,20 +212,16 @@ where #[derive(Resource)] struct AnyPrepassPluginLoaded; -#[cfg(not(feature = "meshlet"))] -type PreviousViewFilter = (With, With); -#[cfg(feature = "meshlet")] -type PreviousViewFilter = Or<(With, With)>; - pub fn update_previous_view_data( mut commands: Commands, - query: Query<(Entity, &Camera, &GlobalTransform), PreviousViewFilter>, + query: Query<(Entity, &Camera, &GlobalTransform), Or<(With, With)>>, ) { for (entity, camera, camera_transform) in &query { let view_from_world = camera_transform.compute_matrix().inverse(); commands.entity(entity).try_insert(PreviousViewData { view_from_world, clip_from_world: camera.clip_from_view() * view_from_world, + clip_from_view: camera.clip_from_view(), }); } } @@ -242,7 +236,7 @@ type PreviousMeshFilter = Or<(With, With)>; pub fn update_mesh_previous_global_transforms( mut commands: Commands, - views: Query<&Camera, PreviousViewFilter>, + views: Query<&Camera, Or<(With, With)>>, meshes: Query<(Entity, &GlobalTransform), PreviousMeshFilter>, ) { let should_run = views.iter().any(|camera| camera.is_active); @@ -698,7 +692,10 @@ pub fn prepare_previous_view_uniforms( render_device: Res, render_queue: Res, mut previous_view_uniforms: ResMut, - views: Query<(Entity, &ExtractedView, Option<&PreviousViewData>), PreviousViewFilter>, + views: Query< + (Entity, &ExtractedView, Option<&PreviousViewData>), + Or<(With, With)>, + >, ) { let views_iter = views.iter(); let view_count = views_iter.len(); @@ -718,6 +715,7 @@ pub fn prepare_previous_view_uniforms( 
PreviousViewData { view_from_world, clip_from_world: camera.clip_from_view * view_from_world, + clip_from_view: camera.clip_from_view, } } }; diff --git a/crates/bevy_pbr/src/prepass/prepass_bindings.wgsl b/crates/bevy_pbr/src/prepass/prepass_bindings.wgsl index a8dae83b8e..3bd27b2e03 100644 --- a/crates/bevy_pbr/src/prepass/prepass_bindings.wgsl +++ b/crates/bevy_pbr/src/prepass/prepass_bindings.wgsl @@ -3,10 +3,9 @@ struct PreviousViewUniforms { view_from_world: mat4x4, clip_from_world: mat4x4, + clip_from_view: mat4x4, } -#ifdef MOTION_VECTOR_PREPASS @group(0) @binding(2) var previous_view_uniforms: PreviousViewUniforms; -#endif // MOTION_VECTOR_PREPASS // Material bindings will be in @group(2) diff --git a/crates/bevy_pbr/src/render/build_indirect_params.wgsl b/crates/bevy_pbr/src/render/build_indirect_params.wgsl index 90741e9064..17152caab0 100644 --- a/crates/bevy_pbr/src/render/build_indirect_params.wgsl +++ b/crates/bevy_pbr/src/render/build_indirect_params.wgsl @@ -59,24 +59,47 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { let mesh_index = indirect_parameters_metadata[instance_index].mesh_index; let base_output_index = indirect_parameters_metadata[instance_index].base_output_index; let batch_set_index = indirect_parameters_metadata[instance_index].batch_set_index; - let instance_count = atomicLoad(&indirect_parameters_metadata[instance_index].instance_count); // If we aren't using `multi_draw_indirect_count`, we have a 1:1 fixed // assignment of batches to slots in the indirect parameters buffer, so we // can just use the instance index as the index of our indirect parameters. + let early_instance_count = + atomicLoad(&indirect_parameters_metadata[instance_index].early_instance_count); + let late_instance_count = + atomicLoad(&indirect_parameters_metadata[instance_index].late_instance_count); + + // If in the early phase, we draw only the early meshes. If in the late + // phase, we draw only the late meshes. 
If in the main phase, draw all the + // meshes. +#ifdef EARLY_PHASE + let instance_count = early_instance_count; +#else // EARLY_PHASE +#ifdef LATE_PHASE + let instance_count = late_instance_count; +#else // LATE_PHASE + let instance_count = early_instance_count + late_instance_count; +#endif // LATE_PHASE +#endif // EARLY_PHASE + var indirect_parameters_index = instance_index; // If the current hardware and driver support `multi_draw_indirect_count`, // dynamically reserve an index for the indirect parameters we're to // generate. #ifdef MULTI_DRAW_INDIRECT_COUNT_SUPPORTED - if (instance_count == 0u) { - return; - } - // If this batch belongs to a batch set, then allocate space for the // indirect commands in that batch set. if (batch_set_index != 0xffffffffu) { + // Bail out now if there are no instances. Note that we can only bail if + // we're in a batch set. That's because only batch sets are drawn using + // `multi_draw_indirect_count`. If we aren't using + // `multi_draw_indirect_count`, then we need to continue in order to + // zero out the instance count; otherwise, it'll have garbage data in + // it. + if (instance_count == 0u) { + return; + } + let indirect_parameters_base = indirect_batch_sets[batch_set_index].indirect_parameters_base; let indirect_parameters_offset = @@ -90,7 +113,16 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { // non-indexed meshes are slightly different. indirect_parameters[indirect_parameters_index].instance_count = instance_count; + +#ifdef LATE_PHASE + // The late mesh instances are stored after the early mesh instances, so we + // offset the output index by the number of early mesh instances. 
+ indirect_parameters[indirect_parameters_index].first_instance = + base_output_index + early_instance_count; +#else // LATE_PHASE indirect_parameters[indirect_parameters_index].first_instance = base_output_index; +#endif // LATE_PHASE + indirect_parameters[indirect_parameters_index].base_vertex = current_input[mesh_index].first_vertex_index; diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs index dac3c68068..dfc8739ca4 100644 --- a/crates/bevy_pbr/src/render/gpu_preprocess.rs +++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs @@ -10,30 +10,41 @@ use core::num::{NonZero, NonZeroU64}; use bevy_app::{App, Plugin}; use bevy_asset::{load_internal_asset, Handle}; -use bevy_core_pipeline::core_3d::graph::{Core3d, Node3d}; +use bevy_core_pipeline::{ + core_3d::graph::{Core3d, Node3d}, + experimental::mip_generation::ViewDepthPyramid, + prepass::{ + DeferredPrepass, DepthPrepass, PreviousViewData, PreviousViewUniformOffset, + PreviousViewUniforms, + }, +}; use bevy_derive::{Deref, DerefMut}; use bevy_ecs::{ component::Component, entity::Entity, - query::{Has, QueryState, Without}, + prelude::resource_exists, + query::{Has, QueryState, With, Without}, resource::Resource, - schedule::{common_conditions::resource_exists, IntoSystemConfigs as _}, - system::{lifetimeless::Read, Commands, Res, ResMut}, + schedule::IntoSystemConfigs as _, + system::{lifetimeless::Read, Commands, Query, Res, ResMut}, world::{FromWorld, World}, }; use bevy_render::{ batching::gpu_preprocessing::{ - BatchedInstanceBuffers, GpuPreprocessingSupport, IndirectBatchSet, - IndirectParametersBuffers, IndirectParametersIndexed, IndirectParametersMetadata, - IndirectParametersNonIndexed, PreprocessWorkItem, PreprocessWorkItemBuffers, + BatchedInstanceBuffers, GpuOcclusionCullingWorkItemBuffers, GpuPreprocessingSupport, + IndirectBatchSet, IndirectParametersBuffers, IndirectParametersIndexed, + IndirectParametersMetadata, IndirectParametersNonIndexed, + 
LatePreprocessWorkItemIndirectParameters, PreprocessWorkItem, PreprocessWorkItemBuffers, }, + experimental::occlusion_culling::OcclusionCulling, render_graph::{Node, NodeRunError, RenderGraphApp, RenderGraphContext}, render_resource::{ - binding_types::{storage_buffer, storage_buffer_read_only, uniform_buffer}, + binding_types::{storage_buffer, storage_buffer_read_only, texture_2d, uniform_buffer}, BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, Buffer, BufferBinding, - CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor, - DynamicBindGroupLayoutEntries, PipelineCache, Shader, ShaderStages, ShaderType, - SpecializedComputePipeline, SpecializedComputePipelines, + BufferVec, CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor, + DynamicBindGroupLayoutEntries, PipelineCache, PushConstantRange, RawBufferVec, Shader, + ShaderStages, ShaderType, SpecializedComputePipeline, SpecializedComputePipelines, + TextureSampleType, UninitBufferVec, }, renderer::{RenderContext, RenderDevice, RenderQueue}, settings::WgpuFeatures, @@ -57,6 +68,9 @@ pub const MESH_PREPROCESS_SHADER_HANDLE: Handle = /// The handle to the `mesh_preprocess_types.wgsl` compute shader. pub const MESH_PREPROCESS_TYPES_SHADER_HANDLE: Handle = Handle::weak_from_u128(2720440370122465935); +/// The handle to the `reset_indirect_batch_sets.wgsl` compute shader. +pub const RESET_INDIRECT_BATCH_SETS_SHADER_HANDLE: Handle = + Handle::weak_from_u128(2602194133710559644); /// The handle to the `build_indirect_params.wgsl` compute shader. pub const BUILD_INDIRECT_PARAMS_SHADER_HANDLE: Handle = Handle::weak_from_u128(3711077208359699672); @@ -76,32 +90,96 @@ pub struct GpuMeshPreprocessPlugin { pub use_gpu_instance_buffer_builder: bool, } -/// The render node for the mesh preprocessing pass. +/// The render node for the first mesh preprocessing pass. 
/// -/// This pass runs a compute shader to cull invisible meshes (if that wasn't -/// done by the CPU), transforms them, and, if indirect drawing is on, populates -/// indirect draw parameter metadata for the subsequent -/// [`BuildIndirectParametersNode`]. -pub struct GpuPreprocessNode { +/// This pass runs a compute shader to cull meshes outside the view frustum (if +/// that wasn't done by the CPU), cull meshes that weren't visible last frame +/// (if occlusion culling is on), transform them, and, if indirect drawing is +/// on, populate indirect draw parameter metadata for the subsequent +/// [`EarlyPrepassBuildIndirectParametersNode`]. +pub struct EarlyGpuPreprocessNode { view_query: QueryState< ( Entity, - Read, - Read, + Option>, + Option>, Has, + Has, ), Without, >, main_view_query: QueryState>, } -/// The render node for the indirect parameter building pass. +/// The render node for the second mesh preprocessing pass. /// -/// This node runs a compute shader on the output of the [`GpuPreprocessNode`] -/// in order to transform the [`IndirectParametersMetadata`] into -/// properly-formatted [`IndirectParametersIndexed`] and -/// [`IndirectParametersNonIndexed`]. -pub struct BuildIndirectParametersNode { +/// This pass runs a compute shader to cull meshes outside the view frustum (if +/// that wasn't done by the CPU), cull meshes that were neither visible last +/// frame nor visible this frame (if occlusion culling is on), transform them, +/// and, if indirect drawing is on, populate the indirect draw parameter +/// metadata for the subsequent [`LatePrepassBuildIndirectParametersNode`]. +pub struct LateGpuPreprocessNode { + view_query: QueryState< + (Entity, Read, Read), + ( + Without, + Without, + With, + With, + Without, + ), + >, +} + +/// The render node for the part of the indirect parameter building pass that +/// draws the meshes visible from the previous frame. 
+/// +/// This node runs a compute shader on the output of the +/// [`EarlyGpuPreprocessNode`] in order to transform the +/// [`IndirectParametersMetadata`] into properly-formatted +/// [`IndirectParametersIndexed`] and [`IndirectParametersNonIndexed`]. +pub struct EarlyPrepassBuildIndirectParametersNode { + view_query: QueryState< + Read, + ( + Without, + Without, + With, + Without, + ), + >, +} + +/// The render node for the part of the indirect parameter building pass that +/// draws the meshes that are potentially visible on this frame but weren't +/// visible on the previous frame. +/// +/// This node runs a compute shader on the output of the +/// [`LateGpuPreprocessNode`] in order to transform the +/// [`IndirectParametersMetadata`] into properly-formatted +/// [`IndirectParametersIndexed`] and [`IndirectParametersNonIndexed`]. +pub struct LatePrepassBuildIndirectParametersNode { + view_query: QueryState< + Read, + ( + Without, + Without, + With, + With, + Without, + ), + >, +} + +/// The render node for the part of the indirect parameter building pass that +/// draws all meshes, both those that are newly-visible on this frame and those +/// that were visible last frame. +/// +/// This node runs a compute shader on the output of the +/// [`EarlyGpuPreprocessNode`] and [`LateGpuPreprocessNode`] in order to +/// transform the [`IndirectParametersMetadata`] into properly-formatted +/// [`IndirectParametersIndexed`] and [`IndirectParametersNonIndexed`]. +pub struct MainBuildIndirectParametersNode { view_query: QueryState< Read, (Without, Without), @@ -115,19 +193,57 @@ pub struct PreprocessPipelines { /// The pipeline used for CPU culling. This pipeline doesn't populate /// indirect parameter metadata. pub direct_preprocess: PreprocessPipeline, - /// The pipeline used for GPU culling. This pipeline populates indirect - /// parameter metadata. 
- pub gpu_culling_preprocess: PreprocessPipeline, + /// The pipeline used for mesh preprocessing when GPU frustum culling is in + /// use, but occlusion culling isn't. + /// + /// This pipeline populates indirect parameter metadata. + pub gpu_frustum_culling_preprocess: PreprocessPipeline, + /// The pipeline used for the first phase of occlusion culling. + /// + /// This pipeline culls, transforms meshes, and populates indirect parameter + /// metadata. + pub early_gpu_occlusion_culling_preprocess: PreprocessPipeline, + /// The pipeline used for the second phase of occlusion culling. + /// + /// This pipeline culls, transforms meshes, and populates indirect parameter + /// metadata. + pub late_gpu_occlusion_culling_preprocess: PreprocessPipeline, + /// The pipeline that builds indirect draw parameters for indexed meshes, + /// when frustum culling is enabled but occlusion culling *isn't* enabled. + pub gpu_frustum_culling_build_indexed_indirect_params: BuildIndirectParametersPipeline, + /// The pipeline that builds indirect draw parameters for non-indexed + /// meshes, when frustum culling is enabled but occlusion culling *isn't* + /// enabled. + pub gpu_frustum_culling_build_non_indexed_indirect_params: BuildIndirectParametersPipeline, + /// Compute shader pipelines for the early prepass phase that draws meshes + /// visible in the previous frame. + pub early_phase: PreprocessPhasePipelines, + /// Compute shader pipelines for the late prepass phase that draws meshes + /// that weren't visible in the previous frame, but became visible this + /// frame. + pub late_phase: PreprocessPhasePipelines, + /// Compute shader pipelines for the main color phase. + pub main_phase: PreprocessPhasePipelines, +} + +/// Compute shader pipelines for a specific phase: early, late, or main. +/// +/// The distinction between these phases is relevant for occlusion culling. 
+#[derive(Clone)] +pub struct PreprocessPhasePipelines { + /// The pipeline that resets the indirect draw counts used in + /// `multi_draw_indirect_count` to 0 in preparation for a new pass. + pub reset_indirect_batch_sets: ResetIndirectBatchSetsPipeline, /// The pipeline used for indexed indirect parameter building. /// /// This pipeline converts indirect parameter metadata into indexed indirect /// parameters. - pub build_indexed_indirect_params: BuildIndirectParametersPipeline, + pub gpu_occlusion_culling_build_indexed_indirect_params: BuildIndirectParametersPipeline, /// The pipeline used for non-indexed indirect parameter building. /// /// This pipeline converts indirect parameter metadata into non-indexed /// indirect parameters. - pub build_non_indexed_indirect_params: BuildIndirectParametersPipeline, + pub gpu_occlusion_culling_build_non_indexed_indirect_params: BuildIndirectParametersPipeline, } /// The pipeline for the GPU mesh preprocessing shader. @@ -140,7 +256,22 @@ pub struct PreprocessPipeline { pub pipeline_id: Option, } +/// The pipeline for the batch set count reset shader. +/// +/// This shader resets the indirect batch set count to 0 for each view. It runs +/// in between every phase (early, late, and main). +#[derive(Clone)] +pub struct ResetIndirectBatchSetsPipeline { + /// The bind group layout for the compute shader. + pub bind_group_layout: BindGroupLayout, + /// The pipeline ID for the compute shader. + /// + /// This gets filled in `prepare_preprocess_pipelines`. + pub pipeline_id: Option, +} + /// The pipeline for the indirect parameter building shader. +#[derive(Clone)] pub struct BuildIndirectParametersPipeline { /// The bind group layout for the compute shader. pub bind_group_layout: BindGroupLayout, @@ -154,10 +285,18 @@ bitflags! { /// Specifies variants of the mesh preprocessing shader. #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct PreprocessPipelineKey: u8 { - /// Whether GPU culling is in use. 
+ /// Whether GPU frustum culling is in use. /// - /// This `#define`'s `GPU_CULLING` in the shader. - const GPU_CULLING = 1; + /// This `#define`'s `FRUSTUM_CULLING` in the shader. + const FRUSTUM_CULLING = 1; + /// Whether GPU two-phase occlusion culling is in use. + /// + /// This `#define`'s `OCCLUSION_CULLING` in the shader. + const OCCLUSION_CULLING = 2; + /// Whether this is the early phase of GPU two-phase occlusion culling. + /// + /// This `#define`'s `EARLY_PHASE` in the shader. + const EARLY_PHASE = 4; } /// Specifies variants of the indirect parameter building shader. @@ -172,6 +311,24 @@ bitflags! { /// /// This defines `MULTI_DRAW_INDIRECT_COUNT_SUPPORTED` in the shader. const MULTI_DRAW_INDIRECT_COUNT_SUPPORTED = 2; + /// Whether GPU two-phase occlusion culling is in use. + /// + /// This `#define`'s `OCCLUSION_CULLING` in the shader. + const OCCLUSION_CULLING = 4; + /// Whether this is the early phase of GPU two-phase occlusion culling. + /// + /// This `#define`'s `EARLY_PHASE` in the shader. + const EARLY_PHASE = 8; + /// Whether this is the late phase of GPU two-phase occlusion culling. + /// + /// This `#define`'s `LATE_PHASE` in the shader. + const LATE_PHASE = 16; + /// Whether this is the phase that runs after the early and late phases, + /// and right before the main drawing logic, when GPU two-phase + /// occlusion culling is in use. + /// + /// This `#define`'s `MAIN_PHASE` in the shader. + const MAIN_PHASE = 32; } } @@ -196,35 +353,56 @@ pub enum PhasePreprocessBindGroups { Direct(BindGroup), /// The bind groups used for the compute shader when indirect drawing is - /// being used. + /// being used, but occlusion culling isn't being used. /// /// Because indirect drawing requires splitting the meshes into indexed and /// non-indexed meshes, there are two bind groups here. - Indirect { - /// The bind group used for indexed meshes. - /// - /// This will be `None` if there are no indexed meshes. 
+ IndirectFrustumCulling {
+ /// The bind group for indexed meshes.
indexed: Option,
- /// The bind group used for non-indexed meshes.
- ///
- /// This will be `None` if there are no non-indexed meshes.
+ /// The bind group for non-indexed meshes.
non_indexed: Option,
},
+
+ /// The bind groups used for the compute shader when indirect drawing is
+ /// being used together with occlusion culling.
+ ///
+ /// Because indirect drawing requires splitting the meshes into indexed and
+ /// non-indexed meshes, and because occlusion culling requires splitting
+ /// this phase into early and late versions, there are four bind groups
+ /// here.
+ IndirectOcclusionCulling {
+ /// The bind group for indexed meshes during the early mesh
+ /// preprocessing phase.
+ early_indexed: Option,
+ /// The bind group for non-indexed meshes during the early mesh
+ /// preprocessing phase.
+ early_non_indexed: Option,
+ /// The bind group for indexed meshes during the late mesh preprocessing
+ /// phase.
+ late_indexed: Option,
+ /// The bind group for non-indexed meshes during the late mesh
+ /// preprocessing phase.
+ late_non_indexed: Option,
+ },
}

-/// The bind groups for the indirect parameters building compute shader.
-///
-/// This is shared among all views and phases.
+/// The bind groups for the compute shaders that reset indirect draw counts and
+/// build indirect parameters.
#[derive(Resource)]
pub struct BuildIndirectParametersBindGroups {
- /// The bind group used for indexed meshes.
- ///
- /// This will be `None` if there are no indexed meshes.
- indexed: Option,
- /// The bind group used for non-indexed meshes.
- ///
- /// This will be `None` if there are no non-indexed meshes.
- non_indexed: Option,
+ /// The bind group for the `reset_indirect_batch_sets.wgsl` shader, for
+ /// indexed meshes.
+ reset_indexed_indirect_batch_sets: Option,
+ /// The bind group for the `reset_indirect_batch_sets.wgsl` shader, for
+ /// non-indexed meshes.
+ reset_non_indexed_indirect_batch_sets: Option,
+ /// The bind group for the `build_indirect_params.wgsl` shader, for indexed
+ /// meshes.
+ build_indexed_indirect: Option,
+ /// The bind group for the `build_indirect_params.wgsl` shader, for
+ /// non-indexed meshes.
+ build_non_indexed_indirect: Option,
}

/// Stops the `GpuPreprocessNode` attempting to generate the buffer for this view
@@ -242,8 +420,20 @@ impl Plugin for GpuMeshPreprocessPlugin {
);
load_internal_asset!(
app,
- MESH_PREPROCESS_TYPES_SHADER_HANDLE,
- "mesh_preprocess_types.wgsl",
+ RESET_INDIRECT_BATCH_SETS_SHADER_HANDLE,
+ "reset_indirect_batch_sets.wgsl",
+ Shader::from_wgsl
+);
+load_internal_asset!(
+ app,
+ BUILD_INDIRECT_PARAMS_SHADER_HANDLE,
+ "build_indirect_params.wgsl",
+ Shader::from_wgsl
+);
+load_internal_asset!(
+ app,
+ MESH_PREPROCESS_TYPES_SHADER_HANDLE,
+ "mesh_preprocess_types.wgsl",
Shader::from_wgsl
);
load_internal_asset!(
@@ -269,6 +459,7 @@ impl Plugin for GpuMeshPreprocessPlugin {
render_app
.init_resource::()
.init_resource::>()
+ .init_resource::>()
.init_resource::>()
.add_systems(
Render,
@@ -280,25 +471,48 @@ impl Plugin for GpuMeshPreprocessPlugin {
)
.in_set(RenderSet::PrepareBindGroups),
write_mesh_culling_data_buffer.in_set(RenderSet::PrepareResourcesFlush),
- )
+ ),
)
- .add_render_graph_node::(Core3d, NodePbr::GpuPreprocess)
- .add_render_graph_node::(
+ .add_render_graph_node::(Core3d, NodePbr::EarlyGpuPreprocess)
+ .add_render_graph_node::(Core3d, NodePbr::LateGpuPreprocess)
+ .add_render_graph_node::(
Core3d,
- NodePbr::BuildIndirectParameters
+ NodePbr::EarlyPrepassBuildIndirectParameters,
+ )
+ .add_render_graph_node::(
+ Core3d,
+ NodePbr::LatePrepassBuildIndirectParameters,
+ )
+ .add_render_graph_node::(
+ Core3d,
+ NodePbr::MainBuildIndirectParameters,
)
.add_render_graph_edges(
Core3d,
- (NodePbr::GpuPreprocess, NodePbr::BuildIndirectParameters, Node3d::Prepass)
+ (
+ NodePbr::EarlyGpuPreprocess,
+ NodePbr::EarlyPrepassBuildIndirectParameters,
+
Node3d::EarlyPrepass, + Node3d::EarlyDownsampleDepth, + NodePbr::LateGpuPreprocess, + NodePbr::LatePrepassBuildIndirectParameters, + Node3d::LatePrepass, + NodePbr::MainBuildIndirectParameters, + // Shadows don't currently support occlusion culling, so we + // treat shadows as effectively the main phase for our + // purposes. + NodePbr::ShadowPass, + ), ) - .add_render_graph_edges( + .add_render_graph_edge( Core3d, - (NodePbr::GpuPreprocess, NodePbr::BuildIndirectParameters, NodePbr::ShadowPass) + NodePbr::MainBuildIndirectParameters, + Node3d::DeferredPrepass ); } } -impl FromWorld for GpuPreprocessNode { +impl FromWorld for EarlyGpuPreprocessNode { fn from_world(world: &mut World) -> Self { Self { view_query: QueryState::new(world), @@ -307,7 +521,7 @@ impl FromWorld for GpuPreprocessNode { } } -impl Node for GpuPreprocessNode { +impl Node for EarlyGpuPreprocessNode { fn update(&mut self, world: &mut World) { self.view_query.update_archetypes(world); self.main_view_query.update_archetypes(world); @@ -332,7 +546,7 @@ impl Node for GpuPreprocessNode { render_context .command_encoder() .begin_compute_pass(&ComputePassDescriptor { - label: Some("mesh preprocessing"), + label: Some("early mesh preprocessing"), timestamp_writes: None, }); @@ -347,24 +561,42 @@ impl Node for GpuPreprocessNode { // Run the compute passes. for view_entity in all_views { - let Ok((view, bind_groups, view_uniform_offset, no_indirect_drawing)) = - self.view_query.get_manual(world, view_entity) + let Ok(( + view, + bind_groups, + view_uniform_offset, + no_indirect_drawing, + occlusion_culling, + )) = self.view_query.get_manual(world, view_entity) else { continue; }; + let Some(bind_groups) = bind_groups else { + continue; + }; + let Some(view_uniform_offset) = view_uniform_offset else { + continue; + }; + // Grab the work item buffers for this view. 
- let Some(view_work_item_buffers) = index_buffers.get(&view) else { + let Some(phase_work_item_buffers) = index_buffers.get(&view) else { warn!("The preprocessing index buffer wasn't present"); continue; }; // Select the right pipeline, depending on whether GPU culling is in // use. - let maybe_pipeline_id = if !no_indirect_drawing { - preprocess_pipelines.gpu_culling_preprocess.pipeline_id - } else { + let maybe_pipeline_id = if no_indirect_drawing { preprocess_pipelines.direct_preprocess.pipeline_id + } else if occlusion_culling { + preprocess_pipelines + .early_gpu_occlusion_culling_preprocess + .pipeline_id + } else { + preprocess_pipelines + .gpu_frustum_culling_preprocess + .pipeline_id }; // Fetch the pipeline. @@ -383,7 +615,7 @@ impl Node for GpuPreprocessNode { compute_pass.set_pipeline(preprocess_pipeline); // Loop over each render phase. - for (phase_type_id, phase_work_item_buffers) in view_work_item_buffers { + for (phase_type_id, work_item_buffers) in phase_work_item_buffers { // Fetch the bind group for the render phase. let Some(phase_bind_groups) = bind_groups.get(phase_type_id) else { continue; @@ -401,21 +633,25 @@ impl Node for GpuPreprocessNode { PhasePreprocessBindGroups::Direct(ref bind_group) => { // Invoke the mesh preprocessing shader to transform // meshes only, but not cull. 
- let PreprocessWorkItemBuffers::Direct(phase_work_item_buffer) = - phase_work_item_buffers + let PreprocessWorkItemBuffers::Direct(work_item_buffer) = work_item_buffers else { continue; }; compute_pass.set_bind_group(0, bind_group, &dynamic_offsets); - let workgroup_count = phase_work_item_buffer.len().div_ceil(WORKGROUP_SIZE); + let workgroup_count = work_item_buffer.len().div_ceil(WORKGROUP_SIZE); if workgroup_count > 0 { compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); } } - PhasePreprocessBindGroups::Indirect { + PhasePreprocessBindGroups::IndirectFrustumCulling { indexed: ref maybe_indexed_bind_group, non_indexed: ref maybe_non_indexed_bind_group, + } + | PhasePreprocessBindGroups::IndirectOcclusionCulling { + early_indexed: ref maybe_indexed_bind_group, + early_non_indexed: ref maybe_non_indexed_bind_group, + .. } => { // Invoke the mesh preprocessing shader to transform and // cull the meshes. @@ -423,13 +659,28 @@ impl Node for GpuPreprocessNode { indexed: indexed_buffer, non_indexed: non_indexed_buffer, .. - } = phase_work_item_buffers + } = work_item_buffers else { continue; }; // Transform and cull indexed meshes if there are any. if let Some(indexed_bind_group) = maybe_indexed_bind_group { + if let PreprocessWorkItemBuffers::Indirect { + gpu_occlusion_culling: + Some(GpuOcclusionCullingWorkItemBuffers { + late_indirect_parameters_indexed_offset, + .. + }), + .. + } = *work_item_buffers + { + compute_pass.set_push_constants( + 0, + bytemuck::bytes_of(&late_indirect_parameters_indexed_offset), + ); + } + compute_pass.set_bind_group(0, indexed_bind_group, &dynamic_offsets); let workgroup_count = indexed_buffer.len().div_ceil(WORKGROUP_SIZE); if workgroup_count > 0 { @@ -439,6 +690,23 @@ impl Node for GpuPreprocessNode { // Transform and cull non-indexed meshes if there are any. 
if let Some(non_indexed_bind_group) = maybe_non_indexed_bind_group { + if let PreprocessWorkItemBuffers::Indirect { + gpu_occlusion_culling: + Some(GpuOcclusionCullingWorkItemBuffers { + late_indirect_parameters_non_indexed_offset, + .. + }), + .. + } = *work_item_buffers + { + compute_pass.set_push_constants( + 0, + bytemuck::bytes_of( + &late_indirect_parameters_non_indexed_offset, + ), + ); + } + compute_pass.set_bind_group( 0, non_indexed_bind_group, @@ -458,7 +726,7 @@ impl Node for GpuPreprocessNode { } } -impl FromWorld for BuildIndirectParametersNode { +impl FromWorld for EarlyPrepassBuildIndirectParametersNode { fn from_world(world: &mut World) -> Self { Self { view_query: QueryState::new(world), @@ -466,7 +734,31 @@ impl FromWorld for BuildIndirectParametersNode { } } -impl Node for BuildIndirectParametersNode { +impl FromWorld for LatePrepassBuildIndirectParametersNode { + fn from_world(world: &mut World) -> Self { + Self { + view_query: QueryState::new(world), + } + } +} + +impl FromWorld for MainBuildIndirectParametersNode { + fn from_world(world: &mut World) -> Self { + Self { + view_query: QueryState::new(world), + } + } +} + +impl FromWorld for LateGpuPreprocessNode { + fn from_world(world: &mut World) -> Self { + Self { + view_query: QueryState::new(world), + } + } +} + +impl Node for LateGpuPreprocessNode { fn update(&mut self, world: &mut World) { self.view_query.update_archetypes(world); } @@ -477,88 +769,117 @@ impl Node for BuildIndirectParametersNode { render_context: &mut RenderContext<'w>, world: &'w World, ) -> Result<(), NodeRunError> { - // Fetch the bind group. - let Some(build_indirect_params_bind_groups) = - world.get_resource::() - else { - return Ok(()); - }; + // Grab the [`BatchedInstanceBuffers`]. + let BatchedInstanceBuffers { + ref work_item_buffers, + ref late_indexed_indirect_parameters_buffer, + ref late_non_indexed_indirect_parameters_buffer, + .. + } = world.resource::>(); - // Fetch the pipelines and the buffers we need. 
let pipeline_cache = world.resource::(); let preprocess_pipelines = world.resource::(); - let indirect_parameters_buffers = world.resource::(); - // Create the compute pass. let mut compute_pass = render_context .command_encoder() .begin_compute_pass(&ComputePassDescriptor { - label: Some("build indirect parameters"), + label: Some("late mesh preprocessing"), timestamp_writes: None, }); - // Fetch the pipelines. + // Run the compute passes. + for (view, bind_groups, view_uniform_offset) in self.view_query.iter_manual(world) { + // Grab the work item buffers for this view. + let Some(phase_work_item_buffers) = work_item_buffers.get(&view) else { + warn!("The preprocessing index buffer wasn't present"); + continue; + }; - let (maybe_indexed_pipeline_id, maybe_non_indexed_pipeline_id) = ( - preprocess_pipelines - .build_indexed_indirect_params - .pipeline_id, - preprocess_pipelines - .build_non_indexed_indirect_params - .pipeline_id, - ); + let maybe_pipeline_id = preprocess_pipelines + .late_gpu_occlusion_culling_preprocess + .pipeline_id; - let ( - Some(build_indexed_indirect_params_pipeline_id), - Some(build_non_indexed_indirect_params_pipeline_id), - ) = (maybe_indexed_pipeline_id, maybe_non_indexed_pipeline_id) - else { - warn!("The build indirect parameters pipelines weren't ready"); - return Ok(()); - }; + // Fetch the pipeline. + let Some(preprocess_pipeline_id) = maybe_pipeline_id else { + warn!("The build mesh uniforms pipeline wasn't ready"); + return Ok(()); + }; - let ( - Some(build_indexed_indirect_params_pipeline), - Some(build_non_indexed_indirect_params_pipeline), - ) = ( - pipeline_cache.get_compute_pipeline(build_indexed_indirect_params_pipeline_id), - pipeline_cache.get_compute_pipeline(build_non_indexed_indirect_params_pipeline_id), - ) - else { - // This will happen while the pipeline is being compiled and is fine. 
- return Ok(()); - }; + let Some(preprocess_pipeline) = + pipeline_cache.get_compute_pipeline(preprocess_pipeline_id) + else { + // This will happen while the pipeline is being compiled and is fine. + return Ok(()); + }; - // Transform the [`IndirectParametersMetadata`] that the GPU mesh - // preprocessing phase wrote to [`IndirectParametersIndexed`] for - // indexed meshes, if we have any. - if let Some(ref build_indirect_indexed_params_bind_group) = - build_indirect_params_bind_groups.indexed - { - compute_pass.set_pipeline(build_indexed_indirect_params_pipeline); - compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .indexed_batch_count() - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); - } - } + compute_pass.set_pipeline(preprocess_pipeline); - // Transform the [`IndirectParametersMetadata`] that the GPU mesh - // preprocessing phase wrote to [`IndirectParametersNonIndexed`] for - // non-indexed meshes, if we have any. - if let Some(ref build_indirect_non_indexed_params_bind_group) = - build_indirect_params_bind_groups.non_indexed - { - compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline); - compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]); - let workgroup_count = indirect_parameters_buffers - .non_indexed_batch_count() - .div_ceil(WORKGROUP_SIZE); - if workgroup_count > 0 { - compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + for (phase_type_id, work_item_buffers) in phase_work_item_buffers { + let ( + PreprocessWorkItemBuffers::Indirect { + gpu_occlusion_culling: + Some(GpuOcclusionCullingWorkItemBuffers { + late_indirect_parameters_indexed_offset, + late_indirect_parameters_non_indexed_offset, + .. + }), + .. 
+ }, + Some(PhasePreprocessBindGroups::IndirectOcclusionCulling { + late_indexed: ref maybe_late_indexed_bind_group, + late_non_indexed: ref maybe_late_non_indexed_bind_group, + .. + }), + Some(late_indexed_indirect_parameters_buffer), + Some(late_non_indexed_indirect_parameters_buffer), + ) = ( + work_item_buffers, + bind_groups.get(phase_type_id), + late_indexed_indirect_parameters_buffer.buffer(), + late_non_indexed_indirect_parameters_buffer.buffer(), + ) + else { + continue; + }; + + let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![]; + dynamic_offsets.push(view_uniform_offset.offset); + + // If there's no space reserved for work items, then don't + // bother doing the dispatch, as there can't possibly be any + // meshes of the given class (indexed or non-indexed) in this + // phase. + + // Transform and cull indexed meshes if there are any. + if let Some(late_indexed_bind_group) = maybe_late_indexed_bind_group { + compute_pass.set_push_constants( + 0, + bytemuck::bytes_of(late_indirect_parameters_indexed_offset), + ); + + compute_pass.set_bind_group(0, late_indexed_bind_group, &dynamic_offsets); + compute_pass.dispatch_workgroups_indirect( + late_indexed_indirect_parameters_buffer, + (*late_indirect_parameters_indexed_offset as u64) + * (size_of::() as u64), + ); + } + + // Transform and cull non-indexed meshes if there are any. 
+ if let Some(late_non_indexed_bind_group) = maybe_late_non_indexed_bind_group { + compute_pass.set_push_constants( + 0, + bytemuck::bytes_of(late_indirect_parameters_non_indexed_offset), + ); + + compute_pass.set_bind_group(0, late_non_indexed_bind_group, &dynamic_offsets); + compute_pass.dispatch_workgroups_indirect( + late_non_indexed_indirect_parameters_buffer, + (*late_indirect_parameters_non_indexed_offset as u64) + * (size_of::() as u64), + ); + } } } @@ -566,15 +887,233 @@ impl Node for BuildIndirectParametersNode { } } +impl Node for EarlyPrepassBuildIndirectParametersNode { + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let preprocess_pipelines = world.resource::(); + + // If there are no views with a depth prepass enabled, we don't need to + // run this. + if self.view_query.iter_manual(world).next().is_none() { + return Ok(()); + } + + run_build_indirect_parameters_node( + render_context, + world, + &preprocess_pipelines.early_phase, + "early prepass indirect parameters building", + ) + } +} + +impl Node for LatePrepassBuildIndirectParametersNode { + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let preprocess_pipelines = world.resource::(); + + // If there are no views with occlusion culling enabled, we don't need + // to run this. 
+ if self.view_query.iter_manual(world).next().is_none() { + return Ok(()); + } + + run_build_indirect_parameters_node( + render_context, + world, + &preprocess_pipelines.late_phase, + "late prepass indirect parameters building", + ) + } +} + +impl Node for MainBuildIndirectParametersNode { + fn update(&mut self, world: &mut World) { + self.view_query.update_archetypes(world); + } + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + let preprocess_pipelines = world.resource::(); + + run_build_indirect_parameters_node( + render_context, + world, + &preprocess_pipelines.main_phase, + "main indirect parameters building", + ) + } +} + +fn run_build_indirect_parameters_node( + render_context: &mut RenderContext, + world: &World, + preprocess_phase_pipelines: &PreprocessPhasePipelines, + label: &'static str, +) -> Result<(), NodeRunError> { + let Some(build_indirect_params_bind_groups) = + world.get_resource::() + else { + return Ok(()); + }; + + let pipeline_cache = world.resource::(); + let indirect_parameters_buffers = world.resource::(); + + let mut compute_pass = + render_context + .command_encoder() + .begin_compute_pass(&ComputePassDescriptor { + label: Some(label), + timestamp_writes: None, + }); + + // Fetch the pipeline. 
+ let ( + Some(reset_indirect_batch_sets_pipeline_id), + Some(build_indexed_indirect_params_pipeline_id), + Some(build_non_indexed_indirect_params_pipeline_id), + ) = ( + preprocess_phase_pipelines + .reset_indirect_batch_sets + .pipeline_id, + preprocess_phase_pipelines + .gpu_occlusion_culling_build_indexed_indirect_params + .pipeline_id, + preprocess_phase_pipelines + .gpu_occlusion_culling_build_non_indexed_indirect_params + .pipeline_id, + ) + else { + warn!("The build indirect parameters pipelines weren't ready"); + return Ok(()); + }; + + let ( + Some(reset_indirect_batch_sets_pipeline), + Some(build_indexed_indirect_params_pipeline), + Some(build_non_indexed_indirect_params_pipeline), + ) = ( + pipeline_cache.get_compute_pipeline(reset_indirect_batch_sets_pipeline_id), + pipeline_cache.get_compute_pipeline(build_indexed_indirect_params_pipeline_id), + pipeline_cache.get_compute_pipeline(build_non_indexed_indirect_params_pipeline_id), + ) + else { + // This will happen while the pipeline is being compiled and is fine. + return Ok(()); + }; + + // Build indexed indirect parameters. 
+ if let ( + Some(reset_indexed_indirect_batch_sets_bind_group), + Some(build_indirect_indexed_params_bind_group), + ) = ( + &build_indirect_params_bind_groups.reset_indexed_indirect_batch_sets, + &build_indirect_params_bind_groups.build_indexed_indirect, + ) { + compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); + compute_pass.set_bind_group(0, reset_indexed_indirect_batch_sets_bind_group, &[]); + let workgroup_count = indirect_parameters_buffers + .batch_set_count(true) + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + + compute_pass.set_pipeline(build_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]); + let workgroup_count = indirect_parameters_buffers + .indexed_batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + + // Build non-indexed indirect parameters. 
+ if let ( + Some(reset_non_indexed_indirect_batch_sets_bind_group), + Some(build_indirect_non_indexed_params_bind_group), + ) = ( + &build_indirect_params_bind_groups.reset_non_indexed_indirect_batch_sets, + &build_indirect_params_bind_groups.build_non_indexed_indirect, + ) { + compute_pass.set_pipeline(reset_indirect_batch_sets_pipeline); + compute_pass.set_bind_group(0, reset_non_indexed_indirect_batch_sets_bind_group, &[]); + let workgroup_count = indirect_parameters_buffers + .batch_set_count(false) + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + + compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline); + compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]); + let workgroup_count = indirect_parameters_buffers + .non_indexed_batch_count() + .div_ceil(WORKGROUP_SIZE); + if workgroup_count > 0 { + compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1); + } + } + + Ok(()) +} + impl PreprocessPipelines { /// Returns true if the preprocessing and indirect parameters pipelines have /// been loaded or false otherwise. 
pub(crate) fn pipelines_are_loaded(&self, pipeline_cache: &PipelineCache) -> bool { self.direct_preprocess.is_loaded(pipeline_cache) - && self.gpu_culling_preprocess.is_loaded(pipeline_cache) - && self.build_indexed_indirect_params.is_loaded(pipeline_cache) && self - .build_non_indexed_indirect_params + .gpu_frustum_culling_preprocess + .is_loaded(pipeline_cache) + && self + .early_gpu_occlusion_culling_preprocess + .is_loaded(pipeline_cache) + && self + .late_gpu_occlusion_culling_preprocess + .is_loaded(pipeline_cache) + && self + .gpu_frustum_culling_build_indexed_indirect_params + .is_loaded(pipeline_cache) + && self + .gpu_frustum_culling_build_non_indexed_indirect_params + .is_loaded(pipeline_cache) + && self.early_phase.is_loaded(pipeline_cache) + && self.late_phase.is_loaded(pipeline_cache) + && self.main_phase.is_loaded(pipeline_cache) + } +} + +impl PreprocessPhasePipelines { + fn is_loaded(&self, pipeline_cache: &PipelineCache) -> bool { + self.reset_indirect_batch_sets.is_loaded(pipeline_cache) + && self + .gpu_occlusion_culling_build_indexed_indirect_params + .is_loaded(pipeline_cache) + && self + .gpu_occlusion_culling_build_non_indexed_indirect_params .is_loaded(pipeline_cache) } } @@ -586,6 +1125,13 @@ impl PreprocessPipeline { } } +impl ResetIndirectBatchSetsPipeline { + fn is_loaded(&self, pipeline_cache: &PipelineCache) -> bool { + self.pipeline_id + .is_some_and(|pipeline_id| pipeline_cache.get_compute_pipeline(pipeline_id).is_some()) + } +} + impl BuildIndirectParametersPipeline { /// Returns true if this pipeline has been loaded into the pipeline cache or /// false otherwise. 
@@ -600,17 +1146,32 @@ impl SpecializedComputePipeline for PreprocessPipeline { fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor { let mut shader_defs = vec![]; - if key.contains(PreprocessPipelineKey::GPU_CULLING) { + if key.contains(PreprocessPipelineKey::FRUSTUM_CULLING) { shader_defs.push("INDIRECT".into()); shader_defs.push("FRUSTUM_CULLING".into()); } + if key.contains(PreprocessPipelineKey::OCCLUSION_CULLING) { + shader_defs.push("OCCLUSION_CULLING".into()); + if key.contains(PreprocessPipelineKey::EARLY_PHASE) { + shader_defs.push("EARLY_PHASE".into()); + } else { + shader_defs.push("LATE_PHASE".into()); + } + } ComputePipelineDescriptor { label: Some( format!( "mesh preprocessing ({})", - if key.contains(PreprocessPipelineKey::GPU_CULLING) { - "GPU culling" + if key.contains( + PreprocessPipelineKey::OCCLUSION_CULLING + | PreprocessPipelineKey::EARLY_PHASE + ) { + "early GPU occlusion culling" + } else if key.contains(PreprocessPipelineKey::OCCLUSION_CULLING) { + "late GPU occlusion culling" + } else if key.contains(PreprocessPipelineKey::FRUSTUM_CULLING) { + "GPU frustum culling" } else { "direct" } @@ -618,7 +1179,14 @@ impl SpecializedComputePipeline for PreprocessPipeline { .into(), ), layout: vec![self.bind_group_layout.clone()], - push_constant_ranges: vec![], + push_constant_ranges: if key.contains(PreprocessPipelineKey::OCCLUSION_CULLING) { + vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }] + } else { + vec![] + }, shader: MESH_PREPROCESS_SHADER_HANDLE, shader_defs, entry_point: "main".into(), @@ -634,15 +1202,20 @@ impl FromWorld for PreprocessPipelines { // GPU culling bind group parameters are a superset of those in the CPU // culling (direct) shader. 
let direct_bind_group_layout_entries = preprocess_direct_bind_group_layout_entries(); - let gpu_culling_bind_group_layout_entries = preprocess_direct_bind_group_layout_entries() - .extend_sequential(( - // `indirect_parameters_metadata` - storage_buffer::(/* has_dynamic_offset= */ false), - // `mesh_culling_data` - storage_buffer_read_only::(/* has_dynamic_offset= */ false), - // `view` - uniform_buffer::(/* has_dynamic_offset= */ true), - )); + let gpu_frustum_culling_bind_group_layout_entries = gpu_culling_bind_group_layout_entries(); + let gpu_early_occlusion_culling_bind_group_layout_entries = + gpu_occlusion_culling_bind_group_layout_entries().extend_with_indices((( + 11, + storage_buffer::(/*has_dynamic_offset=*/ false), + ),)); + let gpu_late_occlusion_culling_bind_group_layout_entries = + gpu_occlusion_culling_bind_group_layout_entries(); + + let reset_indirect_batch_sets_bind_group_layout_entries = + DynamicBindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + (storage_buffer::(false),), + ); // Indexed and non-indexed bind group parameters share all the bind // group layout entries except the final one. 
@@ -658,9 +1231,21 @@ impl FromWorld for PreprocessPipelines { "build mesh uniforms direct bind group layout", &direct_bind_group_layout_entries, ); - let gpu_culling_bind_group_layout = render_device.create_bind_group_layout( - "build mesh uniforms GPU culling bind group layout", - &gpu_culling_bind_group_layout_entries, + let gpu_frustum_culling_bind_group_layout = render_device.create_bind_group_layout( + "build mesh uniforms GPU frustum culling bind group layout", + &gpu_frustum_culling_bind_group_layout_entries, + ); + let gpu_early_occlusion_culling_bind_group_layout = render_device.create_bind_group_layout( + "build mesh uniforms GPU early occlusion culling bind group layout", + &gpu_early_occlusion_culling_bind_group_layout_entries, + ); + let gpu_late_occlusion_culling_bind_group_layout = render_device.create_bind_group_layout( + "build mesh uniforms GPU late occlusion culling bind group layout", + &gpu_late_occlusion_culling_bind_group_layout_entries, + ); + let reset_indirect_batch_sets_bind_group_layout = render_device.create_bind_group_layout( + "reset indirect batch sets bind group layout", + &reset_indirect_batch_sets_bind_group_layout_entries, ); let build_indexed_indirect_params_bind_group_layout = render_device .create_bind_group_layout( @@ -673,39 +1258,67 @@ impl FromWorld for PreprocessPipelines { &build_non_indexed_indirect_params_bind_group_layout_entries, ); + let preprocess_phase_pipelines = PreprocessPhasePipelines { + reset_indirect_batch_sets: ResetIndirectBatchSetsPipeline { + bind_group_layout: reset_indirect_batch_sets_bind_group_layout.clone(), + pipeline_id: None, + }, + gpu_occlusion_culling_build_indexed_indirect_params: BuildIndirectParametersPipeline { + bind_group_layout: build_indexed_indirect_params_bind_group_layout.clone(), + pipeline_id: None, + }, + gpu_occlusion_culling_build_non_indexed_indirect_params: + BuildIndirectParametersPipeline { + bind_group_layout: build_non_indexed_indirect_params_bind_group_layout.clone(), 
+ pipeline_id: None, + }, + }; + PreprocessPipelines { direct_preprocess: PreprocessPipeline { bind_group_layout: direct_bind_group_layout, pipeline_id: None, }, - gpu_culling_preprocess: PreprocessPipeline { - bind_group_layout: gpu_culling_bind_group_layout, + gpu_frustum_culling_preprocess: PreprocessPipeline { + bind_group_layout: gpu_frustum_culling_bind_group_layout, pipeline_id: None, }, - build_indexed_indirect_params: BuildIndirectParametersPipeline { - bind_group_layout: build_indexed_indirect_params_bind_group_layout, + early_gpu_occlusion_culling_preprocess: PreprocessPipeline { + bind_group_layout: gpu_early_occlusion_culling_bind_group_layout, pipeline_id: None, }, - build_non_indexed_indirect_params: BuildIndirectParametersPipeline { - bind_group_layout: build_non_indexed_indirect_params_bind_group_layout, + late_gpu_occlusion_culling_preprocess: PreprocessPipeline { + bind_group_layout: gpu_late_occlusion_culling_bind_group_layout, pipeline_id: None, }, + gpu_frustum_culling_build_indexed_indirect_params: BuildIndirectParametersPipeline { + bind_group_layout: build_indexed_indirect_params_bind_group_layout.clone(), + pipeline_id: None, + }, + gpu_frustum_culling_build_non_indexed_indirect_params: + BuildIndirectParametersPipeline { + bind_group_layout: build_non_indexed_indirect_params_bind_group_layout.clone(), + pipeline_id: None, + }, + early_phase: preprocess_phase_pipelines.clone(), + late_phase: preprocess_phase_pipelines.clone(), + main_phase: preprocess_phase_pipelines.clone(), } } } fn preprocess_direct_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { - DynamicBindGroupLayoutEntries::sequential( + DynamicBindGroupLayoutEntries::new_with_indices( ShaderStages::COMPUTE, ( // `current_input` - storage_buffer_read_only::(false), + (3, storage_buffer_read_only::(false)), // `previous_input` - storage_buffer_read_only::(false), + (4, storage_buffer_read_only::(false)), // `indices` - storage_buffer_read_only::(false), + (5, 
storage_buffer_read_only::(false)), // `output` - storage_buffer::(false), + (6, storage_buffer::(false)), ), ) } @@ -713,37 +1326,103 @@ fn preprocess_direct_bind_group_layout_entries() -> DynamicBindGroupLayoutEntrie // Returns the first 3 bind group layout entries shared between all invocations // of the indirect parameters building shader. fn build_indirect_params_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { - DynamicBindGroupLayoutEntries::sequential( + DynamicBindGroupLayoutEntries::new_with_indices( ShaderStages::COMPUTE, ( - storage_buffer_read_only::(false), - storage_buffer_read_only::(false), - storage_buffer::(false), + (0, storage_buffer_read_only::(false)), + ( + 1, + storage_buffer_read_only::(false), + ), + (2, storage_buffer::(false)), ), ) } /// A system that specializes the `mesh_preprocess.wgsl` and /// `build_indirect_params.wgsl` pipelines if necessary. +fn gpu_culling_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { + // GPU culling bind group parameters are a superset of those in the CPU + // culling (direct) shader. + preprocess_direct_bind_group_layout_entries().extend_with_indices(( + // `indirect_parameters` + ( + 7, + storage_buffer::(/* has_dynamic_offset= */ false), + ), + // `mesh_culling_data` + ( + 8, + storage_buffer_read_only::(/* has_dynamic_offset= */ false), + ), + // `view` + ( + 0, + uniform_buffer::(/* has_dynamic_offset= */ true), + ), + )) +} + +fn gpu_occlusion_culling_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { + gpu_culling_bind_group_layout_entries().extend_with_indices(( + ( + 2, + uniform_buffer::(/*has_dynamic_offset=*/ false), + ), + ( + 10, + texture_2d(TextureSampleType::Float { filterable: true }), + ), + ( + 12, + storage_buffer::( + /*has_dynamic_offset=*/ false, + ), + ), + )) +} + +/// A system that specializes the `mesh_preprocess.wgsl` pipelines if necessary. 
pub fn prepare_preprocess_pipelines( pipeline_cache: Res, render_device: Res, mut specialized_preprocess_pipelines: ResMut>, + mut specialized_reset_indirect_batch_sets_pipelines: ResMut< + SpecializedComputePipelines, + >, mut specialized_build_indirect_parameters_pipelines: ResMut< SpecializedComputePipelines, >, - mut preprocess_pipelines: ResMut, + preprocess_pipelines: ResMut, ) { + let preprocess_pipelines = preprocess_pipelines.into_inner(); + preprocess_pipelines.direct_preprocess.prepare( &pipeline_cache, &mut specialized_preprocess_pipelines, PreprocessPipelineKey::empty(), ); - preprocess_pipelines.gpu_culling_preprocess.prepare( + preprocess_pipelines.gpu_frustum_culling_preprocess.prepare( &pipeline_cache, &mut specialized_preprocess_pipelines, - PreprocessPipelineKey::GPU_CULLING, + PreprocessPipelineKey::FRUSTUM_CULLING, ); + preprocess_pipelines + .early_gpu_occlusion_culling_preprocess + .prepare( + &pipeline_cache, + &mut specialized_preprocess_pipelines, + PreprocessPipelineKey::FRUSTUM_CULLING + | PreprocessPipelineKey::OCCLUSION_CULLING + | PreprocessPipelineKey::EARLY_PHASE, + ); + preprocess_pipelines + .late_gpu_occlusion_culling_preprocess + .prepare( + &pipeline_cache, + &mut specialized_preprocess_pipelines, + PreprocessPipelineKey::FRUSTUM_CULLING | PreprocessPipelineKey::OCCLUSION_CULLING, + ); let mut build_indirect_parameters_pipeline_key = BuildIndirectParametersPipelineKey::empty(); @@ -758,18 +1437,61 @@ pub fn prepare_preprocess_pipelines( .insert(BuildIndirectParametersPipelineKey::MULTI_DRAW_INDIRECT_COUNT_SUPPORTED); } - preprocess_pipelines.build_indexed_indirect_params.prepare( - &pipeline_cache, - &mut specialized_build_indirect_parameters_pipelines, - build_indirect_parameters_pipeline_key | BuildIndirectParametersPipelineKey::INDEXED, - ); preprocess_pipelines - .build_non_indexed_indirect_params + .gpu_frustum_culling_build_indexed_indirect_params + .prepare( + &pipeline_cache, + &mut 
specialized_build_indirect_parameters_pipelines, + build_indirect_parameters_pipeline_key | BuildIndirectParametersPipelineKey::INDEXED, + ); + preprocess_pipelines + .gpu_frustum_culling_build_non_indexed_indirect_params .prepare( &pipeline_cache, &mut specialized_build_indirect_parameters_pipelines, build_indirect_parameters_pipeline_key, ); + + for (preprocess_phase_pipelines, build_indirect_parameters_phase_pipeline_key) in [ + ( + &mut preprocess_pipelines.early_phase, + BuildIndirectParametersPipelineKey::EARLY_PHASE, + ), + ( + &mut preprocess_pipelines.late_phase, + BuildIndirectParametersPipelineKey::LATE_PHASE, + ), + ( + &mut preprocess_pipelines.main_phase, + BuildIndirectParametersPipelineKey::MAIN_PHASE, + ), + ] { + preprocess_phase_pipelines + .reset_indirect_batch_sets + .prepare( + &pipeline_cache, + &mut specialized_reset_indirect_batch_sets_pipelines, + ); + preprocess_phase_pipelines + .gpu_occlusion_culling_build_indexed_indirect_params + .prepare( + &pipeline_cache, + &mut specialized_build_indirect_parameters_pipelines, + build_indirect_parameters_pipeline_key + | build_indirect_parameters_phase_pipeline_key + | BuildIndirectParametersPipelineKey::INDEXED + | BuildIndirectParametersPipelineKey::OCCLUSION_CULLING, + ); + preprocess_phase_pipelines + .gpu_occlusion_culling_build_non_indexed_indirect_params + .prepare( + &pipeline_cache, + &mut specialized_build_indirect_parameters_pipelines, + build_indirect_parameters_pipeline_key + | build_indirect_parameters_phase_pipeline_key + | BuildIndirectParametersPipelineKey::OCCLUSION_CULLING, + ); + } } impl PreprocessPipeline { @@ -788,6 +1510,22 @@ impl PreprocessPipeline { } } +impl SpecializedComputePipeline for ResetIndirectBatchSetsPipeline { + type Key = (); + + fn specialize(&self, _: Self::Key) -> ComputePipelineDescriptor { + ComputePipelineDescriptor { + label: Some("reset indirect batch sets".into()), + layout: vec![self.bind_group_layout.clone()], + push_constant_ranges: vec![], + 
shader: RESET_INDIRECT_BATCH_SETS_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "main".into(), + zero_initialize_workgroup_memory: false, + } + } +} + impl SpecializedComputePipeline for BuildIndirectParametersPipeline { type Key = BuildIndirectParametersPipelineKey; @@ -799,13 +1537,39 @@ impl SpecializedComputePipeline for BuildIndirectParametersPipeline { if key.contains(BuildIndirectParametersPipelineKey::MULTI_DRAW_INDIRECT_COUNT_SUPPORTED) { shader_defs.push("MULTI_DRAW_INDIRECT_COUNT_SUPPORTED".into()); } + if key.contains(BuildIndirectParametersPipelineKey::OCCLUSION_CULLING) { + shader_defs.push("OCCLUSION_CULLING".into()); + } + if key.contains(BuildIndirectParametersPipelineKey::EARLY_PHASE) { + shader_defs.push("EARLY_PHASE".into()); + } + if key.contains(BuildIndirectParametersPipelineKey::LATE_PHASE) { + shader_defs.push("LATE_PHASE".into()); + } + if key.contains(BuildIndirectParametersPipelineKey::MAIN_PHASE) { + shader_defs.push("MAIN_PHASE".into()); + } + + let label = format!( + "{} build {}indexed indirect parameters", + if !key.contains(BuildIndirectParametersPipelineKey::OCCLUSION_CULLING) { + "frustum culling" + } else if key.contains(BuildIndirectParametersPipelineKey::EARLY_PHASE) { + "early occlusion culling" + } else if key.contains(BuildIndirectParametersPipelineKey::LATE_PHASE) { + "late occlusion culling" + } else { + "main occlusion culling" + }, + if key.contains(BuildIndirectParametersPipelineKey::INDEXED) { + "" + } else { + "non-" + } + ); ComputePipelineDescriptor { - label: if key.contains(BuildIndirectParametersPipelineKey::INDEXED) { - Some("build indexed indirect parameters".into()) - } else { - Some("build non-indexed indirect parameters".into()) - }, + label: Some(label.into()), layout: vec![self.bind_group_layout.clone()], push_constant_ranges: vec![], shader: BUILD_INDIRECT_PARAMS_SHADER_HANDLE, @@ -816,6 +1580,21 @@ impl SpecializedComputePipeline for BuildIndirectParametersPipeline { } } +impl 
ResetIndirectBatchSetsPipeline { + fn prepare( + &mut self, + pipeline_cache: &PipelineCache, + pipelines: &mut SpecializedComputePipelines, + ) { + if self.pipeline_id.is_some() { + return; + } + + let reset_indirect_batch_sets_pipeline_id = pipelines.specialize(pipeline_cache, self, ()); + self.pipeline_id = Some(reset_indirect_batch_sets_pipeline_id); + } +} + impl BuildIndirectParametersPipeline { fn prepare( &mut self, @@ -834,55 +1613,117 @@ impl BuildIndirectParametersPipeline { /// A system that attaches the mesh uniform buffers to the bind groups for the /// variants of the mesh preprocessing compute shader. +#[expect( + clippy::too_many_arguments, + reason = "it's a system that needs a lot of arguments" +)] pub fn prepare_preprocess_bind_groups( mut commands: Commands, + view_depth_pyramids: Query<(&ViewDepthPyramid, &PreviousViewUniformOffset)>, render_device: Res, batched_instance_buffers: Res>, indirect_parameters_buffers: Res, mesh_culling_data_buffer: Res, view_uniforms: Res, + previous_view_uniforms: Res, pipelines: Res, ) { // Grab the `BatchedInstanceBuffers`. - let batched_instance_buffers = batched_instance_buffers.into_inner(); + let BatchedInstanceBuffers { + data_buffer: ref data_buffer_vec, + ref work_item_buffers, + current_input_buffer: ref current_input_buffer_vec, + previous_input_buffer: ref previous_input_buffer_vec, + ref late_indexed_indirect_parameters_buffer, + ref late_non_indexed_indirect_parameters_buffer, + } = batched_instance_buffers.into_inner(); - let Some(current_input_buffer) = batched_instance_buffers - .current_input_buffer - .buffer() - .buffer() - else { + let (Some(current_input_buffer), Some(previous_input_buffer), Some(data_buffer)) = ( + current_input_buffer_vec.buffer().buffer(), + previous_input_buffer_vec.buffer().buffer(), + data_buffer_vec.buffer(), + ) else { return; }; - // Keep track of whether any of the phases will be drawn indirectly. 
If - // they are, then we'll need bind groups for the indirect parameters - // building shader too. + // Record whether we have any meshes that are to be drawn indirectly. If we + // don't, then we can skip building indirect parameters. let mut any_indirect = false; - for (view, phase_work_item_buffers) in &batched_instance_buffers.work_item_buffers { + // Loop over each view. + for (view, phase_work_item_buffers) in work_item_buffers { let mut bind_groups = TypeIdMap::default(); + // Loop over each phase. for (&phase_id, work_item_buffers) in phase_work_item_buffers { - if let Some(bind_group) = prepare_preprocess_bind_group_for_phase( - &render_device, - &pipelines, - &view_uniforms, - &indirect_parameters_buffers, - &mesh_culling_data_buffer, - batched_instance_buffers, - work_item_buffers, - &mut any_indirect, - ) { + // Create the `PreprocessBindGroupBuilder`. + let preprocess_bind_group_builder = PreprocessBindGroupBuilder { + view: *view, + late_indexed_indirect_parameters_buffer, + late_non_indexed_indirect_parameters_buffer, + render_device: &render_device, + indirect_parameters_buffers: &indirect_parameters_buffers, + mesh_culling_data_buffer: &mesh_culling_data_buffer, + view_uniforms: &view_uniforms, + previous_view_uniforms: &previous_view_uniforms, + pipelines: &pipelines, + current_input_buffer, + previous_input_buffer, + data_buffer, + }; + + // Depending on the type of work items we have, construct the + // appropriate bind groups. 
+ let (was_indirect, bind_group) = match *work_item_buffers { + PreprocessWorkItemBuffers::Direct(ref work_item_buffer) => ( + false, + preprocess_bind_group_builder + .create_direct_preprocess_bind_groups(work_item_buffer), + ), + + PreprocessWorkItemBuffers::Indirect { + indexed: ref indexed_work_item_buffer, + non_indexed: ref non_indexed_work_item_buffer, + gpu_occlusion_culling: Some(ref gpu_occlusion_culling_work_item_buffers), + } => ( + true, + preprocess_bind_group_builder + .create_indirect_occlusion_culling_preprocess_bind_groups( + &view_depth_pyramids, + indexed_work_item_buffer, + non_indexed_work_item_buffer, + gpu_occlusion_culling_work_item_buffers, + ), + ), + + PreprocessWorkItemBuffers::Indirect { + indexed: ref indexed_work_item_buffer, + non_indexed: ref non_indexed_work_item_buffer, + gpu_occlusion_culling: None, + } => ( + true, + preprocess_bind_group_builder + .create_indirect_frustum_culling_preprocess_bind_groups( + indexed_work_item_buffer, + non_indexed_work_item_buffer, + ), + ), + }; + + // Write that bind group in. + if let Some(bind_group) = bind_group { + any_indirect = any_indirect || was_indirect; bind_groups.insert(phase_id, bind_group); } } + // Save the bind groups. commands .entity(*view) .insert(PreprocessBindGroups(bind_groups)); } - // If any of the phases will be drawn indirectly, create the bind groups for + // Now, if there were any indirect draw commands, create the bind groups for // the indirect parameters building shader. if any_indirect { create_build_indirect_parameters_bind_groups( @@ -895,164 +1736,599 @@ pub fn prepare_preprocess_bind_groups( } } -// Creates the bind group for the GPU preprocessing shader for a single phase -// for a single view. 
-#[expect( - clippy::too_many_arguments, - reason = "it's a system that needs a bunch of parameters" -)] -fn prepare_preprocess_bind_group_for_phase( - render_device: &RenderDevice, - pipelines: &PreprocessPipelines, - view_uniforms: &ViewUniforms, - indirect_parameters_buffers: &IndirectParametersBuffers, - mesh_culling_data_buffer: &MeshCullingDataBuffer, - batched_instance_buffers: &BatchedInstanceBuffers, - work_item_buffers: &PreprocessWorkItemBuffers, - any_indirect: &mut bool, -) -> Option { - // Get the current input buffers. +/// A temporary structure that stores all the information needed to construct +/// bind groups for the mesh preprocessing shader. +struct PreprocessBindGroupBuilder<'a> { + /// The render-world entity corresponding to the current view. + view: Entity, + /// The indirect compute dispatch parameters buffer for indexed meshes in + /// the late prepass. + late_indexed_indirect_parameters_buffer: + &'a RawBufferVec, + /// The indirect compute dispatch parameters buffer for non-indexed meshes + /// in the late prepass. + late_non_indexed_indirect_parameters_buffer: + &'a RawBufferVec, + /// The device. + render_device: &'a RenderDevice, + /// The buffers that store indirect draw parameters. + indirect_parameters_buffers: &'a IndirectParametersBuffers, + /// The GPU buffer that stores the information needed to cull each mesh. + mesh_culling_data_buffer: &'a MeshCullingDataBuffer, + /// The GPU buffer that stores information about the view. + view_uniforms: &'a ViewUniforms, + /// The GPU buffer that stores information about the view from last frame. + previous_view_uniforms: &'a PreviousViewUniforms, + /// The pipelines for the mesh preprocessing shader. + pipelines: &'a PreprocessPipelines, + /// The GPU buffer containing the list of [`MeshInputUniform`]s for the + /// current frame. + current_input_buffer: &'a Buffer, + /// The GPU buffer containing the list of [`MeshInputUniform`]s for the + /// previous frame. 
+ previous_input_buffer: &'a Buffer, + /// The GPU buffer containing the list of [`MeshUniform`]s for the current + /// frame. + /// + /// This is the buffer containing the mesh's final transforms that the + /// shaders will write to. + data_buffer: &'a Buffer, +} - let BatchedInstanceBuffers { - data_buffer: ref data_buffer_vec, - current_input_buffer: ref current_input_buffer_vec, - previous_input_buffer: ref previous_input_buffer_vec, - .. - } = batched_instance_buffers; +impl<'a> PreprocessBindGroupBuilder<'a> { + /// Creates the bind groups for mesh preprocessing when GPU frustum culling + /// and GPU occlusion culling are both disabled. + fn create_direct_preprocess_bind_groups( + &self, + work_item_buffer: &BufferVec, + ) -> Option { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. + let work_item_buffer_size = NonZero::::try_from( + work_item_buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); - let current_input_buffer = current_input_buffer_vec.buffer().buffer()?; - let previous_input_buffer = previous_input_buffer_vec.buffer().buffer()?; - let data_buffer = data_buffer_vec.buffer()?; - - // Build the appropriate bind group, depending on whether we're drawing - // directly or indirectly. - - match *work_item_buffers { - PreprocessWorkItemBuffers::Direct(ref work_item_buffer_vec) => { - let work_item_buffer = work_item_buffer_vec.buffer()?; - - // Don't use `as_entire_binding()` here; the shader reads the array - // length and the underlying buffer may be longer than the actual size - // of the vector. 
- let work_item_buffer_size = NonZero::::try_from( - work_item_buffer_vec.len() as u64 * u64::from(PreprocessWorkItem::min_size()), - ) - .ok(); - - Some(PhasePreprocessBindGroups::Direct( - render_device.create_bind_group( - "preprocess_direct_bind_group", - &pipelines.direct_preprocess.bind_group_layout, - &BindGroupEntries::sequential(( - current_input_buffer.as_entire_binding(), - previous_input_buffer.as_entire_binding(), + Some(PhasePreprocessBindGroups::Direct( + self.render_device.create_bind_group( + "preprocess_direct_bind_group", + &self.pipelines.direct_preprocess.bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, BindingResource::Buffer(BufferBinding { - buffer: work_item_buffer, + buffer: work_item_buffer.buffer()?, offset: 0, size: work_item_buffer_size, }), - data_buffer.as_entire_binding(), - )), - ), - )) + ), + (6, self.data_buffer.as_entire_binding()), + )), + ), + )) + } + + /// Creates the bind groups for mesh preprocessing when GPU occlusion + /// culling is enabled. + fn create_indirect_occlusion_culling_preprocess_bind_groups( + &self, + view_depth_pyramids: &Query<(&ViewDepthPyramid, &PreviousViewUniformOffset)>, + indexed_work_item_buffer: &BufferVec, + non_indexed_work_item_buffer: &BufferVec, + gpu_occlusion_culling_work_item_buffers: &GpuOcclusionCullingWorkItemBuffers, + ) -> Option { + let GpuOcclusionCullingWorkItemBuffers { + late_indexed: ref late_indexed_work_item_buffer, + late_non_indexed: ref late_non_indexed_work_item_buffer, + .. 
+ } = *gpu_occlusion_culling_work_item_buffers; + + let (view_depth_pyramid, previous_view_uniform_offset) = + view_depth_pyramids.get(self.view).ok()?; + + Some(PhasePreprocessBindGroups::IndirectOcclusionCulling { + early_indexed: self.create_indirect_occlusion_culling_early_indexed_bind_group( + view_depth_pyramid, + previous_view_uniform_offset, + indexed_work_item_buffer, + late_indexed_work_item_buffer, + ), + + early_non_indexed: self.create_indirect_occlusion_culling_early_non_indexed_bind_group( + view_depth_pyramid, + previous_view_uniform_offset, + non_indexed_work_item_buffer, + late_non_indexed_work_item_buffer, + ), + + late_indexed: self.create_indirect_occlusion_culling_late_indexed_bind_group( + view_depth_pyramid, + previous_view_uniform_offset, + late_indexed_work_item_buffer, + ), + + late_non_indexed: self.create_indirect_occlusion_culling_late_non_indexed_bind_group( + view_depth_pyramid, + previous_view_uniform_offset, + late_non_indexed_work_item_buffer, + ), + }) + } + + /// Creates the bind group for the first phase of mesh preprocessing of + /// indexed meshes when GPU occlusion culling is enabled. 
+ fn create_indirect_occlusion_culling_early_indexed_bind_group( + &self, + view_depth_pyramid: &ViewDepthPyramid, + previous_view_uniform_offset: &PreviousViewUniformOffset, + indexed_work_item_buffer: &BufferVec, + late_indexed_work_item_buffer: &UninitBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; + + match ( + self.indirect_parameters_buffers.indexed_metadata_buffer(), + indexed_work_item_buffer.buffer(), + late_indexed_work_item_buffer.buffer(), + self.late_indexed_indirect_parameters_buffer.buffer(), + ) { + ( + Some(indexed_metadata_buffer), + Some(indexed_work_item_gpu_buffer), + Some(late_indexed_work_item_gpu_buffer), + Some(late_indexed_indirect_parameters_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let indexed_work_item_buffer_size = NonZero::::try_from( + indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_early_indexed_gpu_occlusion_culling_bind_group", + &self + .pipelines + .early_gpu_occlusion_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: indexed_work_item_gpu_buffer, + offset: 0, + size: indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, indexed_metadata_buffer.as_entire_binding()), + (8, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + (10, &view_depth_pyramid.all_mips), + ( + 2, + BufferBinding { + buffer: previous_view_buffer, + offset: previous_view_uniform_offset.offset as u64, + size: NonZeroU64::new(size_of::() as u64), + }, + ), + ( + 11, + BufferBinding { + buffer: late_indexed_work_item_gpu_buffer, + offset: 0, + size: indexed_work_item_buffer_size, + }, + ), + ( + 12, + BufferBinding { + buffer: late_indexed_indirect_parameters_buffer, + offset: 0, + size: NonZeroU64::new( + late_indexed_indirect_parameters_buffer.size(), + ), + }, + ), + )), + ), + ) + } + _ => None, } + } - PreprocessWorkItemBuffers::Indirect { - indexed: ref indexed_buffer, - non_indexed: ref non_indexed_buffer, - } => { - // For indirect drawing, we need two separate bind groups, one for indexed meshes and one for non-indexed meshes. + /// Creates the bind group for the first phase of mesh preprocessing of + /// non-indexed meshes when GPU occlusion culling is enabled. 
+ fn create_indirect_occlusion_culling_early_non_indexed_bind_group( + &self, + view_depth_pyramid: &ViewDepthPyramid, + previous_view_uniform_offset: &PreviousViewUniformOffset, + non_indexed_work_item_buffer: &BufferVec, + late_non_indexed_work_item_buffer: &UninitBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; - let mesh_culling_data_buffer = mesh_culling_data_buffer.buffer()?; - let view_uniforms_binding = view_uniforms.uniforms.binding()?; + match ( + self.indirect_parameters_buffers + .non_indexed_metadata_buffer(), + non_indexed_work_item_buffer.buffer(), + late_non_indexed_work_item_buffer.buffer(), + self.late_non_indexed_indirect_parameters_buffer.buffer(), + ) { + ( + Some(non_indexed_metadata_buffer), + Some(non_indexed_work_item_gpu_buffer), + Some(late_non_indexed_work_item_buffer), + Some(late_non_indexed_indirect_parameters_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. + let non_indexed_work_item_buffer_size = NonZero::::try_from( + non_indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); - let indexed_bind_group = match ( - indexed_buffer.buffer(), - indirect_parameters_buffers.indexed_metadata_buffer(), - ) { - ( - Some(indexed_work_item_buffer), - Some(indexed_indirect_parameters_metadata_buffer), - ) => { - // Don't use `as_entire_binding()` here; the shader reads the array - // length and the underlying buffer may be longer than the actual size - // of the vector. 
- let indexed_work_item_buffer_size = NonZero::::try_from( - indexed_buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()), - ) - .ok(); - Some(render_device.create_bind_group( - "preprocess_indexed_indirect_gpu_culling_bind_group", - &pipelines.gpu_culling_preprocess.bind_group_layout, - &BindGroupEntries::sequential(( - current_input_buffer.as_entire_binding(), - previous_input_buffer.as_entire_binding(), - BindingResource::Buffer(BufferBinding { - buffer: indexed_work_item_buffer, - offset: 0, - size: indexed_work_item_buffer_size, - }), - data_buffer.as_entire_binding(), - indexed_indirect_parameters_metadata_buffer.as_entire_binding(), - mesh_culling_data_buffer.as_entire_binding(), - view_uniforms_binding.clone(), + Some( + self.render_device.create_bind_group( + "preprocess_early_non_indexed_gpu_occlusion_culling_bind_group", + &self + .pipelines + .early_gpu_occlusion_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: non_indexed_work_item_gpu_buffer, + offset: 0, + size: non_indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, non_indexed_metadata_buffer.as_entire_binding()), + (8, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + (10, &view_depth_pyramid.all_mips), + ( + 2, + BufferBinding { + buffer: previous_view_buffer, + offset: previous_view_uniform_offset.offset as u64, + size: NonZeroU64::new(size_of::() as u64), + }, + ), + ( + 11, + BufferBinding { + buffer: late_non_indexed_work_item_buffer, + offset: 0, + size: non_indexed_work_item_buffer_size, + }, + ), + ( + 12, + BufferBinding { + buffer: late_non_indexed_indirect_parameters_buffer, + offset: 0, + size: NonZeroU64::new( + late_non_indexed_indirect_parameters_buffer.size(), + ), + }, + ), )), - )) - } - _ => None, 
- }; + ), + ) + } + _ => None, + } + } - let non_indexed_bind_group = match ( - non_indexed_buffer.buffer(), - indirect_parameters_buffers.non_indexed_metadata_buffer(), - ) { - ( - Some(non_indexed_work_item_buffer), - Some(non_indexed_indirect_parameters_metadata_buffer), - ) => { - // Don't use `as_entire_binding()` here; the shader reads the array - // length and the underlying buffer may be longer than the actual size - // of the vector. - let non_indexed_work_item_buffer_size = NonZero::::try_from( - non_indexed_buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()), - ) - .ok(); - Some(render_device.create_bind_group( - "preprocess_non_indexed_indirect_gpu_culling_bind_group", - &pipelines.gpu_culling_preprocess.bind_group_layout, - &BindGroupEntries::sequential(( - current_input_buffer.as_entire_binding(), - previous_input_buffer.as_entire_binding(), - BindingResource::Buffer(BufferBinding { - buffer: non_indexed_work_item_buffer, - offset: 0, - size: non_indexed_work_item_buffer_size, - }), - data_buffer.as_entire_binding(), - non_indexed_indirect_parameters_metadata_buffer.as_entire_binding(), - mesh_culling_data_buffer.as_entire_binding(), - view_uniforms_binding, + /// Creates the bind group for the second phase of mesh preprocessing of + /// indexed meshes when GPU occlusion culling is enabled. 
+ fn create_indirect_occlusion_culling_late_indexed_bind_group( + &self, + view_depth_pyramid: &ViewDepthPyramid, + previous_view_uniform_offset: &PreviousViewUniformOffset, + late_indexed_work_item_buffer: &UninitBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; + + match ( + self.indirect_parameters_buffers.indexed_metadata_buffer(), + late_indexed_work_item_buffer.buffer(), + self.late_indexed_indirect_parameters_buffer.buffer(), + ) { + ( + Some(indexed_metadata_buffer), + Some(late_indexed_work_item_gpu_buffer), + Some(late_indexed_indirect_parameters_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. + let late_indexed_work_item_buffer_size = NonZero::::try_from( + late_indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_late_indexed_gpu_occlusion_culling_bind_group", + &self + .pipelines + .late_gpu_occlusion_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: late_indexed_work_item_gpu_buffer, + offset: 0, + size: late_indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, indexed_metadata_buffer.as_entire_binding()), + (8, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + (10, &view_depth_pyramid.all_mips), + ( + 2, + BufferBinding { + buffer: previous_view_buffer, + offset: previous_view_uniform_offset.offset as u64, + size: NonZeroU64::new(size_of::() as u64), 
+ }, + ), + ( + 12, + BufferBinding { + buffer: late_indexed_indirect_parameters_buffer, + offset: 0, + size: NonZeroU64::new( + late_indexed_indirect_parameters_buffer.size(), + ), + }, + ), )), - )) - } - _ => None, - }; + ), + ) + } + _ => None, + } + } - // Note that we found phases that will be drawn indirectly so that - // we remember to build the bind groups for the indirect parameter - // building shader. - *any_indirect = true; + /// Creates the bind group for the second phase of mesh preprocessing of + /// non-indexed meshes when GPU occlusion culling is enabled. + fn create_indirect_occlusion_culling_late_non_indexed_bind_group( + &self, + view_depth_pyramid: &ViewDepthPyramid, + previous_view_uniform_offset: &PreviousViewUniformOffset, + late_non_indexed_work_item_buffer: &UninitBufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + let previous_view_buffer = self.previous_view_uniforms.uniforms.buffer()?; - Some(PhasePreprocessBindGroups::Indirect { - indexed: indexed_bind_group, - non_indexed: non_indexed_bind_group, - }) + match ( + self.indirect_parameters_buffers + .non_indexed_metadata_buffer(), + late_non_indexed_work_item_buffer.buffer(), + self.late_non_indexed_indirect_parameters_buffer.buffer(), + ) { + ( + Some(non_indexed_metadata_buffer), + Some(non_indexed_work_item_gpu_buffer), + Some(late_non_indexed_indirect_parameters_buffer), + ) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let non_indexed_work_item_buffer_size = NonZero::::try_from( + late_non_indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_late_non_indexed_gpu_occlusion_culling_bind_group", + &self + .pipelines + .late_gpu_occlusion_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: non_indexed_work_item_gpu_buffer, + offset: 0, + size: non_indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, non_indexed_metadata_buffer.as_entire_binding()), + (8, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + (10, &view_depth_pyramid.all_mips), + ( + 2, + BufferBinding { + buffer: previous_view_buffer, + offset: previous_view_uniform_offset.offset as u64, + size: NonZeroU64::new(size_of::() as u64), + }, + ), + ( + 12, + BufferBinding { + buffer: late_non_indexed_indirect_parameters_buffer, + offset: 0, + size: NonZeroU64::new( + late_non_indexed_indirect_parameters_buffer.size(), + ), + }, + ), + )), + ), + ) + } + _ => None, + } + } + + /// Creates the bind groups for mesh preprocessing when GPU frustum culling + /// is enabled, but GPU occlusion culling is disabled. 
+ fn create_indirect_frustum_culling_preprocess_bind_groups( + &self, + indexed_work_item_buffer: &BufferVec, + non_indexed_work_item_buffer: &BufferVec, + ) -> Option { + Some(PhasePreprocessBindGroups::IndirectFrustumCulling { + indexed: self + .create_indirect_frustum_culling_indexed_bind_group(indexed_work_item_buffer), + non_indexed: self.create_indirect_frustum_culling_non_indexed_bind_group( + non_indexed_work_item_buffer, + ), + }) + } + + /// Creates the bind group for mesh preprocessing of indexed meshes when GPU + /// frustum culling is enabled, but GPU occlusion culling is disabled. + fn create_indirect_frustum_culling_indexed_bind_group( + &self, + indexed_work_item_buffer: &BufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + + match ( + self.indirect_parameters_buffers.indexed_metadata_buffer(), + indexed_work_item_buffer.buffer(), + ) { + (Some(indexed_metadata_buffer), Some(indexed_work_item_gpu_buffer)) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let indexed_work_item_buffer_size = NonZero::::try_from( + indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_gpu_indexed_frustum_culling_bind_group", + &self + .pipelines + .gpu_frustum_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: indexed_work_item_gpu_buffer, + offset: 0, + size: indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, indexed_metadata_buffer.as_entire_binding()), + (8, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + )), + ), + ) + } + _ => None, + } + } + + /// Creates the bind group for mesh preprocessing of non-indexed meshes when + /// GPU frustum culling is enabled, but GPU occlusion culling is disabled. + fn create_indirect_frustum_culling_non_indexed_bind_group( + &self, + non_indexed_work_item_buffer: &BufferVec, + ) -> Option { + let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + + match ( + self.indirect_parameters_buffers + .non_indexed_metadata_buffer(), + non_indexed_work_item_buffer.buffer(), + ) { + (Some(non_indexed_metadata_buffer), Some(non_indexed_work_item_gpu_buffer)) => { + // Don't use `as_entire_binding()` here; the shader reads the array + // length and the underlying buffer may be longer than the actual size + // of the vector. 
+ let non_indexed_work_item_buffer_size = NonZero::::try_from( + non_indexed_work_item_buffer.len() as u64 + * u64::from(PreprocessWorkItem::min_size()), + ) + .ok(); + + Some( + self.render_device.create_bind_group( + "preprocess_gpu_non_indexed_frustum_culling_bind_group", + &self + .pipelines + .gpu_frustum_culling_preprocess + .bind_group_layout, + &BindGroupEntries::with_indices(( + (3, self.current_input_buffer.as_entire_binding()), + (4, self.previous_input_buffer.as_entire_binding()), + ( + 5, + BindingResource::Buffer(BufferBinding { + buffer: non_indexed_work_item_gpu_buffer, + offset: 0, + size: non_indexed_work_item_buffer_size, + }), + ), + (6, self.data_buffer.as_entire_binding()), + (7, non_indexed_metadata_buffer.as_entire_binding()), + (8, mesh_culling_data_buffer.as_entire_binding()), + (0, view_uniforms_binding.clone()), + )), + ), + ) + } + _ => None, } } } /// A system that creates bind groups from the indirect parameters metadata and -/// data buffers for the indirect parameter building shader. +/// data buffers for the indirect batch set reset shader and the indirect +/// parameter building shader. fn create_build_indirect_parameters_bind_groups( commands: &mut Commands, render_device: &RenderDevice, @@ -1061,7 +2337,45 @@ fn create_build_indirect_parameters_bind_groups( indirect_parameters_buffer: &IndirectParametersBuffers, ) { commands.insert_resource(BuildIndirectParametersBindGroups { - indexed: match ( + reset_indexed_indirect_batch_sets: match ( + indirect_parameters_buffer.indexed_batch_sets_buffer(), + ) { + (Some(indexed_batch_sets_buffer),) => Some( + render_device.create_bind_group( + "reset_indexed_indirect_batch_sets_bind_group", + // The early bind group is good for the main phase and late + // phase too. They bind the same buffers. 
+ &pipelines + .early_phase + .reset_indirect_batch_sets + .bind_group_layout, + &BindGroupEntries::sequential((indexed_batch_sets_buffer.as_entire_binding(),)), + ), + ), + _ => None, + }, + + reset_non_indexed_indirect_batch_sets: match ( + indirect_parameters_buffer.non_indexed_batch_sets_buffer(), + ) { + (Some(non_indexed_batch_sets_buffer),) => Some( + render_device.create_bind_group( + "reset_non_indexed_indirect_batch_sets_bind_group", + // The early bind group is good for the main phase and late + // phase too. They bind the same buffers. + &pipelines + .early_phase + .reset_indirect_batch_sets + .bind_group_layout, + &BindGroupEntries::sequential(( + non_indexed_batch_sets_buffer.as_entire_binding(), + )), + ), + ), + _ => None, + }, + + build_indexed_indirect: match ( indirect_parameters_buffer.indexed_metadata_buffer(), indirect_parameters_buffer.indexed_data_buffer(), indirect_parameters_buffer.indexed_batch_sets_buffer(), @@ -1070,28 +2384,35 @@ fn create_build_indirect_parameters_bind_groups( Some(indexed_indirect_parameters_metadata_buffer), Some(indexed_indirect_parameters_data_buffer), Some(indexed_batch_sets_buffer), - ) => Some(render_device.create_bind_group( - "build_indexed_indirect_parameters_bind_group", - &pipelines.build_indexed_indirect_params.bind_group_layout, - &BindGroupEntries::sequential(( - current_input_buffer.as_entire_binding(), - // Don't use `as_entire_binding` here; the shader reads - // the length and `RawBufferVec` overallocates. - BufferBinding { - buffer: indexed_indirect_parameters_metadata_buffer, - offset: 0, - size: NonZeroU64::new( - indirect_parameters_buffer.indexed_batch_count() as u64 - * size_of::() as u64, - ), - }, - indexed_batch_sets_buffer.as_entire_binding(), - indexed_indirect_parameters_data_buffer.as_entire_binding(), - )), - )), + ) => Some( + render_device.create_bind_group( + "build_indexed_indirect_parameters_bind_group", + // The frustum culling bind group is good for occlusion culling + // too. 
They bind the same buffers. + &pipelines + .gpu_frustum_culling_build_indexed_indirect_params + .bind_group_layout, + &BindGroupEntries::sequential(( + current_input_buffer.as_entire_binding(), + // Don't use `as_entire_binding` here; the shader reads + // the length and `RawBufferVec` overallocates. + BufferBinding { + buffer: indexed_indirect_parameters_metadata_buffer, + offset: 0, + size: NonZeroU64::new( + indirect_parameters_buffer.indexed_batch_count() as u64 + * size_of::() as u64, + ), + }, + indexed_batch_sets_buffer.as_entire_binding(), + indexed_indirect_parameters_data_buffer.as_entire_binding(), + )), + ), + ), _ => None, }, - non_indexed: match ( + + build_non_indexed_indirect: match ( indirect_parameters_buffer.non_indexed_metadata_buffer(), indirect_parameters_buffer.non_indexed_data_buffer(), indirect_parameters_buffer.non_indexed_batch_sets_buffer(), @@ -1103,8 +2424,10 @@ fn create_build_indirect_parameters_bind_groups( ) => Some( render_device.create_bind_group( "build_non_indexed_indirect_parameters_bind_group", + // The frustum culling bind group is good for occlusion culling + // too. They bind the same buffers. 
&pipelines - .build_non_indexed_indirect_params + .gpu_frustum_culling_build_non_indexed_indirect_params .bind_group_layout, &BindGroupEntries::sequential(( current_input_buffer.as_entire_binding(), diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 96b783ea66..b7e8fcf6f1 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -87,6 +87,7 @@ pub const MESH_FUNCTIONS_HANDLE: Handle = Handle::weak_from_u128(6300874 pub const MESH_SHADER_HANDLE: Handle = Handle::weak_from_u128(3252377289100772450); pub const SKINNING_HANDLE: Handle = Handle::weak_from_u128(13215291596265391738); pub const MORPH_HANDLE: Handle = Handle::weak_from_u128(970982813587607345); +pub const OCCLUSION_CULLING_HANDLE: Handle = Handle::weak_from_u128(285365001154292827); /// How many textures are allowed in the view bind group layout (`@group(0)`) before /// broader compatibility with WebGL and WebGPU is at risk, due to the minimum guaranteed @@ -134,6 +135,12 @@ impl Plugin for MeshRenderPlugin { load_internal_asset!(app, MESH_SHADER_HANDLE, "mesh.wgsl", Shader::from_wgsl); load_internal_asset!(app, SKINNING_HANDLE, "skinning.wgsl", Shader::from_wgsl); load_internal_asset!(app, MORPH_HANDLE, "morph.wgsl", Shader::from_wgsl); + load_internal_asset!( + app, + OCCLUSION_CULLING_HANDLE, + "occlusion_culling.wgsl", + Shader::from_wgsl + ); if app.get_sub_app(RenderApp).is_none() { return; @@ -1254,9 +1261,10 @@ pub fn extract_meshes_for_gpu_building( mut removed_visibilities_query: Extract>, mut removed_global_transforms_query: Extract>, mut removed_meshes_query: Extract>, - cameras_query: Extract, Without)>>, + gpu_culling_query: Extract, Without)>>, ) { - let any_gpu_culling = !cameras_query.is_empty(); + let any_gpu_culling = !gpu_culling_query.is_empty(); + for render_mesh_instance_queue in render_mesh_instance_queues.iter_mut() { render_mesh_instance_queue.init(any_gpu_culling); } @@ -1761,7 +1769,8 @@ impl 
GetFullBatchData for MeshPipeline { Some(batch_set_index) => u32::from(batch_set_index), None => !0, }, - instance_count: 0, + early_instance_count: 0, + late_instance_count: 0, }; if indexed { diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index df73454a3e..315dd13d3f 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -1,14 +1,28 @@ -// GPU mesh uniform building. +// GPU mesh transforming and culling. // // This is a compute shader that expands each `MeshInputUniform` out to a full -// `MeshUniform` for each view before rendering. (Thus `MeshInputUniform` -// and `MeshUniform` are in a 1:N relationship.) It runs in parallel for all -// meshes for all views. As part of this process, the shader gathers each -// mesh's transform on the previous frame and writes it into the `MeshUniform` -// so that TAA works. +// `MeshUniform` for each view before rendering. (Thus `MeshInputUniform` and +// `MeshUniform` are in a 1:N relationship.) It runs in parallel for all meshes +// for all views. As part of this process, the shader gathers each mesh's +// transform on the previous frame and writes it into the `MeshUniform` so that +// TAA works. It also performs frustum culling and occlusion culling, if +// requested. +// +// If occlusion culling is on, this shader runs twice: once to prepare the +// meshes that were visible last frame, and once to prepare the meshes that +// weren't visible last frame but became visible this frame. The two invocations +// are known as *early mesh preprocessing* and *late mesh preprocessing* +// respectively. 
+#import bevy_pbr::mesh_preprocess_types::{IndirectParametersMetadata, MeshInput} #import bevy_pbr::mesh_types::{Mesh, MESH_FLAGS_NO_FRUSTUM_CULLING_BIT} -#import bevy_pbr::mesh_preprocess_types::{MeshInput, IndirectParametersMetadata} +#import bevy_pbr::mesh_view_bindings::view +#import bevy_pbr::occlusion_culling +#import bevy_pbr::prepass_bindings::previous_view_uniforms +#import bevy_pbr::view_transformations::{ + position_world_to_ndc, position_world_to_view, ndc_to_uv, view_z_to_depth_ndc, + position_world_to_prev_ndc, position_world_to_prev_view, prev_view_z_to_depth_ndc +} #import bevy_render::maths #import bevy_render::view::View @@ -36,20 +50,48 @@ struct PreprocessWorkItem { indirect_parameters_index: u32, } +// The parameters for the indirect compute dispatch for the late mesh +// preprocessing phase. +struct LatePreprocessWorkItemIndirectParameters { + // The number of workgroups we're going to dispatch. + // + // This value should always be equal to `ceil(work_item_count / 64)`. + dispatch_x: atomic, + // The number of workgroups in the Y direction; always 1. + dispatch_y: u32, + // The number of workgroups in the Z direction; always 1. + dispatch_z: u32, + // The precise number of work items. + work_item_count: atomic, + // Padding. + // + // This isn't the usual structure padding; it's needed because some hardware + // requires indirect compute dispatch parameters to be aligned on 64-byte + // boundaries. + pad: vec4, +} + +// These have to be in a structure because of Naga limitations on DX12. +struct PushConstants { + // The offset into the `late_preprocess_work_item_indirect_parameters` + // buffer. + late_preprocess_work_item_indirect_offset: u32, +} + // The current frame's `MeshInput`. -@group(0) @binding(0) var current_input: array; +@group(0) @binding(3) var current_input: array; // The `MeshInput` values from the previous frame. 
-@group(0) @binding(1) var previous_input: array; +@group(0) @binding(4) var previous_input: array; // Indices into the `MeshInput` buffer. // // There may be many indices that map to the same `MeshInput`. -@group(0) @binding(2) var work_items: array; +@group(0) @binding(5) var work_items: array; // The output array of `Mesh`es. -@group(0) @binding(3) var output: array; +@group(0) @binding(6) var output: array; #ifdef INDIRECT // The array of indirect parameters for drawcalls. -@group(0) @binding(4) var indirect_parameters_metadata: +@group(0) @binding(7) var indirect_parameters_metadata: array; #endif @@ -57,11 +99,24 @@ struct PreprocessWorkItem { // Data needed to cull the meshes. // // At the moment, this consists only of AABBs. -@group(0) @binding(5) var mesh_culling_data: array; +@group(0) @binding(8) var mesh_culling_data: array; +#endif // FRUSTUM_CULLING -// The view data, including the view matrix. -@group(0) @binding(6) var view: View; +#ifdef OCCLUSION_CULLING +@group(0) @binding(10) var depth_pyramid: texture_2d; +#ifdef EARLY_PHASE +@group(0) @binding(11) var late_preprocess_work_items: + array; +#endif // EARLY_PHASE + +@group(0) @binding(12) var late_preprocess_work_item_indirect_parameters: + array; + +var push_constants: PushConstants; +#endif // OCCLUSION_CULLING + +#ifdef FRUSTUM_CULLING // Returns true if the view frustum intersects an oriented bounding box (OBB). // // `aabb_center.w` should be 1.0. @@ -102,9 +157,17 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { // Figure out our instance index. If this thread doesn't correspond to any // index, bail. let instance_index = global_invocation_id.x; + +#ifdef LATE_PHASE + if (instance_index >= atomicLoad(&late_preprocess_work_item_indirect_parameters[ + push_constants.late_preprocess_work_item_indirect_offset].work_item_count)) { + return; + } +#else // LATE_PHASE if (instance_index >= arrayLength(&work_items)) { return; } +#endif // Unpack the work item. 
let input_index = work_items[instance_index].input_index; @@ -115,7 +178,7 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { let world_from_local_affine_transpose = current_input[input_index].world_from_local; let world_from_local = maths::affine3_to_square(world_from_local_affine_transpose); - // Cull if necessary. + // Frustum cull if necessary. #ifdef FRUSTUM_CULLING if ((current_input[input_index].flags & MESH_FLAGS_NO_FRUSTUM_CULLING_BIT) == 0u) { let aabb_center = mesh_culling_data[input_index].aabb_center.xyz; @@ -129,6 +192,112 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { } #endif + // Look up the previous model matrix. + let previous_input_index = current_input[input_index].previous_input_index; + var previous_world_from_local_affine_transpose: mat3x4; + if (previous_input_index == 0xffffffff) { + previous_world_from_local_affine_transpose = world_from_local_affine_transpose; + } else { + previous_world_from_local_affine_transpose = + previous_input[previous_input_index].world_from_local; + } + let previous_world_from_local = + maths::affine3_to_square(previous_world_from_local_affine_transpose); + + // Occlusion cull if necessary. This is done by calculating the screen-space + // axis-aligned bounding box (AABB) of the mesh and testing it against the + // appropriate level of the depth pyramid (a.k.a. hierarchical Z-buffer). If + // no part of the AABB is in front of the corresponding pixel quad in the + // hierarchical Z-buffer, then this mesh must be occluded, and we can skip + // rendering it. +#ifdef OCCLUSION_CULLING + let aabb_center = mesh_culling_data[input_index].aabb_center.xyz; + let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz; + + // Initialize the AABB and the maximum depth. 
+ let infinity = bitcast(0x7f800000u); + let neg_infinity = bitcast(0xff800000u); + var aabb = vec4(infinity, infinity, neg_infinity, neg_infinity); + var max_depth_view = neg_infinity; + + // Build up the AABB by taking each corner of this mesh's OBB, transforming + // it, and updating the AABB and depth accordingly. + for (var i = 0u; i < 8u; i += 1u) { + let local_pos = aabb_center + select( + vec3(-1.0), + vec3(1.0), + vec3((i & 1) != 0, (i & 2) != 0, (i & 4) != 0) + ) * aabb_half_extents; + +#ifdef EARLY_PHASE + // If we're in the early phase, we're testing against the last frame's + // depth buffer, so we need to use the previous frame's transform. + let prev_world_pos = (previous_world_from_local * vec4(local_pos, 1.0)).xyz; + let view_pos = position_world_to_prev_view(prev_world_pos); + let ndc_pos = position_world_to_prev_ndc(prev_world_pos); +#else // EARLY_PHASE + // Otherwise, if this is the late phase, we use the current frame's + // transform. + let world_pos = (world_from_local * vec4(local_pos, 1.0)).xyz; + let view_pos = position_world_to_view(world_pos); + let ndc_pos = position_world_to_ndc(world_pos); +#endif // EARLY_PHASE + + let uv_pos = ndc_to_uv(ndc_pos.xy); + + // Update the AABB and maximum view-space depth. + aabb = vec4(min(aabb.xy, uv_pos), max(aabb.zw, uv_pos)); + max_depth_view = max(max_depth_view, view_pos.z); + } + + // Clip to the near plane to avoid the NDC depth becoming negative. +#ifdef EARLY_PHASE + max_depth_view = min(-previous_view_uniforms.clip_from_view[3][2], max_depth_view); +#else // EARLY_PHASE + max_depth_view = min(-view.clip_from_view[3][2], max_depth_view); +#endif // EARLY_PHASE + + // Figure out the depth of the occluder, and compare it to our own depth. 
+ + let aabb_pixel_size = occlusion_culling::get_aabb_size_in_pixels(aabb, depth_pyramid); + let occluder_depth_ndc = + occlusion_culling::get_occluder_depth(aabb, aabb_pixel_size, depth_pyramid); + +#ifdef EARLY_PHASE + let max_depth_ndc = prev_view_z_to_depth_ndc(max_depth_view); +#else // EARLY_PHASE + let max_depth_ndc = view_z_to_depth_ndc(max_depth_view); +#endif + + // Are we culled out? + if (max_depth_ndc < occluder_depth_ndc) { +#ifdef EARLY_PHASE + // If this is the early phase, we need to make a note of this mesh so + // that we examine it again in the late phase, so that we handle the + // case in which a mesh that was invisible last frame became visible in + // this frame. + let output_work_item_index = atomicAdd(&late_preprocess_work_item_indirect_parameters[ + push_constants.late_preprocess_work_item_indirect_offset].work_item_count, 1u); + if (output_work_item_index % 64u == 0u) { + // Our workgroup size is 64, and the indirect parameters for the + // late mesh preprocessing phase are counted in workgroups, so if + // we're the first thread in this workgroup, bump the workgroup + // count. + atomicAdd(&late_preprocess_work_item_indirect_parameters[ + push_constants.late_preprocess_work_item_indirect_offset].dispatch_x, 1u); + } + + // Enqueue a work item for the late prepass phase. + late_preprocess_work_items[output_work_item_index].input_index = input_index; + late_preprocess_work_items[output_work_item_index].output_index = output_index; + late_preprocess_work_items[output_work_item_index].indirect_parameters_index = + indirect_parameters_index; +#endif // EARLY_PHASE + // This mesh is culled. Skip it. + return; + } +#endif // OCCLUSION_CULLING + // Calculate inverse transpose. 
let local_from_world_transpose = transpose(maths::inverse_affine3(transpose( world_from_local_affine_transpose))); @@ -139,32 +308,34 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { vec4(local_from_world_transpose[1].yz, local_from_world_transpose[2].xy)); let local_from_world_transpose_b = local_from_world_transpose[2].z; - // Look up the previous model matrix. - let previous_input_index = current_input[input_index].previous_input_index; - var previous_world_from_local: mat3x4; - if (previous_input_index == 0xffffffff) { - previous_world_from_local = world_from_local_affine_transpose; - } else { - previous_world_from_local = previous_input[previous_input_index].world_from_local; - } - // Figure out the output index. In indirect mode, this involves bumping the // instance index in the indirect parameters metadata, which // `build_indirect_params.wgsl` will use to generate the actual indirect // parameters. Otherwise, this index was directly supplied to us. #ifdef INDIRECT +#ifdef LATE_PHASE let batch_output_index = - atomicAdd(&indirect_parameters_metadata[indirect_parameters_index].instance_count, 1u); + atomicLoad(&indirect_parameters_metadata[indirect_parameters_index].early_instance_count) + + atomicAdd(&indirect_parameters_metadata[indirect_parameters_index].late_instance_count, 1u); +#else // LATE_PHASE + let batch_output_index = atomicAdd( + &indirect_parameters_metadata[indirect_parameters_index].early_instance_count, + 1u + ); +#endif // LATE_PHASE + let mesh_output_index = indirect_parameters_metadata[indirect_parameters_index].base_output_index + batch_output_index; + #else // INDIRECT let mesh_output_index = output_index; #endif // INDIRECT // Write the output. 
output[mesh_output_index].world_from_local = world_from_local_affine_transpose; - output[mesh_output_index].previous_world_from_local = previous_world_from_local; + output[mesh_output_index].previous_world_from_local = + previous_world_from_local_affine_transpose; output[mesh_output_index].local_from_world_transpose_a = local_from_world_transpose_a; output[mesh_output_index].local_from_world_transpose_b = local_from_world_transpose_b; output[mesh_output_index].flags = current_input[input_index].flags; diff --git a/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl deleted file mode 100644 index 974a9d303a..0000000000 --- a/crates/bevy_pbr/src/render/mesh_preprocess_types.wgsl +++ /dev/null @@ -1,98 +0,0 @@ -// Types needed for GPU mesh uniform building. - -#define_import_path bevy_pbr::mesh_preprocess_types - -// Per-frame data that the CPU supplies to the GPU. -struct MeshInput { - // The model transform. - world_from_local: mat3x4, - // The lightmap UV rect, packed into 64 bits. - lightmap_uv_rect: vec2, - // A set of bitflags corresponding to `MeshFlags` on the Rust side. See the - // `MESH_FLAGS_` flags in `mesh_types.wgsl` for a list of these. - flags: u32, - // The index of this mesh's `MeshInput` in the `previous_input` array, if - // applicable. If not present, this is `u32::MAX`. - previous_input_index: u32, - // The index of the first vertex in the vertex slab. - first_vertex_index: u32, - // The index of the first vertex index in the index slab. - // - // If this mesh isn't indexed, this value is ignored. - first_index_index: u32, - // For indexed meshes, the number of indices that this mesh has; for - // non-indexed meshes, the number of vertices that this mesh consists of. - index_count: u32, - current_skin_index: u32, - previous_skin_index: u32, - // Low 16 bits: index of the material inside the bind group data. - // High 16 bits: index of the lightmap in the binding array. 
- material_and_lightmap_bind_group_slot: u32, -} - -// The `wgpu` indirect parameters structure for indexed meshes. -// -// The `build_indirect_params.wgsl` shader generates these. -struct IndirectParametersIndexed { - // The number of indices that this mesh has. - index_count: u32, - // The number of instances we are to draw. - instance_count: u32, - // The offset of the first index for this mesh in the index buffer slab. - first_index: u32, - // The offset of the first vertex for this mesh in the vertex buffer slab. - base_vertex: u32, - // The index of the first mesh instance in the `Mesh` buffer. - first_instance: u32, -} - -// The `wgpu` indirect parameters structure for non-indexed meshes. -// -// The `build_indirect_params.wgsl` shader generates these. -struct IndirectParametersNonIndexed { - // The number of vertices that this mesh has. - vertex_count: u32, - // The number of instances we are to draw. - instance_count: u32, - // The offset of the first vertex for this mesh in the vertex buffer slab. - base_vertex: u32, - // The index of the first mesh instance in the `Mesh` buffer. - first_instance: u32, -} - -// Information needed to generate the `IndirectParametersIndexed` and -// `IndirectParametersNonIndexed` draw commands. -struct IndirectParametersMetadata { - // The index of the mesh in the `MeshInput` buffer. - mesh_index: u32, - // The index of the first instance corresponding to this batch in the `Mesh` - // buffer. - base_output_index: u32, - // The index of the batch set in the `IndirectBatchSet` buffer. - batch_set_index: u32, - // The number of instances that are to be drawn. - // - // The `mesh_preprocess.wgsl` shader determines this, and the - // `build_indirect_params.wgsl` shader copies this value into the indirect - // draw command. - instance_count: atomic, -} - -// Information about each batch set. -// -// A *batch set* is a set of meshes that might be multi-drawn together. 
-// -// The CPU creates this structure, and the `build_indirect_params.wgsl` shader -// modifies it. If `multi_draw_indirect_count` is in use, the GPU reads this -// value when multi-drawing a batch set in order to determine how many commands -// make up the batch set. -struct IndirectBatchSet { - // The number of commands that make up this batch set. - // - // The CPU initializes this value to zero. The `build_indirect_params.wgsl` - // shader increments this value as it processes batches. - indirect_parameters_count: atomic, - // The offset of the first batch corresponding to this batch set within the - // `IndirectParametersIndexed` or `IndirectParametersNonIndexed` arrays. - indirect_parameters_base: u32, -} diff --git a/crates/bevy_pbr/src/render/occlusion_culling.wgsl b/crates/bevy_pbr/src/render/occlusion_culling.wgsl new file mode 100644 index 0000000000..1be999cc6a --- /dev/null +++ b/crates/bevy_pbr/src/render/occlusion_culling.wgsl @@ -0,0 +1,30 @@ +// Occlusion culling utility functions. 
+ +#define_import_path bevy_pbr::occlusion_culling + +fn get_aabb_size_in_pixels(aabb: vec4, depth_pyramid: texture_2d) -> vec2 { + let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)); + let aabb_width_pixels = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; + let aabb_height_pixels = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; + return vec2(aabb_width_pixels, aabb_height_pixels); +} + +fn get_occluder_depth( + aabb: vec4, + aabb_pixel_size: vec2, + depth_pyramid: texture_2d +) -> f32 { + let aabb_width_pixels = aabb_pixel_size.x; + let aabb_height_pixels = aabb_pixel_size.y; + + let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)); + let depth_level = max(0, i32(ceil(log2(max(aabb_width_pixels, aabb_height_pixels))))); // TODO: Naga doesn't like this being a u32 + let depth_pyramid_size = vec2(textureDimensions(depth_pyramid, depth_level)); + let aabb_top_left = vec2(aabb.xy * depth_pyramid_size); + + let depth_quad_a = textureLoad(depth_pyramid, aabb_top_left, depth_level).x; + let depth_quad_b = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 0u), depth_level).x; + let depth_quad_c = textureLoad(depth_pyramid, aabb_top_left + vec2(0u, 1u), depth_level).x; + let depth_quad_d = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 1u), depth_level).x; + return min(min(depth_quad_a, depth_quad_b), min(depth_quad_c, depth_quad_d)); +} diff --git a/crates/bevy_pbr/src/render/reset_indirect_batch_sets.wgsl b/crates/bevy_pbr/src/render/reset_indirect_batch_sets.wgsl new file mode 100644 index 0000000000..9309594725 --- /dev/null +++ b/crates/bevy_pbr/src/render/reset_indirect_batch_sets.wgsl @@ -0,0 +1,25 @@ +// Resets the indirect draw counts to zero. +// +// This shader is needed because we reuse the same indirect batch set count +// buffer (i.e. the buffer that gets passed to `multi_draw_indirect_count` to +// determine how many objects to draw) between phases (early, late, and main). 
+// Before launching `build_indirect_params.wgsl`, we need to reinitialize the +// value to 0. + +#import bevy_pbr::mesh_preprocess_types::IndirectBatchSet + +@group(0) @binding(0) var indirect_batch_sets: array; + +@compute +@workgroup_size(64) +fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { + // Figure out our instance index. If this thread doesn't correspond to any + // index, bail. + let instance_index = global_invocation_id.x; + if (instance_index >= arrayLength(&indirect_batch_sets)) { + return; + } + + // Reset the number of batch sets to 0. + atomicStore(&indirect_batch_sets[instance_index].indirect_parameters_count, 0u); +} diff --git a/crates/bevy_pbr/src/render/view_transformations.wgsl b/crates/bevy_pbr/src/render/view_transformations.wgsl index 63ee78a0c0..80c26d7b69 100644 --- a/crates/bevy_pbr/src/render/view_transformations.wgsl +++ b/crates/bevy_pbr/src/render/view_transformations.wgsl @@ -1,6 +1,7 @@ #define_import_path bevy_pbr::view_transformations #import bevy_pbr::mesh_view_bindings as view_bindings +#import bevy_pbr::prepass_bindings /// World space: /// +y is up @@ -93,6 +94,22 @@ fn direction_clip_to_view(clip_dir: vec4) -> vec3 { return view_dir.xyz; } +// ----------------- +// TO PREV. 
VIEW --- +// ----------------- + +fn position_world_to_prev_view(world_pos: vec3) -> vec3 { + let view_pos = prepass_bindings::previous_view_uniforms.view_from_world * + vec4(world_pos, 1.0); + return view_pos.xyz; +} + +fn position_world_to_prev_ndc(world_pos: vec3) -> vec3 { + let ndc_pos = prepass_bindings::previous_view_uniforms.clip_from_world * + vec4(world_pos, 1.0); + return ndc_pos.xyz / ndc_pos.w; +} + // ----------------- // TO CLIP --------- // ----------------- @@ -172,6 +189,19 @@ fn view_z_to_depth_ndc(view_z: f32) -> f32 { #endif } +fn prev_view_z_to_depth_ndc(view_z: f32) -> f32 { +#ifdef VIEW_PROJECTION_PERSPECTIVE + return -perspective_camera_near() / view_z; +#else ifdef VIEW_PROJECTION_ORTHOGRAPHIC + return prepass_bindings::previous_view_uniforms.clip_from_view[3][2] + + view_z * prepass_bindings::previous_view_uniforms.clip_from_view[2][2]; +#else + let ndc_pos = prepass_bindings::previous_view_uniforms.clip_from_view * + vec4(0.0, 0.0, view_z, 1.0); + return ndc_pos.z / ndc_pos.w; +#endif +} + // ----------------- // UV -------------- // ----------------- diff --git a/crates/bevy_render/src/batching/gpu_preprocessing.rs b/crates/bevy_render/src/batching/gpu_preprocessing.rs index a650951793..7effe9d1d7 100644 --- a/crates/bevy_render/src/batching/gpu_preprocessing.rs +++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs @@ -12,6 +12,8 @@ use bevy_ecs::{ world::{FromWorld, World}, }; use bevy_encase_derive::ShaderType; +use bevy_math::UVec4; +use bevy_platform_support::collections::hash_map::Entry; use bevy_utils::{default, TypeIdMap}; use bytemuck::{Pod, Zeroable}; use nonmax::NonMaxU32; @@ -19,6 +21,7 @@ use tracing::error; use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features}; use crate::{ + experimental::occlusion_culling::OcclusionCulling, render_phase::{ BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet, BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItemBatchSetKey as _, @@ -33,7 
+36,15 @@ use crate::{
 
 use super::{BatchMeta, GetBatchData, GetFullBatchData};
 
-pub struct BatchingPlugin;
+#[derive(Default)]
+pub struct BatchingPlugin {
+    /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
+    /// that they can be read back to CPU.
+    ///
+    /// This is a debugging feature that may reduce performance. It primarily
+    /// exists for the `occlusion_culling` example.
+    pub allow_copies_from_indirect_parameters: bool,
+}
 
 impl Plugin for BatchingPlugin {
     fn build(&self, app: &mut App) {
@@ -42,7 +53,9 @@ impl Plugin for BatchingPlugin {
         };
 
         render_app
-            .insert_resource(IndirectParametersBuffers::new())
+            .insert_resource(IndirectParametersBuffers::new(
+                self.allow_copies_from_indirect_parameters,
+            ))
             .add_systems(
                 Render,
                 write_indirect_parameters_buffers.in_set(RenderSet::PrepareResourcesFlush),
@@ -159,6 +172,26 @@ where
     /// data input uniform is expected to contain the index of the
     /// corresponding buffer data input uniform in this list.
     pub previous_input_buffer: InstanceInputUniformBuffer,
+
+    /// A buffer that holds the number of indexed meshes that weren't visible in
+    /// the previous frame, when GPU occlusion culling is in use.
+    ///
+    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
+    /// view. Bevy uses this value to determine how many threads to dispatch to
+    /// check meshes that weren't visible last frame to see if they became newly
+    /// visible this frame.
+    pub late_indexed_indirect_parameters_buffer:
+        RawBufferVec,
+
+    /// A buffer that holds the number of non-indexed meshes that weren't
+    /// visible in the previous frame, when GPU occlusion culling is in use.
+    ///
+    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
+    /// view. Bevy uses this value to determine how many threads to dispatch to
+    /// check meshes that weren't visible last frame to see if they became newly
+    /// visible this frame.
+ pub late_non_indexed_indirect_parameters_buffer: + RawBufferVec, } /// Holds the GPU buffer of instance input data, which is the data about each @@ -260,6 +293,23 @@ where self.buffer.push(default()); } } + + /// Returns the number of instances in this buffer. + pub fn len(&self) -> usize { + self.buffer.len() + } + + /// Returns true if this buffer has no instances or false if it contains any + /// instances. + pub fn is_empty(&self) -> bool { + self.buffer.is_empty() + } + + /// Consumes this [`InstanceInputUniformBuffer`] and returns the raw buffer + /// ready to be uploaded to the GPU. + pub fn into_buffer(self) -> RawBufferVec { + self.buffer + } } impl Default for InstanceInputUniformBuffer @@ -289,25 +339,130 @@ pub enum PreprocessWorkItemBuffers { indexed: BufferVec, /// The buffer of work items corresponding to non-indexed meshes. non_indexed: BufferVec, + /// The work item buffers we use when GPU occlusion culling is in use. + gpu_occlusion_culling: Option, }, } -impl PreprocessWorkItemBuffers { - /// Creates a new set of buffers. +/// The work item buffers we use when GPU occlusion culling is in use. +pub struct GpuOcclusionCullingWorkItemBuffers { + /// The buffer of work items corresponding to indexed meshes. + pub late_indexed: UninitBufferVec, + /// The buffer of work items corresponding to non-indexed meshes. + pub late_non_indexed: UninitBufferVec, + /// The offset into the + /// [`BatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`] + /// where this view's indirect dispatch counts for indexed meshes live. + pub late_indirect_parameters_indexed_offset: u32, + /// The offset into the + /// [`BatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`] + /// where this view's indirect dispatch counts for non-indexed meshes live. + pub late_indirect_parameters_non_indexed_offset: u32, +} + +/// A GPU-side data structure that stores the number of workgroups to dispatch +/// for the second phase of GPU occlusion culling. 
+///
+/// The late mesh preprocessing phase checks meshes that weren't visible last frame
+/// to determine if they're potentially visible this frame.
+#[derive(Clone, Copy, ShaderType, Pod, Zeroable)]
+#[repr(C)]
+pub struct LatePreprocessWorkItemIndirectParameters {
+    /// The number of workgroups to dispatch.
     ///
-    /// `no_indirect_drawing` specifies whether we're drawing directly or
-    /// indirectly.
-    pub fn new(no_indirect_drawing: bool) -> Self {
-        if no_indirect_drawing {
-            PreprocessWorkItemBuffers::Direct(BufferVec::new(BufferUsages::STORAGE))
-        } else {
-            PreprocessWorkItemBuffers::Indirect {
-                indexed: BufferVec::new(BufferUsages::STORAGE),
-                non_indexed: BufferVec::new(BufferUsages::STORAGE),
+    /// This will be equal to `work_item_count / 64`, rounded *up*.
+    dispatch_x: u32,
+    /// The number of workgroups along the abstract Y axis to dispatch: always
+    /// 1.
+    dispatch_y: u32,
+    /// The number of workgroups along the abstract Z axis to dispatch: always
+    /// 1.
+    dispatch_z: u32,
+    /// The actual number of work items.
+    ///
+    /// The GPU indirect dispatch doesn't read this, but it's used internally to
+    /// determine the actual number of work items that exist in the late
+    /// preprocessing work item buffer.
+    work_item_count: u32,
+    /// Padding to 64-byte boundaries for some hardware.
+    pad: UVec4,
+}
+
+impl Default for LatePreprocessWorkItemIndirectParameters {
+    fn default() -> LatePreprocessWorkItemIndirectParameters {
+        LatePreprocessWorkItemIndirectParameters {
+            dispatch_x: 0,
+            dispatch_y: 1,
+            dispatch_z: 1,
+            work_item_count: 0,
+            pad: default(),
+        }
+    }
+}
+
+/// Returns the set of work item buffers for the given view, first creating it
+/// if necessary.
+///
+/// Bevy uses work item buffers to tell the mesh preprocessing compute shader
+/// which meshes are to be drawn.
+///
+/// You may need to call this function if you're implementing your own custom
+/// render phases. See the `specialized_mesh_pipeline` example.
+pub fn get_or_create_work_item_buffer<'a, I>( + work_item_buffers: &'a mut EntityHashMap>, + view: Entity, + no_indirect_drawing: bool, + gpu_occlusion_culling: bool, + late_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec< + LatePreprocessWorkItemIndirectParameters, + >, + late_non_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec< + LatePreprocessWorkItemIndirectParameters, + >, +) -> &'a mut PreprocessWorkItemBuffers +where + I: 'static, +{ + match work_item_buffers + .entry(view) + .or_default() + .entry(TypeId::of::()) + { + Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), + Entry::Vacant(vacant_entry) => { + if no_indirect_drawing { + vacant_entry.insert(PreprocessWorkItemBuffers::Direct(BufferVec::new( + BufferUsages::STORAGE, + ))) + } else { + vacant_entry.insert(PreprocessWorkItemBuffers::Indirect { + indexed: BufferVec::new(BufferUsages::STORAGE), + non_indexed: BufferVec::new(BufferUsages::STORAGE), + gpu_occlusion_culling: if gpu_occlusion_culling { + let late_indirect_parameters_indexed_offset = + late_indexed_indirect_parameters_buffer + .push(LatePreprocessWorkItemIndirectParameters::default()); + let late_indirect_parameters_non_indexed_offset = + late_non_indexed_indirect_parameters_buffer + .push(LatePreprocessWorkItemIndirectParameters::default()); + Some(GpuOcclusionCullingWorkItemBuffers { + late_indexed: UninitBufferVec::new(BufferUsages::STORAGE), + late_non_indexed: UninitBufferVec::new(BufferUsages::STORAGE), + late_indirect_parameters_indexed_offset: + late_indirect_parameters_indexed_offset as u32, + late_indirect_parameters_non_indexed_offset: + late_indirect_parameters_non_indexed_offset as u32, + }) + } else { + None + }, + }) } } } +} +impl PreprocessWorkItemBuffers { /// Adds a new work item to the appropriate buffer. 
/// /// `indexed` specifies whether the work item corresponds to an indexed @@ -320,12 +475,21 @@ impl PreprocessWorkItemBuffers { PreprocessWorkItemBuffers::Indirect { indexed: ref mut indexed_buffer, non_indexed: ref mut non_indexed_buffer, + ref mut gpu_occlusion_culling, } => { if indexed { indexed_buffer.push(preprocess_work_item); } else { non_indexed_buffer.push(preprocess_work_item); } + + if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling { + if indexed { + gpu_occlusion_culling.late_indexed.add(); + } else { + gpu_occlusion_culling.late_non_indexed.add(); + } + } } } } @@ -353,7 +517,7 @@ pub struct PreprocessWorkItem { /// /// This is the variant for indexed meshes. We generate the instances of this /// structure in the `build_indirect_params.wgsl` compute shader. -#[derive(Clone, Copy, Pod, Zeroable, ShaderType)] +#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)] #[repr(C)] pub struct IndirectParametersIndexed { /// The number of indices that this mesh has. @@ -372,7 +536,7 @@ pub struct IndirectParametersIndexed { /// /// This is the variant for non-indexed meshes. We generate the instances of /// this structure in the `build_indirect_params.wgsl` compute shader. -#[derive(Clone, Copy, Pod, Zeroable, ShaderType)] +#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)] #[repr(C)] pub struct IndirectParametersNonIndexed { /// The number of vertices that this mesh has. @@ -389,10 +553,11 @@ pub struct IndirectParametersNonIndexed { /// each mesh are actually to be drawn. /// /// The CPU writes to this structure in order to initialize the fields other -/// than [`Self::instance_count`]. The GPU mesh preprocessing shader increments -/// the [`Self::instance_count`] as it determines that meshes are visible. The -/// indirect parameter building shader reads this metadata in order to construct -/// the indirect draw parameters. +/// than [`Self::early_instance_count`] and [`Self::late_instance_count`]. 
The +/// GPU mesh preprocessing shader increments the [`Self::early_instance_count`] +/// and [`Self::late_instance_count`] as it determines that meshes are visible. +/// The indirect parameter building shader reads this metadata in order to +/// construct the indirect draw parameters. /// /// Each batch will have one instance of this structure. #[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)] @@ -419,11 +584,18 @@ pub struct IndirectParametersMetadata { /// set. pub batch_set_index: u32, - /// The number of instances that have been judged potentially visible. + /// The number of instances that were judged visible last frame. /// /// The CPU sets this value to 0, and the GPU mesh preprocessing shader /// increments it as it culls mesh instances. - pub instance_count: u32, + pub early_instance_count: u32, + + /// The number of instances that have been judged potentially visible this + /// frame that weren't in the last frame's potentially visible set. + /// + /// The CPU sets this value to 0, and the GPU mesh preprocessing shader + /// increments it as it culls mesh instances. + pub late_instance_count: u32, } /// A structure, shared between CPU and GPU, that holds the number of on-GPU @@ -520,16 +692,19 @@ pub struct IndirectParametersBuffers { impl IndirectParametersBuffers { /// Creates the indirect parameters buffers. 
- pub fn new() -> IndirectParametersBuffers { + pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers { + let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT; + if allow_copies_from_indirect_parameter_buffers { + indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC; + } + IndirectParametersBuffers { - non_indexed_data: UninitBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT), + non_indexed_data: UninitBufferVec::new(indirect_parameter_buffer_usages), non_indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE), - non_indexed_batch_sets: RawBufferVec::new( - BufferUsages::STORAGE | BufferUsages::INDIRECT, - ), - indexed_data: UninitBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT), + non_indexed_batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages), + indexed_data: UninitBufferVec::new(indirect_parameter_buffer_usages), indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE), - indexed_batch_sets: RawBufferVec::new(BufferUsages::STORAGE | BufferUsages::INDIRECT), + indexed_batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages), } } @@ -617,9 +792,9 @@ impl IndirectParametersBuffers { /// Reserves space for `count` new batches corresponding to non-indexed /// meshes. /// - /// This allocates in both the [`Self::non_indexed_metadata`] and - /// [`Self::non_indexed_data`] buffers. - fn allocate_non_indexed(&mut self, count: u32) -> u32 { + /// This allocates in both the `non_indexed_metadata` and `non_indexed_data` + /// buffers. 
+ pub fn allocate_non_indexed(&mut self, count: u32) -> u32 { let length = self.non_indexed_data.len(); self.non_indexed_metadata.reserve_internal(count as usize); for _ in 0..count { @@ -712,11 +887,17 @@ impl IndirectParametersBuffers { }); } } + + pub fn get_next_batch_set_index(&self, indexed: bool) -> Option { + NonMaxU32::new(self.batch_set_count(indexed) as u32) + } } impl Default for IndirectParametersBuffers { fn default() -> Self { - Self::new() + // By default, we don't allow GPU indirect parameter mapping, since + // that's a debugging option. + Self::new(false) } } @@ -755,7 +936,7 @@ impl FromWorld for GpuPreprocessingSupport { impl BatchedInstanceBuffers where BD: GpuArrayBufferable + Sync + Send + 'static, - BDI: Pod + Default, + BDI: Pod + Sync + Send + Default + 'static, { /// Creates new buffers. pub fn new() -> Self { @@ -764,6 +945,12 @@ where work_item_buffers: EntityHashMap::default(), current_input_buffer: InstanceInputUniformBuffer::new(), previous_input_buffer: InstanceInputUniformBuffer::new(), + late_indexed_indirect_parameters_buffer: RawBufferVec::new( + BufferUsages::STORAGE | BufferUsages::INDIRECT, + ), + late_non_indexed_indirect_parameters_buffer: RawBufferVec::new( + BufferUsages::STORAGE | BufferUsages::INDIRECT, + ), } } @@ -779,28 +966,16 @@ where /// Clears out the buffers in preparation for a new frame. 
pub fn clear(&mut self) { self.data_buffer.clear(); - - for view_work_item_buffers in self.work_item_buffers.values_mut() { - for phase_work_item_buffers in view_work_item_buffers.values_mut() { - match *phase_work_item_buffers { - PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => buffer_vec.clear(), - PreprocessWorkItemBuffers::Indirect { - ref mut indexed, - ref mut non_indexed, - } => { - indexed.clear(); - non_indexed.clear(); - } - } - } - } + self.late_indexed_indirect_parameters_buffer.clear(); + self.late_non_indexed_indirect_parameters_buffer.clear(); + self.work_item_buffers.clear(); } } impl Default for BatchedInstanceBuffers where BD: GpuArrayBufferable + Sync + Send + 'static, - BDI: Pod + Default, + BDI: Pod + Default + Sync + Send + 'static, { fn default() -> Self { Self::new() @@ -856,9 +1031,14 @@ where let (batch_range, batch_extra_index) = phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut(); *batch_range = self.instance_start_index..instance_end_index; - *batch_extra_index = - PhaseItemExtraIndex::maybe_indirect_parameters_index(self.indirect_parameters_index); - + *batch_extra_index = match self.indirect_parameters_index { + Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex { + range: u32::from(indirect_parameters_index) + ..(u32::from(indirect_parameters_index) + 1), + batch_set_index: None, + }, + None => PhaseItemExtraIndex::None, + }; if let Some(indirect_parameters_index) = self.indirect_parameters_index { indirect_parameters_buffers .add_batch_set(self.indexed, indirect_parameters_index.into()); @@ -910,7 +1090,12 @@ pub fn batch_and_prepare_sorted_render_phase( gpu_array_buffer: ResMut>, mut indirect_parameters_buffers: ResMut, mut sorted_render_phases: ResMut>, - mut views: Query<(Entity, &ExtractedView, Has)>, + mut views: Query<( + Entity, + &ExtractedView, + Has, + Has, + )>, system_param_item: StaticSystemParam, ) where I: CachedRenderPipelinePhaseItem + 
SortedPhaseItem, @@ -920,20 +1105,25 @@ pub fn batch_and_prepare_sorted_render_phase( let BatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, + ref mut late_indexed_indirect_parameters_buffer, + ref mut late_non_indexed_indirect_parameters_buffer, .. } = gpu_array_buffer.into_inner(); - for (view, extracted_view, no_indirect_drawing) in &mut views { + for (view, extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views { let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else { continue; }; // Create the work item buffer if necessary. - let work_item_buffer = work_item_buffers - .entry(view) - .or_insert_with(TypeIdMap::default) - .entry(TypeId::of::()) - .or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing)); + let work_item_buffer = get_or_create_work_item_buffer::( + work_item_buffers, + view, + no_indirect_drawing, + gpu_occlusion_culling, + late_indexed_indirect_parameters_buffer, + late_non_indexed_indirect_parameters_buffer, + ); // Walk through the list of phase items, building up batches as we go. let mut batch: Option> = None; @@ -1056,7 +1246,15 @@ pub fn batch_and_prepare_binned_render_phase( gpu_array_buffer: ResMut>, mut indirect_parameters_buffers: ResMut, mut binned_render_phases: ResMut>, - mut views: Query<(Entity, &ExtractedView, Has)>, + mut views: Query< + ( + Entity, + &ExtractedView, + Has, + Has, + ), + With, + >, param: StaticSystemParam, ) where BPI: BinnedPhaseItem, @@ -1067,21 +1265,26 @@ pub fn batch_and_prepare_binned_render_phase( let BatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, + ref mut late_indexed_indirect_parameters_buffer, + ref mut late_non_indexed_indirect_parameters_buffer, .. 
} = gpu_array_buffer.into_inner(); - for (view, extracted_view, no_indirect_drawing) in &mut views { + for (view, extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views { let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else { continue; }; // Create the work item buffer if necessary; otherwise, just mark it as // used this frame. - let work_item_buffer = work_item_buffers - .entry(view) - .or_insert_with(TypeIdMap::default) - .entry(TypeId::of::()) - .or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing)); + let work_item_buffer = get_or_create_work_item_buffer::( + work_item_buffers, + view, + no_indirect_drawing, + gpu_occlusion_culling, + late_indexed_indirect_parameters_buffer, + late_non_indexed_indirect_parameters_buffer, + ); // Prepare multidrawables. @@ -1125,10 +1328,9 @@ pub fn batch_and_prepare_binned_render_phase( // Start a new batch, in indirect mode. let indirect_parameters_index = indirect_parameters_buffers.allocate(batch_set_key.indexed(), 1); - let batch_set_index = NonMaxU32::new( - indirect_parameters_buffers.batch_set_count(batch_set_key.indexed()) - as u32, - ); + let batch_set_index = indirect_parameters_buffers + .get_next_batch_set_index(batch_set_key.indexed()); + GFBD::write_batch_indirect_parameters_metadata( input_index.into(), batch_set_key.indexed(), @@ -1234,9 +1436,9 @@ pub fn batch_and_prepare_binned_render_phase( // Start a new batch, in indirect mode. 
let indirect_parameters_index = indirect_parameters_buffers.allocate(key.0.indexed(), 1); - let batch_set_index = NonMaxU32::new( - indirect_parameters_buffers.batch_set_count(key.0.indexed()) as u32, - ); + let batch_set_index = + indirect_parameters_buffers.get_next_batch_set_index(key.0.indexed()); + GFBD::write_batch_indirect_parameters_metadata( input_index.into(), key.0.indexed(), @@ -1256,9 +1458,10 @@ pub fn batch_and_prepare_binned_render_phase( batch = Some(BinnedRenderPhaseBatch { representative_entity: (entity, main_entity), instance_range: output_index..output_index + 1, - extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index( - NonMaxU32::new(indirect_parameters_index), - ), + extra_index: PhaseItemExtraIndex::IndirectParametersIndex { + range: indirect_parameters_index..(indirect_parameters_index + 1), + batch_set_index: None, + }, }); } @@ -1396,6 +1599,8 @@ pub fn write_batched_instance_buffers( ref mut work_item_buffers, ref mut current_input_buffer, ref mut previous_input_buffer, + ref mut late_indexed_indirect_parameters_buffer, + ref mut late_non_indexed_indirect_parameters_buffer, } = gpu_array_buffer.into_inner(); data_buffer.write_buffer(&render_device); @@ -1405,6 +1610,8 @@ pub fn write_batched_instance_buffers( previous_input_buffer .buffer .write_buffer(&render_device, &render_queue); + late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue); + late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue); for view_work_item_buffers in work_item_buffers.values_mut() { for phase_work_item_buffers in view_work_item_buffers.values_mut() { @@ -1415,9 +1622,25 @@ pub fn write_batched_instance_buffers( PreprocessWorkItemBuffers::Indirect { ref mut indexed, ref mut non_indexed, + ref mut gpu_occlusion_culling, } => { indexed.write_buffer(&render_device, &render_queue); non_indexed.write_buffer(&render_device, &render_queue); + + if let Some(GpuOcclusionCullingWorkItemBuffers { + 
ref mut late_indexed, + ref mut late_non_indexed, + late_indirect_parameters_indexed_offset: _, + late_indirect_parameters_non_indexed_offset: _, + }) = *gpu_occlusion_culling + { + if !late_indexed.is_empty() { + late_indexed.write_buffer(&render_device); + } + if !late_non_indexed.is_empty() { + late_non_indexed.write_buffer(&render_device); + } + } } } } diff --git a/crates/bevy_render/src/experimental/mod.rs b/crates/bevy_render/src/experimental/mod.rs new file mode 100644 index 0000000000..40bb6cf1dc --- /dev/null +++ b/crates/bevy_render/src/experimental/mod.rs @@ -0,0 +1,6 @@ +//! Experimental rendering features. +//! +//! Experimental features are features with known problems, but are included +//! nonetheless for testing purposes. + +pub mod occlusion_culling; diff --git a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl new file mode 100644 index 0000000000..7f4dd71f61 --- /dev/null +++ b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl @@ -0,0 +1,60 @@ +// Types needed for GPU mesh uniform building. + +#define_import_path bevy_pbr::mesh_preprocess_types + +// Per-frame data that the CPU supplies to the GPU. +struct MeshInput { + // The model transform. + world_from_local: mat3x4, + // The lightmap UV rect, packed into 64 bits. + lightmap_uv_rect: vec2, + // Various flags. + flags: u32, + previous_input_index: u32, + first_vertex_index: u32, + first_index_index: u32, + index_count: u32, + current_skin_index: u32, + previous_skin_index: u32, + // Low 16 bits: index of the material inside the bind group data. + // High 16 bits: index of the lightmap in the binding array. + material_and_lightmap_bind_group_slot: u32, + pad_a: u32, + pad_b: u32, +} + +// The `wgpu` indirect parameters structure. This is a union of two structures. +// For more information, see the corresponding comment in +// `gpu_preprocessing.rs`. 
+struct IndirectParametersIndexed { + // `vertex_count` or `index_count`. + index_count: u32, + // `instance_count` in both structures. + instance_count: u32, + // `first_vertex` or `first_index`. + first_index: u32, + // `base_vertex` or `first_instance`. + base_vertex: u32, + // A read-only copy of `instance_index`. + first_instance: u32, +} + +struct IndirectParametersNonIndexed { + vertex_count: u32, + instance_count: u32, + base_vertex: u32, + first_instance: u32, +} + +struct IndirectParametersMetadata { + mesh_index: u32, + base_output_index: u32, + batch_set_index: u32, + early_instance_count: atomic, + late_instance_count: atomic, +} + +struct IndirectBatchSet { + indirect_parameters_count: atomic, + indirect_parameters_base: u32, +} diff --git a/crates/bevy_render/src/experimental/occlusion_culling/mod.rs b/crates/bevy_render/src/experimental/occlusion_culling/mod.rs new file mode 100644 index 0000000000..1c7fae1515 --- /dev/null +++ b/crates/bevy_render/src/experimental/occlusion_culling/mod.rs @@ -0,0 +1,87 @@ +//! GPU occlusion culling. +//! +//! See [`OcclusionCulling`] for a detailed description of occlusion culling in +//! Bevy. + +use bevy_app::{App, Plugin}; +use bevy_asset::{load_internal_asset, Handle}; +use bevy_ecs::{component::Component, prelude::ReflectComponent}; +use bevy_reflect::{prelude::ReflectDefault, Reflect}; + +use crate::{extract_component::ExtractComponent, render_resource::Shader}; + +/// The handle to the `mesh_preprocess_types.wgsl` compute shader. +pub const MESH_PREPROCESS_TYPES_SHADER_HANDLE: Handle = + Handle::weak_from_u128(2720440370122465935); + +/// Enables GPU occlusion culling. +/// +/// See [`OcclusionCulling`] for a detailed description of occlusion culling in +/// Bevy. 
+pub struct OcclusionCullingPlugin; + +impl Plugin for OcclusionCullingPlugin { + fn build(&self, app: &mut App) { + load_internal_asset!( + app, + MESH_PREPROCESS_TYPES_SHADER_HANDLE, + "mesh_preprocess_types.wgsl", + Shader::from_wgsl + ); + } +} + +/// Add this component to a view in order to enable experimental GPU occlusion +/// culling. +/// +/// *Bevy's occlusion culling is currently marked as experimental.* There are +/// known issues whereby, in rare circumstances, occlusion culling can result in +/// meshes being culled that shouldn't be (i.e. meshes that turn invisible). +/// Please try it out and report issues. +/// +/// *Occlusion culling* allows Bevy to avoid rendering objects that are fully +/// behind other opaque or alpha tested objects. This is different from, and +/// complements, depth fragment rejection as the `DepthPrepass` enables. While +/// depth rejection allows Bevy to avoid rendering *pixels* that are behind +/// other objects, the GPU still has to examine those pixels to reject them, +/// which requires transforming the vertices of the objects and performing +/// skinning if the objects were skinned. Occlusion culling allows the GPU to go +/// a step further, avoiding even transforming the vertices of objects that it +/// can quickly prove to be behind other objects. +/// +/// Occlusion culling inherently has some overhead, because Bevy must examine +/// the objects' bounding boxes, and create an acceleration structure +/// (hierarchical Z-buffer) to perform the occlusion tests. Therefore, occlusion +/// culling is disabled by default. Only enable it if you measure it to be a +/// speedup on your scene. Note that, because Bevy's occlusion culling runs on +/// the GPU and is quite efficient, it's rare for occlusion culling to result in +/// a significant slowdown. +/// +/// Occlusion culling currently requires a `DepthPrepass`. If no depth prepass +/// is present on the view, the [`OcclusionCulling`] component will be ignored. 
+/// Additionally, occlusion culling is currently incompatible with deferred +/// shading; including both `DeferredPrepass` and [`OcclusionCulling`] results +/// in unspecified behavior. +/// +/// The algorithm that Bevy uses is known as [*two-phase occlusion culling*]. +/// When you enable occlusion culling, Bevy splits the depth prepass into two: +/// an *early* depth prepass and a *late* depth prepass. The early depth prepass +/// renders all the meshes that were visible last frame to produce a +/// conservative approximation of the depth buffer. Then, after producing an +/// acceleration structure known as a hierarchical Z-buffer or depth pyramid, +/// Bevy tests the bounding boxes of all meshes against that depth buffer. Those +/// that can be quickly proven to be behind the geometry rendered during the +/// early depth prepass are skipped entirely. The other potentially-visible +/// meshes are rendered during the late prepass, and finally all the visible +/// meshes are rendered as usual during the opaque, transparent, etc. passes. +/// +/// Unlike other occlusion culling systems you may be familiar with, Bevy's +/// occlusion culling is fully dynamic and requires no baking step. The CPU +/// overhead is minimal. Large skinned meshes and other dynamic objects can +/// occlude other objects. 
+/// +/// [*two-phase occlusion culling*]: +/// https://medium.com/@mil_kru/two-pass-occlusion-culling-4100edcad501 +#[derive(Component, ExtractComponent, Clone, Copy, Default, Reflect)] +#[reflect(Component, Default)] +pub struct OcclusionCulling; diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs index f98efcf1e7..0f9c504ede 100644 --- a/crates/bevy_render/src/lib.rs +++ b/crates/bevy_render/src/lib.rs @@ -23,6 +23,7 @@ pub mod alpha; pub mod batching; pub mod camera; pub mod diagnostic; +pub mod experimental; pub mod extract_component; pub mod extract_instances; mod extract_param; @@ -73,6 +74,7 @@ use bevy_utils::prelude::default; pub use extract_param::Extract; use bevy_window::{PrimaryWindow, RawHandleWrapperHolder}; +use experimental::occlusion_culling::OcclusionCullingPlugin; use extract_resource::ExtractResourcePlugin; use globals::GlobalsPlugin; use render_asset::RenderAssetBytesPerFrame; @@ -115,6 +117,12 @@ pub struct RenderPlugin { /// If `true`, disables asynchronous pipeline compilation. /// This has no effect on macOS, Wasm, iOS, or without the `multi_threaded` feature. pub synchronous_pipeline_compilation: bool, + /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so + /// that they can be read back to CPU. + /// + /// This is a debugging feature that may reduce performance. It primarily + /// exists for the `occlusion_culling` example. + pub allow_copies_from_indirect_parameters: bool, } /// The systems sets of the default [`App`] rendering schedule. 
@@ -357,10 +365,13 @@ impl Plugin for RenderPlugin { MeshPlugin, GlobalsPlugin, MorphPlugin, - BatchingPlugin, + BatchingPlugin { + allow_copies_from_indirect_parameters: self.allow_copies_from_indirect_parameters, + }, SyncWorldPlugin, StoragePlugin, GpuReadbackPlugin::default(), + OcclusionCullingPlugin, )); app.init_resource::() diff --git a/crates/bevy_render/src/render_graph/context.rs b/crates/bevy_render/src/render_graph/context.rs index c27f269d0b..4c6ecd30c1 100644 --- a/crates/bevy_render/src/render_graph/context.rs +++ b/crates/bevy_render/src/render_graph/context.rs @@ -3,10 +3,10 @@ use crate::{ render_resource::{Buffer, Sampler, TextureView}, }; use alloc::borrow::Cow; -use bevy_ecs::entity::Entity; +use bevy_ecs::{entity::Entity, intern::Interned}; use thiserror::Error; -use super::{InternedRenderSubGraph, RenderSubGraph}; +use super::{InternedRenderSubGraph, RenderLabel, RenderSubGraph}; /// A command that signals the graph runner to run the sub graph corresponding to the `sub_graph` /// with the specified `inputs` next. @@ -224,6 +224,11 @@ impl<'a> RenderGraphContext<'a> { Ok(()) } + /// Returns a human-readable label for this node, for debugging purposes. + pub fn label(&self) -> Interned { + self.node.label + } + /// Finishes the context for this [`Node`](super::Node) by /// returning the sub graphs to run next. 
pub fn finish(self) -> Vec { diff --git a/crates/bevy_render/src/render_resource/buffer_vec.rs b/crates/bevy_render/src/render_resource/buffer_vec.rs index 8191671c15..d3f16ac341 100644 --- a/crates/bevy_render/src/render_resource/buffer_vec.rs +++ b/crates/bevy_render/src/render_resource/buffer_vec.rs @@ -202,6 +202,18 @@ impl RawBufferVec { } } +impl RawBufferVec +where + T: NoUninit + Default, +{ + pub fn grow_set(&mut self, index: u32, value: T) { + while index as usize + 1 > self.len() { + self.values.push(T::default()); + } + self.values[index as usize] = value; + } +} + impl Extend for RawBufferVec { #[inline] fn extend>(&mut self, iter: I) { diff --git a/crates/bevy_render/src/view/mod.rs b/crates/bevy_render/src/view/mod.rs index 57d7562c45..a3a5118c30 100644 --- a/crates/bevy_render/src/view/mod.rs +++ b/crates/bevy_render/src/view/mod.rs @@ -10,6 +10,7 @@ use crate::{ CameraMainTextureUsages, ClearColor, ClearColorConfig, Exposure, ExtractedCamera, ManualTextureViews, MipBias, NormalizedRenderTarget, TemporalJitter, }, + experimental::occlusion_culling::OcclusionCulling, extract_component::ExtractComponentPlugin, prelude::Shader, primitives::Frustum, @@ -109,9 +110,11 @@ impl Plugin for ViewPlugin { .register_type::() .register_type::() .register_type::() + .register_type::() // NOTE: windows.is_changed() handles cases where a window was resized .add_plugins(( ExtractComponentPlugin::::default(), + ExtractComponentPlugin::::default(), VisibilityPlugin, VisibilityRangePlugin, )); diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs index 3566c74a5e..e44b27a005 100644 --- a/crates/bevy_sprite/src/mesh2d/material.rs +++ b/crates/bevy_sprite/src/mesh2d/material.rs @@ -17,6 +17,7 @@ use bevy_ecs::{ }; use bevy_math::FloatOrd; use bevy_reflect::{prelude::ReflectDefault, Reflect}; +use bevy_render::view::RenderVisibleEntities; use bevy_render::{ mesh::{MeshVertexBufferLayoutRef, RenderMesh}, render_asset::{ @@ -34,7 
+35,7 @@ use bevy_render::{ }, renderer::RenderDevice, sync_world::{MainEntity, MainEntityHashMap}, - view::{ExtractedView, Msaa, RenderVisibleEntities, ViewVisibility}, + view::{ExtractedView, Msaa, ViewVisibility}, Extract, ExtractSchedule, Render, RenderApp, RenderSet, }; use core::{hash::Hash, marker::PhantomData}; diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs index 52309fd492..86d02d5680 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh.rs +++ b/crates/bevy_sprite/src/mesh2d/mesh.rs @@ -421,7 +421,8 @@ impl GetFullBatchData for Mesh2dPipeline { None => !0, Some(batch_set_index) => u32::from(batch_set_index), }, - instance_count: 0, + early_instance_count: 0, + late_instance_count: 0, }; if indexed { diff --git a/examples/3d/occlusion_culling.rs b/examples/3d/occlusion_culling.rs new file mode 100644 index 0000000000..11bdde698a --- /dev/null +++ b/examples/3d/occlusion_culling.rs @@ -0,0 +1,669 @@ +//! Demonstrates occlusion culling. +//! +//! This demo rotates many small cubes around a rotating large cube at the +//! origin. At all times, the large cube will be occluding several of the small +//! cubes. The demo displays the number of cubes that were actually rendered, so +//! the effects of occlusion culling can be seen. 
+ +use std::{ + f32::consts::PI, + fmt::Write as _, + result::Result, + sync::{Arc, Mutex}, +}; + +use bevy::{ + color::palettes::css::{SILVER, WHITE}, + core_pipeline::{ + core_3d::graph::{Core3d, Node3d}, + prepass::DepthPrepass, + }, + prelude::*, + render::{ + batching::gpu_preprocessing::{ + GpuPreprocessingMode, GpuPreprocessingSupport, IndirectParametersBuffers, + IndirectParametersIndexed, + }, + experimental::occlusion_culling::OcclusionCulling, + render_graph::{self, NodeRunError, RenderGraphApp, RenderGraphContext, RenderLabel}, + render_resource::{Buffer, BufferDescriptor, BufferUsages, MapMode}, + renderer::{RenderAdapter, RenderContext, RenderDevice}, + settings::WgpuFeatures, + Render, RenderApp, RenderPlugin, RenderSet, + }, +}; +use bytemuck::Pod; + +/// The radius of the spinning sphere of cubes. +const OUTER_RADIUS: f32 = 3.0; + +/// The density of cubes in the outer sphere. +const OUTER_SUBDIVISION_COUNT: u32 = 5; + +/// The speed at which the outer sphere and large cube rotate in radians per +/// frame. +const ROTATION_SPEED: f32 = 0.01; + +/// The length of each side of the small cubes, in meters. +const SMALL_CUBE_SIZE: f32 = 0.1; + +/// The length of each side of the large cube, in meters. +const LARGE_CUBE_SIZE: f32 = 2.0; + +/// A marker component for the immediate parent of the large sphere of cubes. +#[derive(Default, Component)] +struct SphereParent; + +/// A marker component for the large spinning cube at the origin. +#[derive(Default, Component)] +struct LargeCube; + +/// A plugin for the render app that reads the number of culled meshes from the +/// GPU back to the CPU. +struct ReadbackIndirectParametersPlugin; + +/// The node that we insert into the render graph in order to read the number of +/// culled meshes from the GPU back to the CPU. +#[derive(Default)] +struct ReadbackIndirectParametersNode; + +/// The [`RenderLabel`] that we use to identify the +/// [`ReadbackIndirectParametersNode`]. 
+#[derive(Clone, PartialEq, Eq, Hash, Debug, RenderLabel)] +struct ReadbackIndirectParameters; + +/// The intermediate staging buffers that we use to read back the indirect +/// parameters from the GPU to the CPU. +/// +/// We read back the GPU indirect parameters so that we can determine the number +/// of meshes that were culled. +/// +/// `wgpu` doesn't allow us to read indirect buffers back from the GPU to the +/// CPU directly. Instead, we have to copy them to a temporary staging buffer +/// first, and then read *those* buffers back from the GPU to the CPU. This +/// resource holds those temporary buffers. +#[derive(Resource, Default)] +struct IndirectParametersStagingBuffers { + /// The buffer that stores the indirect draw commands. + /// + /// See [`IndirectParametersIndexed`] for more information about the memory + /// layout of this buffer. + data: Option, + /// The buffer that stores the *number* of indirect draw commands. + /// + /// We only care about the first `u32` in this buffer. + batch_sets: Option, +} + +/// A resource, shared between the main world and the render world, that saves a +/// CPU-side copy of the GPU buffer that stores the indirect draw parameters. +/// +/// This is needed so that we can display the number of meshes that were culled. +/// It's reference counted, and protected by a lock, because we don't precisely +/// know when the GPU will be ready to present the CPU with the buffer copy. +/// Even though the rendering runs at least a frame ahead of the main app logic, +/// we don't require more precise synchronization than the lock because we don't +/// really care how up-to-date the counter of culled meshes is. If it's off by a +/// few frames, that's no big deal. +#[derive(Clone, Resource, Deref, DerefMut)] +struct SavedIndirectParameters(Arc>); + +/// A CPU-side copy of the GPU buffer that stores the indirect draw parameters. +/// +/// This is needed so that we can display the number of meshes that were culled. 
+struct SavedIndirectParametersData { + /// The CPU-side copy of the GPU buffer that stores the indirect draw + /// parameters. + data: Vec, + /// The CPU-side copy of the GPU buffer that stores the *number* of indirect + /// draw parameters that we have. + /// + /// All we care about is the number of indirect draw parameters for a single + /// view, so this is only one word in size. + count: u32, + /// True if occlusion culling is supported at all; false if it's not. + occlusion_culling_supported: bool, + /// True if we support inspecting the number of meshes that were culled on + /// this platform; false if we don't. + /// + /// If `multi_draw_indirect_count` isn't supported, then we would have to + /// employ a more complicated approach in order to determine the number of + /// meshes that are occluded, and that would be out of scope for this + /// example. + occlusion_culling_introspection_supported: bool, +} + +impl FromWorld for SavedIndirectParameters { + fn from_world(world: &mut World) -> SavedIndirectParameters { + let render_adapter = world.resource::(); + SavedIndirectParameters(Arc::new(Mutex::new(SavedIndirectParametersData { + data: vec![], + count: 0, + // This gets set to false in `readback_indirect_buffers` if we don't + // support GPU preprocessing. + occlusion_culling_supported: true, + // In order to determine how many meshes were culled, we look at the + // indirect count buffer that Bevy only populates if the platform + // supports `multi_draw_indirect_count`. So, if we don't have that + // feature, then we don't bother to display how many meshes were + // culled. + occlusion_culling_introspection_supported: render_adapter + .features() + .contains(WgpuFeatures::MULTI_DRAW_INDIRECT_COUNT), + }))) + } +} + +/// The demo's current settings. +#[derive(Resource)] +struct AppStatus { + /// Whether occlusion culling is presently enabled. + /// + /// By default, this is set to true. 
+ occlusion_culling: bool, +} + +impl Default for AppStatus { + fn default() -> Self { + AppStatus { + occlusion_culling: true, + } + } +} + +fn main() { + App::new() + .add_plugins( + DefaultPlugins + .set(WindowPlugin { + primary_window: Some(Window { + title: "Bevy Occlusion Culling Example".into(), + ..default() + }), + ..default() + }) + .set(RenderPlugin { + allow_copies_from_indirect_parameters: true, + ..default() + }), + ) + .add_plugins(ReadbackIndirectParametersPlugin) + .init_resource::() + .add_systems(Startup, setup) + .add_systems(Update, spin_small_cubes) + .add_systems(Update, spin_large_cube) + .add_systems(Update, update_status_text) + .add_systems(Update, toggle_occlusion_culling_on_request) + .run(); +} + +impl Plugin for ReadbackIndirectParametersPlugin { + fn build(&self, app: &mut App) { + // Fetch the render app. + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .init_resource::() + .add_systems(ExtractSchedule, readback_indirect_parameters) + .add_systems( + Render, + create_indirect_parameters_staging_buffers.in_set(RenderSet::PrepareResourcesFlush), + ) + // Add the node that allows us to read the indirect parameters back + // from the GPU to the CPU, which allows us to determine how many + // meshes were culled. + .add_render_graph_node::( + Core3d, + ReadbackIndirectParameters, + ) + // We read back the indirect parameters any time after + // `EndMainPass`. Readback doesn't particularly need to execute + // before `EndMainPassPostProcessing`, but we specify that anyway + // because we want to make the indirect parameters run before + // *something* in the graph, and `EndMainPassPostProcessing` is as + // good a node as any other. 
+ .add_render_graph_edges( + Core3d, + ( + Node3d::EndMainPass, + ReadbackIndirectParameters, + Node3d::EndMainPassPostProcessing, + ), + ); + } + + fn finish(&self, app: &mut App) { + // Create the `SavedIndirectParameters` resource that we're going to use + // to communicate between the thread that the GPU-to-CPU readback + // callback runs on and the main application threads. This resource is + // atomically reference counted. We store one reference to the + // `SavedIndirectParameters` in the main app and another reference in + // the render app. + let saved_indirect_parameters = SavedIndirectParameters::from_world(app.world_mut()); + app.insert_resource(saved_indirect_parameters.clone()); + + // Fetch the render app. + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + // Insert another reference to the `SavedIndirectParameters`. + .insert_resource(saved_indirect_parameters); + } +} + +/// Spawns all the objects in the scene. +fn setup( + mut commands: Commands, + asset_server: Res, + mut meshes: ResMut>, + mut materials: ResMut>, +) { + spawn_small_cubes(&mut commands, &mut meshes, &mut materials); + spawn_large_cube(&mut commands, &asset_server, &mut meshes, &mut materials); + spawn_light(&mut commands); + spawn_camera(&mut commands); + spawn_help_text(&mut commands); +} + +/// Spawns the rotating sphere of small cubes. +fn spawn_small_cubes( + commands: &mut Commands, + meshes: &mut Assets, + materials: &mut Assets, +) { + // Add the cube mesh. + let small_cube = meshes.add(Cuboid::new( + SMALL_CUBE_SIZE, + SMALL_CUBE_SIZE, + SMALL_CUBE_SIZE, + )); + + // Add the cube material. + let small_cube_material = materials.add(StandardMaterial { + base_color: SILVER.into(), + ..default() + }); + + // Create the entity that the small cubes will be parented to. This is the + // entity that we rotate. 
+ let sphere_parent = commands + .spawn(Transform::from_translation(Vec3::ZERO)) + .insert(Visibility::default()) + .insert(SphereParent) + .id(); + + // Now we have to figure out where to place the cubes. To do that, we create + // a sphere mesh, but we don't add it to the scene. Instead, we inspect the + // sphere mesh to find the positions of its vertices, and spawn a small cube + // at each one. That way, we end up with a bunch of cubes arranged in a + // spherical shape. + + // Create the sphere mesh, and extract the positions of its vertices. + let sphere = Sphere::new(OUTER_RADIUS) + .mesh() + .ico(OUTER_SUBDIVISION_COUNT) + .unwrap(); + let sphere_positions = sphere.attribute(Mesh::ATTRIBUTE_POSITION).unwrap(); + + // At each vertex, create a small cube. + for sphere_position in sphere_positions.as_float3().unwrap() { + let sphere_position = Vec3::from_slice(sphere_position); + let small_cube = commands + .spawn(Mesh3d(small_cube.clone())) + .insert(MeshMaterial3d(small_cube_material.clone())) + .insert(Transform::from_translation(sphere_position)) + .id(); + commands.entity(sphere_parent).add_child(small_cube); + } +} + +/// Spawns the large cube at the center of the screen. +/// +/// This cube rotates chaotically and occludes small cubes behind it. +fn spawn_large_cube( + commands: &mut Commands, + asset_server: &AssetServer, + meshes: &mut Assets, + materials: &mut Assets, +) { + commands + .spawn(Mesh3d(meshes.add(Cuboid::new( + LARGE_CUBE_SIZE, + LARGE_CUBE_SIZE, + LARGE_CUBE_SIZE, + )))) + .insert(MeshMaterial3d(materials.add(StandardMaterial { + base_color: WHITE.into(), + base_color_texture: Some(asset_server.load("branding/icon.png")), + ..default() + }))) + .insert(Transform::IDENTITY) + .insert(LargeCube); +} + +// Spins the outer sphere a bit every frame. +// +// This ensures that the set of cubes that are hidden and shown varies over +// time. 
+fn spin_small_cubes(mut sphere_parents: Query<&mut Transform, With>) { + for mut sphere_parent_transform in &mut sphere_parents { + sphere_parent_transform.rotate_y(ROTATION_SPEED); + } +} + +/// Spins the large cube a bit every frame. +/// +/// The chaotic rotation adds a bit of randomness to the scene to better +/// demonstrate the dynamicity of the occlusion culling. +fn spin_large_cube(mut large_cubes: Query<&mut Transform, With>) { + for mut transform in &mut large_cubes { + transform.rotate(Quat::from_euler( + EulerRot::XYZ, + 0.13 * ROTATION_SPEED, + 0.29 * ROTATION_SPEED, + 0.35 * ROTATION_SPEED, + )); + } +} + +/// Spawns a directional light to illuminate the scene. +fn spawn_light(commands: &mut Commands) { + commands + .spawn(DirectionalLight::default()) + .insert(Transform::from_rotation(Quat::from_euler( + EulerRot::ZYX, + 0.0, + PI * -0.15, + PI * -0.15, + ))); +} + +/// Spawns a camera that includes the depth prepass and occlusion culling. +fn spawn_camera(commands: &mut Commands) { + commands + .spawn(Camera3d::default()) + .insert(Transform::from_xyz(0.0, 0.0, 9.0).looking_at(Vec3::ZERO, Vec3::Y)) + .insert(DepthPrepass) + .insert(OcclusionCulling); +} + +/// Spawns the help text at the upper left of the screen. +fn spawn_help_text(commands: &mut Commands) { + commands.spawn(( + Text::new(""), + Node { + position_type: PositionType::Absolute, + top: Val::Px(12.0), + left: Val::Px(12.0), + ..default() + }, + )); +} + +impl render_graph::Node for ReadbackIndirectParametersNode { + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + world: &'w World, + ) -> Result<(), NodeRunError> { + // Extract the buffers that hold the GPU indirect draw parameters from + // the world resources. We're going to read those buffers to determine + // how many meshes were actually drawn. 
+ let (Some(indirect_parameters_buffers), Some(indirect_parameters_mapping_buffers)) = ( + world.get_resource::(), + world.get_resource::(), + ) else { + return Ok(()); + }; + + // Grab both the buffers we're copying from and the staging buffers + // we're copying to. Remember that we can't map the indirect parameters + // buffers directly, so we have to copy their contents to a staging + // buffer. + let ( + Some(indexed_data_buffer), + Some(indexed_batch_sets_buffer), + Some(indirect_parameters_staging_data_buffer), + Some(indirect_parameters_staging_batch_sets_buffer), + ) = ( + indirect_parameters_buffers.indexed_data_buffer(), + indirect_parameters_buffers.indexed_batch_sets_buffer(), + indirect_parameters_mapping_buffers.data.as_ref(), + indirect_parameters_mapping_buffers.batch_sets.as_ref(), + ) + else { + return Ok(()); + }; + + // Copy from the indirect parameters buffers to the staging buffers. + render_context.command_encoder().copy_buffer_to_buffer( + indexed_data_buffer, + 0, + indirect_parameters_staging_data_buffer, + 0, + indexed_data_buffer.size(), + ); + render_context.command_encoder().copy_buffer_to_buffer( + indexed_batch_sets_buffer, + 0, + indirect_parameters_staging_batch_sets_buffer, + 0, + indexed_batch_sets_buffer.size(), + ); + + Ok(()) + } +} + +/// Creates the staging buffers that we use to read back the indirect parameters +/// from the GPU to the CPU. +/// +/// We read the indirect parameters from the GPU to the CPU in order to display +/// the number of meshes that were culled each frame. +/// +/// We need these staging buffers because `wgpu` doesn't allow us to read the +/// contents of the indirect parameters buffers directly. We must first copy +/// them from the GPU to a staging buffer, and then read the staging buffer. 
+fn create_indirect_parameters_staging_buffers( + mut indirect_parameters_staging_buffers: ResMut, + indirect_parameters_buffers: Res, + render_device: Res, +) { + // Fetch the indirect parameters buffers that we're going to copy from. + let (Some(indexed_data_buffer), Some(indexed_batch_set_buffer)) = ( + indirect_parameters_buffers.indexed_data_buffer(), + indirect_parameters_buffers.indexed_batch_sets_buffer(), + ) else { + return; + }; + + // Build the staging buffers. Make sure they have the same sizes as the + // buffers we're copying from. + indirect_parameters_staging_buffers.data = + Some(render_device.create_buffer(&BufferDescriptor { + label: Some("indexed data staging buffer"), + size: indexed_data_buffer.size(), + usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST, + mapped_at_creation: false, + })); + indirect_parameters_staging_buffers.batch_sets = + Some(render_device.create_buffer(&BufferDescriptor { + label: Some("indexed batch set staging buffer"), + size: indexed_batch_set_buffer.size(), + usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST, + mapped_at_creation: false, + })); +} + +/// Updates the app status text at the top of the screen. +fn update_status_text( + saved_indirect_parameters: Res, + mut texts: Query<&mut Text>, + meshes: Query>, + app_status: Res, +) { + // How many meshes are in the scene? + let total_mesh_count = meshes.iter().count(); + + // Sample the rendered object count. Note that we don't synchronize beyond + // locking the data and therefore this value will generally be at least + // one frame behind. This is fine; this app is just a demonstration after + // all. 
+ let ( + rendered_object_count, + occlusion_culling_supported, + occlusion_culling_introspection_supported, + ): (u32, bool, bool) = { + let saved_indirect_parameters = saved_indirect_parameters.lock().unwrap(); + ( + saved_indirect_parameters + .data + .iter() + .take(saved_indirect_parameters.count as usize) + .map(|indirect_parameters| indirect_parameters.instance_count) + .sum(), + saved_indirect_parameters.occlusion_culling_supported, + saved_indirect_parameters.occlusion_culling_introspection_supported, + ) + }; + + // Change the text. + for mut text in &mut texts { + text.0 = String::new(); + if !occlusion_culling_supported { + text.0 + .push_str("Occlusion culling not supported on this platform"); + continue; + } + + let _ = writeln!( + &mut text.0, + "Occlusion culling {} (Press Space to toggle)", + if app_status.occlusion_culling { + "ON" + } else { + "OFF" + }, + ); + + if !occlusion_culling_introspection_supported { + continue; + } + + let _ = write!( + &mut text.0, + "{}/{} meshes rendered", + rendered_object_count, total_mesh_count + ); + } +} + +/// A system that reads the indirect parameters back from the GPU so that we can +/// report how many meshes were culled. +fn readback_indirect_parameters( + mut indirect_parameters_staging_buffers: ResMut, + saved_indirect_parameters: Res, + gpu_preprocessing_support: Res, +) { + // If culling isn't supported on this platform, note that, and bail. + if gpu_preprocessing_support.max_supported_mode != GpuPreprocessingMode::Culling { + saved_indirect_parameters + .lock() + .unwrap() + .occlusion_culling_supported = false; + return; + } + + // Grab the staging buffers. + let (Some(data_buffer), Some(batch_sets_buffer)) = ( + indirect_parameters_staging_buffers.data.take(), + indirect_parameters_staging_buffers.batch_sets.take(), + ) else { + return; + }; + + // Read the GPU buffers back. 
+ let saved_indirect_parameters_0 = (**saved_indirect_parameters).clone(); + let saved_indirect_parameters_1 = (**saved_indirect_parameters).clone(); + readback_buffer::(data_buffer, move |indirect_parameters| { + saved_indirect_parameters_0.lock().unwrap().data = indirect_parameters.to_vec(); + }); + readback_buffer::(batch_sets_buffer, move |indirect_parameters_count| { + saved_indirect_parameters_1.lock().unwrap().count = indirect_parameters_count[0]; + }); +} + +// A helper function to asynchronously read an array of [`Pod`] values back from +// the GPU to the CPU. +// +// The given callback is invoked when the data is ready. The buffer will +// automatically be unmapped after the callback executes. +fn readback_buffer(buffer: Buffer, callback: impl FnOnce(&[T]) + Send + 'static) +where + T: Pod, +{ + // We need to make another reference to the buffer so that we can move the + // original reference into the closure below. + let original_buffer = buffer.clone(); + original_buffer + .slice(..) + .map_async(MapMode::Read, move |result| { + // Make sure we succeeded. + if result.is_err() { + return; + } + + { + // Cast the raw bytes in the GPU buffer to the appropriate type. + let buffer_view = buffer.slice(..).get_mapped_range(); + let indirect_parameters: &[T] = bytemuck::cast_slice( + &buffer_view[0..(buffer_view.len() / size_of::() * size_of::())], + ); + + // Invoke the callback. + callback(indirect_parameters); + } + + // Unmap the buffer. We have to do this before submitting any more + // GPU command buffers, or `wgpu` will assert. + buffer.unmap(); + }); +} + +/// Adds or removes the [`OcclusionCulling`] and [`DepthPrepass`] components +/// when the user presses the spacebar. +fn toggle_occlusion_culling_on_request( + mut commands: Commands, + input: Res>, + mut app_status: ResMut, + cameras: Query>, +) { + // Only run when the user presses the spacebar. 
+ if !input.just_pressed(KeyCode::Space) { + return; + } + + // Toggle the occlusion culling flag in `AppStatus`. + app_status.occlusion_culling = !app_status.occlusion_culling; + + // Add or remove the `OcclusionCulling` and `DepthPrepass` components as + // requested. + for camera in &cameras { + if app_status.occlusion_culling { + commands + .entity(camera) + .insert(DepthPrepass) + .insert(OcclusionCulling); + } else { + commands + .entity(camera) + .remove::() + .remove::(); + } + } +} diff --git a/examples/README.md b/examples/README.md index 8963590365..858d5b3f59 100644 --- a/examples/README.md +++ b/examples/README.md @@ -164,6 +164,7 @@ Example | Description [Meshlet](../examples/3d/meshlet.rs) | Meshlet rendering for dense high-poly scenes (experimental) [Mixed lighting](../examples/3d/mixed_lighting.rs) | Demonstrates how to combine baked and dynamic lighting [Motion Blur](../examples/3d/motion_blur.rs) | Demonstrates per-pixel motion blur +[Occlusion Culling](../examples/3d/occlusion_culling.rs) | Demonstration of Occlusion Culling [Order Independent Transparency](../examples/3d/order_independent_transparency.rs) | Demonstrates how to use OIT [Orthographic View](../examples/3d/orthographic.rs) | Shows how to create a 3D orthographic view (for isometric-look in games or CAD applications) [Parallax Mapping](../examples/3d/parallax_mapping.rs) | Demonstrates use of a normal map and depth map for parallax mapping diff --git a/examples/shader/specialized_mesh_pipeline.rs b/examples/shader/specialized_mesh_pipeline.rs index bc1ecf113c..9cc414e26a 100644 --- a/examples/shader/specialized_mesh_pipeline.rs +++ b/examples/shader/specialized_mesh_pipeline.rs @@ -6,8 +6,6 @@ //! //! [`SpecializedMeshPipeline`] let's you customize the entire pipeline used when rendering a mesh. 
-use std::any::TypeId; - use bevy::{ core_pipeline::core_3d::{Opaque3d, Opaque3dBatchSetKey, Opaque3dBinKey, CORE_3D_DEPTH_FORMAT}, ecs::system::StaticSystemParam, @@ -21,11 +19,11 @@ use bevy::{ batching::GetFullBatchData, batching::{ gpu_preprocessing::{ - BatchedInstanceBuffers, IndirectParametersBuffers, PreprocessWorkItem, - PreprocessWorkItemBuffers, + self, BatchedInstanceBuffers, IndirectParametersBuffers, PreprocessWorkItem, }, GetBatchData, }, + experimental::occlusion_culling::OcclusionCulling, extract_component::{ExtractComponent, ExtractComponentPlugin}, mesh::{Indices, MeshVertexBufferLayoutRef, PrimitiveTopology, RenderMesh}, render_asset::{RenderAssetUsages, RenderAssets}, @@ -43,7 +41,6 @@ use bevy::{ view::{self, ExtractedView, RenderVisibleEntities, ViewTarget, VisibilityClass}, Render, RenderApp, RenderSet, }, - utils::TypeIdMap, }; const SHADER_ASSET_PATH: &str = "shaders/specialized_mesh_pipeline.wgsl"; @@ -288,6 +285,7 @@ fn queue_custom_mesh_pipeline( &ExtractedView, &Msaa, Has, + Has, )>, (render_meshes, render_mesh_instances): ( Res>, @@ -307,6 +305,8 @@ fn queue_custom_mesh_pipeline( let BatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, + ref mut late_indexed_indirect_parameters_buffer, + ref mut late_non_indexed_indirect_parameters_buffer, .. } = gpu_array_buffer.into_inner(); @@ -318,7 +318,15 @@ fn queue_custom_mesh_pipeline( // Render phases are per-view, so we need to iterate over all views so that // the entity appears in them. (In this example, we have only one view, but // it's good practice to loop over all views anyway.) 
- for (view_entity, view_visible_entities, view, msaa, no_indirect_drawing) in views.iter() { + for ( + view_entity, + view_visible_entities, + view, + msaa, + no_indirect_drawing, + gpu_occlusion_culling, + ) in views.iter() + { let Some(opaque_phase) = opaque_render_phases.get_mut(&view.retained_view_entity) else { continue; }; @@ -326,11 +334,14 @@ fn queue_custom_mesh_pipeline( // Create a *work item buffer* if necessary. Work item buffers store the // indices of meshes that are to be rendered when indirect drawing is // enabled. - let work_item_buffer = work_item_buffers - .entry(view_entity) - .or_insert_with(TypeIdMap::default) - .entry(TypeId::of::()) - .or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing)); + let work_item_buffer = gpu_preprocessing::get_or_create_work_item_buffer::( + work_item_buffers, + view_entity, + no_indirect_drawing, + gpu_occlusion_culling, + late_indexed_indirect_parameters_buffer, + late_non_indexed_indirect_parameters_buffer, + ); // Create the key based on the view. In this case we only care about MSAA and HDR let view_key = MeshPipelineKey::from_msaa_samples(msaa.samples()) diff --git a/examples/tools/scene_viewer/main.rs b/examples/tools/scene_viewer/main.rs index af8272a13b..6935798169 100644 --- a/examples/tools/scene_viewer/main.rs +++ b/examples/tools/scene_viewer/main.rs @@ -4,12 +4,19 @@ //! replacing the path as appropriate. //! In case of multiple scenes, you can select which to display by adapting the file path: `/path/to/model.gltf#Scene1`. //! With no arguments it will load the `FlightHelmet` glTF model from the repository assets subdirectory. +//! Pass `--help` to see all the supported arguments. //! //! If you want to hot reload asset changes, enable the `file_watcher` cargo feature. 
+use argh::FromArgs; use bevy::{ + core_pipeline::prepass::{DeferredPrepass, DepthPrepass}, + pbr::DefaultOpaqueRendererMethod, prelude::*, - render::primitives::{Aabb, Sphere}, + render::{ + experimental::occlusion_culling::OcclusionCulling, + primitives::{Aabb, Sphere}, + }, }; #[path = "../../helpers/camera_controller.rs"] @@ -24,7 +31,34 @@ use camera_controller::{CameraController, CameraControllerPlugin}; use morph_viewer_plugin::MorphViewerPlugin; use scene_viewer_plugin::{SceneHandle, SceneViewerPlugin}; +/// A simple glTF scene viewer made with Bevy +#[derive(FromArgs, Resource)] +struct Args { + /// the path to the glTF scene + #[argh( + positional, + default = "\"assets/models/FlightHelmet/FlightHelmet.gltf\".to_string()" + )] + scene_path: String, + /// enable a depth prepass + #[argh(switch)] + depth_prepass: Option, + /// enable occlusion culling + #[argh(switch)] + occlusion_culling: Option, + /// enable deferred shading + #[argh(switch)] + deferred: Option, +} + fn main() { + #[cfg(not(target_arch = "wasm32"))] + let args: Args = argh::from_env(); + #[cfg(target_arch = "wasm32")] + let args: Args = Args::from_args(&[], &[]).unwrap(); + + let deferred = args.deferred; + let mut app = App::new(); app.add_plugins(( DefaultPlugins @@ -43,9 +77,15 @@ fn main() { SceneViewerPlugin, MorphViewerPlugin, )) + .insert_resource(args) .add_systems(Startup, setup) .add_systems(PreUpdate, setup_scene_after_load); + // If deferred shading was requested, turn it on. 
+ if deferred == Some(true) { + app.insert_resource(DefaultOpaqueRendererMethod::deferred()); + } + #[cfg(feature = "animation")] app.add_plugins(animation_plugin::AnimationManipulationPlugin); @@ -67,12 +107,10 @@ fn parse_scene(scene_path: String) -> (String, usize) { (scene_path, 0) } -fn setup(mut commands: Commands, asset_server: Res) { - let scene_path = std::env::args() - .nth(1) - .unwrap_or_else(|| "assets/models/FlightHelmet/FlightHelmet.gltf".to_string()); +fn setup(mut commands: Commands, asset_server: Res, args: Res) { + let scene_path = &args.scene_path; info!("Loading {}", scene_path); - let (file_path, scene_index) = parse_scene(scene_path); + let (file_path, scene_index) = parse_scene((*scene_path).clone()); commands.insert_resource(SceneHandle::new(asset_server.load(file_path), scene_index)); } @@ -82,6 +120,7 @@ fn setup_scene_after_load( mut setup: Local, mut scene_handle: ResMut, asset_server: Res, + args: Res, meshes: Query<(&GlobalTransform, Option<&Aabb>), With>, ) { if scene_handle.is_loaded && !*setup { @@ -125,7 +164,7 @@ fn setup_scene_after_load( info!("{}", camera_controller); info!("{}", *scene_handle); - commands.spawn(( + let mut camera = commands.spawn(( Camera3d::default(), Projection::from(projection), Transform::from_translation(Vec3::from(aabb.center) + size * Vec3::new(0.5, 0.25, 0.5)) @@ -145,6 +184,25 @@ fn setup_scene_after_load( camera_controller, )); + // If occlusion culling was requested, include the relevant components. + // The Z-prepass is currently required. + if args.occlusion_culling == Some(true) { + camera.insert((DepthPrepass, OcclusionCulling)); + } + + // If the depth prepass was requested, include it. + if args.depth_prepass == Some(true) { + camera.insert(DepthPrepass); + } + + // If deferred shading was requested, include the prepass. 
+ if args.deferred == Some(true) { + camera + .insert(Msaa::Off) + .insert(DepthPrepass) + .insert(DeferredPrepass); + } + // Spawn a default light if the scene does not have one if !scene_handle.has_light { info!("Spawning a directional light");