Dynamic light clusters (#3968)

# Objective

- Provide some customisation for the default cluster setup.
- Avoid "cluster index lists is full" in all cases (using a strategy outlined by @superdump).

## Solution

Add a `ClusterConfig` enum (which can be inserted into a view at any time) to allow specifying the cluster setup, with variants:

- `None` (do not do any light assignment - for views which do not require light info, e.g. minimaps etc.)
- `Single` (one cluster)
- `XYZ` (explicit cluster counts in each dimension)
- `FixedZ` (most similar to current - specify Z-slices and total, then x and y counts are dynamically determined to give approximately square clusters based on the current aspect ratio)

Defaults to `FixedZ { total: 4096, z_slices: 24, .. }`, which is similar to the current setup.

Per frame, estimate the number of indices that would be required for the current config, and decrease the cluster counts / increase the cluster sizes in the x and y dimensions if the index list would be too small.

Notes:

- I didn't put `ClusterConfig` in the camera bundles, to avoid introducing a dependency from bevy_render to bevy_pbr. The `ClusterConfig` enum comes with a pbr-centric impl block, so I didn't want to move that into bevy_render either.
- ~Might want to add None variant to cluster config for views that don't care about lights?~
- Not well tested for orthographic.
- ~There's a cluster_muck branch on my repo which includes some diagnostics / a modified lighting example which may be useful for tyre-kicking~ (outdated, I will bring it up to date if required)

Anecdotal timings:

- FPS on the lighting demo is negligibly better (~5%), maybe due to a small optimisation constraining the light aabb to be in front of the camera.
- FPS on the lighting demo with 100 extra lights added is ~33% faster, and it also renders correctly as the cluster index count is no longer exceeded.
2022-03-08 04:56:42 +00:00 · 2022-03-08 04:56:42 +00:00 · 244687a0bb
commit 244687a0bb
parent a188babce2
3 changed files with 450 additions and 145 deletions
--- a/crates/bevy_pbr/src/lib.rs
+++ b/crates/bevy_pbr/src/lib.rs
@ -82,19 +82,11 @@ impl Plugin for PbrPlugin {
                    .exclusive_system()
                    .label(SimulationLightSystems::AddClusters),
            )
-            .add_system_to_stage(
-                CoreStage::PostUpdate,
-                // NOTE: Must come after add_clusters!
-                update_clusters
-                    .label(SimulationLightSystems::UpdateClusters)
-                    .after(TransformSystem::TransformPropagate),
-            )
            .add_system_to_stage(
                CoreStage::PostUpdate,
                assign_lights_to_clusters
                    .label(SimulationLightSystems::AssignLightsToClusters)
-                    .after(TransformSystem::TransformPropagate)
-                    .after(SimulationLightSystems::UpdateClusters),
+                    .after(TransformSystem::TransformPropagate),
            )
            .add_system_to_stage(
                CoreStage::PostUpdate,
--- a/crates/bevy_pbr/src/light.rs
+++ b/crates/bevy_pbr/src/light.rs
@ -191,7 +191,6 @@ pub struct NotShadowReceiver;
 #[derive(Debug, Hash, PartialEq, Eq, Clone, SystemLabel)]
 pub enum SimulationLightSystems {
    AddClusters,
-    UpdateClusters,
    AssignLightsToClusters,
    UpdateDirectionalLightFrusta,
    UpdatePointLightFrusta,
@ -207,6 +206,135 @@ pub enum SimulationLightSystems {
 // The z-slicing method mentioned in the aortiz article is originally from Tiago Sousa’s Siggraph 2016 talk about Doom 2016:
 // http://advances.realtimerendering.com/s2016/Siggraph2016_idTech6.pdf

+/// Configure the far z-plane mode used for the furthest depth slice for clustered forward
+/// rendering
+#[derive(Debug, Copy, Clone)]
+pub enum ClusterFarZMode {
+    /// Use the camera far-plane to determine the z-depth of the furthest cluster layer
+    CameraFarPlane,
+    /// Calculate the required maximum z-depth based on currently visible lights.
+    /// Makes better use of available clusters, speeding up GPU lighting operations
+    /// at the expense of some CPU time and using more indices in the cluster light
+    /// index lists.
+    MaxLightRange,
+    /// Constant max z-depth
+    Constant(f32),
+}
+
+/// Configure the depth-slicing strategy for clustered forward rendering
+#[derive(Debug, Copy, Clone)]
+pub struct ClusterZConfig {
+    /// Far z plane of the first depth slice
+    pub first_slice_depth: f32,
+    /// Strategy for how to evaluate the far z plane of the furthest depth slice
+    pub far_z_mode: ClusterFarZMode,
+}
+
+impl Default for ClusterZConfig {
+    fn default() -> Self {
+        Self {
+            first_slice_depth: 5.0,
+            far_z_mode: ClusterFarZMode::MaxLightRange,
+        }
+    }
+}
+
+/// Configuration of the clustering strategy for clustered forward rendering
+#[derive(Debug, Copy, Clone, Component)]
+pub enum ClusterConfig {
+    /// Disable light cluster calculations for this view
+    None,
+    /// One single cluster. Optimal for low-light complexity scenes or scenes where
+    /// most lights affect the entire scene.
+    Single,
+    /// Explicit x, y and z counts (may yield non-square x/y clusters depending on the aspect ratio)
+    XYZ {
+        dimensions: UVec3,
+        z_config: ClusterZConfig,
+        /// Specify if clusters should automatically resize in x/y if there is a risk of exceeding
+        /// the available cluster-light index limit
+        dynamic_resizing: bool,
+    },
+    /// Fixed number of z slices; x and y counts are calculated to give approximately square
+    /// clusters with at most `total` clusters overall. For top-down games where lights will
+    /// generally always be within a short depth range, it may be useful to use this configuration
+    /// with 1 or few z slices. This would reduce the number of lights per cluster by distributing
+    /// more clusters in screen space x/y, which matches how lights are distributed in the scene.
+    FixedZ {
+        total: u32,
+        z_slices: u32,
+        z_config: ClusterZConfig,
+        /// Specify if clusters should automatically resize in x/y if there is a risk of exceeding
+        /// the available cluster-light index limit
+        dynamic_resizing: bool,
+    },
+}
+
+impl Default for ClusterConfig {
+    fn default() -> Self {
+        // 24 depth slices, square clusters with at most 4096 total clusters
+        // use max light distance as clusters max Z-depth, first slice extends to 5.0
+        Self::FixedZ {
+            total: 4096,
+            z_slices: 24,
+            z_config: ClusterZConfig::default(),
+            dynamic_resizing: true,
+        }
+    }
+}
+
+impl ClusterConfig {
+    fn dimensions_for_screen_size(&self, screen_size: UVec2) -> UVec3 {
+        match &self {
+            ClusterConfig::None => UVec3::ZERO,
+            ClusterConfig::Single => UVec3::ONE,
+            ClusterConfig::XYZ { dimensions, .. } => *dimensions,
+            ClusterConfig::FixedZ {
+                total, z_slices, ..
+            } => {
+                let aspect_ratio = screen_size.x as f32 / screen_size.y as f32;
+                let per_layer = *total as f32 / *z_slices as f32;
+                let y = f32::sqrt(per_layer / aspect_ratio);
+                let x = (y * aspect_ratio).floor() as u32;
+                let y = y.floor() as u32;
+                UVec3::new(x, y, *z_slices)
+            }
+        }
+    }
+
+    fn first_slice_depth(&self) -> f32 {
+        match self {
+            ClusterConfig::None => 0.0,
+            ClusterConfig::Single => 1.0e9, // FIXME note can't use f32::MAX as the aabb explodes
+            ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => {
+                z_config.first_slice_depth
+            }
+        }
+    }
+
+    fn far_z_mode(&self) -> ClusterFarZMode {
+        match self {
+            ClusterConfig::None => ClusterFarZMode::Constant(0.0),
+            ClusterConfig::Single => ClusterFarZMode::Constant(1.0e9), // FIXME note can't use f32::MAX as the aabb explodes
+            ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => {
+                z_config.far_z_mode
+            }
+        }
+    }
+
+    fn dynamic_resizing(&self) -> bool {
+        match self {
+            ClusterConfig::None | ClusterConfig::Single => false,
+            ClusterConfig::XYZ {
+                dynamic_resizing, ..
+            }
+            | ClusterConfig::FixedZ {
+                dynamic_resizing, ..
+            } => *dynamic_resizing,
+        }
+    }
+}
+
 #[derive(Component, Debug)]
 pub struct Clusters {
    /// Tile size
@ -216,16 +344,18 @@ pub struct Clusters {
    /// Distance to the far plane of the first depth slice. The first depth slice is special
    /// and explicitly-configured to avoid having unnecessarily many slices close to the camera.
    pub(crate) near: f32,
+    pub(crate) far: f32,
    aabbs: Vec<Aabb>,
    pub(crate) lights: Vec<VisiblePointLights>,
 }

 impl Clusters {
-    fn new(tile_size: UVec2, screen_size: UVec2, z_slices: u32) -> Self {
+    fn new(tile_size: UVec2, screen_size: UVec2, z_slices: u32, near: f32, far: f32) -> Self {
        let mut clusters = Self {
            tile_size,
            axis_slices: Default::default(),
-            near: 5.0,
+            near,
+            far,
            aabbs: Default::default(),
            lights: Default::default(),
        };
@ -233,17 +363,19 @@ impl Clusters {
        clusters
    }

-    fn from_screen_size_and_z_slices(screen_size: UVec2, z_slices: u32) -> Self {
-        let aspect_ratio = screen_size.x as f32 / screen_size.y as f32;
-        let n_tiles_y =
-            ((ViewClusterBindings::MAX_OFFSETS as u32 / z_slices) as f32 / aspect_ratio).sqrt();
-        // NOTE: Round down the number of tiles in order to avoid overflowing the maximum number of
-        // clusters.
-        let n_tiles = UVec2::new(
-            (aspect_ratio * n_tiles_y).floor() as u32,
-            n_tiles_y.floor() as u32,
-        );
-        Clusters::new((screen_size + UVec2::ONE) / n_tiles, screen_size, Z_SLICES)
+    fn from_screen_size_and_dimensions(
+        screen_size: UVec2,
+        dimensions: UVec3,
+        near: f32,
+        far: f32,
+    ) -> Self {
+        Clusters::new(
+            (screen_size + UVec2::ONE) / dimensions.xy(),
+            screen_size,
+            dimensions.z,
+            near,
+            far,
+        )
    }

    fn update(&mut self, tile_size: UVec2, screen_size: UVec2, z_slices: u32) {
@ -336,8 +468,11 @@ fn compute_aabb_for_cluster(
        };
        // NOTE: This could be simplified to:
        // cluster_far = cluster_near * z_far_over_z_near;
-        let cluster_far =
-            -z_near * z_far_over_z_near.powf(ijk.z / (cluster_dimensions.z - 1) as f32);
+        let cluster_far = if cluster_dimensions.z == 1 {
+            -z_far
+        } else {
+            -z_near * z_far_over_z_near.powf(ijk.z / (cluster_dimensions.z - 1) as f32)
+        };

        // Calculate the four intersection points of the min and max points with the cluster near and far planes
        let p_min_near = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_near);
@ -352,68 +487,63 @@ fn compute_aabb_for_cluster(
    Aabb::from_min_max(cluster_min, cluster_max)
 }

-const Z_SLICES: u32 = 24;
-
 pub fn add_clusters(
    mut commands: Commands,
-    windows: Res<Windows>,
-    images: Res<Assets<Image>>,
-    cameras: Query<(Entity, &Camera), Without<Clusters>>,
+    cameras: Query<(Entity, Option<&ClusterConfig>), (With<Camera>, Without<Clusters>)>,
 ) {
-    for (entity, camera) in cameras.iter() {
-        if let Some(size) = camera.target.get_physical_size(&windows, &images) {
-            let clusters = Clusters::from_screen_size_and_z_slices(size, Z_SLICES);
-            commands.entity(entity).insert(clusters);
-        }
+    for (entity, config) in cameras.iter() {
+        let config = config.copied().unwrap_or_default();
+        // actual settings here don't matter - they will be overwritten in assign_lights_to_clusters
+        let clusters = Clusters::from_screen_size_and_dimensions(UVec2::ONE, UVec3::ONE, 1.0, 1.0);
+        commands.entity(entity).insert_bundle((clusters, config));
    }
 }

-pub fn update_clusters(
-    windows: Res<Windows>,
-    images: Res<Assets<Image>>,
-    mut views: Query<(&Camera, &mut Clusters)>,
+fn update_clusters(
+    screen_size: UVec2,
+    camera: &Camera,
+    cluster_dimensions: UVec3,
+    clusters: &mut Clusters,
+    near: f32,
+    far: f32,
 ) {
-    for (camera, mut clusters) in views.iter_mut() {
-        let is_orthographic = camera.projection_matrix.w_axis.w == 1.0;
-        let inverse_projection = camera.projection_matrix.inverse();
-        if let Some(screen_size_u32) = camera.target.get_physical_size(&windows, &images) {
-            // Don't update clusters if screen size is 0.
-            if screen_size_u32.x == 0 || screen_size_u32.y == 0 {
-                continue;
-            }
-            *clusters =
-                Clusters::from_screen_size_and_z_slices(screen_size_u32, clusters.axis_slices.z);
-            let screen_size = screen_size_u32.as_vec2();
-            let tile_size_u32 = clusters.tile_size;
-            let tile_size = tile_size_u32.as_vec2();
+    let is_orthographic = camera.projection_matrix.w_axis.w == 1.0;
+    let inverse_projection = camera.projection_matrix.inverse();
+    // Don't update clusters if screen size is 0.
+    if screen_size.x == 0 || screen_size.y == 0 {
+        return;
+    }
+    *clusters =
+        Clusters::from_screen_size_and_dimensions(screen_size, cluster_dimensions, near, far);
+    let screen_size = screen_size.as_vec2();
+    let tile_size_u32 = clusters.tile_size;
+    let tile_size = tile_size_u32.as_vec2();

-            // Calculate view space AABBs
-            // NOTE: It is important that these are iterated in a specific order
-            // so that we can calculate the cluster index in the fragment shader!
-            // I (Rob Swain) choose to scan along rows of tiles in x,y, and for each tile then scan
-            // along z
-            let mut aabbs = Vec::with_capacity(
-                (clusters.axis_slices.y * clusters.axis_slices.x * clusters.axis_slices.z) as usize,
-            );
-            for y in 0..clusters.axis_slices.y {
-                for x in 0..clusters.axis_slices.x {
-                    for z in 0..clusters.axis_slices.z {
-                        aabbs.push(compute_aabb_for_cluster(
-                            clusters.near,
-                            camera.far,
-                            tile_size,
-                            screen_size,
-                            inverse_projection,
-                            is_orthographic,
-                            clusters.axis_slices,
-                            UVec3::new(x, y, z),
-                        ));
-                    }
-                }
+    // Calculate view space AABBs
+    // NOTE: It is important that these are iterated in a specific order
+    // so that we can calculate the cluster index in the fragment shader!
+    // I (Rob Swain) choose to scan along rows of tiles in x,y, and for each tile then scan
+    // along z
+    let mut aabbs = Vec::with_capacity(
+        (clusters.axis_slices.y * clusters.axis_slices.x * clusters.axis_slices.z) as usize,
+    );
+    for y in 0..clusters.axis_slices.y {
+        for x in 0..clusters.axis_slices.x {
+            for z in 0..clusters.axis_slices.z {
+                aabbs.push(compute_aabb_for_cluster(
+                    near,
+                    far,
+                    tile_size,
+                    screen_size,
+                    inverse_projection,
+                    is_orthographic,
+                    clusters.axis_slices,
+                    UVec3::new(x, y, z),
+                ));
            }
-            clusters.aabbs = aabbs;
        }
    }
+    clusters.aabbs = aabbs;
 }

 #[derive(Clone, Component, Debug, Default)]
@ -479,6 +609,98 @@ fn ndc_position_to_cluster(
        .clamp(UVec3::ZERO, cluster_dimensions - UVec3::ONE)
 }

+// Calculate bounds for the light using a view space aabb.
+// Returns a (Vec3, Vec3) containing min and max with
+//     x and y in normalized device coordinates with range [-1, 1]
+//     z in view space, with range [-inf, -f32::MIN_POSITIVE]
+fn cluster_space_light_aabb(
+    inverse_view_transform: Mat4,
+    projection_matrix: Mat4,
+    light_sphere: &Sphere,
+) -> (Vec3, Vec3) {
+    let light_aabb_view = Aabb {
+        center: (inverse_view_transform * light_sphere.center.extend(1.0)).xyz(),
+        half_extents: Vec3::splat(light_sphere.radius),
+    };
+    let (mut light_aabb_view_min, mut light_aabb_view_max) =
+        (light_aabb_view.min(), light_aabb_view.max());
+
+    // Constrain view z to be negative - i.e. in front of the camera
+    // When view z is >= 0.0 and we're using a perspective projection, bad things happen.
+    // At view z == 0.0, ndc x,y are mathematically undefined. At view z > 0.0, i.e. behind the camera,
+    // the perspective projection flips the directions of the axes. This breaks assumptions about
+    // use of min/max operations as something that was to the left in view space is now returning a
+    // coordinate that for view z in front of the camera would be on the right, but at view z behind the
+    // camera is on the left. So, we just constrain view z to be < 0.0 and necessarily in front of the camera.
+    light_aabb_view_min.z = light_aabb_view_min.z.min(-f32::MIN_POSITIVE);
+    light_aabb_view_max.z = light_aabb_view_max.z.min(-f32::MIN_POSITIVE);
+
+    // Is there a cheaper way to do this? The problem is that because of perspective
+    // the point at max z but min xy may be less xy in screenspace, and similar. As
+    // such, projecting the min and max xy at both the closer and further z and taking
+    // the min and max of those projected points addresses this.
+    let (
+        light_aabb_view_xymin_near,
+        light_aabb_view_xymin_far,
+        light_aabb_view_xymax_near,
+        light_aabb_view_xymax_far,
+    ) = (
+        light_aabb_view_min,
+        light_aabb_view_min.xy().extend(light_aabb_view_max.z),
+        light_aabb_view_max.xy().extend(light_aabb_view_min.z),
+        light_aabb_view_max,
+    );
+    let (
+        light_aabb_clip_xymin_near,
+        light_aabb_clip_xymin_far,
+        light_aabb_clip_xymax_near,
+        light_aabb_clip_xymax_far,
+    ) = (
+        projection_matrix * light_aabb_view_xymin_near.extend(1.0),
+        projection_matrix * light_aabb_view_xymin_far.extend(1.0),
+        projection_matrix * light_aabb_view_xymax_near.extend(1.0),
+        projection_matrix * light_aabb_view_xymax_far.extend(1.0),
+    );
+    let (
+        light_aabb_ndc_xymin_near,
+        light_aabb_ndc_xymin_far,
+        light_aabb_ndc_xymax_near,
+        light_aabb_ndc_xymax_far,
+    ) = (
+        light_aabb_clip_xymin_near.xyz() / light_aabb_clip_xymin_near.w,
+        light_aabb_clip_xymin_far.xyz() / light_aabb_clip_xymin_far.w,
+        light_aabb_clip_xymax_near.xyz() / light_aabb_clip_xymax_near.w,
+        light_aabb_clip_xymax_far.xyz() / light_aabb_clip_xymax_far.w,
+    );
+    let (light_aabb_ndc_min, light_aabb_ndc_max) = (
+        light_aabb_ndc_xymin_near
+            .min(light_aabb_ndc_xymin_far)
+            .min(light_aabb_ndc_xymax_near)
+            .min(light_aabb_ndc_xymax_far),
+        light_aabb_ndc_xymin_near
+            .max(light_aabb_ndc_xymin_far)
+            .max(light_aabb_ndc_xymax_near)
+            .max(light_aabb_ndc_xymax_far),
+    );
+
+    // pack unadjusted z depth into the vecs
+    let (aabb_min, aabb_max) = (
+        light_aabb_ndc_min.xy().extend(light_aabb_view_min.z),
+        light_aabb_ndc_max.xy().extend(light_aabb_view_max.z),
+    );
+    // clamp to ndc coords
+    (
+        aabb_min.clamp(
+            Vec3::new(-1.0, -1.0, f32::MIN),
+            Vec3::new(1.0, 1.0, f32::MAX),
+        ),
+        aabb_max.clamp(
+            Vec3::new(-1.0, -1.0, f32::MIN),
+            Vec3::new(1.0, 1.0, f32::MAX),
+        ),
+    )
+}
+
 // Sort point lights with shadows enabled first, then by a stable key so that the index
 // can be used to render at most `MAX_POINT_LIGHT_SHADOW_MAPS` point light shadows and
 // we keep a stable set of lights visible
@ -502,10 +724,20 @@ pub(crate) struct PointLightAssignmentData {
 }

 // NOTE: Run this before update_point_light_frusta!
+#[allow(clippy::too_many_arguments)]
 pub(crate) fn assign_lights_to_clusters(
    mut commands: Commands,
    mut global_lights: ResMut<VisiblePointLights>,
-    mut views: Query<(Entity, &GlobalTransform, &Camera, &Frustum, &mut Clusters)>,
+    windows: Res<Windows>,
+    images: Res<Assets<Image>>,
+    mut views: Query<(
+        Entity,
+        &GlobalTransform,
+        &Camera,
+        &Frustum,
+        &ClusterConfig,
+        &mut Clusters,
+    )>,
    lights_query: Query<(Entity, &GlobalTransform, &PointLight, &Visibility)>,
    mut lights: Local<Vec<PointLightAssignmentData>>,
    mut max_point_lights_warning_emitted: Local<bool>,
@ -534,7 +766,10 @@ pub(crate) fn assign_lights_to_clusters(
        });

        // check each light against each view's frustum, keep only those that affect at least one of our views
-        let frusta: Vec<_> = views.iter().map(|(_, _, _, frustum, _)| *frustum).collect();
+        let frusta: Vec<_> = views
+            .iter()
+            .map(|(_, _, _, frustum, _, _)| *frustum)
+            .collect();
        let mut lights_in_view_count = 0;
        lights.retain(|light| {
            // take one extra light to check if we should emit the warning
@ -568,19 +803,141 @@ pub(crate) fn assign_lights_to_clusters(

    let light_count = lights.len();
    let mut global_lights_set = HashSet::with_capacity(light_count);
-    for (view_entity, view_transform, camera, frustum, mut clusters) in views.iter_mut() {
+    for (view_entity, view_transform, camera, frustum, config, mut clusters) in views.iter_mut() {
+        if matches!(config, ClusterConfig::None) {
+            commands.entity(view_entity).remove::<VisiblePointLights>();
+            continue;
+        }
+
        let view_transform = view_transform.compute_matrix();
        let inverse_view_transform = view_transform.inverse();
-        let cluster_count = clusters.aabbs.len();
        let is_orthographic = camera.projection_matrix.w_axis.w == 1.0;
+
+        let screen_size_u32 = camera.target.get_physical_size(&windows, &images);
+        let screen_size_u32 = screen_size_u32.unwrap_or_default();
+        if screen_size_u32.x == 0 || screen_size_u32.y == 0 {
+            continue;
+        }
+        let mut cluster_dimensions = config.dimensions_for_screen_size(screen_size_u32);
+
+        let far_z = match config.far_z_mode() {
+            ClusterFarZMode::CameraFarPlane => camera.far,
+            ClusterFarZMode::MaxLightRange => {
+                let inverse_view_row_2 = inverse_view_transform.row(2);
+                lights
+                    .iter()
+                    .map(|light| {
+                        -inverse_view_row_2.dot(light.translation.extend(1.0)) + light.range
+                    })
+                    .reduce(f32::max)
+                    .unwrap_or(0.0)
+            }
+            ClusterFarZMode::Constant(far) => far,
+        };
+        let first_slice_depth = match cluster_dimensions.z {
+            1 => config.first_slice_depth().max(far_z),
+            _ => config.first_slice_depth(),
+        };
+        // NOTE: Ensure the far_z is at least as far as the first_slice_depth to avoid clustering problems.
+        let far_z = far_z.max(first_slice_depth);
+
        let cluster_factors = calculate_cluster_factors(
-            // NOTE: Using the special cluster near value
-            clusters.near,
-            camera.far,
-            clusters.axis_slices.z as f32,
+            first_slice_depth,
+            far_z,
+            cluster_dimensions.z as f32,
            is_orthographic,
        );

+        let max_indices = ViewClusterBindings::MAX_INDICES;
+
+        if config.dynamic_resizing() {
+            let mut cluster_index_estimate = 0.0;
+            for light in lights.iter() {
+                let light_sphere = Sphere {
+                    center: light.translation,
+                    radius: light.range,
+                };
+
+                // Check if the light is within the view frustum
+                if !frustum.intersects_sphere(&light_sphere) {
+                    continue;
+                }
+
+                // calculate a conservative aabb estimate of number of clusters affected by this light
+                // this overestimates index counts by at most 50% (and typically much less) when the whole light range is in view
+                // it can overestimate more significantly when light ranges are only partially in view
+                let (light_aabb_min, light_aabb_max) = cluster_space_light_aabb(
+                    inverse_view_transform,
+                    camera.projection_matrix,
+                    &light_sphere,
+                );
+
+                // since we won't adjust z slices we can calculate exact number of slices required in z dimension
+                let z_cluster_min = view_z_to_z_slice(
+                    cluster_factors,
+                    cluster_dimensions.z as f32,
+                    light_aabb_min.z,
+                    is_orthographic,
+                );
+                let z_cluster_max = view_z_to_z_slice(
+                    cluster_factors,
+                    cluster_dimensions.z as f32,
+                    light_aabb_max.z,
+                    is_orthographic,
+                );
+                let z_count =
+                    z_cluster_min.max(z_cluster_max) - z_cluster_min.min(z_cluster_max) + 1;
+
+                // calculate x/y count using floats to avoid overestimating counts due to large initial tile sizes
+                let xy_min = light_aabb_min.xy();
+                let xy_max = light_aabb_max.xy();
+                // multiply by 0.5 to move from [-1,1] to [-0.5, 0.5], max extent of 1 in each dimension
+                let xy_count = (xy_max - xy_min)
+                    * 0.5
+                    * Vec2::new(cluster_dimensions.x as f32, cluster_dimensions.y as f32);
+
+                // add up to 2 to each axis to account for overlap
+                let x_overlap = if xy_min.x <= -1.0 { 0.0 } else { 1.0 }
+                    + if xy_max.x >= 1.0 { 0.0 } else { 1.0 };
+                let y_overlap = if xy_min.y <= -1.0 { 0.0 } else { 1.0 }
+                    + if xy_max.y >= 1.0 { 0.0 } else { 1.0 };
+                cluster_index_estimate +=
+                    (xy_count.x + x_overlap) * (xy_count.y + y_overlap) * z_count as f32;
+            }
+
+            if cluster_index_estimate > max_indices as f32 {
+                // scale x and y cluster count to be able to fit all our indices
+
+                // we take the ratio of the available indices (max_indices) over the index estimate.
+                // this is not guaranteed to be small enough due to overlapped tiles, but
+                // the conservative estimate is more than sufficient to cover the
+                // difference
+                let index_ratio = max_indices as f32 / cluster_index_estimate as f32;
+                let xy_ratio = index_ratio.sqrt();
+
+                cluster_dimensions.x =
+                    ((cluster_dimensions.x as f32 * xy_ratio).floor() as u32).max(1);
+                cluster_dimensions.y =
+                    ((cluster_dimensions.y as f32 * xy_ratio).floor() as u32).max(1);
+            }
+        }
+
+        update_clusters(
+            screen_size_u32,
+            camera,
+            cluster_dimensions,
+            &mut clusters,
+            first_slice_depth,
+            far_z,
+        );
+        // NOTE: This is here to avoid bugs in future due to update_clusters() having updated clusters.axis_slices
+        // but cluster_dimensions has a different configuration.
+        #[allow(unused_assignments)]
+        {
+            cluster_dimensions = clusters.axis_slices;
+        }
+        let cluster_count = clusters.aabbs.len();
+
        let mut clusters_lights =
            vec![VisiblePointLights::from_light_count(light_count); cluster_count];
        let mut visible_lights = Vec::with_capacity(light_count);
@ -600,77 +957,31 @@ pub(crate) fn assign_lights_to_clusters(
            global_lights_set.insert(light.entity);
            visible_lights.push(light.entity);

-            // Calculate an AABB for the light in view space, find the corresponding clusters for the min and max
-            // points of the AABB, then iterate over just those clusters for this light
-            let light_aabb_view = Aabb {
-                center: (inverse_view_transform * light_sphere.center.extend(1.0)).xyz(),
-                half_extents: Vec3::splat(light_sphere.radius),
-            };
-            let (light_aabb_view_min, light_aabb_view_max) =
-                (light_aabb_view.min(), light_aabb_view.max());
-            // Is there a cheaper way to do this? The problem is that because of perspective
-            // the point at max z but min xy may be less xy in screenspace, and similar. As
-            // such, projecting the min and max xy at both the closer and further z and taking
-            // the min and max of those projected points addresses this.
-            let (
-                light_aabb_view_xymin_near,
-                light_aabb_view_xymin_far,
-                light_aabb_view_xymax_near,
-                light_aabb_view_xymax_far,
-            ) = (
-                light_aabb_view_min,
-                light_aabb_view_min.xy().extend(light_aabb_view_max.z),
-                light_aabb_view_max.xy().extend(light_aabb_view_min.z),
-                light_aabb_view_max,
-            );
-            let (
-                light_aabb_clip_xymin_near,
-                light_aabb_clip_xymin_far,
-                light_aabb_clip_xymax_near,
-                light_aabb_clip_xymax_far,
-            ) = (
-                camera.projection_matrix * light_aabb_view_xymin_near.extend(1.0),
-                camera.projection_matrix * light_aabb_view_xymin_far.extend(1.0),
-                camera.projection_matrix * light_aabb_view_xymax_near.extend(1.0),
-                camera.projection_matrix * light_aabb_view_xymax_far.extend(1.0),
-            );
-            let (
-                light_aabb_ndc_xymin_near,
-                light_aabb_ndc_xymin_far,
-                light_aabb_ndc_xymax_near,
-                light_aabb_ndc_xymax_far,
-            ) = (
-                light_aabb_clip_xymin_near.xyz() / light_aabb_clip_xymin_near.w,
-                light_aabb_clip_xymin_far.xyz() / light_aabb_clip_xymin_far.w,
-                light_aabb_clip_xymax_near.xyz() / light_aabb_clip_xymax_near.w,
-                light_aabb_clip_xymax_far.xyz() / light_aabb_clip_xymax_far.w,
-            );
-            let (light_aabb_ndc_min, light_aabb_ndc_max) = (
-                light_aabb_ndc_xymin_near
-                    .min(light_aabb_ndc_xymin_far)
-                    .min(light_aabb_ndc_xymax_near)
-                    .min(light_aabb_ndc_xymax_far),
-                light_aabb_ndc_xymin_near
-                    .max(light_aabb_ndc_xymin_far)
-                    .max(light_aabb_ndc_xymax_near)
-                    .max(light_aabb_ndc_xymax_far),
-            );
+            // note: caching seems to be slower than calling twice for this aabb calculation
+            let (light_aabb_xy_ndc_z_view_min, light_aabb_xy_ndc_z_view_max) =
+                cluster_space_light_aabb(
+                    inverse_view_transform,
+                    camera.projection_matrix,
+                    &light_sphere,
+                );
+
            let min_cluster = ndc_position_to_cluster(
                clusters.axis_slices,
                cluster_factors,
                is_orthographic,
-                light_aabb_ndc_min,
-                light_aabb_view_min.z,
+                light_aabb_xy_ndc_z_view_min,
+                light_aabb_xy_ndc_z_view_min.z,
            );
            let max_cluster = ndc_position_to_cluster(
                clusters.axis_slices,
                cluster_factors,
                is_orthographic,
-                light_aabb_ndc_max,
-                light_aabb_view_max.z,
+                light_aabb_xy_ndc_z_view_max,
+                light_aabb_xy_ndc_z_view_max.z,
            );
            let (min_cluster, max_cluster) =
                (min_cluster.min(max_cluster), min_cluster.max(max_cluster));
+
            for y in min_cluster.y..=max_cluster.y {
                let row_offset = y * clusters.axis_slices.x;
                for x in min_cluster.x..=max_cluster.x {
--- a/crates/bevy_pbr/src/render/light.rs
+++ b/crates/bevy_pbr/src/render/light.rs
@ -303,6 +303,7 @@ impl SpecializedMeshPipeline for ShadowPipeline {
 pub struct ExtractedClusterConfig {
    /// Special near value for cluster calculations
    near: f32,
+    far: f32,
    /// Number of clusters in x / y / z in the view frustum
    axis_slices: UVec3,
 }
@ -320,6 +321,7 @@ pub fn extract_clusters(mut commands: Commands, views: Query<(Entity, &Clusters)
            },
            ExtractedClusterConfig {
                near: clusters.near,
+                far: clusters.far,
                axis_slices: clusters.axis_slices,
            },
        ));
@ -670,7 +672,7 @@ pub fn prepare_lights(
        let is_orthographic = extracted_view.projection.w_axis.w == 1.0;
        let cluster_factors_zw = calculate_cluster_factors(
            clusters.near,
-            extracted_view.far,
+            clusters.far,
            clusters.axis_slices.z as f32,
            is_orthographic,
        );
@ -924,7 +926,7 @@ pub struct ViewClusterBindings {
 impl ViewClusterBindings {
    pub const MAX_OFFSETS: usize = 16384 / 4;
    const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4;
-    const MAX_INDICES: usize = 16384;
+    pub const MAX_INDICES: usize = 16384;

    pub fn reserve_and_clear(&mut self) {
        self.cluster_light_index_lists.clear();