Dynamic light clusters (#3968)

# Objective

provide some customisation for default cluster setup
avoid "cluster index lists is full" in all cases (using a strategy outlined by @superdump)

## Solution

Add ClusterConfig enum (which can be inserted into a view at any time) to allow specifying cluster setup with variants:
- None (do not do any light assignment - for views which do not require light info, e.g. minimaps etc)
- Single (one cluster)
- XYZ (explicit cluster counts in each dimension)
- FixedZ (most similar to current - specify Z-slices and total, then x and y counts are dynamically determined to give approximately square clusters based on current aspect ratio)
Defaults to `FixedZ { total: 4096, z_slices: 24, z_config: ClusterZConfig::default(), dynamic_resizing: true }`, which is similar to the current setup.

Per frame, estimate the number of indices that would be required for the current config and decrease the cluster counts / increase the cluster sizes in the x and y dimensions if the index list would be too small.

notes:

- I didn't put ClusterConfig in the camera bundles, to avoid introducing a dependency from bevy_render on bevy_pbr. The ClusterConfig enum comes with a pbr-centric impl block, so I didn't want to move it into bevy_render either.
- ~Might want to add None variant to cluster config for views that don't care about lights?~
- Not well tested for orthographic
- ~there's a cluster_muck branch on my repo which includes some diagnostics / a modified lighting example which may be useful for tyre-kicking~ (outdated, i will bring it up to date if required)

anecdotal timings:

FPS on the lighting demo is negligibly better (~5%), maybe due to a small optimisation constraining the light aabb to be in front of the camera
FPS on the lighting demo with 100 extra lights added is ~33% faster, and also renders correctly as the cluster index count is no longer exceeded
This commit is contained in:
robtfm 2022-03-08 04:56:42 +00:00
parent a188babce2
commit 244687a0bb
3 changed files with 450 additions and 145 deletions

View File

@ -82,19 +82,11 @@ impl Plugin for PbrPlugin {
.exclusive_system() .exclusive_system()
.label(SimulationLightSystems::AddClusters), .label(SimulationLightSystems::AddClusters),
) )
.add_system_to_stage(
CoreStage::PostUpdate,
// NOTE: Must come after add_clusters!
update_clusters
.label(SimulationLightSystems::UpdateClusters)
.after(TransformSystem::TransformPropagate),
)
.add_system_to_stage( .add_system_to_stage(
CoreStage::PostUpdate, CoreStage::PostUpdate,
assign_lights_to_clusters assign_lights_to_clusters
.label(SimulationLightSystems::AssignLightsToClusters) .label(SimulationLightSystems::AssignLightsToClusters)
.after(TransformSystem::TransformPropagate) .after(TransformSystem::TransformPropagate),
.after(SimulationLightSystems::UpdateClusters),
) )
.add_system_to_stage( .add_system_to_stage(
CoreStage::PostUpdate, CoreStage::PostUpdate,

View File

@ -191,7 +191,6 @@ pub struct NotShadowReceiver;
#[derive(Debug, Hash, PartialEq, Eq, Clone, SystemLabel)] #[derive(Debug, Hash, PartialEq, Eq, Clone, SystemLabel)]
pub enum SimulationLightSystems { pub enum SimulationLightSystems {
AddClusters, AddClusters,
UpdateClusters,
AssignLightsToClusters, AssignLightsToClusters,
UpdateDirectionalLightFrusta, UpdateDirectionalLightFrusta,
UpdatePointLightFrusta, UpdatePointLightFrusta,
@ -207,6 +206,135 @@ pub enum SimulationLightSystems {
// The z-slicing method mentioned in the aortiz article is originally from Tiago Sousas Siggraph 2016 talk about Doom 2016: // The z-slicing method mentioned in the aortiz article is originally from Tiago Sousas Siggraph 2016 talk about Doom 2016:
// http://advances.realtimerendering.com/s2016/Siggraph2016_idTech6.pdf // http://advances.realtimerendering.com/s2016/Siggraph2016_idTech6.pdf
/// Configure the far z-plane mode used for the furthest depth slice for clustered forward
/// rendering
///
/// Whichever mode is selected, `assign_lights_to_clusters` raises the resulting far depth
/// so that it is never closer than the depth of the first slice.
#[derive(Debug, Copy, Clone)]
pub enum ClusterFarZMode {
/// Use the camera far-plane to determine the z-depth of the furthest cluster layer
CameraFarPlane,
/// Calculate the required maximum z-depth based on currently visible lights.
/// Makes better use of available clusters, speeding up GPU lighting operations
/// at the expense of some CPU time and using more indices in the cluster light
/// index lists.
///
/// The depth used is the largest "view depth of the light plus its range" over the
/// lights being assigned; with no lights it starts from 0.0.
MaxLightRange,
/// Constant max z-depth
///
/// The wrapped value is used directly as the far depth.
Constant(f32),
}
/// Configure the depth-slicing strategy for clustered forward rendering
///
/// The `Default` implementation uses a first slice depth of 5.0 and
/// `ClusterFarZMode::MaxLightRange`.
#[derive(Debug, Copy, Clone)]
pub struct ClusterZConfig {
/// Far z plane of the first depth slice
///
/// When only one depth slice is in use, `assign_lights_to_clusters` extends this to the
/// far depth so the single slice covers the whole range.
pub first_slice_depth: f32,
/// Strategy for how to evaluate the far z plane of the furthest depth slice
pub far_z_mode: ClusterFarZMode,
}
impl Default for ClusterZConfig {
fn default() -> Self {
Self {
first_slice_depth: 5.0,
far_z_mode: ClusterFarZMode::MaxLightRange,
}
}
}
/// Configuration of the clustering strategy for clustered forward rendering
///
/// The `Default` implementation is `FixedZ` with 4096 total clusters, 24 depth slices,
/// the default `ClusterZConfig` and dynamic resizing enabled.
#[derive(Debug, Copy, Clone, Component)]
pub enum ClusterConfig {
/// Disable light cluster calculations for this view
None,
/// One single cluster. Optimal for low-light complexity scenes or scenes where
/// most lights affect the entire scene.
Single,
/// Explicit x, y and z counts (may yield non-square x/y clusters depending on the aspect ratio)
XYZ {
/// Number of clusters along x, y and z; used as-is regardless of screen size
dimensions: UVec3,
/// Depth-slicing configuration (first slice depth and far z mode)
z_config: ClusterZConfig,
/// Specify if clusters should automatically resize in x/y if there is a risk of exceeding
/// the available cluster-light index limit
dynamic_resizing: bool,
},
/// Fixed number of z slices, x and y calculated to give square clusters
/// with at most total clusters. For top-down games where lights will generally always be within a
/// short depth range, it may be useful to use this configuration with 1 or few z slices. This
/// would reduce the number of lights per cluster by distributing more clusters in screen space
/// x/y which matches how lights are distributed in the scene.
FixedZ {
/// Upper bound on the total number of clusters (x * y * z)
total: u32,
/// Number of depth slices; `total / z_slices` clusters are available per slice
z_slices: u32,
/// Depth-slicing configuration (first slice depth and far z mode)
z_config: ClusterZConfig,
/// Specify if clusters should automatically resize in x/y if there is a risk of exceeding
/// the available cluster-light index limit
dynamic_resizing: bool,
},
}
impl Default for ClusterConfig {
fn default() -> Self {
// 24 depth slices, square clusters with at most 4096 total clusters
// use max light distance as clusters max Z-depth, first slice extends to 5.0
Self::FixedZ {
total: 4096,
z_slices: 24,
z_config: ClusterZConfig::default(),
dynamic_resizing: true,
}
}
}
impl ClusterConfig {
    /// Returns the number of clusters along x, y and z for a view whose render target is
    /// `screen_size` physical pixels.
    ///
    /// - `None` yields zero clusters and `Single` exactly one.
    /// - `XYZ` returns the configured `dimensions` unchanged.
    /// - `FixedZ` distributes `total / z_slices` clusters per depth slice over x and y so
    ///   that clusters are approximately square for the aspect ratio of `screen_size`.
    ///
    /// For `FixedZ` every returned component is at least 1: the tile size is later derived
    /// by dividing the screen size by the x/y counts, so a zero count must never escape.
    fn dimensions_for_screen_size(&self, screen_size: UVec2) -> UVec3 {
        match self {
            ClusterConfig::None => UVec3::ZERO,
            ClusterConfig::Single => UVec3::ONE,
            ClusterConfig::XYZ { dimensions, .. } => *dimensions,
            ClusterConfig::FixedZ {
                total, z_slices, ..
            } => {
                // Every depth slice needs at least one cluster, so use at least one slice
                // and never more slices than the total cluster budget allows.
                let total = (*total).max(1);
                let z_slices = (*z_slices).clamp(1, total);

                let aspect_ratio = screen_size.x as f32 / screen_size.y as f32;
                let per_layer = total as f32 / z_slices as f32;

                let y = f32::sqrt(per_layer / aspect_ratio);
                let mut x = (y * aspect_ratio).floor() as u32;
                let mut y = y.floor() as u32;

                // Extreme (or degenerate) aspect ratios floor one axis down to zero.
                // Fall back to a single column / row holding the whole per-layer budget;
                // `per_layer >= 1` is guaranteed by the clamping above.
                if x == 0 {
                    x = 1;
                    y = per_layer as u32;
                }
                if y == 0 {
                    x = per_layer as u32;
                    y = 1;
                }

                UVec3::new(x, y, z_slices)
            }
        }
    }

    /// Returns the far depth of the first depth slice for this configuration.
    fn first_slice_depth(&self) -> f32 {
        match self {
            ClusterConfig::None => 0.0,
            ClusterConfig::Single => 1.0e9, // FIXME note can't use f32::MAX as the aabb explodes
            ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => {
                z_config.first_slice_depth
            }
        }
    }

    /// Returns the strategy used to place the far plane of the furthest depth slice.
    fn far_z_mode(&self) -> ClusterFarZMode {
        match self {
            ClusterConfig::None => ClusterFarZMode::Constant(0.0),
            ClusterConfig::Single => ClusterFarZMode::Constant(1.0e9), // FIXME note can't use f32::MAX as the aabb explodes
            ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => {
                z_config.far_z_mode
            }
        }
    }

    /// Returns whether x/y cluster counts may be reduced at runtime; always `false` for
    /// `None` and `Single`, which have no x/y counts to adjust.
    fn dynamic_resizing(&self) -> bool {
        match self {
            ClusterConfig::None | ClusterConfig::Single => false,
            ClusterConfig::XYZ {
                dynamic_resizing, ..
            }
            | ClusterConfig::FixedZ {
                dynamic_resizing, ..
            } => *dynamic_resizing,
        }
    }
}
#[derive(Component, Debug)] #[derive(Component, Debug)]
pub struct Clusters { pub struct Clusters {
/// Tile size /// Tile size
@ -216,16 +344,18 @@ pub struct Clusters {
/// Distance to the far plane of the first depth slice. The first depth slice is special /// Distance to the far plane of the first depth slice. The first depth slice is special
/// and explicitly-configured to avoid having unnecessarily many slices close to the camera. /// and explicitly-configured to avoid having unnecessarily many slices close to the camera.
pub(crate) near: f32, pub(crate) near: f32,
pub(crate) far: f32,
aabbs: Vec<Aabb>, aabbs: Vec<Aabb>,
pub(crate) lights: Vec<VisiblePointLights>, pub(crate) lights: Vec<VisiblePointLights>,
} }
impl Clusters { impl Clusters {
fn new(tile_size: UVec2, screen_size: UVec2, z_slices: u32) -> Self { fn new(tile_size: UVec2, screen_size: UVec2, z_slices: u32, near: f32, far: f32) -> Self {
let mut clusters = Self { let mut clusters = Self {
tile_size, tile_size,
axis_slices: Default::default(), axis_slices: Default::default(),
near: 5.0, near,
far,
aabbs: Default::default(), aabbs: Default::default(),
lights: Default::default(), lights: Default::default(),
}; };
@ -233,17 +363,19 @@ impl Clusters {
clusters clusters
} }
fn from_screen_size_and_z_slices(screen_size: UVec2, z_slices: u32) -> Self { fn from_screen_size_and_dimensions(
let aspect_ratio = screen_size.x as f32 / screen_size.y as f32; screen_size: UVec2,
let n_tiles_y = dimensions: UVec3,
((ViewClusterBindings::MAX_OFFSETS as u32 / z_slices) as f32 / aspect_ratio).sqrt(); near: f32,
// NOTE: Round down the number of tiles in order to avoid overflowing the maximum number of far: f32,
// clusters. ) -> Self {
let n_tiles = UVec2::new( Clusters::new(
(aspect_ratio * n_tiles_y).floor() as u32, (screen_size + UVec2::ONE) / dimensions.xy(),
n_tiles_y.floor() as u32, screen_size,
); dimensions.z,
Clusters::new((screen_size + UVec2::ONE) / n_tiles, screen_size, Z_SLICES) near,
far,
)
} }
fn update(&mut self, tile_size: UVec2, screen_size: UVec2, z_slices: u32) { fn update(&mut self, tile_size: UVec2, screen_size: UVec2, z_slices: u32) {
@ -336,8 +468,11 @@ fn compute_aabb_for_cluster(
}; };
// NOTE: This could be simplified to: // NOTE: This could be simplified to:
// cluster_far = cluster_near * z_far_over_z_near; // cluster_far = cluster_near * z_far_over_z_near;
let cluster_far = let cluster_far = if cluster_dimensions.z == 1 {
-z_near * z_far_over_z_near.powf(ijk.z / (cluster_dimensions.z - 1) as f32); -z_far
} else {
-z_near * z_far_over_z_near.powf(ijk.z / (cluster_dimensions.z - 1) as f32)
};
// Calculate the four intersection points of the min and max points with the cluster near and far planes // Calculate the four intersection points of the min and max points with the cluster near and far planes
let p_min_near = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_near); let p_min_near = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_near);
@ -352,38 +487,35 @@ fn compute_aabb_for_cluster(
Aabb::from_min_max(cluster_min, cluster_max) Aabb::from_min_max(cluster_min, cluster_max)
} }
const Z_SLICES: u32 = 24;
pub fn add_clusters( pub fn add_clusters(
mut commands: Commands, mut commands: Commands,
windows: Res<Windows>, cameras: Query<(Entity, Option<&ClusterConfig>), (With<Camera>, Without<Clusters>)>,
images: Res<Assets<Image>>,
cameras: Query<(Entity, &Camera), Without<Clusters>>,
) { ) {
for (entity, camera) in cameras.iter() { for (entity, config) in cameras.iter() {
if let Some(size) = camera.target.get_physical_size(&windows, &images) { let config = config.copied().unwrap_or_default();
let clusters = Clusters::from_screen_size_and_z_slices(size, Z_SLICES); // actual settings here don't matter - they will be overwritten in assign_lights_to_clusters
commands.entity(entity).insert(clusters); let clusters = Clusters::from_screen_size_and_dimensions(UVec2::ONE, UVec3::ONE, 1.0, 1.0);
} commands.entity(entity).insert_bundle((clusters, config));
} }
} }
pub fn update_clusters( fn update_clusters(
windows: Res<Windows>, screen_size: UVec2,
images: Res<Assets<Image>>, camera: &Camera,
mut views: Query<(&Camera, &mut Clusters)>, cluster_dimensions: UVec3,
clusters: &mut Clusters,
near: f32,
far: f32,
) { ) {
for (camera, mut clusters) in views.iter_mut() {
let is_orthographic = camera.projection_matrix.w_axis.w == 1.0; let is_orthographic = camera.projection_matrix.w_axis.w == 1.0;
let inverse_projection = camera.projection_matrix.inverse(); let inverse_projection = camera.projection_matrix.inverse();
if let Some(screen_size_u32) = camera.target.get_physical_size(&windows, &images) {
// Don't update clusters if screen size is 0. // Don't update clusters if screen size is 0.
if screen_size_u32.x == 0 || screen_size_u32.y == 0 { if screen_size.x == 0 || screen_size.y == 0 {
continue; return;
} }
*clusters = *clusters =
Clusters::from_screen_size_and_z_slices(screen_size_u32, clusters.axis_slices.z); Clusters::from_screen_size_and_dimensions(screen_size, cluster_dimensions, near, far);
let screen_size = screen_size_u32.as_vec2(); let screen_size = screen_size.as_vec2();
let tile_size_u32 = clusters.tile_size; let tile_size_u32 = clusters.tile_size;
let tile_size = tile_size_u32.as_vec2(); let tile_size = tile_size_u32.as_vec2();
@ -399,8 +531,8 @@ pub fn update_clusters(
for x in 0..clusters.axis_slices.x { for x in 0..clusters.axis_slices.x {
for z in 0..clusters.axis_slices.z { for z in 0..clusters.axis_slices.z {
aabbs.push(compute_aabb_for_cluster( aabbs.push(compute_aabb_for_cluster(
clusters.near, near,
camera.far, far,
tile_size, tile_size,
screen_size, screen_size,
inverse_projection, inverse_projection,
@ -412,8 +544,6 @@ pub fn update_clusters(
} }
} }
clusters.aabbs = aabbs; clusters.aabbs = aabbs;
}
}
} }
#[derive(Clone, Component, Debug, Default)] #[derive(Clone, Component, Debug, Default)]
@ -479,6 +609,98 @@ fn ndc_position_to_cluster(
.clamp(UVec3::ZERO, cluster_dimensions - UVec3::ONE) .clamp(UVec3::ZERO, cluster_dimensions - UVec3::ONE)
} }
// Compute a bounding box for a light from its view space aabb.
// The result is a (min, max) pair of Vec3 where
// x and y are normalized device coordinates clamped to [-1, 1]
// z is in view space, with range [-inf, -f32::MIN_POSITIVE]
fn cluster_space_light_aabb(
    inverse_view_transform: Mat4,
    projection_matrix: Mat4,
    light_sphere: &Sphere,
) -> (Vec3, Vec3) {
    // Box around the light sphere, with its centre moved into view space.
    let view_aabb = Aabb {
        center: (inverse_view_transform * light_sphere.center.extend(1.0)).xyz(),
        half_extents: Vec3::splat(light_sphere.radius),
    };
    let mut view_min = view_aabb.min();
    let mut view_max = view_aabb.max();

    // Keep view z strictly negative, i.e. in front of the camera. With a perspective
    // projection, ndc x,y are undefined at view z == 0.0, and for view z > 0.0 (behind the
    // camera) the projected axes are flipped, which would invalidate the min/max
    // operations below.
    view_min.z = view_min.z.min(-f32::MIN_POSITIVE);
    view_max.z = view_max.z.min(-f32::MIN_POSITIVE);

    // View space -> clip space -> normalized device coordinates.
    let to_ndc = |view_position: Vec3| {
        let clip_position = projection_matrix * view_position.extend(1.0);
        clip_position.xyz() / clip_position.w
    };

    // Because of perspective, the xy extremes in screen space may come from either z
    // extreme of the box, so both the min-xy and the max-xy corner are projected at both
    // z extremes before taking the component-wise min and max.
    let ndc_xymin_zmin = to_ndc(view_min);
    let ndc_xymin_zmax = to_ndc(view_min.xy().extend(view_max.z));
    let ndc_xymax_zmin = to_ndc(view_max.xy().extend(view_min.z));
    let ndc_xymax_zmax = to_ndc(view_max);

    let ndc_min = ndc_xymin_zmin
        .min(ndc_xymin_zmax)
        .min(ndc_xymax_zmin)
        .min(ndc_xymax_zmax);
    let ndc_max = ndc_xymin_zmin
        .max(ndc_xymin_zmax)
        .max(ndc_xymax_zmin)
        .max(ndc_xymax_zmax);

    // x,y are limited to the ndc range; z carries the (unprojected) view space depth.
    let lower_bound = Vec3::new(-1.0, -1.0, f32::MIN);
    let upper_bound = Vec3::new(1.0, 1.0, f32::MAX);
    (
        ndc_min
            .xy()
            .extend(view_min.z)
            .clamp(lower_bound, upper_bound),
        ndc_max
            .xy()
            .extend(view_max.z)
            .clamp(lower_bound, upper_bound),
    )
}
// Sort point lights with shadows enabled first, then by a stable key so that the index // Sort point lights with shadows enabled first, then by a stable key so that the index
// can be used to render at most `MAX_POINT_LIGHT_SHADOW_MAPS` point light shadows and // can be used to render at most `MAX_POINT_LIGHT_SHADOW_MAPS` point light shadows and
// we keep a stable set of lights visible // we keep a stable set of lights visible
@ -502,10 +724,20 @@ pub(crate) struct PointLightAssignmentData {
} }
// NOTE: Run this before update_point_light_frusta! // NOTE: Run this before update_point_light_frusta!
#[allow(clippy::too_many_arguments)]
pub(crate) fn assign_lights_to_clusters( pub(crate) fn assign_lights_to_clusters(
mut commands: Commands, mut commands: Commands,
mut global_lights: ResMut<VisiblePointLights>, mut global_lights: ResMut<VisiblePointLights>,
mut views: Query<(Entity, &GlobalTransform, &Camera, &Frustum, &mut Clusters)>, windows: Res<Windows>,
images: Res<Assets<Image>>,
mut views: Query<(
Entity,
&GlobalTransform,
&Camera,
&Frustum,
&ClusterConfig,
&mut Clusters,
)>,
lights_query: Query<(Entity, &GlobalTransform, &PointLight, &Visibility)>, lights_query: Query<(Entity, &GlobalTransform, &PointLight, &Visibility)>,
mut lights: Local<Vec<PointLightAssignmentData>>, mut lights: Local<Vec<PointLightAssignmentData>>,
mut max_point_lights_warning_emitted: Local<bool>, mut max_point_lights_warning_emitted: Local<bool>,
@ -534,7 +766,10 @@ pub(crate) fn assign_lights_to_clusters(
}); });
// check each light against each view's frustum, keep only those that affect at least one of our views // check each light against each view's frustum, keep only those that affect at least one of our views
let frusta: Vec<_> = views.iter().map(|(_, _, _, frustum, _)| *frustum).collect(); let frusta: Vec<_> = views
.iter()
.map(|(_, _, _, frustum, _, _)| *frustum)
.collect();
let mut lights_in_view_count = 0; let mut lights_in_view_count = 0;
lights.retain(|light| { lights.retain(|light| {
// take one extra light to check if we should emit the warning // take one extra light to check if we should emit the warning
@ -568,19 +803,141 @@ pub(crate) fn assign_lights_to_clusters(
let light_count = lights.len(); let light_count = lights.len();
let mut global_lights_set = HashSet::with_capacity(light_count); let mut global_lights_set = HashSet::with_capacity(light_count);
for (view_entity, view_transform, camera, frustum, mut clusters) in views.iter_mut() { for (view_entity, view_transform, camera, frustum, config, mut clusters) in views.iter_mut() {
if matches!(config, ClusterConfig::None) {
commands.entity(view_entity).remove::<VisiblePointLights>();
continue;
}
let view_transform = view_transform.compute_matrix(); let view_transform = view_transform.compute_matrix();
let inverse_view_transform = view_transform.inverse(); let inverse_view_transform = view_transform.inverse();
let cluster_count = clusters.aabbs.len();
let is_orthographic = camera.projection_matrix.w_axis.w == 1.0; let is_orthographic = camera.projection_matrix.w_axis.w == 1.0;
let screen_size_u32 = camera.target.get_physical_size(&windows, &images);
let screen_size_u32 = screen_size_u32.unwrap_or_default();
if screen_size_u32.x == 0 || screen_size_u32.y == 0 {
continue;
}
let mut cluster_dimensions = config.dimensions_for_screen_size(screen_size_u32);
let far_z = match config.far_z_mode() {
ClusterFarZMode::CameraFarPlane => camera.far,
ClusterFarZMode::MaxLightRange => {
let inverse_view_row_2 = inverse_view_transform.row(2);
lights
.iter()
.map(|light| {
-inverse_view_row_2.dot(light.translation.extend(1.0)) + light.range
})
.reduce(f32::max)
.unwrap_or(0.0)
}
ClusterFarZMode::Constant(far) => far,
};
let first_slice_depth = match cluster_dimensions.z {
1 => config.first_slice_depth().max(far_z),
_ => config.first_slice_depth(),
};
// NOTE: Ensure the far_z is at least as far as the first_depth_slice to avoid clustering problems.
let far_z = far_z.max(first_slice_depth);
let cluster_factors = calculate_cluster_factors( let cluster_factors = calculate_cluster_factors(
// NOTE: Using the special cluster near value first_slice_depth,
clusters.near, far_z,
camera.far, cluster_dimensions.z as f32,
clusters.axis_slices.z as f32,
is_orthographic, is_orthographic,
); );
let max_indices = ViewClusterBindings::MAX_INDICES;
if config.dynamic_resizing() {
let mut cluster_index_estimate = 0.0;
for light in lights.iter() {
let light_sphere = Sphere {
center: light.translation,
radius: light.range,
};
// Check if the light is within the view frustum
if !frustum.intersects_sphere(&light_sphere) {
continue;
}
// calculate a conservative aabb estimate of number of clusters affected by this light
// this overestimates index counts by at most 50% (and typically much less) when the whole light range is in view
// it can overestimate more significantly when light ranges are only partially in view
let (light_aabb_min, light_aabb_max) = cluster_space_light_aabb(
inverse_view_transform,
camera.projection_matrix,
&light_sphere,
);
// since we won't adjust z slices we can calculate exact number of slices required in z dimension
let z_cluster_min = view_z_to_z_slice(
cluster_factors,
cluster_dimensions.z as f32,
light_aabb_min.z,
is_orthographic,
);
let z_cluster_max = view_z_to_z_slice(
cluster_factors,
cluster_dimensions.z as f32,
light_aabb_max.z,
is_orthographic,
);
let z_count =
z_cluster_min.max(z_cluster_max) - z_cluster_min.min(z_cluster_max) + 1;
// calculate x/y count using floats to avoid overestimating counts due to large initial tile sizes
let xy_min = light_aabb_min.xy();
let xy_max = light_aabb_max.xy();
// multiply by 0.5 to move from [-1,1] to [-0.5, 0.5], max extent of 1 in each dimension
let xy_count = (xy_max - xy_min)
* 0.5
* Vec2::new(cluster_dimensions.x as f32, cluster_dimensions.y as f32);
// add up to 2 to each axis to account for overlap
let x_overlap = if xy_min.x <= -1.0 { 0.0 } else { 1.0 }
+ if xy_max.x >= 1.0 { 0.0 } else { 1.0 };
let y_overlap = if xy_min.y <= -1.0 { 0.0 } else { 1.0 }
+ if xy_max.y >= 1.0 { 0.0 } else { 1.0 };
cluster_index_estimate +=
(xy_count.x + x_overlap) * (xy_count.y + y_overlap) * z_count as f32;
}
if cluster_index_estimate > max_indices as f32 {
// scale x and y cluster count to be able to fit all our indices
// we take the ratio of the actual indices over the index estimate.
// this is not guaranteed to be small enough due to overlapped tiles, but
// the conservative estimate is more than sufficient to cover the
// difference
let index_ratio = max_indices as f32 / cluster_index_estimate as f32;
let xy_ratio = index_ratio.sqrt();
cluster_dimensions.x =
((cluster_dimensions.x as f32 * xy_ratio).floor() as u32).max(1);
cluster_dimensions.y =
((cluster_dimensions.y as f32 * xy_ratio).floor() as u32).max(1);
}
}
update_clusters(
screen_size_u32,
camera,
cluster_dimensions,
&mut clusters,
first_slice_depth,
far_z,
);
// NOTE: This is here to avoid bugs in future due to update_clusters() having updated clusters.axis_slices
// but cluster_dimensions has a different configuration.
#[allow(unused_assignments)]
{
cluster_dimensions = clusters.axis_slices;
}
let cluster_count = clusters.aabbs.len();
let mut clusters_lights = let mut clusters_lights =
vec![VisiblePointLights::from_light_count(light_count); cluster_count]; vec![VisiblePointLights::from_light_count(light_count); cluster_count];
let mut visible_lights = Vec::with_capacity(light_count); let mut visible_lights = Vec::with_capacity(light_count);
@ -600,77 +957,31 @@ pub(crate) fn assign_lights_to_clusters(
global_lights_set.insert(light.entity); global_lights_set.insert(light.entity);
visible_lights.push(light.entity); visible_lights.push(light.entity);
// Calculate an AABB for the light in view space, find the corresponding clusters for the min and max // note: caching seems to be slower than calling twice for this aabb calculation
// points of the AABB, then iterate over just those clusters for this light let (light_aabb_xy_ndc_z_view_min, light_aabb_xy_ndc_z_view_max) =
let light_aabb_view = Aabb { cluster_space_light_aabb(
center: (inverse_view_transform * light_sphere.center.extend(1.0)).xyz(), inverse_view_transform,
half_extents: Vec3::splat(light_sphere.radius), camera.projection_matrix,
}; &light_sphere,
let (light_aabb_view_min, light_aabb_view_max) =
(light_aabb_view.min(), light_aabb_view.max());
// Is there a cheaper way to do this? The problem is that because of perspective
// the point at max z but min xy may be less xy in screenspace, and similar. As
// such, projecting the min and max xy at both the closer and further z and taking
// the min and max of those projected points addresses this.
let (
light_aabb_view_xymin_near,
light_aabb_view_xymin_far,
light_aabb_view_xymax_near,
light_aabb_view_xymax_far,
) = (
light_aabb_view_min,
light_aabb_view_min.xy().extend(light_aabb_view_max.z),
light_aabb_view_max.xy().extend(light_aabb_view_min.z),
light_aabb_view_max,
);
let (
light_aabb_clip_xymin_near,
light_aabb_clip_xymin_far,
light_aabb_clip_xymax_near,
light_aabb_clip_xymax_far,
) = (
camera.projection_matrix * light_aabb_view_xymin_near.extend(1.0),
camera.projection_matrix * light_aabb_view_xymin_far.extend(1.0),
camera.projection_matrix * light_aabb_view_xymax_near.extend(1.0),
camera.projection_matrix * light_aabb_view_xymax_far.extend(1.0),
);
let (
light_aabb_ndc_xymin_near,
light_aabb_ndc_xymin_far,
light_aabb_ndc_xymax_near,
light_aabb_ndc_xymax_far,
) = (
light_aabb_clip_xymin_near.xyz() / light_aabb_clip_xymin_near.w,
light_aabb_clip_xymin_far.xyz() / light_aabb_clip_xymin_far.w,
light_aabb_clip_xymax_near.xyz() / light_aabb_clip_xymax_near.w,
light_aabb_clip_xymax_far.xyz() / light_aabb_clip_xymax_far.w,
);
let (light_aabb_ndc_min, light_aabb_ndc_max) = (
light_aabb_ndc_xymin_near
.min(light_aabb_ndc_xymin_far)
.min(light_aabb_ndc_xymax_near)
.min(light_aabb_ndc_xymax_far),
light_aabb_ndc_xymin_near
.max(light_aabb_ndc_xymin_far)
.max(light_aabb_ndc_xymax_near)
.max(light_aabb_ndc_xymax_far),
); );
let min_cluster = ndc_position_to_cluster( let min_cluster = ndc_position_to_cluster(
clusters.axis_slices, clusters.axis_slices,
cluster_factors, cluster_factors,
is_orthographic, is_orthographic,
light_aabb_ndc_min, light_aabb_xy_ndc_z_view_min,
light_aabb_view_min.z, light_aabb_xy_ndc_z_view_min.z,
); );
let max_cluster = ndc_position_to_cluster( let max_cluster = ndc_position_to_cluster(
clusters.axis_slices, clusters.axis_slices,
cluster_factors, cluster_factors,
is_orthographic, is_orthographic,
light_aabb_ndc_max, light_aabb_xy_ndc_z_view_max,
light_aabb_view_max.z, light_aabb_xy_ndc_z_view_max.z,
); );
let (min_cluster, max_cluster) = let (min_cluster, max_cluster) =
(min_cluster.min(max_cluster), min_cluster.max(max_cluster)); (min_cluster.min(max_cluster), min_cluster.max(max_cluster));
for y in min_cluster.y..=max_cluster.y { for y in min_cluster.y..=max_cluster.y {
let row_offset = y * clusters.axis_slices.x; let row_offset = y * clusters.axis_slices.x;
for x in min_cluster.x..=max_cluster.x { for x in min_cluster.x..=max_cluster.x {

View File

@ -303,6 +303,7 @@ impl SpecializedMeshPipeline for ShadowPipeline {
pub struct ExtractedClusterConfig { pub struct ExtractedClusterConfig {
/// Special near value for cluster calculations /// Special near value for cluster calculations
near: f32, near: f32,
far: f32,
/// Number of clusters in x / y / z in the view frustum /// Number of clusters in x / y / z in the view frustum
axis_slices: UVec3, axis_slices: UVec3,
} }
@ -320,6 +321,7 @@ pub fn extract_clusters(mut commands: Commands, views: Query<(Entity, &Clusters)
}, },
ExtractedClusterConfig { ExtractedClusterConfig {
near: clusters.near, near: clusters.near,
far: clusters.far,
axis_slices: clusters.axis_slices, axis_slices: clusters.axis_slices,
}, },
)); ));
@ -670,7 +672,7 @@ pub fn prepare_lights(
let is_orthographic = extracted_view.projection.w_axis.w == 1.0; let is_orthographic = extracted_view.projection.w_axis.w == 1.0;
let cluster_factors_zw = calculate_cluster_factors( let cluster_factors_zw = calculate_cluster_factors(
clusters.near, clusters.near,
extracted_view.far, clusters.far,
clusters.axis_slices.z as f32, clusters.axis_slices.z as f32,
is_orthographic, is_orthographic,
); );
@ -924,7 +926,7 @@ pub struct ViewClusterBindings {
impl ViewClusterBindings { impl ViewClusterBindings {
pub const MAX_OFFSETS: usize = 16384 / 4; pub const MAX_OFFSETS: usize = 16384 / 4;
const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4; const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4;
const MAX_INDICES: usize = 16384; pub const MAX_INDICES: usize = 16384;
pub fn reserve_and_clear(&mut self) { pub fn reserve_and_clear(&mut self) {
self.cluster_light_index_lists.clear(); self.cluster_light_index_lists.clear();