From 3f6068da3db8038ab69706a40ba714cfd836238d Mon Sep 17 00:00:00 2001 From: robtfm <50659922+robtfm@users.noreply.github.com> Date: Tue, 1 Mar 2022 10:17:41 +0000 Subject: [PATCH] fix issues with too many point lights (#3916) # Objective fix #3915 ## Solution the issues are caused by - lights are assigned to clusters before being filtered down to MAX_POINT_LIGHTS, leading to cluster counts potentially being too high - after fixing the above, packing the count into 8 bits still causes overflow with exactly 256 lights affecting a cluster to fix: ```assign_lights_to_clusters``` - limit extracted lights to MAX_POINT_LIGHTS, selecting based on shadow-caster & intensity (if required) - warn if MAX_POINT_LIGHT count is exceeded ```prepare_lights``` - limit the lights assigned to a cluster to CLUSTER_COUNT_MASK (which is 1 less than MAX_POINT_LIGHTS) to avoid overflowing into the offset bits notes: - a better solution to the overflow may be to use more than 8 bits for cluster_count (the comment states only 14 of the remaining 24 bits are used for the offset). this would touch more of the code base but i'm happy to try if it has some benefit. - intensity is only one way to select lights. it may be worth allowing user configuration of the light filtering, but i can't see a clean way to do that --- crates/bevy_pbr/src/light.rs | 97 ++++++++++++++++++++++++++--- crates/bevy_pbr/src/render/light.rs | 34 +++++----- crates/bevy_pbr/src/render/pbr.wgsl | 6 +- 3 files changed, 111 insertions(+), 26 deletions(-) diff --git a/crates/bevy_pbr/src/light.rs b/crates/bevy_pbr/src/light.rs index 6c0d761017..33781bf774 100644 --- a/crates/bevy_pbr/src/light.rs +++ b/crates/bevy_pbr/src/light.rs @@ -12,11 +12,12 @@ use bevy_render::{ view::{ComputedVisibility, RenderLayers, Visibility, VisibleEntities}, }; use bevy_transform::components::GlobalTransform; +use bevy_utils::tracing::warn; use bevy_window::Windows; use crate::{ calculate_cluster_factors, CubeMapFace, CubemapVisibleEntities, ViewClusterBindings, - CUBE_MAP_FACES, POINT_LIGHT_NEAR_Z, + CUBE_MAP_FACES, MAX_POINT_LIGHTS, POINT_LIGHT_NEAR_Z, }; /// A light that emits light in all directions from a central point. @@ -478,14 +479,91 @@ fn ndc_position_to_cluster( .clamp(UVec3::ZERO, cluster_dimensions - UVec3::ONE) } +// Sort point lights with shadows enabled first, then by a stable key so that the index +// can be used to render at most `MAX_POINT_LIGHT_SHADOW_MAPS` point light shadows, and +// we keep a stable set of lights visible +pub(crate) fn point_light_order( + (entity_1, shadows_enabled_1): (&Entity, &bool), + (entity_2, shadows_enabled_2): (&Entity, &bool), +) -> std::cmp::Ordering { + shadows_enabled_1 + .cmp(shadows_enabled_2) + .reverse() + .then_with(|| entity_1.cmp(entity_2)) +} + +#[derive(Clone, Copy)] +// data required for assigning lights to clusters +pub(crate) struct PointLightAssignmentData { + entity: Entity, + translation: Vec3, + range: f32, + shadows_enabled: bool, +} + // NOTE: Run this before update_point_light_frusta! -pub fn assign_lights_to_clusters( +pub(crate) fn assign_lights_to_clusters( mut commands: Commands, mut global_lights: ResMut, mut views: Query<(Entity, &GlobalTransform, &Camera, &Frustum, &mut Clusters)>, - lights: Query<(Entity, &GlobalTransform, &PointLight)>, + lights_query: Query<(Entity, &GlobalTransform, &PointLight)>, + mut lights: Local>, + mut max_point_lights_warning_emitted: Local, ) { - let light_count = lights.iter().count(); + // collect just the relevant light query data into a persisted vec to avoid reallocating each frame + lights.extend( + lights_query + .iter() + .map(|(entity, transform, light)| PointLightAssignmentData { + entity, + translation: transform.translation, + shadows_enabled: light.shadows_enabled, + range: light.range, + }), + ); + + if lights.len() > MAX_POINT_LIGHTS { + lights.sort_by(|light_1, light_2| { + point_light_order( + (&light_1.entity, &light_1.shadows_enabled), + (&light_2.entity, &light_2.shadows_enabled), + ) + }); + + // check each light against each view's frustum, keep only those that affect at least one of our views + let frusta: Vec<_> = views.iter().map(|(_, _, _, frustum, _)| *frustum).collect(); + let mut lights_in_view_count = 0; + lights.retain(|light| { + // take one extra light to check if we should emit the warning + if lights_in_view_count == MAX_POINT_LIGHTS + 1 { + false + } else { + let light_sphere = Sphere { + center: light.translation, + radius: light.range, + }; + + let light_in_view = frusta + .iter() + .any(|frustum| frustum.intersects_sphere(&light_sphere)); + + if light_in_view { + lights_in_view_count += 1; + } + + light_in_view + } + }); + + if lights.len() > MAX_POINT_LIGHTS && !*max_point_lights_warning_emitted { + warn!("MAX_POINT_LIGHTS ({}) exceeded", MAX_POINT_LIGHTS); + *max_point_lights_warning_emitted = true; + } + + lights.truncate(MAX_POINT_LIGHTS); + } + + let light_count = lights.len(); let mut global_lights_set = HashSet::with_capacity(light_count); for (view_entity, view_transform, camera, frustum, mut clusters) in views.iter_mut() { let view_transform = view_transform.compute_matrix(); @@ -504,9 +582,9 @@ pub fn assign_lights_to_clusters( vec![VisiblePointLights::from_light_count(light_count); cluster_count]; let mut visible_lights = Vec::with_capacity(light_count); - for (light_entity, light_transform, light) in lights.iter() { + for light in lights.iter() { let light_sphere = Sphere { - center: light_transform.translation, + center: light.translation, radius: light.range, }; @@ -516,8 +594,8 @@ pub fn assign_lights_to_clusters( } // NOTE: The light intersects the frustum so it must be visible and part of the global set - global_lights_set.insert(light_entity); - visible_lights.push(light_entity); + global_lights_set.insert(light.entity); + visible_lights.push(light.entity); // Calculate an AABB for the light in view space, find the corresponding clusters for the min and max // points of the AABB, then iterate over just those clusters for this light @@ -599,7 +677,7 @@ pub fn assign_lights_to_clusters( let cluster_index = (col_offset + z) as usize; let cluster_aabb = &clusters.aabbs[cluster_index]; if light_sphere.intersects_obb(cluster_aabb, &view_transform) { - clusters_lights[cluster_index].entities.push(light_entity); + clusters_lights[cluster_index].entities.push(light.entity); } } } @@ -617,6 +695,7 @@ pub fn assign_lights_to_clusters( }); } global_lights.entities = global_lights_set.into_iter().collect(); + lights.clear(); } pub fn update_directional_light_frusta( diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index 227c0eeeee..e5d09ea18d 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -1,7 +1,7 @@ use crate::{ - AmbientLight, Clusters, CubemapVisibleEntities, DirectionalLight, DirectionalLightShadowMap, - DrawMesh, MeshPipeline, NotShadowCaster, PointLight, PointLightShadowMap, SetMeshBindGroup, - VisiblePointLights, SHADOW_SHADER_HANDLE, + point_light_order, AmbientLight, Clusters, CubemapVisibleEntities, DirectionalLight, + DirectionalLightShadowMap, DrawMesh, MeshPipeline, NotShadowCaster, PointLight, + PointLightShadowMap, SetMeshBindGroup, VisiblePointLights, SHADOW_SHADER_HANDLE, }; use bevy_asset::Handle; use bevy_core::FloatOrd; @@ -574,11 +574,10 @@ pub fn prepare_lights( // Sort point lights with shadows enabled first, then by a stable key so that the index can be used // to render at most `MAX_POINT_LIGHT_SHADOW_MAPS` point light shadows. point_lights.sort_by(|(entity_1, light_1), (entity_2, light_2)| { - light_1 - .shadows_enabled - .cmp(&light_2.shadows_enabled) - .reverse() - .then_with(|| entity_1.cmp(entity_2)) + point_light_order( + (entity_1, &light_1.shadows_enabled), + (entity_2, &light_2.shadows_enabled), + ) }); if global_light_meta.entity_to_index.capacity() < point_lights.len() { @@ -588,7 +587,7 @@ pub fn prepare_lights( } let mut gpu_point_lights = [GpuPointLight::default(); MAX_POINT_LIGHTS]; - for (index, &(entity, light)) in point_lights.iter().enumerate().take(MAX_POINT_LIGHTS) { + for (index, &(entity, light)) in point_lights.iter().enumerate() { let mut flags = PointLightFlags::NONE; // Lights are sorted, shadow enabled lights are first if light.shadows_enabled && index < MAX_POINT_LIGHT_SHADOW_MAPS { @@ -877,20 +876,25 @@ pub fn prepare_lights( .write_buffer(&render_device, &render_queue); } -const CLUSTER_OFFSET_MASK: u32 = (1 << 24) - 1; -const CLUSTER_COUNT_SIZE: u32 = 8; -const CLUSTER_COUNT_MASK: u32 = (1 << 8) - 1; +// this must match CLUSTER_COUNT_SIZE in pbr.wgsl +// and must be large enough to contain MAX_POINT_LIGHTS +const CLUSTER_COUNT_SIZE: u32 = 13; + +const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - CLUSTER_COUNT_SIZE)) - 1; +const CLUSTER_COUNT_MASK: u32 = (1 << CLUSTER_COUNT_SIZE) - 1; const POINT_LIGHT_INDEX_MASK: u32 = (1 << 8) - 1; // NOTE: With uniform buffer max binding size as 16384 bytes // that means we can fit say 256 point lights in one uniform // buffer, which means the count can be at most 256 so it -// needs 8 bits. +// needs 9 bits. // The array of indices can also use u8 and that means the // offset in to the array of indices needs to be able to address // 16384 values. log2(16384) = 14 bits. -// This means we can pack the offset into the upper 24 bits of a u32 -// and the count into the lower 8 bits. +// We use 32 bits to store the pair, so we choose to divide the +// remaining 9 bits proportionally to give some future room. +// This means we can pack the offset into the upper 19 bits of a u32 +// and the count into the lower 13 bits. // NOTE: This assumes CPU and GPU endianness are the same which is true // for all common and tested x86/ARM CPUs and AMD/NVIDIA/Intel/Apple/etc GPUs fn pack_offset_and_count(offset: usize, count: usize) -> u32 { diff --git a/crates/bevy_pbr/src/render/pbr.wgsl b/crates/bevy_pbr/src/render/pbr.wgsl index abf9dfaae7..073a77fb15 100644 --- a/crates/bevy_pbr/src/render/pbr.wgsl +++ b/crates/bevy_pbr/src/render/pbr.wgsl @@ -267,13 +267,15 @@ struct ClusterOffsetAndCount { count: u32; }; +// this must match CLUSTER_COUNT_SIZE in light.rs +let CLUSTER_COUNT_SIZE = 13u; fn unpack_offset_and_count(cluster_index: u32) -> ClusterOffsetAndCount { let offset_and_count = cluster_offsets_and_counts.data[cluster_index >> 2u][cluster_index & ((1u << 2u) - 1u)]; var output: ClusterOffsetAndCount; // The offset is stored in the upper 24 bits - output.offset = (offset_and_count >> 8u) & ((1u << 24u) - 1u); + output.offset = (offset_and_count >> CLUSTER_COUNT_SIZE) & ((1u << 32u - CLUSTER_COUNT_SIZE) - 1u); // The count is stored in the lower 8 bits - output.count = offset_and_count & ((1u << 8u) - 1u); + output.count = offset_and_count & ((1u << CLUSTER_COUNT_SIZE) - 1u); return output; }