Use storage buffers for clustered forward point lights (#3989)

# Objective

- Make use of storage buffers, where they are available, for clustered forward bindings to support far more point lights in a scene
- Fixes #3605 
- Based on top of #4079 

This branch on an M1 Max can keep 60fps with about 2150 point lights of radius 1m in the Sponza scene where I've been testing. The bottleneck is mostly assigning lights to clusters, the cost of which grows faster than linearly with the number of lights (I think 1000 lights was about 1.5ms and 5000 was 7.5ms). I have seen papers and presentations leveraging compute shaders that can get this up to over 1 million lights. That said, I think any further optimisations should probably be done in a separate PR.

## Solution

- Add `RenderDevice` to the `Material` and `SpecializedMaterial` trait `::key()` functions to allow setting flags on the keys depending on feature/limit availability
- Make `GpuPointLights` and `ViewClusterBuffers` into enums containing `UniformVec` and `StorageBuffer` variants. Implement the necessary API on them so that usage is the same for both cases and the only difference is at initialisation time.
- Add appropriate shader defs in the shader code to handle the two cases

## Context on some decisions / open questions

- I'm using `max_storage_buffers_per_shader_stage >= 3` as a check to see if storage buffers are supported. I was thinking about diving into 'binding resource management' but it feels like we don't have enough use cases to understand the problem yet, and it is mostly a separate concern to this PR, so I think it should be handled separately.
- Should `ViewClusterBuffers` and `ViewClusterBindings` be merged, duplicating the count variables into the enum variants?


Co-authored-by: Carter Anderson <mcanders1@gmail.com>
This commit is contained in:
Robert Swain 2022-04-07 16:16:35 +00:00
parent 579928e8e0
commit c5963b4fd5
16 changed files with 622 additions and 132 deletions

View File

@ -612,7 +612,10 @@ min_sdk_version = 16
target_sdk_version = 29 target_sdk_version = 29
# Stress Tests # Stress Tests
[[example]]
name = "many_lights"
path = "examples/stress_tests/many_lights.rs"
[[example]] [[example]]
name = "transform_hierarchy" name = "transform_hierarchy"
path = "examples/stress_tests/transform_hierarchy.rs" path = "examples/stress_tests/transform_hierarchy.rs"

View File

@ -150,12 +150,10 @@ impl Plugin for PbrPlugin {
) )
.add_system_to_stage( .add_system_to_stage(
RenderStage::Prepare, RenderStage::Prepare,
// this is added as an exclusive system because it contributes new views. it must run (and have Commands applied) // NOTE: This needs to run after prepare_lights. As prepare_lights is an exclusive system,
// _before_ the `prepare_views()` system is run. ideally this becomes a normal system when "stageless" features come out // just adding it to the non-exclusive systems in the Prepare stage means it runs after
render::prepare_clusters // prepare_lights.
.exclusive_system() render::prepare_clusters.label(RenderLightSystems::PrepareClusters),
.label(RenderLightSystems::PrepareClusters)
.after(RenderLightSystems::PrepareLights),
) )
.add_system_to_stage( .add_system_to_stage(
RenderStage::Queue, RenderStage::Queue,

View File

@ -9,6 +9,8 @@ use bevy_render::{
color::Color, color::Color,
prelude::Image, prelude::Image,
primitives::{Aabb, CubemapFrusta, Frustum, Sphere}, primitives::{Aabb, CubemapFrusta, Frustum, Sphere},
render_resource::BufferBindingType,
renderer::RenderDevice,
view::{ComputedVisibility, RenderLayers, Visibility, VisibleEntities}, view::{ComputedVisibility, RenderLayers, Visibility, VisibleEntities},
}; };
use bevy_transform::components::GlobalTransform; use bevy_transform::components::GlobalTransform;
@ -17,7 +19,8 @@ use bevy_window::Windows;
use crate::{ use crate::{
calculate_cluster_factors, CubeMapFace, CubemapVisibleEntities, ViewClusterBindings, calculate_cluster_factors, CubeMapFace, CubemapVisibleEntities, ViewClusterBindings,
CUBE_MAP_FACES, MAX_POINT_LIGHTS, POINT_LIGHT_NEAR_Z, CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, CUBE_MAP_FACES, MAX_UNIFORM_BUFFER_POINT_LIGHTS,
POINT_LIGHT_NEAR_Z,
}; };
/// A light that emits light in all directions from a central point. /// A light that emits light in all directions from a central point.
@ -709,6 +712,7 @@ pub(crate) fn assign_lights_to_clusters(
lights_query: Query<(Entity, &GlobalTransform, &PointLight, &Visibility)>, lights_query: Query<(Entity, &GlobalTransform, &PointLight, &Visibility)>,
mut lights: Local<Vec<PointLightAssignmentData>>, mut lights: Local<Vec<PointLightAssignmentData>>,
mut max_point_lights_warning_emitted: Local<bool>, mut max_point_lights_warning_emitted: Local<bool>,
render_device: Res<RenderDevice>,
) { ) {
global_lights.entities.clear(); global_lights.entities.clear();
lights.clear(); lights.clear();
@ -727,7 +731,13 @@ pub(crate) fn assign_lights_to_clusters(
), ),
); );
if lights.len() > MAX_POINT_LIGHTS { let clustered_forward_buffer_binding_type =
render_device.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT);
let supports_storage_buffers = matches!(
clustered_forward_buffer_binding_type,
BufferBindingType::Storage { .. }
);
if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !supports_storage_buffers {
lights.sort_by(|light_1, light_2| { lights.sort_by(|light_1, light_2| {
point_light_order( point_light_order(
(&light_1.entity, &light_1.shadows_enabled), (&light_1.entity, &light_1.shadows_enabled),
@ -743,7 +753,7 @@ pub(crate) fn assign_lights_to_clusters(
let mut lights_in_view_count = 0; let mut lights_in_view_count = 0;
lights.retain(|light| { lights.retain(|light| {
// take one extra light to check if we should emit the warning // take one extra light to check if we should emit the warning
if lights_in_view_count == MAX_POINT_LIGHTS + 1 { if lights_in_view_count == MAX_UNIFORM_BUFFER_POINT_LIGHTS + 1 {
false false
} else { } else {
let light_sphere = Sphere { let light_sphere = Sphere {
@ -763,12 +773,15 @@ pub(crate) fn assign_lights_to_clusters(
} }
}); });
if lights.len() > MAX_POINT_LIGHTS && !*max_point_lights_warning_emitted { if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !*max_point_lights_warning_emitted {
warn!("MAX_POINT_LIGHTS ({}) exceeded", MAX_POINT_LIGHTS); warn!(
"MAX_UNIFORM_BUFFER_POINT_LIGHTS ({}) exceeded",
MAX_UNIFORM_BUFFER_POINT_LIGHTS
);
*max_point_lights_warning_emitted = true; *max_point_lights_warning_emitted = true;
} }
lights.truncate(MAX_POINT_LIGHTS); lights.truncate(MAX_UNIFORM_BUFFER_POINT_LIGHTS);
} }
for (view_entity, camera_transform, camera, frustum, config, clusters, mut visible_lights) in for (view_entity, camera_transform, camera, frustum, config, clusters, mut visible_lights) in

View File

@ -39,7 +39,7 @@ use std::marker::PhantomData;
/// way to render [`Mesh`] entities with custom shader logic. For materials that can specialize their [`RenderPipelineDescriptor`] /// way to render [`Mesh`] entities with custom shader logic. For materials that can specialize their [`RenderPipelineDescriptor`]
/// based on specific material values, see [`SpecializedMaterial`]. [`Material`] automatically implements [`SpecializedMaterial`] /// based on specific material values, see [`SpecializedMaterial`]. [`Material`] automatically implements [`SpecializedMaterial`]
/// and can be used anywhere that type is used (such as [`MaterialPlugin`]). /// and can be used anywhere that type is used (such as [`MaterialPlugin`]).
pub trait Material: Asset + RenderAsset { pub trait Material: Asset + RenderAsset + Sized {
/// Returns this material's [`BindGroup`]. This should match the layout returned by [`Material::bind_group_layout`]. /// Returns this material's [`BindGroup`]. This should match the layout returned by [`Material::bind_group_layout`].
fn bind_group(material: &<Self as RenderAsset>::PreparedAsset) -> &BindGroup; fn bind_group(material: &<Self as RenderAsset>::PreparedAsset) -> &BindGroup;
@ -78,6 +78,7 @@ pub trait Material: Asset + RenderAsset {
#[allow(unused_variables)] #[allow(unused_variables)]
#[inline] #[inline]
fn specialize( fn specialize(
pipeline: &MaterialPipeline<Self>,
descriptor: &mut RenderPipelineDescriptor, descriptor: &mut RenderPipelineDescriptor,
layout: &MeshVertexBufferLayout, layout: &MeshVertexBufferLayout,
) -> Result<(), SpecializedMeshPipelineError> { ) -> Result<(), SpecializedMeshPipelineError> {
@ -93,11 +94,12 @@ impl<M: Material> SpecializedMaterial for M {
#[inline] #[inline]
fn specialize( fn specialize(
pipeline: &MaterialPipeline<Self>,
descriptor: &mut RenderPipelineDescriptor, descriptor: &mut RenderPipelineDescriptor,
_key: Self::Key, _key: Self::Key,
layout: &MeshVertexBufferLayout, layout: &MeshVertexBufferLayout,
) -> Result<(), SpecializedMeshPipelineError> { ) -> Result<(), SpecializedMeshPipelineError> {
<M as Material>::specialize(descriptor, layout) <M as Material>::specialize(pipeline, descriptor, layout)
} }
#[inline] #[inline]
@ -137,7 +139,7 @@ impl<M: Material> SpecializedMaterial for M {
/// way to render [`Mesh`] entities with custom shader logic. [`SpecializedMaterials`](SpecializedMaterial) use their [`SpecializedMaterial::Key`] /// way to render [`Mesh`] entities with custom shader logic. [`SpecializedMaterials`](SpecializedMaterial) use their [`SpecializedMaterial::Key`]
/// to customize their [`RenderPipelineDescriptor`] based on specific material values. The slightly simpler [`Material`] trait /// to customize their [`RenderPipelineDescriptor`] based on specific material values. The slightly simpler [`Material`] trait
/// should be used for materials that do not need specialization. [`Material`] types automatically implement [`SpecializedMaterial`]. /// should be used for materials that do not need specialization. [`Material`] types automatically implement [`SpecializedMaterial`].
pub trait SpecializedMaterial: Asset + RenderAsset { pub trait SpecializedMaterial: Asset + RenderAsset + Sized {
/// The key used to specialize this material's [`RenderPipelineDescriptor`]. /// The key used to specialize this material's [`RenderPipelineDescriptor`].
type Key: PartialEq + Eq + Hash + Clone + Send + Sync; type Key: PartialEq + Eq + Hash + Clone + Send + Sync;
@ -148,6 +150,7 @@ pub trait SpecializedMaterial: Asset + RenderAsset {
/// Specializes the given `descriptor` according to the given `key`. /// Specializes the given `descriptor` according to the given `key`.
fn specialize( fn specialize(
pipeline: &MaterialPipeline<Self>,
descriptor: &mut RenderPipelineDescriptor, descriptor: &mut RenderPipelineDescriptor,
key: Self::Key, key: Self::Key,
layout: &MeshVertexBufferLayout, layout: &MeshVertexBufferLayout,
@ -251,7 +254,7 @@ impl<M: SpecializedMaterial> SpecializedMeshPipeline for MaterialPipeline<M> {
let descriptor_layout = descriptor.layout.as_mut().unwrap(); let descriptor_layout = descriptor.layout.as_mut().unwrap();
descriptor_layout.insert(1, self.material_layout.clone()); descriptor_layout.insert(1, self.material_layout.clone());
M::specialize(&mut descriptor, key.material_key, layout)?; M::specialize(self, &mut descriptor, key.material_key, layout)?;
Ok(descriptor) Ok(descriptor)
} }
} }

View File

@ -378,6 +378,7 @@ impl SpecializedMaterial for StandardMaterial {
} }
fn specialize( fn specialize(
_pipeline: &MaterialPipeline<Self>,
descriptor: &mut RenderPipelineDescriptor, descriptor: &mut RenderPipelineDescriptor,
key: Self::Key, key: Self::Key,
_layout: &MeshVertexBufferLayout, _layout: &MeshVertexBufferLayout,

View File

@ -10,7 +10,7 @@ use bevy_ecs::{
prelude::*, prelude::*,
system::{lifetimeless::*, SystemParamItem}, system::{lifetimeless::*, SystemParamItem},
}; };
use bevy_math::{const_vec3, Mat4, UVec3, UVec4, Vec2, Vec3, Vec4, Vec4Swizzles}; use bevy_math::{const_vec3, Mat4, UVec2, UVec3, UVec4, Vec2, Vec3, Vec4, Vec4Swizzles};
use bevy_render::{ use bevy_render::{
camera::{Camera, CameraProjection}, camera::{Camera, CameraProjection},
color::Color, color::Color,
@ -22,7 +22,7 @@ use bevy_render::{
EntityRenderCommand, PhaseItem, RenderCommandResult, RenderPhase, SetItemPipeline, EntityRenderCommand, PhaseItem, RenderCommandResult, RenderPhase, SetItemPipeline,
TrackedRenderPass, TrackedRenderPass,
}, },
render_resource::{std140::AsStd140, *}, render_resource::{std140::AsStd140, std430::AsStd430, *},
renderer::{RenderContext, RenderDevice, RenderQueue}, renderer::{RenderContext, RenderDevice, RenderQueue},
texture::*, texture::*,
view::{ view::{
@ -81,7 +81,7 @@ pub struct ExtractedDirectionalLight {
pub type ExtractedDirectionalLightShadowMap = DirectionalLightShadowMap; pub type ExtractedDirectionalLightShadowMap = DirectionalLightShadowMap;
#[repr(C)] #[repr(C)]
#[derive(Copy, Clone, AsStd140, Default, Debug)] #[derive(Copy, Clone, AsStd140, AsStd430, Default, Debug)]
pub struct GpuPointLight { pub struct GpuPointLight {
// The lower-right 2x2 values of the projection matrix 22 23 32 33 // The lower-right 2x2 values of the projection matrix 22 23 32 33
projection_lr: Vec4, projection_lr: Vec4,
@ -92,9 +92,84 @@ pub struct GpuPointLight {
shadow_normal_bias: f32, shadow_normal_bias: f32,
} }
#[derive(AsStd140)] pub enum GpuPointLights {
pub struct GpuPointLights { Uniform {
data: [GpuPointLight; MAX_POINT_LIGHTS], buffer: UniformVec<[GpuPointLight; MAX_UNIFORM_BUFFER_POINT_LIGHTS]>,
},
Storage {
buffer: StorageBuffer<GpuPointLight>,
},
}
// Shared API over the `Uniform` and `Storage` variants of `GpuPointLights`, so that
// after construction callers use both variants through the same methods.
impl GpuPointLights {
/// Builds the variant matching `buffer_binding_type`: `Storage { .. }` selects the
/// storage-buffer variant and `Uniform` selects the uniform-buffer variant.
fn new(buffer_binding_type: BufferBindingType) -> Self {
match buffer_binding_type {
BufferBindingType::Storage { .. } => Self::storage(),
BufferBindingType::Uniform => Self::uniform(),
}
}
/// Creates the `Uniform` variant wrapping a default-constructed `UniformVec`.
fn uniform() -> Self {
Self::Uniform {
buffer: UniformVec::default(),
}
}
/// Creates the `Storage` variant wrapping a default-constructed `StorageBuffer`.
fn storage() -> Self {
Self::Storage {
buffer: StorageBuffer::default(),
}
}
/// Clears the underlying buffer of whichever variant is active.
fn clear(&mut self) {
match self {
GpuPointLights::Uniform { buffer } => buffer.clear(),
GpuPointLights::Storage { buffer } => buffer.clear(),
}
}
/// Adds `lights` to the underlying buffer.
///
/// - `Uniform`: the lights are packed into a single array of exactly
///   `MAX_UNIFORM_BUFFER_POINT_LIGHTS` entries which is pushed as one element.
///   Missing entries are filled with `GpuPointLight::default()`; any lights beyond
///   that count are dropped by the `take`.
/// - `Storage`: all lights are appended as-is.
fn push(&mut self, mut lights: Vec<GpuPointLight>) {
match self {
GpuPointLights::Uniform { buffer } => {
// NOTE: This iterator construction allows moving and padding with default
// values and is like this to avoid unnecessary cloning.
let gpu_point_lights = lights
.drain(..)
.chain(std::iter::repeat_with(GpuPointLight::default))
.take(MAX_UNIFORM_BUFFER_POINT_LIGHTS)
.collect::<Vec<_>>();
// The padding iterator is endless, so the Vec always holds exactly
// MAX_UNIFORM_BUFFER_POINT_LIGHTS items and the array conversion cannot fail.
buffer.push(gpu_point_lights.try_into().unwrap());
}
GpuPointLights::Storage { buffer } => {
buffer.append(&mut lights);
}
}
}
/// Forwards to the active buffer's `write_buffer` with the given device and queue.
fn write_buffer(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) {
match self {
GpuPointLights::Uniform { buffer } => buffer.write_buffer(render_device, render_queue),
GpuPointLights::Storage { buffer } => buffer.write_buffer(render_device, render_queue),
}
}
/// Returns the active buffer's binding resource, or `None` if that buffer reports none.
pub fn binding(&self) -> Option<BindingResource> {
match self {
GpuPointLights::Uniform { buffer } => buffer.binding(),
GpuPointLights::Storage { buffer } => buffer.binding(),
}
}
/// Returns the length reported by the active buffer.
///
/// NOTE(review): for `Uniform` this is `UniformVec::len()` over elements that are
/// whole arrays, so it presumably counts pushed arrays rather than individual
/// lights, whereas `Storage` counts the stored `GpuPointLight` values — confirm
/// callers do not compare the two.
pub fn len(&self) -> usize {
match self {
GpuPointLights::Uniform { buffer } => buffer.len(),
GpuPointLights::Storage { buffer } => buffer.values().len(),
}
}
/// Returns `true` when `len()` is zero.
pub fn is_empty(&self) -> bool {
self.len() == 0
}
}
// NOTE: These must match the bit flags in bevy_pbr2/src/render/pbr.frag! // NOTE: These must match the bit flags in bevy_pbr2/src/render/pbr.frag!
@ -144,7 +219,7 @@ pub struct GpuLights {
} }
// NOTE: this must be kept in sync with the same constants in pbr.frag // NOTE: this must be kept in sync with the same constants in pbr.frag
pub const MAX_POINT_LIGHTS: usize = 256; pub const MAX_UNIFORM_BUFFER_POINT_LIGHTS: usize = 256;
// FIXME: How should we handle shadows for clustered forward? Limiting to maximum 10 // FIXME: How should we handle shadows for clustered forward? Limiting to maximum 10
// point light shadow maps for now // point light shadow maps for now
#[cfg(feature = "webgl")] #[cfg(feature = "webgl")]
@ -346,13 +421,13 @@ pub fn extract_clusters(mut commands: Commands, views: Query<(Entity, &Clusters)
} }
} }
#[allow(clippy::too_many_arguments)]
pub fn extract_lights( pub fn extract_lights(
mut commands: Commands, mut commands: Commands,
ambient_light: Res<AmbientLight>, ambient_light: Res<AmbientLight>,
point_light_shadow_map: Res<PointLightShadowMap>, point_light_shadow_map: Res<PointLightShadowMap>,
directional_light_shadow_map: Res<DirectionalLightShadowMap>, directional_light_shadow_map: Res<DirectionalLightShadowMap>,
global_point_lights: Res<GlobalVisiblePointLights>, global_point_lights: Res<GlobalVisiblePointLights>,
// visible_point_lights: Query<&VisiblePointLights>,
mut point_lights: Query<(&PointLight, &mut CubemapVisibleEntities, &GlobalTransform)>, mut point_lights: Query<(&PointLight, &mut CubemapVisibleEntities, &GlobalTransform)>,
mut directional_lights: Query<( mut directional_lights: Query<(
Entity, Entity,
@ -361,6 +436,7 @@ pub fn extract_lights(
&GlobalTransform, &GlobalTransform,
&Visibility, &Visibility,
)>, )>,
mut previous_point_lights_len: Local<usize>,
) { ) {
commands.insert_resource(ExtractedAmbientLight { commands.insert_resource(ExtractedAmbientLight {
color: ambient_light.color, color: ambient_light.color,
@ -379,32 +455,38 @@ pub fn extract_lights(
// https://catlikecoding.com/unity/tutorials/custom-srp/point-and-spot-shadows/ // https://catlikecoding.com/unity/tutorials/custom-srp/point-and-spot-shadows/
let point_light_texel_size = 2.0 / point_light_shadow_map.size as f32; let point_light_texel_size = 2.0 / point_light_shadow_map.size as f32;
let mut point_lights_values = Vec::with_capacity(*previous_point_lights_len);
for entity in global_point_lights.iter().copied() { for entity in global_point_lights.iter().copied() {
if let Ok((point_light, cubemap_visible_entities, transform)) = point_lights.get_mut(entity) if let Ok((point_light, cubemap_visible_entities, transform)) = point_lights.get_mut(entity)
{ {
let render_cubemap_visible_entities = let render_cubemap_visible_entities =
std::mem::take(cubemap_visible_entities.into_inner()); std::mem::take(cubemap_visible_entities.into_inner());
commands.get_or_spawn(entity).insert_bundle(( point_lights_values.push((
ExtractedPointLight { entity,
color: point_light.color, (
// NOTE: Map from luminous power in lumens to luminous intensity in lumens per steradian ExtractedPointLight {
// for a point light. See https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminousPower color: point_light.color,
// for details. // NOTE: Map from luminous power in lumens to luminous intensity in lumens per steradian
intensity: point_light.intensity / (4.0 * std::f32::consts::PI), // for a point light. See https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminousPower
range: point_light.range, // for details.
radius: point_light.radius, intensity: point_light.intensity / (4.0 * std::f32::consts::PI),
transform: *transform, range: point_light.range,
shadows_enabled: point_light.shadows_enabled, radius: point_light.radius,
shadow_depth_bias: point_light.shadow_depth_bias, transform: *transform,
// The factor of SQRT_2 is for the worst-case diagonal offset shadows_enabled: point_light.shadows_enabled,
shadow_normal_bias: point_light.shadow_normal_bias shadow_depth_bias: point_light.shadow_depth_bias,
* point_light_texel_size // The factor of SQRT_2 is for the worst-case diagonal offset
* std::f32::consts::SQRT_2, shadow_normal_bias: point_light.shadow_normal_bias
}, * point_light_texel_size
render_cubemap_visible_entities, * std::f32::consts::SQRT_2,
},
render_cubemap_visible_entities,
),
)); ));
} }
} }
*previous_point_lights_len = point_lights_values.len();
commands.insert_or_spawn_batch(point_lights_values);
for (entity, directional_light, visible_entities, transform, visibility) in for (entity, directional_light, visible_entities, transform, visibility) in
directional_lights.iter_mut() directional_lights.iter_mut()
@ -528,12 +610,34 @@ pub struct ViewLightsUniformOffset {
pub offset: u32, pub offset: u32,
} }
#[derive(Default)] // NOTE: Clustered-forward rendering requires 3 storage buffer bindings so check that
// at least that many are supported using this constant and SupportedBindingType::from_device()
pub const CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT: u32 = 3;
pub struct GlobalLightMeta { pub struct GlobalLightMeta {
pub gpu_point_lights: UniformVec<GpuPointLights>, pub gpu_point_lights: GpuPointLights,
pub entity_to_index: HashMap<Entity, usize>, pub entity_to_index: HashMap<Entity, usize>,
} }
// Replaces the previously derived `Default`: the buffer kind now depends on the
// render device, so construction needs access to the `World`.
impl FromWorld for GlobalLightMeta {
/// Reads the `RenderDevice` resource, asks it which read-only binding type is
/// supported for `CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT` storage buffers, and
/// builds the meta with that binding type.
fn from_world(world: &mut World) -> Self {
Self::new(
world
.resource::<RenderDevice>()
.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT),
)
}
}
impl GlobalLightMeta {
/// Creates an empty `GlobalLightMeta` whose point light buffer variant is chosen
/// by `buffer_binding_type` and whose entity-to-index map starts out empty.
pub fn new(buffer_binding_type: BufferBindingType) -> Self {
Self {
gpu_point_lights: GpuPointLights::new(buffer_binding_type),
entity_to_index: HashMap::default(),
}
}
}
#[derive(Default)] #[derive(Default)]
pub struct LightMeta { pub struct LightMeta {
pub view_gpu_lights: DynamicUniformVec<GpuLights>, pub view_gpu_lights: DynamicUniformVec<GpuLights>,
@ -615,14 +719,14 @@ pub fn prepare_lights(
.reserve(point_lights.len()); .reserve(point_lights.len());
} }
let mut gpu_point_lights = [GpuPointLight::default(); MAX_POINT_LIGHTS]; let mut gpu_point_lights = Vec::new();
for (index, &(entity, light)) in point_lights.iter().enumerate() { for (index, &(entity, light)) in point_lights.iter().enumerate() {
let mut flags = PointLightFlags::NONE; let mut flags = PointLightFlags::NONE;
// Lights are sorted, shadow enabled lights are first // Lights are sorted, shadow enabled lights are first
if light.shadows_enabled && index < MAX_POINT_LIGHT_SHADOW_MAPS { if light.shadows_enabled && index < MAX_POINT_LIGHT_SHADOW_MAPS {
flags |= PointLightFlags::SHADOWS_ENABLED; flags |= PointLightFlags::SHADOWS_ENABLED;
} }
gpu_point_lights[index] = GpuPointLight { gpu_point_lights.push(GpuPointLight {
projection_lr: Vec4::new( projection_lr: Vec4::new(
cube_face_projection.z_axis.z, cube_face_projection.z_axis.z,
cube_face_projection.z_axis.w, cube_face_projection.z_axis.w,
@ -639,12 +743,10 @@ pub fn prepare_lights(
flags: flags.bits, flags: flags.bits,
shadow_depth_bias: light.shadow_depth_bias, shadow_depth_bias: light.shadow_depth_bias,
shadow_normal_bias: light.shadow_normal_bias, shadow_normal_bias: light.shadow_normal_bias,
}; });
global_light_meta.entity_to_index.insert(entity, index); global_light_meta.entity_to_index.insert(entity, index);
} }
global_light_meta.gpu_point_lights.push(GpuPointLights { global_light_meta.gpu_point_lights.push(gpu_point_lights);
data: gpu_point_lights,
});
global_light_meta global_light_meta
.gpu_point_lights .gpu_point_lights
.write_buffer(&render_device, &render_queue); .write_buffer(&render_device, &render_queue);
@ -906,7 +1008,7 @@ pub fn prepare_lights(
} }
// this must match CLUSTER_COUNT_SIZE in pbr.wgsl // this must match CLUSTER_COUNT_SIZE in pbr.wgsl
// and must be large enough to contain MAX_POINT_LIGHTS // and must be large enough to contain MAX_UNIFORM_BUFFER_POINT_LIGHTS
const CLUSTER_COUNT_SIZE: u32 = 13; const CLUSTER_COUNT_SIZE: u32 = 13;
const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - CLUSTER_COUNT_SIZE)) - 1; const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - CLUSTER_COUNT_SIZE)) - 1;
@ -931,14 +1033,47 @@ fn pack_offset_and_count(offset: usize, count: usize) -> u32 {
| (count as u32 & CLUSTER_COUNT_MASK) | (count as u32 & CLUSTER_COUNT_MASK)
} }
#[derive(Component, Default)] enum ViewClusterBuffers {
Uniform {
// NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment
cluster_light_index_lists: UniformVec<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>,
// NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment
cluster_offsets_and_counts: UniformVec<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>,
},
Storage {
cluster_light_index_lists: StorageBuffer<u32>,
cluster_offsets_and_counts: StorageBuffer<UVec2>,
},
}
// Constructors for the two buffer representations of the per-view cluster data.
impl ViewClusterBuffers {
/// Builds the variant matching `buffer_binding_type`: `Storage { .. }` selects
/// storage buffers and `Uniform` selects uniform buffers.
fn new(buffer_binding_type: BufferBindingType) -> Self {
match buffer_binding_type {
BufferBindingType::Storage { .. } => Self::storage(),
BufferBindingType::Uniform => Self::uniform(),
}
}
/// Creates the `Uniform` variant with both `UniformVec`s default-constructed.
fn uniform() -> Self {
ViewClusterBuffers::Uniform {
cluster_light_index_lists: UniformVec::default(),
cluster_offsets_and_counts: UniformVec::default(),
}
}
/// Creates the `Storage` variant with both `StorageBuffer`s default-constructed.
fn storage() -> Self {
ViewClusterBuffers::Storage {
cluster_light_index_lists: StorageBuffer::default(),
cluster_offsets_and_counts: StorageBuffer::default(),
}
}
}
#[derive(Component)]
pub struct ViewClusterBindings { pub struct ViewClusterBindings {
n_indices: usize, n_indices: usize,
// NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment
pub cluster_light_index_lists: UniformVec<[UVec4; Self::MAX_UNIFORM_ITEMS]>,
n_offsets: usize, n_offsets: usize,
// NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment buffers: ViewClusterBuffers,
pub cluster_offsets_and_counts: UniformVec<[UVec4; Self::MAX_UNIFORM_ITEMS]>,
} }
impl ViewClusterBindings { impl ViewClusterBindings {
@ -946,25 +1081,59 @@ impl ViewClusterBindings {
const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4; const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4;
pub const MAX_INDICES: usize = 16384; pub const MAX_INDICES: usize = 16384;
/// Creates bindings with zeroed index/offset counters and buffers of the kind
/// selected by `buffer_binding_type`.
pub fn new(buffer_binding_type: BufferBindingType) -> Self {
Self {
n_indices: 0,
n_offsets: 0,
buffers: ViewClusterBuffers::new(buffer_binding_type),
}
}
pub fn reserve_and_clear(&mut self) { pub fn reserve_and_clear(&mut self) {
self.cluster_light_index_lists.clear(); match &mut self.buffers {
self.cluster_light_index_lists ViewClusterBuffers::Uniform {
.push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); cluster_light_index_lists,
self.cluster_offsets_and_counts.clear(); cluster_offsets_and_counts,
self.cluster_offsets_and_counts } => {
.push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); cluster_light_index_lists.clear();
cluster_light_index_lists.push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]);
cluster_offsets_and_counts.clear();
cluster_offsets_and_counts.push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]);
}
ViewClusterBuffers::Storage {
cluster_light_index_lists,
cluster_offsets_and_counts,
..
} => {
cluster_light_index_lists.clear();
cluster_offsets_and_counts.clear();
}
}
} }
pub fn push_offset_and_count(&mut self, offset: usize, count: usize) { pub fn push_offset_and_count(&mut self, offset: usize, count: usize) {
let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4 match &mut self.buffers {
if array_index >= Self::MAX_UNIFORM_ITEMS { ViewClusterBuffers::Uniform {
warn!("cluster offset and count out of bounds!"); cluster_offsets_and_counts,
return; ..
} } => {
let component = self.n_offsets & ((1 << 2) - 1); let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4
let packed = pack_offset_and_count(offset, count); if array_index >= Self::MAX_UNIFORM_ITEMS {
warn!("cluster offset and count out of bounds!");
return;
}
let component = self.n_offsets & ((1 << 2) - 1);
let packed = pack_offset_and_count(offset, count);
self.cluster_offsets_and_counts.get_mut(0)[array_index][component] = packed; cluster_offsets_and_counts.get_mut(0)[array_index][component] = packed;
}
ViewClusterBuffers::Storage {
cluster_offsets_and_counts,
..
} => {
cluster_offsets_and_counts.push(UVec2::new(offset as u32, count as u32));
}
}
self.n_offsets += 1; self.n_offsets += 1;
} }
@ -974,22 +1143,81 @@ impl ViewClusterBindings {
} }
pub fn push_index(&mut self, index: usize) { pub fn push_index(&mut self, index: usize) {
let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16 match &mut self.buffers {
let component = (self.n_indices >> 2) & ((1 << 2) - 1); ViewClusterBuffers::Uniform {
let sub_index = self.n_indices & ((1 << 2) - 1); cluster_light_index_lists,
let index = index as u32 & POINT_LIGHT_INDEX_MASK; ..
} => {
let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16
let component = (self.n_indices >> 2) & ((1 << 2) - 1);
let sub_index = self.n_indices & ((1 << 2) - 1);
let index = index as u32 & POINT_LIGHT_INDEX_MASK;
self.cluster_light_index_lists.get_mut(0)[array_index][component] |= cluster_light_index_lists.get_mut(0)[array_index][component] |=
index << (8 * sub_index); index << (8 * sub_index);
}
ViewClusterBuffers::Storage {
cluster_light_index_lists,
..
} => {
cluster_light_index_lists.push(index as u32);
}
}
self.n_indices += 1; self.n_indices += 1;
} }
/// Calls `write_buffer` on both cluster buffers of the active variant: first the
/// light index lists, then the offsets and counts.
pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) {
match &mut self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
cluster_offsets_and_counts,
} => {
cluster_light_index_lists.write_buffer(render_device, render_queue);
cluster_offsets_and_counts.write_buffer(render_device, render_queue);
}
ViewClusterBuffers::Storage {
cluster_light_index_lists,
cluster_offsets_and_counts,
} => {
cluster_light_index_lists.write_buffer(render_device, render_queue);
cluster_offsets_and_counts.write_buffer(render_device, render_queue);
}
}
}
/// Returns the binding resource of the cluster light index lists buffer for the
/// active variant, or `None` if that buffer reports no binding.
pub fn light_index_lists_binding(&self) -> Option<BindingResource> {
match &self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
..
} => cluster_light_index_lists.binding(),
ViewClusterBuffers::Storage {
cluster_light_index_lists,
..
} => cluster_light_index_lists.binding(),
}
}
/// Returns the binding resource of the cluster offsets-and-counts buffer for the
/// active variant, or `None` if that buffer reports no binding.
pub fn offsets_and_counts_binding(&self) -> Option<BindingResource> {
match &self.buffers {
ViewClusterBuffers::Uniform {
cluster_offsets_and_counts,
..
} => cluster_offsets_and_counts.binding(),
ViewClusterBuffers::Storage {
cluster_offsets_and_counts,
..
} => cluster_offsets_and_counts.binding(),
}
}
} }
pub fn prepare_clusters( pub fn prepare_clusters(
mut commands: Commands, mut commands: Commands,
render_device: Res<RenderDevice>, render_device: Res<RenderDevice>,
render_queue: Res<RenderQueue>, render_queue: Res<RenderQueue>,
mesh_pipeline: Res<MeshPipeline>,
global_light_meta: Res<GlobalLightMeta>, global_light_meta: Res<GlobalLightMeta>,
views: Query< views: Query<
( (
@ -1000,8 +1228,14 @@ pub fn prepare_clusters(
With<RenderPhase<Transparent3d>>, With<RenderPhase<Transparent3d>>,
>, >,
) { ) {
let render_device = render_device.into_inner();
let supports_storage_buffers = matches!(
mesh_pipeline.clustered_forward_buffer_binding_type,
BufferBindingType::Storage { .. }
);
for (entity, cluster_config, extracted_clusters) in views.iter() { for (entity, cluster_config, extracted_clusters) in views.iter() {
let mut view_clusters_bindings = ViewClusterBindings::default(); let mut view_clusters_bindings =
ViewClusterBindings::new(mesh_pipeline.clustered_forward_buffer_binding_type);
view_clusters_bindings.reserve_and_clear(); view_clusters_bindings.reserve_and_clear();
let mut indices_full = false; let mut indices_full = false;
@ -1021,6 +1255,7 @@ pub fn prepare_clusters(
{ {
if view_clusters_bindings.n_indices() if view_clusters_bindings.n_indices()
>= ViewClusterBindings::MAX_INDICES >= ViewClusterBindings::MAX_INDICES
&& !supports_storage_buffers
{ {
warn!("Cluster light index lists is full! The PointLights in the view are affecting too many clusters."); warn!("Cluster light index lists is full! The PointLights in the view are affecting too many clusters.");
indices_full = true; indices_full = true;
@ -1036,12 +1271,7 @@ pub fn prepare_clusters(
} }
} }
view_clusters_bindings view_clusters_bindings.write_buffers(render_device, &render_queue);
.cluster_light_index_lists
.write_buffer(&render_device, &render_queue);
view_clusters_bindings
.cluster_offsets_and_counts
.write_buffer(&render_device, &render_queue);
commands.get_or_spawn(entity).insert(view_clusters_bindings); commands.get_or_spawn(entity).insert(view_clusters_bindings);
} }

View File

@ -1,6 +1,7 @@
use crate::{ use crate::{
GlobalLightMeta, GpuLights, LightMeta, NotShadowCaster, NotShadowReceiver, ShadowPipeline, GlobalLightMeta, GpuLights, LightMeta, NotShadowCaster, NotShadowReceiver, ShadowPipeline,
ViewClusterBindings, ViewLightsUniformOffset, ViewShadowBindings, ViewClusterBindings, ViewLightsUniformOffset, ViewShadowBindings,
CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT,
}; };
use bevy_app::Plugin; use bevy_app::Plugin;
use bevy_asset::{load_internal_asset, Assets, Handle, HandleUntyped}; use bevy_asset::{load_internal_asset, Assets, Handle, HandleUntyped};
@ -258,11 +259,18 @@ pub struct MeshPipeline {
pub skinned_mesh_layout: BindGroupLayout, pub skinned_mesh_layout: BindGroupLayout,
// This dummy white texture is to be used in place of optional StandardMaterial textures // This dummy white texture is to be used in place of optional StandardMaterial textures
pub dummy_white_gpu_image: GpuImage, pub dummy_white_gpu_image: GpuImage,
pub clustered_forward_buffer_binding_type: BufferBindingType,
} }
impl FromWorld for MeshPipeline { impl FromWorld for MeshPipeline {
fn from_world(world: &mut World) -> Self { fn from_world(world: &mut World) -> Self {
let render_device = world.resource::<RenderDevice>(); let render_device = world.resource::<RenderDevice>();
let clustered_forward_buffer_binding_type = render_device
.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT);
let cluster_min_binding_size = match clustered_forward_buffer_binding_type {
BufferBindingType::Storage { .. } => None,
BufferBindingType::Uniform => BufferSize::new(16384),
};
let view_layout = render_device.create_bind_group_layout(&BindGroupLayoutDescriptor { let view_layout = render_device.create_bind_group_layout(&BindGroupLayoutDescriptor {
entries: &[ entries: &[
// View // View
@ -334,11 +342,12 @@ impl FromWorld for MeshPipeline {
binding: 6, binding: 6,
visibility: ShaderStages::FRAGMENT, visibility: ShaderStages::FRAGMENT,
ty: BindingType::Buffer { ty: BindingType::Buffer {
ty: BufferBindingType::Uniform, ty: clustered_forward_buffer_binding_type,
has_dynamic_offset: false, has_dynamic_offset: false,
// NOTE: Static size for uniform buffers. GpuPointLight has a padded // NOTE (when no storage buffers): Static size for uniform buffers.
// size of 64 bytes, so 16384 / 64 = 256 point lights max // GpuPointLight has a padded size of 64 bytes, so 16384 / 64 = 256
min_binding_size: BufferSize::new(16384), // point lights max
min_binding_size: cluster_min_binding_size,
}, },
count: None, count: None,
}, },
@ -347,10 +356,11 @@ impl FromWorld for MeshPipeline {
binding: 7, binding: 7,
visibility: ShaderStages::FRAGMENT, visibility: ShaderStages::FRAGMENT,
ty: BindingType::Buffer { ty: BindingType::Buffer {
ty: BufferBindingType::Uniform, ty: clustered_forward_buffer_binding_type,
has_dynamic_offset: false, has_dynamic_offset: false,
// NOTE: With 256 point lights max, indices need 8 bits so use u8 // NOTE (when no storage buffers): With 256 point lights max, indices
min_binding_size: BufferSize::new(16384), // need 8 bits so use u8
min_binding_size: cluster_min_binding_size,
}, },
count: None, count: None,
}, },
@ -359,13 +369,14 @@ impl FromWorld for MeshPipeline {
binding: 8, binding: 8,
visibility: ShaderStages::FRAGMENT, visibility: ShaderStages::FRAGMENT,
ty: BindingType::Buffer { ty: BindingType::Buffer {
ty: BufferBindingType::Uniform, ty: clustered_forward_buffer_binding_type,
has_dynamic_offset: false, has_dynamic_offset: false,
// NOTE: The offset needs to address 16384 indices, which needs 14 bits. // NOTE (when no storage buffers): The offset needs to address 16384
// The count can be at most all 256 lights so 8 bits. // indices, which needs 14 bits. The count can be at most all 256 lights
// Pack the offset into the upper 24 bits and the count into the // so 8 bits.
// lower 8 bits. // NOTE: Pack the offset into the upper 19 bits and the count into the
min_binding_size: BufferSize::new(16384), // lower 13 bits.
min_binding_size: cluster_min_binding_size,
}, },
count: None, count: None,
}, },
@ -457,6 +468,7 @@ impl FromWorld for MeshPipeline {
view_layout, view_layout,
mesh_layout, mesh_layout,
skinned_mesh_layout, skinned_mesh_layout,
clustered_forward_buffer_binding_type,
dummy_white_gpu_image, dummy_white_gpu_image,
} }
} }
@ -548,6 +560,18 @@ impl SpecializedMeshPipeline for MeshPipeline {
vertex_attributes.push(Mesh::ATTRIBUTE_TANGENT.at_shader_location(3)); vertex_attributes.push(Mesh::ATTRIBUTE_TANGENT.at_shader_location(3));
} }
// TODO: consider exposing this in shaders in a more generally useful way, such as:
// # if AVAILABLE_STORAGE_BUFFER_BINDINGS >= 3
// /* use storage buffers here */
// # else
// /* use uniforms here */
// # endif
if !matches!(
self.clustered_forward_buffer_binding_type,
BufferBindingType::Storage { .. }
) {
shader_defs.push(String::from("NO_STORAGE_BUFFERS_SUPPORT"));
}
let mut bind_group_layout = vec![self.view_layout.clone()]; let mut bind_group_layout = vec![self.view_layout.clone()];
if layout.contains(Mesh::ATTRIBUTE_JOINT_INDEX) if layout.contains(Mesh::ATTRIBUTE_JOINT_INDEX)
&& layout.contains(Mesh::ATTRIBUTE_JOINT_WEIGHT) && layout.contains(Mesh::ATTRIBUTE_JOINT_WEIGHT)
@ -770,17 +794,11 @@ pub fn queue_mesh_view_bind_groups(
}, },
BindGroupEntry { BindGroupEntry {
binding: 7, binding: 7,
resource: view_cluster_bindings resource: view_cluster_bindings.light_index_lists_binding().unwrap(),
.cluster_light_index_lists
.binding()
.unwrap(),
}, },
BindGroupEntry { BindGroupEntry {
binding: 8, binding: 8,
resource: view_cluster_bindings resource: view_cluster_bindings.offsets_and_counts_binding().unwrap(),
.cluster_offsets_and_counts
.binding()
.unwrap(),
}, },
], ],
label: Some("mesh_view_bind_group"), label: Some("mesh_view_bind_group"),

View File

@ -57,20 +57,30 @@ struct Lights {
n_directional_lights: u32; n_directional_lights: u32;
}; };
#ifdef NO_STORAGE_BUFFERS_SUPPORT
struct PointLights { struct PointLights {
data: array<PointLight, 256u>; data: array<PointLight, 256u>;
}; };
struct ClusterLightIndexLists { struct ClusterLightIndexLists {
// each u32 contains 4 u8 indices into the PointLights array // each u32 contains 4 u8 indices into the PointLights array
data: array<vec4<u32>, 1024u>; data: array<vec4<u32>, 1024u>;
}; };
struct ClusterOffsetsAndCounts { struct ClusterOffsetsAndCounts {
// each u32 contains a 24-bit index into ClusterLightIndexLists in the high 24 bits // each u32 contains a 24-bit index into ClusterLightIndexLists in the high 24 bits
// and an 8-bit count of the number of lights in the low 8 bits // and an 8-bit count of the number of lights in the low 8 bits
data: array<vec4<u32>, 1024u>; data: array<vec4<u32>, 1024u>;
}; };
#else
struct PointLights {
data: array<PointLight>;
};
struct ClusterLightIndexLists {
data: array<u32>;
};
struct ClusterOffsetsAndCounts {
data: array<vec2<u32>>;
};
#endif
[[group(0), binding(0)]] [[group(0), binding(0)]]
var<uniform> view: View; var<uniform> view: View;
@ -94,9 +104,19 @@ var directional_shadow_textures: texture_depth_2d_array;
#endif #endif
[[group(0), binding(5)]] [[group(0), binding(5)]]
var directional_shadow_textures_sampler: sampler_comparison; var directional_shadow_textures_sampler: sampler_comparison;
#ifdef NO_STORAGE_BUFFERS_SUPPORT
[[group(0), binding(6)]] [[group(0), binding(6)]]
var<uniform> point_lights: PointLights; var<uniform> point_lights: PointLights;
[[group(0), binding(7)]] [[group(0), binding(7)]]
var<uniform> cluster_light_index_lists: ClusterLightIndexLists; var<uniform> cluster_light_index_lists: ClusterLightIndexLists;
[[group(0), binding(8)]] [[group(0), binding(8)]]
var<uniform> cluster_offsets_and_counts: ClusterOffsetsAndCounts; var<uniform> cluster_offsets_and_counts: ClusterOffsetsAndCounts;
#else
[[group(0), binding(6)]]
var<storage> point_lights: PointLights;
[[group(0), binding(7)]]
var<storage> cluster_light_index_lists: ClusterLightIndexLists;
[[group(0), binding(8)]]
var<storage> cluster_offsets_and_counts: ClusterOffsetsAndCounts;
#endif

View File

@ -264,29 +264,32 @@ fn fragment_cluster_index(frag_coord: vec2<f32>, view_z: f32, is_orthographic: b
); );
} }
struct ClusterOffsetAndCount {
offset: u32;
count: u32;
};
// this must match CLUSTER_COUNT_SIZE in light.rs // this must match CLUSTER_COUNT_SIZE in light.rs
let CLUSTER_COUNT_SIZE = 13u; let CLUSTER_COUNT_SIZE = 13u;
fn unpack_offset_and_count(cluster_index: u32) -> ClusterOffsetAndCount { fn unpack_offset_and_count(cluster_index: u32) -> vec2<u32> {
#ifdef NO_STORAGE_BUFFERS_SUPPORT
let offset_and_count = cluster_offsets_and_counts.data[cluster_index >> 2u][cluster_index & ((1u << 2u) - 1u)]; let offset_and_count = cluster_offsets_and_counts.data[cluster_index >> 2u][cluster_index & ((1u << 2u) - 1u)];
var output: ClusterOffsetAndCount; return vec2<u32>(
// The offset is stored in the upper 24 bits // The offset is stored in the upper 32 - CLUSTER_COUNT_SIZE = 19 bits
output.offset = (offset_and_count >> CLUSTER_COUNT_SIZE) & ((1u << 32u - CLUSTER_COUNT_SIZE) - 1u); (offset_and_count >> CLUSTER_COUNT_SIZE) & ((1u << 32u - CLUSTER_COUNT_SIZE) - 1u),
// The count is stored in the lower 8 bits // The count is stored in the lower CLUSTER_COUNT_SIZE = 13 bits
output.count = offset_and_count & ((1u << CLUSTER_COUNT_SIZE) - 1u); offset_and_count & ((1u << CLUSTER_COUNT_SIZE) - 1u)
return output; );
#else
return cluster_offsets_and_counts.data[cluster_index];
#endif
} }
fn get_light_id(index: u32) -> u32 { fn get_light_id(index: u32) -> u32 {
#ifdef NO_STORAGE_BUFFERS_SUPPORT
// The index is correct but in cluster_light_index_lists we pack 4 u8s into a u32 // The index is correct but in cluster_light_index_lists we pack 4 u8s into a u32
// This means the index into cluster_light_index_lists is index / 4 // This means the index into cluster_light_index_lists is index / 4
let indices = cluster_light_index_lists.data[index >> 4u][(index >> 2u) & ((1u << 2u) - 1u)]; let indices = cluster_light_index_lists.data[index >> 4u][(index >> 2u) & ((1u << 2u) - 1u)];
// And index % 4 gives the sub-index of the u8 within the u32 so we shift by 8 * sub-index // And index % 4 gives the sub-index of the u8 within the u32 so we shift by 8 * sub-index
return (indices >> (8u * (index & ((1u << 2u) - 1u)))) & ((1u << 8u) - 1u); return (indices >> (8u * (index & ((1u << 2u) - 1u)))) & ((1u << 8u) - 1u);
#else
return cluster_light_index_lists.data[index];
#endif
} }
fn point_light( fn point_light(
@ -583,7 +586,7 @@ fn fragment(in: FragmentInput) -> [[location(0)]] vec4<f32> {
), in.world_position); ), in.world_position);
let cluster_index = fragment_cluster_index(in.frag_coord.xy, view_z, is_orthographic); let cluster_index = fragment_cluster_index(in.frag_coord.xy, view_z, is_orthographic);
let offset_and_count = unpack_offset_and_count(cluster_index); let offset_and_count = unpack_offset_and_count(cluster_index);
for (var i: u32 = offset_and_count.offset; i < offset_and_count.offset + offset_and_count.count; i = i + 1u) { for (var i: u32 = offset_and_count[0]; i < offset_and_count[0] + offset_and_count[1]; i = i + 1u) {
let light_id = get_light_id(i); let light_id = get_light_id(i);
let light = point_lights.data[light_id]; let light = point_lights.data[light_id];
var shadow: f32 = 1.0; var shadow: f32 = 1.0;
@ -637,9 +640,9 @@ fn fragment(in: FragmentInput) -> [[location(0)]] vec4<f32> {
let cluster_overlay_alpha = 0.1; let cluster_overlay_alpha = 0.1;
let max_light_complexity_per_cluster = 64.0; let max_light_complexity_per_cluster = 64.0;
output_color.r = (1.0 - cluster_overlay_alpha) * output_color.r output_color.r = (1.0 - cluster_overlay_alpha) * output_color.r
+ cluster_overlay_alpha * smoothStep(0.0, max_light_complexity_per_cluster, f32(offset_and_count.count)); + cluster_overlay_alpha * smoothStep(0.0, max_light_complexity_per_cluster, f32(offset_and_count[1]));
output_color.g = (1.0 - cluster_overlay_alpha) * output_color.g output_color.g = (1.0 - cluster_overlay_alpha) * output_color.g
+ cluster_overlay_alpha * (1.0 - smoothStep(0.0, max_light_complexity_per_cluster, f32(offset_and_count.count))); + cluster_overlay_alpha * (1.0 - smoothStep(0.0, max_light_complexity_per_cluster, f32(offset_and_count[1])));
#endif // CLUSTERED_FORWARD_DEBUG_CLUSTER_LIGHT_COMPLEXITY #endif // CLUSTERED_FORWARD_DEBUG_CLUSTER_LIGHT_COMPLEXITY
#ifdef CLUSTERED_FORWARD_DEBUG_CLUSTER_COHERENCY #ifdef CLUSTERED_FORWARD_DEBUG_CLUSTER_COHERENCY
// NOTE: Visualizes the cluster to which the fragment belongs // NOTE: Visualizes the cluster to which the fragment belongs

View File

@ -1,13 +1,10 @@
use std::num::NonZeroU64; use super::Buffer;
use crate::renderer::{RenderDevice, RenderQueue};
use bevy_crevice::std430::{self, AsStd430, Std430}; use bevy_crevice::std430::{self, AsStd430, Std430};
use bevy_utils::tracing::warn; use bevy_utils::tracing::warn;
use std::num::NonZeroU64;
use wgpu::{BindingResource, BufferBinding, BufferDescriptor, BufferUsages}; use wgpu::{BindingResource, BufferBinding, BufferDescriptor, BufferUsages};
use crate::renderer::{RenderDevice, RenderQueue};
use super::Buffer;
/// A helper for a storage buffer binding with a body, or a variable-sized array, or both. /// A helper for a storage buffer binding with a body, or a variable-sized array, or both.
pub struct StorageBuffer<T: AsStd430, U: AsStd430 = ()> { pub struct StorageBuffer<T: AsStd430, U: AsStd430 = ()> {
body: U, body: U,
@ -126,4 +123,19 @@ impl<T: AsStd430, U: AsStd430> StorageBuffer<T, U> {
pub fn values_mut(&mut self) -> &mut [T] { pub fn values_mut(&mut self) -> &mut [T] {
&mut self.values &mut self.values
} }
/// Removes every element from the variable-sized array part of this buffer.
///
/// This only calls `clear` on `values`; `body` is not modified.
#[inline]
pub fn clear(&mut self) {
self.values.clear();
}
/// Appends a single `value` to the end of the variable-sized array part of this buffer.
#[inline]
pub fn push(&mut self, value: T) {
self.values.push(value);
}
/// Moves all elements of `values` onto the end of the variable-sized array part of this
/// buffer by forwarding to `append` on the internal `values` collection.
///
/// NOTE(review): presumably the internal collection is a `Vec<T>`, in which case the
/// passed-in vector is left empty afterwards; confirm against the field declaration.
#[inline]
pub fn append(&mut self, values: &mut Vec<T>) {
self.values.append(values);
}
} }

View File

@ -4,7 +4,7 @@ use crate::render_resource::{
}; };
use futures_lite::future; use futures_lite::future;
use std::sync::Arc; use std::sync::Arc;
use wgpu::util::DeviceExt; use wgpu::{util::DeviceExt, BufferBindingType};
use super::RenderQueue; use super::RenderQueue;
@ -184,4 +184,15 @@ impl RenderDevice {
let padded_bytes_per_row_padding = (align - row_bytes % align) % align; let padded_bytes_per_row_padding = (align - row_bytes % align) % align;
row_bytes + padded_bytes_per_row_padding row_bytes + padded_bytes_per_row_padding
} }
/// Picks the buffer binding type to use for read-only data that needs
/// `buffers_per_shader_stage` buffer bindings in a single shader stage.
///
/// Returns a read-only storage binding when the device limit
/// `max_storage_buffers_per_shader_stage` is at least `buffers_per_shader_stage`,
/// and falls back to a uniform binding otherwise.
pub fn get_supported_read_only_binding_type(
    &self,
    buffers_per_shader_stage: u32,
) -> BufferBindingType {
    let available_storage_buffers = self.limits().max_storage_buffers_per_shader_stage;
    // Not enough storage buffer bindings per stage: fall back to uniform buffers.
    if available_storage_buffers < buffers_per_shader_stage {
        return BufferBindingType::Uniform;
    }
    BufferBindingType::Storage { read_only: true }
}
} }

View File

@ -86,7 +86,11 @@ impl<M: Material2d> SpecializedMaterial2d for M {
type Key = (); type Key = ();
#[inline] #[inline]
fn key(_material: &<Self as RenderAsset>::PreparedAsset) -> Self::Key {} fn key(
_render_device: &RenderDevice,
_material: &<Self as RenderAsset>::PreparedAsset,
) -> Self::Key {
}
#[inline] #[inline]
fn specialize( fn specialize(
@ -136,7 +140,10 @@ pub trait SpecializedMaterial2d: Asset + RenderAsset {
/// Extract the [`SpecializedMaterial2d::Key`] for the "prepared" version of this material. This key will be /// Extract the [`SpecializedMaterial2d::Key`] for the "prepared" version of this material. This key will be
/// passed in to the [`SpecializedMaterial2d::specialize`] function when compiling the [`RenderPipeline`](bevy_render::render_resource::RenderPipeline) /// passed in to the [`SpecializedMaterial2d::specialize`] function when compiling the [`RenderPipeline`](bevy_render::render_resource::RenderPipeline)
/// for a given entity's material. /// for a given entity's material.
fn key(material: &<Self as RenderAsset>::PreparedAsset) -> Self::Key; fn key(
render_device: &RenderDevice,
material: &<Self as RenderAsset>::PreparedAsset,
) -> Self::Key;
/// Specializes the given `descriptor` according to the given `key`. /// Specializes the given `descriptor` according to the given `key`.
fn specialize( fn specialize(
@ -292,6 +299,7 @@ pub fn queue_material2d_meshes<M: SpecializedMaterial2d>(
material2d_pipeline: Res<Material2dPipeline<M>>, material2d_pipeline: Res<Material2dPipeline<M>>,
mut pipelines: ResMut<SpecializedMeshPipelines<Material2dPipeline<M>>>, mut pipelines: ResMut<SpecializedMeshPipelines<Material2dPipeline<M>>>,
mut pipeline_cache: ResMut<PipelineCache>, mut pipeline_cache: ResMut<PipelineCache>,
render_device: Res<RenderDevice>,
msaa: Res<Msaa>, msaa: Res<Msaa>,
render_meshes: Res<RenderAssets<Mesh>>, render_meshes: Res<RenderAssets<Mesh>>,
render_materials: Res<RenderAssets<M>>, render_materials: Res<RenderAssets<M>>,
@ -301,6 +309,7 @@ pub fn queue_material2d_meshes<M: SpecializedMaterial2d>(
if material2d_meshes.is_empty() { if material2d_meshes.is_empty() {
return; return;
} }
let render_device = render_device.into_inner();
for (visible_entities, mut transparent_phase) in views.iter_mut() { for (visible_entities, mut transparent_phase) in views.iter_mut() {
let draw_transparent_pbr = transparent_draw_functions let draw_transparent_pbr = transparent_draw_functions
.read() .read()
@ -318,7 +327,7 @@ pub fn queue_material2d_meshes<M: SpecializedMaterial2d>(
let mesh_key = msaa_key let mesh_key = msaa_key
| Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology); | Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology);
let material_key = M::key(material2d); let material_key = M::key(render_device, material2d);
let pipeline_id = pipelines.specialize( let pipeline_id = pipelines.specialize(
&mut pipeline_cache, &mut pipeline_cache,
&material2d_pipeline, &material2d_pipeline,

View File

@ -443,4 +443,5 @@ cargo run --release --example <example name>
Example | File | Description Example | File | Description
--- | --- | --- --- | --- | ---
`many_lights` | [`stress_tests/many_lights.rs`](./stress_tests/many_lights.rs) | Simple benchmark to test rendering many point lights. Run with `WGPU_SETTINGS_PRIO=webgl2` to restrict to uniform buffers and max 256 lights.
`transform_hierarchy.rs` | [`stress_tests/transform_hierarchy.rs`](./stress_tests/transform_hierarchy.rs) | Various test cases for hierarchy and transform propagation performance `transform_hierarchy.rs` | [`stress_tests/transform_hierarchy.rs`](./stress_tests/transform_hierarchy.rs) | Various test cases for hierarchy and transform propagation performance

View File

@ -137,6 +137,7 @@ impl Material for CustomMaterial {
} }
fn specialize( fn specialize(
_pipeline: &MaterialPipeline<Self>,
descriptor: &mut RenderPipelineDescriptor, descriptor: &mut RenderPipelineDescriptor,
layout: &MeshVertexBufferLayout, layout: &MeshVertexBufferLayout,
) -> Result<(), SpecializedMeshPipelineError> { ) -> Result<(), SpecializedMeshPipelineError> {

View File

@ -97,6 +97,7 @@ impl SpecializedMaterial for CustomMaterial {
fn key(_: &<CustomMaterial as RenderAsset>::PreparedAsset) -> Self::Key {} fn key(_: &<CustomMaterial as RenderAsset>::PreparedAsset) -> Self::Key {}
fn specialize( fn specialize(
_pipeline: &MaterialPipeline<Self>,
descriptor: &mut RenderPipelineDescriptor, descriptor: &mut RenderPipelineDescriptor,
_: Self::Key, _: Self::Key,
_layout: &MeshVertexBufferLayout, _layout: &MeshVertexBufferLayout,

View File

@ -0,0 +1,166 @@
use bevy::{
diagnostic::{FrameTimeDiagnosticsPlugin, LogDiagnosticsPlugin},
math::{DVec2, DVec3},
pbr::{ExtractedPointLight, GlobalLightMeta},
prelude::*,
render::{RenderApp, RenderStage},
};
// Builds and runs the `many_lights` stress-test application.
fn main() {
    // `PresentMode::Immediate` is requested for the window.
    // NOTE(review): presumably so the frame rate is not capped by vsync; confirm.
    let window = WindowDescriptor {
        width: 1024.0,
        height: 768.0,
        title: "many_lights".to_string(),
        present_mode: bevy::window::PresentMode::Immediate,
        ..default()
    };

    let mut app = App::new();
    app.insert_resource(window)
        .add_plugins(DefaultPlugins)
        // Frame time diagnostics, logged via the log diagnostics plugin.
        .add_plugin(FrameTimeDiagnosticsPlugin::default())
        .add_plugin(LogDiagnosticsPlugin::default())
        .add_startup_system(setup)
        .add_system(move_camera)
        .add_system(print_light_count)
        .add_plugin(LogVisibleLights);
    app.run();
}
// Startup system that builds the scene: a large sphere, `N_LIGHTS` point lights
// distributed over a sphere of the same radius, a camera and one reference cube.
fn setup(
    mut commands: Commands,
    mut meshes: ResMut<Assets<Mesh>>,
    mut materials: ResMut<Assets<StandardMaterial>>,
) {
    const LIGHT_RADIUS: f32 = 0.3;
    const LIGHT_INTENSITY: f32 = 5.0;
    const RADIUS: f32 = 50.0;
    const N_LIGHTS: usize = 100_000;

    // Sphere on which the lights are placed.
    // NOTE(review): the scale of -1 presumably turns the sphere inside out so that its
    // surface is rendered when viewed from within; confirm.
    commands.spawn_bundle(PbrBundle {
        mesh: meshes.add(Mesh::from(shape::Icosphere {
            radius: RADIUS,
            subdivisions: 9,
        })),
        material: materials.add(StandardMaterial::from(Color::WHITE)),
        transform: Transform::from_scale(Vec3::splat(-1.0)),
        ..default()
    });

    // NOTE: This pattern is good for testing performance of culling as it provides roughly
    // the same number of visible lights regardless of the viewing angle.
    // NOTE: f64 is used to avoid precision issues that produce visual artifacts in the distribution
    let golden_ratio = 0.5f64 * (1.0f64 + 5.0f64.sqrt());
    // Lossless widening of the f32 radius for the f64 position math below.
    let radius = f64::from(RADIUS);
    for i in 0..N_LIGHTS {
        let spherical_polar_theta_phi = fibonacci_spiral_on_sphere(golden_ratio, i, N_LIGHTS);
        let unit_sphere_p = spherical_polar_to_cartesian(spherical_polar_theta_phi);
        commands.spawn_bundle(PointLightBundle {
            point_light: PointLight {
                range: LIGHT_RADIUS,
                intensity: LIGHT_INTENSITY,
                ..default()
            },
            transform: Transform::from_translation((radius * unit_sphere_p).as_vec3()),
            ..default()
        });
    }

    // camera
    commands.spawn_bundle(PerspectiveCameraBundle::default());

    // A single cube that serves as a reference point during rotation.
    let mesh = meshes.add(Mesh::from(shape::Cube { size: 1.0 }));
    let material = materials.add(StandardMaterial {
        base_color: Color::PINK,
        ..default()
    });
    commands.spawn_bundle(PbrBundle {
        mesh,
        material,
        transform: Transform {
            // `RADIUS` is already an f32, so no cast is needed here.
            translation: Vec3::new(0.0, RADIUS, 0.0),
            scale: Vec3::splat(5.0),
            ..default()
        },
        ..default()
    });
}
// NOTE: This epsilon value is apparently optimal with respect to the average
// nearest-neighbor distance between the generated points. See:
// http://extremelearning.com.au/how-to-evenly-distribute-points-on-a-sphere-more-effectively-than-the-canonical-fibonacci-lattice/
// for details.
const EPSILON: f64 = 0.36;
// Returns the spherical polar coordinates of point `i` out of `n` on a Fibonacci spiral:
// `x` holds the angle derived from `i / golden_ratio`, `y` holds the arc cosine of the
// `EPSILON`-offset height term.
fn fibonacci_spiral_on_sphere(golden_ratio: f64, i: usize, n: usize) -> DVec2 {
    let index = i as f64;
    let theta = 2.0 * std::f64::consts::PI * (index / golden_ratio);
    // Cosine of the second angle, offset by EPSILON at both ends of the range.
    let cos_phi = 1.0 - 2.0 * (index + EPSILON) / (n as f64 - 1.0 + 2.0 * EPSILON);
    DVec2::new(theta, cos_phi.acos())
}
// Converts spherical polar angles (`p.x` = theta, `p.y` = phi) into a point on the
// unit sphere.
fn spherical_polar_to_cartesian(p: DVec2) -> DVec3 {
    let theta = p.x;
    let phi = p.y;
    let (theta_sin, theta_cos) = theta.sin_cos();
    let (phi_sin, phi_cos) = phi.sin_cos();

    let x = theta_cos * phi_sin;
    let y = theta_sin * phi_sin;
    let z = phi_cos;
    DVec3::new(x, y, z)
}
// System that rotates the camera a little every frame, first about Z and then about X.
fn move_camera(time: Res<Time>, mut camera_query: Query<&mut Transform, With<Camera>>) {
    let mut transform = camera_query.single_mut();

    let about_z = Quat::from_rotation_z(time.delta_seconds() * 0.15);
    transform.rotate(about_z);

    let about_x = Quat::from_rotation_x(time.delta_seconds() * 0.15);
    transform.rotate(about_x);
}
// System for printing the number of point lights each time the printing timer finishes
fn print_light_count(time: Res<Time>, mut timer: Local<PrintingTimer>, lights: Query<&PointLight>) {
    timer.0.tick(time.delta());
    // Only log on the frame where the timer has just elapsed.
    if !timer.0.just_finished() {
        return;
    }
    let light_count = lights.iter().len();
    info!("Lights: {}", light_count);
}
// Plugin that adds the light-count logging systems to the render sub-app.
struct LogVisibleLights;

impl Plugin for LogVisibleLights {
    fn build(&self, app: &mut App) {
        // Nothing is registered when the render sub-app is not available.
        if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app
                .add_system_to_stage(RenderStage::Extract, extract_time)
                .add_system_to_stage(RenderStage::Prepare, print_visible_light_count);
        }
    }
}
// System for printing the number of extracted point lights and the number of entries in
// `GlobalLightMeta::entity_to_index` each time the printing timer finishes
fn print_visible_light_count(
    time: Res<Time>,
    mut timer: Local<PrintingTimer>,
    visible: Query<&ExtractedPointLight>,
    global_light_meta: Res<GlobalLightMeta>,
) {
    timer.0.tick(time.delta());
    // Only log on the frame where the timer has just elapsed.
    if !timer.0.just_finished() {
        return;
    }
    let visible_lights = visible.iter().len();
    let rendered_lights = global_light_meta.entity_to_index.len();
    info!(
        "Visible Lights: {}, Rendered Lights: {}",
        visible_lights, rendered_lights
    );
}
// Extract-stage system that inserts a clone of the `Time` resource via `Commands`,
// making it available to `print_visible_light_count`.
fn extract_time(mut commands: Commands, time: Res<Time>) {
    let current_time: &Time = time.into_inner();
    let snapshot = current_time.clone();
    commands.insert_resource(snapshot);
}
// Timer wrapper used as `Local` state by the printing systems.
struct PrintingTimer(Timer);

impl Default for PrintingTimer {
    // Builds the timer from a 1.0 second duration with the second argument set to `true`.
    fn default() -> Self {
        let timer = Timer::from_seconds(1.0, true);
        PrintingTimer(timer)
    }
}