Meshlet LOD-compatible two-pass occlusion culling (#12898)

Keeping track of explicit per-cluster visibility between frames does not
work with LODs, and it leads to worse culling: using the final depth
buffer from the previous frame is more accurate.

Instead, we need to generate a second depth pyramid after the second
raster pass, and then use it in the first culling pass of the next frame
to test whether a cluster would have been visible last frame.
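
As a rough sketch, the per-view pass order this results in looks like the
following (Rust-flavored pseudocode; the function names are illustrative,
not the actual node/system names):

    cull_first_pass();     // instance, frustum, and LOD tests, then an occlusion test against
                           // the depth pyramid saved at the end of the previous frame; clusters
                           // failing only the occlusion test are marked as second-pass candidates
    raster_first_pass();   // rasterize the clusters that passed the first culling pass
    downsample_depth();    // build a depth pyramid from the first raster pass
    cull_second_pass();    // occlusion-test the second-pass candidates against that pyramid
    raster_second_pass();  // rasterize the candidates that are actually visible this frame
    downsample_depth();    // rebuild the depth pyramid; this one is kept for the next frame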

As part of these changes, the write_index_buffer pass has been folded
into the culling pass, for a large performance gain and to avoid
tracking a lot of extra state between passes.
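
The packed cluster + triangle IDs that the old pass wrote out for the
indirect draw are now appended directly from the culling shader. A minimal
sketch of that packing, assuming the 6-bit triangle field used here (so at
most 64 triangles per meshlet):

    // Illustrative Rust sketch of the entries written to draw_triangle_buffer.
    fn pack_triangle(cluster_id: u32, triangle_id: u32) -> u32 {
        (cluster_id << 6) | triangle_id // triangle ID in the low 6 bits
    }

    fn unpack_triangle(packed: u32) -> (u32, u32) {
        (packed >> 6, packed & 0x3F) // (cluster_id, triangle_id), matching the resolve shader
    }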

The prepass previous model/view uniforms were adapted to work with
meshlets as well.

Also fixed a bug with materials, along with other miscellaneous improvements.

---------

Co-authored-by: François <mockersf@gmail.com>
Co-authored-by: atlas dostal <rodol@rivalrebels.com>
Co-authored-by: vero <email@atlasdostal.com>
Co-authored-by: Patrick Walton <pcwalton@mimiga.net>
Co-authored-by: Robert Swain <robert.swain@gmail.com>
JMS55 2024-04-27 22:30:20 -07:00 committed by GitHub
parent 4b446c020e
commit e1a0da0fa6
12 changed files with 337 additions and 488 deletions

View File

@ -3,36 +3,61 @@
meshlet_bounding_spheres,
meshlet_thread_instance_ids,
meshlet_instance_uniforms,
meshlet_occlusion,
meshlet_second_pass_candidates,
depth_pyramid,
view,
previous_view,
should_cull_instance,
get_meshlet_previous_occlusion,
meshlet_is_second_pass_candidate,
meshlets,
draw_indirect_args,
draw_triangle_buffer,
}
#ifdef MESHLET_SECOND_CULLING_PASS
#import bevy_pbr::meshlet_bindings::depth_pyramid
#endif
#import bevy_render::maths::affine3_to_square
/// Culls individual clusters (1 per thread) in two passes (two pass occlusion culling), and outputs a bitmask of which clusters survived.
/// 1. The first pass is only frustum culling, on only the clusters that were visible last frame.
/// 2. The second pass performs both frustum and occlusion culling (using the depth buffer generated from the first pass), on all clusters.
/// 1. The first pass tests instance visibility, frustum culling, LOD selection, and finally occlusion culling using last frame's depth pyramid.
/// 2. The second pass performs occlusion culling (using the depth buffer generated from the first pass) on all clusters that passed
/// the instance, frustum, and LOD tests in the first pass, but were not visible last frame according to the occlusion culling.
@compute
@workgroup_size(128, 1, 1) // 128 threads per workgroup, 1 instanced meshlet per thread
fn cull_meshlets(@builtin(global_invocation_id) cluster_id: vec3<u32>) {
// Fetch the instance data and check for instance culling
if cluster_id.x >= arrayLength(&meshlet_thread_meshlet_ids) { return; }
let instance_id = meshlet_thread_instance_ids[cluster_id.x];
if should_cull_instance(instance_id) {
return;
}
fn cull_meshlets(
@builtin(workgroup_id) workgroup_id: vec3<u32>,
@builtin(num_workgroups) num_workgroups: vec3<u32>,
@builtin(local_invocation_id) local_invocation_id: vec3<u32>,
) {
// Calculate the cluster ID for this thread
let cluster_id = local_invocation_id.x + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u));
if cluster_id >= arrayLength(&meshlet_thread_meshlet_ids) { return; }
// Fetch other meshlet data
let meshlet_id = meshlet_thread_meshlet_ids[cluster_id.x];
#ifdef MESHLET_SECOND_CULLING_PASS
if !meshlet_is_second_pass_candidate(cluster_id) { return; }
#endif
// Check for instance culling
let instance_id = meshlet_thread_instance_ids[cluster_id];
#ifdef MESHLET_FIRST_CULLING_PASS
if should_cull_instance(instance_id) { return; }
#endif
// Calculate world-space culling bounding sphere for the cluster
let instance_uniform = meshlet_instance_uniforms[instance_id];
let meshlet_id = meshlet_thread_meshlet_ids[cluster_id];
let model = affine3_to_square(instance_uniform.model);
let model_scale = max(length(model[0]), max(length(model[1]), length(model[2])));
let bounding_spheres = meshlet_bounding_spheres[meshlet_id];
var culling_bounding_sphere_center = model * vec4(bounding_spheres.self_culling.center, 1.0);
var culling_bounding_sphere_radius = model_scale * bounding_spheres.self_culling.radius;
#ifdef MESHLET_FIRST_CULLING_PASS
// Frustum culling
// TODO: Faster method from https://vkguide.dev/docs/gpudriven/compute_culling/#frustum-culling-function
for (var i = 0u; i < 6u; i++) {
if dot(view.frustum[i], culling_bounding_sphere_center) + culling_bounding_sphere_radius <= 0.0 {
return;
}
}
// Calculate view-space LOD bounding sphere for the meshlet
let lod_bounding_sphere_center = model * vec4(bounding_spheres.self_lod.center, 1.0);
@ -48,61 +73,59 @@ fn cull_meshlets(@builtin(global_invocation_id) cluster_id: vec3<u32>) {
let lod_is_ok = lod_error_is_imperceptible(lod_bounding_sphere_center_view_space, lod_bounding_sphere_radius);
let parent_lod_is_ok = lod_error_is_imperceptible(parent_lod_bounding_sphere_center_view_space, parent_lod_bounding_sphere_radius);
if !lod_is_ok || parent_lod_is_ok { return; }
// In the first pass, operate only on the clusters visible last frame. In the second pass, operate on all clusters.
#ifdef MESHLET_SECOND_CULLING_PASS
var meshlet_visible = true;
#else
var meshlet_visible = get_meshlet_previous_occlusion(cluster_id.x);
if !meshlet_visible { return; }
#endif
// Calculate world-space culling bounding sphere for the cluster
let culling_bounding_sphere_center = model * vec4(bounding_spheres.self_culling.center, 1.0);
let culling_bounding_sphere_radius = model_scale * bounding_spheres.self_culling.radius;
// Project the culling bounding sphere to view-space for occlusion culling
#ifdef MESHLET_FIRST_CULLING_PASS
let previous_model = affine3_to_square(instance_uniform.previous_model);
let previous_model_scale = max(length(previous_model[0]), max(length(previous_model[1]), length(previous_model[2])));
culling_bounding_sphere_center = previous_model * vec4(bounding_spheres.self_culling.center, 1.0);
culling_bounding_sphere_radius = previous_model_scale * bounding_spheres.self_culling.radius;
#endif
let culling_bounding_sphere_center_view_space = (view.inverse_view * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz;
// Frustum culling
// TODO: Faster method from https://vkguide.dev/docs/gpudriven/compute_culling/#frustum-culling-function
for (var i = 0u; i < 6u; i++) {
if !meshlet_visible { break; }
meshlet_visible &= dot(view.frustum[i], culling_bounding_sphere_center) > -culling_bounding_sphere_radius;
let aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius);
// Halve the view-space AABB size as the depth pyramid is half the view size
let depth_pyramid_size_mip_0 = vec2<f32>(textureDimensions(depth_pyramid, 0)) * 0.5;
let width = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x;
let height = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y;
let depth_level = max(0, i32(ceil(log2(max(width, height))))); // TODO: Naga doesn't like this being a u32
let depth_pyramid_size = vec2<f32>(textureDimensions(depth_pyramid, depth_level));
let aabb_top_left = vec2<u32>(aabb.xy * depth_pyramid_size);
let depth_quad_a = textureLoad(depth_pyramid, aabb_top_left, depth_level).x;
let depth_quad_b = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 0u), depth_level).x;
let depth_quad_c = textureLoad(depth_pyramid, aabb_top_left + vec2(0u, 1u), depth_level).x;
let depth_quad_d = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 1u), depth_level).x;
let occluder_depth = min(min(depth_quad_a, depth_quad_b), min(depth_quad_c, depth_quad_d));
// Check whether or not the cluster would be occluded if drawn
var meshlet_visible: bool;
if view.projection[3][3] == 1.0 {
// Orthographic
let sphere_depth = view.projection[3][2] + (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius) * view.projection[2][2];
meshlet_visible = sphere_depth >= occluder_depth;
} else {
// Perspective
let sphere_depth = -view.projection[3][2] / (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius);
meshlet_visible = sphere_depth >= occluder_depth;
}
#ifdef MESHLET_SECOND_CULLING_PASS
// In the second culling pass, cull against the depth pyramid generated from the first pass
// Write if the cluster should be occlusion tested in the second pass
#ifdef MESHLET_FIRST_CULLING_PASS
let second_pass_candidate = u32(!meshlet_visible) << cluster_id % 32u;
atomicOr(&meshlet_second_pass_candidates[cluster_id / 32u], second_pass_candidate);
#endif
// Append a list of this cluster's triangles to draw if not culled
if meshlet_visible {
let culling_bounding_sphere_center_view_space = (view.inverse_view * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz;
let aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius);
// Halve the AABB size because the first depth mip resampling pass cut the full screen resolution into a power of two conservatively
let depth_pyramid_size_mip_0 = vec2<f32>(textureDimensions(depth_pyramid, 0)) * 0.5;
let width = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x;
let height = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y;
let depth_level = max(0, i32(ceil(log2(max(width, height))))); // TODO: Naga doesn't like this being a u32
let depth_pyramid_size = vec2<f32>(textureDimensions(depth_pyramid, depth_level));
let aabb_top_left = vec2<u32>(aabb.xy * depth_pyramid_size);
let depth_quad_a = textureLoad(depth_pyramid, aabb_top_left, depth_level).x;
let depth_quad_b = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 0u), depth_level).x;
let depth_quad_c = textureLoad(depth_pyramid, aabb_top_left + vec2(0u, 1u), depth_level).x;
let depth_quad_d = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 1u), depth_level).x;
let occluder_depth = min(min(depth_quad_a, depth_quad_b), min(depth_quad_c, depth_quad_d));
if view.projection[3][3] == 1.0 {
// Orthographic
let sphere_depth = view.projection[3][2] + (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius) * view.projection[2][2];
meshlet_visible &= sphere_depth >= occluder_depth;
} else {
// Perspective
let sphere_depth = -view.projection[3][2] / (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius);
meshlet_visible &= sphere_depth >= occluder_depth;
let meshlet_triangle_count = meshlets[meshlet_id].triangle_count;
let buffer_start = atomicAdd(&draw_indirect_args.vertex_count, meshlet_triangle_count * 3u) / 3u;
let cluster_id_packed = cluster_id << 6u;
for (var triangle_id = 0u; triangle_id < meshlet_triangle_count; triangle_id++) {
draw_triangle_buffer[buffer_start + triangle_id] = cluster_id_packed | triangle_id;
}
}
#endif
// Write the bitmask of whether or not the cluster was culled
let occlusion_bit = u32(meshlet_visible) << (cluster_id.x % 32u);
atomicOr(&meshlet_occlusion[cluster_id.x / 32u], occlusion_bit);
}
// https://stackoverflow.com/questions/21648630/radius-of-projected-sphere-in-screen-space/21649403#21649403

View File

@ -4,7 +4,8 @@ use super::{
};
use crate::{
Material, MeshFlags, MeshTransforms, MeshUniform, NotShadowCaster, NotShadowReceiver,
PreviousGlobalTransform, RenderMaterialInstances, ShadowView,
PreviousGlobalTransform, PreviousViewData, PreviousViewUniforms, RenderMaterialInstances,
ShadowView,
};
use bevy_asset::{AssetEvent, AssetId, AssetServer, Assets, Handle, UntypedAssetId};
use bevy_core_pipeline::core_3d::Camera3d;
@ -13,7 +14,7 @@ use bevy_ecs::{
entity::{Entity, EntityHashMap},
event::EventReader,
query::{AnyOf, Has},
system::{Commands, Query, Res, ResMut, Resource, SystemState},
system::{Commands, Local, Query, Res, ResMut, Resource, SystemState},
world::{FromWorld, World},
};
use bevy_render::{
@ -37,24 +38,33 @@ use std::{
/// [`MeshletMesh`] entities, as well as queuing uploads for any new meshlet mesh
/// assets that have not already been uploaded to the GPU.
pub fn extract_meshlet_meshes(
// TODO: Replace main_world when Extract<ResMut<Assets<MeshletMesh>>> is possible
mut main_world: ResMut<MainWorld>,
mut gpu_scene: ResMut<MeshletGpuScene>,
// TODO: Replace main_world and system_state when Extract<ResMut<Assets<MeshletMesh>>> is possible
mut main_world: ResMut<MainWorld>,
mut system_state: Local<
Option<
SystemState<(
Query<(
Entity,
&Handle<MeshletMesh>,
&GlobalTransform,
Option<&PreviousGlobalTransform>,
Option<&RenderLayers>,
Has<NotShadowReceiver>,
Has<NotShadowCaster>,
)>,
Res<AssetServer>,
ResMut<Assets<MeshletMesh>>,
EventReader<AssetEvent<MeshletMesh>>,
)>,
>,
>,
) {
let mut system_state: SystemState<(
Query<(
Entity,
&Handle<MeshletMesh>,
&GlobalTransform,
Option<&PreviousGlobalTransform>,
Option<&RenderLayers>,
Has<NotShadowReceiver>,
Has<NotShadowCaster>,
)>,
Res<AssetServer>,
ResMut<Assets<MeshletMesh>>,
EventReader<AssetEvent<MeshletMesh>>,
)> = SystemState::new(&mut main_world);
if system_state.is_none() {
*system_state = Some(SystemState::new(&mut main_world));
}
let system_state = system_state.as_mut().unwrap();
let (instances_query, asset_server, mut assets, mut asset_events) =
system_state.get_mut(&mut main_world);
@ -210,10 +220,6 @@ pub fn prepare_meshlet_per_frame_resources(
render_device: Res<RenderDevice>,
mut commands: Commands,
) {
gpu_scene
.previous_cluster_id_starts
.retain(|_, (_, active)| *active);
if gpu_scene.scene_meshlet_count == 0 {
return;
}
@ -238,21 +244,19 @@ pub fn prepare_meshlet_per_frame_resources(
&render_device,
&render_queue,
);
upload_storage_buffer(
&mut gpu_scene.previous_cluster_ids,
&render_device,
&render_queue,
);
// Early submission for GPU data uploads to start while the render graph records commands
render_queue.submit([]);
let needed_buffer_size = 4 * gpu_scene.scene_triangle_count;
let visibility_buffer_draw_index_buffer =
match &mut gpu_scene.visibility_buffer_draw_index_buffer {
let visibility_buffer_draw_triangle_buffer =
match &mut gpu_scene.visibility_buffer_draw_triangle_buffer {
Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(),
slot => {
let buffer = render_device.create_buffer(&BufferDescriptor {
label: Some("meshlet_visibility_buffer_draw_index_buffer"),
label: Some("meshlet_visibility_buffer_draw_triangle_buffer"),
size: needed_buffer_size,
usage: BufferUsages::STORAGE | BufferUsages::INDEX,
usage: BufferUsages::STORAGE,
mapped_at_creation: false,
});
*slot = Some(buffer.clone());
@ -260,7 +264,8 @@ pub fn prepare_meshlet_per_frame_resources(
}
};
let needed_buffer_size = gpu_scene.scene_meshlet_count.div_ceil(32) as u64 * 4;
let needed_buffer_size =
gpu_scene.scene_meshlet_count.div_ceil(u32::BITS) as u64 * size_of::<u32>() as u64;
for (view_entity, view, render_layers, (_, shadow_view)) in &views {
let instance_visibility = gpu_scene
.view_instance_visibility
@ -290,29 +295,19 @@ pub fn prepare_meshlet_per_frame_resources(
upload_storage_buffer(instance_visibility, &render_device, &render_queue);
let instance_visibility = instance_visibility.buffer().unwrap().clone();
// Early submission for GPU data uploads to start while the render graph records commands
render_queue.submit([]);
let create_occlusion_buffer = || {
render_device.create_buffer(&BufferDescriptor {
label: Some("meshlet_occlusion_buffer"),
size: needed_buffer_size,
usage: BufferUsages::STORAGE | BufferUsages::COPY_DST,
mapped_at_creation: false,
})
let second_pass_candidates_buffer = match &mut gpu_scene.second_pass_candidates_buffer {
Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(),
slot => {
let buffer = render_device.create_buffer(&BufferDescriptor {
label: Some("meshlet_second_pass_candidates"),
size: needed_buffer_size,
usage: BufferUsages::STORAGE | BufferUsages::COPY_DST,
mapped_at_creation: false,
});
*slot = Some(buffer.clone());
buffer
}
};
let (previous_occlusion_buffer, occlusion_buffer, occlusion_buffer_needs_clearing) =
match gpu_scene.previous_occlusion_buffers.get(&view_entity) {
Some((buffer_a, buffer_b)) if buffer_b.size() >= needed_buffer_size => {
(buffer_a.clone(), buffer_b.clone(), true)
}
Some((buffer_a, _)) => (buffer_a.clone(), create_occlusion_buffer(), false),
None => (create_occlusion_buffer(), create_occlusion_buffer(), false),
};
gpu_scene.previous_occlusion_buffers.insert(
view_entity,
(occlusion_buffer.clone(), previous_occlusion_buffer.clone()),
);
let visibility_buffer = TextureDescriptor {
label: Some("meshlet_visibility_buffer"),
@ -354,19 +349,19 @@ pub fn prepare_meshlet_per_frame_resources(
usage: BufferUsages::STORAGE | BufferUsages::INDIRECT,
});
let depth_size = Extent3d {
let depth_pyramid_size = Extent3d {
// Round down to the nearest power of 2 to ensure depth is conservative
width: previous_power_of_2(view.viewport.z),
height: previous_power_of_2(view.viewport.w),
depth_or_array_layers: 1,
};
let depth_mip_count = depth_size.width.max(depth_size.height).ilog2() + 1;
let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2);
let depth_pyramid = texture_cache.get(
&render_device,
TextureDescriptor {
label: Some("meshlet_depth_pyramid"),
size: depth_size,
mip_level_count: depth_mip_count,
size: depth_pyramid_size,
mip_level_count: depth_pyramid_mip_count,
sample_count: 1,
dimension: TextureDimension::D2,
format: TextureFormat::R32Float,
@ -374,7 +369,7 @@ pub fn prepare_meshlet_per_frame_resources(
view_formats: &[],
},
);
let depth_pyramid_mips = (0..depth_mip_count)
let depth_pyramid_mips = (0..depth_pyramid_mip_count)
.map(|i| {
depth_pyramid.texture.create_view(&TextureViewDescriptor {
label: Some("meshlet_depth_pyramid_texture_view"),
@ -384,10 +379,19 @@ pub fn prepare_meshlet_per_frame_resources(
base_mip_level: i,
mip_level_count: Some(1),
base_array_layer: 0,
array_layer_count: None,
array_layer_count: Some(1),
})
})
.collect();
.collect::<Box<[TextureView]>>();
let depth_pyramid_all_mips = depth_pyramid.default_view.clone();
let previous_depth_pyramid = match gpu_scene.previous_depth_pyramids.get(&view_entity) {
Some(texture_view) => texture_view.clone(),
None => depth_pyramid_all_mips.clone(),
};
gpu_scene
.previous_depth_pyramids
.insert(view_entity, depth_pyramid_all_mips.clone());
let material_depth_color = TextureDescriptor {
label: Some("meshlet_material_depth_color"),
@ -422,17 +426,16 @@ pub fn prepare_meshlet_per_frame_resources(
let not_shadow_view = shadow_view.is_none();
commands.entity(view_entity).insert(MeshletViewResources {
scene_meshlet_count: gpu_scene.scene_meshlet_count,
previous_occlusion_buffer,
occlusion_buffer,
occlusion_buffer_needs_clearing,
second_pass_candidates_buffer,
instance_visibility,
visibility_buffer: not_shadow_view
.then(|| texture_cache.get(&render_device, visibility_buffer)),
visibility_buffer_draw_indirect_args_first,
visibility_buffer_draw_indirect_args_second,
visibility_buffer_draw_index_buffer: visibility_buffer_draw_index_buffer.clone(),
depth_pyramid,
visibility_buffer_draw_triangle_buffer: visibility_buffer_draw_triangle_buffer.clone(),
depth_pyramid_all_mips,
depth_pyramid_mips,
previous_depth_pyramid,
material_depth_color: not_shadow_view
.then(|| texture_cache.get(&render_device, material_depth_color)),
material_depth: not_shadow_view
@ -449,10 +452,14 @@ pub fn prepare_meshlet_view_bind_groups(
AnyOf<(&ViewDepthTexture, &ShadowView)>,
)>,
view_uniforms: Res<ViewUniforms>,
previous_view_uniforms: Res<PreviousViewUniforms>,
render_device: Res<RenderDevice>,
mut commands: Commands,
) {
let Some(view_uniforms) = view_uniforms.uniforms.binding() else {
let (Some(view_uniforms), Some(previous_view_uniforms)) = (
view_uniforms.uniforms.binding(),
previous_view_uniforms.uniforms.binding(),
) else {
return;
};
@ -462,56 +469,50 @@ pub fn prepare_meshlet_view_bind_groups(
gpu_scene.meshlet_bounding_spheres.binding(),
gpu_scene.thread_instance_ids.binding().unwrap(),
gpu_scene.instance_uniforms.binding().unwrap(),
gpu_scene.view_instance_visibility[&view_entity]
.binding()
.unwrap(),
view_resources.occlusion_buffer.as_entire_binding(),
gpu_scene.previous_cluster_ids.binding().unwrap(),
view_resources.previous_occlusion_buffer.as_entire_binding(),
view_uniforms.clone(),
&view_resources.depth_pyramid.default_view,
));
let culling = render_device.create_bind_group(
"meshlet_culling_bind_group",
&gpu_scene.culling_bind_group_layout,
&entries,
);
let entries = BindGroupEntries::sequential((
view_resources.occlusion_buffer.as_entire_binding(),
gpu_scene.thread_meshlet_ids.binding().unwrap(),
gpu_scene.previous_cluster_ids.binding().unwrap(),
view_resources.previous_occlusion_buffer.as_entire_binding(),
view_resources.instance_visibility.as_entire_binding(),
view_resources
.second_pass_candidates_buffer
.as_entire_binding(),
gpu_scene.meshlets.binding(),
view_resources
.visibility_buffer_draw_indirect_args_first
.as_entire_binding(),
view_resources
.visibility_buffer_draw_index_buffer
.visibility_buffer_draw_triangle_buffer
.as_entire_binding(),
&view_resources.previous_depth_pyramid,
view_uniforms.clone(),
previous_view_uniforms.clone(),
));
let write_index_buffer_first = render_device.create_bind_group(
"meshlet_write_index_buffer_first_bind_group",
&gpu_scene.write_index_buffer_bind_group_layout,
let culling_first = render_device.create_bind_group(
"meshlet_culling_first_bind_group",
&gpu_scene.culling_bind_group_layout,
&entries,
);
let entries = BindGroupEntries::sequential((
view_resources.occlusion_buffer.as_entire_binding(),
gpu_scene.thread_meshlet_ids.binding().unwrap(),
gpu_scene.previous_cluster_ids.binding().unwrap(),
view_resources.previous_occlusion_buffer.as_entire_binding(),
gpu_scene.meshlet_bounding_spheres.binding(),
gpu_scene.thread_instance_ids.binding().unwrap(),
gpu_scene.instance_uniforms.binding().unwrap(),
view_resources.instance_visibility.as_entire_binding(),
view_resources
.second_pass_candidates_buffer
.as_entire_binding(),
gpu_scene.meshlets.binding(),
view_resources
.visibility_buffer_draw_indirect_args_second
.as_entire_binding(),
view_resources
.visibility_buffer_draw_index_buffer
.visibility_buffer_draw_triangle_buffer
.as_entire_binding(),
&view_resources.depth_pyramid_all_mips,
view_uniforms.clone(),
previous_view_uniforms.clone(),
));
let write_index_buffer_second = render_device.create_bind_group(
"meshlet_write_index_buffer_second_bind_group",
&gpu_scene.write_index_buffer_bind_group_layout,
let culling_second = render_device.create_bind_group(
"meshlet_culling_second_bind_group",
&gpu_scene.culling_bind_group_layout,
&entries,
);
@ -547,7 +548,7 @@ pub fn prepare_meshlet_view_bind_groups(
gpu_scene.instance_uniforms.binding().unwrap(),
gpu_scene.instance_material_ids.binding().unwrap(),
view_resources
.visibility_buffer_draw_index_buffer
.visibility_buffer_draw_triangle_buffer
.as_entire_binding(),
view_uniforms.clone(),
));
@ -596,9 +597,8 @@ pub fn prepare_meshlet_view_bind_groups(
});
commands.entity(view_entity).insert(MeshletViewBindGroups {
culling,
write_index_buffer_first,
write_index_buffer_second,
culling_first,
culling_second,
downsample_depth,
visibility_buffer_raster,
copy_material_depth,
@ -631,13 +631,11 @@ pub struct MeshletGpuScene {
instance_material_ids: StorageBuffer<Vec<u32>>,
thread_instance_ids: StorageBuffer<Vec<u32>>,
thread_meshlet_ids: StorageBuffer<Vec<u32>>,
previous_cluster_ids: StorageBuffer<Vec<u32>>,
previous_cluster_id_starts: HashMap<(Entity, AssetId<MeshletMesh>), (u32, bool)>,
previous_occlusion_buffers: EntityHashMap<(Buffer, Buffer)>,
visibility_buffer_draw_index_buffer: Option<Buffer>,
second_pass_candidates_buffer: Option<Buffer>,
previous_depth_pyramids: EntityHashMap<TextureView>,
visibility_buffer_draw_triangle_buffer: Option<Buffer>,
culling_bind_group_layout: BindGroupLayout,
write_index_buffer_bind_group_layout: BindGroupLayout,
visibility_buffer_raster_bind_group_layout: BindGroupLayout,
downsample_depth_bind_group_layout: BindGroupLayout,
copy_material_depth_bind_group_layout: BindGroupLayout,
@ -687,14 +685,9 @@ impl FromWorld for MeshletGpuScene {
buffer.set_label(Some("meshlet_thread_meshlet_ids"));
buffer
},
previous_cluster_ids: {
let mut buffer = StorageBuffer::default();
buffer.set_label(Some("meshlet_previous_cluster_ids"));
buffer
},
previous_cluster_id_starts: HashMap::new(),
previous_occlusion_buffers: EntityHashMap::default(),
visibility_buffer_draw_index_buffer: None,
second_pass_candidates_buffer: None,
previous_depth_pyramids: EntityHashMap::default(),
visibility_buffer_draw_triangle_buffer: None,
// TODO: Buffer min sizes
culling_bind_group_layout: render_device.create_bind_group_layout(
@ -709,24 +702,11 @@ impl FromWorld for MeshletGpuScene {
storage_buffer_read_only_sized(false, None),
storage_buffer_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
uniform_buffer::<ViewUniform>(true),
storage_buffer_sized(false, None),
storage_buffer_sized(false, None),
texture_2d(TextureSampleType::Float { filterable: false }),
),
),
),
write_index_buffer_bind_group_layout: render_device.create_bind_group_layout(
"meshlet_write_index_buffer_bind_group_layout",
&BindGroupLayoutEntries::sequential(
ShaderStages::COMPUTE,
(
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_sized(false, None),
storage_buffer_sized(false, None),
uniform_buffer::<ViewUniform>(true),
uniform_buffer::<PreviousViewData>(true),
),
),
),
@ -806,11 +786,7 @@ impl MeshletGpuScene {
self.instance_material_ids.get_mut().clear();
self.thread_instance_ids.get_mut().clear();
self.thread_meshlet_ids.get_mut().clear();
self.previous_cluster_ids.get_mut().clear();
self.previous_cluster_id_starts
.values_mut()
.for_each(|(_, active)| *active = false);
// TODO: Remove unused entries for previous_occlusion_buffers
// TODO: Remove unused entries for view_instance_visibility and previous_depth_pyramids
}
fn queue_meshlet_mesh_upload(
@ -872,33 +848,14 @@ impl MeshletGpuScene {
let meshlets_slice = (meshlets_slice.start as u32 / size_of::<Meshlet>() as u32)
..(meshlets_slice.end as u32 / size_of::<Meshlet>() as u32);
let current_cluster_id_start = self.scene_meshlet_count;
self.scene_meshlet_count += meshlets_slice.end - meshlets_slice.start;
self.scene_triangle_count += triangle_count;
// Calculate the previous cluster IDs for each meshlet for this instance
let previous_cluster_id_start = self
.previous_cluster_id_starts
.entry((instance, handle.id()))
.or_insert((0, true));
let previous_cluster_ids = if previous_cluster_id_start.1 {
0..(meshlets_slice.len() as u32)
} else {
let start = previous_cluster_id_start.0;
start..(meshlets_slice.len() as u32 + start)
};
// Append per-cluster data for this frame
self.thread_instance_ids
.get_mut()
.extend(std::iter::repeat(instance_index).take(meshlets_slice.len()));
self.thread_meshlet_ids.get_mut().extend(meshlets_slice);
self.previous_cluster_ids
.get_mut()
.extend(previous_cluster_ids);
*previous_cluster_id_start = (current_cluster_id_start, true);
}
/// Get the depth value for use with the material depth texture for a given [`Material`] asset.
@ -920,10 +877,6 @@ impl MeshletGpuScene {
self.culling_bind_group_layout.clone()
}
pub fn write_index_buffer_bind_group_layout(&self) -> BindGroupLayout {
self.write_index_buffer_bind_group_layout.clone()
}
pub fn downsample_depth_bind_group_layout(&self) -> BindGroupLayout {
self.downsample_depth_bind_group_layout.clone()
}
@ -944,25 +897,23 @@ impl MeshletGpuScene {
#[derive(Component)]
pub struct MeshletViewResources {
pub scene_meshlet_count: u32,
previous_occlusion_buffer: Buffer,
pub occlusion_buffer: Buffer,
pub occlusion_buffer_needs_clearing: bool,
pub instance_visibility: Buffer,
pub second_pass_candidates_buffer: Buffer,
instance_visibility: Buffer,
pub visibility_buffer: Option<CachedTexture>,
pub visibility_buffer_draw_indirect_args_first: Buffer,
pub visibility_buffer_draw_indirect_args_second: Buffer,
visibility_buffer_draw_index_buffer: Buffer,
pub depth_pyramid: CachedTexture,
visibility_buffer_draw_triangle_buffer: Buffer,
depth_pyramid_all_mips: TextureView,
pub depth_pyramid_mips: Box<[TextureView]>,
previous_depth_pyramid: TextureView,
pub material_depth_color: Option<CachedTexture>,
pub material_depth: Option<CachedTexture>,
}
#[derive(Component)]
pub struct MeshletViewBindGroups {
pub culling: BindGroup,
pub write_index_buffer_first: BindGroup,
pub write_index_buffer_second: BindGroup,
pub culling_first: BindGroup,
pub culling_second: BindGroup,
pub downsample_depth: Box<[BindGroup]>,
pub visibility_buffer_raster: BindGroup,
pub copy_material_depth: Option<BindGroup>,

View File

@ -14,7 +14,7 @@ use bevy_render::{
render_resource::*,
view::ExtractedView,
};
use bevy_utils::HashMap;
use bevy_utils::{HashMap, HashSet};
use std::hash::Hash;
/// A list of `(Material ID, Pipeline, BindGroup)` for a view for use in [`super::MeshletMainOpaquePass3dNode`].
@ -137,7 +137,7 @@ pub fn prepare_material_meshlet_meshes_main_opaque_pass<M: Material>(
view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList);
for material_id in render_material_instances.values() {
for material_id in render_material_instances.values().collect::<HashSet<_>>() {
let Some(material) = render_materials.get(*material_id) else {
continue;
};
@ -262,7 +262,7 @@ pub fn prepare_material_meshlet_meshes_prepass<M: Material>(
view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList);
for material_id in render_material_instances.values() {
for material_id in render_material_instances.values().collect::<HashSet<_>>() {
let Some(material) = render_materials.get(*material_id) else {
continue;
};

View File

@ -2,6 +2,7 @@
#import bevy_pbr::mesh_types::Mesh
#import bevy_render::view::View
#import bevy_pbr::prepass_bindings::PreviousViewUniforms
struct PackedMeshletVertex {
a: vec4<f32>,
@ -56,11 +57,13 @@ struct DrawIndirectArgs {
@group(0) @binding(2) var<storage, read> meshlet_thread_instance_ids: array<u32>; // Per cluster (instance of a meshlet)
@group(0) @binding(3) var<storage, read> meshlet_instance_uniforms: array<Mesh>; // Per entity instance
@group(0) @binding(4) var<storage, read> meshlet_view_instance_visibility: array<u32>; // 1 bit per entity instance, packed as a bitmask
@group(0) @binding(5) var<storage, read_write> meshlet_occlusion: array<atomic<u32>>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask
@group(0) @binding(6) var<storage, read> meshlet_previous_cluster_ids: array<u32>; // Per cluster (instance of a meshlet)
@group(0) @binding(7) var<storage, read> meshlet_previous_occlusion: array<u32>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask
@group(0) @binding(8) var<uniform> view: View;
@group(0) @binding(9) var depth_pyramid: texture_2d<f32>; // Generated from the first raster pass (unused in the first pass but still bound)
@group(0) @binding(5) var<storage, read_write> meshlet_second_pass_candidates: array<atomic<u32>>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask
@group(0) @binding(6) var<storage, read> meshlets: array<Meshlet>; // Per asset meshlet
@group(0) @binding(7) var<storage, read_write> draw_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/meshlets/triangles
@group(0) @binding(8) var<storage, read_write> draw_triangle_buffer: array<u32>; // Single object shared between all workgroups/meshlets/triangles
@group(0) @binding(9) var depth_pyramid: texture_2d<f32>; // From the end of the last frame for the first culling pass, and from the first raster pass for the second culling pass
@group(0) @binding(10) var<uniform> view: View;
@group(0) @binding(11) var<uniform> previous_view: PreviousViewUniforms;
fn should_cull_instance(instance_id: u32) -> bool {
let bit_offset = instance_id % 32u;
@ -68,34 +71,10 @@ fn should_cull_instance(instance_id: u32) -> bool {
return bool(extractBits(packed_visibility, bit_offset, 1u));
}
fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool {
let previous_cluster_id = meshlet_previous_cluster_ids[cluster_id];
let packed_occlusion = meshlet_previous_occlusion[previous_cluster_id / 32u];
let bit_offset = previous_cluster_id % 32u;
return bool(extractBits(packed_occlusion, bit_offset, 1u));
}
#endif
#ifdef MESHLET_WRITE_INDEX_BUFFER_PASS
@group(0) @binding(0) var<storage, read> meshlet_occlusion: array<u32>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask
@group(0) @binding(1) var<storage, read> meshlet_thread_meshlet_ids: array<u32>; // Per cluster (instance of a meshlet)
@group(0) @binding(2) var<storage, read> meshlet_previous_cluster_ids: array<u32>; // Per cluster (instance of a meshlet)
@group(0) @binding(3) var<storage, read> meshlet_previous_occlusion: array<u32>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask
@group(0) @binding(4) var<storage, read> meshlets: array<Meshlet>; // Per asset meshlet
@group(0) @binding(5) var<storage, read_write> draw_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/meshlets/triangles
@group(0) @binding(6) var<storage, read_write> draw_index_buffer: array<u32>; // Single object shared between all workgroups/meshlets/triangles
fn get_meshlet_occlusion(cluster_id: u32) -> bool {
let packed_occlusion = meshlet_occlusion[cluster_id / 32u];
fn meshlet_is_second_pass_candidate(cluster_id: u32) -> bool {
let packed_candidates = meshlet_second_pass_candidates[cluster_id / 32u];
let bit_offset = cluster_id % 32u;
return bool(extractBits(packed_occlusion, bit_offset, 1u));
}
fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool {
let previous_cluster_id = meshlet_previous_cluster_ids[cluster_id];
let packed_occlusion = meshlet_previous_occlusion[previous_cluster_id / 32u];
let bit_offset = previous_cluster_id % 32u;
return bool(extractBits(packed_occlusion, bit_offset, 1u));
return bool(extractBits(packed_candidates, bit_offset, 1u));
}
#endif
@ -108,7 +87,7 @@ fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool {
@group(0) @binding(5) var<storage, read> meshlet_thread_instance_ids: array<u32>; // Per cluster (instance of a meshlet)
@group(0) @binding(6) var<storage, read> meshlet_instance_uniforms: array<Mesh>; // Per entity instance
@group(0) @binding(7) var<storage, read> meshlet_instance_material_ids: array<u32>; // Per entity instance
@group(0) @binding(8) var<storage, read> draw_index_buffer: array<u32>; // Single object shared between all workgroups/meshlets/triangles
@group(0) @binding(8) var<storage, read> draw_triangle_buffer: array<u32>; // Single object shared between all workgroups/meshlets/triangles
@group(0) @binding(9) var<uniform> view: View;
fn get_meshlet_index(index_id: u32) -> u32 {

View File

@ -50,7 +50,6 @@ use self::{
pipelines::{
MeshletPipelines, MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, MESHLET_CULLING_SHADER_HANDLE,
MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE,
MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE,
},
visibility_buffer_raster_node::MeshletVisibilityBufferRasterPassNode,
};
@ -131,12 +130,6 @@ impl Plugin for MeshletPlugin {
"cull_meshlets.wgsl",
Shader::from_wgsl
);
load_internal_asset!(
app,
MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE,
"write_index_buffer.wgsl",
Shader::from_wgsl
);
load_internal_asset!(
app,
MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE,

View File

@ -10,21 +10,17 @@ use bevy_ecs::{
use bevy_render::render_resource::*;
pub const MESHLET_CULLING_SHADER_HANDLE: Handle<Shader> = Handle::weak_from_u128(4325134235233421);
pub const MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE: Handle<Shader> =
Handle::weak_from_u128(5325134235233421);
pub const MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle<Shader> =
Handle::weak_from_u128(6325134235233421);
Handle::weak_from_u128(5325134235233421);
pub const MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE: Handle<Shader> =
Handle::weak_from_u128(7325134235233421);
Handle::weak_from_u128(6325134235233421);
pub const MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE: Handle<Shader> =
Handle::weak_from_u128(8325134235233421);
Handle::weak_from_u128(7325134235233421);
#[derive(Resource)]
pub struct MeshletPipelines {
cull_first: CachedComputePipelineId,
cull_second: CachedComputePipelineId,
write_index_buffer_first: CachedComputePipelineId,
write_index_buffer_second: CachedComputePipelineId,
downsample_depth: CachedRenderPipelineId,
visibility_buffer_raster: CachedRenderPipelineId,
visibility_buffer_raster_depth_only: CachedRenderPipelineId,
@ -36,7 +32,6 @@ impl FromWorld for MeshletPipelines {
fn from_world(world: &mut World) -> Self {
let gpu_scene = world.resource::<MeshletGpuScene>();
let cull_layout = gpu_scene.culling_bind_group_layout();
let write_index_buffer_layout = gpu_scene.write_index_buffer_bind_group_layout();
let downsample_depth_layout = gpu_scene.downsample_depth_bind_group_layout();
let visibility_buffer_layout = gpu_scene.visibility_buffer_raster_bind_group_layout();
let copy_material_depth_layout = gpu_scene.copy_material_depth_bind_group_layout();
@ -48,7 +43,10 @@ impl FromWorld for MeshletPipelines {
layout: vec![cull_layout.clone()],
push_constant_ranges: vec![],
shader: MESHLET_CULLING_SHADER_HANDLE,
shader_defs: vec!["MESHLET_CULLING_PASS".into()],
shader_defs: vec![
"MESHLET_CULLING_PASS".into(),
"MESHLET_FIRST_CULLING_PASS".into(),
],
entry_point: "cull_meshlets".into(),
}),
@ -64,31 +62,6 @@ impl FromWorld for MeshletPipelines {
entry_point: "cull_meshlets".into(),
}),
write_index_buffer_first: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some("meshlet_write_index_buffer_first_pipeline".into()),
layout: vec![write_index_buffer_layout.clone()],
push_constant_ranges: vec![],
shader: MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE,
shader_defs: vec!["MESHLET_WRITE_INDEX_BUFFER_PASS".into()],
entry_point: "write_index_buffer".into(),
},
),
write_index_buffer_second: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some("meshlet_write_index_buffer_second_pipeline".into()),
layout: vec![write_index_buffer_layout],
push_constant_ranges: vec![],
shader: MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE,
shader_defs: vec![
"MESHLET_WRITE_INDEX_BUFFER_PASS".into(),
"MESHLET_SECOND_WRITE_INDEX_BUFFER_PASS".into(),
],
entry_point: "write_index_buffer".into(),
},
),
downsample_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor {
label: Some("meshlet_downsample_depth".into()),
layout: vec![downsample_depth_layout],
@ -197,7 +170,9 @@ impl FromWorld for MeshletPipelines {
visibility_buffer_raster_depth_only_clamp_ortho: pipeline_cache.queue_render_pipeline(
RenderPipelineDescriptor {
label: Some("visibility_buffer_raster_depth_only_clamp_ortho_pipeline".into()),
label: Some(
"meshlet_visibility_buffer_raster_depth_only_clamp_ortho_pipeline".into(),
),
layout: vec![visibility_buffer_layout],
push_constant_ranges: vec![],
vertex: VertexState {
@ -267,8 +242,6 @@ impl MeshletPipelines {
pub fn get(
world: &World,
) -> Option<(
&ComputePipeline,
&ComputePipeline,
&ComputePipeline,
&ComputePipeline,
&RenderPipeline,
@ -282,8 +255,6 @@ impl MeshletPipelines {
Some((
pipeline_cache.get_compute_pipeline(pipeline.cull_first)?,
pipeline_cache.get_compute_pipeline(pipeline.cull_second)?,
pipeline_cache.get_compute_pipeline(pipeline.write_index_buffer_first)?,
pipeline_cache.get_compute_pipeline(pipeline.write_index_buffer_second)?,
pipeline_cache.get_render_pipeline(pipeline.downsample_depth)?,
pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster)?,
pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster_depth_only)?,

View File

@ -7,7 +7,7 @@
meshlet_thread_instance_ids,
meshlet_instance_uniforms,
meshlet_instance_material_ids,
draw_index_buffer,
draw_triangle_buffer,
view,
get_meshlet_index,
unpack_meshlet_vertex,
@ -38,9 +38,9 @@ struct FragmentOutput {
@vertex
fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput {
let packed_ids = draw_index_buffer[vertex_index / 3u];
let cluster_id = packed_ids >> 8u;
let triangle_id = extractBits(packed_ids, 0u, 8u);
let packed_ids = draw_triangle_buffer[vertex_index / 3u];
let cluster_id = packed_ids >> 6u;
let triangle_id = extractBits(packed_ids, 0u, 6u);
let index_id = (triangle_id * 3u) + (vertex_index % 3u);
let meshlet_id = meshlet_thread_meshlet_ids[cluster_id];
let meshlet = meshlets[meshlet_id];

View File

@ -2,7 +2,7 @@ use super::{
gpu_scene::{MeshletViewBindGroups, MeshletViewResources},
pipelines::MeshletPipelines,
};
use crate::{LightEntity, ShadowView, ViewLightEntities};
use crate::{LightEntity, PreviousViewUniformOffset, ShadowView, ViewLightEntities};
use bevy_color::LinearRgba;
use bevy_ecs::{
query::QueryState,
@ -22,6 +22,7 @@ pub struct MeshletVisibilityBufferRasterPassNode {
&'static ExtractedCamera,
&'static ViewDepthTexture,
&'static ViewUniformOffset,
&'static PreviousViewUniformOffset,
&'static MeshletViewBindGroups,
&'static MeshletViewResources,
&'static ViewLightEntities,
@ -30,6 +31,7 @@ pub struct MeshletVisibilityBufferRasterPassNode {
&'static ShadowView,
&'static LightEntity,
&'static ViewUniformOffset,
&'static PreviousViewUniformOffset,
&'static MeshletViewBindGroups,
&'static MeshletViewResources,
)>,
@ -60,6 +62,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
camera,
view_depth,
view_offset,
previous_view_offset,
meshlet_view_bind_groups,
meshlet_view_resources,
lights,
@ -71,8 +74,6 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
let Some((
culling_first_pipeline,
culling_second_pipeline,
write_index_buffer_first_pipeline,
write_index_buffer_second_pipeline,
downsample_depth_pipeline,
visibility_buffer_raster_pipeline,
visibility_buffer_raster_depth_only_pipeline,
@ -83,41 +84,27 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
return Ok(());
};
let culling_workgroups = meshlet_view_resources.scene_meshlet_count.div_ceil(128);
let write_index_buffer_workgroups = (meshlet_view_resources.scene_meshlet_count as f32)
let culling_workgroups = (meshlet_view_resources.scene_meshlet_count.div_ceil(128) as f32)
.cbrt()
.ceil() as u32;
render_context
.command_encoder()
.push_debug_group("meshlet_visibility_buffer_raster_pass");
if meshlet_view_resources.occlusion_buffer_needs_clearing {
render_context.command_encoder().clear_buffer(
&meshlet_view_resources.occlusion_buffer,
0,
None,
);
}
cull_pass(
"meshlet_culling_first_pass",
render_context,
meshlet_view_bind_groups,
view_offset,
culling_first_pipeline,
culling_workgroups,
);
write_index_buffer_pass(
"meshlet_write_index_buffer_first_pass",
render_context,
&meshlet_view_bind_groups.write_index_buffer_first,
write_index_buffer_first_pipeline,
write_index_buffer_workgroups,
);
.push_debug_group("meshlet_visibility_buffer_raster");
render_context.command_encoder().clear_buffer(
&meshlet_view_resources.occlusion_buffer,
&meshlet_view_resources.second_pass_candidates_buffer,
0,
None,
);
cull_pass(
"culling_first",
render_context,
&meshlet_view_bind_groups.culling_first,
view_offset,
previous_view_offset,
culling_first_pipeline,
culling_workgroups,
);
raster_pass(
true,
render_context,
@ -136,20 +123,14 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
downsample_depth_pipeline,
);
cull_pass(
"meshlet_culling_second_pass",
"culling_second",
render_context,
meshlet_view_bind_groups,
&meshlet_view_bind_groups.culling_second,
view_offset,
previous_view_offset,
culling_second_pipeline,
culling_workgroups,
);
write_index_buffer_pass(
"meshlet_write_index_buffer_second_pass",
render_context,
&meshlet_view_bind_groups.write_index_buffer_second,
write_index_buffer_second_pipeline,
write_index_buffer_workgroups,
);
raster_pass(
false,
render_context,
@ -168,6 +149,12 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
copy_material_depth_pipeline,
camera,
);
downsample_depth(
render_context,
meshlet_view_resources,
meshlet_view_bind_groups,
downsample_depth_pipeline,
);
render_context.command_encoder().pop_debug_group();
for light_entity in &lights.lights {
@ -175,6 +162,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
shadow_view,
light_type,
view_offset,
previous_view_offset,
meshlet_view_bind_groups,
meshlet_view_resources,
)) = self.view_light_query.get_manual(world, *light_entity)
@ -188,36 +176,23 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
};
render_context.command_encoder().push_debug_group(&format!(
"meshlet_visibility_buffer_raster_pass: {}",
"meshlet_visibility_buffer_raster: {}",
shadow_view.pass_name
));
if meshlet_view_resources.occlusion_buffer_needs_clearing {
render_context.command_encoder().clear_buffer(
&meshlet_view_resources.occlusion_buffer,
0,
None,
);
}
cull_pass(
"meshlet_culling_first_pass",
render_context,
meshlet_view_bind_groups,
view_offset,
culling_first_pipeline,
culling_workgroups,
);
write_index_buffer_pass(
"meshlet_write_index_buffer_first_pass",
render_context,
&meshlet_view_bind_groups.write_index_buffer_first,
write_index_buffer_first_pipeline,
write_index_buffer_workgroups,
);
render_context.command_encoder().clear_buffer(
&meshlet_view_resources.occlusion_buffer,
&meshlet_view_resources.second_pass_candidates_buffer,
0,
None,
);
cull_pass(
"culling_first",
render_context,
&meshlet_view_bind_groups.culling_first,
view_offset,
previous_view_offset,
culling_first_pipeline,
culling_workgroups,
);
raster_pass(
true,
render_context,
@ -236,20 +211,14 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
downsample_depth_pipeline,
);
cull_pass(
"meshlet_culling_second_pass",
"culling_second",
render_context,
meshlet_view_bind_groups,
&meshlet_view_bind_groups.culling_second,
view_offset,
previous_view_offset,
culling_second_pipeline,
culling_workgroups,
);
write_index_buffer_pass(
"meshlet_write_index_buffer_second_pass",
render_context,
&meshlet_view_bind_groups.write_index_buffer_second,
write_index_buffer_second_pipeline,
write_index_buffer_workgroups,
);
raster_pass(
false,
render_context,
@ -261,6 +230,12 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
shadow_visibility_buffer_pipeline,
None,
);
downsample_depth(
render_context,
meshlet_view_resources,
meshlet_view_bind_groups,
downsample_depth_pipeline,
);
render_context.command_encoder().pop_debug_group();
}
@ -271,8 +246,9 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
fn cull_pass(
label: &'static str,
render_context: &mut RenderContext,
meshlet_view_bind_groups: &MeshletViewBindGroups,
culling_bind_group: &BindGroup,
view_offset: &ViewUniformOffset,
previous_view_offset: &PreviousViewUniformOffset,
culling_pipeline: &ComputePipeline,
culling_workgroups: u32,
) {
@ -281,30 +257,13 @@ fn cull_pass(
label: Some(label),
timestamp_writes: None,
});
cull_pass.set_bind_group(0, &meshlet_view_bind_groups.culling, &[view_offset.offset]);
cull_pass.set_pipeline(culling_pipeline);
cull_pass.dispatch_workgroups(culling_workgroups, 1, 1);
}
fn write_index_buffer_pass(
label: &'static str,
render_context: &mut RenderContext,
write_index_buffer_bind_group: &BindGroup,
write_index_buffer_pipeline: &ComputePipeline,
write_index_buffer_workgroups: u32,
) {
let command_encoder = render_context.command_encoder();
let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
label: Some(label),
timestamp_writes: None,
});
cull_pass.set_bind_group(0, write_index_buffer_bind_group, &[]);
cull_pass.set_pipeline(write_index_buffer_pipeline);
cull_pass.dispatch_workgroups(
write_index_buffer_workgroups,
write_index_buffer_workgroups,
write_index_buffer_workgroups,
cull_pass.set_bind_group(
0,
culling_bind_group,
&[view_offset.offset, previous_view_offset.offset],
);
cull_pass.set_pipeline(culling_pipeline);
cull_pass.dispatch_workgroups(culling_workgroups, culling_workgroups, culling_workgroups);
}
#[allow(clippy::too_many_arguments)]
@ -351,9 +310,9 @@ fn raster_pass(
let mut draw_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
label: Some(if first_pass {
"meshlet_visibility_buffer_raster_first_pass"
"raster_first"
} else {
"meshlet_visibility_buffer_raster_second_pass"
"raster_second"
}),
color_attachments: if color_attachments_filled[0].is_none() {
&[]
@ -389,7 +348,7 @@ fn downsample_depth(
for i in 0..meshlet_view_resources.depth_pyramid_mips.len() {
let downsample_pass = RenderPassDescriptor {
label: Some("meshlet_downsample_depth_pass"),
label: Some("downsample_depth"),
color_attachments: &[Some(RenderPassColorAttachment {
view: &meshlet_view_resources.depth_pyramid_mips[i],
resolve_target: None,
@ -424,7 +383,7 @@ fn copy_material_depth_pass(
meshlet_view_bind_groups.copy_material_depth.as_ref(),
) {
let mut copy_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
label: Some("meshlet_copy_material_depth_pass"),
label: Some("copy_material_depth"),
color_attachments: &[],
depth_stencil_attachment: Some(RenderPassDepthStencilAttachment {
view: &material_depth.default_view,

View File

@ -96,10 +96,10 @@ struct VertexOutput {
/// Load the visibility buffer texture and resolve it into a VertexOutput.
fn resolve_vertex_output(frag_coord: vec4<f32>) -> VertexOutput {
let vbuffer = textureLoad(meshlet_visibility_buffer, vec2<i32>(frag_coord.xy), 0).r;
let cluster_id = vbuffer >> 8u;
let cluster_id = vbuffer >> 6u;
let meshlet_id = meshlet_thread_meshlet_ids[cluster_id];
let meshlet = meshlets[meshlet_id];
let triangle_id = extractBits(vbuffer, 0u, 8u);
let triangle_id = extractBits(vbuffer, 0u, 6u);
let index_ids = meshlet.start_index_id + vec3(triangle_id * 3u) + vec3(0u, 1u, 2u);
let indices = meshlet.start_vertex_id + vec3(get_meshlet_index(index_ids.x), get_meshlet_index(index_ids.y), get_meshlet_index(index_ids.z));
let vertex_ids = vec3(meshlet_vertex_ids[indices.x], meshlet_vertex_ids[indices.y], meshlet_vertex_ids[indices.z]);

View File

@ -1,43 +0,0 @@
#import bevy_pbr::meshlet_bindings::{
meshlet_thread_meshlet_ids,
meshlets,
draw_indirect_args,
draw_index_buffer,
get_meshlet_occlusion,
get_meshlet_previous_occlusion,
}
var<workgroup> draw_index_buffer_start_workgroup: u32;
/// This pass writes out a buffer of cluster + triangle IDs for the draw_indirect() call to rasterize each visible meshlet.
@compute
@workgroup_size(64, 1, 1) // 64 threads per workgroup, 1 workgroup per cluster, 1 thread per triangle
fn write_index_buffer(@builtin(workgroup_id) workgroup_id: vec3<u32>, @builtin(num_workgroups) num_workgroups: vec3<u32>, @builtin(local_invocation_index) triangle_id: u32) {
// Calculate the cluster ID for this workgroup
let cluster_id = dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u));
if cluster_id >= arrayLength(&meshlet_thread_meshlet_ids) { return; }
// If the meshlet was culled, then we don't need to draw it
if !get_meshlet_occlusion(cluster_id) { return; }
// If the meshlet was drawn in the first pass, and this is the second pass, then we don't need to draw it
#ifdef MESHLET_SECOND_WRITE_INDEX_BUFFER_PASS
if get_meshlet_previous_occlusion(cluster_id) { return; }
#endif
let meshlet_id = meshlet_thread_meshlet_ids[cluster_id];
let meshlet = meshlets[meshlet_id];
// Reserve space in the buffer for this meshlet's triangles, and broadcast the start of that slice to all threads
if triangle_id == 0u {
draw_index_buffer_start_workgroup = atomicAdd(&draw_indirect_args.vertex_count, meshlet.triangle_count * 3u);
draw_index_buffer_start_workgroup /= 3u;
}
workgroupBarrier();
// Each thread writes one triangle of the meshlet to the buffer slice reserved for the meshlet
if triangle_id < meshlet.triangle_count {
draw_index_buffer[draw_index_buffer_start_workgroup + triangle_id] = (cluster_id << 8u) | triangle_id;
}
}

View File

@ -32,6 +32,7 @@ use bevy_utils::tracing::error;
#[cfg(feature = "meshlet")]
use crate::meshlet::{
prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, MeshletGpuScene,
MeshletMesh,
};
use crate::*;
@ -102,7 +103,7 @@ where
.init_resource::<PrepassViewBindGroup>()
.init_resource::<SpecializedMeshPipelines<PrepassPipeline<M>>>()
.allow_ambiguous_resource::<SpecializedMeshPipelines<PrepassPipeline<M>>>()
.init_resource::<PreviousViewProjectionUniforms>();
.init_resource::<PreviousViewUniforms>();
}
fn finish(&self, app: &mut App) {
@ -199,9 +200,14 @@ pub struct PreviousViewData {
pub view_proj: Mat4,
}
#[cfg(not(feature = "meshlet"))]
type PreviousViewFilter = (With<Camera3d>, With<MotionVectorPrepass>);
#[cfg(feature = "meshlet")]
type PreviousViewFilter = Or<(With<Camera3d>, With<ShadowView>)>;
pub fn update_previous_view_data(
mut commands: Commands,
query: Query<(Entity, &Camera, &GlobalTransform), (With<Camera3d>, With<MotionVectorPrepass>)>,
query: Query<(Entity, &Camera, &GlobalTransform), PreviousViewFilter>,
) {
for (entity, camera, camera_transform) in &query {
let inverse_view = camera_transform.compute_matrix().inverse();
@ -215,10 +221,15 @@ pub fn update_previous_view_data(
#[derive(Component)]
pub struct PreviousGlobalTransform(pub Affine3A);
#[cfg(not(feature = "meshlet"))]
type PreviousMeshFilter = With<Handle<Mesh>>;
#[cfg(feature = "meshlet")]
type PreviousMeshFilter = Or<(With<Handle<Mesh>>, With<Handle<MeshletMesh>>)>;
pub fn update_mesh_previous_global_transforms(
mut commands: Commands,
views: Query<&Camera, (With<Camera3d>, With<MotionVectorPrepass>)>,
meshes: Query<(Entity, &GlobalTransform), With<Handle<Mesh>>>,
views: Query<&Camera, PreviousViewFilter>,
meshes: Query<(Entity, &GlobalTransform), PreviousMeshFilter>,
) {
let should_run = views.iter().any(|camera| camera.is_active);
@ -603,7 +614,7 @@ pub fn extract_camera_previous_view_data(
}
#[derive(Resource, Default)]
pub struct PreviousViewProjectionUniforms {
pub struct PreviousViewUniforms {
pub uniforms: DynamicUniformBuffer<PreviousViewData>,
}
@ -616,13 +627,13 @@ pub fn prepare_previous_view_uniforms(
mut commands: Commands,
render_device: Res<RenderDevice>,
render_queue: Res<RenderQueue>,
mut view_uniforms: ResMut<PreviousViewProjectionUniforms>,
views: Query<(Entity, &ExtractedView, Option<&PreviousViewData>), With<MotionVectorPrepass>>,
mut previous_view_uniforms: ResMut<PreviousViewUniforms>,
views: Query<(Entity, &ExtractedView, Option<&PreviousViewData>), PreviousViewFilter>,
) {
let views_iter = views.iter();
let view_count = views_iter.len();
let Some(mut writer) =
view_uniforms
previous_view_uniforms
.uniforms
.get_writer(view_count, &render_device, &render_queue)
else {
@ -658,7 +669,7 @@ pub fn prepare_prepass_view_bind_group<M: Material>(
prepass_pipeline: Res<PrepassPipeline<M>>,
view_uniforms: Res<ViewUniforms>,
globals_buffer: Res<GlobalsBuffer>,
previous_view_uniforms: Res<PreviousViewProjectionUniforms>,
previous_view_uniforms: Res<PreviousViewUniforms>,
mut prepass_view_bind_group: ResMut<PrepassViewBindGroup>,
) {
if let (Some(view_binding), Some(globals_binding)) = (
@ -905,6 +916,7 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetPrepassViewBindGroup<
type Param = SRes<PrepassViewBindGroup>;
type ViewQuery = (
Read<ViewUniformOffset>,
Has<MotionVectorPrepass>,
Option<Read<PreviousViewUniformOffset>>,
);
type ItemQuery = ();
@ -912,8 +924,9 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetPrepassViewBindGroup<
#[inline]
fn render<'w>(
_item: &P,
(view_uniform_offset, previous_view_uniform_offset): (
(view_uniform_offset, has_motion_vector_prepass, previous_view_uniform_offset): (
&'_ ViewUniformOffset,
bool,
Option<&'_ PreviousViewUniformOffset>,
),
_entity: Option<()>,
@ -922,21 +935,24 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetPrepassViewBindGroup<
) -> RenderCommandResult {
let prepass_view_bind_group = prepass_view_bind_group.into_inner();
if let Some(previous_view_uniform_offset) = previous_view_uniform_offset {
pass.set_bind_group(
I,
prepass_view_bind_group.motion_vectors.as_ref().unwrap(),
&[
view_uniform_offset.offset,
previous_view_uniform_offset.offset,
],
);
} else {
pass.set_bind_group(
I,
prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(),
&[view_uniform_offset.offset],
);
match previous_view_uniform_offset {
Some(previous_view_uniform_offset) if has_motion_vector_prepass => {
pass.set_bind_group(
I,
prepass_view_bind_group.motion_vectors.as_ref().unwrap(),
&[
view_uniform_offset.offset,
previous_view_uniform_offset.offset,
],
);
}
_ => {
pass.set_bind_group(
I,
prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(),
&[view_uniform_offset.offset],
);
}
}
RenderCommandResult::Success

View File

@ -20,7 +20,7 @@ const ASSET_URL: &str = "https://github.com/JMS55/bevy_meshlet_asset/blob/bd8698
fn main() -> ExitCode {
if !Path::new("./assets/models/bunny.meshlet_mesh").exists() {
println!("ERROR: Asset at path <bevy>/assets/models/bunny.meshlet_mesh is missing. Please download it from {ASSET_URL}");
eprintln!("ERROR: Asset at path <bevy>/assets/models/bunny.meshlet_mesh is missing. Please download it from {ASSET_URL}");
return ExitCode::FAILURE;
}