bevy/crates/bevy_pbr/src/meshlet/cull_clusters.wgsl
atlv 57e58ef997
Meshlet BVH Culling (#19318)
# Objective

- Merge @SparkyPotato's efforts to implement BVH-accelerated meshlet
culling.

## Solution

- Add hot reloading support
- Fix near-plane overculling
- Fix hzb sampling
- Fix orthographic error metric

## Testing

- Meshlet example, Nsight, hot-reloading and careful thinking

---------

Co-authored-by: SparkyPotato <noob.sparkypotato@gmail.com>
Co-authored-by: JMS55 <47158642+JMS55@users.noreply.github.com>
Co-authored-by: charlotte <charlotte.c.mcelwain@gmail.com>
2025-06-29 00:04:21 +00:00

94 lines
3.6 KiB
WebGPU Shading Language

#import bevy_pbr::meshlet_bindings::{
InstancedOffset,
get_aabb,
get_aabb_error,
constants,
view,
meshlet_instance_uniforms,
meshlet_cull_data,
meshlet_software_raster_indirect_args,
meshlet_hardware_raster_indirect_args,
meshlet_previous_raster_counts,
meshlet_raster_clusters,
meshlet_meshlet_cull_count_read,
meshlet_meshlet_cull_count_write,
meshlet_meshlet_cull_dispatch,
meshlet_meshlet_cull_queue,
}
#import bevy_pbr::meshlet_cull_shared::{
ScreenAabb,
project_aabb,
lod_error_is_imperceptible,
aabb_in_frustum,
should_occlusion_cull_aabb,
}
#import bevy_render::maths::affine3_to_square
// Per-cluster culling pass. Each invocation takes one entry from the meshlet cull queue, runs the
// LOD-error, frustum and occlusion tests on it, and appends survivors to either the software or
// the hardware raster list.
@compute
@workgroup_size(128, 1, 1) // 1 cluster per thread
fn cull_clusters(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
    let thread_index = global_invocation_id.x;
    if thread_index >= meshlet_meshlet_cull_count_read { return; }

    // The first pass consumes the queue from index 0 upward; any other pass consumes it from
    // `constants.rightmost_slot` downward.
#ifdef MESHLET_FIRST_CULLING_PASS
    let queue_index = thread_index;
#else
    let queue_index = constants.rightmost_slot - thread_index;
#endif
    let queued = meshlet_meshlet_cull_queue[queue_index];
    let instance_id = queued.instance_id;
    let cull_data_offset = queued.offset;
    // The same (instance, offset) pair is what gets written out on every path below.
    let record = InstancedOffset(instance_id, cull_data_offset);

    // Both the bounding box and the LOD error are decoded from the same `aabb` field. The helpers
    // take a pointer, so the value has to live in a `var`.
    var aabb_and_error = meshlet_cull_data[cull_data_offset].aabb;
    let bounds = get_aabb(&aabb_and_error);
    let lod_error = get_aabb_error(&aabb_and_error);
    let group_sphere = meshlet_cull_data[cull_data_offset].lod_group_sphere;

    // Error cull, then frustum cull; both apply in every pass.
    if !lod_error_is_imperceptible(group_sphere, lod_error, instance_id) { return; }
    if !aabb_in_frustum(bounds, instance_id) { return; }

    // Occlusion cull. An occluded cluster is never rasterized by this invocation. In the first
    // pass the same entry is re-queued, filling the queue from `constants.rightmost_slot`
    // downward (the region a non-first pass reads), so it can be checked again against this
    // frame's HZB.
    if should_occlusion_cull_aabb(bounds, instance_id) {
#ifdef MESHLET_FIRST_CULLING_PASS
        let write_index = atomicAdd(&meshlet_meshlet_cull_count_write, 1u);
        meshlet_meshlet_cull_queue[constants.rightmost_slot - write_index] = record;
        // Every 128th re-queued entry (128 = this shader's workgroup size) bumps the dispatch
        // count by one; presumably this sizes the indirect dispatch of the following pass.
        if (write_index & 127u) == 0u {
            atomicAdd(&meshlet_meshlet_cull_dispatch.x, 1u);
        }
#endif
        return;
    }

    // The cluster is visible: measure its screen-space footprint to pick a rasterizer.
    let clip_from_world = view.clip_from_world;
    let local_to_world = affine3_to_square(meshlet_instance_uniforms[instance_id].world_from_local);
    let local_to_clip = clip_from_world * local_to_world;

    // NOTE(review): a [3][3] element of exactly 1.0 presumably identifies an orthographic
    // projection, but the matrix inspected here is `view.clip_from_world`, not a pure projection
    // matrix - confirm this yields the intended near-plane value.
    var near_plane: f32;
    if clip_from_world[3][3] == 1.0 {
        near_plane = clip_from_world[3][2] / clip_from_world[2][2];
    } else {
        near_plane = clip_from_world[3][2];
    }

    var screen_bounds = ScreenAabb(vec3<f32>(0.0), vec3<f32>(0.0));
    var use_sw_raster = project_aabb(local_to_clip, near_plane, bounds, &screen_bounds);
    if use_sw_raster {
        // Software raster only when the projected box, scaled by `view.viewport.zw`, is at most
        // 64 units on both axes.
        let footprint = (screen_bounds.max.xy - screen_bounds.min.xy) * view.viewport.zw;
        use_sw_raster = footprint.x <= 64.0 && footprint.y <= 64.0;
    }

    if use_sw_raster {
        // Software-raster clusters are appended upward, after the previously recorded count.
        let claimed = atomicAdd(&meshlet_software_raster_indirect_args.x, 1u);
        meshlet_raster_clusters[meshlet_previous_raster_counts[0] + claimed] = record;
    } else {
        // Hardware-raster clusters are appended downward from the rightmost slot.
        let claimed = atomicAdd(&meshlet_hardware_raster_indirect_args.instance_count, 1u);
        let from_right = meshlet_previous_raster_counts[1] + claimed;
        meshlet_raster_clusters[constants.rightmost_slot - from_right] = record;
    }
}