From e1a0da0fa65c3e06b38f64f23023ca8f79ca140e Mon Sep 17 00:00:00 2001 From: JMS55 <47158642+JMS55@users.noreply.github.com> Date: Sat, 27 Apr 2024 22:30:20 -0700 Subject: [PATCH] Meshlet LOD-compatible two-pass occlusion culling (#12898) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keeping track of explicit visibility per cluster between frames does not work with LODs, and leads to worse culling (using the final depth buffer from the previous frame is more accurate). Instead, we need to generate a second depth pyramid after the second raster pass, and then use that in the first culling pass in the next frame to test if a cluster would have been visible last frame or not. As part of these changes, the write_index_buffer pass has been folded into the culling pass for a large performance gain, and to avoid tracking a lot of extra state that would be needed between passes. Prepass previous model/view stuff was adapted to work with meshlets as well. Also fixed a bug with materials, and other misc improvements. --------- Co-authored-by: François Co-authored-by: atlas dostal Co-authored-by: vero Co-authored-by: Patrick Walton Co-authored-by: Robert Swain --- .../bevy_pbr/src/meshlet/cull_meshlets.wgsl | 149 +++++---- crates/bevy_pbr/src/meshlet/gpu_scene.rs | 289 ++++++++---------- .../src/meshlet/material_draw_prepare.rs | 6 +- .../src/meshlet/meshlet_bindings.wgsl | 45 +-- crates/bevy_pbr/src/meshlet/mod.rs | 7 - crates/bevy_pbr/src/meshlet/pipelines.rs | 49 +-- .../src/meshlet/visibility_buffer_raster.wgsl | 8 +- .../meshlet/visibility_buffer_raster_node.rs | 157 ++++------ .../meshlet/visibility_buffer_resolve.wgsl | 4 +- .../src/meshlet/write_index_buffer.wgsl | 43 --- crates/bevy_pbr/src/prepass/mod.rs | 66 ++-- examples/3d/meshlet.rs | 2 +- 12 files changed, 337 insertions(+), 488 deletions(-) delete mode 100644 crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl diff --git a/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl b/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl index d2edeccd5f..2e04f3332b 100644 --- a/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl +++ b/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl @@ -3,36 +3,61 @@ meshlet_bounding_spheres, meshlet_thread_instance_ids, meshlet_instance_uniforms, - meshlet_occlusion, + meshlet_second_pass_candidates, + depth_pyramid, view, + previous_view, should_cull_instance, - get_meshlet_previous_occlusion, + meshlet_is_second_pass_candidate, + meshlets, + draw_indirect_args, + draw_triangle_buffer, } -#ifdef MESHLET_SECOND_CULLING_PASS -#import bevy_pbr::meshlet_bindings::depth_pyramid -#endif #import bevy_render::maths::affine3_to_square /// Culls individual clusters (1 per thread) in two passes (two pass occlusion culling), and outputs a bitmask of which clusters survived. -/// 1. The first pass is only frustum culling, on only the clusters that were visible last frame. -/// 2. The second pass performs both frustum and occlusion culling (using the depth buffer generated from the first pass), on all clusters. +/// 1. The first pass tests instance visibility, frustum culling, LOD selection, and finally occlusion culling using last frame's depth pyramid. +/// 2. The second pass performs occlusion culling (using the depth buffer generated from the first pass) on all clusters that passed +/// the instance, frustum, and LOD tests in the first pass, but were not visible last frame according to the occlusion culling. @compute @workgroup_size(128, 1, 1) // 128 threads per workgroup, 1 instanced meshlet per thread -fn cull_meshlets(@builtin(global_invocation_id) cluster_id: vec3) { - // Fetch the instance data and check for instance culling - if cluster_id.x >= arrayLength(&meshlet_thread_meshlet_ids) { return; } - let instance_id = meshlet_thread_instance_ids[cluster_id.x]; - if should_cull_instance(instance_id) { - return; - } +fn cull_meshlets( + @builtin(workgroup_id) workgroup_id: vec3, + @builtin(num_workgroups) num_workgroups: vec3, + @builtin(local_invocation_id) local_invocation_id: vec3, +) { + // Calculate the cluster ID for this thread + let cluster_id = local_invocation_id.x + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); + if cluster_id >= arrayLength(&meshlet_thread_meshlet_ids) { return; } - // Fetch other meshlet data - let meshlet_id = meshlet_thread_meshlet_ids[cluster_id.x]; +#ifdef MESHLET_SECOND_CULLING_PASS + if !meshlet_is_second_pass_candidate(cluster_id) { return; } +#endif + + // Check for instance culling + let instance_id = meshlet_thread_instance_ids[cluster_id]; +#ifdef MESHLET_FIRST_CULLING_PASS + if should_cull_instance(instance_id) { return; } +#endif + + // Calculate world-space culling bounding sphere for the cluster let instance_uniform = meshlet_instance_uniforms[instance_id]; + let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; let model = affine3_to_square(instance_uniform.model); let model_scale = max(length(model[0]), max(length(model[1]), length(model[2]))); let bounding_spheres = meshlet_bounding_spheres[meshlet_id]; + var culling_bounding_sphere_center = model * vec4(bounding_spheres.self_culling.center, 1.0); + var culling_bounding_sphere_radius = model_scale * bounding_spheres.self_culling.radius; + +#ifdef MESHLET_FIRST_CULLING_PASS + // Frustum culling + // TODO: Faster method from https://vkguide.dev/docs/gpudriven/compute_culling/#frustum-culling-function + for (var i = 0u; i < 6u; i++) { + if dot(view.frustum[i], culling_bounding_sphere_center) + culling_bounding_sphere_radius <= 0.0 { + return; + } + } // Calculate view-space LOD bounding sphere for the meshlet let lod_bounding_sphere_center = model * vec4(bounding_spheres.self_lod.center, 1.0); @@ -48,61 +73,59 @@ fn cull_meshlets(@builtin(global_invocation_id) cluster_id: vec3) { let lod_is_ok = lod_error_is_imperceptible(lod_bounding_sphere_center_view_space, lod_bounding_sphere_radius); let parent_lod_is_ok = lod_error_is_imperceptible(parent_lod_bounding_sphere_center_view_space, parent_lod_bounding_sphere_radius); if !lod_is_ok || parent_lod_is_ok { return; } - - // In the first pass, operate only on the clusters visible last frame. In the second pass, operate on all clusters. -#ifdef MESHLET_SECOND_CULLING_PASS - var meshlet_visible = true; -#else - var meshlet_visible = get_meshlet_previous_occlusion(cluster_id.x); - if !meshlet_visible { return; } #endif - // Calculate world-space culling bounding sphere for the cluster - let culling_bounding_sphere_center = model * vec4(bounding_spheres.self_culling.center, 1.0); - let culling_bounding_sphere_radius = model_scale * bounding_spheres.self_culling.radius; + // Project the culling bounding sphere to view-space for occlusion culling +#ifdef MESHLET_FIRST_CULLING_PASS + let previous_model = affine3_to_square(instance_uniform.previous_model); + let previous_model_scale = max(length(previous_model[0]), max(length(previous_model[1]), length(previous_model[2]))); + culling_bounding_sphere_center = previous_model * vec4(bounding_spheres.self_culling.center, 1.0); + culling_bounding_sphere_radius = previous_model_scale * bounding_spheres.self_culling.radius; +#endif + let culling_bounding_sphere_center_view_space = (view.inverse_view * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz; - // Frustum culling - // TODO: Faster method from https://vkguide.dev/docs/gpudriven/compute_culling/#frustum-culling-function - for (var i = 0u; i < 6u; i++) { - if !meshlet_visible { break; } - meshlet_visible &= dot(view.frustum[i], culling_bounding_sphere_center) > -culling_bounding_sphere_radius; + let aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius); + // Halve the view-space AABB size as the depth pyramid is half the view size + let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)) * 0.5; + let width = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; + let height = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; + let depth_level = max(0, i32(ceil(log2(max(width, height))))); // TODO: Naga doesn't like this being a u32 + let depth_pyramid_size = vec2(textureDimensions(depth_pyramid, depth_level)); + let aabb_top_left = vec2(aabb.xy * depth_pyramid_size); + + let depth_quad_a = textureLoad(depth_pyramid, aabb_top_left, depth_level).x; + let depth_quad_b = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 0u), depth_level).x; + let depth_quad_c = textureLoad(depth_pyramid, aabb_top_left + vec2(0u, 1u), depth_level).x; + let depth_quad_d = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 1u), depth_level).x; + let occluder_depth = min(min(depth_quad_a, depth_quad_b), min(depth_quad_c, depth_quad_d)); + + // Check whether or not the cluster would be occluded if drawn + var meshlet_visible: bool; + if view.projection[3][3] == 1.0 { + // Orthographic + let sphere_depth = view.projection[3][2] + (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius) * view.projection[2][2]; + meshlet_visible = sphere_depth >= occluder_depth; + } else { + // Perspective + let sphere_depth = -view.projection[3][2] / (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius); + meshlet_visible = sphere_depth >= occluder_depth; } -#ifdef MESHLET_SECOND_CULLING_PASS - // In the second culling pass, cull against the depth pyramid generated from the first pass + // Write if the cluster should be occlusion tested in the second pass +#ifdef MESHLET_FIRST_CULLING_PASS + let second_pass_candidate = u32(!meshlet_visible) << cluster_id % 32u; + atomicOr(&meshlet_second_pass_candidates[cluster_id / 32u], second_pass_candidate); +#endif + + // Append a list of this cluster's triangles to draw if not culled if meshlet_visible { - let culling_bounding_sphere_center_view_space = (view.inverse_view * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz; - let aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius); - - // Halve the AABB size because the first depth mip resampling pass cut the full screen resolution into a power of two conservatively - let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)) * 0.5; - let width = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; - let height = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; - let depth_level = max(0, i32(ceil(log2(max(width, height))))); // TODO: Naga doesn't like this being a u32 - let depth_pyramid_size = vec2(textureDimensions(depth_pyramid, depth_level)); - let aabb_top_left = vec2(aabb.xy * depth_pyramid_size); - - let depth_quad_a = textureLoad(depth_pyramid, aabb_top_left, depth_level).x; - let depth_quad_b = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 0u), depth_level).x; - let depth_quad_c = textureLoad(depth_pyramid, aabb_top_left + vec2(0u, 1u), depth_level).x; - let depth_quad_d = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 1u), depth_level).x; - - let occluder_depth = min(min(depth_quad_a, depth_quad_b), min(depth_quad_c, depth_quad_d)); - if view.projection[3][3] == 1.0 { - // Orthographic - let sphere_depth = view.projection[3][2] + (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius) * view.projection[2][2]; - meshlet_visible &= sphere_depth >= occluder_depth; - } else { - // Perspective - let sphere_depth = -view.projection[3][2] / (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius); - meshlet_visible &= sphere_depth >= occluder_depth; + let meshlet_triangle_count = meshlets[meshlet_id].triangle_count; + let buffer_start = atomicAdd(&draw_indirect_args.vertex_count, meshlet_triangle_count * 3u) / 3u; + let cluster_id_packed = cluster_id << 6u; + for (var triangle_id = 0u; triangle_id < meshlet_triangle_count; triangle_id++) { + draw_triangle_buffer[buffer_start + triangle_id] = cluster_id_packed | triangle_id; } } -#endif - - // Write the bitmask of whether or not the cluster was culled - let occlusion_bit = u32(meshlet_visible) << (cluster_id.x % 32u); - atomicOr(&meshlet_occlusion[cluster_id.x / 32u], occlusion_bit); } // https://stackoverflow.com/questions/21648630/radius-of-projected-sphere-in-screen-space/21649403#21649403 diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs index e297dc91fd..4cffa6da71 100644 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ b/crates/bevy_pbr/src/meshlet/gpu_scene.rs @@ -4,7 +4,8 @@ use super::{ }; use crate::{ Material, MeshFlags, MeshTransforms, MeshUniform, NotShadowCaster, NotShadowReceiver, - PreviousGlobalTransform, RenderMaterialInstances, ShadowView, + PreviousGlobalTransform, PreviousViewData, PreviousViewUniforms, RenderMaterialInstances, + ShadowView, }; use bevy_asset::{AssetEvent, AssetId, AssetServer, Assets, Handle, UntypedAssetId}; use bevy_core_pipeline::core_3d::Camera3d; @@ -13,7 +14,7 @@ use bevy_ecs::{ entity::{Entity, EntityHashMap}, event::EventReader, query::{AnyOf, Has}, - system::{Commands, Query, Res, ResMut, Resource, SystemState}, + system::{Commands, Local, Query, Res, ResMut, Resource, SystemState}, world::{FromWorld, World}, }; use bevy_render::{ @@ -37,24 +38,33 @@ use std::{ /// [`MeshletMesh`] entities, as well as queuing uploads for any new meshlet mesh /// assets that have not already been uploaded to the GPU. pub fn extract_meshlet_meshes( - // TODO: Replace main_world when Extract>> is possible - mut main_world: ResMut, mut gpu_scene: ResMut, + // TODO: Replace main_world and system_state when Extract>> is possible + mut main_world: ResMut, + mut system_state: Local< + Option< + SystemState<( + Query<( + Entity, + &Handle, + &GlobalTransform, + Option<&PreviousGlobalTransform>, + Option<&RenderLayers>, + Has, + Has, + )>, + Res, + ResMut>, + EventReader>, + )>, + >, + >, ) { - let mut system_state: SystemState<( - Query<( - Entity, - &Handle, - &GlobalTransform, - Option<&PreviousGlobalTransform>, - Option<&RenderLayers>, - Has, - Has, - )>, - Res, - ResMut>, - EventReader>, - )> = SystemState::new(&mut main_world); + if system_state.is_none() { + *system_state = Some(SystemState::new(&mut main_world)); + } + let system_state = system_state.as_mut().unwrap(); + let (instances_query, asset_server, mut assets, mut asset_events) = system_state.get_mut(&mut main_world); @@ -210,10 +220,6 @@ pub fn prepare_meshlet_per_frame_resources( render_device: Res, mut commands: Commands, ) { - gpu_scene - .previous_cluster_id_starts - .retain(|_, (_, active)| *active); - if gpu_scene.scene_meshlet_count == 0 { return; } @@ -238,21 +244,19 @@ pub fn prepare_meshlet_per_frame_resources( &render_device, &render_queue, ); - upload_storage_buffer( - &mut gpu_scene.previous_cluster_ids, - &render_device, - &render_queue, - ); + + // Early submission for GPU data uploads to start while the render graph records commands + render_queue.submit([]); let needed_buffer_size = 4 * gpu_scene.scene_triangle_count; - let visibility_buffer_draw_index_buffer = - match &mut gpu_scene.visibility_buffer_draw_index_buffer { + let visibility_buffer_draw_triangle_buffer = + match &mut gpu_scene.visibility_buffer_draw_triangle_buffer { Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), slot => { let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_visibility_buffer_draw_index_buffer"), + label: Some("meshlet_visibility_buffer_draw_triangle_buffer"), size: needed_buffer_size, - usage: BufferUsages::STORAGE | BufferUsages::INDEX, + usage: BufferUsages::STORAGE, mapped_at_creation: false, }); *slot = Some(buffer.clone()); @@ -260,7 +264,8 @@ pub fn prepare_meshlet_per_frame_resources( } }; - let needed_buffer_size = gpu_scene.scene_meshlet_count.div_ceil(32) as u64 * 4; + let needed_buffer_size = + gpu_scene.scene_meshlet_count.div_ceil(u32::BITS) as u64 * size_of::() as u64; for (view_entity, view, render_layers, (_, shadow_view)) in &views { let instance_visibility = gpu_scene .view_instance_visibility @@ -290,29 +295,19 @@ pub fn prepare_meshlet_per_frame_resources( upload_storage_buffer(instance_visibility, &render_device, &render_queue); let instance_visibility = instance_visibility.buffer().unwrap().clone(); - // Early submission for GPU data uploads to start while the render graph records commands - render_queue.submit([]); - - let create_occlusion_buffer = || { - render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_occlusion_buffer"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, - mapped_at_creation: false, - }) + let second_pass_candidates_buffer = match &mut gpu_scene.second_pass_candidates_buffer { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_second_pass_candidates"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } }; - let (previous_occlusion_buffer, occlusion_buffer, occlusion_buffer_needs_clearing) = - match gpu_scene.previous_occlusion_buffers.get(&view_entity) { - Some((buffer_a, buffer_b)) if buffer_b.size() >= needed_buffer_size => { - (buffer_a.clone(), buffer_b.clone(), true) - } - Some((buffer_a, _)) => (buffer_a.clone(), create_occlusion_buffer(), false), - None => (create_occlusion_buffer(), create_occlusion_buffer(), false), - }; - gpu_scene.previous_occlusion_buffers.insert( - view_entity, - (occlusion_buffer.clone(), previous_occlusion_buffer.clone()), - ); let visibility_buffer = TextureDescriptor { label: Some("meshlet_visibility_buffer"), @@ -354,19 +349,19 @@ pub fn prepare_meshlet_per_frame_resources( usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, }); - let depth_size = Extent3d { + let depth_pyramid_size = Extent3d { // Round down to the nearest power of 2 to ensure depth is conservative width: previous_power_of_2(view.viewport.z), height: previous_power_of_2(view.viewport.w), depth_or_array_layers: 1, }; - let depth_mip_count = depth_size.width.max(depth_size.height).ilog2() + 1; + let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); let depth_pyramid = texture_cache.get( &render_device, TextureDescriptor { label: Some("meshlet_depth_pyramid"), - size: depth_size, - mip_level_count: depth_mip_count, + size: depth_pyramid_size, + mip_level_count: depth_pyramid_mip_count, sample_count: 1, dimension: TextureDimension::D2, format: TextureFormat::R32Float, @@ -374,7 +369,7 @@ pub fn prepare_meshlet_per_frame_resources( view_formats: &[], }, ); - let depth_pyramid_mips = (0..depth_mip_count) + let depth_pyramid_mips = (0..depth_pyramid_mip_count) .map(|i| { depth_pyramid.texture.create_view(&TextureViewDescriptor { label: Some("meshlet_depth_pyramid_texture_view"), @@ -384,10 +379,19 @@ pub fn prepare_meshlet_per_frame_resources( base_mip_level: i, mip_level_count: Some(1), base_array_layer: 0, - array_layer_count: None, + array_layer_count: Some(1), }) }) - .collect(); + .collect::>(); + let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); + + let previous_depth_pyramid = match gpu_scene.previous_depth_pyramids.get(&view_entity) { + Some(texture_view) => texture_view.clone(), + None => depth_pyramid_all_mips.clone(), + }; + gpu_scene + .previous_depth_pyramids + .insert(view_entity, depth_pyramid_all_mips.clone()); let material_depth_color = TextureDescriptor { label: Some("meshlet_material_depth_color"), @@ -422,17 +426,16 @@ pub fn prepare_meshlet_per_frame_resources( let not_shadow_view = shadow_view.is_none(); commands.entity(view_entity).insert(MeshletViewResources { scene_meshlet_count: gpu_scene.scene_meshlet_count, - previous_occlusion_buffer, - occlusion_buffer, - occlusion_buffer_needs_clearing, + second_pass_candidates_buffer, instance_visibility, visibility_buffer: not_shadow_view .then(|| texture_cache.get(&render_device, visibility_buffer)), visibility_buffer_draw_indirect_args_first, visibility_buffer_draw_indirect_args_second, - visibility_buffer_draw_index_buffer: visibility_buffer_draw_index_buffer.clone(), - depth_pyramid, + visibility_buffer_draw_triangle_buffer: visibility_buffer_draw_triangle_buffer.clone(), + depth_pyramid_all_mips, depth_pyramid_mips, + previous_depth_pyramid, material_depth_color: not_shadow_view .then(|| texture_cache.get(&render_device, material_depth_color)), material_depth: not_shadow_view @@ -449,10 +452,14 @@ pub fn prepare_meshlet_view_bind_groups( AnyOf<(&ViewDepthTexture, &ShadowView)>, )>, view_uniforms: Res, + previous_view_uniforms: Res, render_device: Res, mut commands: Commands, ) { - let Some(view_uniforms) = view_uniforms.uniforms.binding() else { + let (Some(view_uniforms), Some(previous_view_uniforms)) = ( + view_uniforms.uniforms.binding(), + previous_view_uniforms.uniforms.binding(), + ) else { return; }; @@ -462,56 +469,50 @@ pub fn prepare_meshlet_view_bind_groups( gpu_scene.meshlet_bounding_spheres.binding(), gpu_scene.thread_instance_ids.binding().unwrap(), gpu_scene.instance_uniforms.binding().unwrap(), - gpu_scene.view_instance_visibility[&view_entity] - .binding() - .unwrap(), - view_resources.occlusion_buffer.as_entire_binding(), - gpu_scene.previous_cluster_ids.binding().unwrap(), - view_resources.previous_occlusion_buffer.as_entire_binding(), - view_uniforms.clone(), - &view_resources.depth_pyramid.default_view, - )); - let culling = render_device.create_bind_group( - "meshlet_culling_bind_group", - &gpu_scene.culling_bind_group_layout, - &entries, - ); - - let entries = BindGroupEntries::sequential(( - view_resources.occlusion_buffer.as_entire_binding(), - gpu_scene.thread_meshlet_ids.binding().unwrap(), - gpu_scene.previous_cluster_ids.binding().unwrap(), - view_resources.previous_occlusion_buffer.as_entire_binding(), + view_resources.instance_visibility.as_entire_binding(), + view_resources + .second_pass_candidates_buffer + .as_entire_binding(), gpu_scene.meshlets.binding(), view_resources .visibility_buffer_draw_indirect_args_first .as_entire_binding(), view_resources - .visibility_buffer_draw_index_buffer + .visibility_buffer_draw_triangle_buffer .as_entire_binding(), + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), )); - let write_index_buffer_first = render_device.create_bind_group( - "meshlet_write_index_buffer_first_bind_group", - &gpu_scene.write_index_buffer_bind_group_layout, + let culling_first = render_device.create_bind_group( + "meshlet_culling_first_bind_group", + &gpu_scene.culling_bind_group_layout, &entries, ); let entries = BindGroupEntries::sequential(( - view_resources.occlusion_buffer.as_entire_binding(), gpu_scene.thread_meshlet_ids.binding().unwrap(), - gpu_scene.previous_cluster_ids.binding().unwrap(), - view_resources.previous_occlusion_buffer.as_entire_binding(), + gpu_scene.meshlet_bounding_spheres.binding(), + gpu_scene.thread_instance_ids.binding().unwrap(), + gpu_scene.instance_uniforms.binding().unwrap(), + view_resources.instance_visibility.as_entire_binding(), + view_resources + .second_pass_candidates_buffer + .as_entire_binding(), gpu_scene.meshlets.binding(), view_resources .visibility_buffer_draw_indirect_args_second .as_entire_binding(), view_resources - .visibility_buffer_draw_index_buffer + .visibility_buffer_draw_triangle_buffer .as_entire_binding(), + &view_resources.depth_pyramid_all_mips, + view_uniforms.clone(), + previous_view_uniforms.clone(), )); - let write_index_buffer_second = render_device.create_bind_group( - "meshlet_write_index_buffer_second_bind_group", - &gpu_scene.write_index_buffer_bind_group_layout, + let culling_second = render_device.create_bind_group( + "meshlet_culling_second_bind_group", + &gpu_scene.culling_bind_group_layout, &entries, ); @@ -547,7 +548,7 @@ pub fn prepare_meshlet_view_bind_groups( gpu_scene.instance_uniforms.binding().unwrap(), gpu_scene.instance_material_ids.binding().unwrap(), view_resources - .visibility_buffer_draw_index_buffer + .visibility_buffer_draw_triangle_buffer .as_entire_binding(), view_uniforms.clone(), )); @@ -596,9 +597,8 @@ pub fn prepare_meshlet_view_bind_groups( }); commands.entity(view_entity).insert(MeshletViewBindGroups { - culling, - write_index_buffer_first, - write_index_buffer_second, + culling_first, + culling_second, downsample_depth, visibility_buffer_raster, copy_material_depth, @@ -631,13 +631,11 @@ pub struct MeshletGpuScene { instance_material_ids: StorageBuffer>, thread_instance_ids: StorageBuffer>, thread_meshlet_ids: StorageBuffer>, - previous_cluster_ids: StorageBuffer>, - previous_cluster_id_starts: HashMap<(Entity, AssetId), (u32, bool)>, - previous_occlusion_buffers: EntityHashMap<(Buffer, Buffer)>, - visibility_buffer_draw_index_buffer: Option, + second_pass_candidates_buffer: Option, + previous_depth_pyramids: EntityHashMap, + visibility_buffer_draw_triangle_buffer: Option, culling_bind_group_layout: BindGroupLayout, - write_index_buffer_bind_group_layout: BindGroupLayout, visibility_buffer_raster_bind_group_layout: BindGroupLayout, downsample_depth_bind_group_layout: BindGroupLayout, copy_material_depth_bind_group_layout: BindGroupLayout, @@ -687,14 +685,9 @@ impl FromWorld for MeshletGpuScene { buffer.set_label(Some("meshlet_thread_meshlet_ids")); buffer }, - previous_cluster_ids: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_previous_cluster_ids")); - buffer - }, - previous_cluster_id_starts: HashMap::new(), - previous_occlusion_buffers: EntityHashMap::default(), - visibility_buffer_draw_index_buffer: None, + second_pass_candidates_buffer: None, + previous_depth_pyramids: EntityHashMap::default(), + visibility_buffer_draw_triangle_buffer: None, // TODO: Buffer min sizes culling_bind_group_layout: render_device.create_bind_group_layout( @@ -709,24 +702,11 @@ impl FromWorld for MeshletGpuScene { storage_buffer_read_only_sized(false, None), storage_buffer_sized(false, None), storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - uniform_buffer::(true), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), texture_2d(TextureSampleType::Float { filterable: false }), - ), - ), - ), - write_index_buffer_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_write_index_buffer_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::COMPUTE, - ( - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_sized(false, None), - storage_buffer_sized(false, None), + uniform_buffer::(true), + uniform_buffer::(true), ), ), ), @@ -806,11 +786,7 @@ impl MeshletGpuScene { self.instance_material_ids.get_mut().clear(); self.thread_instance_ids.get_mut().clear(); self.thread_meshlet_ids.get_mut().clear(); - self.previous_cluster_ids.get_mut().clear(); - self.previous_cluster_id_starts - .values_mut() - .for_each(|(_, active)| *active = false); - // TODO: Remove unused entries for previous_occlusion_buffers + // TODO: Remove unused entries for view_instance_visibility and previous_depth_pyramids } fn queue_meshlet_mesh_upload( @@ -872,33 +848,14 @@ impl MeshletGpuScene { let meshlets_slice = (meshlets_slice.start as u32 / size_of::() as u32) ..(meshlets_slice.end as u32 / size_of::() as u32); - let current_cluster_id_start = self.scene_meshlet_count; - self.scene_meshlet_count += meshlets_slice.end - meshlets_slice.start; self.scene_triangle_count += triangle_count; - // Calculate the previous cluster IDs for each meshlet for this instance - let previous_cluster_id_start = self - .previous_cluster_id_starts - .entry((instance, handle.id())) - .or_insert((0, true)); - let previous_cluster_ids = if previous_cluster_id_start.1 { - 0..(meshlets_slice.len() as u32) - } else { - let start = previous_cluster_id_start.0; - start..(meshlets_slice.len() as u32 + start) - }; - // Append per-cluster data for this frame self.thread_instance_ids .get_mut() .extend(std::iter::repeat(instance_index).take(meshlets_slice.len())); self.thread_meshlet_ids.get_mut().extend(meshlets_slice); - self.previous_cluster_ids - .get_mut() - .extend(previous_cluster_ids); - - *previous_cluster_id_start = (current_cluster_id_start, true); } /// Get the depth value for use with the material depth texture for a given [`Material`] asset. @@ -920,10 +877,6 @@ impl MeshletGpuScene { self.culling_bind_group_layout.clone() } - pub fn write_index_buffer_bind_group_layout(&self) -> BindGroupLayout { - self.write_index_buffer_bind_group_layout.clone() - } - pub fn downsample_depth_bind_group_layout(&self) -> BindGroupLayout { self.downsample_depth_bind_group_layout.clone() } @@ -944,25 +897,23 @@ impl MeshletGpuScene { #[derive(Component)] pub struct MeshletViewResources { pub scene_meshlet_count: u32, - previous_occlusion_buffer: Buffer, - pub occlusion_buffer: Buffer, - pub occlusion_buffer_needs_clearing: bool, - pub instance_visibility: Buffer, + pub second_pass_candidates_buffer: Buffer, + instance_visibility: Buffer, pub visibility_buffer: Option, pub visibility_buffer_draw_indirect_args_first: Buffer, pub visibility_buffer_draw_indirect_args_second: Buffer, - visibility_buffer_draw_index_buffer: Buffer, - pub depth_pyramid: CachedTexture, + visibility_buffer_draw_triangle_buffer: Buffer, + depth_pyramid_all_mips: TextureView, pub depth_pyramid_mips: Box<[TextureView]>, + previous_depth_pyramid: TextureView, pub material_depth_color: Option, pub material_depth: Option, } #[derive(Component)] pub struct MeshletViewBindGroups { - pub culling: BindGroup, - pub write_index_buffer_first: BindGroup, - pub write_index_buffer_second: BindGroup, + pub culling_first: BindGroup, + pub culling_second: BindGroup, pub downsample_depth: Box<[BindGroup]>, pub visibility_buffer_raster: BindGroup, pub copy_material_depth: Option, diff --git a/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs b/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs index d3b6eac8e6..65fa6272a2 100644 --- a/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs +++ b/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs @@ -14,7 +14,7 @@ use bevy_render::{ render_resource::*, view::ExtractedView, }; -use bevy_utils::HashMap; +use bevy_utils::{HashMap, HashSet}; use std::hash::Hash; /// A list of `(Material ID, Pipeline, BindGroup)` for a view for use in [`super::MeshletMainOpaquePass3dNode`]. @@ -137,7 +137,7 @@ pub fn prepare_material_meshlet_meshes_main_opaque_pass( view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList); - for material_id in render_material_instances.values() { + for material_id in render_material_instances.values().collect::>() { let Some(material) = render_materials.get(*material_id) else { continue; }; @@ -262,7 +262,7 @@ pub fn prepare_material_meshlet_meshes_prepass( view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList); - for material_id in render_material_instances.values() { + for material_id in render_material_instances.values().collect::>() { let Some(material) = render_materials.get(*material_id) else { continue; }; diff --git a/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl index 599dc787b1..2ca98b5d41 100644 --- a/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl +++ b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl @@ -2,6 +2,7 @@ #import bevy_pbr::mesh_types::Mesh #import bevy_render::view::View +#import bevy_pbr::prepass_bindings::PreviousViewUniforms struct PackedMeshletVertex { a: vec4, @@ -56,11 +57,13 @@ struct DrawIndirectArgs { @group(0) @binding(2) var meshlet_thread_instance_ids: array; // Per cluster (instance of a meshlet) @group(0) @binding(3) var meshlet_instance_uniforms: array; // Per entity instance @group(0) @binding(4) var meshlet_view_instance_visibility: array; // 1 bit per entity instance, packed as a bitmask -@group(0) @binding(5) var meshlet_occlusion: array>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask -@group(0) @binding(6) var meshlet_previous_cluster_ids: array; // Per cluster (instance of a meshlet) -@group(0) @binding(7) var meshlet_previous_occlusion: array; // 1 bit per cluster (instance of a meshlet), packed as a bitmask -@group(0) @binding(8) var view: View; -@group(0) @binding(9) var depth_pyramid: texture_2d; // Generated from the first raster pass (unused in the first pass but still bound) +@group(0) @binding(5) var meshlet_second_pass_candidates: array>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask +@group(0) @binding(6) var meshlets: array; // Per asset meshlet +@group(0) @binding(7) var draw_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(8) var draw_triangle_buffer: array; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(9) var depth_pyramid: texture_2d; // From the end of the last frame for the first culling pass, and from the first raster pass for the second culling pass +@group(0) @binding(10) var view: View; +@group(0) @binding(11) var previous_view: PreviousViewUniforms; fn should_cull_instance(instance_id: u32) -> bool { let bit_offset = instance_id % 32u; @@ -68,34 +71,10 @@ fn should_cull_instance(instance_id: u32) -> bool { return bool(extractBits(packed_visibility, bit_offset, 1u)); } -fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool { - let previous_cluster_id = meshlet_previous_cluster_ids[cluster_id]; - let packed_occlusion = meshlet_previous_occlusion[previous_cluster_id / 32u]; - let bit_offset = previous_cluster_id % 32u; - return bool(extractBits(packed_occlusion, bit_offset, 1u)); -} -#endif - -#ifdef MESHLET_WRITE_INDEX_BUFFER_PASS -@group(0) @binding(0) var meshlet_occlusion: array; // 1 bit per cluster (instance of a meshlet), packed as a bitmask -@group(0) @binding(1) var meshlet_thread_meshlet_ids: array; // Per cluster (instance of a meshlet) -@group(0) @binding(2) var meshlet_previous_cluster_ids: array; // Per cluster (instance of a meshlet) -@group(0) @binding(3) var meshlet_previous_occlusion: array; // 1 bit per cluster (instance of a meshlet), packed as a bitmask -@group(0) @binding(4) var meshlets: array; // Per asset meshlet -@group(0) @binding(5) var draw_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/meshlets/triangles -@group(0) @binding(6) var draw_index_buffer: array; // Single object shared between all workgroups/meshlets/triangles - -fn get_meshlet_occlusion(cluster_id: u32) -> bool { - let packed_occlusion = meshlet_occlusion[cluster_id / 32u]; +fn meshlet_is_second_pass_candidate(cluster_id: u32) -> bool { + let packed_candidates = meshlet_second_pass_candidates[cluster_id / 32u]; let bit_offset = cluster_id % 32u; - return bool(extractBits(packed_occlusion, bit_offset, 1u)); -} - -fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool { - let previous_cluster_id = meshlet_previous_cluster_ids[cluster_id]; - let packed_occlusion = meshlet_previous_occlusion[previous_cluster_id / 32u]; - let bit_offset = previous_cluster_id % 32u; - return bool(extractBits(packed_occlusion, bit_offset, 1u)); + return bool(extractBits(packed_candidates, bit_offset, 1u)); } #endif @@ -108,7 +87,7 @@ fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool { @group(0) @binding(5) var meshlet_thread_instance_ids: array; // Per cluster (instance of a meshlet) @group(0) @binding(6) var meshlet_instance_uniforms: array; // Per entity instance @group(0) @binding(7) var meshlet_instance_material_ids: array; // Per entity instance -@group(0) @binding(8) var draw_index_buffer: array; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(8) var draw_triangle_buffer: array; // Single object shared between all workgroups/meshlets/triangles @group(0) @binding(9) var view: View; fn get_meshlet_index(index_id: u32) -> u32 { diff --git a/crates/bevy_pbr/src/meshlet/mod.rs b/crates/bevy_pbr/src/meshlet/mod.rs index 972b68fb4f..5b4681838d 100644 --- a/crates/bevy_pbr/src/meshlet/mod.rs +++ b/crates/bevy_pbr/src/meshlet/mod.rs @@ -50,7 +50,6 @@ use self::{ pipelines::{ MeshletPipelines, MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, MESHLET_CULLING_SHADER_HANDLE, MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, - MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, }, visibility_buffer_raster_node::MeshletVisibilityBufferRasterPassNode, }; @@ -131,12 +130,6 @@ impl Plugin for MeshletPlugin { "cull_meshlets.wgsl", Shader::from_wgsl ); - load_internal_asset!( - app, - MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, - "write_index_buffer.wgsl", - Shader::from_wgsl - ); load_internal_asset!( app, MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, diff --git a/crates/bevy_pbr/src/meshlet/pipelines.rs b/crates/bevy_pbr/src/meshlet/pipelines.rs index 1452905d7b..bb62c6bdf5 100644 --- a/crates/bevy_pbr/src/meshlet/pipelines.rs +++ b/crates/bevy_pbr/src/meshlet/pipelines.rs @@ -10,21 +10,17 @@ use bevy_ecs::{ use bevy_render::render_resource::*; pub const MESHLET_CULLING_SHADER_HANDLE: Handle = Handle::weak_from_u128(4325134235233421); -pub const MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE: Handle = - Handle::weak_from_u128(5325134235233421); pub const MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle = - Handle::weak_from_u128(6325134235233421); + Handle::weak_from_u128(5325134235233421); pub const MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE: Handle = - Handle::weak_from_u128(7325134235233421); + Handle::weak_from_u128(6325134235233421); pub const MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE: Handle = - Handle::weak_from_u128(8325134235233421); + Handle::weak_from_u128(7325134235233421); #[derive(Resource)] pub struct MeshletPipelines { cull_first: CachedComputePipelineId, cull_second: CachedComputePipelineId, - write_index_buffer_first: CachedComputePipelineId, - write_index_buffer_second: CachedComputePipelineId, downsample_depth: CachedRenderPipelineId, visibility_buffer_raster: CachedRenderPipelineId, visibility_buffer_raster_depth_only: CachedRenderPipelineId, @@ -36,7 +32,6 @@ impl FromWorld for MeshletPipelines { fn from_world(world: &mut World) -> Self { let gpu_scene = world.resource::(); let cull_layout = gpu_scene.culling_bind_group_layout(); - let write_index_buffer_layout = gpu_scene.write_index_buffer_bind_group_layout(); let downsample_depth_layout = gpu_scene.downsample_depth_bind_group_layout(); let visibility_buffer_layout = gpu_scene.visibility_buffer_raster_bind_group_layout(); let copy_material_depth_layout = gpu_scene.copy_material_depth_bind_group_layout(); @@ -48,7 +43,10 @@ impl FromWorld for MeshletPipelines { layout: vec![cull_layout.clone()], push_constant_ranges: vec![], shader: MESHLET_CULLING_SHADER_HANDLE, - shader_defs: vec!["MESHLET_CULLING_PASS".into()], + shader_defs: vec![ + "MESHLET_CULLING_PASS".into(), + "MESHLET_FIRST_CULLING_PASS".into(), + ], entry_point: "cull_meshlets".into(), }), @@ -64,31 +62,6 @@ impl FromWorld for MeshletPipelines { entry_point: "cull_meshlets".into(), }), - write_index_buffer_first: pipeline_cache.queue_compute_pipeline( - ComputePipelineDescriptor { - label: Some("meshlet_write_index_buffer_first_pipeline".into()), - layout: vec![write_index_buffer_layout.clone()], - push_constant_ranges: vec![], - shader: MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, - shader_defs: vec!["MESHLET_WRITE_INDEX_BUFFER_PASS".into()], - entry_point: "write_index_buffer".into(), - }, - ), - - write_index_buffer_second: pipeline_cache.queue_compute_pipeline( - ComputePipelineDescriptor { - label: Some("meshlet_write_index_buffer_second_pipeline".into()), - layout: vec![write_index_buffer_layout], - push_constant_ranges: vec![], - shader: MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, - shader_defs: vec![ - "MESHLET_WRITE_INDEX_BUFFER_PASS".into(), - "MESHLET_SECOND_WRITE_INDEX_BUFFER_PASS".into(), - ], - entry_point: "write_index_buffer".into(), - }, - ), - downsample_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { label: Some("meshlet_downsample_depth".into()), layout: vec![downsample_depth_layout], @@ -197,7 +170,9 @@ impl FromWorld for MeshletPipelines { visibility_buffer_raster_depth_only_clamp_ortho: pipeline_cache.queue_render_pipeline( RenderPipelineDescriptor { - label: Some("visibility_buffer_raster_depth_only_clamp_ortho_pipeline".into()), + label: Some( + "meshlet_visibility_buffer_raster_depth_only_clamp_ortho_pipeline".into(), + ), layout: vec![visibility_buffer_layout], push_constant_ranges: vec![], vertex: VertexState { @@ -267,8 +242,6 @@ impl MeshletPipelines { pub fn get( world: &World, ) -> Option<( - &ComputePipeline, - &ComputePipeline, &ComputePipeline, &ComputePipeline, &RenderPipeline, @@ -282,8 +255,6 @@ impl MeshletPipelines { Some(( pipeline_cache.get_compute_pipeline(pipeline.cull_first)?, pipeline_cache.get_compute_pipeline(pipeline.cull_second)?, - pipeline_cache.get_compute_pipeline(pipeline.write_index_buffer_first)?, - pipeline_cache.get_compute_pipeline(pipeline.write_index_buffer_second)?, pipeline_cache.get_render_pipeline(pipeline.downsample_depth)?, pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster)?, pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster_depth_only)?, diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl index dde6d2655d..e2c716de16 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl @@ -7,7 +7,7 @@ meshlet_thread_instance_ids, meshlet_instance_uniforms, meshlet_instance_material_ids, - draw_index_buffer, + draw_triangle_buffer, view, get_meshlet_index, unpack_meshlet_vertex, @@ -38,9 +38,9 @@ struct FragmentOutput { @vertex fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { - let packed_ids = draw_index_buffer[vertex_index / 3u]; - let cluster_id = packed_ids >> 8u; - let triangle_id = extractBits(packed_ids, 0u, 8u); + let packed_ids = draw_triangle_buffer[vertex_index / 3u]; + let cluster_id = packed_ids >> 6u; + let triangle_id = extractBits(packed_ids, 0u, 6u); let index_id = (triangle_id * 3u) + (vertex_index % 3u); let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; let meshlet = meshlets[meshlet_id]; diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs index 965a4135b2..54303af71c 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs @@ -2,7 +2,7 @@ use super::{ gpu_scene::{MeshletViewBindGroups, MeshletViewResources}, pipelines::MeshletPipelines, }; -use crate::{LightEntity, ShadowView, ViewLightEntities}; +use crate::{LightEntity, PreviousViewUniformOffset, ShadowView, ViewLightEntities}; use bevy_color::LinearRgba; use bevy_ecs::{ query::QueryState, @@ -22,6 +22,7 @@ pub struct MeshletVisibilityBufferRasterPassNode { &'static ExtractedCamera, &'static ViewDepthTexture, &'static ViewUniformOffset, + &'static PreviousViewUniformOffset, &'static MeshletViewBindGroups, &'static MeshletViewResources, &'static ViewLightEntities, @@ -30,6 +31,7 @@ pub struct MeshletVisibilityBufferRasterPassNode { &'static ShadowView, &'static LightEntity, &'static ViewUniformOffset, + &'static PreviousViewUniformOffset, &'static MeshletViewBindGroups, &'static MeshletViewResources, )>, @@ -60,6 +62,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode { camera, view_depth, view_offset, + previous_view_offset, meshlet_view_bind_groups, meshlet_view_resources, lights, @@ -71,8 +74,6 @@ impl Node for MeshletVisibilityBufferRasterPassNode { let Some(( culling_first_pipeline, culling_second_pipeline, - write_index_buffer_first_pipeline, - write_index_buffer_second_pipeline, downsample_depth_pipeline, visibility_buffer_raster_pipeline, visibility_buffer_raster_depth_only_pipeline, @@ -83,41 +84,27 @@ impl Node for MeshletVisibilityBufferRasterPassNode { return Ok(()); }; - let culling_workgroups = meshlet_view_resources.scene_meshlet_count.div_ceil(128); - let write_index_buffer_workgroups = (meshlet_view_resources.scene_meshlet_count as f32) + let culling_workgroups = (meshlet_view_resources.scene_meshlet_count.div_ceil(128) as f32) .cbrt() .ceil() as u32; render_context .command_encoder() - .push_debug_group("meshlet_visibility_buffer_raster_pass"); - if meshlet_view_resources.occlusion_buffer_needs_clearing { - render_context.command_encoder().clear_buffer( - &meshlet_view_resources.occlusion_buffer, - 0, - None, - ); - } - cull_pass( - "meshlet_culling_first_pass", - render_context, - meshlet_view_bind_groups, - view_offset, - culling_first_pipeline, - culling_workgroups, - ); - write_index_buffer_pass( - "meshlet_write_index_buffer_first_pass", - render_context, - &meshlet_view_bind_groups.write_index_buffer_first, - write_index_buffer_first_pipeline, - write_index_buffer_workgroups, - ); + .push_debug_group("meshlet_visibility_buffer_raster"); render_context.command_encoder().clear_buffer( - &meshlet_view_resources.occlusion_buffer, + &meshlet_view_resources.second_pass_candidates_buffer, 0, None, ); + cull_pass( + "culling_first", + render_context, + &meshlet_view_bind_groups.culling_first, + view_offset, + previous_view_offset, + culling_first_pipeline, + culling_workgroups, + ); raster_pass( true, render_context, @@ -136,20 +123,14 @@ impl Node for MeshletVisibilityBufferRasterPassNode { downsample_depth_pipeline, ); cull_pass( - "meshlet_culling_second_pass", + "culling_second", render_context, - meshlet_view_bind_groups, + &meshlet_view_bind_groups.culling_second, view_offset, + previous_view_offset, culling_second_pipeline, culling_workgroups, ); - write_index_buffer_pass( - "meshlet_write_index_buffer_second_pass", - render_context, - &meshlet_view_bind_groups.write_index_buffer_second, - write_index_buffer_second_pipeline, - write_index_buffer_workgroups, - ); raster_pass( false, render_context, @@ -168,6 +149,12 @@ impl Node for MeshletVisibilityBufferRasterPassNode { copy_material_depth_pipeline, camera, ); + downsample_depth( + render_context, + meshlet_view_resources, + meshlet_view_bind_groups, + downsample_depth_pipeline, + ); render_context.command_encoder().pop_debug_group(); for light_entity in &lights.lights { @@ -175,6 +162,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode { shadow_view, light_type, view_offset, + previous_view_offset, meshlet_view_bind_groups, meshlet_view_resources, )) = self.view_light_query.get_manual(world, *light_entity) @@ -188,36 +176,23 @@ impl Node for MeshletVisibilityBufferRasterPassNode { }; render_context.command_encoder().push_debug_group(&format!( - "meshlet_visibility_buffer_raster_pass: {}", + "meshlet_visibility_buffer_raster: {}", shadow_view.pass_name )); - if meshlet_view_resources.occlusion_buffer_needs_clearing { - render_context.command_encoder().clear_buffer( - &meshlet_view_resources.occlusion_buffer, - 0, - None, - ); - } - cull_pass( - "meshlet_culling_first_pass", - render_context, - meshlet_view_bind_groups, - view_offset, - culling_first_pipeline, - culling_workgroups, - ); - write_index_buffer_pass( - "meshlet_write_index_buffer_first_pass", - render_context, - &meshlet_view_bind_groups.write_index_buffer_first, - write_index_buffer_first_pipeline, - write_index_buffer_workgroups, - ); render_context.command_encoder().clear_buffer( - &meshlet_view_resources.occlusion_buffer, + &meshlet_view_resources.second_pass_candidates_buffer, 0, None, ); + cull_pass( + "culling_first", + render_context, + &meshlet_view_bind_groups.culling_first, + view_offset, + previous_view_offset, + culling_first_pipeline, + culling_workgroups, + ); raster_pass( true, render_context, @@ -236,20 +211,14 @@ impl Node for MeshletVisibilityBufferRasterPassNode { downsample_depth_pipeline, ); cull_pass( - "meshlet_culling_second_pass", + "culling_second", render_context, - meshlet_view_bind_groups, + &meshlet_view_bind_groups.culling_second, view_offset, + previous_view_offset, culling_second_pipeline, culling_workgroups, ); - write_index_buffer_pass( - "meshlet_write_index_buffer_second_pass", - render_context, - &meshlet_view_bind_groups.write_index_buffer_second, - write_index_buffer_second_pipeline, - write_index_buffer_workgroups, - ); raster_pass( false, render_context, @@ -261,6 +230,12 @@ impl Node for MeshletVisibilityBufferRasterPassNode { shadow_visibility_buffer_pipeline, None, ); + downsample_depth( + render_context, + meshlet_view_resources, + meshlet_view_bind_groups, + downsample_depth_pipeline, + ); render_context.command_encoder().pop_debug_group(); } @@ -271,8 +246,9 @@ impl Node for MeshletVisibilityBufferRasterPassNode { fn cull_pass( label: &'static str, render_context: &mut RenderContext, - meshlet_view_bind_groups: &MeshletViewBindGroups, + culling_bind_group: &BindGroup, view_offset: &ViewUniformOffset, + previous_view_offset: &PreviousViewUniformOffset, culling_pipeline: &ComputePipeline, culling_workgroups: u32, ) { @@ -281,30 +257,13 @@ fn cull_pass( label: Some(label), timestamp_writes: None, }); - cull_pass.set_bind_group(0, &meshlet_view_bind_groups.culling, &[view_offset.offset]); - cull_pass.set_pipeline(culling_pipeline); - cull_pass.dispatch_workgroups(culling_workgroups, 1, 1); -} - -fn write_index_buffer_pass( - label: &'static str, - render_context: &mut RenderContext, - write_index_buffer_bind_group: &BindGroup, - write_index_buffer_pipeline: &ComputePipeline, - write_index_buffer_workgroups: u32, -) { - let command_encoder = render_context.command_encoder(); - let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { - label: Some(label), - timestamp_writes: None, - }); - cull_pass.set_bind_group(0, write_index_buffer_bind_group, &[]); - cull_pass.set_pipeline(write_index_buffer_pipeline); - cull_pass.dispatch_workgroups( - write_index_buffer_workgroups, - write_index_buffer_workgroups, - write_index_buffer_workgroups, + cull_pass.set_bind_group( + 0, + culling_bind_group, + &[view_offset.offset, previous_view_offset.offset], ); + cull_pass.set_pipeline(culling_pipeline); + cull_pass.dispatch_workgroups(culling_workgroups, culling_workgroups, culling_workgroups); } #[allow(clippy::too_many_arguments)] @@ -351,9 +310,9 @@ fn raster_pass( let mut draw_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { label: Some(if first_pass { - "meshlet_visibility_buffer_raster_first_pass" + "raster_first" } else { - "meshlet_visibility_buffer_raster_second_pass" + "raster_second" }), color_attachments: if color_attachments_filled[0].is_none() { &[] @@ -389,7 +348,7 @@ fn downsample_depth( for i in 0..meshlet_view_resources.depth_pyramid_mips.len() { let downsample_pass = RenderPassDescriptor { - label: Some("meshlet_downsample_depth_pass"), + label: Some("downsample_depth"), color_attachments: &[Some(RenderPassColorAttachment { view: &meshlet_view_resources.depth_pyramid_mips[i], resolve_target: None, @@ -424,7 +383,7 @@ fn copy_material_depth_pass( meshlet_view_bind_groups.copy_material_depth.as_ref(), ) { let mut copy_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { - label: Some("meshlet_copy_material_depth_pass"), + label: Some("copy_material_depth"), color_attachments: &[], depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { view: &material_depth.default_view, diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl index 4ade7d709a..947c9d49be 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl @@ -96,10 +96,10 @@ struct VertexOutput { /// Load the visibility buffer texture and resolve it into a VertexOutput. fn resolve_vertex_output(frag_coord: vec4) -> VertexOutput { let vbuffer = textureLoad(meshlet_visibility_buffer, vec2(frag_coord.xy), 0).r; - let cluster_id = vbuffer >> 8u; + let cluster_id = vbuffer >> 6u; let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; let meshlet = meshlets[meshlet_id]; - let triangle_id = extractBits(vbuffer, 0u, 8u); + let triangle_id = extractBits(vbuffer, 0u, 6u); let index_ids = meshlet.start_index_id + vec3(triangle_id * 3u) + vec3(0u, 1u, 2u); let indices = meshlet.start_vertex_id + vec3(get_meshlet_index(index_ids.x), get_meshlet_index(index_ids.y), get_meshlet_index(index_ids.z)); let vertex_ids = vec3(meshlet_vertex_ids[indices.x], meshlet_vertex_ids[indices.y], meshlet_vertex_ids[indices.z]); diff --git a/crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl b/crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl deleted file mode 100644 index f7ea7dae56..0000000000 --- a/crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl +++ /dev/null @@ -1,43 +0,0 @@ -#import bevy_pbr::meshlet_bindings::{ - meshlet_thread_meshlet_ids, - meshlets, - draw_indirect_args, - draw_index_buffer, - get_meshlet_occlusion, - get_meshlet_previous_occlusion, -} - -var draw_index_buffer_start_workgroup: u32; - -/// This pass writes out a buffer of cluster + triangle IDs for the draw_indirect() call to rasterize each visible meshlet. - -@compute -@workgroup_size(64, 1, 1) // 64 threads per workgroup, 1 workgroup per cluster, 1 thread per triangle -fn write_index_buffer(@builtin(workgroup_id) workgroup_id: vec3, @builtin(num_workgroups) num_workgroups: vec3, @builtin(local_invocation_index) triangle_id: u32) { - // Calculate the cluster ID for this workgroup - let cluster_id = dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); - if cluster_id >= arrayLength(&meshlet_thread_meshlet_ids) { return; } - - // If the meshlet was culled, then we don't need to draw it - if !get_meshlet_occlusion(cluster_id) { return; } - - // If the meshlet was drawn in the first pass, and this is the second pass, then we don't need to draw it -#ifdef MESHLET_SECOND_WRITE_INDEX_BUFFER_PASS - if get_meshlet_previous_occlusion(cluster_id) { return; } -#endif - - let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; - let meshlet = meshlets[meshlet_id]; - - // Reserve space in the buffer for this meshlet's triangles, and broadcast the start of that slice to all threads - if triangle_id == 0u { - draw_index_buffer_start_workgroup = atomicAdd(&draw_indirect_args.vertex_count, meshlet.triangle_count * 3u); - draw_index_buffer_start_workgroup /= 3u; - } - workgroupBarrier(); - - // Each thread writes one triangle of the meshlet to the buffer slice reserved for the meshlet - if triangle_id < meshlet.triangle_count { - draw_index_buffer[draw_index_buffer_start_workgroup + triangle_id] = (cluster_id << 8u) | triangle_id; - } -} diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 18b7eeeb3a..f4e6b59c74 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -32,6 +32,7 @@ use bevy_utils::tracing::error; #[cfg(feature = "meshlet")] use crate::meshlet::{ prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, MeshletGpuScene, + MeshletMesh, }; use crate::*; @@ -102,7 +103,7 @@ where .init_resource::() .init_resource::>>() .allow_ambiguous_resource::>>() - .init_resource::(); + .init_resource::(); } fn finish(&self, app: &mut App) { @@ -199,9 +200,14 @@ pub struct PreviousViewData { pub view_proj: Mat4, } +#[cfg(not(feature = "meshlet"))] +type PreviousViewFilter = (With, With); +#[cfg(feature = "meshlet")] +type PreviousViewFilter = Or<(With, With)>; + pub fn update_previous_view_data( mut commands: Commands, - query: Query<(Entity, &Camera, &GlobalTransform), (With, With)>, + query: Query<(Entity, &Camera, &GlobalTransform), PreviousViewFilter>, ) { for (entity, camera, camera_transform) in &query { let inverse_view = camera_transform.compute_matrix().inverse(); @@ -215,10 +221,15 @@ pub fn update_previous_view_data( #[derive(Component)] pub struct PreviousGlobalTransform(pub Affine3A); +#[cfg(not(feature = "meshlet"))] +type PreviousMeshFilter = With>; +#[cfg(feature = "meshlet")] +type PreviousMeshFilter = Or<(With>, With>)>; + pub fn update_mesh_previous_global_transforms( mut commands: Commands, - views: Query<&Camera, (With, With)>, - meshes: Query<(Entity, &GlobalTransform), With>>, + views: Query<&Camera, PreviousViewFilter>, + meshes: Query<(Entity, &GlobalTransform), PreviousMeshFilter>, ) { let should_run = views.iter().any(|camera| camera.is_active); @@ -603,7 +614,7 @@ pub fn extract_camera_previous_view_data( } #[derive(Resource, Default)] -pub struct PreviousViewProjectionUniforms { +pub struct PreviousViewUniforms { pub uniforms: DynamicUniformBuffer, } @@ -616,13 +627,13 @@ pub fn prepare_previous_view_uniforms( mut commands: Commands, render_device: Res, render_queue: Res, - mut view_uniforms: ResMut, - views: Query<(Entity, &ExtractedView, Option<&PreviousViewData>), With>, + mut previous_view_uniforms: ResMut, + views: Query<(Entity, &ExtractedView, Option<&PreviousViewData>), PreviousViewFilter>, ) { let views_iter = views.iter(); let view_count = views_iter.len(); let Some(mut writer) = - view_uniforms + previous_view_uniforms .uniforms .get_writer(view_count, &render_device, &render_queue) else { @@ -658,7 +669,7 @@ pub fn prepare_prepass_view_bind_group( prepass_pipeline: Res>, view_uniforms: Res, globals_buffer: Res, - previous_view_uniforms: Res, + previous_view_uniforms: Res, mut prepass_view_bind_group: ResMut, ) { if let (Some(view_binding), Some(globals_binding)) = ( @@ -905,6 +916,7 @@ impl RenderCommand

for SetPrepassViewBindGroup< type Param = SRes; type ViewQuery = ( Read, + Has, Option>, ); type ItemQuery = (); @@ -912,8 +924,9 @@ impl RenderCommand

for SetPrepassViewBindGroup< #[inline] fn render<'w>( _item: &P, - (view_uniform_offset, previous_view_uniform_offset): ( + (view_uniform_offset, has_motion_vector_prepass, previous_view_uniform_offset): ( &'_ ViewUniformOffset, + bool, Option<&'_ PreviousViewUniformOffset>, ), _entity: Option<()>, @@ -922,21 +935,24 @@ impl RenderCommand

for SetPrepassViewBindGroup< ) -> RenderCommandResult { let prepass_view_bind_group = prepass_view_bind_group.into_inner(); - if let Some(previous_view_uniform_offset) = previous_view_uniform_offset { - pass.set_bind_group( - I, - prepass_view_bind_group.motion_vectors.as_ref().unwrap(), - &[ - view_uniform_offset.offset, - previous_view_uniform_offset.offset, - ], - ); - } else { - pass.set_bind_group( - I, - prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(), - &[view_uniform_offset.offset], - ); + match previous_view_uniform_offset { + Some(previous_view_uniform_offset) if has_motion_vector_prepass => { + pass.set_bind_group( + I, + prepass_view_bind_group.motion_vectors.as_ref().unwrap(), + &[ + view_uniform_offset.offset, + previous_view_uniform_offset.offset, + ], + ); + } + _ => { + pass.set_bind_group( + I, + prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(), + &[view_uniform_offset.offset], + ); + } } RenderCommandResult::Success diff --git a/examples/3d/meshlet.rs b/examples/3d/meshlet.rs index 45eb80db0b..722029feeb 100644 --- a/examples/3d/meshlet.rs +++ b/examples/3d/meshlet.rs @@ -20,7 +20,7 @@ const ASSET_URL: &str = "https://github.com/JMS55/bevy_meshlet_asset/blob/bd8698 fn main() -> ExitCode { if !Path::new("./assets/models/bunny.meshlet_mesh").exists() { - println!("ERROR: Asset at path /assets/models/bunny.meshlet_mesh is missing. Please download it from {ASSET_URL}"); + eprintln!("ERROR: Asset at path /assets/models/bunny.meshlet_mesh is missing. Please download it from {ASSET_URL}"); return ExitCode::FAILURE; }