diff --git a/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl b/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl index d2edeccd5f..2e04f3332b 100644 --- a/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl +++ b/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl @@ -3,36 +3,61 @@ meshlet_bounding_spheres, meshlet_thread_instance_ids, meshlet_instance_uniforms, - meshlet_occlusion, + meshlet_second_pass_candidates, + depth_pyramid, view, + previous_view, should_cull_instance, - get_meshlet_previous_occlusion, + meshlet_is_second_pass_candidate, + meshlets, + draw_indirect_args, + draw_triangle_buffer, } -#ifdef MESHLET_SECOND_CULLING_PASS -#import bevy_pbr::meshlet_bindings::depth_pyramid -#endif #import bevy_render::maths::affine3_to_square /// Culls individual clusters (1 per thread) in two passes (two pass occlusion culling), and outputs a bitmask of which clusters survived. -/// 1. The first pass is only frustum culling, on only the clusters that were visible last frame. -/// 2. The second pass performs both frustum and occlusion culling (using the depth buffer generated from the first pass), on all clusters. +/// 1. The first pass tests instance visibility, frustum culling, LOD selection, and finally occlusion culling using last frame's depth pyramid. +/// 2. The second pass performs occlusion culling (using the depth buffer generated from the first pass) on all clusters that passed +/// the instance, frustum, and LOD tests in the first pass, but were not visible last frame according to the occlusion culling. @compute @workgroup_size(128, 1, 1) // 128 threads per workgroup, 1 instanced meshlet per thread -fn cull_meshlets(@builtin(global_invocation_id) cluster_id: vec3) { - // Fetch the instance data and check for instance culling - if cluster_id.x >= arrayLength(&meshlet_thread_meshlet_ids) { return; } - let instance_id = meshlet_thread_instance_ids[cluster_id.x]; - if should_cull_instance(instance_id) { - return; - } +fn cull_meshlets( + @builtin(workgroup_id) workgroup_id: vec3, + @builtin(num_workgroups) num_workgroups: vec3, + @builtin(local_invocation_id) local_invocation_id: vec3, +) { + // Calculate the cluster ID for this thread + let cluster_id = local_invocation_id.x + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); + if cluster_id >= arrayLength(&meshlet_thread_meshlet_ids) { return; } - // Fetch other meshlet data - let meshlet_id = meshlet_thread_meshlet_ids[cluster_id.x]; +#ifdef MESHLET_SECOND_CULLING_PASS + if !meshlet_is_second_pass_candidate(cluster_id) { return; } +#endif + + // Check for instance culling + let instance_id = meshlet_thread_instance_ids[cluster_id]; +#ifdef MESHLET_FIRST_CULLING_PASS + if should_cull_instance(instance_id) { return; } +#endif + + // Calculate world-space culling bounding sphere for the cluster let instance_uniform = meshlet_instance_uniforms[instance_id]; + let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; let model = affine3_to_square(instance_uniform.model); let model_scale = max(length(model[0]), max(length(model[1]), length(model[2]))); let bounding_spheres = meshlet_bounding_spheres[meshlet_id]; + var culling_bounding_sphere_center = model * vec4(bounding_spheres.self_culling.center, 1.0); + var culling_bounding_sphere_radius = model_scale * bounding_spheres.self_culling.radius; + +#ifdef MESHLET_FIRST_CULLING_PASS + // Frustum culling + // TODO: Faster method from https://vkguide.dev/docs/gpudriven/compute_culling/#frustum-culling-function + for (var i = 0u; i < 6u; i++) { + if dot(view.frustum[i], culling_bounding_sphere_center) + culling_bounding_sphere_radius <= 0.0 { + return; + } + } // Calculate view-space LOD bounding sphere for the meshlet let lod_bounding_sphere_center = model * vec4(bounding_spheres.self_lod.center, 1.0); @@ -48,61 +73,59 @@ fn cull_meshlets(@builtin(global_invocation_id) cluster_id: vec3) { let lod_is_ok = lod_error_is_imperceptible(lod_bounding_sphere_center_view_space, lod_bounding_sphere_radius); let parent_lod_is_ok = lod_error_is_imperceptible(parent_lod_bounding_sphere_center_view_space, parent_lod_bounding_sphere_radius); if !lod_is_ok || parent_lod_is_ok { return; } - - // In the first pass, operate only on the clusters visible last frame. In the second pass, operate on all clusters. -#ifdef MESHLET_SECOND_CULLING_PASS - var meshlet_visible = true; -#else - var meshlet_visible = get_meshlet_previous_occlusion(cluster_id.x); - if !meshlet_visible { return; } #endif - // Calculate world-space culling bounding sphere for the cluster - let culling_bounding_sphere_center = model * vec4(bounding_spheres.self_culling.center, 1.0); - let culling_bounding_sphere_radius = model_scale * bounding_spheres.self_culling.radius; + // Project the culling bounding sphere to view-space for occlusion culling +#ifdef MESHLET_FIRST_CULLING_PASS + let previous_model = affine3_to_square(instance_uniform.previous_model); + let previous_model_scale = max(length(previous_model[0]), max(length(previous_model[1]), length(previous_model[2]))); + culling_bounding_sphere_center = previous_model * vec4(bounding_spheres.self_culling.center, 1.0); + culling_bounding_sphere_radius = previous_model_scale * bounding_spheres.self_culling.radius; +#endif + let culling_bounding_sphere_center_view_space = (view.inverse_view * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz; - // Frustum culling - // TODO: Faster method from https://vkguide.dev/docs/gpudriven/compute_culling/#frustum-culling-function - for (var i = 0u; i < 6u; i++) { - if !meshlet_visible { break; } - meshlet_visible &= dot(view.frustum[i], culling_bounding_sphere_center) > -culling_bounding_sphere_radius; + let aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius); + // Halve the view-space AABB size as the depth pyramid is half the view size + let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)) * 0.5; + let width = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; + let height = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; + let depth_level = max(0, i32(ceil(log2(max(width, height))))); // TODO: Naga doesn't like this being a u32 + let depth_pyramid_size = vec2(textureDimensions(depth_pyramid, depth_level)); + let aabb_top_left = vec2(aabb.xy * depth_pyramid_size); + + let depth_quad_a = textureLoad(depth_pyramid, aabb_top_left, depth_level).x; + let depth_quad_b = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 0u), depth_level).x; + let depth_quad_c = textureLoad(depth_pyramid, aabb_top_left + vec2(0u, 1u), depth_level).x; + let depth_quad_d = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 1u), depth_level).x; + let occluder_depth = min(min(depth_quad_a, depth_quad_b), min(depth_quad_c, depth_quad_d)); + + // Check whether or not the cluster would be occluded if drawn + var meshlet_visible: bool; + if view.projection[3][3] == 1.0 { + // Orthographic + let sphere_depth = view.projection[3][2] + (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius) * view.projection[2][2]; + meshlet_visible = sphere_depth >= occluder_depth; + } else { + // Perspective + let sphere_depth = -view.projection[3][2] / (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius); + meshlet_visible = sphere_depth >= occluder_depth; } -#ifdef MESHLET_SECOND_CULLING_PASS - // In the second culling pass, cull against the depth pyramid generated from the first pass + // Write if the cluster should be occlusion tested in the second pass +#ifdef MESHLET_FIRST_CULLING_PASS + let second_pass_candidate = u32(!meshlet_visible) << cluster_id % 32u; + atomicOr(&meshlet_second_pass_candidates[cluster_id / 32u], second_pass_candidate); +#endif + + // Append a list of this cluster's triangles to draw if not culled if meshlet_visible { - let culling_bounding_sphere_center_view_space = (view.inverse_view * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz; - let aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius); - - // Halve the AABB size because the first depth mip resampling pass cut the full screen resolution into a power of two conservatively - let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)) * 0.5; - let width = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; - let height = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; - let depth_level = max(0, i32(ceil(log2(max(width, height))))); // TODO: Naga doesn't like this being a u32 - let depth_pyramid_size = vec2(textureDimensions(depth_pyramid, depth_level)); - let aabb_top_left = vec2(aabb.xy * depth_pyramid_size); - - let depth_quad_a = textureLoad(depth_pyramid, aabb_top_left, depth_level).x; - let depth_quad_b = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 0u), depth_level).x; - let depth_quad_c = textureLoad(depth_pyramid, aabb_top_left + vec2(0u, 1u), depth_level).x; - let depth_quad_d = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 1u), depth_level).x; - - let occluder_depth = min(min(depth_quad_a, depth_quad_b), min(depth_quad_c, depth_quad_d)); - if view.projection[3][3] == 1.0 { - // Orthographic - let sphere_depth = view.projection[3][2] + (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius) * view.projection[2][2]; - meshlet_visible &= sphere_depth >= occluder_depth; - } else { - // Perspective - let sphere_depth = -view.projection[3][2] / (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius); - meshlet_visible &= sphere_depth >= occluder_depth; + let meshlet_triangle_count = meshlets[meshlet_id].triangle_count; + let buffer_start = atomicAdd(&draw_indirect_args.vertex_count, meshlet_triangle_count * 3u) / 3u; + let cluster_id_packed = cluster_id << 6u; + for (var triangle_id = 0u; triangle_id < meshlet_triangle_count; triangle_id++) { + draw_triangle_buffer[buffer_start + triangle_id] = cluster_id_packed | triangle_id; } } -#endif - - // Write the bitmask of whether or not the cluster was culled - let occlusion_bit = u32(meshlet_visible) << (cluster_id.x % 32u); - atomicOr(&meshlet_occlusion[cluster_id.x / 32u], occlusion_bit); } // https://stackoverflow.com/questions/21648630/radius-of-projected-sphere-in-screen-space/21649403#21649403 diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs index e297dc91fd..4cffa6da71 100644 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ b/crates/bevy_pbr/src/meshlet/gpu_scene.rs @@ -4,7 +4,8 @@ use super::{ }; use crate::{ Material, MeshFlags, MeshTransforms, MeshUniform, NotShadowCaster, NotShadowReceiver, - PreviousGlobalTransform, RenderMaterialInstances, ShadowView, + PreviousGlobalTransform, PreviousViewData, PreviousViewUniforms, RenderMaterialInstances, + ShadowView, }; use bevy_asset::{AssetEvent, AssetId, AssetServer, Assets, Handle, UntypedAssetId}; use bevy_core_pipeline::core_3d::Camera3d; @@ -13,7 +14,7 @@ use bevy_ecs::{ entity::{Entity, EntityHashMap}, event::EventReader, query::{AnyOf, Has}, - system::{Commands, Query, Res, ResMut, Resource, SystemState}, + system::{Commands, Local, Query, Res, ResMut, Resource, SystemState}, world::{FromWorld, World}, }; use bevy_render::{ @@ -37,24 +38,33 @@ use std::{ /// [`MeshletMesh`] entities, as well as queuing uploads for any new meshlet mesh /// assets that have not already been uploaded to the GPU. pub fn extract_meshlet_meshes( - // TODO: Replace main_world when Extract>> is possible - mut main_world: ResMut, mut gpu_scene: ResMut, + // TODO: Replace main_world and system_state when Extract>> is possible + mut main_world: ResMut, + mut system_state: Local< + Option< + SystemState<( + Query<( + Entity, + &Handle, + &GlobalTransform, + Option<&PreviousGlobalTransform>, + Option<&RenderLayers>, + Has, + Has, + )>, + Res, + ResMut>, + EventReader>, + )>, + >, + >, ) { - let mut system_state: SystemState<( - Query<( - Entity, - &Handle, - &GlobalTransform, - Option<&PreviousGlobalTransform>, - Option<&RenderLayers>, - Has, - Has, - )>, - Res, - ResMut>, - EventReader>, - )> = SystemState::new(&mut main_world); + if system_state.is_none() { + *system_state = Some(SystemState::new(&mut main_world)); + } + let system_state = system_state.as_mut().unwrap(); + let (instances_query, asset_server, mut assets, mut asset_events) = system_state.get_mut(&mut main_world); @@ -210,10 +220,6 @@ pub fn prepare_meshlet_per_frame_resources( render_device: Res, mut commands: Commands, ) { - gpu_scene - .previous_cluster_id_starts - .retain(|_, (_, active)| *active); - if gpu_scene.scene_meshlet_count == 0 { return; } @@ -238,21 +244,19 @@ pub fn prepare_meshlet_per_frame_resources( &render_device, &render_queue, ); - upload_storage_buffer( - &mut gpu_scene.previous_cluster_ids, - &render_device, - &render_queue, - ); + + // Early submission for GPU data uploads to start while the render graph records commands + render_queue.submit([]); let needed_buffer_size = 4 * gpu_scene.scene_triangle_count; - let visibility_buffer_draw_index_buffer = - match &mut gpu_scene.visibility_buffer_draw_index_buffer { + let visibility_buffer_draw_triangle_buffer = + match &mut gpu_scene.visibility_buffer_draw_triangle_buffer { Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), slot => { let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_visibility_buffer_draw_index_buffer"), + label: Some("meshlet_visibility_buffer_draw_triangle_buffer"), size: needed_buffer_size, - usage: BufferUsages::STORAGE | BufferUsages::INDEX, + usage: BufferUsages::STORAGE, mapped_at_creation: false, }); *slot = Some(buffer.clone()); @@ -260,7 +264,8 @@ pub fn prepare_meshlet_per_frame_resources( } }; - let needed_buffer_size = gpu_scene.scene_meshlet_count.div_ceil(32) as u64 * 4; + let needed_buffer_size = + gpu_scene.scene_meshlet_count.div_ceil(u32::BITS) as u64 * size_of::() as u64; for (view_entity, view, render_layers, (_, shadow_view)) in &views { let instance_visibility = gpu_scene .view_instance_visibility @@ -290,29 +295,19 @@ pub fn prepare_meshlet_per_frame_resources( upload_storage_buffer(instance_visibility, &render_device, &render_queue); let instance_visibility = instance_visibility.buffer().unwrap().clone(); - // Early submission for GPU data uploads to start while the render graph records commands - render_queue.submit([]); - - let create_occlusion_buffer = || { - render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_occlusion_buffer"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, - mapped_at_creation: false, - }) + let second_pass_candidates_buffer = match &mut gpu_scene.second_pass_candidates_buffer { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_second_pass_candidates"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } }; - let (previous_occlusion_buffer, occlusion_buffer, occlusion_buffer_needs_clearing) = - match gpu_scene.previous_occlusion_buffers.get(&view_entity) { - Some((buffer_a, buffer_b)) if buffer_b.size() >= needed_buffer_size => { - (buffer_a.clone(), buffer_b.clone(), true) - } - Some((buffer_a, _)) => (buffer_a.clone(), create_occlusion_buffer(), false), - None => (create_occlusion_buffer(), create_occlusion_buffer(), false), - }; - gpu_scene.previous_occlusion_buffers.insert( - view_entity, - (occlusion_buffer.clone(), previous_occlusion_buffer.clone()), - ); let visibility_buffer = TextureDescriptor { label: Some("meshlet_visibility_buffer"), @@ -354,19 +349,19 @@ pub fn prepare_meshlet_per_frame_resources( usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, }); - let depth_size = Extent3d { + let depth_pyramid_size = Extent3d { // Round down to the nearest power of 2 to ensure depth is conservative width: previous_power_of_2(view.viewport.z), height: previous_power_of_2(view.viewport.w), depth_or_array_layers: 1, }; - let depth_mip_count = depth_size.width.max(depth_size.height).ilog2() + 1; + let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); let depth_pyramid = texture_cache.get( &render_device, TextureDescriptor { label: Some("meshlet_depth_pyramid"), - size: depth_size, - mip_level_count: depth_mip_count, + size: depth_pyramid_size, + mip_level_count: depth_pyramid_mip_count, sample_count: 1, dimension: TextureDimension::D2, format: TextureFormat::R32Float, @@ -374,7 +369,7 @@ pub fn prepare_meshlet_per_frame_resources( view_formats: &[], }, ); - let depth_pyramid_mips = (0..depth_mip_count) + let depth_pyramid_mips = (0..depth_pyramid_mip_count) .map(|i| { depth_pyramid.texture.create_view(&TextureViewDescriptor { label: Some("meshlet_depth_pyramid_texture_view"), @@ -384,10 +379,19 @@ pub fn prepare_meshlet_per_frame_resources( base_mip_level: i, mip_level_count: Some(1), base_array_layer: 0, - array_layer_count: None, + array_layer_count: Some(1), }) }) - .collect(); + .collect::>(); + let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); + + let previous_depth_pyramid = match gpu_scene.previous_depth_pyramids.get(&view_entity) { + Some(texture_view) => texture_view.clone(), + None => depth_pyramid_all_mips.clone(), + }; + gpu_scene + .previous_depth_pyramids + .insert(view_entity, depth_pyramid_all_mips.clone()); let material_depth_color = TextureDescriptor { label: Some("meshlet_material_depth_color"), @@ -422,17 +426,16 @@ pub fn prepare_meshlet_per_frame_resources( let not_shadow_view = shadow_view.is_none(); commands.entity(view_entity).insert(MeshletViewResources { scene_meshlet_count: gpu_scene.scene_meshlet_count, - previous_occlusion_buffer, - occlusion_buffer, - occlusion_buffer_needs_clearing, + second_pass_candidates_buffer, instance_visibility, visibility_buffer: not_shadow_view .then(|| texture_cache.get(&render_device, visibility_buffer)), visibility_buffer_draw_indirect_args_first, visibility_buffer_draw_indirect_args_second, - visibility_buffer_draw_index_buffer: visibility_buffer_draw_index_buffer.clone(), - depth_pyramid, + visibility_buffer_draw_triangle_buffer: visibility_buffer_draw_triangle_buffer.clone(), + depth_pyramid_all_mips, depth_pyramid_mips, + previous_depth_pyramid, material_depth_color: not_shadow_view .then(|| texture_cache.get(&render_device, material_depth_color)), material_depth: not_shadow_view @@ -449,10 +452,14 @@ pub fn prepare_meshlet_view_bind_groups( AnyOf<(&ViewDepthTexture, &ShadowView)>, )>, view_uniforms: Res, + previous_view_uniforms: Res, render_device: Res, mut commands: Commands, ) { - let Some(view_uniforms) = view_uniforms.uniforms.binding() else { + let (Some(view_uniforms), Some(previous_view_uniforms)) = ( + view_uniforms.uniforms.binding(), + previous_view_uniforms.uniforms.binding(), + ) else { return; }; @@ -462,56 +469,50 @@ pub fn prepare_meshlet_view_bind_groups( gpu_scene.meshlet_bounding_spheres.binding(), gpu_scene.thread_instance_ids.binding().unwrap(), gpu_scene.instance_uniforms.binding().unwrap(), - gpu_scene.view_instance_visibility[&view_entity] - .binding() - .unwrap(), - view_resources.occlusion_buffer.as_entire_binding(), - gpu_scene.previous_cluster_ids.binding().unwrap(), - view_resources.previous_occlusion_buffer.as_entire_binding(), - view_uniforms.clone(), - &view_resources.depth_pyramid.default_view, - )); - let culling = render_device.create_bind_group( - "meshlet_culling_bind_group", - &gpu_scene.culling_bind_group_layout, - &entries, - ); - - let entries = BindGroupEntries::sequential(( - view_resources.occlusion_buffer.as_entire_binding(), - gpu_scene.thread_meshlet_ids.binding().unwrap(), - gpu_scene.previous_cluster_ids.binding().unwrap(), - view_resources.previous_occlusion_buffer.as_entire_binding(), + view_resources.instance_visibility.as_entire_binding(), + view_resources + .second_pass_candidates_buffer + .as_entire_binding(), gpu_scene.meshlets.binding(), view_resources .visibility_buffer_draw_indirect_args_first .as_entire_binding(), view_resources - .visibility_buffer_draw_index_buffer + .visibility_buffer_draw_triangle_buffer .as_entire_binding(), + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), )); - let write_index_buffer_first = render_device.create_bind_group( - "meshlet_write_index_buffer_first_bind_group", - &gpu_scene.write_index_buffer_bind_group_layout, + let culling_first = render_device.create_bind_group( + "meshlet_culling_first_bind_group", + &gpu_scene.culling_bind_group_layout, &entries, ); let entries = BindGroupEntries::sequential(( - view_resources.occlusion_buffer.as_entire_binding(), gpu_scene.thread_meshlet_ids.binding().unwrap(), - gpu_scene.previous_cluster_ids.binding().unwrap(), - view_resources.previous_occlusion_buffer.as_entire_binding(), + gpu_scene.meshlet_bounding_spheres.binding(), + gpu_scene.thread_instance_ids.binding().unwrap(), + gpu_scene.instance_uniforms.binding().unwrap(), + view_resources.instance_visibility.as_entire_binding(), + view_resources + .second_pass_candidates_buffer + .as_entire_binding(), gpu_scene.meshlets.binding(), view_resources .visibility_buffer_draw_indirect_args_second .as_entire_binding(), view_resources - .visibility_buffer_draw_index_buffer + .visibility_buffer_draw_triangle_buffer .as_entire_binding(), + &view_resources.depth_pyramid_all_mips, + view_uniforms.clone(), + previous_view_uniforms.clone(), )); - let write_index_buffer_second = render_device.create_bind_group( - "meshlet_write_index_buffer_second_bind_group", - &gpu_scene.write_index_buffer_bind_group_layout, + let culling_second = render_device.create_bind_group( + "meshlet_culling_second_bind_group", + &gpu_scene.culling_bind_group_layout, &entries, ); @@ -547,7 +548,7 @@ pub fn prepare_meshlet_view_bind_groups( gpu_scene.instance_uniforms.binding().unwrap(), gpu_scene.instance_material_ids.binding().unwrap(), view_resources - .visibility_buffer_draw_index_buffer + .visibility_buffer_draw_triangle_buffer .as_entire_binding(), view_uniforms.clone(), )); @@ -596,9 +597,8 @@ pub fn prepare_meshlet_view_bind_groups( }); commands.entity(view_entity).insert(MeshletViewBindGroups { - culling, - write_index_buffer_first, - write_index_buffer_second, + culling_first, + culling_second, downsample_depth, visibility_buffer_raster, copy_material_depth, @@ -631,13 +631,11 @@ pub struct MeshletGpuScene { instance_material_ids: StorageBuffer>, thread_instance_ids: StorageBuffer>, thread_meshlet_ids: StorageBuffer>, - previous_cluster_ids: StorageBuffer>, - previous_cluster_id_starts: HashMap<(Entity, AssetId), (u32, bool)>, - previous_occlusion_buffers: EntityHashMap<(Buffer, Buffer)>, - visibility_buffer_draw_index_buffer: Option, + second_pass_candidates_buffer: Option, + previous_depth_pyramids: EntityHashMap, + visibility_buffer_draw_triangle_buffer: Option, culling_bind_group_layout: BindGroupLayout, - write_index_buffer_bind_group_layout: BindGroupLayout, visibility_buffer_raster_bind_group_layout: BindGroupLayout, downsample_depth_bind_group_layout: BindGroupLayout, copy_material_depth_bind_group_layout: BindGroupLayout, @@ -687,14 +685,9 @@ impl FromWorld for MeshletGpuScene { buffer.set_label(Some("meshlet_thread_meshlet_ids")); buffer }, - previous_cluster_ids: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_previous_cluster_ids")); - buffer - }, - previous_cluster_id_starts: HashMap::new(), - previous_occlusion_buffers: EntityHashMap::default(), - visibility_buffer_draw_index_buffer: None, + second_pass_candidates_buffer: None, + previous_depth_pyramids: EntityHashMap::default(), + visibility_buffer_draw_triangle_buffer: None, // TODO: Buffer min sizes culling_bind_group_layout: render_device.create_bind_group_layout( @@ -709,24 +702,11 @@ impl FromWorld for MeshletGpuScene { storage_buffer_read_only_sized(false, None), storage_buffer_sized(false, None), storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - uniform_buffer::(true), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), texture_2d(TextureSampleType::Float { filterable: false }), - ), - ), - ), - write_index_buffer_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_write_index_buffer_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::COMPUTE, - ( - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_sized(false, None), - storage_buffer_sized(false, None), + uniform_buffer::(true), + uniform_buffer::(true), ), ), ), @@ -806,11 +786,7 @@ impl MeshletGpuScene { self.instance_material_ids.get_mut().clear(); self.thread_instance_ids.get_mut().clear(); self.thread_meshlet_ids.get_mut().clear(); - self.previous_cluster_ids.get_mut().clear(); - self.previous_cluster_id_starts - .values_mut() - .for_each(|(_, active)| *active = false); - // TODO: Remove unused entries for previous_occlusion_buffers + // TODO: Remove unused entries for view_instance_visibility and previous_depth_pyramids } fn queue_meshlet_mesh_upload( @@ -872,33 +848,14 @@ impl MeshletGpuScene { let meshlets_slice = (meshlets_slice.start as u32 / size_of::() as u32) ..(meshlets_slice.end as u32 / size_of::() as u32); - let current_cluster_id_start = self.scene_meshlet_count; - self.scene_meshlet_count += meshlets_slice.end - meshlets_slice.start; self.scene_triangle_count += triangle_count; - // Calculate the previous cluster IDs for each meshlet for this instance - let previous_cluster_id_start = self - .previous_cluster_id_starts - .entry((instance, handle.id())) - .or_insert((0, true)); - let previous_cluster_ids = if previous_cluster_id_start.1 { - 0..(meshlets_slice.len() as u32) - } else { - let start = previous_cluster_id_start.0; - start..(meshlets_slice.len() as u32 + start) - }; - // Append per-cluster data for this frame self.thread_instance_ids .get_mut() .extend(std::iter::repeat(instance_index).take(meshlets_slice.len())); self.thread_meshlet_ids.get_mut().extend(meshlets_slice); - self.previous_cluster_ids - .get_mut() - .extend(previous_cluster_ids); - - *previous_cluster_id_start = (current_cluster_id_start, true); } /// Get the depth value for use with the material depth texture for a given [`Material`] asset. @@ -920,10 +877,6 @@ impl MeshletGpuScene { self.culling_bind_group_layout.clone() } - pub fn write_index_buffer_bind_group_layout(&self) -> BindGroupLayout { - self.write_index_buffer_bind_group_layout.clone() - } - pub fn downsample_depth_bind_group_layout(&self) -> BindGroupLayout { self.downsample_depth_bind_group_layout.clone() } @@ -944,25 +897,23 @@ impl MeshletGpuScene { #[derive(Component)] pub struct MeshletViewResources { pub scene_meshlet_count: u32, - previous_occlusion_buffer: Buffer, - pub occlusion_buffer: Buffer, - pub occlusion_buffer_needs_clearing: bool, - pub instance_visibility: Buffer, + pub second_pass_candidates_buffer: Buffer, + instance_visibility: Buffer, pub visibility_buffer: Option, pub visibility_buffer_draw_indirect_args_first: Buffer, pub visibility_buffer_draw_indirect_args_second: Buffer, - visibility_buffer_draw_index_buffer: Buffer, - pub depth_pyramid: CachedTexture, + visibility_buffer_draw_triangle_buffer: Buffer, + depth_pyramid_all_mips: TextureView, pub depth_pyramid_mips: Box<[TextureView]>, + previous_depth_pyramid: TextureView, pub material_depth_color: Option, pub material_depth: Option, } #[derive(Component)] pub struct MeshletViewBindGroups { - pub culling: BindGroup, - pub write_index_buffer_first: BindGroup, - pub write_index_buffer_second: BindGroup, + pub culling_first: BindGroup, + pub culling_second: BindGroup, pub downsample_depth: Box<[BindGroup]>, pub visibility_buffer_raster: BindGroup, pub copy_material_depth: Option, diff --git a/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs b/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs index d3b6eac8e6..65fa6272a2 100644 --- a/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs +++ b/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs @@ -14,7 +14,7 @@ use bevy_render::{ render_resource::*, view::ExtractedView, }; -use bevy_utils::HashMap; +use bevy_utils::{HashMap, HashSet}; use std::hash::Hash; /// A list of `(Material ID, Pipeline, BindGroup)` for a view for use in [`super::MeshletMainOpaquePass3dNode`]. @@ -137,7 +137,7 @@ pub fn prepare_material_meshlet_meshes_main_opaque_pass( view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList); - for material_id in render_material_instances.values() { + for material_id in render_material_instances.values().collect::>() { let Some(material) = render_materials.get(*material_id) else { continue; }; @@ -262,7 +262,7 @@ pub fn prepare_material_meshlet_meshes_prepass( view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList); - for material_id in render_material_instances.values() { + for material_id in render_material_instances.values().collect::>() { let Some(material) = render_materials.get(*material_id) else { continue; }; diff --git a/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl index 599dc787b1..2ca98b5d41 100644 --- a/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl +++ b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl @@ -2,6 +2,7 @@ #import bevy_pbr::mesh_types::Mesh #import bevy_render::view::View +#import bevy_pbr::prepass_bindings::PreviousViewUniforms struct PackedMeshletVertex { a: vec4, @@ -56,11 +57,13 @@ struct DrawIndirectArgs { @group(0) @binding(2) var meshlet_thread_instance_ids: array; // Per cluster (instance of a meshlet) @group(0) @binding(3) var meshlet_instance_uniforms: array; // Per entity instance @group(0) @binding(4) var meshlet_view_instance_visibility: array; // 1 bit per entity instance, packed as a bitmask -@group(0) @binding(5) var meshlet_occlusion: array>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask -@group(0) @binding(6) var meshlet_previous_cluster_ids: array; // Per cluster (instance of a meshlet) -@group(0) @binding(7) var meshlet_previous_occlusion: array; // 1 bit per cluster (instance of a meshlet), packed as a bitmask -@group(0) @binding(8) var view: View; -@group(0) @binding(9) var depth_pyramid: texture_2d; // Generated from the first raster pass (unused in the first pass but still bound) +@group(0) @binding(5) var meshlet_second_pass_candidates: array>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask +@group(0) @binding(6) var meshlets: array; // Per asset meshlet +@group(0) @binding(7) var draw_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(8) var draw_triangle_buffer: array; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(9) var depth_pyramid: texture_2d; // From the end of the last frame for the first culling pass, and from the first raster pass for the second culling pass +@group(0) @binding(10) var view: View; +@group(0) @binding(11) var previous_view: PreviousViewUniforms; fn should_cull_instance(instance_id: u32) -> bool { let bit_offset = instance_id % 32u; @@ -68,34 +71,10 @@ fn should_cull_instance(instance_id: u32) -> bool { return bool(extractBits(packed_visibility, bit_offset, 1u)); } -fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool { - let previous_cluster_id = meshlet_previous_cluster_ids[cluster_id]; - let packed_occlusion = meshlet_previous_occlusion[previous_cluster_id / 32u]; - let bit_offset = previous_cluster_id % 32u; - return bool(extractBits(packed_occlusion, bit_offset, 1u)); -} -#endif - -#ifdef MESHLET_WRITE_INDEX_BUFFER_PASS -@group(0) @binding(0) var meshlet_occlusion: array; // 1 bit per cluster (instance of a meshlet), packed as a bitmask -@group(0) @binding(1) var meshlet_thread_meshlet_ids: array; // Per cluster (instance of a meshlet) -@group(0) @binding(2) var meshlet_previous_cluster_ids: array; // Per cluster (instance of a meshlet) -@group(0) @binding(3) var meshlet_previous_occlusion: array; // 1 bit per cluster (instance of a meshlet), packed as a bitmask -@group(0) @binding(4) var meshlets: array; // Per asset meshlet -@group(0) @binding(5) var draw_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/meshlets/triangles -@group(0) @binding(6) var draw_index_buffer: array; // Single object shared between all workgroups/meshlets/triangles - -fn get_meshlet_occlusion(cluster_id: u32) -> bool { - let packed_occlusion = meshlet_occlusion[cluster_id / 32u]; +fn meshlet_is_second_pass_candidate(cluster_id: u32) -> bool { + let packed_candidates = meshlet_second_pass_candidates[cluster_id / 32u]; let bit_offset = cluster_id % 32u; - return bool(extractBits(packed_occlusion, bit_offset, 1u)); -} - -fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool { - let previous_cluster_id = meshlet_previous_cluster_ids[cluster_id]; - let packed_occlusion = meshlet_previous_occlusion[previous_cluster_id / 32u]; - let bit_offset = previous_cluster_id % 32u; - return bool(extractBits(packed_occlusion, bit_offset, 1u)); + return bool(extractBits(packed_candidates, bit_offset, 1u)); } #endif @@ -108,7 +87,7 @@ fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool { @group(0) @binding(5) var meshlet_thread_instance_ids: array; // Per cluster (instance of a meshlet) @group(0) @binding(6) var meshlet_instance_uniforms: array; // Per entity instance @group(0) @binding(7) var meshlet_instance_material_ids: array; // Per entity instance -@group(0) @binding(8) var draw_index_buffer: array; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(8) var draw_triangle_buffer: array; // Single object shared between all workgroups/meshlets/triangles @group(0) @binding(9) var view: View; fn get_meshlet_index(index_id: u32) -> u32 { diff --git a/crates/bevy_pbr/src/meshlet/mod.rs b/crates/bevy_pbr/src/meshlet/mod.rs index 972b68fb4f..5b4681838d 100644 --- a/crates/bevy_pbr/src/meshlet/mod.rs +++ b/crates/bevy_pbr/src/meshlet/mod.rs @@ -50,7 +50,6 @@ use self::{ pipelines::{ MeshletPipelines, MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, MESHLET_CULLING_SHADER_HANDLE, MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, - MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, }, visibility_buffer_raster_node::MeshletVisibilityBufferRasterPassNode, }; @@ -131,12 +130,6 @@ impl Plugin for MeshletPlugin { "cull_meshlets.wgsl", Shader::from_wgsl ); - load_internal_asset!( - app, - MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, - "write_index_buffer.wgsl", - Shader::from_wgsl - ); load_internal_asset!( app, MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, diff --git a/crates/bevy_pbr/src/meshlet/pipelines.rs b/crates/bevy_pbr/src/meshlet/pipelines.rs index 1452905d7b..bb62c6bdf5 100644 --- a/crates/bevy_pbr/src/meshlet/pipelines.rs +++ b/crates/bevy_pbr/src/meshlet/pipelines.rs @@ -10,21 +10,17 @@ use bevy_ecs::{ use bevy_render::render_resource::*; pub const MESHLET_CULLING_SHADER_HANDLE: Handle = Handle::weak_from_u128(4325134235233421); -pub const MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE: Handle = - Handle::weak_from_u128(5325134235233421); pub const MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle = - Handle::weak_from_u128(6325134235233421); + Handle::weak_from_u128(5325134235233421); pub const MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE: Handle = - Handle::weak_from_u128(7325134235233421); + Handle::weak_from_u128(6325134235233421); pub const MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE: Handle = - Handle::weak_from_u128(8325134235233421); + Handle::weak_from_u128(7325134235233421); #[derive(Resource)] pub struct MeshletPipelines { cull_first: CachedComputePipelineId, cull_second: CachedComputePipelineId, - write_index_buffer_first: CachedComputePipelineId, - write_index_buffer_second: CachedComputePipelineId, downsample_depth: CachedRenderPipelineId, visibility_buffer_raster: CachedRenderPipelineId, visibility_buffer_raster_depth_only: CachedRenderPipelineId, @@ -36,7 +32,6 @@ impl FromWorld for MeshletPipelines { fn from_world(world: &mut World) -> Self { let gpu_scene = world.resource::(); let cull_layout = gpu_scene.culling_bind_group_layout(); - let write_index_buffer_layout = gpu_scene.write_index_buffer_bind_group_layout(); let downsample_depth_layout = gpu_scene.downsample_depth_bind_group_layout(); let visibility_buffer_layout = gpu_scene.visibility_buffer_raster_bind_group_layout(); let copy_material_depth_layout = gpu_scene.copy_material_depth_bind_group_layout(); @@ -48,7 +43,10 @@ impl FromWorld for MeshletPipelines { layout: vec![cull_layout.clone()], push_constant_ranges: vec![], shader: MESHLET_CULLING_SHADER_HANDLE, - shader_defs: vec!["MESHLET_CULLING_PASS".into()], + shader_defs: vec![ + "MESHLET_CULLING_PASS".into(), + "MESHLET_FIRST_CULLING_PASS".into(), + ], entry_point: "cull_meshlets".into(), }), @@ -64,31 +62,6 @@ impl FromWorld for MeshletPipelines { entry_point: "cull_meshlets".into(), }), - write_index_buffer_first: pipeline_cache.queue_compute_pipeline( - ComputePipelineDescriptor { - label: Some("meshlet_write_index_buffer_first_pipeline".into()), - layout: vec![write_index_buffer_layout.clone()], - push_constant_ranges: vec![], - shader: MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, - shader_defs: vec!["MESHLET_WRITE_INDEX_BUFFER_PASS".into()], - entry_point: "write_index_buffer".into(), - }, - ), - - write_index_buffer_second: pipeline_cache.queue_compute_pipeline( - ComputePipelineDescriptor { - label: Some("meshlet_write_index_buffer_second_pipeline".into()), - layout: vec![write_index_buffer_layout], - push_constant_ranges: vec![], - shader: MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, - shader_defs: vec![ - "MESHLET_WRITE_INDEX_BUFFER_PASS".into(), - "MESHLET_SECOND_WRITE_INDEX_BUFFER_PASS".into(), - ], - entry_point: "write_index_buffer".into(), - }, - ), - downsample_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { label: Some("meshlet_downsample_depth".into()), layout: vec![downsample_depth_layout], @@ -197,7 +170,9 @@ impl FromWorld for MeshletPipelines { visibility_buffer_raster_depth_only_clamp_ortho: pipeline_cache.queue_render_pipeline( RenderPipelineDescriptor { - label: Some("visibility_buffer_raster_depth_only_clamp_ortho_pipeline".into()), + label: Some( + "meshlet_visibility_buffer_raster_depth_only_clamp_ortho_pipeline".into(), + ), layout: vec![visibility_buffer_layout], push_constant_ranges: vec![], vertex: VertexState { @@ -267,8 +242,6 @@ impl MeshletPipelines { pub fn get( world: &World, ) -> Option<( - &ComputePipeline, - &ComputePipeline, &ComputePipeline, &ComputePipeline, &RenderPipeline, @@ -282,8 +255,6 @@ impl MeshletPipelines { Some(( pipeline_cache.get_compute_pipeline(pipeline.cull_first)?, pipeline_cache.get_compute_pipeline(pipeline.cull_second)?, - pipeline_cache.get_compute_pipeline(pipeline.write_index_buffer_first)?, - pipeline_cache.get_compute_pipeline(pipeline.write_index_buffer_second)?, pipeline_cache.get_render_pipeline(pipeline.downsample_depth)?, pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster)?, pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster_depth_only)?, diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl index dde6d2655d..e2c716de16 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl @@ -7,7 +7,7 @@ meshlet_thread_instance_ids, meshlet_instance_uniforms, meshlet_instance_material_ids, - draw_index_buffer, + draw_triangle_buffer, view, get_meshlet_index, unpack_meshlet_vertex, @@ -38,9 +38,9 @@ struct FragmentOutput { @vertex fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { - let packed_ids = draw_index_buffer[vertex_index / 3u]; - let cluster_id = packed_ids >> 8u; - let triangle_id = extractBits(packed_ids, 0u, 8u); + let packed_ids = draw_triangle_buffer[vertex_index / 3u]; + let cluster_id = packed_ids >> 6u; + let triangle_id = extractBits(packed_ids, 0u, 6u); let index_id = (triangle_id * 3u) + (vertex_index % 3u); let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; let meshlet = meshlets[meshlet_id]; diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs index 965a4135b2..54303af71c 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs @@ -2,7 +2,7 @@ use super::{ gpu_scene::{MeshletViewBindGroups, MeshletViewResources}, pipelines::MeshletPipelines, }; -use crate::{LightEntity, ShadowView, ViewLightEntities}; +use crate::{LightEntity, PreviousViewUniformOffset, ShadowView, ViewLightEntities}; use bevy_color::LinearRgba; use bevy_ecs::{ query::QueryState, @@ -22,6 +22,7 @@ pub struct MeshletVisibilityBufferRasterPassNode { &'static ExtractedCamera, &'static ViewDepthTexture, &'static ViewUniformOffset, + &'static PreviousViewUniformOffset, &'static MeshletViewBindGroups, &'static MeshletViewResources, &'static ViewLightEntities, @@ -30,6 +31,7 @@ pub struct MeshletVisibilityBufferRasterPassNode { &'static ShadowView, &'static LightEntity, &'static ViewUniformOffset, + &'static PreviousViewUniformOffset, &'static MeshletViewBindGroups, &'static MeshletViewResources, )>, @@ -60,6 +62,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode { camera, view_depth, view_offset, + previous_view_offset, meshlet_view_bind_groups, meshlet_view_resources, lights, @@ -71,8 +74,6 @@ impl Node for MeshletVisibilityBufferRasterPassNode { let Some(( culling_first_pipeline, culling_second_pipeline, - write_index_buffer_first_pipeline, - write_index_buffer_second_pipeline, downsample_depth_pipeline, visibility_buffer_raster_pipeline, visibility_buffer_raster_depth_only_pipeline, @@ -83,41 +84,27 @@ impl Node for MeshletVisibilityBufferRasterPassNode { return Ok(()); }; - let culling_workgroups = meshlet_view_resources.scene_meshlet_count.div_ceil(128); - let write_index_buffer_workgroups = (meshlet_view_resources.scene_meshlet_count as f32) + let culling_workgroups = (meshlet_view_resources.scene_meshlet_count.div_ceil(128) as f32) .cbrt() .ceil() as u32; render_context .command_encoder() - .push_debug_group("meshlet_visibility_buffer_raster_pass"); - if meshlet_view_resources.occlusion_buffer_needs_clearing { - render_context.command_encoder().clear_buffer( - &meshlet_view_resources.occlusion_buffer, - 0, - None, - ); - } - cull_pass( - "meshlet_culling_first_pass", - render_context, - meshlet_view_bind_groups, - view_offset, - culling_first_pipeline, - culling_workgroups, - ); - write_index_buffer_pass( - "meshlet_write_index_buffer_first_pass", - render_context, - &meshlet_view_bind_groups.write_index_buffer_first, - write_index_buffer_first_pipeline, - write_index_buffer_workgroups, - ); + .push_debug_group("meshlet_visibility_buffer_raster"); render_context.command_encoder().clear_buffer( - &meshlet_view_resources.occlusion_buffer, + &meshlet_view_resources.second_pass_candidates_buffer, 0, None, ); + cull_pass( + "culling_first", + render_context, + &meshlet_view_bind_groups.culling_first, + view_offset, + previous_view_offset, + culling_first_pipeline, + culling_workgroups, + ); raster_pass( true, render_context, @@ -136,20 +123,14 @@ impl Node for MeshletVisibilityBufferRasterPassNode { downsample_depth_pipeline, ); cull_pass( - "meshlet_culling_second_pass", + "culling_second", render_context, - meshlet_view_bind_groups, + &meshlet_view_bind_groups.culling_second, view_offset, + previous_view_offset, culling_second_pipeline, culling_workgroups, ); - write_index_buffer_pass( - "meshlet_write_index_buffer_second_pass", - render_context, - &meshlet_view_bind_groups.write_index_buffer_second, - write_index_buffer_second_pipeline, - write_index_buffer_workgroups, - ); raster_pass( false, render_context, @@ -168,6 +149,12 @@ impl Node for MeshletVisibilityBufferRasterPassNode { copy_material_depth_pipeline, camera, ); + downsample_depth( + render_context, + meshlet_view_resources, + meshlet_view_bind_groups, + downsample_depth_pipeline, + ); render_context.command_encoder().pop_debug_group(); for light_entity in &lights.lights { @@ -175,6 +162,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode { shadow_view, light_type, view_offset, + previous_view_offset, meshlet_view_bind_groups, meshlet_view_resources, )) = self.view_light_query.get_manual(world, *light_entity) @@ -188,36 +176,23 @@ impl Node for MeshletVisibilityBufferRasterPassNode { }; render_context.command_encoder().push_debug_group(&format!( - "meshlet_visibility_buffer_raster_pass: {}", + "meshlet_visibility_buffer_raster: {}", shadow_view.pass_name )); - if meshlet_view_resources.occlusion_buffer_needs_clearing { - render_context.command_encoder().clear_buffer( - &meshlet_view_resources.occlusion_buffer, - 0, - None, - ); - } - cull_pass( - "meshlet_culling_first_pass", - render_context, - meshlet_view_bind_groups, - view_offset, - culling_first_pipeline, - culling_workgroups, - ); - write_index_buffer_pass( - "meshlet_write_index_buffer_first_pass", - render_context, - &meshlet_view_bind_groups.write_index_buffer_first, - write_index_buffer_first_pipeline, - write_index_buffer_workgroups, - ); render_context.command_encoder().clear_buffer( - &meshlet_view_resources.occlusion_buffer, + &meshlet_view_resources.second_pass_candidates_buffer, 0, None, ); + cull_pass( + "culling_first", + render_context, + &meshlet_view_bind_groups.culling_first, + view_offset, + previous_view_offset, + culling_first_pipeline, + culling_workgroups, + ); raster_pass( true, render_context, @@ -236,20 +211,14 @@ impl Node for MeshletVisibilityBufferRasterPassNode { downsample_depth_pipeline, ); cull_pass( - "meshlet_culling_second_pass", + "culling_second", render_context, - meshlet_view_bind_groups, + &meshlet_view_bind_groups.culling_second, view_offset, + previous_view_offset, culling_second_pipeline, culling_workgroups, ); - write_index_buffer_pass( - "meshlet_write_index_buffer_second_pass", - render_context, - &meshlet_view_bind_groups.write_index_buffer_second, - write_index_buffer_second_pipeline, - write_index_buffer_workgroups, - ); raster_pass( false, render_context, @@ -261,6 +230,12 @@ impl Node for MeshletVisibilityBufferRasterPassNode { shadow_visibility_buffer_pipeline, None, ); + downsample_depth( + render_context, + meshlet_view_resources, + meshlet_view_bind_groups, + downsample_depth_pipeline, + ); render_context.command_encoder().pop_debug_group(); } @@ -271,8 +246,9 @@ impl Node for MeshletVisibilityBufferRasterPassNode { fn cull_pass( label: &'static str, render_context: &mut RenderContext, - meshlet_view_bind_groups: &MeshletViewBindGroups, + culling_bind_group: &BindGroup, view_offset: &ViewUniformOffset, + previous_view_offset: &PreviousViewUniformOffset, culling_pipeline: &ComputePipeline, culling_workgroups: u32, ) { @@ -281,30 +257,13 @@ fn cull_pass( label: Some(label), timestamp_writes: None, }); - cull_pass.set_bind_group(0, &meshlet_view_bind_groups.culling, &[view_offset.offset]); - cull_pass.set_pipeline(culling_pipeline); - cull_pass.dispatch_workgroups(culling_workgroups, 1, 1); -} - -fn write_index_buffer_pass( - label: &'static str, - render_context: &mut RenderContext, - write_index_buffer_bind_group: &BindGroup, - write_index_buffer_pipeline: &ComputePipeline, - write_index_buffer_workgroups: u32, -) { - let command_encoder = render_context.command_encoder(); - let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { - label: Some(label), - timestamp_writes: None, - }); - cull_pass.set_bind_group(0, write_index_buffer_bind_group, &[]); - cull_pass.set_pipeline(write_index_buffer_pipeline); - cull_pass.dispatch_workgroups( - write_index_buffer_workgroups, - write_index_buffer_workgroups, - write_index_buffer_workgroups, + cull_pass.set_bind_group( + 0, + culling_bind_group, + &[view_offset.offset, previous_view_offset.offset], ); + cull_pass.set_pipeline(culling_pipeline); + cull_pass.dispatch_workgroups(culling_workgroups, culling_workgroups, culling_workgroups); } #[allow(clippy::too_many_arguments)] @@ -351,9 +310,9 @@ fn raster_pass( let mut draw_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { label: Some(if first_pass { - "meshlet_visibility_buffer_raster_first_pass" + "raster_first" } else { - "meshlet_visibility_buffer_raster_second_pass" + "raster_second" }), color_attachments: if color_attachments_filled[0].is_none() { &[] @@ -389,7 +348,7 @@ fn downsample_depth( for i in 0..meshlet_view_resources.depth_pyramid_mips.len() { let downsample_pass = RenderPassDescriptor { - label: Some("meshlet_downsample_depth_pass"), + label: Some("downsample_depth"), color_attachments: &[Some(RenderPassColorAttachment { view: &meshlet_view_resources.depth_pyramid_mips[i], resolve_target: None, @@ -424,7 +383,7 @@ fn copy_material_depth_pass( meshlet_view_bind_groups.copy_material_depth.as_ref(), ) { let mut copy_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { - label: Some("meshlet_copy_material_depth_pass"), + label: Some("copy_material_depth"), color_attachments: &[], depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { view: &material_depth.default_view, diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl index 4ade7d709a..947c9d49be 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl @@ -96,10 +96,10 @@ struct VertexOutput { /// Load the visibility buffer texture and resolve it into a VertexOutput. fn resolve_vertex_output(frag_coord: vec4) -> VertexOutput { let vbuffer = textureLoad(meshlet_visibility_buffer, vec2(frag_coord.xy), 0).r; - let cluster_id = vbuffer >> 8u; + let cluster_id = vbuffer >> 6u; let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; let meshlet = meshlets[meshlet_id]; - let triangle_id = extractBits(vbuffer, 0u, 8u); + let triangle_id = extractBits(vbuffer, 0u, 6u); let index_ids = meshlet.start_index_id + vec3(triangle_id * 3u) + vec3(0u, 1u, 2u); let indices = meshlet.start_vertex_id + vec3(get_meshlet_index(index_ids.x), get_meshlet_index(index_ids.y), get_meshlet_index(index_ids.z)); let vertex_ids = vec3(meshlet_vertex_ids[indices.x], meshlet_vertex_ids[indices.y], meshlet_vertex_ids[indices.z]); diff --git a/crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl b/crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl deleted file mode 100644 index f7ea7dae56..0000000000 --- a/crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl +++ /dev/null @@ -1,43 +0,0 @@ -#import bevy_pbr::meshlet_bindings::{ - meshlet_thread_meshlet_ids, - meshlets, - draw_indirect_args, - draw_index_buffer, - get_meshlet_occlusion, - get_meshlet_previous_occlusion, -} - -var draw_index_buffer_start_workgroup: u32; - -/// This pass writes out a buffer of cluster + triangle IDs for the draw_indirect() call to rasterize each visible meshlet. - -@compute -@workgroup_size(64, 1, 1) // 64 threads per workgroup, 1 workgroup per cluster, 1 thread per triangle -fn write_index_buffer(@builtin(workgroup_id) workgroup_id: vec3, @builtin(num_workgroups) num_workgroups: vec3, @builtin(local_invocation_index) triangle_id: u32) { - // Calculate the cluster ID for this workgroup - let cluster_id = dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); - if cluster_id >= arrayLength(&meshlet_thread_meshlet_ids) { return; } - - // If the meshlet was culled, then we don't need to draw it - if !get_meshlet_occlusion(cluster_id) { return; } - - // If the meshlet was drawn in the first pass, and this is the second pass, then we don't need to draw it -#ifdef MESHLET_SECOND_WRITE_INDEX_BUFFER_PASS - if get_meshlet_previous_occlusion(cluster_id) { return; } -#endif - - let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; - let meshlet = meshlets[meshlet_id]; - - // Reserve space in the buffer for this meshlet's triangles, and broadcast the start of that slice to all threads - if triangle_id == 0u { - draw_index_buffer_start_workgroup = atomicAdd(&draw_indirect_args.vertex_count, meshlet.triangle_count * 3u); - draw_index_buffer_start_workgroup /= 3u; - } - workgroupBarrier(); - - // Each thread writes one triangle of the meshlet to the buffer slice reserved for the meshlet - if triangle_id < meshlet.triangle_count { - draw_index_buffer[draw_index_buffer_start_workgroup + triangle_id] = (cluster_id << 8u) | triangle_id; - } -} diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 18b7eeeb3a..f4e6b59c74 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -32,6 +32,7 @@ use bevy_utils::tracing::error; #[cfg(feature = "meshlet")] use crate::meshlet::{ prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, MeshletGpuScene, + MeshletMesh, }; use crate::*; @@ -102,7 +103,7 @@ where .init_resource::() .init_resource::>>() .allow_ambiguous_resource::>>() - .init_resource::(); + .init_resource::(); } fn finish(&self, app: &mut App) { @@ -199,9 +200,14 @@ pub struct PreviousViewData { pub view_proj: Mat4, } +#[cfg(not(feature = "meshlet"))] +type PreviousViewFilter = (With, With); +#[cfg(feature = "meshlet")] +type PreviousViewFilter = Or<(With, With)>; + pub fn update_previous_view_data( mut commands: Commands, - query: Query<(Entity, &Camera, &GlobalTransform), (With, With)>, + query: Query<(Entity, &Camera, &GlobalTransform), PreviousViewFilter>, ) { for (entity, camera, camera_transform) in &query { let inverse_view = camera_transform.compute_matrix().inverse(); @@ -215,10 +221,15 @@ pub fn update_previous_view_data( #[derive(Component)] pub struct PreviousGlobalTransform(pub Affine3A); +#[cfg(not(feature = "meshlet"))] +type PreviousMeshFilter = With>; +#[cfg(feature = "meshlet")] +type PreviousMeshFilter = Or<(With>, With>)>; + pub fn update_mesh_previous_global_transforms( mut commands: Commands, - views: Query<&Camera, (With, With)>, - meshes: Query<(Entity, &GlobalTransform), With>>, + views: Query<&Camera, PreviousViewFilter>, + meshes: Query<(Entity, &GlobalTransform), PreviousMeshFilter>, ) { let should_run = views.iter().any(|camera| camera.is_active); @@ -603,7 +614,7 @@ pub fn extract_camera_previous_view_data( } #[derive(Resource, Default)] -pub struct PreviousViewProjectionUniforms { +pub struct PreviousViewUniforms { pub uniforms: DynamicUniformBuffer, } @@ -616,13 +627,13 @@ pub fn prepare_previous_view_uniforms( mut commands: Commands, render_device: Res, render_queue: Res, - mut view_uniforms: ResMut, - views: Query<(Entity, &ExtractedView, Option<&PreviousViewData>), With>, + mut previous_view_uniforms: ResMut, + views: Query<(Entity, &ExtractedView, Option<&PreviousViewData>), PreviousViewFilter>, ) { let views_iter = views.iter(); let view_count = views_iter.len(); let Some(mut writer) = - view_uniforms + previous_view_uniforms .uniforms .get_writer(view_count, &render_device, &render_queue) else { @@ -658,7 +669,7 @@ pub fn prepare_prepass_view_bind_group( prepass_pipeline: Res>, view_uniforms: Res, globals_buffer: Res, - previous_view_uniforms: Res, + previous_view_uniforms: Res, mut prepass_view_bind_group: ResMut, ) { if let (Some(view_binding), Some(globals_binding)) = ( @@ -905,6 +916,7 @@ impl RenderCommand

for SetPrepassViewBindGroup< type Param = SRes; type ViewQuery = ( Read, + Has, Option>, ); type ItemQuery = (); @@ -912,8 +924,9 @@ impl RenderCommand

for SetPrepassViewBindGroup< #[inline] fn render<'w>( _item: &P, - (view_uniform_offset, previous_view_uniform_offset): ( + (view_uniform_offset, has_motion_vector_prepass, previous_view_uniform_offset): ( &'_ ViewUniformOffset, + bool, Option<&'_ PreviousViewUniformOffset>, ), _entity: Option<()>, @@ -922,21 +935,24 @@ impl RenderCommand

for SetPrepassViewBindGroup< ) -> RenderCommandResult { let prepass_view_bind_group = prepass_view_bind_group.into_inner(); - if let Some(previous_view_uniform_offset) = previous_view_uniform_offset { - pass.set_bind_group( - I, - prepass_view_bind_group.motion_vectors.as_ref().unwrap(), - &[ - view_uniform_offset.offset, - previous_view_uniform_offset.offset, - ], - ); - } else { - pass.set_bind_group( - I, - prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(), - &[view_uniform_offset.offset], - ); + match previous_view_uniform_offset { + Some(previous_view_uniform_offset) if has_motion_vector_prepass => { + pass.set_bind_group( + I, + prepass_view_bind_group.motion_vectors.as_ref().unwrap(), + &[ + view_uniform_offset.offset, + previous_view_uniform_offset.offset, + ], + ); + } + _ => { + pass.set_bind_group( + I, + prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(), + &[view_uniform_offset.offset], + ); + } } RenderCommandResult::Success diff --git a/examples/3d/meshlet.rs b/examples/3d/meshlet.rs index 45eb80db0b..722029feeb 100644 --- a/examples/3d/meshlet.rs +++ b/examples/3d/meshlet.rs @@ -20,7 +20,7 @@ const ASSET_URL: &str = "https://github.com/JMS55/bevy_meshlet_asset/blob/bd8698 fn main() -> ExitCode { if !Path::new("./assets/models/bunny.meshlet_mesh").exists() { - println!("ERROR: Asset at path /assets/models/bunny.meshlet_mesh is missing. Please download it from {ASSET_URL}"); + eprintln!("ERROR: Asset at path /assets/models/bunny.meshlet_mesh is missing. Please download it from {ASSET_URL}"); return ExitCode::FAILURE; }