Meshlet texture atomics (#17765)

* Use texture atomics rather than buffer atomics for the visbuffer
(haven't tested perf on a raster-heavy scene yet)
* Unfortunately to clear the visbuffer we now need a compute pass to
clear it. Using wgpu's clear_texture function internally uses a buffer
-> image copy that's insanely expensive. Ideally it should be using
vkCmdClearColorImage, which I've opened an issue for
https://github.com/gfx-rs/wgpu/issues/7090. For now we'll have to stick
with a custom compute pass and all the extra code that brings.
* Faster resolve depth pass by discarding 0 depth pixels instead of
redundantly writing zero (2x faster for big depth textures like shadow
views)
This commit is contained in:
JMS55 2025-02-12 10:15:43 -08:00 committed by GitHub
parent 2f9613f22c
commit 2fd4cc4937
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 328 additions and 131 deletions

View File

@ -1,8 +1,8 @@
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
@group(0) @binding(0) var<storage, read> mip_0: array<u64>; // Per pixel
@group(0) @binding(0) var mip_0: texture_storage_2d<r64uint, read>;
#else
#ifdef MESHLET
@group(0) @binding(0) var<storage, read> mip_0: array<u32>; // Per pixel
@group(0) @binding(0) var mip_0: texture_storage_2d<r32uint, read>;
#else // MESHLET
#ifdef MULTISAMPLE
@group(0) @binding(0) var mip_0: texture_depth_multisampled_2d;
@ -24,7 +24,7 @@
@group(0) @binding(11) var mip_11: texture_storage_2d<r32float, write>;
@group(0) @binding(12) var mip_12: texture_storage_2d<r32float, write>;
@group(0) @binding(13) var samplr: sampler;
struct Constants { max_mip_level: u32, view_width: u32 }
struct Constants { max_mip_level: u32 }
var<push_constant> constants: Constants;
/// Generates a hierarchical depth buffer.
@ -39,7 +39,6 @@ var<workgroup> intermediate_memory: array<array<f32, 16>, 16>;
@compute
@workgroup_size(256, 1, 1)
fn downsample_depth_first(
@builtin(num_workgroups) num_workgroups: vec3u,
@builtin(workgroup_id) workgroup_id: vec3u,
@builtin(local_invocation_index) local_invocation_index: u32,
) {
@ -309,12 +308,13 @@ fn reduce_load_mip_6(tex: vec2u) -> f32 {
}
fn load_mip_0(x: u32, y: u32) -> f32 {
let i = y * constants.view_width + x;
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
return bitcast<f32>(u32(mip_0[i] >> 32u));
let visibility = textureLoad(mip_0, vec2(x, y)).r;
return bitcast<f32>(u32(visibility >> 32u));
#else // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
#ifdef MESHLET
return bitcast<f32>(mip_0[i]);
let visibility = textureLoad(mip_0, vec2(x, y)).r;
return bitcast<f32>(visibility);
#else // MESHLET
// Downsample the top level.
#ifdef MULTISAMPLE

View File

@ -427,7 +427,7 @@ impl SpecializedComputePipeline for DownsampleDepthPipeline {
layout: vec![self.bind_group_layout.clone()],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..8,
range: 0..4,
}],
shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE,
shader_defs,
@ -627,9 +627,8 @@ impl ViewDepthPyramid {
timestamp_writes: None,
});
downsample_pass.set_pipeline(downsample_depth_first_pipeline);
// Pass the mip count and the texture width as push constants, for
// simplicity.
downsample_pass.set_push_constants(0, bytemuck::cast_slice(&[self.mip_count, view_size.x]));
// Pass the mip count as a push constant, for simplicity.
downsample_pass.set_push_constants(0, &self.mip_count.to_le_bytes());
downsample_pass.set_bind_group(0, downsample_depth_bind_group, &[]);
downsample_pass.dispatch_workgroups(view_size.x.div_ceil(64), view_size.y.div_ceil(64), 1);

View File

@ -0,0 +1,18 @@
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r64uint, write>;
#else
@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r32uint, write>;
#endif
var<push_constant> view_size: vec2<u32>;
@compute
@workgroup_size(16, 16, 1)
fn clear_visibility_buffer(@builtin(global_invocation_id) global_id: vec3<u32>) {
if any(global_id.xy >= view_size) { return; }
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
textureStore(meshlet_visibility_buffer, global_id.xy, vec4(0lu));
#else
textureStore(meshlet_visibility_buffer, global_id.xy, vec4(0u));
#endif
}

View File

@ -100,9 +100,9 @@ fn cluster_is_second_pass_candidate(cluster_id: u32) -> bool {
@group(0) @binding(6) var<storage, read> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups
@group(0) @binding(7) var<storage, read> meshlet_software_raster_cluster_count: u32;
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
@group(0) @binding(8) var<storage, read_write> meshlet_visibility_buffer: array<atomic<u64>>; // Per pixel
@group(0) @binding(8) var meshlet_visibility_buffer: texture_storage_2d<r64uint, atomic>;
#else
@group(0) @binding(8) var<storage, read_write> meshlet_visibility_buffer: array<atomic<u32>>; // Per pixel
@group(0) @binding(8) var meshlet_visibility_buffer: texture_storage_2d<r32uint, atomic>;
#endif
@group(0) @binding(9) var<uniform> view: View;
@ -149,7 +149,7 @@ fn get_meshlet_vertex_position(meshlet: ptr<function, Meshlet>, vertex_id: u32)
#endif
#ifdef MESHLET_MESH_MATERIAL_PASS
@group(1) @binding(0) var<storage, read> meshlet_visibility_buffer: array<u64>; // Per pixel
@group(1) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r64uint, read>;
@group(1) @binding(1) var<storage, read> meshlet_cluster_meshlet_ids: array<u32>; // Per cluster
@group(1) @binding(2) var<storage, read> meshlets: array<Meshlet>; // Per meshlet
@group(1) @binding(3) var<storage, read> meshlet_indices: array<u32>; // Many per meshlet

View File

@ -106,9 +106,9 @@ const MESHLET_MESH_MATERIAL_SHADER_HANDLE: Handle<Shader> =
/// * Requires preprocessing meshes. See [`MeshletMesh`] for details.
/// * Limitations on the kinds of materials you can use. See [`MeshletMesh`] for details.
///
/// This plugin requires a fairly recent GPU that supports [`WgpuFeatures::SHADER_INT64_ATOMIC_MIN_MAX`].
/// This plugin requires a fairly recent GPU that supports [`WgpuFeatures::TEXTURE_INT64_ATOMIC`].
///
/// This plugin currently works only on the Vulkan backend.
/// This plugin currently works only on the Vulkan and Metal backends.
///
/// This plugin is not compatible with [`Msaa`]. Any camera rendering a [`MeshletMesh`] must have
/// [`Msaa`] set to [`Msaa::Off`].
@ -133,7 +133,8 @@ pub struct MeshletPlugin {
impl MeshletPlugin {
/// [`WgpuFeatures`] required for this plugin to function.
pub fn required_wgpu_features() -> WgpuFeatures {
WgpuFeatures::SHADER_INT64_ATOMIC_MIN_MAX
WgpuFeatures::TEXTURE_INT64_ATOMIC
| WgpuFeatures::TEXTURE_ATOMIC
| WgpuFeatures::SHADER_INT64
| WgpuFeatures::SUBGROUP
| WgpuFeatures::DEPTH_CLIP_CONTROL
@ -151,6 +152,12 @@ impl Plugin for MeshletPlugin {
std::process::exit(1);
}
load_internal_asset!(
app,
MESHLET_CLEAR_VISIBILITY_BUFFER_SHADER_HANDLE,
"clear_visibility_buffer.wgsl",
Shader::from_wgsl
);
load_internal_asset!(
app,
MESHLET_BINDINGS_SHADER_HANDLE,

View File

@ -10,6 +10,8 @@ use bevy_ecs::{
};
use bevy_render::render_resource::*;
pub const MESHLET_CLEAR_VISIBILITY_BUFFER_SHADER_HANDLE: Handle<Shader> =
weak_handle!("a4bf48e4-5605-4d1c-987e-29c7b1ec95dc");
pub const MESHLET_FILL_CLUSTER_BUFFERS_SHADER_HANDLE: Handle<Shader> =
weak_handle!("80ccea4a-8234-4ee0-af74-77b3cad503cf");
pub const MESHLET_CULLING_SHADER_HANDLE: Handle<Shader> =
@ -26,6 +28,8 @@ pub const MESHLET_REMAP_1D_TO_2D_DISPATCH_SHADER_HANDLE: Handle<Shader> =
#[derive(Resource)]
pub struct MeshletPipelines {
fill_cluster_buffers: CachedComputePipelineId,
clear_visibility_buffer: CachedComputePipelineId,
clear_visibility_buffer_shadow_view: CachedComputePipelineId,
cull_first: CachedComputePipelineId,
cull_second: CachedComputePipelineId,
downsample_depth_first: CachedComputePipelineId,
@ -33,10 +37,10 @@ pub struct MeshletPipelines {
downsample_depth_first_shadow_view: CachedComputePipelineId,
downsample_depth_second_shadow_view: CachedComputePipelineId,
visibility_buffer_software_raster: CachedComputePipelineId,
visibility_buffer_software_raster_depth_only: CachedComputePipelineId,
visibility_buffer_software_raster_shadow_view: CachedComputePipelineId,
visibility_buffer_hardware_raster: CachedRenderPipelineId,
visibility_buffer_hardware_raster_depth_only: CachedRenderPipelineId,
visibility_buffer_hardware_raster_depth_only_unclipped: CachedRenderPipelineId,
visibility_buffer_hardware_raster_shadow_view: CachedRenderPipelineId,
visibility_buffer_hardware_raster_shadow_view_unclipped: CachedRenderPipelineId,
resolve_depth: CachedRenderPipelineId,
resolve_depth_shadow_view: CachedRenderPipelineId,
resolve_material_depth: CachedRenderPipelineId,
@ -49,12 +53,27 @@ impl FromWorld for MeshletPipelines {
let fill_cluster_buffers_bind_group_layout = resource_manager
.fill_cluster_buffers_bind_group_layout
.clone();
let clear_visibility_buffer_bind_group_layout = resource_manager
.clear_visibility_buffer_bind_group_layout
.clone();
let clear_visibility_buffer_shadow_view_bind_group_layout = resource_manager
.clear_visibility_buffer_shadow_view_bind_group_layout
.clone();
let cull_layout = resource_manager.culling_bind_group_layout.clone();
let downsample_depth_layout = resource_manager.downsample_depth_bind_group_layout.clone();
let downsample_depth_shadow_view_layout = resource_manager
.downsample_depth_shadow_view_bind_group_layout
.clone();
let visibility_buffer_raster_layout = resource_manager
.visibility_buffer_raster_bind_group_layout
.clone();
let visibility_buffer_raster_shadow_view_layout = resource_manager
.visibility_buffer_raster_shadow_view_bind_group_layout
.clone();
let resolve_depth_layout = resource_manager.resolve_depth_bind_group_layout.clone();
let resolve_depth_shadow_view_layout = resource_manager
.resolve_depth_shadow_view_bind_group_layout
.clone();
let resolve_material_depth_layout = resource_manager
.resolve_material_depth_bind_group_layout
.clone();
@ -67,7 +86,7 @@ impl FromWorld for MeshletPipelines {
fill_cluster_buffers: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some("meshlet_fill_cluster_buffers_pipeline".into()),
layout: vec![fill_cluster_buffers_bind_group_layout.clone()],
layout: vec![fill_cluster_buffers_bind_group_layout],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..4,
@ -79,6 +98,36 @@ impl FromWorld for MeshletPipelines {
},
),
clear_visibility_buffer: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some("meshlet_clear_visibility_buffer_pipeline".into()),
layout: vec![clear_visibility_buffer_bind_group_layout],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..8,
}],
shader: MESHLET_CLEAR_VISIBILITY_BUFFER_SHADER_HANDLE,
shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()],
entry_point: "clear_visibility_buffer".into(),
zero_initialize_workgroup_memory: false,
},
),
clear_visibility_buffer_shadow_view: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some("meshlet_clear_visibility_buffer_shadow_view_pipeline".into()),
layout: vec![clear_visibility_buffer_shadow_view_bind_group_layout],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..8,
}],
shader: MESHLET_CLEAR_VISIBILITY_BUFFER_SHADER_HANDLE,
shader_defs: vec![],
entry_point: "clear_visibility_buffer".into(),
zero_initialize_workgroup_memory: false,
},
),
cull_first: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor {
label: Some("meshlet_culling_first_pipeline".into()),
layout: vec![cull_layout.clone()],
@ -117,7 +166,7 @@ impl FromWorld for MeshletPipelines {
layout: vec![downsample_depth_layout.clone()],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..8,
range: 0..4,
}],
shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE,
shader_defs: vec![
@ -135,7 +184,7 @@ impl FromWorld for MeshletPipelines {
layout: vec![downsample_depth_layout.clone()],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..8,
range: 0..4,
}],
shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE,
shader_defs: vec![
@ -150,10 +199,10 @@ impl FromWorld for MeshletPipelines {
downsample_depth_first_shadow_view: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some("meshlet_downsample_depth_first_pipeline".into()),
layout: vec![downsample_depth_layout.clone()],
layout: vec![downsample_depth_shadow_view_layout.clone()],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..8,
range: 0..4,
}],
shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE,
shader_defs: vec!["MESHLET".into()],
@ -165,10 +214,10 @@ impl FromWorld for MeshletPipelines {
downsample_depth_second_shadow_view: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some("meshlet_downsample_depth_second_pipeline".into()),
layout: vec![downsample_depth_layout],
layout: vec![downsample_depth_shadow_view_layout],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..8,
range: 0..4,
}],
shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE,
shader_defs: vec!["MESHLET".into()],
@ -198,12 +247,12 @@ impl FromWorld for MeshletPipelines {
},
),
visibility_buffer_software_raster_depth_only: pipeline_cache.queue_compute_pipeline(
visibility_buffer_software_raster_shadow_view: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some(
"meshlet_visibility_buffer_software_raster_depth_only_pipeline".into(),
"meshlet_visibility_buffer_software_raster_shadow_view_pipeline".into(),
),
layout: vec![visibility_buffer_raster_layout.clone()],
layout: vec![visibility_buffer_raster_shadow_view_layout.clone()],
push_constant_ranges: vec![],
shader: MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE,
shader_defs: vec![
@ -265,12 +314,12 @@ impl FromWorld for MeshletPipelines {
},
),
visibility_buffer_hardware_raster_depth_only: pipeline_cache.queue_render_pipeline(
visibility_buffer_hardware_raster_shadow_view: pipeline_cache.queue_render_pipeline(
RenderPipelineDescriptor {
label: Some(
"meshlet_visibility_buffer_hardware_raster_depth_only_pipeline".into(),
"meshlet_visibility_buffer_hardware_raster_shadow_view_pipeline".into(),
),
layout: vec![visibility_buffer_raster_layout.clone()],
layout: vec![visibility_buffer_raster_shadow_view_layout.clone()],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::VERTEX,
range: 0..4,
@ -306,13 +355,13 @@ impl FromWorld for MeshletPipelines {
},
),
visibility_buffer_hardware_raster_depth_only_unclipped: pipeline_cache
visibility_buffer_hardware_raster_shadow_view_unclipped: pipeline_cache
.queue_render_pipeline(RenderPipelineDescriptor {
label: Some(
"meshlet_visibility_buffer_hardware_raster_depth_only_unclipped_pipeline"
"meshlet_visibility_buffer_hardware_raster_shadow_view_unclipped_pipeline"
.into(),
),
layout: vec![visibility_buffer_raster_layout],
layout: vec![visibility_buffer_raster_shadow_view_layout],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::VERTEX,
range: 0..4,
@ -349,17 +398,14 @@ impl FromWorld for MeshletPipelines {
resolve_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor {
label: Some("meshlet_resolve_depth_pipeline".into()),
layout: vec![resolve_depth_layout.clone()],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::FRAGMENT,
range: 0..4,
}],
layout: vec![resolve_depth_layout],
push_constant_ranges: vec![],
vertex: fullscreen_shader_vertex_state(),
primitive: PrimitiveState::default(),
depth_stencil: Some(DepthStencilState {
format: CORE_3D_DEPTH_FORMAT,
depth_write_enabled: true,
depth_compare: CompareFunction::GreaterEqual,
depth_compare: CompareFunction::Always,
stencil: StencilState::default(),
bias: DepthBiasState::default(),
}),
@ -376,17 +422,14 @@ impl FromWorld for MeshletPipelines {
resolve_depth_shadow_view: pipeline_cache.queue_render_pipeline(
RenderPipelineDescriptor {
label: Some("meshlet_resolve_depth_pipeline".into()),
layout: vec![resolve_depth_layout],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::FRAGMENT,
range: 0..4,
}],
layout: vec![resolve_depth_shadow_view_layout],
push_constant_ranges: vec![],
vertex: fullscreen_shader_vertex_state(),
primitive: PrimitiveState::default(),
depth_stencil: Some(DepthStencilState {
format: CORE_3D_DEPTH_FORMAT,
depth_write_enabled: true,
depth_compare: CompareFunction::GreaterEqual,
depth_compare: CompareFunction::Always,
stencil: StencilState::default(),
bias: DepthBiasState::default(),
}),
@ -405,10 +448,7 @@ impl FromWorld for MeshletPipelines {
RenderPipelineDescriptor {
label: Some("meshlet_resolve_material_depth_pipeline".into()),
layout: vec![resolve_material_depth_layout],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::FRAGMENT,
range: 0..4,
}],
push_constant_ranges: vec![],
vertex: fullscreen_shader_vertex_state(),
primitive: PrimitiveState::default(),
depth_stencil: Some(DepthStencilState {
@ -460,6 +500,8 @@ impl MeshletPipelines {
&ComputePipeline,
&ComputePipeline,
&ComputePipeline,
&ComputePipeline,
&ComputePipeline,
&RenderPipeline,
&RenderPipeline,
&RenderPipeline,
@ -472,6 +514,8 @@ impl MeshletPipelines {
let pipeline = world.get_resource::<Self>()?;
Some((
pipeline_cache.get_compute_pipeline(pipeline.fill_cluster_buffers)?,
pipeline_cache.get_compute_pipeline(pipeline.clear_visibility_buffer)?,
pipeline_cache.get_compute_pipeline(pipeline.clear_visibility_buffer_shadow_view)?,
pipeline_cache.get_compute_pipeline(pipeline.cull_first)?,
pipeline_cache.get_compute_pipeline(pipeline.cull_second)?,
pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first)?,
@ -480,12 +524,12 @@ impl MeshletPipelines {
pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second_shadow_view)?,
pipeline_cache.get_compute_pipeline(pipeline.visibility_buffer_software_raster)?,
pipeline_cache
.get_compute_pipeline(pipeline.visibility_buffer_software_raster_depth_only)?,
.get_compute_pipeline(pipeline.visibility_buffer_software_raster_shadow_view)?,
pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_hardware_raster)?,
pipeline_cache
.get_render_pipeline(pipeline.visibility_buffer_hardware_raster_depth_only)?,
.get_render_pipeline(pipeline.visibility_buffer_hardware_raster_shadow_view)?,
pipeline_cache.get_render_pipeline(
pipeline.visibility_buffer_hardware_raster_depth_only_unclipped,
pipeline.visibility_buffer_hardware_raster_shadow_view_unclipped,
)?,
pipeline_cache.get_render_pipeline(pipeline.resolve_depth)?,
pipeline_cache.get_render_pipeline(pipeline.resolve_depth_shadow_view)?,

View File

@ -1,35 +1,36 @@
#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
@group(0) @binding(0) var<storage, read> meshlet_visibility_buffer: array<u64>; // Per pixel
@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r64uint, read>;
#else
@group(0) @binding(0) var<storage, read> meshlet_visibility_buffer: array<u32>; // Per pixel
@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r32uint, read>;
#endif
@group(0) @binding(1) var<storage, read> meshlet_cluster_instance_ids: array<u32>; // Per cluster
@group(0) @binding(2) var<storage, read> meshlet_instance_material_ids: array<u32>; // Per entity instance
var<push_constant> view_width: u32;
/// This pass writes out the depth texture.
@fragment
fn resolve_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 {
let frag_coord_1d = u32(in.position.y) * view_width + u32(in.position.x);
let visibility = meshlet_visibility_buffer[frag_coord_1d];
let visibility = textureLoad(meshlet_visibility_buffer, vec2<u32>(in.position.xy)).r;
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
return bitcast<f32>(u32(visibility >> 32u));
let depth = u32(visibility >> 32u);
#else
return bitcast<f32>(visibility);
let depth = visibility;
#endif
if depth == 0u { discard; }
return bitcast<f32>(depth);
}
/// This pass writes out the material depth texture.
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
@fragment
fn resolve_material_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 {
let frag_coord_1d = u32(in.position.y) * view_width + u32(in.position.x);
let visibility = meshlet_visibility_buffer[frag_coord_1d];
let visibility = textureLoad(meshlet_visibility_buffer, vec2<u32>(in.position.xy)).r;
let depth = visibility >> 32u;
if depth == 0lu { return 0.0; }
if depth == 0lu { discard; }
let cluster_id = u32(visibility) >> 7u;
let instance_id = meshlet_cluster_instance_ids[cluster_id];

View File

@ -50,10 +50,15 @@ pub struct ResourceManager {
// Bind group layouts
pub fill_cluster_buffers_bind_group_layout: BindGroupLayout,
pub clear_visibility_buffer_bind_group_layout: BindGroupLayout,
pub clear_visibility_buffer_shadow_view_bind_group_layout: BindGroupLayout,
pub culling_bind_group_layout: BindGroupLayout,
pub visibility_buffer_raster_bind_group_layout: BindGroupLayout,
pub visibility_buffer_raster_shadow_view_bind_group_layout: BindGroupLayout,
pub downsample_depth_bind_group_layout: BindGroupLayout,
pub downsample_depth_shadow_view_bind_group_layout: BindGroupLayout,
pub resolve_depth_bind_group_layout: BindGroupLayout,
pub resolve_depth_shadow_view_bind_group_layout: BindGroupLayout,
pub resolve_material_depth_bind_group_layout: BindGroupLayout,
pub material_shade_bind_group_layout: BindGroupLayout,
pub remap_1d_to_2d_dispatch_bind_group_layout: Option<BindGroupLayout>,
@ -108,6 +113,21 @@ impl ResourceManager {
),
),
),
clear_visibility_buffer_bind_group_layout: render_device.create_bind_group_layout(
"meshlet_clear_visibility_buffer_bind_group_layout",
&BindGroupLayoutEntries::single(
ShaderStages::COMPUTE,
texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::WriteOnly),
),
),
clear_visibility_buffer_shadow_view_bind_group_layout: render_device
.create_bind_group_layout(
"meshlet_clear_visibility_buffer_shadow_view_bind_group_layout",
&BindGroupLayoutEntries::single(
ShaderStages::COMPUTE,
texture_storage_2d(TextureFormat::R32Uint, StorageTextureAccess::WriteOnly),
),
),
culling_bind_group_layout: render_device.create_bind_group_layout(
"meshlet_culling_bind_group_layout",
&BindGroupLayoutEntries::sequential(
@ -136,7 +156,34 @@ impl ResourceManager {
texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly)
};
(
storage_buffer_read_only_sized(false, None),
texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::ReadOnly),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
texture_storage_2d(
TextureFormat::R32Float,
StorageTextureAccess::ReadWrite,
),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
sampler(SamplerBindingType::NonFiltering),
)
}),
),
downsample_depth_shadow_view_bind_group_layout: render_device.create_bind_group_layout(
"meshlet_downsample_depth_shadow_view_bind_group_layout",
&BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, {
let write_only_r32float = || {
texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly)
};
(
texture_storage_2d(TextureFormat::R32Uint, StorageTextureAccess::ReadOnly),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
@ -169,16 +216,45 @@ impl ResourceManager {
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_sized(false, None),
texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::Atomic),
uniform_buffer::<ViewUniform>(true),
),
),
),
visibility_buffer_raster_shadow_view_bind_group_layout: render_device
.create_bind_group_layout(
"meshlet_visibility_buffer_raster_shadow_view_bind_group_layout",
&BindGroupLayoutEntries::sequential(
ShaderStages::all(),
(
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
texture_storage_2d(
TextureFormat::R32Uint,
StorageTextureAccess::Atomic,
),
uniform_buffer::<ViewUniform>(true),
),
),
),
resolve_depth_bind_group_layout: render_device.create_bind_group_layout(
"meshlet_resolve_depth_bind_group_layout",
&BindGroupLayoutEntries::single(
ShaderStages::FRAGMENT,
storage_buffer_read_only_sized(false, None),
texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::ReadOnly),
),
),
resolve_depth_shadow_view_bind_group_layout: render_device.create_bind_group_layout(
"meshlet_resolve_depth_shadow_view_bind_group_layout",
&BindGroupLayoutEntries::single(
ShaderStages::FRAGMENT,
texture_storage_2d(TextureFormat::R32Uint, StorageTextureAccess::ReadOnly),
),
),
resolve_material_depth_bind_group_layout: render_device.create_bind_group_layout(
@ -186,7 +262,7 @@ impl ResourceManager {
&BindGroupLayoutEntries::sequential(
ShaderStages::FRAGMENT,
(
storage_buffer_read_only_sized(false, None),
texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::ReadOnly),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
),
@ -197,7 +273,7 @@ impl ResourceManager {
&BindGroupLayoutEntries::sequential(
ShaderStages::FRAGMENT,
(
storage_buffer_read_only_sized(false, None),
texture_storage_2d(TextureFormat::R64Uint, StorageTextureAccess::ReadOnly),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
storage_buffer_read_only_sized(false, None),
@ -234,7 +310,7 @@ pub struct MeshletViewResources {
pub second_pass_candidates_buffer: Buffer,
instance_visibility: Buffer,
pub dummy_render_target: CachedTexture,
pub visibility_buffer: Buffer,
pub visibility_buffer: CachedTexture,
pub visibility_buffer_software_raster_indirect_args_first: Buffer,
pub visibility_buffer_software_raster_indirect_args_second: Buffer,
pub visibility_buffer_hardware_raster_indirect_args_first: Buffer,
@ -244,12 +320,14 @@ pub struct MeshletViewResources {
pub material_depth: Option<CachedTexture>,
pub view_size: UVec2,
pub raster_cluster_rightmost_slot: u32,
not_shadow_view: bool,
}
#[derive(Component)]
pub struct MeshletViewBindGroups {
pub first_node: Arc<AtomicBool>,
pub fill_cluster_buffers: BindGroup,
pub clear_visibility_buffer: BindGroup,
pub culling_first: BindGroup,
pub culling_second: BindGroup,
pub downsample_depth: BindGroup,
@ -419,18 +497,27 @@ pub fn prepare_meshlet_per_frame_resources(
},
);
let type_size = if not_shadow_view {
size_of::<u64>()
} else {
size_of::<u32>()
} as u64;
// TODO: Cache
let visibility_buffer = render_device.create_buffer(&BufferDescriptor {
label: Some("meshlet_visibility_buffer"),
size: type_size * (view.viewport.z * view.viewport.w) as u64,
usage: BufferUsages::STORAGE,
mapped_at_creation: false,
});
let visibility_buffer = texture_cache.get(
&render_device,
TextureDescriptor {
label: Some("meshlet_visibility_buffer"),
size: Extent3d {
width: view.viewport.z,
height: view.viewport.w,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: TextureDimension::D2,
format: if not_shadow_view {
TextureFormat::R64Uint
} else {
TextureFormat::R32Uint
},
usage: TextureUsages::STORAGE_ATOMIC | TextureUsages::STORAGE_BINDING,
view_formats: &[],
},
);
let visibility_buffer_software_raster_indirect_args_first = render_device
.create_buffer_with_data(&BufferInitDescriptor {
@ -520,6 +607,7 @@ pub fn prepare_meshlet_per_frame_resources(
.then(|| texture_cache.get(&render_device, material_depth)),
view_size: view.viewport.zw(),
raster_cluster_rightmost_slot: resource_manager.raster_cluster_rightmost_slot,
not_shadow_view,
});
}
}
@ -577,6 +665,16 @@ pub fn prepare_meshlet_view_bind_groups(
&entries,
);
let clear_visibility_buffer = render_device.create_bind_group(
"meshlet_clear_visibility_buffer_bind_group",
if view_resources.not_shadow_view {
&resource_manager.clear_visibility_buffer_bind_group_layout
} else {
&resource_manager.clear_visibility_buffer_shadow_view_bind_group_layout
},
&BindGroupEntries::single(&view_resources.visibility_buffer.default_view),
);
let entries = BindGroupEntries::sequential((
cluster_meshlet_ids.as_entire_binding(),
meshlet_mesh_manager.meshlet_bounding_spheres.binding(),
@ -638,8 +736,12 @@ pub fn prepare_meshlet_view_bind_groups(
let downsample_depth = view_resources.depth_pyramid.create_bind_group(
&render_device,
"meshlet_downsample_depth_bind_group",
&resource_manager.downsample_depth_bind_group_layout,
view_resources.visibility_buffer.as_entire_binding(),
if view_resources.not_shadow_view {
&resource_manager.downsample_depth_bind_group_layout
} else {
&resource_manager.downsample_depth_shadow_view_bind_group_layout
},
&view_resources.visibility_buffer.default_view,
&resource_manager.depth_pyramid_sampler,
);
@ -656,24 +758,32 @@ pub fn prepare_meshlet_view_bind_groups(
resource_manager
.software_raster_cluster_count
.as_entire_binding(),
view_resources.visibility_buffer.as_entire_binding(),
&view_resources.visibility_buffer.default_view,
view_uniforms.clone(),
));
let visibility_buffer_raster = render_device.create_bind_group(
"meshlet_visibility_raster_buffer_bind_group",
&resource_manager.visibility_buffer_raster_bind_group_layout,
if view_resources.not_shadow_view {
&resource_manager.visibility_buffer_raster_bind_group_layout
} else {
&resource_manager.visibility_buffer_raster_shadow_view_bind_group_layout
},
&entries,
);
let resolve_depth = render_device.create_bind_group(
"meshlet_resolve_depth_bind_group",
&resource_manager.resolve_depth_bind_group_layout,
&BindGroupEntries::single(view_resources.visibility_buffer.as_entire_binding()),
if view_resources.not_shadow_view {
&resource_manager.resolve_depth_bind_group_layout
} else {
&resource_manager.resolve_depth_shadow_view_bind_group_layout
},
&BindGroupEntries::single(&view_resources.visibility_buffer.default_view),
);
let resolve_material_depth = view_resources.material_depth.as_ref().map(|_| {
let entries = BindGroupEntries::sequential((
view_resources.visibility_buffer.as_entire_binding(),
&view_resources.visibility_buffer.default_view,
cluster_instance_ids.as_entire_binding(),
instance_manager.instance_material_ids.binding().unwrap(),
));
@ -686,7 +796,7 @@ pub fn prepare_meshlet_view_bind_groups(
let material_shade = view_resources.material_depth.as_ref().map(|_| {
let entries = BindGroupEntries::sequential((
view_resources.visibility_buffer.as_entire_binding(),
&view_resources.visibility_buffer.default_view,
cluster_meshlet_ids.as_entire_binding(),
meshlet_mesh_manager.meshlets.binding(),
meshlet_mesh_manager.indices.binding(),
@ -738,6 +848,7 @@ pub fn prepare_meshlet_view_bind_groups(
commands.entity(view_entity).insert(MeshletViewBindGroups {
first_node: Arc::clone(&first_node),
fill_cluster_buffers,
clear_visibility_buffer,
culling_first,
culling_second,
downsample_depth,

View File

@ -54,16 +54,13 @@ fn vertex(@builtin(instance_index) instance_index: u32, @builtin(vertex_index) v
@fragment
fn fragment(vertex_output: VertexOutput) {
let frag_coord_1d = u32(vertex_output.position.y) * u32(view.viewport.z) + u32(vertex_output.position.x);
let depth = bitcast<u32>(vertex_output.position.z);
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
let depth = bitcast<u32>(vertex_output.position.z);
let visibility = (u64(depth) << 32u) | u64(vertex_output.packed_ids);
atomicMax(&meshlet_visibility_buffer[frag_coord_1d], visibility);
#else
let depth = bitcast<u32>(vertex_output.position.z);
atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth);
let visibility = depth;
#endif
textureAtomicMax(meshlet_visibility_buffer, vec2<u32>(vertex_output.position.xy), visibility);
}
fn dummy_vertex() -> VertexOutput {

View File

@ -9,7 +9,7 @@ use bevy_ecs::{
query::QueryState,
world::{FromWorld, World},
};
use bevy_math::ops;
use bevy_math::{ops, UVec2};
use bevy_render::{
camera::ExtractedCamera,
render_graph::{Node, NodeRunError, RenderGraphContext},
@ -77,6 +77,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
let Some((
fill_cluster_buffers_pipeline,
clear_visibility_buffer_pipeline,
clear_visibility_buffer_shadow_view_pipeline,
culling_first_pipeline,
culling_second_pipeline,
downsample_depth_first_pipeline,
@ -84,10 +86,10 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
downsample_depth_first_shadow_view_pipeline,
downsample_depth_second_shadow_view_pipeline,
visibility_buffer_software_raster_pipeline,
visibility_buffer_software_raster_depth_only_pipeline,
visibility_buffer_software_raster_shadow_view_pipeline,
visibility_buffer_hardware_raster_pipeline,
visibility_buffer_hardware_raster_depth_only_pipeline,
visibility_buffer_hardware_raster_depth_only_unclipped_pipeline,
visibility_buffer_hardware_raster_shadow_view_pipeline,
visibility_buffer_hardware_raster_shadow_view_unclipped_pipeline,
resolve_depth_pipeline,
resolve_depth_shadow_view_pipeline,
resolve_material_depth_pipeline,
@ -107,11 +109,6 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
render_context
.command_encoder()
.push_debug_group("meshlet_visibility_buffer_raster");
render_context.command_encoder().clear_buffer(
&meshlet_view_resources.second_pass_candidates_buffer,
0,
None,
);
if first_node {
fill_cluster_buffers_pass(
render_context,
@ -120,6 +117,17 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
meshlet_view_resources.scene_instance_count,
);
}
clear_visibility_buffer_pass(
render_context,
&meshlet_view_bind_groups.clear_visibility_buffer,
clear_visibility_buffer_pipeline,
meshlet_view_resources.view_size,
);
render_context.command_encoder().clear_buffer(
&meshlet_view_resources.second_pass_candidates_buffer,
0,
None,
);
cull_pass(
"culling_first",
render_context,
@ -189,7 +197,6 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
resolve_depth(
render_context,
view_depth.get_attachment(StoreOp::Store),
meshlet_view_resources,
meshlet_view_bind_groups,
resolve_depth_pipeline,
camera,
@ -226,15 +233,21 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
let shadow_visibility_buffer_hardware_raster_pipeline =
if let LightEntity::Directional { .. } = light_type {
visibility_buffer_hardware_raster_depth_only_unclipped_pipeline
visibility_buffer_hardware_raster_shadow_view_unclipped_pipeline
} else {
visibility_buffer_hardware_raster_depth_only_pipeline
visibility_buffer_hardware_raster_shadow_view_pipeline
};
render_context.command_encoder().push_debug_group(&format!(
"meshlet_visibility_buffer_raster: {}",
shadow_view.pass_name
));
clear_visibility_buffer_pass(
render_context,
&meshlet_view_bind_groups.clear_visibility_buffer,
clear_visibility_buffer_shadow_view_pipeline,
meshlet_view_resources.view_size,
);
render_context.command_encoder().clear_buffer(
&meshlet_view_resources.second_pass_candidates_buffer,
0,
@ -264,7 +277,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
&meshlet_view_resources.dummy_render_target.default_view,
meshlet_view_bind_groups,
view_offset,
visibility_buffer_software_raster_depth_only_pipeline,
visibility_buffer_software_raster_shadow_view_pipeline,
shadow_visibility_buffer_hardware_raster_pipeline,
None,
meshlet_view_resources.raster_cluster_rightmost_slot,
@ -301,7 +314,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
&meshlet_view_resources.dummy_render_target.default_view,
meshlet_view_bind_groups,
view_offset,
visibility_buffer_software_raster_depth_only_pipeline,
visibility_buffer_software_raster_shadow_view_pipeline,
shadow_visibility_buffer_hardware_raster_pipeline,
None,
meshlet_view_resources.raster_cluster_rightmost_slot,
@ -309,7 +322,6 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
resolve_depth(
render_context,
shadow_view.depth_attachment.get_attachment(StoreOp::Store),
meshlet_view_resources,
meshlet_view_bind_groups,
resolve_depth_shadow_view_pipeline,
camera,
@ -362,6 +374,29 @@ fn fill_cluster_buffers_pass(
);
}
// TODO: Replace this with vkCmdClearColorImage once wgpu supports it
fn clear_visibility_buffer_pass(
render_context: &mut RenderContext,
clear_visibility_buffer_bind_group: &BindGroup,
clear_visibility_buffer_pipeline: &ComputePipeline,
view_size: UVec2,
) {
let command_encoder = render_context.command_encoder();
let mut clear_visibility_buffer_pass =
command_encoder.begin_compute_pass(&ComputePassDescriptor {
label: Some("clear_visibility_buffer"),
timestamp_writes: None,
});
clear_visibility_buffer_pass.set_pipeline(clear_visibility_buffer_pipeline);
clear_visibility_buffer_pass.set_push_constants(0, bytemuck::bytes_of(&view_size));
clear_visibility_buffer_pass.set_bind_group(0, clear_visibility_buffer_bind_group, &[]);
clear_visibility_buffer_pass.dispatch_workgroups(
view_size.x.div_ceil(16),
view_size.y.div_ceil(16),
1,
);
}
fn cull_pass(
label: &'static str,
render_context: &mut RenderContext,
@ -478,7 +513,6 @@ fn raster_pass(
fn resolve_depth(
render_context: &mut RenderContext,
depth_stencil_attachment: RenderPassDepthStencilAttachment,
meshlet_view_resources: &MeshletViewResources,
meshlet_view_bind_groups: &MeshletViewBindGroups,
resolve_depth_pipeline: &RenderPipeline,
camera: &ExtractedCamera,
@ -494,11 +528,6 @@ fn resolve_depth(
resolve_pass.set_camera_viewport(viewport);
}
resolve_pass.set_render_pipeline(resolve_depth_pipeline);
resolve_pass.set_push_constants(
ShaderStages::FRAGMENT,
0,
&meshlet_view_resources.view_size.x.to_le_bytes(),
);
resolve_pass.set_bind_group(0, &meshlet_view_bind_groups.resolve_depth, &[]);
resolve_pass.draw(0..3, 0..1);
}
@ -532,11 +561,6 @@ fn resolve_material_depth(
resolve_pass.set_camera_viewport(viewport);
}
resolve_pass.set_render_pipeline(resolve_material_depth_pipeline);
resolve_pass.set_push_constants(
ShaderStages::FRAGMENT,
0,
&meshlet_view_resources.view_size.x.to_le_bytes(),
);
resolve_pass.set_bind_group(0, resolve_material_depth_bind_group, &[]);
resolve_pass.draw(0..3, 0..1);
}

View File

@ -104,8 +104,7 @@ struct VertexOutput {
/// Load the visibility buffer texture and resolve it into a VertexOutput.
fn resolve_vertex_output(frag_coord: vec4<f32>) -> VertexOutput {
let frag_coord_1d = u32(frag_coord.y) * u32(view.viewport.z) + u32(frag_coord.x);
let packed_ids = u32(meshlet_visibility_buffer[frag_coord_1d]); // TODO: Might be faster to load the correct u32 directly
let packed_ids = u32(textureLoad(meshlet_visibility_buffer, vec2<u32>(frag_coord.xy)).r);
let cluster_id = packed_ids >> 7u;
let meshlet_id = meshlet_cluster_meshlet_ids[cluster_id];
var meshlet = meshlets[meshlet_id];

View File

@ -167,16 +167,13 @@ fn rasterize_cluster(
}
fn write_visibility_buffer_pixel(x: f32, y: f32, z: f32, packed_ids: u32) {
let frag_coord_1d = u32(y * view.viewport.z + x);
let depth = bitcast<u32>(z);
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
let depth = bitcast<u32>(z);
let visibility = (u64(depth) << 32u) | u64(packed_ids);
atomicMax(&meshlet_visibility_buffer[frag_coord_1d], visibility);
#else
let depth = bitcast<u32>(z);
atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth);
let visibility = depth;
#endif
textureAtomicMax(meshlet_visibility_buffer, vec2(u32(x), u32(y)), visibility);
}
fn edge_function(a: vec2<f32>, b: vec2<f32>, c: vec2<f32>) -> f32 {