bevy/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
Patrick Walton 16531fb3e3
Implement GPU frustum culling. (#12889)
This commit implements opt-in GPU frustum culling, built on top of the
infrastructure in https://github.com/bevyengine/bevy/pull/12773. To
enable it on a camera, add the `GpuCulling` component to it. To
additionally disable CPU frustum culling, add the `NoCpuCulling`
component. Note that adding `GpuCulling` without `NoCpuCulling`
*currently* does nothing useful. The reason why `GpuCulling` doesn't
automatically imply `NoCpuCulling` is that I intend to follow this patch
up with GPU two-phase occlusion culling, and CPU frustum culling plus
GPU occlusion culling seems like a very commonly-desired mode.

Adding the `GpuCulling` component to a view puts that view into
*indirect mode*. This mode makes all drawcalls indirect, relying on the
mesh preprocessing shader to allocate instances dynamically. In indirect
mode, the `PreprocessWorkItem` `output_index` points not to a
`MeshUniform` instance slot but instead to a set of `wgpu`
`IndirectParameters`, from which it allocates an instance slot
dynamically if frustum culling succeeds. Batch building has been updated
to allocate and track indirect parameter slots, and the AABBs are now
supplied to the GPU as `MeshCullingData`.

A small amount of code relating to the frustum culling has been borrowed
from meshlets and moved into `maths.wgsl`. Note that standard Bevy
frustum culling uses AABBs, while meshlets use bounding spheres; this
means that not as much code can be shared as one might think.

This patch doesn't provide any way to perform GPU culling on shadow
maps, to avoid making this patch bigger than it already is. That can be
a followup.

## Changelog

### Added

* Frustum culling can now optionally be done on the GPU. To enable it,
add the `GpuCulling` component to a camera.
* To disable CPU frustum culling, add `NoCpuCulling` to a camera. Note
that `GpuCulling` doesn't automatically imply `NoCpuCulling`.
2024-04-28 12:50:00 +00:00

190 lines
6.8 KiB
WebGPU Shading Language

// GPU mesh uniform building.
//
// This is a compute shader that expands each `MeshInputUniform` out to a full
// `MeshUniform` for each view before rendering. (Thus `MeshInputUniform`
// and `MeshUniform` are in a 1:N relationship.) It runs in parallel for all
// meshes for all views. As part of this process, the shader gathers each
// mesh's transform on the previous frame and writes it into the `MeshUniform`
// so that TAA works.
#import bevy_pbr::mesh_types::Mesh
#import bevy_render::maths
#import bevy_render::view::View
// Per-frame data that the CPU supplies to the GPU.
//
// `main` reads one of these per work item from `current_input` and, when a
// previous-frame entry exists, a second one from `previous_input`.
struct MeshInput {
// The model transform.
//
// `main` binds this to `model_affine_transpose`, expands it to a 4x4 matrix
// with `maths::affine3_to_square`, and transposes it before handing it to
// `maths::inverse_affine3`, so it is presumably stored as the transpose of the
// affine model matrix. It is also copied unchanged into the output `Mesh`.
model: mat3x4<f32>,
// The lightmap UV rect, packed into 64 bits.
//
// Copied unchanged into the output `Mesh`; this shader never unpacks it.
lightmap_uv_rect: vec2<u32>,
// Various flags.
//
// Copied unchanged into the output `Mesh`; this shader never inspects them.
flags: u32,
// The index of this mesh's `MeshInput` in the `previous_input` array, if
// applicable. If not present, this is `u32::MAX`.
//
// `main` tests for the sentinel by comparing against `0xffffffff` and, when it
// matches, reuses the current frame's `model` as the previous model.
previous_input_index: u32,
}
// Information about each mesh instance needed to cull it on GPU.
//
// At the moment, this just consists of its axis-aligned bounding box (AABB).
//
// `main` looks entries up in `mesh_culling_data` with the same `input_index`
// it uses for `current_input`, so the two arrays are indexed in parallel.
struct MeshCullingData {
// The 3D center of the AABB in model space, padded with an extra unused
// float value.
//
// Only `.xyz` is read; `main` transforms it by the model matrix before the
// frustum test.
aabb_center: vec4<f32>,
// The 3D extents of the AABB in model space, divided by two, padded with
// an extra unused float value.
//
// Only `.xyz` is read.
aabb_half_extents: vec4<f32>,
}
// One invocation of this compute shader: i.e. one mesh instance in a view.
//
// `main` picks its work item from `work_items` using `global_invocation_id.x`
// and exits early when that index is past the end of the array.
struct PreprocessWorkItem {
// The index of the `MeshInput` in the `current_input` buffer that we read
// from.
//
// When `FRUSTUM_CULLING` is defined, the same index is used to read
// `mesh_culling_data`.
input_index: u32,
// In direct mode, the index of the `Mesh` in `output` that we write to. In
// indirect mode, the index of the `IndirectParameters` in
// `indirect_parameters` that we write to.
//
// "Indirect mode" here means the shader was compiled with `INDIRECT` defined.
output_index: u32,
}
// The `wgpu` indirect parameters structure. This is a union of two structures.
// For more information, see the corresponding comment in
// `gpu_preprocessing.rs`.
//
// Within this shader only `instance_count` and `instance_index` are touched;
// the remaining fields are never read or written here.
struct IndirectParameters {
// `vertex_count` or `index_count`.
data0: u32,
// `instance_count` in both structures.
//
// Atomic because every invocation that targets this draw bumps it with
// `atomicAdd` to claim its own output slot.
instance_count: atomic<u32>,
// `first_vertex` in both structures.
//
// NOTE(review): for indexed draws `wgpu` names the third field `first_index`
// rather than `first_vertex` — confirm the wording against
// `gpu_preprocessing.rs`.
first_vertex: u32,
// `first_instance` or `base_vertex`.
data1: u32,
// The base output slot for this draw. `main` adds the pre-increment value of
// `instance_count` to it to decide where a surviving instance's `Mesh` is
// written in `output`. This shader only reads the field, never writes it.
instance_index: u32,
}
// The current frame's `MeshInput`.
//
// Read-only; indexed by `PreprocessWorkItem::input_index`.
@group(0) @binding(0) var<storage> current_input: array<MeshInput>;
// The `MeshInput` values from the previous frame.
//
// Read-only; indexed by `MeshInput::previous_input_index`, and only consulted
// when that index is not the `0xffffffff` sentinel.
@group(0) @binding(1) var<storage> previous_input: array<MeshInput>;
// Indices into the `MeshInput` buffer.
//
// There may be many indices that map to the same `MeshInput`.
//
// The length of this array bounds the number of invocations that do any work.
@group(0) @binding(2) var<storage> work_items: array<PreprocessWorkItem>;
// The output array of `Mesh`es.
//
// The only buffer, other than `indirect_parameters`, that this shader writes.
@group(0) @binding(3) var<storage, read_write> output: array<Mesh>;
#ifdef INDIRECT
// The array of indirect parameters for drawcalls.
//
// Only declared when the shader is compiled with `INDIRECT` defined. Writable
// because `main` atomically increments each entry's `instance_count`.
@group(0) @binding(4) var<storage, read_write> indirect_parameters: array<IndirectParameters>;
#endif
#ifdef FRUSTUM_CULLING
// Data needed to cull the meshes.
//
// At the moment, this consists only of AABBs.
//
// Read-only; indexed by `PreprocessWorkItem::input_index`. Like the `view`
// binding below, it is only declared when `FRUSTUM_CULLING` is defined.
@group(0) @binding(5) var<storage> mesh_culling_data: array<MeshCullingData>;
// The view data, including the view matrix.
//
// This shader only reads `view.frustum`, the planes used for culling.
@group(0) @binding(6) var<uniform> view: View;
// Tests an oriented bounding box (OBB) against the view frustum. Returns
// false as soon as one of the tested frustum planes rejects the box, and true
// if none of them does.
//
// `model` supplies the box axes (its first three columns),
// `aabb_half_extents` the half-size of the box along each of those axes, and
// `aabb_center` the box center. `main` passes a center that has already been
// multiplied by `model`.
//
// `aabb_center.w` should be 1.0.
fn view_frustum_intersects_obb(
    model: mat4x4<f32>,
    aabb_center: vec4<f32>,
    aabb_half_extents: vec3<f32>,
) -> bool {
    // Only the first five entries of `view.frustum` are tested.
    // NOTE(review): presumably the remaining entry is the far plane, which is
    // deliberately skipped — confirm against the CPU-side frustum layout.
    for (var plane_index = 0; plane_index < 5; plane_index += 1) {
        let plane = view.frustum[plane_index];

        // Project each box axis onto the plane and weight the magnitudes by
        // the half extents. The result is the radius of the sphere that stands
        // in for the box with respect to this particular plane.
        let axis_projections = vec3(
            dot(plane, model[0]),
            dot(plane, model[1]),
            dot(plane, model[2]),
        );
        let projected_radius = dot(abs(axis_projections), aabb_half_extents);

        // One rejecting plane is enough to cull the box.
        if (!maths::sphere_intersects_plane_half_space(plane, aabb_center, projected_radius)) {
            return false;
        }
    }

    return true;
}
#endif
// Entry point: expands one `MeshInput` into one `Mesh` per work item.
//
// Each invocation handles the work item selected by `global_invocation_id.x`.
// With `FRUSTUM_CULLING` defined, instances rejected by the frustum test
// produce no output at all. With `INDIRECT` defined, the output slot is
// claimed atomically from the draw's `IndirectParameters`; otherwise the work
// item names the output slot directly.
@compute
@workgroup_size(64)
fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
    // Invocations past the end of the work list have nothing to do.
    let work_item_index = global_invocation_id.x;
    if (work_item_index >= arrayLength(&work_items)) {
        return;
    }

    // Fetch the work item and the mesh data it refers to.
    let work_item = work_items[work_item_index];
    let input_index = work_item.input_index;
    let output_index = work_item.output_index;
    let mesh_input = current_input[input_index];
    let model_affine_transpose = mesh_input.model;

#ifdef FRUSTUM_CULLING
    // Frustum-cull against the mesh's bounding box, oriented by the model
    // matrix. Culled instances claim no output slot and write nothing.
    let model = maths::affine3_to_square(model_affine_transpose);
    let culling_data = mesh_culling_data[input_index];
    let world_center = model * vec4(culling_data.aabb_center.xyz, 1.0);
    if (!view_frustum_intersects_obb(model, world_center, culling_data.aabb_half_extents.xyz)) {
        return;
    }
#endif

    // Build the inverse transpose of the model matrix, one step at a time.
    let model_affine = transpose(model_affine_transpose);
    let inverse_model = maths::inverse_affine3(model_affine);
    let inverse_transpose_model = transpose(inverse_model);

    // Pack its upper-left 3x3 part: the first eight values go into a
    // `mat2x4`, and the ninth is carried separately.
    let packed_row_0 = vec4<f32>(
        inverse_transpose_model[0].xyz,
        inverse_transpose_model[1].x);
    let packed_row_1 = vec4<f32>(
        inverse_transpose_model[1].yz,
        inverse_transpose_model[2].xy);
    let inverse_transpose_model_a = mat2x4<f32>(packed_row_0, packed_row_1);
    let inverse_transpose_model_b = inverse_transpose_model[2].z;

    // Start from the current transform and replace it with last frame's
    // transform only if this mesh has a previous-frame entry.
    let previous_input_index = mesh_input.previous_input_index;
    var previous_model: mat3x4<f32> = model_affine_transpose;
    if (previous_input_index != 0xffffffff) {
        previous_model = previous_input[previous_input_index].model;
    }

    // Decide where the result goes. In indirect mode the instance count of the
    // draw is bumped atomically, and its old value is the offset from the
    // draw's base slot. In direct mode the work item supplies the slot itself.
#ifdef INDIRECT
    let slot_offset = atomicAdd(&indirect_parameters[output_index].instance_count, 1u);
    let mesh_output_index = indirect_parameters[output_index].instance_index + slot_offset;
#else
    let mesh_output_index = output_index;
#endif

    // Emit the finished `Mesh`.
    output[mesh_output_index].model = model_affine_transpose;
    output[mesh_output_index].previous_model = previous_model;
    output[mesh_output_index].inverse_transpose_model_a = inverse_transpose_model_a;
    output[mesh_output_index].inverse_transpose_model_b = inverse_transpose_model_b;
    output[mesh_output_index].flags = mesh_input.flags;
    output[mesh_output_index].lightmap_uv_rect = mesh_input.lightmap_uv_rect;
}