Split out the IndirectParametersMetadata into CPU-populated and GPU-populated buffers. (#17863)

The GPU can fill out many of the fields in `IndirectParametersMetadata`
using information it already has:

* `early_instance_count` and `late_instance_count` are always
initialized to zero.

* `mesh_index` is already present in the work item buffer as the
`input_index` of the first work item in each batch.

This patch moves these fields to a separate buffer, the *GPU indirect
parameters metadata* buffer. That way, it avoids having to write them on
CPU during `batch_and_prepare_binned_render_phase`. This effectively
reduces the number of bits that that function must write per mesh from
160 to 64 (in addition to the 64 bits per mesh *instance*).

Additionally, this PR refactors `UntypedPhaseIndirectParametersBuffers`
to add another layer, `MeshClassIndirectParametersBuffers`, which allows
abstracting over the buffers corresponding to indexed and non-indexed
meshes. This patch doesn't make much use of this abstraction, but
forthcoming patches will, and it's overall a cleaner approach.

This didn't seem to have much of an effect by itself on
`batch_and_prepare_binned_render_phase` time, but subsequent PRs
dependent on this PR yield roughly a 2× speedup.
This commit is contained in:
Patrick Walton 2025-02-17 16:53:44 -08:00 committed by GitHub
parent fb1e829294
commit 8f36106f9e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 533 additions and 350 deletions

View File

@ -105,6 +105,7 @@ pub mod graph {
EarlyPrepassBuildIndirectParameters,
LatePrepassBuildIndirectParameters,
MainBuildIndirectParameters,
ClearIndirectParametersMetadata,
}
}

View File

@ -12,7 +12,8 @@
IndirectBatchSet,
IndirectParametersIndexed,
IndirectParametersNonIndexed,
IndirectParametersMetadata,
IndirectParametersCpuMetadata,
IndirectParametersGpuMetadata,
MeshInput
}
@ -22,26 +23,30 @@
// Data that we use to generate the indirect parameters.
//
// The `mesh_preprocess.wgsl` shader emits these.
@group(0) @binding(1) var<storage> indirect_parameters_metadata: array<IndirectParametersMetadata>;
@group(0) @binding(1) var<storage> indirect_parameters_cpu_metadata:
array<IndirectParametersCpuMetadata>;
@group(0) @binding(2) var<storage> indirect_parameters_gpu_metadata:
array<IndirectParametersGpuMetadata>;
// Information about each batch set.
//
// A *batch set* is a set of meshes that might be multi-drawn together.
@group(0) @binding(2) var<storage, read_write> indirect_batch_sets: array<IndirectBatchSet>;
@group(0) @binding(3) var<storage, read_write> indirect_batch_sets: array<IndirectBatchSet>;
#ifdef INDEXED
// The buffer of indirect draw parameters that we generate, and that the GPU
// reads to issue the draws.
//
// This buffer is for indexed meshes.
@group(0) @binding(3) var<storage, read_write> indirect_parameters:
@group(0) @binding(4) var<storage, read_write> indirect_parameters:
array<IndirectParametersIndexed>;
#else // INDEXED
// The buffer of indirect draw parameters that we generate, and that the GPU
// reads to issue the draws.
//
// This buffer is for non-indexed meshes.
@group(0) @binding(3) var<storage, read_write> indirect_parameters:
@group(0) @binding(4) var<storage, read_write> indirect_parameters:
array<IndirectParametersNonIndexed>;
#endif // INDEXED
@ -51,20 +56,21 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
// Figure out our instance index (i.e. batch index). If this thread doesn't
// correspond to any index, bail.
let instance_index = global_invocation_id.x;
if (instance_index >= arrayLength(&indirect_parameters_metadata)) {
if (instance_index >= arrayLength(&indirect_parameters_cpu_metadata)) {
return;
}
// Unpack the metadata for this batch.
let mesh_index = indirect_parameters_metadata[instance_index].mesh_index;
let base_output_index = indirect_parameters_metadata[instance_index].base_output_index;
let batch_set_index = indirect_parameters_metadata[instance_index].batch_set_index;
let base_output_index = indirect_parameters_cpu_metadata[instance_index].base_output_index;
let batch_set_index = indirect_parameters_cpu_metadata[instance_index].batch_set_index;
let mesh_index = indirect_parameters_gpu_metadata[instance_index].mesh_index;
// If we aren't using `multi_draw_indirect_count`, we have a 1:1 fixed
// assignment of batches to slots in the indirect parameters buffer, so we
// can just use the instance index as the index of our indirect parameters.
let early_instance_count = indirect_parameters_metadata[instance_index].early_instance_count;
let late_instance_count = indirect_parameters_metadata[instance_index].late_instance_count;
let early_instance_count =
indirect_parameters_gpu_metadata[instance_index].early_instance_count;
let late_instance_count = indirect_parameters_gpu_metadata[instance_index].late_instance_count;
// If in the early phase, we draw only the early meshes. If in the late
// phase, we draw only the late meshes. If in the main phase, draw all the

View File

@ -29,12 +29,14 @@ use bevy_ecs::{
system::{lifetimeless::Read, Commands, Query, Res, ResMut},
world::{FromWorld, World},
};
use bevy_render::batching::gpu_preprocessing::UntypedPhaseIndirectParametersBuffers;
use bevy_render::batching::gpu_preprocessing::{
IndirectParametersGpuMetadata, UntypedPhaseIndirectParametersBuffers,
};
use bevy_render::{
batching::gpu_preprocessing::{
BatchedInstanceBuffers, GpuOcclusionCullingWorkItemBuffers, GpuPreprocessingSupport,
IndirectBatchSet, IndirectParametersBuffers, IndirectParametersIndexed,
IndirectParametersMetadata, IndirectParametersNonIndexed,
IndirectBatchSet, IndirectParametersBuffers, IndirectParametersCpuMetadata,
IndirectParametersIndexed, IndirectParametersNonIndexed,
LatePreprocessWorkItemIndirectParameters, PreprocessWorkItem, PreprocessWorkItemBuffers,
UntypedPhaseBatchedInstanceBuffers,
},
@ -92,6 +94,12 @@ pub struct GpuMeshPreprocessPlugin {
pub use_gpu_instance_buffer_builder: bool,
}
/// The render node that clears out the GPU-side indirect metadata buffers.
///
/// When run, this node records a `clear_buffer` command for the indexed and
/// non-indexed GPU metadata buffers of every phase found in
/// [`IndirectParametersBuffers`], covering one
/// [`IndirectParametersGpuMetadata`] per batch. It does nothing if that
/// resource is absent.
///
/// This is only used when indirect drawing is enabled.
#[derive(Default)]
pub struct ClearIndirectParametersMetadataNode;
/// The render node for the first mesh preprocessing pass.
///
/// This pass runs a compute shader to cull meshes outside the view frustum (if
@ -142,7 +150,7 @@ pub struct LateGpuPreprocessNode {
///
/// This node runs a compute shader on the output of the
/// [`EarlyGpuPreprocessNode`] in order to transform the
/// [`IndirectParametersMetadata`] into properly-formatted
/// [`IndirectParametersGpuMetadata`] into properly-formatted
/// [`IndirectParametersIndexed`] and [`IndirectParametersNonIndexed`].
pub struct EarlyPrepassBuildIndirectParametersNode {
view_query: QueryState<
@ -162,7 +170,7 @@ pub struct EarlyPrepassBuildIndirectParametersNode {
///
/// This node runs a compute shader on the output of the
/// [`LateGpuPreprocessNode`] in order to transform the
/// [`IndirectParametersMetadata`] into properly-formatted
/// [`IndirectParametersGpuMetadata`] into properly-formatted
/// [`IndirectParametersIndexed`] and [`IndirectParametersNonIndexed`].
pub struct LatePrepassBuildIndirectParametersNode {
view_query: QueryState<
@ -183,7 +191,7 @@ pub struct LatePrepassBuildIndirectParametersNode {
///
/// This node runs a compute shader on the output of the
/// [`EarlyGpuPreprocessNode`] and [`LateGpuPreprocessNode`] in order to
/// transform the [`IndirectParametersMetadata`] into properly-formatted
/// transform the [`IndirectParametersGpuMetadata`] into properly-formatted
/// [`IndirectParametersIndexed`] and [`IndirectParametersNonIndexed`].
pub struct MainBuildIndirectParametersNode {
view_query: QueryState<
@ -494,6 +502,10 @@ impl Plugin for GpuMeshPreprocessPlugin {
write_mesh_culling_data_buffer.in_set(RenderSet::PrepareResourcesFlush),
),
)
.add_render_graph_node::<ClearIndirectParametersMetadataNode>(
Core3d,
NodePbr::ClearIndirectParametersMetadata
)
.add_render_graph_node::<EarlyGpuPreprocessNode>(Core3d, NodePbr::EarlyGpuPreprocess)
.add_render_graph_node::<LateGpuPreprocessNode>(Core3d, NodePbr::LateGpuPreprocess)
.add_render_graph_node::<EarlyPrepassBuildIndirectParametersNode>(
@ -511,6 +523,7 @@ impl Plugin for GpuMeshPreprocessPlugin {
.add_render_graph_edges(
Core3d,
(
NodePbr::ClearIndirectParametersMetadata,
NodePbr::EarlyGpuPreprocess,
NodePbr::EarlyPrepassBuildIndirectParameters,
Node3d::EarlyPrepass,
@ -533,6 +546,53 @@ impl Plugin for GpuMeshPreprocessPlugin {
}
}
impl Node for ClearIndirectParametersMetadataNode {
    /// Records commands that clear the GPU-populated indirect parameters
    /// metadata for every phase.
    ///
    /// Returns successfully without recording anything if the
    /// [`IndirectParametersBuffers`] resource is absent.
    fn run<'w>(
        &self,
        _: &mut RenderGraphContext,
        render_context: &mut RenderContext<'w>,
        world: &'w World,
    ) -> Result<(), NodeRunError> {
        let Some(all_phase_buffers) = world.get_resource::<IndirectParametersBuffers>() else {
            return Ok(());
        };

        // Size in bytes of one GPU-side metadata record; each batch owns one.
        let gpu_metadata_size = size_of::<IndirectParametersGpuMetadata>() as u64;

        for phase_buffers in all_phase_buffers.values() {
            // Indexed meshes: clear only the portion covering the live batches.
            let indexed = &phase_buffers.indexed;
            if let Some(gpu_metadata_buffer) = indexed.gpu_metadata_buffer() {
                let byte_count = indexed.batch_count() as u64 * gpu_metadata_size;
                render_context
                    .command_encoder()
                    .clear_buffer(gpu_metadata_buffer, 0, Some(byte_count));
            }

            // Non-indexed meshes: same treatment.
            let non_indexed = &phase_buffers.non_indexed;
            if let Some(gpu_metadata_buffer) = non_indexed.gpu_metadata_buffer() {
                let byte_count = non_indexed.batch_count() as u64 * gpu_metadata_size;
                render_context
                    .command_encoder()
                    .clear_buffer(gpu_metadata_buffer, 0, Some(byte_count));
            }
        }

        Ok(())
    }
}
impl FromWorld for EarlyGpuPreprocessNode {
fn from_world(world: &mut World) -> Self {
Self {
@ -1085,7 +1145,8 @@ fn run_build_indirect_parameters_node(
compute_pass.set_pipeline(build_indexed_indirect_params_pipeline);
compute_pass.set_bind_group(0, build_indirect_indexed_params_bind_group, &[]);
let workgroup_count = phase_indirect_parameters_buffers
.indexed_batch_count()
.indexed
.batch_count()
.div_ceil(WORKGROUP_SIZE);
if workgroup_count > 0 {
compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
@ -1112,7 +1173,8 @@ fn run_build_indirect_parameters_node(
compute_pass.set_pipeline(build_non_indexed_indirect_params_pipeline);
compute_pass.set_bind_group(0, build_indirect_non_indexed_params_bind_group, &[]);
let workgroup_count = phase_indirect_parameters_buffers
.non_indexed_batch_count()
.non_indexed
.batch_count()
.div_ceil(WORKGROUP_SIZE);
if workgroup_count > 0 {
compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
@ -1366,7 +1428,7 @@ fn preprocess_direct_bind_group_layout_entries() -> DynamicBindGroupLayoutEntrie
)
}
// Returns the first 3 bind group layout entries shared between all invocations
// Returns the first 4 bind group layout entries shared between all invocations
// of the indirect parameters building shader.
fn build_indirect_params_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries {
DynamicBindGroupLayoutEntries::new_with_indices(
@ -1375,9 +1437,13 @@ fn build_indirect_params_bind_group_layout_entries() -> DynamicBindGroupLayoutEn
(0, storage_buffer_read_only::<MeshInputUniform>(false)),
(
1,
storage_buffer_read_only::<IndirectParametersMetadata>(false),
storage_buffer_read_only::<IndirectParametersCpuMetadata>(false),
),
(2, storage_buffer::<IndirectBatchSet>(false)),
(
2,
storage_buffer_read_only::<IndirectParametersGpuMetadata>(false),
),
(3, storage_buffer::<IndirectBatchSet>(false)),
),
)
}
@ -1388,14 +1454,21 @@ fn gpu_culling_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries {
// GPU culling bind group parameters are a superset of those in the CPU
// culling (direct) shader.
preprocess_direct_bind_group_layout_entries().extend_with_indices((
// `indirect_parameters`
// `indirect_parameters_cpu_metadata`
(
7,
storage_buffer::<IndirectParametersMetadata>(/* has_dynamic_offset= */ false),
storage_buffer_read_only::<IndirectParametersCpuMetadata>(
/* has_dynamic_offset= */ false,
),
),
// `indirect_parameters_gpu_metadata`
(
8,
storage_buffer::<IndirectParametersGpuMetadata>(/* has_dynamic_offset= */ false),
),
// `mesh_culling_data`
(
8,
9,
storage_buffer_read_only::<MeshCullingData>(/* has_dynamic_offset= */ false),
),
// `view`
@ -1935,13 +2008,18 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
match (
self.phase_indirect_parameters_buffers
.indexed_metadata_buffer(),
.indexed
.cpu_metadata_buffer(),
self.phase_indirect_parameters_buffers
.indexed
.gpu_metadata_buffer(),
indexed_work_item_buffer.buffer(),
late_indexed_work_item_buffer.buffer(),
self.late_indexed_indirect_parameters_buffer.buffer(),
) {
(
Some(indexed_metadata_buffer),
Some(indexed_cpu_metadata_buffer),
Some(indexed_gpu_metadata_buffer),
Some(indexed_work_item_gpu_buffer),
Some(late_indexed_work_item_gpu_buffer),
Some(late_indexed_indirect_parameters_buffer),
@ -1974,8 +2052,9 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
}),
),
(6, self.data_buffer.as_entire_binding()),
(7, indexed_metadata_buffer.as_entire_binding()),
(8, mesh_culling_data_buffer.as_entire_binding()),
(7, indexed_cpu_metadata_buffer.as_entire_binding()),
(8, indexed_gpu_metadata_buffer.as_entire_binding()),
(9, mesh_culling_data_buffer.as_entire_binding()),
(0, view_uniforms_binding.clone()),
(10, &view_depth_pyramid.all_mips),
(
@ -2027,13 +2106,18 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
match (
self.phase_indirect_parameters_buffers
.non_indexed_metadata_buffer(),
.non_indexed
.cpu_metadata_buffer(),
self.phase_indirect_parameters_buffers
.non_indexed
.gpu_metadata_buffer(),
non_indexed_work_item_buffer.buffer(),
late_non_indexed_work_item_buffer.buffer(),
self.late_non_indexed_indirect_parameters_buffer.buffer(),
) {
(
Some(non_indexed_metadata_buffer),
Some(non_indexed_cpu_metadata_buffer),
Some(non_indexed_gpu_metadata_buffer),
Some(non_indexed_work_item_gpu_buffer),
Some(late_non_indexed_work_item_buffer),
Some(late_non_indexed_indirect_parameters_buffer),
@ -2066,8 +2150,9 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
}),
),
(6, self.data_buffer.as_entire_binding()),
(7, non_indexed_metadata_buffer.as_entire_binding()),
(8, mesh_culling_data_buffer.as_entire_binding()),
(7, non_indexed_cpu_metadata_buffer.as_entire_binding()),
(8, non_indexed_gpu_metadata_buffer.as_entire_binding()),
(9, mesh_culling_data_buffer.as_entire_binding()),
(0, view_uniforms_binding.clone()),
(10, &view_depth_pyramid.all_mips),
(
@ -2118,12 +2203,17 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
match (
self.phase_indirect_parameters_buffers
.indexed_metadata_buffer(),
.indexed
.cpu_metadata_buffer(),
self.phase_indirect_parameters_buffers
.indexed
.gpu_metadata_buffer(),
late_indexed_work_item_buffer.buffer(),
self.late_indexed_indirect_parameters_buffer.buffer(),
) {
(
Some(indexed_metadata_buffer),
Some(indexed_cpu_metadata_buffer),
Some(indexed_gpu_metadata_buffer),
Some(late_indexed_work_item_gpu_buffer),
Some(late_indexed_indirect_parameters_buffer),
) => {
@ -2155,8 +2245,9 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
}),
),
(6, self.data_buffer.as_entire_binding()),
(7, indexed_metadata_buffer.as_entire_binding()),
(8, mesh_culling_data_buffer.as_entire_binding()),
(7, indexed_cpu_metadata_buffer.as_entire_binding()),
(8, indexed_gpu_metadata_buffer.as_entire_binding()),
(9, mesh_culling_data_buffer.as_entire_binding()),
(0, view_uniforms_binding.clone()),
(10, &view_depth_pyramid.all_mips),
(
@ -2199,12 +2290,17 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
match (
self.phase_indirect_parameters_buffers
.non_indexed_metadata_buffer(),
.non_indexed
.cpu_metadata_buffer(),
self.phase_indirect_parameters_buffers
.non_indexed
.gpu_metadata_buffer(),
late_non_indexed_work_item_buffer.buffer(),
self.late_non_indexed_indirect_parameters_buffer.buffer(),
) {
(
Some(non_indexed_metadata_buffer),
Some(non_indexed_cpu_metadata_buffer),
Some(non_indexed_gpu_metadata_buffer),
Some(non_indexed_work_item_gpu_buffer),
Some(late_non_indexed_indirect_parameters_buffer),
) => {
@ -2236,8 +2332,9 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
}),
),
(6, self.data_buffer.as_entire_binding()),
(7, non_indexed_metadata_buffer.as_entire_binding()),
(8, mesh_culling_data_buffer.as_entire_binding()),
(7, non_indexed_cpu_metadata_buffer.as_entire_binding()),
(8, non_indexed_gpu_metadata_buffer.as_entire_binding()),
(9, mesh_culling_data_buffer.as_entire_binding()),
(0, view_uniforms_binding.clone()),
(10, &view_depth_pyramid.all_mips),
(
@ -2293,10 +2390,18 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
match (
self.phase_indirect_parameters_buffers
.indexed_metadata_buffer(),
.indexed
.cpu_metadata_buffer(),
self.phase_indirect_parameters_buffers
.indexed
.gpu_metadata_buffer(),
indexed_work_item_buffer.buffer(),
) {
(Some(indexed_metadata_buffer), Some(indexed_work_item_gpu_buffer)) => {
(
Some(indexed_cpu_metadata_buffer),
Some(indexed_gpu_metadata_buffer),
Some(indexed_work_item_gpu_buffer),
) => {
// Don't use `as_entire_binding()` here; the shader reads the array
// length and the underlying buffer may be longer than the actual size
// of the vector.
@ -2325,8 +2430,9 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
}),
),
(6, self.data_buffer.as_entire_binding()),
(7, indexed_metadata_buffer.as_entire_binding()),
(8, mesh_culling_data_buffer.as_entire_binding()),
(7, indexed_cpu_metadata_buffer.as_entire_binding()),
(8, indexed_gpu_metadata_buffer.as_entire_binding()),
(9, mesh_culling_data_buffer.as_entire_binding()),
(0, view_uniforms_binding.clone()),
)),
),
@ -2347,10 +2453,18 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
match (
self.phase_indirect_parameters_buffers
.non_indexed_metadata_buffer(),
.non_indexed
.cpu_metadata_buffer(),
self.phase_indirect_parameters_buffers
.non_indexed
.gpu_metadata_buffer(),
non_indexed_work_item_buffer.buffer(),
) {
(Some(non_indexed_metadata_buffer), Some(non_indexed_work_item_gpu_buffer)) => {
(
Some(non_indexed_cpu_metadata_buffer),
Some(non_indexed_gpu_metadata_buffer),
Some(non_indexed_work_item_gpu_buffer),
) => {
// Don't use `as_entire_binding()` here; the shader reads the array
// length and the underlying buffer may be longer than the actual size
// of the vector.
@ -2379,8 +2493,9 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
}),
),
(6, self.data_buffer.as_entire_binding()),
(7, non_indexed_metadata_buffer.as_entire_binding()),
(8, mesh_culling_data_buffer.as_entire_binding()),
(7, non_indexed_cpu_metadata_buffer.as_entire_binding()),
(8, non_indexed_gpu_metadata_buffer.as_entire_binding()),
(9, mesh_culling_data_buffer.as_entire_binding()),
(0, view_uniforms_binding.clone()),
)),
),
@ -2407,9 +2522,10 @@ fn create_build_indirect_parameters_bind_groups(
build_indirect_parameters_bind_groups.insert(
*phase_type_id,
PhaseBuildIndirectParametersBindGroups {
reset_indexed_indirect_batch_sets: match (
phase_indirect_parameters_buffer.indexed_batch_sets_buffer(),
) {
reset_indexed_indirect_batch_sets: match (phase_indirect_parameters_buffer
.indexed
.batch_sets_buffer(),)
{
(Some(indexed_batch_sets_buffer),) => Some(
render_device.create_bind_group(
"reset_indexed_indirect_batch_sets_bind_group",
@ -2427,9 +2543,10 @@ fn create_build_indirect_parameters_bind_groups(
_ => None,
},
reset_non_indexed_indirect_batch_sets: match (
phase_indirect_parameters_buffer.non_indexed_batch_sets_buffer(),
) {
reset_non_indexed_indirect_batch_sets: match (phase_indirect_parameters_buffer
.non_indexed
.batch_sets_buffer(),)
{
(Some(non_indexed_batch_sets_buffer),) => Some(
render_device.create_bind_group(
"reset_non_indexed_indirect_batch_sets_bind_group",
@ -2448,12 +2565,18 @@ fn create_build_indirect_parameters_bind_groups(
},
build_indexed_indirect: match (
phase_indirect_parameters_buffer.indexed_metadata_buffer(),
phase_indirect_parameters_buffer.indexed_data_buffer(),
phase_indirect_parameters_buffer.indexed_batch_sets_buffer(),
phase_indirect_parameters_buffer
.indexed
.cpu_metadata_buffer(),
phase_indirect_parameters_buffer
.indexed
.gpu_metadata_buffer(),
phase_indirect_parameters_buffer.indexed.data_buffer(),
phase_indirect_parameters_buffer.indexed.batch_sets_buffer(),
) {
(
Some(indexed_indirect_parameters_metadata_buffer),
Some(indexed_indirect_parameters_cpu_metadata_buffer),
Some(indexed_indirect_parameters_gpu_metadata_buffer),
Some(indexed_indirect_parameters_data_buffer),
Some(indexed_batch_sets_buffer),
) => Some(
@ -2469,12 +2592,21 @@ fn create_build_indirect_parameters_bind_groups(
// Don't use `as_entire_binding` here; the shader reads
// the length and `RawBufferVec` overallocates.
BufferBinding {
buffer: indexed_indirect_parameters_metadata_buffer,
buffer: indexed_indirect_parameters_cpu_metadata_buffer,
offset: 0,
size: NonZeroU64::new(
phase_indirect_parameters_buffer.indexed_batch_count()
phase_indirect_parameters_buffer.indexed.batch_count()
as u64
* size_of::<IndirectParametersMetadata>() as u64,
* size_of::<IndirectParametersCpuMetadata>() as u64,
),
},
BufferBinding {
buffer: indexed_indirect_parameters_gpu_metadata_buffer,
offset: 0,
size: NonZeroU64::new(
phase_indirect_parameters_buffer.indexed.batch_count()
as u64
* size_of::<IndirectParametersGpuMetadata>() as u64,
),
},
indexed_batch_sets_buffer.as_entire_binding(),
@ -2486,12 +2618,20 @@ fn create_build_indirect_parameters_bind_groups(
},
build_non_indexed_indirect: match (
phase_indirect_parameters_buffer.non_indexed_metadata_buffer(),
phase_indirect_parameters_buffer.non_indexed_data_buffer(),
phase_indirect_parameters_buffer.non_indexed_batch_sets_buffer(),
phase_indirect_parameters_buffer
.non_indexed
.cpu_metadata_buffer(),
phase_indirect_parameters_buffer
.non_indexed
.gpu_metadata_buffer(),
phase_indirect_parameters_buffer.non_indexed.data_buffer(),
phase_indirect_parameters_buffer
.non_indexed
.batch_sets_buffer(),
) {
(
Some(non_indexed_indirect_parameters_metadata_buffer),
Some(non_indexed_indirect_parameters_cpu_metadata_buffer),
Some(non_indexed_indirect_parameters_gpu_metadata_buffer),
Some(non_indexed_indirect_parameters_data_buffer),
Some(non_indexed_batch_sets_buffer),
) => Some(
@ -2507,12 +2647,21 @@ fn create_build_indirect_parameters_bind_groups(
// Don't use `as_entire_binding` here; the shader reads
// the length and `RawBufferVec` overallocates.
BufferBinding {
buffer: non_indexed_indirect_parameters_metadata_buffer,
buffer: non_indexed_indirect_parameters_cpu_metadata_buffer,
offset: 0,
size: NonZeroU64::new(
phase_indirect_parameters_buffer.non_indexed_batch_count()
phase_indirect_parameters_buffer.non_indexed.batch_count()
as u64
* size_of::<IndirectParametersMetadata>() as u64,
* size_of::<IndirectParametersCpuMetadata>() as u64,
),
},
BufferBinding {
buffer: non_indexed_indirect_parameters_gpu_metadata_buffer,
offset: 0,
size: NonZeroU64::new(
phase_indirect_parameters_buffer.non_indexed.batch_count()
as u64
* size_of::<IndirectParametersGpuMetadata>() as u64,
),
},
non_indexed_batch_sets_buffer.as_entire_binding(),

View File

@ -20,7 +20,7 @@ use bevy_render::{
batching::{
gpu_preprocessing::{
self, GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers,
IndirectParametersIndexed, IndirectParametersMetadata, IndirectParametersNonIndexed,
IndirectParametersCpuMetadata, IndirectParametersIndexed, IndirectParametersNonIndexed,
InstanceInputUniformBuffer, UntypedPhaseIndirectParametersBuffers,
},
no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching,
@ -1965,30 +1965,28 @@ impl GetFullBatchData for MeshPipeline {
}
fn write_batch_indirect_parameters_metadata(
mesh_index: InputUniformIndex,
indexed: bool,
base_output_index: u32,
batch_set_index: Option<NonMaxU32>,
phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers,
indirect_parameters_offset: u32,
) {
let indirect_parameters = IndirectParametersMetadata {
mesh_index: *mesh_index,
let indirect_parameters = IndirectParametersCpuMetadata {
base_output_index,
batch_set_index: match batch_set_index {
Some(batch_set_index) => u32::from(batch_set_index),
None => !0,
},
early_instance_count: 0,
late_instance_count: 0,
};
if indexed {
phase_indirect_parameters_buffers
.set_indexed(indirect_parameters_offset, indirect_parameters);
.indexed
.set(indirect_parameters_offset, indirect_parameters);
} else {
phase_indirect_parameters_buffers
.set_non_indexed(indirect_parameters_offset, indirect_parameters);
.non_indexed
.set(indirect_parameters_offset, indirect_parameters);
}
}
}
@ -3086,8 +3084,10 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
return RenderCommandResult::Skip;
};
let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = (
phase_indirect_parameters_buffers.indexed_data_buffer(),
phase_indirect_parameters_buffers.indexed_batch_sets_buffer(),
phase_indirect_parameters_buffers.indexed.data_buffer(),
phase_indirect_parameters_buffers
.indexed
.batch_sets_buffer(),
) else {
warn!(
"Not rendering mesh because indexed indirect parameters buffer \
@ -3152,8 +3152,10 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
return RenderCommandResult::Skip;
};
let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = (
phase_indirect_parameters_buffers.non_indexed_data_buffer(),
phase_indirect_parameters_buffers.non_indexed_batch_sets_buffer(),
phase_indirect_parameters_buffers.non_indexed.data_buffer(),
phase_indirect_parameters_buffers
.non_indexed
.batch_sets_buffer(),
) else {
warn!(
"Not rendering mesh because non-indexed indirect parameters buffer \

View File

@ -14,7 +14,9 @@
// are known as *early mesh preprocessing* and *late mesh preprocessing*
// respectively.
#import bevy_pbr::mesh_preprocess_types::{IndirectParametersMetadata, MeshInput}
#import bevy_pbr::mesh_preprocess_types::{
IndirectParametersCpuMetadata, IndirectParametersGpuMetadata, MeshInput
}
#import bevy_pbr::mesh_types::{Mesh, MESH_FLAGS_NO_FRUSTUM_CULLING_BIT}
#import bevy_pbr::mesh_view_bindings::view
#import bevy_pbr::occlusion_culling
@ -90,15 +92,18 @@ struct PushConstants {
#ifdef INDIRECT
// The array of indirect parameters for drawcalls.
@group(0) @binding(7) var<storage, read_write> indirect_parameters_metadata:
array<IndirectParametersMetadata>;
@group(0) @binding(7) var<storage> indirect_parameters_cpu_metadata:
array<IndirectParametersCpuMetadata>;
@group(0) @binding(8) var<storage, read_write> indirect_parameters_gpu_metadata:
array<IndirectParametersGpuMetadata>;
#endif
#ifdef FRUSTUM_CULLING
// Data needed to cull the meshes.
//
// At the moment, this consists only of AABBs.
@group(0) @binding(8) var<storage> mesh_culling_data: array<MeshCullingData>;
@group(0) @binding(9) var<storage> mesh_culling_data: array<MeshCullingData>;
#endif // FRUSTUM_CULLING
#ifdef OCCLUSION_CULLING
@ -172,6 +177,16 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
let input_index = work_items[instance_index].input_index;
#ifdef INDIRECT
let indirect_parameters_index = work_items[instance_index].output_or_indirect_parameters_index;
// If we're the first mesh instance in this batch, write the index of our
// `MeshInput` into the appropriate slot so that the indirect parameters
// building shader can access it.
#ifndef LATE_PHASE
if (instance_index == 0u || work_items[instance_index - 1].output_or_indirect_parameters_index != indirect_parameters_index) {
indirect_parameters_gpu_metadata[indirect_parameters_index].mesh_index = input_index;
}
#endif // LATE_PHASE
#else // INDIRECT
let mesh_output_index = work_items[instance_index].output_or_indirect_parameters_index;
#endif // INDIRECT
@ -315,18 +330,21 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
// parameters. Otherwise, this index was directly supplied to us.
#ifdef INDIRECT
#ifdef LATE_PHASE
let batch_output_index =
atomicLoad(&indirect_parameters_metadata[indirect_parameters_index].early_instance_count) +
atomicAdd(&indirect_parameters_metadata[indirect_parameters_index].late_instance_count, 1u);
let batch_output_index = atomicLoad(
&indirect_parameters_gpu_metadata[indirect_parameters_index].early_instance_count
) + atomicAdd(
&indirect_parameters_gpu_metadata[indirect_parameters_index].late_instance_count,
1u
);
#else // LATE_PHASE
let batch_output_index = atomicAdd(
&indirect_parameters_metadata[indirect_parameters_index].early_instance_count,
&indirect_parameters_gpu_metadata[indirect_parameters_index].early_instance_count,
1u
);
#endif // LATE_PHASE
let mesh_output_index =
indirect_parameters_metadata[indirect_parameters_index].base_output_index +
indirect_parameters_cpu_metadata[indirect_parameters_index].base_output_index +
batch_output_index;
#endif // INDIRECT

View File

@ -17,6 +17,7 @@ use bevy_math::UVec4;
use bevy_platform_support::collections::{hash_map::Entry, HashMap, HashSet};
use bevy_utils::{default, TypeIdMap};
use bytemuck::{Pod, Zeroable};
use encase::{internal::WriteInto, ShaderSize};
use nonmax::NonMaxU32;
use tracing::error;
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
@ -25,7 +26,7 @@ use crate::{
experimental::occlusion_culling::OcclusionCulling,
render_phase::{
BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, InputUniformIndex, PhaseItem,
BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItem,
PhaseItemBatchSetKey as _, PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase,
UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, ViewSortedRenderPhases,
},
@ -623,10 +624,10 @@ pub struct PreprocessWorkItem {
pub input_index: u32,
/// In direct mode, the index of the mesh uniform; in indirect mode, the
/// index of the [`IndirectParametersMetadata`].
/// index of the [`IndirectParametersGpuMetadata`].
///
/// In indirect mode, this is the index of the
/// [`IndirectParametersMetadata`] in the
/// [`IndirectParametersGpuMetadata`] in the
/// `IndirectParametersBuffers::indexed_metadata` or
/// `IndirectParametersBuffers::non_indexed_metadata`.
pub output_or_indirect_parameters_index: u32,
@ -668,23 +669,13 @@ pub struct IndirectParametersNonIndexed {
pub first_instance: u32,
}
/// A structure, shared between CPU and GPU, that records how many instances of
/// each mesh are actually to be drawn.
///
/// The CPU writes to this structure in order to initialize the fields other
/// than [`Self::early_instance_count`] and [`Self::late_instance_count`]. The
/// GPU mesh preprocessing shader increments the [`Self::early_instance_count`]
/// and [`Self::late_instance_count`] as it determines that meshes are visible.
/// The indirect parameter building shader reads this metadata in order to
/// construct the indirect draw parameters.
/// A structure, initialized on CPU and read on GPU, that contains metadata
/// about each batch.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersMetadata {
/// The index of the mesh in the array of `MeshInputUniform`s.
pub mesh_index: u32,
pub struct IndirectParametersCpuMetadata {
/// The index of the first instance of this mesh in the array of
/// `MeshUniform`s.
///
@ -699,9 +690,26 @@ pub struct IndirectParametersMetadata {
///
/// A *batch set* is a set of meshes that may be multi-drawn together.
/// Multiple batches (and therefore multiple instances of
/// [`IndirectParametersMetadata`] structures) can be part of the same batch
/// set.
/// [`IndirectParametersGpuMetadata`] structures) can be part of the same
/// batch set.
pub batch_set_index: u32,
}
/// A structure, written and read on GPU, that records how many instances of each
/// mesh are actually to be drawn.
///
/// The GPU mesh preprocessing shader increments the
/// [`Self::early_instance_count`] and [`Self::late_instance_count`] as it
/// determines that meshes are visible. The indirect parameter building shader
/// reads this metadata in order to construct the indirect draw parameters.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersGpuMetadata {
/// The index of the first mesh in this batch in the array of
/// `MeshInputUniform`s.
pub mesh_index: u32,
/// The number of instances that were judged visible last frame.
///
@ -753,7 +761,7 @@ pub struct IndirectBatchSet {
/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene.
///
/// In addition to the indirect draw buffers themselves, this structure contains
/// the buffers that store [`IndirectParametersMetadata`], which are the
/// the buffers that store [`IndirectParametersGpuMetadata`], which are the
/// structures that culling writes to so that the indirect parameter building
/// pass can determine how many meshes are actually to be drawn.
///
@ -823,53 +831,12 @@ where
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
pub struct UntypedPhaseIndirectParametersBuffers {
/// The GPU buffer that stores the indirect draw parameters for non-indexed
/// meshes.
///
/// The indirect parameters building shader writes to this buffer, while the
/// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
/// it to perform the draws.
non_indexed_data: UninitBufferVec<IndirectParametersNonIndexed>,
/// The GPU buffer that holds the data used to construct indirect draw
/// parameters for non-indexed meshes.
///
/// The GPU mesh preprocessing shader writes to this buffer, and the
/// indirect parameters building shader reads this buffer to construct the
/// indirect draw parameters.
non_indexed_metadata: RawBufferVec<IndirectParametersMetadata>,
/// The GPU buffer that holds the number of indirect draw commands for each
/// phase of each view, for non-indexed meshes.
///
/// The indirect parameters building shader writes to this buffer, and the
/// `multi_draw_indirect_count` command reads from it in order to know how
/// many indirect draw commands to process.
non_indexed_batch_sets: RawBufferVec<IndirectBatchSet>,
/// The GPU buffer that stores the indirect draw parameters for indexed
/// meshes.
///
/// The indirect parameters building shader writes to this buffer, while the
/// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
/// it to perform the draws.
indexed_data: UninitBufferVec<IndirectParametersIndexed>,
/// The GPU buffer that holds the data used to construct indirect draw
/// parameters for indexed meshes.
///
/// The GPU mesh preprocessing shader writes to this buffer, and the
/// indirect parameters building shader reads this buffer to construct the
/// indirect draw parameters.
indexed_metadata: RawBufferVec<IndirectParametersMetadata>,
/// The GPU buffer that holds the number of indirect draw commands for each
/// phase of each view, for indexed meshes.
///
/// The indirect parameters building shader writes to this buffer, and the
/// `multi_draw_indirect_count` command reads from it in order to know how
/// many indirect draw commands to process.
indexed_batch_sets: RawBufferVec<IndirectBatchSet>,
/// Information that indirect draw commands use to draw indexed meshes in
/// the scene.
pub indexed: MeshClassIndirectParametersBuffers<IndirectParametersIndexed>,
/// Information that indirect draw commands use to draw non-indexed meshes
/// in the scene.
pub non_indexed: MeshClassIndirectParametersBuffers<IndirectParametersNonIndexed>,
}
impl UntypedPhaseIndirectParametersBuffers {
@ -883,171 +850,48 @@ impl UntypedPhaseIndirectParametersBuffers {
}
UntypedPhaseIndirectParametersBuffers {
non_indexed_data: UninitBufferVec::new(indirect_parameter_buffer_usages),
non_indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE),
non_indexed_batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages),
indexed_data: UninitBufferVec::new(indirect_parameter_buffer_usages),
indexed_metadata: RawBufferVec::new(BufferUsages::STORAGE),
indexed_batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages),
non_indexed: MeshClassIndirectParametersBuffers::new(
allow_copies_from_indirect_parameter_buffers,
),
indexed: MeshClassIndirectParametersBuffers::new(
allow_copies_from_indirect_parameter_buffers,
),
}
}
/// Returns the GPU buffer that stores the indirect draw parameters for
/// indexed meshes.
///
/// The indirect parameters building shader writes to this buffer, while the
/// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
/// it to perform the draws.
#[inline]
pub fn indexed_data_buffer(&self) -> Option<&Buffer> {
self.indexed_data.buffer()
}
/// Returns the GPU buffer that holds the data used to construct indirect
/// draw parameters for indexed meshes.
///
/// The GPU mesh preprocessing shader writes to this buffer, and the
/// indirect parameters building shader reads this buffer to construct the
/// indirect draw parameters.
#[inline]
pub fn indexed_metadata_buffer(&self) -> Option<&Buffer> {
self.indexed_metadata.buffer()
}
/// Returns the GPU buffer that holds the number of indirect draw commands
/// for each phase of each view, for indexed meshes.
///
/// The indirect parameters building shader writes to this buffer, and the
/// `multi_draw_indirect_count` command reads from it in order to know how
/// many indirect draw commands to process.
#[inline]
pub fn indexed_batch_sets_buffer(&self) -> Option<&Buffer> {
self.indexed_batch_sets.buffer()
}
/// Returns the GPU buffer that stores the indirect draw parameters for
/// non-indexed meshes.
///
/// The indirect parameters building shader writes to this buffer, while the
/// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
/// it to perform the draws.
#[inline]
pub fn non_indexed_data_buffer(&self) -> Option<&Buffer> {
self.non_indexed_data.buffer()
}
/// Returns the GPU buffer that holds the data used to construct indirect
/// draw parameters for non-indexed meshes.
///
/// The GPU mesh preprocessing shader writes to this buffer, and the
/// indirect parameters building shader reads this buffer to construct the
/// indirect draw parameters.
#[inline]
pub fn non_indexed_metadata_buffer(&self) -> Option<&Buffer> {
self.non_indexed_metadata.buffer()
}
/// Returns the GPU buffer that holds the number of indirect draw commands
/// for each phase of each view, for non-indexed meshes.
///
/// The indirect parameters building shader writes to this buffer, and the
/// `multi_draw_indirect_count` command reads from it in order to know how
/// many indirect draw commands to process.
#[inline]
pub fn non_indexed_batch_sets_buffer(&self) -> Option<&Buffer> {
self.non_indexed_batch_sets.buffer()
}
/// Reserves space for `count` new batches corresponding to indexed meshes.
///
/// This allocates in both the [`Self::indexed_metadata`] and
/// [`Self::indexed_data`] buffers.
fn allocate_indexed(&mut self, count: u32) -> u32 {
let length = self.indexed_data.len();
self.indexed_metadata.reserve_internal(count as usize);
for _ in 0..count {
self.indexed_data.add();
self.indexed_metadata
.push(IndirectParametersMetadata::default());
}
length as u32
}
/// Reserves space for `count` new batches corresponding to non-indexed
/// meshes.
///
/// This allocates in both the `non_indexed_metadata` and `non_indexed_data`
/// buffers.
pub fn allocate_non_indexed(&mut self, count: u32) -> u32 {
let length = self.non_indexed_data.len();
self.non_indexed_metadata.reserve_internal(count as usize);
for _ in 0..count {
self.non_indexed_data.add();
self.non_indexed_metadata
.push(IndirectParametersMetadata::default());
}
length as u32
}
/// Reserves space for `count` new batches.
///
/// The `indexed` parameter specifies whether the meshes that these batches
/// correspond to are indexed or not.
pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 {
if indexed {
self.allocate_indexed(count)
self.indexed.allocate(count)
} else {
self.allocate_non_indexed(count)
self.non_indexed.allocate(count)
}
}
/// Initializes the batch corresponding to an indexed mesh at the given
/// index with the given [`IndirectParametersMetadata`].
pub fn set_indexed(&mut self, index: u32, value: IndirectParametersMetadata) {
self.indexed_metadata.set(index, value);
}
/// Initializes the batch corresponding to a non-indexed mesh at the given
/// index with the given [`IndirectParametersMetadata`].
pub fn set_non_indexed(&mut self, index: u32, value: IndirectParametersMetadata) {
self.non_indexed_metadata.set(index, value);
}
/// Returns the number of batches currently allocated.
///
/// The `indexed` parameter specifies whether the meshes that these batches
/// correspond to are indexed or not.
fn batch_count(&self, indexed: bool) -> usize {
if indexed {
self.indexed_batch_count()
self.indexed.batch_count()
} else {
self.non_indexed_batch_count()
self.non_indexed.batch_count()
}
}
/// Returns the number of batches corresponding to indexed meshes that are
/// currently allocated.
#[inline]
pub fn indexed_batch_count(&self) -> usize {
self.indexed_data.len()
}
/// Returns the number of batches corresponding to non-indexed meshes that
/// are currently allocated.
#[inline]
pub fn non_indexed_batch_count(&self) -> usize {
self.non_indexed_data.len()
}
/// Returns the number of batch sets currently allocated.
///
/// The `indexed` parameter specifies whether the meshes that these batch
/// sets correspond to are indexed or not.
pub fn batch_set_count(&self, indexed: bool) -> usize {
if indexed {
self.indexed_batch_sets.len()
self.indexed.batch_sets.len()
} else {
self.non_indexed_batch_sets.len()
self.non_indexed.batch_sets.len()
}
}
@ -1060,29 +904,170 @@ impl UntypedPhaseIndirectParametersBuffers {
/// batch in this batch set.
pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) {
if indexed {
self.indexed_batch_sets.push(IndirectBatchSet {
self.indexed.batch_sets.push(IndirectBatchSet {
indirect_parameters_base,
indirect_parameters_count: 0,
});
} else {
self.non_indexed_batch_sets.push(IndirectBatchSet {
self.non_indexed.batch_sets.push(IndirectBatchSet {
indirect_parameters_base,
indirect_parameters_count: 0,
});
}
}
/// Computes the index that the next batch set to be added will receive.
///
/// `indexed` selects which mesh class (indexed or non-indexed) the batch set
/// belongs to. The index is simply the number of batch sets of that class
/// allocated so far, wrapped in a [`NonMaxU32`].
pub fn get_next_batch_set_index(&self, indexed: bool) -> Option<NonMaxU32> {
    let next_index = self.batch_set_count(indexed) as u32;
    NonMaxU32::new(next_index)
}
/// Clears out the buffers in preparation for a new frame.
pub fn clear(&mut self) {
self.indexed_data.clear();
self.indexed_metadata.clear();
self.indexed_batch_sets.clear();
self.non_indexed_data.clear();
self.non_indexed_metadata.clear();
self.non_indexed_batch_sets.clear();
self.indexed.clear();
self.non_indexed.clear();
}
}
/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single mesh class (indexed or non-indexed), for a
/// single phase.
///
/// The `IP` type parameter is the indirect draw parameters structure stored
/// in the data buffer: [`IndirectParametersIndexed`] for indexed meshes and
/// [`IndirectParametersNonIndexed`] for non-indexed meshes.
pub struct MeshClassIndirectParametersBuffers<IP>
where
IP: Clone + ShaderSize + WriteInto,
{
/// The GPU buffer that stores the indirect draw parameters for the meshes.
///
/// The indirect parameters building shader writes to this buffer, while the
/// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
/// it to perform the draws.
data: UninitBufferVec<IP>,
/// The GPU buffer that holds the CPU-constructed data used to construct
/// indirect draw parameters for meshes.
///
/// The CPU writes to this buffer, and the indirect parameters building
/// shader reads this buffer to construct the indirect draw parameters.
cpu_metadata: RawBufferVec<IndirectParametersCpuMetadata>,
/// The GPU buffer that holds data built by the GPU used to construct
/// indirect draw parameters for meshes.
///
/// The GPU mesh preprocessing shader writes to this buffer, and the
/// indirect parameters building shader reads this buffer to construct the
/// indirect draw parameters.
gpu_metadata: UninitBufferVec<IndirectParametersGpuMetadata>,
/// The GPU buffer that holds the number of indirect draw commands for each
/// phase of each view, for meshes.
///
/// The indirect parameters building shader writes to this buffer, and the
/// `multi_draw_indirect_count` command reads from it in order to know how
/// many indirect draw commands to process.
batch_sets: RawBufferVec<IndirectBatchSet>,
}
impl<IP> MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    /// Creates the (initially empty) set of buffers for one mesh class.
    ///
    /// The indirect draw parameters buffer and the batch sets buffer are
    /// created with `STORAGE | INDIRECT` usage; if
    /// `allow_copies_from_indirect_parameter_buffers` is true, `COPY_SRC` is
    /// added to those two buffers as well. The two metadata buffers are
    /// created with `STORAGE` usage only.
    fn new(
        allow_copies_from_indirect_parameter_buffers: bool,
    ) -> MeshClassIndirectParametersBuffers<IP> {
        let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
        if allow_copies_from_indirect_parameter_buffers {
            indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC;
        }

        MeshClassIndirectParametersBuffers {
            data: UninitBufferVec::new(indirect_parameter_buffer_usages),
            cpu_metadata: RawBufferVec::new(BufferUsages::STORAGE),
            gpu_metadata: UninitBufferVec::new(BufferUsages::STORAGE),
            batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages),
        }
    }

    /// Returns the GPU buffer that stores the indirect draw parameters for
    /// the meshes of this class.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    #[inline]
    pub fn data_buffer(&self) -> Option<&Buffer> {
        self.data.buffer()
    }

    /// Returns the GPU buffer that holds the CPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The CPU writes to this buffer, and the indirect parameters building
    /// shader reads this buffer to construct the indirect draw parameters.
    #[inline]
    pub fn cpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.cpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the GPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    #[inline]
    pub fn gpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.gpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the number of indirect draw commands
    /// for each phase of each view.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    #[inline]
    pub fn batch_sets_buffer(&self) -> Option<&Buffer> {
        self.batch_sets.buffer()
    }

    /// Reserves space for `count` new batches and returns the index of the
    /// first one.
    ///
    /// This allocates in the [`Self::cpu_metadata`], [`Self::gpu_metadata`],
    /// and [`Self::data`] buffers, so that a batch has the same index in all
    /// three.
    fn allocate(&mut self, count: u32) -> u32 {
        let length = self.data.len();

        // The indirect parameters and the GPU metadata are uninitialized
        // buffers that shaders fill in, so only their lengths need to grow.
        self.data.add_multiple(count as usize);
        self.gpu_metadata.add_multiple(count as usize);

        // The CPU metadata is uploaded from the CPU, so it needs real
        // (default-initialized) entries that `set` can later overwrite.
        self.cpu_metadata.reserve_internal(count as usize);
        for _ in 0..count {
            self.cpu_metadata
                .push(IndirectParametersCpuMetadata::default());
        }

        length as u32
    }

    /// Sets the [`IndirectParametersCpuMetadata`] for the mesh at the given
    /// index.
    ///
    /// `index` is expected to be an index previously handed out by
    /// `allocate`.
    pub fn set(&mut self, index: u32, value: IndirectParametersCpuMetadata) {
        self.cpu_metadata.set(index, value);
    }

    /// Returns the number of batches corresponding to meshes that are currently
    /// allocated.
    #[inline]
    pub fn batch_count(&self) -> usize {
        self.data.len()
    }

    /// Clears out all the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.data.clear();
        self.cpu_metadata.clear();
        self.gpu_metadata.clear();
        self.batch_sets.clear();
    }
}
@ -1419,20 +1404,21 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
Some(
phase_indirect_parameters_buffers
.buffers
.allocate_indexed(1),
.indexed
.allocate(1),
)
} else {
Some(
phase_indirect_parameters_buffers
.buffers
.allocate_non_indexed(1),
.non_indexed
.allocate(1),
)
};
// Start a new batch.
if let Some(indirect_parameters_index) = indirect_parameters_index {
GFBD::write_batch_indirect_parameters_metadata(
InputUniformIndex(current_input_index.into()),
item_is_indexed,
output_index,
None,
@ -1575,7 +1561,6 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
.get_next_batch_set_index(batch_set_key.indexed());
GFBD::write_batch_indirect_parameters_metadata(
input_index,
batch_set_key.indexed(),
output_index,
batch_set_index,
@ -1683,7 +1668,6 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
.get_next_batch_set_index(key.0.indexed());
GFBD::write_batch_indirect_parameters_metadata(
input_index,
key.0.indexed(),
output_index,
batch_set_index,
@ -1761,13 +1745,15 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
Some(
phase_indirect_parameters_buffers
.buffers
.allocate_indexed(unbatchables.entities.len() as u32),
.indexed
.allocate(unbatchables.entities.len() as u32),
)
} else {
Some(
phase_indirect_parameters_buffers
.buffers
.allocate_non_indexed(unbatchables.entities.len() as u32),
.non_indexed
.allocate(unbatchables.entities.len() as u32),
)
};
@ -1782,7 +1768,6 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
// We're in indirect mode, so add an indirect parameters
// index.
GFBD::write_batch_indirect_parameters_metadata(
InputUniformIndex(input_index.into()),
key.0.indexed(),
output_index,
None,
@ -1968,24 +1953,39 @@ pub fn write_indirect_parameters_buffers(
) {
for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
phase_indirect_parameters_buffers
.indexed_data
.indexed
.data
.write_buffer(&render_device);
phase_indirect_parameters_buffers
.non_indexed_data
.non_indexed
.data
.write_buffer(&render_device);
phase_indirect_parameters_buffers
.indexed_metadata
.indexed
.cpu_metadata
.write_buffer(&render_device, &render_queue);
phase_indirect_parameters_buffers
.non_indexed_metadata
.non_indexed
.cpu_metadata
.write_buffer(&render_device, &render_queue);
phase_indirect_parameters_buffers
.indexed_batch_sets
.non_indexed
.gpu_metadata
.write_buffer(&render_device);
phase_indirect_parameters_buffers
.indexed
.gpu_metadata
.write_buffer(&render_device);
phase_indirect_parameters_buffers
.indexed
.batch_sets
.write_buffer(&render_device, &render_queue);
phase_indirect_parameters_buffers
.non_indexed_batch_sets
.non_indexed
.batch_sets
.write_buffer(&render_device, &render_queue);
}
}

View File

@ -9,8 +9,8 @@ use nonmax::NonMaxU32;
use crate::{
render_phase::{
BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, InputUniformIndex,
PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, ViewBinnedRenderPhases,
BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItemExtraIndex,
SortedPhaseItem, SortedRenderPhase, ViewBinnedRenderPhases,
},
render_resource::{CachedRenderPipelineId, GpuArrayBufferable},
sync_world::MainEntity,
@ -148,15 +148,13 @@ pub trait GetFullBatchData: GetBatchData {
query_item: MainEntity,
) -> Option<NonMaxU32>;
/// Writes the [`gpu_preprocessing::IndirectParametersMetadata`] necessary
/// to draw this batch into the given metadata buffer at the given index.
/// Writes the [`gpu_preprocessing::IndirectParametersCpuMetadata`]
/// necessary to draw this batch into the given metadata buffer at the given
/// index.
///
/// This is only used if GPU culling is enabled (which requires GPU
/// preprocessing).
///
/// * `mesh_index` describes the index of the first mesh instance in this
/// batch in the `MeshInputUniform` buffer.
///
/// * `indexed` is true if the mesh is indexed or false if it's non-indexed.
///
/// * `base_output_index` is the index of the first mesh instance in this
@ -172,7 +170,6 @@ pub trait GetFullBatchData: GetBatchData {
/// * `indirect_parameters_offset` is the index in that buffer at which to
/// write the metadata.
fn write_batch_indirect_parameters_metadata(
mesh_index: InputUniformIndex,
indexed: bool,
base_output_index: u32,
batch_set_index: Option<NonMaxU32>,

View File

@ -47,17 +47,20 @@ struct IndirectParametersNonIndexed {
first_instance: u32,
}
struct IndirectParametersMetadata {
mesh_index: u32,
struct IndirectParametersCpuMetadata {
base_output_index: u32,
batch_set_index: u32,
}
struct IndirectParametersGpuMetadata {
mesh_index: u32,
#ifdef WRITE_INDIRECT_PARAMETERS_METADATA
early_instance_count: atomic<u32>,
late_instance_count: atomic<u32>,
#else
#else // WRITE_INDIRECT_PARAMETERS_METADATA
early_instance_count: u32,
late_instance_count: u32,
#endif
#endif // WRITE_INDIRECT_PARAMETERS_METADATA
}
struct IndirectBatchSet {

View File

@ -451,8 +451,14 @@ where
/// Reserves space for one more element in the buffer and returns its index.
pub fn add(&mut self) -> usize {
self.add_multiple(1)
}
/// Reserves space for the given number of elements in the buffer and
/// returns the index of the first one.
pub fn add_multiple(&mut self, count: usize) -> usize {
let index = self.len;
self.len += 1;
self.len += count;
index
}

View File

@ -1,6 +1,5 @@
use bevy_app::Plugin;
use bevy_asset::{load_internal_asset, weak_handle, AssetId, Handle};
use bevy_render::render_phase::{sweep_old_entities, InputUniformIndex};
use crate::{tonemapping_pipeline_key, Material2dBindGroupId};
use bevy_core_pipeline::tonemapping::DebandDither;
@ -25,7 +24,7 @@ use bevy_render::prelude::Msaa;
use bevy_render::RenderSet::PrepareAssets;
use bevy_render::{
batching::{
gpu_preprocessing::IndirectParametersMetadata,
gpu_preprocessing::IndirectParametersCpuMetadata,
no_gpu_preprocessing::{
self, batch_and_prepare_binned_render_phase, batch_and_prepare_sorted_render_phase,
write_batched_instance_buffer, BatchedInstanceBuffer,
@ -39,7 +38,8 @@ use bevy_render::{
},
render_asset::RenderAssets,
render_phase::{
PhaseItem, PhaseItemExtraIndex, RenderCommand, RenderCommandResult, TrackedRenderPass,
sweep_old_entities, PhaseItem, PhaseItemExtraIndex, RenderCommand, RenderCommandResult,
TrackedRenderPass,
},
render_resource::{binding_types::uniform_buffer, *},
renderer::{RenderDevice, RenderQueue},
@ -480,7 +480,6 @@ impl GetFullBatchData for Mesh2dPipeline {
}
fn write_batch_indirect_parameters_metadata(
input_index: InputUniformIndex,
indexed: bool,
base_output_index: u32,
batch_set_index: Option<NonMaxU32>,
@ -490,22 +489,22 @@ impl GetFullBatchData for Mesh2dPipeline {
// Note that `IndirectParameters` covers both of these structures, even
// though they actually have distinct layouts. See the comment above that
// type for more information.
let indirect_parameters = IndirectParametersMetadata {
mesh_index: *input_index,
let indirect_parameters = IndirectParametersCpuMetadata {
base_output_index,
batch_set_index: match batch_set_index {
None => !0,
Some(batch_set_index) => u32::from(batch_set_index),
},
early_instance_count: 0,
late_instance_count: 0,
};
if indexed {
indirect_parameters_buffer.set_indexed(indirect_parameters_offset, indirect_parameters);
indirect_parameters_buffer
.indexed
.set(indirect_parameters_offset, indirect_parameters);
} else {
indirect_parameters_buffer
.set_non_indexed(indirect_parameters_offset, indirect_parameters);
.non_indexed
.set(indirect_parameters_offset, indirect_parameters);
}
}
}

View File

@ -450,8 +450,10 @@ impl render_graph::Node for ReadbackIndirectParametersNode {
Some(indirect_parameters_staging_data_buffer),
Some(indirect_parameters_staging_batch_sets_buffer),
) = (
phase_indirect_parameters_buffers.indexed_data_buffer(),
phase_indirect_parameters_buffers.indexed_batch_sets_buffer(),
phase_indirect_parameters_buffers.indexed.data_buffer(),
phase_indirect_parameters_buffers
.indexed
.batch_sets_buffer(),
indirect_parameters_mapping_buffers.data.as_ref(),
indirect_parameters_mapping_buffers.batch_sets.as_ref(),
)
@ -501,8 +503,10 @@ fn create_indirect_parameters_staging_buffers(
// Fetch the indirect parameters buffers that we're going to copy from.
let (Some(indexed_data_buffer), Some(indexed_batch_set_buffer)) = (
phase_indirect_parameters_buffers.indexed_data_buffer(),
phase_indirect_parameters_buffers.indexed_batch_sets_buffer(),
phase_indirect_parameters_buffers.indexed.data_buffer(),
phase_indirect_parameters_buffers
.indexed
.batch_sets_buffer(),
) else {
return;
};

View File

@ -28,7 +28,7 @@ use bevy::{
render::{
batching::{
gpu_preprocessing::{
batch_and_prepare_sorted_render_phase, IndirectParametersMetadata,
batch_and_prepare_sorted_render_phase, IndirectParametersCpuMetadata,
UntypedPhaseIndirectParametersBuffers,
},
GetBatchData, GetFullBatchData,
@ -42,8 +42,8 @@ use bevy::{
},
render_phase::{
sort_phase_system, AddRenderCommand, CachedRenderPipelinePhaseItem, DrawFunctionId,
DrawFunctions, InputUniformIndex, PhaseItem, PhaseItemExtraIndex, SetItemPipeline,
SortedPhaseItem, SortedRenderPhasePlugin, ViewSortedRenderPhases,
DrawFunctions, PhaseItem, PhaseItemExtraIndex, SetItemPipeline, SortedPhaseItem,
SortedRenderPhasePlugin, ViewSortedRenderPhases,
},
render_resource::{
CachedRenderPipelineId, ColorTargetState, ColorWrites, Face, FragmentState, FrontFace,
@ -435,7 +435,6 @@ impl GetFullBatchData for StencilPipeline {
}
fn write_batch_indirect_parameters_metadata(
mesh_index: InputUniformIndex,
indexed: bool,
base_output_index: u32,
batch_set_index: Option<NonMaxU32>,
@ -445,23 +444,22 @@ impl GetFullBatchData for StencilPipeline {
// Note that `IndirectParameters` covers both of these structures, even
// though they actually have distinct layouts. See the comment above that
// type for more information.
let indirect_parameters = IndirectParametersMetadata {
mesh_index: *mesh_index,
let indirect_parameters = IndirectParametersCpuMetadata {
base_output_index,
batch_set_index: match batch_set_index {
None => !0,
Some(batch_set_index) => u32::from(batch_set_index),
},
early_instance_count: 0,
late_instance_count: 0,
};
if indexed {
indirect_parameters_buffers
.set_indexed(indirect_parameters_offset, indirect_parameters);
.indexed
.set(indirect_parameters_offset, indirect_parameters);
} else {
indirect_parameters_buffers
.set_non_indexed(indirect_parameters_offset, indirect_parameters);
.non_indexed
.set(indirect_parameters_offset, indirect_parameters);
}
}