Use multi_draw_indirect_count where available, in preparation for two-phase occlusion culling. (#17211)

This commit allows Bevy to use `multi_draw_indirect_count` for drawing
meshes. The `multi_draw_indirect_count` feature works just like
`multi_draw_indirect`, but it takes the number of indirect parameters
from a GPU buffer rather than specifying it on the CPU.

Currently, the CPU constructs the list of indirect draw parameters with
the instance count for each batch set to zero, uploads the resulting
buffer to the GPU, and dispatches a compute shader that bumps the
instance count for each mesh that survives culling. Unfortunately, this
is inefficient when we support `multi_draw_indirect_count`. Draw
commands corresponding to meshes for which all instances were culled
will remain present in the list when calling
`multi_draw_indirect_count`, causing overhead. Proper use of
`multi_draw_indirect_count` requires eliminating these empty draw
commands.

To address this inefficiency, this PR makes Bevy fully construct the
indirect draw commands on the GPU instead of on the CPU. Instead of
writing instance counts to the draw command buffer, the mesh
preprocessing shader now writes them to a separate *indirect metadata
buffer*. A second compute dispatch known as the *build indirect
parameters* shader runs after mesh preprocessing and converts the
indirect draw metadata into actual indirect draw commands for the GPU.
The build indirect parameters shader operates on a batch at a time,
rather than an instance at a time, and as such each thread writes only 0
or 1 indirect draw parameters, simplifying the current logic in
`mesh_preprocessing`, which currently has to have special cases for the
first mesh in each batch. The build indirect parameters shader emits
draw commands in a tightly packed manner, enabling maximally efficient
use of `multi_draw_indirect_count`.

Along the way, this patch switches mesh preprocessing to dispatch one
compute invocation per render phase per view, instead of dispatching one
compute invocation per view. This is preparation for two-phase occlusion
culling, in which we will have two mesh preprocessing stages. In that
scenario, the first mesh preprocessing stage must only process opaque
and alpha tested objects, so the work items must be separated into those
that are opaque or alpha tested and those that aren't. Thus this PR
splits out the work items into a separate buffer for each phase. As this
patch rewrites so much of the mesh preprocessing infrastructure, it was
simpler to just fold the change into this patch instead of deferring it
to the forthcoming occlusion culling PR.

Finally, this patch changes mesh preprocessing so that it runs
separately for indexed and non-indexed meshes. This is because draw
commands for indexed and non-indexed meshes have different sizes and
layouts. *The existing code is actually broken for non-indexed meshes*,
as it attempts to overlay the indirect parameters for non-indexed meshes
on top of those for indexed meshes. Consequently, right now the
parameters will be read incorrectly when multiple non-indexed meshes are
multi-drawn together. *This is a bug fix* and, as with the change to
dispatch phases separately noted above, was easiest to include in this
patch as opposed to separately.

## Migration Guide

* Systems that add custom phase items now need to populate the indirect
drawing-related buffers. See the `specialized_mesh_pipeline` example for
how this is done.
This commit is contained in:
Patrick Walton 2025-01-14 13:19:20 -08:00 committed by GitHub
parent e53c8e0933
commit 35101f3ed5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
32 changed files with 2269 additions and 642 deletions

View File

@ -35,6 +35,7 @@ use core::ops::Range;
use bevy_asset::UntypedAssetId;
use bevy_render::{
batching::gpu_preprocessing::GpuPreprocessingMode,
render_phase::PhaseItemBatchSetKey,
view::{ExtractedView, RetainedViewEntity},
};
use bevy_utils::{HashMap, HashSet};
@ -132,7 +133,7 @@ pub struct Opaque2d {
///
/// Objects in a single batch set can potentially be multi-drawn together,
/// if it's enabled and the current platform supports it.
pub batch_set_key: (),
pub batch_set_key: BatchSetKey2d,
/// The key, which determines which can be batched.
pub bin_key: Opaque2dBinKey,
/// An entity from which data will be fetched, including the mesh if
@ -198,7 +199,7 @@ impl PhaseItem for Opaque2d {
impl BinnedPhaseItem for Opaque2d {
// 2D meshes presently can't be multidrawn, so the batch set key only
// records whether the mesh is indexed.
type BatchSetKey = ();
type BatchSetKey = BatchSetKey2d;
type BinKey = Opaque2dBinKey;
@ -219,6 +220,20 @@ impl BinnedPhaseItem for Opaque2d {
}
}
/// The batch set key used by the binned 2D phases.
///
/// 2D meshes aren't currently multi-drawn together, so this batch set key only
/// stores whether the mesh is indexed.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BatchSetKey2d {
    /// True if the mesh is indexed.
    pub indexed: bool,
}
impl PhaseItemBatchSetKey for BatchSetKey2d {
    /// Returns the `indexed` flag stored in this key.
    // `#[inline]` matches the other small `indexed` accessors on phase items.
    #[inline]
    fn indexed(&self) -> bool {
        self.indexed
    }
}
impl CachedRenderPipelinePhaseItem for Opaque2d {
#[inline]
fn cached_pipeline(&self) -> CachedRenderPipelineId {
@ -232,7 +247,7 @@ pub struct AlphaMask2d {
///
/// Objects in a single batch set can potentially be multi-drawn together,
/// if it's enabled and the current platform supports it.
pub batch_set_key: (),
pub batch_set_key: BatchSetKey2d,
/// The key, which determines which can be batched.
pub bin_key: AlphaMask2dBinKey,
/// An entity from which data will be fetched, including the mesh if
@ -297,9 +312,7 @@ impl PhaseItem for AlphaMask2d {
}
impl BinnedPhaseItem for AlphaMask2d {
// Since 2D meshes presently can't be multidrawn, the batch set key is
// irrelevant.
type BatchSetKey = ();
type BatchSetKey = BatchSetKey2d;
type BinKey = AlphaMask2dBinKey;
@ -335,6 +348,9 @@ pub struct Transparent2d {
pub draw_function: DrawFunctionId,
pub batch_range: Range<u32>,
pub extra_index: PhaseItemExtraIndex,
/// Whether the mesh in question is indexed (uses an index buffer in
/// addition to its vertex buffer).
pub indexed: bool,
}
impl PhaseItem for Transparent2d {
@ -387,6 +403,10 @@ impl SortedPhaseItem for Transparent2d {
// radsort is a stable radix sort that performed better than `slice::sort_by_key` or `slice::sort_unstable_by_key`.
radsort::sort_by_key(items, |item| item.sort_key().0);
}
/// Returns whether the mesh behind this phase item is indexed.
// `#[inline]` keeps this accessor consistent with the 3D sorted phase items.
#[inline]
fn indexed(&self) -> bool {
    self.indexed
}
}
impl CachedRenderPipelinePhaseItem for Transparent2d {
@ -411,7 +431,7 @@ pub fn extract_core_2d_camera_phases(
}
// This is the main 2D camera, so we use the first subview index (0).
let retained_view_entity = RetainedViewEntity::new(main_entity.into(), 0);
let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0);
transparent_2d_phases.insert_or_clear(retained_view_entity);
opaque_2d_phases.insert_or_clear(retained_view_entity, GpuPreprocessingMode::None);

View File

@ -68,6 +68,7 @@ use core::ops::Range;
use bevy_render::{
batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport},
mesh::allocator::SlabId,
render_phase::PhaseItemBatchSetKey,
view::{NoIndirectDrawing, RetainedViewEntity},
};
pub use camera_3d::*;
@ -269,6 +270,12 @@ pub struct Opaque3dBatchSetKey {
pub lightmap_slab: Option<NonMaxU32>,
}
impl PhaseItemBatchSetKey for Opaque3dBatchSetKey {
    /// Returns true when this key records an index slab, i.e. `index_slab` is
    /// `Some`.
    // `#[inline]` matches the other small `indexed` accessors on phase items.
    #[inline]
    fn indexed(&self) -> bool {
        self.index_slab.is_some()
    }
}
/// Data that must be identical in order to *batch* phase items together.
///
/// Note that a *batch set* (if multi-draw is in use) contains multiple batches.
@ -430,6 +437,9 @@ pub struct Transmissive3d {
pub draw_function: DrawFunctionId,
pub batch_range: Range<u32>,
pub extra_index: PhaseItemExtraIndex,
/// Whether the mesh in question is indexed (uses an index buffer in
/// addition to its vertex buffer).
pub indexed: bool,
}
impl PhaseItem for Transmissive3d {
@ -493,6 +503,11 @@ impl SortedPhaseItem for Transmissive3d {
/// Sorts the phase items in place with `radsort`, keyed on each item's
/// `distance` field.
fn sort(items: &mut [Self]) {
radsort::sort_by_key(items, |item| item.distance);
}
/// Returns the `indexed` flag stored on this phase item.
#[inline]
fn indexed(&self) -> bool {
self.indexed
}
}
impl CachedRenderPipelinePhaseItem for Transmissive3d {
@ -509,6 +524,9 @@ pub struct Transparent3d {
pub draw_function: DrawFunctionId,
pub batch_range: Range<u32>,
pub extra_index: PhaseItemExtraIndex,
/// Whether the mesh in question is indexed (uses an index buffer in
/// addition to its vertex buffer).
pub indexed: bool,
}
impl PhaseItem for Transparent3d {
@ -560,6 +578,11 @@ impl SortedPhaseItem for Transparent3d {
/// Sorts the phase items in place with `radsort`, keyed on each item's
/// `distance` field.
fn sort(items: &mut [Self]) {
radsort::sort_by_key(items, |item| item.distance);
}
/// Returns the `indexed` flag stored on this phase item.
#[inline]
fn indexed(&self) -> bool {
self.indexed
}
}
impl CachedRenderPipelinePhaseItem for Transparent3d {
@ -594,7 +617,7 @@ pub fn extract_core_3d_camera_phases(
});
// This is the main 3D camera, so use the first subview index (0).
let retained_view_entity = RetainedViewEntity::new(main_entity.into(), 0);
let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0);
opaque_3d_phases.insert_or_clear(retained_view_entity, gpu_preprocessing_mode);
alpha_mask_3d_phases.insert_or_clear(retained_view_entity, gpu_preprocessing_mode);
@ -662,7 +685,7 @@ pub fn extract_camera_prepass_phase(
});
// This is the main 3D camera, so we use the first subview index (0).
let retained_view_entity = RetainedViewEntity::new(main_entity.into(), 0);
let retained_view_entity = RetainedViewEntity::new(main_entity.into(), None, 0);
if depth_prepass || normal_prepass || motion_vector_prepass {
opaque_3d_prepass_phases.insert_or_clear(retained_view_entity, gpu_preprocessing_mode);

View File

@ -35,6 +35,7 @@ use bevy_ecs::prelude::*;
use bevy_math::Mat4;
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use bevy_render::mesh::allocator::SlabId;
use bevy_render::render_phase::PhaseItemBatchSetKey;
use bevy_render::sync_world::MainEntity;
use bevy_render::{
render_phase::{
@ -184,6 +185,12 @@ pub struct OpaqueNoLightmap3dBatchSetKey {
pub index_slab: Option<SlabId>,
}
impl PhaseItemBatchSetKey for OpaqueNoLightmap3dBatchSetKey {
    /// Returns true when this key records an index slab, i.e. `index_slab` is
    /// `Some`.
    // `#[inline]` matches the other small `indexed` accessors on phase items.
    #[inline]
    fn indexed(&self) -> bool {
        self.index_slab.is_some()
    }
}
// TODO: Try interning these.
/// The data used to bin each opaque 3D object in the prepass and deferred pass.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]

View File

@ -340,6 +340,7 @@ fn queue_line_gizmos_2d(
sort_key: FloatOrd(f32::INFINITY),
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: false,
});
}
@ -360,6 +361,7 @@ fn queue_line_gizmos_2d(
sort_key: FloatOrd(f32::INFINITY),
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: false,
});
}
}
@ -418,6 +420,7 @@ fn queue_line_joint_gizmos_2d(
sort_key: FloatOrd(f32::INFINITY),
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: false,
});
}
}

View File

@ -369,6 +369,7 @@ fn queue_line_gizmos_3d(
distance: 0.,
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: true,
});
}
@ -390,6 +391,7 @@ fn queue_line_gizmos_3d(
distance: 0.,
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: true,
});
}
}
@ -484,6 +486,7 @@ fn queue_line_joint_gizmos_3d(
distance: 0.,
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: true,
});
}
}

View File

@ -102,6 +102,8 @@ pub mod graph {
GpuPreprocess,
/// Label for the screen space reflections pass.
ScreenSpaceReflections,
/// Label for the indirect parameters building pass.
BuildIndirectParameters,
}
}

View File

@ -851,6 +851,9 @@ pub fn queue_material_meshes<M: Material>(
}
};
// Fetch the slabs that this mesh resides in.
let (vertex_slab, index_slab) = mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
match mesh_key
.intersection(MeshPipelineKey::BLEND_RESERVED_BITS | MeshPipelineKey::MAY_DISCARD)
{
@ -865,13 +868,12 @@ pub fn queue_material_meshes<M: Material>(
distance,
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: index_slab.is_some(),
});
} else if material.properties.render_method == OpaqueRendererMethod::Forward {
let (vertex_slab, index_slab) =
mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
let batch_set_key = Opaque3dBatchSetKey {
draw_function: draw_opaque_pbr,
pipeline: pipeline_id,
draw_function: draw_opaque_pbr,
material_bind_group_index: Some(material.binding.group.0),
vertex_slab: vertex_slab.unwrap_or_default(),
index_slab,
@ -903,10 +905,9 @@ pub fn queue_material_meshes<M: Material>(
distance,
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: index_slab.is_some(),
});
} else if material.properties.render_method == OpaqueRendererMethod::Forward {
let (vertex_slab, index_slab) =
mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
let batch_set_key = OpaqueNoLightmap3dBatchSetKey {
draw_function: draw_alpha_mask_pbr,
pipeline: pipeline_id,
@ -938,6 +939,7 @@ pub fn queue_material_meshes<M: Material>(
distance,
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: index_slab.is_some(),
});
}
}

View File

@ -966,13 +966,13 @@ pub fn queue_prepass_material_meshes<M: Material>(
}
};
let (vertex_slab, index_slab) = mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
match mesh_key
.intersection(MeshPipelineKey::BLEND_RESERVED_BITS | MeshPipelineKey::MAY_DISCARD)
{
MeshPipelineKey::BLEND_OPAQUE | MeshPipelineKey::BLEND_ALPHA_TO_COVERAGE => {
if deferred {
let (vertex_slab, index_slab) =
mesh_allocator.mesh_slabs(&mesh_instance.mesh_asset_id);
opaque_deferred_phase.as_mut().unwrap().add(
OpaqueNoLightmap3dBatchSetKey {
draw_function: opaque_draw_deferred,

View File

@ -0,0 +1,106 @@
// Builds GPU indirect draw parameters from metadata.
//
// This only runs when indirect drawing is enabled. It takes the output of
// `mesh_preprocess.wgsl` and creates indirect parameters for the GPU.
//
// This shader runs separately for indexed and non-indexed meshes. Unlike
// `mesh_preprocess.wgsl`, which runs one instance per mesh *instance*, one
// instance of this shader corresponds to a single *batch* which could contain
// arbitrarily many instances of a single mesh.
#import bevy_pbr::mesh_preprocess_types::{
IndirectBatchSet,
IndirectParametersIndexed,
IndirectParametersNonIndexed,
IndirectParametersMetadata,
MeshInput
}
// The data for each mesh that the CPU supplied to the GPU.
@group(0) @binding(0) var<storage> current_input: array<MeshInput>;
// Data that we use to generate the indirect parameters.
//
// The `mesh_preprocess.wgsl` shader emits these.
@group(0) @binding(1) var<storage> indirect_parameters_metadata: array<IndirectParametersMetadata>;
// Information about each batch set.
//
// A *batch set* is a set of meshes that might be multi-drawn together.
@group(0) @binding(2) var<storage, read_write> indirect_batch_sets: array<IndirectBatchSet>;
#ifdef INDEXED
// The buffer of indirect draw parameters that we generate, and that the GPU
// reads to issue the draws.
//
// This buffer is for indexed meshes.
@group(0) @binding(3) var<storage, read_write> indirect_parameters:
array<IndirectParametersIndexed>;
#else // INDEXED
// The buffer of indirect draw parameters that we generate, and that the GPU
// reads to issue the draws.
//
// This buffer is for non-indexed meshes.
@group(0) @binding(3) var<storage, read_write> indirect_parameters:
array<IndirectParametersNonIndexed>;
#endif // INDEXED
// Entry point: each invocation handles one entry of
// `indirect_parameters_metadata` (one batch) and writes at most one indirect
// draw command into `indirect_parameters`.
@compute
@workgroup_size(64)
fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
// Figure out our instance index (i.e. batch index). If this thread doesn't
// correspond to any index, bail.
let instance_index = global_invocation_id.x;
if (instance_index >= arrayLength(&indirect_parameters_metadata)) {
return;
}
// Unpack the metadata for this batch.
let mesh_index = indirect_parameters_metadata[instance_index].mesh_index;
let base_output_index = indirect_parameters_metadata[instance_index].base_output_index;
let batch_set_index = indirect_parameters_metadata[instance_index].batch_set_index;
// NOTE(review): `indirect_parameters_metadata` is declared `var<storage>`
// (no `read_write`) above, yet `instance_count` is read with `atomicLoad`.
// Confirm that the field's declared type and the binding's access mode are
// compatible under the WGSL rules for atomic types.
let instance_count = atomicLoad(&indirect_parameters_metadata[instance_index].instance_count);
// If we aren't using `multi_draw_indirect_count`, we have a 1:1 fixed
// assignment of batches to slots in the indirect parameters buffer, so we
// can just use the instance index as the index of our indirect parameters.
var indirect_parameters_index = instance_index;
// If the current hardware and driver support `multi_draw_indirect_count`,
// dynamically reserve an index for the indirect parameters we're to
// generate.
#ifdef MULTI_DRAW_INDIRECT_COUNT_SUPPORTED
// A batch with no instances emits no draw command at all in this mode.
// (Without `multi_draw_indirect_count`, the command is still written below
// with an instance count of zero.)
if (instance_count == 0u) {
return;
}
// If this batch belongs to a batch set, then allocate space for the
// indirect commands in that batch set.
//
// `0xffffffff` is the value that marks a batch as belonging to no batch set;
// such a batch keeps its fixed slot at `instance_index`.
if (batch_set_index != 0xffffffffu) {
let indirect_parameters_base =
indirect_batch_sets[batch_set_index].indirect_parameters_base;
// `atomicAdd` returns the count *before* the increment, so each surviving
// batch gets a distinct, consecutive slot after the batch set's base.
let indirect_parameters_offset =
atomicAdd(&indirect_batch_sets[batch_set_index].indirect_parameters_count, 1u);
indirect_parameters_index = indirect_parameters_base + indirect_parameters_offset;
}
#endif // MULTI_DRAW_INDIRECT_COUNT_SUPPORTED
// Build up the indirect parameters. The structures for indexed and
// non-indexed meshes are slightly different.
indirect_parameters[indirect_parameters_index].instance_count = instance_count;
indirect_parameters[indirect_parameters_index].first_instance = base_output_index;
indirect_parameters[indirect_parameters_index].base_vertex =
current_input[mesh_index].first_vertex_index;
#ifdef INDEXED
indirect_parameters[indirect_parameters_index].index_count =
current_input[mesh_index].index_count;
indirect_parameters[indirect_parameters_index].first_index =
current_input[mesh_index].first_index_index;
#else // INDEXED
// For a non-indexed mesh, the CPU stores the vertex count in the
// `index_count` field of the mesh input, so it is read from there.
indirect_parameters[indirect_parameters_index].vertex_count =
current_input[mesh_index].index_count;
#endif // INDEXED
}

File diff suppressed because it is too large Load Diff

View File

@ -614,8 +614,18 @@ pub struct ViewShadowBindings {
pub directional_light_depth_texture_view: TextureView,
}
/// A component that holds the shadow cascade views for all shadow cascades
/// associated with a camera.
///
/// Note: Despite the name, this component actually holds the shadow cascade
/// views, not the lights themselves.
#[derive(Component)]
pub struct ViewLightEntities {
/// The entities of the shadow cascade views associated with the camera.
///
/// Although the field is called `lights`, these are the view entities, not
/// the light entities.
pub lights: Vec<Entity>,
}
@ -701,6 +711,7 @@ pub fn prepare_lights(
views: Query<
(
Entity,
MainEntity,
&ExtractedView,
&ExtractedClusterConfig,
Option<&RenderLayers>,
@ -1118,6 +1129,7 @@ pub fn prepare_lights(
// set up light data for each view
for (
entity,
camera_main_entity,
extracted_view,
clusters,
maybe_layers,
@ -1238,8 +1250,11 @@ pub fn prepare_lights(
})
.clone();
let retained_view_entity =
RetainedViewEntity::new(*light_main_entity, face_index as u32);
let retained_view_entity = RetainedViewEntity::new(
*light_main_entity,
Some(camera_main_entity.into()),
face_index as u32,
);
commands.entity(view_light_entity).insert((
ShadowView {
@ -1343,7 +1358,8 @@ pub fn prepare_lights(
let view_light_entity = light_view_entities[0];
let retained_view_entity = RetainedViewEntity::new(*light_main_entity, 0);
let retained_view_entity =
RetainedViewEntity::new(*light_main_entity, Some(camera_main_entity.into()), 0);
commands.entity(view_light_entity).insert((
ShadowView {
@ -1476,8 +1492,11 @@ pub fn prepare_lights(
frustum.half_spaces[4] =
HalfSpace::new(frustum.half_spaces[4].normal().extend(f32::INFINITY));
let retained_view_entity =
RetainedViewEntity::new(*light_main_entity, cascade_index as u32);
let retained_view_entity = RetainedViewEntity::new(
*light_main_entity,
Some(camera_main_entity.into()),
cascade_index as u32,
);
commands.entity(view_light_entity).insert((
ShadowView {
@ -1764,6 +1783,12 @@ pub struct ShadowBatchSetKey {
pub index_slab: Option<SlabId>,
}
impl PhaseItemBatchSetKey for ShadowBatchSetKey {
    /// Returns true when this key records an index slab, i.e. `index_slab` is
    /// `Some`.
    // `#[inline]` matches the other small `indexed` accessors on phase items.
    #[inline]
    fn indexed(&self) -> bool {
        self.index_slab.is_some()
    }
}
/// Data used to bin each object in the shadow map phase.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ShadowBinKey {

View File

@ -20,7 +20,8 @@ use bevy_math::{Affine3, Rect, UVec2, Vec3, Vec4};
use bevy_render::{
batching::{
gpu_preprocessing::{
self, GpuPreprocessingSupport, IndirectParameters, IndirectParametersBuffer,
self, GpuPreprocessingSupport, IndirectBatchSet, IndirectParametersBuffers,
IndirectParametersIndexed, IndirectParametersMetadata, IndirectParametersNonIndexed,
InstanceInputUniformBuffer,
},
no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching,
@ -352,6 +353,17 @@ pub struct MeshInputUniform {
/// [`MeshAllocator`]). This value stores the offset of the first vertex in
/// this mesh in that buffer.
pub first_vertex_index: u32,
/// The index of this mesh's first index in the index buffer, if any.
///
/// Multiple meshes can be packed into a single index buffer (see
/// [`MeshAllocator`]). This value stores the offset of the first index in
/// this mesh in that buffer.
///
/// If this mesh isn't indexed, this value is ignored.
pub first_index_index: u32,
/// For an indexed mesh, the number of indices that make it up; for a
/// non-indexed mesh, the number of vertices in it.
pub index_count: u32,
/// The current skin index, or `u32::MAX` if there's no skin.
pub current_skin_index: u32,
/// The previous skin index, or `u32::MAX` if there's no previous skin.
@ -361,6 +373,10 @@ pub struct MeshInputUniform {
/// Low 16 bits: index of the material inside the bind group data.
/// High 16 bits: index of the lightmap in the binding array.
pub material_and_lightmap_bind_group_slot: u32,
/// Padding.
pub pad_a: u32,
/// Padding.
pub pad_b: u32,
}
/// Information about each mesh instance needed to cull it on GPU.
@ -907,11 +923,23 @@ impl RenderMeshInstanceGpuBuilder {
render_lightmaps: &RenderLightmaps,
skin_indices: &SkinIndices,
) -> u32 {
let first_vertex_index = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id)
{
Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
None => 0,
};
let (first_vertex_index, vertex_count) =
match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) {
Some(mesh_vertex_slice) => (
mesh_vertex_slice.range.start,
mesh_vertex_slice.range.end - mesh_vertex_slice.range.start,
),
None => (0, 0),
};
let (mesh_is_indexed, first_index_index, index_count) =
match mesh_allocator.mesh_index_slice(&self.shared.mesh_asset_id) {
Some(mesh_index_slice) => (
true,
mesh_index_slice.range.start,
mesh_index_slice.range.end - mesh_index_slice.range.start,
),
None => (false, 0, 0),
};
let current_skin_index = match skin_indices.current.get(&entity) {
Some(skin_indices) => skin_indices.index(),
@ -938,11 +966,19 @@ impl RenderMeshInstanceGpuBuilder {
flags: self.mesh_flags.bits(),
previous_input_index: u32::MAX,
first_vertex_index,
first_index_index,
index_count: if mesh_is_indexed {
index_count
} else {
vertex_count
},
current_skin_index,
previous_skin_index,
material_and_lightmap_bind_group_slot: u32::from(
self.shared.material_bindings_index.slot,
) | ((lightmap_slot as u32) << 16),
pad_a: 0,
pad_b: 0,
};
// Did the last frame contain this entity as well?
@ -1698,86 +1734,31 @@ impl GetFullBatchData for MeshPipeline {
.map(|entity| entity.current_uniform_index)
}
fn write_batch_indirect_parameters(
(mesh_instances, _, meshes, mesh_allocator, _): &SystemParamItem<Self::Param>,
indirect_parameters_buffer: &mut IndirectParametersBuffer,
fn write_batch_indirect_parameters_metadata(
mesh_index: u32,
indexed: bool,
base_output_index: u32,
batch_set_index: Option<NonMaxU32>,
indirect_parameters_buffer: &mut IndirectParametersBuffers,
indirect_parameters_offset: u32,
main_entity: MainEntity,
) {
write_batch_indirect_parameters(
mesh_instances,
meshes,
mesh_allocator,
indirect_parameters_buffer,
indirect_parameters_offset,
main_entity,
);
}
}
/// Pushes a set of [`IndirectParameters`] onto the [`IndirectParametersBuffer`]
/// for the given mesh instance, and returns the index of those indirect
/// parameters.
fn write_batch_indirect_parameters(
mesh_instances: &RenderMeshInstances,
meshes: &RenderAssets<RenderMesh>,
mesh_allocator: &MeshAllocator,
indirect_parameters_buffer: &mut IndirectParametersBuffer,
indirect_parameters_offset: u32,
main_entity: MainEntity,
) {
// This should only be called during GPU building.
let RenderMeshInstances::GpuBuilding(ref mesh_instances) = *mesh_instances else {
error!(
"`write_batch_indirect_parameters_index` should never be called in CPU mesh uniform \
building mode"
);
return;
};
let Some(mesh_instance) = mesh_instances.get(&main_entity) else {
return;
};
let Some(mesh) = meshes.get(mesh_instance.mesh_asset_id) else {
return;
};
let Some(vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id)
else {
return;
};
// Note that `IndirectParameters` covers both of these structures, even
// though they actually have distinct layouts. See the comment above that
// type for more information.
let indirect_parameters = match mesh.buffer_info {
RenderMeshBufferInfo::Indexed {
count: index_count, ..
} => {
let Some(index_buffer_slice) =
mesh_allocator.mesh_index_slice(&mesh_instance.mesh_asset_id)
else {
return;
};
IndirectParameters {
vertex_or_index_count: index_count,
instance_count: 0,
first_vertex_or_first_index: index_buffer_slice.range.start,
base_vertex_or_first_instance: vertex_buffer_slice.range.start,
first_instance: 0,
}
}
RenderMeshBufferInfo::NonIndexed => IndirectParameters {
vertex_or_index_count: mesh.vertex_count,
let indirect_parameters = IndirectParametersMetadata {
mesh_index,
base_output_index,
batch_set_index: match batch_set_index {
Some(batch_set_index) => u32::from(batch_set_index),
None => !0,
},
instance_count: 0,
first_vertex_or_first_index: vertex_buffer_slice.range.start,
base_vertex_or_first_instance: 0,
// Use `0xffffffff` as a placeholder to tell the mesh preprocessing
// shader that this is a non-indexed mesh.
first_instance: !0,
},
};
};
indirect_parameters_buffer.set(indirect_parameters_offset, indirect_parameters);
if indexed {
indirect_parameters_buffer.set_indexed(indirect_parameters_offset, indirect_parameters);
} else {
indirect_parameters_buffer
.set_non_indexed(indirect_parameters_offset, indirect_parameters);
}
}
}
bitflags::bitflags! {
@ -2687,12 +2668,12 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
type Param = (
SRes<RenderAssets<RenderMesh>>,
SRes<RenderMeshInstances>,
SRes<IndirectParametersBuffer>,
SRes<IndirectParametersBuffers>,
SRes<PipelineCache>,
SRes<MeshAllocator>,
Option<SRes<PreprocessPipelines>>,
);
type ViewQuery = Has<PreprocessBindGroup>;
type ViewQuery = Has<PreprocessBindGroups>;
type ItemQuery = ();
#[inline]
fn render<'w>(
@ -2735,26 +2716,6 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
return RenderCommandResult::Skip;
};
// Calculate the indirect offset, and look up the buffer.
let indirect_parameters = match item.extra_index() {
PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => None,
PhaseItemExtraIndex::IndirectParametersIndex(indices) => {
match indirect_parameters_buffer.buffer() {
None => {
warn!(
"Not rendering mesh because indirect parameters buffer wasn't present"
);
return RenderCommandResult::Skip;
}
Some(buffer) => Some((
indices.start as u64 * size_of::<IndirectParameters>() as u64,
indices.end - indices.start,
buffer,
)),
}
}
};
pass.set_vertex_buffer(0, vertex_buffer_slice.buffer.slice(..));
let batch_range = item.batch_range();
@ -2774,8 +2735,8 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
pass.set_index_buffer(index_buffer_slice.buffer.slice(..), 0, *index_format);
match indirect_parameters {
None => {
match item.extra_index() {
PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => {
pass.draw_indexed(
index_buffer_slice.range.start
..(index_buffer_slice.range.start + *count),
@ -2783,33 +2744,112 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
batch_range.clone(),
);
}
Some((
indirect_parameters_offset,
indirect_parameters_count,
indirect_parameters_buffer,
)) => {
pass.multi_draw_indexed_indirect(
indirect_parameters_buffer,
indirect_parameters_offset,
indirect_parameters_count,
);
PhaseItemExtraIndex::IndirectParametersIndex {
range: indirect_parameters_range,
batch_set_index,
} => {
// Look up the indirect parameters buffer, as well as
// the buffer we're going to use for
// `multi_draw_indexed_indirect_count` (if available).
let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = (
indirect_parameters_buffer.indexed_data_buffer(),
indirect_parameters_buffer.indexed_batch_sets_buffer(),
) else {
warn!(
"Not rendering mesh because indexed indirect parameters buffer \
wasn't present",
);
return RenderCommandResult::Skip;
};
// Calculate the location of the indirect parameters
// within the buffer.
let indirect_parameters_offset = indirect_parameters_range.start as u64
* size_of::<IndirectParametersIndexed>() as u64;
let indirect_parameters_count =
indirect_parameters_range.end - indirect_parameters_range.start;
// If we're using `multi_draw_indirect_count`, take the
// number of batches from the appropriate position in
// the batch sets buffer. Otherwise, supply the size of
// the batch set.
match batch_set_index {
Some(batch_set_index) => {
let count_offset = u32::from(batch_set_index)
* (size_of::<IndirectBatchSet>() as u32);
pass.multi_draw_indexed_indirect_count(
indirect_parameters_buffer,
indirect_parameters_offset,
batch_sets_buffer,
count_offset as u64,
indirect_parameters_count,
);
}
None => {
pass.multi_draw_indexed_indirect(
indirect_parameters_buffer,
indirect_parameters_offset,
indirect_parameters_count,
);
}
}
}
}
}
RenderMeshBufferInfo::NonIndexed => match indirect_parameters {
None => {
RenderMeshBufferInfo::NonIndexed => match item.extra_index() {
PhaseItemExtraIndex::None | PhaseItemExtraIndex::DynamicOffset(_) => {
pass.draw(vertex_buffer_slice.range, batch_range.clone());
}
Some((
indirect_parameters_offset,
indirect_parameters_count,
indirect_parameters_buffer,
)) => {
pass.multi_draw_indirect(
indirect_parameters_buffer,
indirect_parameters_offset,
indirect_parameters_count,
);
PhaseItemExtraIndex::IndirectParametersIndex {
range: indirect_parameters_range,
batch_set_index,
} => {
// Look up the indirect parameters buffer, as well as the
// buffer we're going to use for
// `multi_draw_indirect_count` (if available).
let (Some(indirect_parameters_buffer), Some(batch_sets_buffer)) = (
indirect_parameters_buffer.non_indexed_data_buffer(),
indirect_parameters_buffer.non_indexed_batch_sets_buffer(),
) else {
warn!(
"Not rendering mesh because non-indexed indirect parameters buffer \
wasn't present"
);
return RenderCommandResult::Skip;
};
// Calculate the location of the indirect parameters within
// the buffer.
let indirect_parameters_offset = indirect_parameters_range.start as u64
* size_of::<IndirectParametersNonIndexed>() as u64;
let indirect_parameters_count =
indirect_parameters_range.end - indirect_parameters_range.start;
// If we're using `multi_draw_indirect_count`, take the
// number of batches from the appropriate position in the
// batch sets buffer. Otherwise, supply the size of the
// batch set.
match batch_set_index {
Some(batch_set_index) => {
let count_offset =
u32::from(batch_set_index) * (size_of::<IndirectBatchSet>() as u32);
pass.multi_draw_indirect_count(
indirect_parameters_buffer,
indirect_parameters_offset,
batch_sets_buffer,
count_offset as u64,
indirect_parameters_count,
);
}
None => {
pass.multi_draw_indirect(
indirect_parameters_buffer,
indirect_parameters_offset,
indirect_parameters_count,
);
}
}
}
},
}

View File

@ -8,29 +8,10 @@
// so that TAA works.
#import bevy_pbr::mesh_types::{Mesh, MESH_FLAGS_NO_FRUSTUM_CULLING_BIT}
#import bevy_pbr::mesh_preprocess_types::IndirectParameters
#import bevy_pbr::mesh_preprocess_types::{MeshInput, IndirectParametersMetadata}
#import bevy_render::maths
#import bevy_render::view::View
// Per-frame data that the CPU supplies to the GPU.
struct MeshInput {
// The model transform.
world_from_local: mat3x4<f32>,
// The lightmap UV rect, packed into 64 bits.
lightmap_uv_rect: vec2<u32>,
// Various flags.
flags: u32,
// The index of this mesh's `MeshInput` in the `previous_input` array, if
// applicable. If not present, this is `u32::MAX`.
previous_input_index: u32,
first_vertex_index: u32,
current_skin_index: u32,
previous_skin_index: u32,
// Low 16 bits: index of the material inside the bind group data.
// High 16 bits: index of the lightmap in the binding array.
material_and_lightmap_bind_group_slot: u32,
}
// Information about each mesh instance needed to cull it on GPU.
//
// At the moment, this just consists of its axis-aligned bounding box (AABB).
@ -68,7 +49,8 @@ struct PreprocessWorkItem {
#ifdef INDIRECT
// The array of indirect parameters for drawcalls.
@group(0) @binding(4) var<storage, read_write> indirect_parameters: array<IndirectParameters>;
@group(0) @binding(4) var<storage, read_write> indirect_parameters_metadata:
array<IndirectParametersMetadata>;
#endif
#ifdef FRUSTUM_CULLING
@ -167,28 +149,15 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
}
// Figure out the output index. In indirect mode, this involves bumping the
// instance index in the indirect parameters structure. Otherwise, this
// index was directly supplied to us.
// instance count in the indirect parameters metadata, which
// `build_indirect_params.wgsl` will use to generate the actual indirect
// parameters. Otherwise, this index was directly supplied to us.
#ifdef INDIRECT
let batch_output_index =
atomicAdd(&indirect_parameters[indirect_parameters_index].instance_count, 1u);
let mesh_output_index = output_index + batch_output_index;
// If this is the first mesh in the batch, write the first instance index
// into the indirect parameters.
//
// We could have done this on CPU, but when we start retaining indirect
// parameters that will no longer be desirable, as the index of the first
// instance will change from frame to frame and we won't want the CPU to
// have to keep updating it.
if (batch_output_index == 0u) {
if (indirect_parameters[indirect_parameters_index].first_instance == 0xffffffffu) {
indirect_parameters[indirect_parameters_index].base_vertex_or_first_instance =
mesh_output_index;
} else {
indirect_parameters[indirect_parameters_index].first_instance = mesh_output_index;
}
}
atomicAdd(&indirect_parameters_metadata[indirect_parameters_index].instance_count, 1u);
let mesh_output_index =
indirect_parameters_metadata[indirect_parameters_index].base_output_index +
batch_output_index;
#else // INDIRECT
let mesh_output_index = output_index;
#endif // INDIRECT

View File

@ -2,18 +2,97 @@
#define_import_path bevy_pbr::mesh_preprocess_types
// The `wgpu` indirect parameters structure. This is a union of two structures.
// For more information, see the corresponding comment in
// `gpu_preprocessing.rs`.
struct IndirectParameters {
// `vertex_count` or `index_count`.
vertex_count_or_index_count: u32,
// `instance_count` in both structures.
instance_count: atomic<u32>,
// `first_vertex` or `first_index`.
first_vertex_or_first_index: u32,
// `base_vertex` or `first_instance`.
base_vertex_or_first_instance: u32,
// A read-only copy of `instance_index`.
// Per-frame data that the CPU supplies to the GPU.
//
// Because the CPU fills this structure in, the field order here has to stay in
// sync with the corresponding Rust-side definition (presumably
// `MeshInputUniform` -- confirm before reordering or adding fields).
struct MeshInput {
// The model transform.
world_from_local: mat3x4<f32>,
// The lightmap UV rect, packed into 64 bits.
lightmap_uv_rect: vec2<u32>,
// A set of bitflags corresponding to `MeshFlags` on the Rust side. See the
// `MESH_FLAGS_` flags in `mesh_types.wgsl` for a list of these.
flags: u32,
// The index of this mesh's `MeshInput` in the `previous_input` array, if
// applicable. If not present, this is `u32::MAX`.
previous_input_index: u32,
// The index of the first vertex in the vertex slab.
first_vertex_index: u32,
// The index of the first vertex index in the index slab.
//
// If this mesh isn't indexed, this value is ignored.
first_index_index: u32,
// For indexed meshes, the number of indices that this mesh has; for
// non-indexed meshes, the number of vertices that this mesh consists of.
index_count: u32,
// NOTE(review): presumably the index of this mesh's skinning data for the
// current frame -- confirm against the Rust-side definition.
current_skin_index: u32,
// NOTE(review): presumably the index of this mesh's skinning data for the
// previous frame -- confirm against the Rust-side definition.
previous_skin_index: u32,
// Low 16 bits: index of the material inside the bind group data.
// High 16 bits: index of the lightmap in the binding array.
material_and_lightmap_bind_group_slot: u32,
}
// The `wgpu` indirect parameters structure for indexed meshes.
//
// The `build_indirect_params.wgsl` shader generates these.
//
// NOTE(review): since this is the structure that `wgpu` consumes for indexed
// indirect draws, the field order presumably has to match `wgpu`'s indexed
// indirect argument layout exactly -- confirm before reordering.
struct IndirectParametersIndexed {
// The number of indices that this mesh has.
index_count: u32,
// The number of instances we are to draw.
instance_count: u32,
// The offset of the first index for this mesh in the index buffer slab.
first_index: u32,
// The offset of the first vertex for this mesh in the vertex buffer slab.
//
// NOTE(review): `wgpu` appears to declare this slot as a signed integer;
// it's stored as unsigned here -- confirm that slab offsets always fit.
base_vertex: u32,
// The index of the first mesh instance in the `Mesh` buffer.
first_instance: u32,
}
// The `wgpu` indirect parameters structure for non-indexed meshes.
//
// The `build_indirect_params.wgsl` shader generates these.
//
// The Rust draw code locates an individual command at a byte offset of
// `index * size_of::<IndirectParametersNonIndexed>()`, so commands are
// expected to be packed back to back in the buffer with no padding between
// them.
struct IndirectParametersNonIndexed {
// The number of vertices that this mesh has.
vertex_count: u32,
// The number of instances we are to draw.
instance_count: u32,
// The offset of the first vertex for this mesh in the vertex buffer slab.
//
// NOTE(review): `wgpu` appears to call this slot `first_vertex` for
// non-indexed draws -- confirm the naming difference is intentional.
base_vertex: u32,
// The index of the first mesh instance in the `Mesh` buffer.
first_instance: u32,
}
// Information needed to generate the `IndirectParametersIndexed` and
// `IndirectParametersNonIndexed` draw commands.
//
// The CPU writes one of these per batch; the GPU then fills in the instance
// count during mesh preprocessing.
struct IndirectParametersMetadata {
// The index of the mesh in the `MeshInput` buffer.
mesh_index: u32,
// The index of the first instance corresponding to this batch in the `Mesh`
// buffer.
//
// The `mesh_preprocess.wgsl` shader adds the value it gets back from
// atomically incrementing `instance_count` to this in order to compute the
// output slot for each mesh instance.
base_output_index: u32,
// The index of the batch set in the `IndirectBatchSet` buffer.
//
// If the batch doesn't belong to a batch set, the CPU stores `0xffffffff`
// (`!0` on the Rust side) here instead.
batch_set_index: u32,
// The number of instances that are to be drawn.
//
// The CPU initializes this to zero. The `mesh_preprocess.wgsl` shader
// determines this by atomically incrementing it, and the
// `build_indirect_params.wgsl` shader copies this value into the indirect
// draw command.
instance_count: atomic<u32>,
}
// Information about each batch set.
//
// A *batch set* is a set of meshes that might be multi-drawn together.
//
// The CPU creates this structure, and the `build_indirect_params.wgsl` shader
// modifies it. If `multi_draw_indirect_count` is in use, the GPU reads this
// value when multi-drawing a batch set in order to determine how many commands
// make up the batch set.
struct IndirectBatchSet {
// The number of commands that make up this batch set.
//
// The CPU initializes this value to zero. The `build_indirect_params.wgsl`
// shader increments this value as it processes batches.
//
// This must remain the first field: the Rust draw code passes
// `batch_set_index * size_of::<IndirectBatchSet>()` directly as the count
// offset to `multi_draw_indirect_count`, so the counter is read from byte
// offset zero of each element. (Assumes the Rust `IndirectBatchSet` mirrors
// this layout -- confirm.)
indirect_parameters_count: atomic<u32>,
// The offset of the first batch corresponding to this batch set within the
// `IndirectParametersIndexed` or `IndirectParametersNonIndexed` arrays.
indirect_parameters_base: u32,
}

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@ use bevy_ecs::{
use bytemuck::Pod;
use nonmax::NonMaxU32;
use self::gpu_preprocessing::IndirectParametersBuffer;
use self::gpu_preprocessing::IndirectParametersBuffers;
use crate::{render_phase::PhaseItemExtraIndex, sync_world::MainEntity};
use crate::{
render_phase::{
@ -58,7 +58,9 @@ impl<T: PartialEq> BatchMeta<T> {
PhaseItemExtraIndex::DynamicOffset(dynamic_offset) => {
NonMaxU32::new(dynamic_offset)
}
PhaseItemExtraIndex::None | PhaseItemExtraIndex::IndirectParametersIndex(_) => None,
PhaseItemExtraIndex::None | PhaseItemExtraIndex::IndirectParametersIndex { .. } => {
None
}
},
user_data,
}
@ -141,17 +143,36 @@ pub trait GetFullBatchData: GetBatchData {
query_item: MainEntity,
) -> Option<NonMaxU32>;
/// Writes the [`gpu_preprocessing::IndirectParameters`] necessary to draw
/// this batch into the given [`IndirectParametersBuffer`] at the given
/// index.
/// Writes the [`gpu_preprocessing::IndirectParametersMetadata`] necessary
/// to draw this batch into the given metadata buffer at the given index.
///
/// This is only used if GPU culling is enabled (which requires GPU
/// preprocessing).
fn write_batch_indirect_parameters(
param: &SystemParamItem<Self::Param>,
indirect_parameters_buffer: &mut IndirectParametersBuffer,
///
/// * `mesh_index` describes the index of the first mesh instance in this
/// batch in the `MeshInputUniform` buffer.
///
/// * `indexed` is true if the mesh is indexed or false if it's non-indexed.
///
/// * `base_output_index` is the index of the first mesh instance in this
/// batch in the `MeshUniform` output buffer.
///
/// * `batch_set_index` is the index of the batch set in the
/// [`gpu_preprocessing::IndirectBatchSet`] buffer, if this batch belongs to
/// a batch set.
///
/// * `indirect_parameters_buffers` is the buffer in which to write the
/// metadata.
///
/// * `indirect_parameters_offset` is the index in that buffer at which to
/// write the metadata.
fn write_batch_indirect_parameters_metadata(
mesh_index: u32,
indexed: bool,
base_output_index: u32,
batch_set_index: Option<NonMaxU32>,
indirect_parameters_buffers: &mut IndirectParametersBuffers,
indirect_parameters_offset: u32,
entity: MainEntity,
);
}

View File

@ -1153,7 +1153,7 @@ pub fn extract_cameras(
hdr: camera.hdr,
},
ExtractedView {
retained_view_entity: RetainedViewEntity::new(main_entity.into(), 0),
retained_view_entity: RetainedViewEntity::new(main_entity.into(), None, 0),
clip_from_view: camera.clip_from_view(),
world_from_view: *transform,
clip_from_world: None,

View File

@ -2,11 +2,15 @@ use crate::{
mesh::Mesh,
view::{self, Visibility, VisibilityClass},
};
use bevy_asset::{AssetId, Handle};
use bevy_asset::{AssetEvent, AssetId, Handle};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{component::Component, prelude::require, reflect::ReflectComponent};
use bevy_ecs::{
change_detection::DetectChangesMut, component::Component, event::EventReader, prelude::require,
reflect::ReflectComponent, system::Query,
};
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use bevy_transform::components::Transform;
use bevy_utils::{FixedHasher, HashSet};
use derive_more::derive::From;
/// A component for 2D meshes. Requires a [`MeshMaterial2d`] to be rendered, commonly using a [`ColorMaterial`].
@ -101,3 +105,32 @@ impl From<&Mesh3d> for AssetId<Mesh> {
mesh.id()
}
}
/// Flags every [`Mesh3d`] component as changed when the [`Mesh`] asset it
/// refers to has been modified.
///
/// Mesh extraction systems such as `extract_meshes_for_gpu_building` store
/// metadata about each mesh (for example, its location within each slab) in
/// the GPU structures they build. That metadata has to be refreshed whenever
/// the contents of the mesh change, so this system forces change detection to
/// fire for the affected components.
pub fn mark_3d_meshes_as_changed_if_their_assets_changed(
    mut meshes_3d: Query<&mut Mesh3d>,
    mut mesh_asset_events: EventReader<AssetEvent<Mesh>>,
) {
    // Gather the IDs of all mesh assets that reported a modification. Events
    // of any other kind are drained and ignored.
    let modified_mesh_ids: HashSet<AssetId<Mesh>, FixedHasher> = mesh_asset_events
        .read()
        .filter_map(|event| match event {
            AssetEvent::Modified { id } => Some(*id),
            _ => None,
        })
        .collect();

    // Only walk the query if at least one mesh asset was actually modified.
    if !modified_mesh_ids.is_empty() {
        for mut mesh_3d in meshes_3d.iter_mut() {
            if modified_mesh_ids.contains(&mesh_3d.0.id()) {
                mesh_3d.set_changed();
            }
        }
    }
}

View File

@ -9,6 +9,7 @@ use crate::{
render_asset::{PrepareAssetError, RenderAsset, RenderAssetPlugin, RenderAssets},
render_resource::TextureView,
texture::GpuImage,
view::VisibilitySystems,
RenderApp,
};
use allocator::MeshAllocatorPlugin;
@ -17,6 +18,7 @@ use bevy_asset::{AssetApp, AssetId, RenderAssetUsages};
use bevy_ecs::{
entity::Entity,
query::{Changed, With},
schedule::IntoSystemConfigs,
system::Query,
};
use bevy_ecs::{
@ -42,7 +44,12 @@ impl Plugin for MeshPlugin {
.register_type::<Vec<Entity>>()
// 'Mesh' must be prepared after 'Image' as meshes rely on the morph target image being ready
.add_plugins(RenderAssetPlugin::<RenderMesh, GpuImage>::default())
.add_plugins(MeshAllocatorPlugin);
.add_plugins(MeshAllocatorPlugin)
.add_systems(
PostUpdate,
components::mark_3d_meshes_as_changed_if_their_assets_changed
.ambiguous_with(VisibilitySystems::CalculateBounds),
);
let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
return;
@ -130,6 +137,12 @@ impl RenderMesh {
pub fn primitive_topology(&self) -> PrimitiveTopology {
self.key_bits.primitive_topology()
}
/// Reports whether this mesh is drawn with an index buffer.
///
/// Returns `true` when the mesh's buffer info is
/// `RenderMeshBufferInfo::Indexed` and `false` when it is
/// `RenderMeshBufferInfo::NonIndexed`.
#[inline]
pub fn indexed(&self) -> bool {
    match self.buffer_info {
        RenderMeshBufferInfo::Indexed { .. } => true,
        RenderMeshBufferInfo::NonIndexed => false,
    }
}
}
/// The index/vertex buffer info of a [`RenderMesh`].

View File

@ -36,8 +36,10 @@ pub use draw_state::*;
use encase::{internal::WriteInto, ShaderSize};
use nonmax::NonMaxU32;
pub use rangefinder::*;
use wgpu::Features;
use crate::batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport};
use crate::renderer::RenderDevice;
use crate::sync_world::MainEntity;
use crate::view::RetainedViewEntity;
use crate::{
@ -189,6 +191,7 @@ pub enum BinnedRenderPhaseBatchSets<BK> {
pub struct BinnedRenderPhaseBatchSet<BK> {
pub(crate) batches: Vec<BinnedRenderPhaseBatch>,
pub(crate) bin_key: BK,
pub(crate) index: u32,
}
impl<BK> BinnedRenderPhaseBatchSets<BK> {
@ -456,6 +459,11 @@ where
let draw_functions = world.resource::<DrawFunctions<BPI>>();
let mut draw_functions = draw_functions.write();
let render_device = world.resource::<RenderDevice>();
let multi_draw_indirect_count_supported = render_device
.features()
.contains(Features::MULTI_DRAW_INDIRECT_COUNT);
match self.batch_sets {
BinnedRenderPhaseBatchSets::DynamicUniforms(ref batch_sets) => {
debug_assert_eq!(self.batchable_mesh_keys.len(), batch_sets.len());
@ -522,6 +530,12 @@ where
continue;
};
let batch_set_index = if multi_draw_indirect_count_supported {
NonMaxU32::new(batch_set.index)
} else {
None
};
let binned_phase_item = BPI::new(
batch_set_key.clone(),
batch_set.bin_key.clone(),
@ -532,10 +546,12 @@ where
PhaseItemExtraIndex::DynamicOffset(ref dynamic_offset) => {
PhaseItemExtraIndex::DynamicOffset(*dynamic_offset)
}
PhaseItemExtraIndex::IndirectParametersIndex(ref range) => {
PhaseItemExtraIndex::IndirectParametersIndex(
range.start..(range.start + batch_set.batches.len() as u32),
)
PhaseItemExtraIndex::IndirectParametersIndex { ref range, .. } => {
PhaseItemExtraIndex::IndirectParametersIndex {
range: range.start
..(range.start + batch_set.batches.len() as u32),
batch_set_index,
}
}
},
);
@ -585,10 +601,11 @@ where
let first_indirect_parameters_index_for_entity =
u32::from(*first_indirect_parameters_index)
+ entity_index as u32;
PhaseItemExtraIndex::IndirectParametersIndex(
first_indirect_parameters_index_for_entity
PhaseItemExtraIndex::IndirectParametersIndex {
range: first_indirect_parameters_index_for_entity
..(first_indirect_parameters_index_for_entity + 1),
)
batch_set_index: None,
}
}
},
},
@ -725,10 +742,11 @@ impl UnbatchableBinnedEntityIndexSet {
u32::from(*first_indirect_parameters_index) + entity_index;
Some(UnbatchableBinnedEntityIndices {
instance_index: instance_range.start + entity_index,
extra_index: PhaseItemExtraIndex::IndirectParametersIndex(
first_indirect_parameters_index_for_this_batch
extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
range: first_indirect_parameters_index_for_this_batch
..(first_indirect_parameters_index_for_this_batch + 1),
),
batch_set_index: None,
},
})
}
UnbatchableBinnedEntityIndexSet::Dense(ref indices) => {
@ -890,12 +908,17 @@ impl UnbatchableBinnedEntityIndexSet {
first_indirect_parameters_index: None,
}
}
PhaseItemExtraIndex::IndirectParametersIndex(ref range) => {
PhaseItemExtraIndex::IndirectParametersIndex {
range: ref indirect_parameters_index,
..
} => {
// This is the first entity we've seen, and we have compute
// shaders. Initialize the fast path.
*self = UnbatchableBinnedEntityIndexSet::Sparse {
instance_range: indices.instance_index..indices.instance_index + 1,
first_indirect_parameters_index: NonMaxU32::new(range.start),
first_indirect_parameters_index: NonMaxU32::new(
indirect_parameters_index.start,
),
}
}
}
@ -909,7 +932,10 @@ impl UnbatchableBinnedEntityIndexSet {
&& indices.extra_index == PhaseItemExtraIndex::None)
|| first_indirect_parameters_index.is_some_and(
|first_indirect_parameters_index| match indices.extra_index {
PhaseItemExtraIndex::IndirectParametersIndex(ref this_range) => {
PhaseItemExtraIndex::IndirectParametersIndex {
range: ref this_range,
..
} => {
u32::from(first_indirect_parameters_index) + instance_range.end
- instance_range.start
== this_range.start
@ -1129,7 +1155,22 @@ pub enum PhaseItemExtraIndex {
/// An index into the buffer that specifies the indirect parameters for this
/// [`PhaseItem`]'s drawcall. This is used when indirect mode is on (as used
/// for GPU culling).
IndirectParametersIndex(Range<u32>),
IndirectParametersIndex {
/// The range of indirect parameters within the indirect parameters array.
///
/// If we're using `multi_draw_indirect_count`, this specifies the
/// maximum range of indirect parameters within that array. If batches
/// are ultimately culled out on the GPU, the actual number of draw
/// commands might be lower than the length of this range.
range: Range<u32>,
/// If `multi_draw_indirect_count` is in use, and this phase item is
/// part of a batch set, specifies the index of the batch set that this
/// phase item is a part of.
///
/// If `multi_draw_indirect_count` isn't in use, or this phase item
/// isn't part of a batch set, this is `None`.
batch_set_index: Option<NonMaxU32>,
},
}
impl PhaseItemExtraIndex {
@ -1139,9 +1180,11 @@ impl PhaseItemExtraIndex {
indirect_parameters_index: Option<NonMaxU32>,
) -> PhaseItemExtraIndex {
match indirect_parameters_index {
Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex(
u32::from(indirect_parameters_index)..(u32::from(indirect_parameters_index) + 1),
),
Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex {
range: u32::from(indirect_parameters_index)
..(u32::from(indirect_parameters_index) + 1),
batch_set_index: None,
},
None => PhaseItemExtraIndex::None,
}
}
@ -1172,7 +1215,11 @@ pub trait BinnedPhaseItem: PhaseItem {
/// reduces the need for rebinding between bins and improves performance.
type BinKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash;
type BatchSetKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash;
/// The key used to combine batches into batch sets.
///
/// A *batch set* is a set of meshes that can potentially be multi-drawn
/// together.
type BatchSetKey: PhaseItemBatchSetKey;
/// Creates a new binned phase item from the key and per-entity data.
///
@ -1188,6 +1235,19 @@ pub trait BinnedPhaseItem: PhaseItem {
) -> Self;
}
/// A key used to combine batches into batch sets.
///
/// A *batch set* is a set of meshes that can potentially be multi-drawn
/// together.
pub trait PhaseItemBatchSetKey: Clone + Send + Sync + PartialEq + Eq + Ord + Hash {
/// Returns true if this batch set key describes indexed meshes or false if
/// it describes non-indexed meshes.
///
/// Bevy uses this in order to determine which kind of indirect draw
/// parameters to use, if indirect drawing is enabled.
fn indexed(&self) -> bool;
}
/// Represents phase items that must be sorted. The `SortKey` specifies the
/// order that these items are drawn in. These are placed into a single array,
/// and the array as a whole is then sorted.
@ -1219,6 +1279,17 @@ pub trait SortedPhaseItem: PhaseItem {
fn sort(items: &mut [Self]) {
items.sort_unstable_by_key(Self::sort_key);
}
/// Whether this phase item targets indexed meshes (those with both vertex
/// and index buffers as opposed to just vertex buffers).
///
/// Bevy needs this information in order to properly group phase items
/// together for multi-draw indirect, because the GPU layout of indirect
/// commands differs between indexed and non-indexed meshes.
///
/// If you're implementing a custom phase item that doesn't describe a mesh,
/// you can safely return false here.
fn indexed(&self) -> bool;
}
/// A [`PhaseItem`] item, that automatically sets the appropriate render pipeline,

View File

@ -191,14 +191,25 @@ impl Msaa {
/// stable, and we can't use just [`MainEntity`] because some main world views
/// extract to multiple render world views. For example, a directional light
/// extracts to one render world view per cascade, and a point light extracts to
/// one render world view per cubemap face. So we pair the main entity with a
/// *subview index*, which *together* uniquely identify a view in the render
/// world in a way that's stable from frame to frame.
/// one render world view per cubemap face. So we pair the main entity with an
/// *auxiliary entity* and a *subview index*, which *together* uniquely identify
/// a view in the render world in a way that's stable from frame to frame.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct RetainedViewEntity {
/// The main entity that this view corresponds to.
pub main_entity: MainEntity,
/// Another entity associated with the view entity.
///
/// This is currently used for shadow cascades. If there are multiple
/// cameras, each camera needs to have its own set of shadow cascades. Thus
/// the light and subview index aren't themselves enough to uniquely
/// identify a shadow cascade: we need the camera that the cascade is
/// associated with as well. This entity stores that camera.
///
/// If not present, this will be `MainEntity(Entity::PLACEHOLDER)`.
pub auxiliary_entity: MainEntity,
/// The index of the view corresponding to the entity.
///
/// For example, for point lights that cast shadows, this is the index of
@ -208,14 +219,19 @@ pub struct RetainedViewEntity {
}
impl RetainedViewEntity {
/// Creates a new [`RetainedViewEntity`] from the given main world entity
/// and subview index.
/// Creates a new [`RetainedViewEntity`] from the given main world entity,
/// auxiliary main world entity, and subview index.
///
/// See [`RetainedViewEntity::subview_index`] for an explanation of what
/// `subview_index` is.
pub fn new(main_entity: MainEntity, subview_index: u32) -> Self {
/// `auxiliary_entity` and `subview_index` are.
pub fn new(
main_entity: MainEntity,
auxiliary_entity: Option<MainEntity>,
subview_index: u32,
) -> Self {
Self {
main_entity,
auxiliary_entity: auxiliary_entity.unwrap_or(Entity::PLACEHOLDER.into()),
subview_index,
}
}

View File

@ -5,7 +5,9 @@ use crate::{
use bevy_app::{App, Plugin};
use bevy_asset::{Asset, AssetApp, AssetId, AssetServer, Handle};
use bevy_core_pipeline::{
core_2d::{AlphaMask2d, AlphaMask2dBinKey, Opaque2d, Opaque2dBinKey, Transparent2d},
core_2d::{
AlphaMask2d, AlphaMask2dBinKey, BatchSetKey2d, Opaque2d, Opaque2dBinKey, Transparent2d,
},
tonemapping::{DebandDither, Tonemapping},
};
use bevy_derive::{Deref, DerefMut};
@ -584,7 +586,9 @@ pub fn queue_material2d_meshes<M: Material2d>(
material_bind_group_id: material_2d.get_bind_group_id().0,
};
opaque_phase.add(
(),
BatchSetKey2d {
indexed: mesh.indexed(),
},
bin_key,
(*render_entity, *visible_entity),
binned_render_phase_type,
@ -598,7 +602,9 @@ pub fn queue_material2d_meshes<M: Material2d>(
material_bind_group_id: material_2d.get_bind_group_id().0,
};
alpha_mask_phase.add(
(),
BatchSetKey2d {
indexed: mesh.indexed(),
},
bin_key,
(*render_entity, *visible_entity),
binned_render_phase_type,
@ -617,6 +623,7 @@ pub fn queue_material2d_meshes<M: Material2d>(
// Batching is done in batch_and_prepare_render_phase
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: mesh.indexed(),
});
}
}

View File

@ -18,7 +18,7 @@ use bevy_image::{BevyDefault, Image, ImageSampler, TextureFormatPixelInfo};
use bevy_math::{Affine3, Vec4};
use bevy_render::{
batching::{
gpu_preprocessing::IndirectParameters,
gpu_preprocessing::IndirectParametersMetadata,
no_gpu_preprocessing::{
self, batch_and_prepare_binned_render_phase, batch_and_prepare_sorted_render_phase,
write_batched_instance_buffer, BatchedInstanceBuffer,
@ -403,56 +403,33 @@ impl GetFullBatchData for Mesh2dPipeline {
None
}
fn write_batch_indirect_parameters(
(mesh_instances, meshes, mesh_allocator): &SystemParamItem<Self::Param>,
indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffer,
fn write_batch_indirect_parameters_metadata(
input_index: u32,
indexed: bool,
base_output_index: u32,
batch_set_index: Option<NonMaxU32>,
indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffers,
indirect_parameters_offset: u32,
main_entity: MainEntity,
) {
let Some(mesh_instance) = mesh_instances.get(&main_entity) else {
return;
};
let Some(mesh) = meshes.get(mesh_instance.mesh_asset_id) else {
return;
};
let Some(vertex_buffer_slice) =
mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id)
else {
return;
};
// Note that `IndirectParameters` covers both of these structures, even
// though they actually have distinct layouts. See the comment above that
// type for more information.
let indirect_parameters = match mesh.buffer_info {
RenderMeshBufferInfo::Indexed {
count: index_count, ..
} => {
let Some(index_buffer_slice) =
mesh_allocator.mesh_index_slice(&mesh_instance.mesh_asset_id)
else {
return;
};
IndirectParameters {
vertex_or_index_count: index_count,
instance_count: 0,
first_vertex_or_first_index: index_buffer_slice.range.start,
base_vertex_or_first_instance: vertex_buffer_slice.range.start,
first_instance: 0,
}
}
RenderMeshBufferInfo::NonIndexed => IndirectParameters {
vertex_or_index_count: mesh.vertex_count,
instance_count: 0,
first_vertex_or_first_index: vertex_buffer_slice.range.start,
base_vertex_or_first_instance: 0,
// Use `0xffffffff` as a placeholder to tell the mesh
// preprocessing shader that this is a non-indexed mesh.
first_instance: !0,
let indirect_parameters = IndirectParametersMetadata {
mesh_index: input_index,
base_output_index,
batch_set_index: match batch_set_index {
None => !0,
Some(batch_set_index) => u32::from(batch_set_index),
},
instance_count: 0,
};
indirect_parameters_buffer.set(indirect_parameters_offset, indirect_parameters);
if indexed {
indirect_parameters_buffer.set_indexed(indirect_parameters_offset, indirect_parameters);
} else {
indirect_parameters_buffer
.set_non_indexed(indirect_parameters_offset, indirect_parameters);
}
}
}

View File

@ -574,6 +574,7 @@ pub fn queue_sprites(
// batch_range and dynamic_offset will be calculated in prepare_sprites
batch_range: 0..0,
extra_index: PhaseItemExtraIndex::None,
indexed: true,
});
}
}

View File

@ -387,6 +387,7 @@ pub fn queue_shadows(
),
batch_range: 0..0,
extra_index: PhaseItemExtraIndex::None,
indexed: true,
});
}
}

View File

@ -626,7 +626,7 @@ pub fn extract_ui_camera_view(
// We use `UI_CAMERA_SUBVIEW` here so as not to conflict with the
// main 3D or 2D camera, which will have subview index 0.
let retained_view_entity =
RetainedViewEntity::new(main_entity.into(), UI_CAMERA_SUBVIEW);
RetainedViewEntity::new(main_entity.into(), None, UI_CAMERA_SUBVIEW);
// Creates the UI view.
let ui_camera_view = commands
.spawn((
@ -894,6 +894,7 @@ pub fn queue_uinodes(
// batch_range will be calculated in prepare_uinodes
batch_range: 0..0,
extra_index: PhaseItemExtraIndex::None,
indexed: true,
});
}
}

View File

@ -112,6 +112,7 @@ pub struct TransparentUi {
pub draw_function: DrawFunctionId,
pub batch_range: Range<u32>,
pub extra_index: PhaseItemExtraIndex,
pub indexed: bool,
}
impl PhaseItem for TransparentUi {
@ -162,6 +163,11 @@ impl SortedPhaseItem for TransparentUi {
fn sort(items: &mut [Self]) {
items.sort_by_key(SortedPhaseItem::sort_key);
}
/// Returns the `indexed` flag stored on this phase item, which whoever
/// queued the item supplied when constructing it.
#[inline]
fn indexed(&self) -> bool {
self.indexed
}
}
impl CachedRenderPipelinePhaseItem for TransparentUi {

View File

@ -662,6 +662,7 @@ pub fn queue_ui_material_nodes<M: UiMaterial>(
),
batch_range: 0..0,
extra_index: PhaseItemExtraIndex::None,
indexed: false,
});
}
}

View File

@ -385,6 +385,7 @@ pub fn queue_ui_slices(
),
batch_range: 0..0,
extra_index: PhaseItemExtraIndex::None,
indexed: true,
});
}
}

View File

@ -392,10 +392,10 @@ pub fn queue_colored_mesh2d(
let mesh2d_transforms = &mesh_instance.transforms;
// Get our specialized pipeline
let mut mesh2d_key = mesh_key;
if let Some(mesh) = render_meshes.get(mesh2d_handle) {
mesh2d_key |=
Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology());
}
let Some(mesh) = render_meshes.get(mesh2d_handle) else {
continue;
};
mesh2d_key |= Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology());
let pipeline_id =
pipelines.specialize(&pipeline_cache, &colored_mesh2d_pipeline, mesh2d_key);
@ -411,6 +411,7 @@ pub fn queue_colored_mesh2d(
// This material is not batched
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: mesh.indexed(),
});
}
}

View File

@ -166,6 +166,7 @@ fn queue_custom(
distance: rangefinder.distance_translation(&mesh_instance.translation),
batch_range: 0..1,
extra_index: PhaseItemExtraIndex::None,
indexed: true,
});
}
}

View File

@ -6,8 +6,11 @@
//!
//! [`SpecializedMeshPipeline`] lets you customize the entire pipeline used when rendering a mesh.
use std::any::TypeId;
use bevy::{
core_pipeline::core_3d::{Opaque3d, Opaque3dBatchSetKey, Opaque3dBinKey, CORE_3D_DEPTH_FORMAT},
ecs::system::StaticSystemParam,
math::{vec3, vec4},
pbr::{
DrawMesh, MeshPipeline, MeshPipelineKey, MeshPipelineViewLayoutKey, RenderMeshInstances,
@ -15,6 +18,14 @@ use bevy::{
},
prelude::*,
render::{
batching::GetFullBatchData,
batching::{
gpu_preprocessing::{
BatchedInstanceBuffers, IndirectParametersBuffers, PreprocessWorkItem,
PreprocessWorkItemBuffers,
},
GetBatchData,
},
extract_component::{ExtractComponent, ExtractComponentPlugin},
mesh::{Indices, MeshVertexBufferLayoutRef, PrimitiveTopology, RenderMesh},
render_asset::{RenderAssetUsages, RenderAssets},
@ -28,9 +39,11 @@ use bevy::{
RenderPipelineDescriptor, SpecializedMeshPipeline, SpecializedMeshPipelineError,
SpecializedMeshPipelines, TextureFormat, VertexState,
},
view::NoIndirectDrawing,
view::{self, ExtractedView, RenderVisibleEntities, ViewTarget, VisibilityClass},
Render, RenderApp, RenderSet,
},
utils::TypeIdMap,
};
const SHADER_ASSET_PATH: &str = "shaders/specialized_mesh_pipeline.wgsl";
@ -264,13 +277,39 @@ impl SpecializedMeshPipeline for CustomMeshPipeline {
fn queue_custom_mesh_pipeline(
pipeline_cache: Res<PipelineCache>,
custom_mesh_pipeline: Res<CustomMeshPipeline>,
mut opaque_render_phases: ResMut<ViewBinnedRenderPhases<Opaque3d>>,
opaque_draw_functions: Res<DrawFunctions<Opaque3d>>,
(mut opaque_render_phases, opaque_draw_functions): (
ResMut<ViewBinnedRenderPhases<Opaque3d>>,
Res<DrawFunctions<Opaque3d>>,
),
mut specialized_mesh_pipelines: ResMut<SpecializedMeshPipelines<CustomMeshPipeline>>,
views: Query<(&RenderVisibleEntities, &ExtractedView, &Msaa), With<ExtractedView>>,
render_meshes: Res<RenderAssets<RenderMesh>>,
render_mesh_instances: Res<RenderMeshInstances>,
views: Query<(
Entity,
&RenderVisibleEntities,
&ExtractedView,
&Msaa,
Has<NoIndirectDrawing>,
)>,
(render_meshes, render_mesh_instances): (
Res<RenderAssets<RenderMesh>>,
Res<RenderMeshInstances>,
),
param: StaticSystemParam<<MeshPipeline as GetBatchData>::Param>,
gpu_array_buffer: ResMut<
BatchedInstanceBuffers<
<MeshPipeline as GetBatchData>::BufferData,
<MeshPipeline as GetFullBatchData>::BufferInputData,
>,
>,
mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) {
let system_param_item = param.into_inner();
let BatchedInstanceBuffers {
ref mut data_buffer,
ref mut work_item_buffers,
..
} = gpu_array_buffer.into_inner();
// Get the id for our custom draw function
let draw_function_id = opaque_draw_functions
.read()
@ -279,15 +318,29 @@ fn queue_custom_mesh_pipeline(
// Render phases are per-view, so we need to iterate over all views so that
// the entity appears in them. (In this example, we have only one view, but
// it's good practice to loop over all views anyway.)
for (view_visible_entities, view, msaa) in views.iter() {
for (view_entity, view_visible_entities, view, msaa, no_indirect_drawing) in views.iter() {
let Some(opaque_phase) = opaque_render_phases.get_mut(&view.retained_view_entity) else {
continue;
};
// Create a *work item buffer* if necessary. Work item buffers store the
// indices of meshes that are to be rendered when indirect drawing is
// enabled.
let work_item_buffer = work_item_buffers
.entry(view_entity)
.or_insert_with(TypeIdMap::default)
.entry(TypeId::of::<Opaque3d>())
.or_insert_with(|| PreprocessWorkItemBuffers::new(no_indirect_drawing));
// Create the key based on the view. In this case we only care about MSAA and HDR
let view_key = MeshPipelineKey::from_msaa_samples(msaa.samples())
| MeshPipelineKey::from_hdr(view.hdr);
// Set up a slot to hold information about the batch set we're going to
// create. If there are any of our custom meshes in the scene, we'll
// need this information in order for Bevy to kick off the rendering.
let mut mesh_batch_set_info = None;
// Find all the custom rendered entities that are visible from this
// view.
for &(render_entity, visible_entity) in
@ -310,6 +363,27 @@ fn queue_custom_mesh_pipeline(
let mut mesh_key = view_key;
mesh_key |= MeshPipelineKey::from_primitive_topology(mesh.primitive_topology());
// Initialize the batch set information if this was the first custom
// mesh we saw. We'll need that information later to create the
// batch set.
if mesh_batch_set_info.is_none() {
mesh_batch_set_info = Some(MeshBatchSetInfo {
indirect_parameters_index: indirect_parameters_buffers
.allocate(mesh.indexed(), 1),
is_indexed: mesh.indexed(),
});
}
let mesh_info = mesh_batch_set_info.unwrap();
// Allocate some input and output indices. We'll need these to
// create the *work item* below.
let Some(input_index) =
MeshPipeline::get_binned_index(&system_param_item, visible_entity)
else {
continue;
};
let output_index = data_buffer.add() as u32;
// Finally, we can specialize the pipeline based on the key
let pipeline_id = specialized_mesh_pipelines
.specialize(
@ -343,6 +417,35 @@ fn queue_custom_mesh_pipeline(
// support it you can use `BinnedRenderPhaseType::UnbatchableMesh`
BinnedRenderPhaseType::BatchableMesh,
);
// Create a *work item*. A work item tells the Bevy renderer to
// transform the mesh on the GPU.
work_item_buffer.push(
mesh.indexed(),
PreprocessWorkItem {
input_index: input_index.into(),
output_index,
indirect_parameters_index: mesh_info.indirect_parameters_index,
},
);
}
// Now if there were any meshes, we need to add a command to the
// indirect parameters buffer, so that the renderer will end up
// enqueuing a command to draw the mesh.
if let Some(mesh_info) = mesh_batch_set_info {
indirect_parameters_buffers
.add_batch_set(mesh_info.is_indexed, mesh_info.indirect_parameters_index);
}
}
}
/// Information needed to create the batch set for the custom meshes.
///
/// This is only populated if we end up having any custom meshes to draw.
#[derive(Copy, Clone)]
struct MeshBatchSetInfo {
    /// Whether the mesh is indexed (has an index buffer).
    is_indexed: bool,
    /// The first index of the mesh batch in the indirect parameters buffer.
    indirect_parameters_index: u32,
}