
# Objective - Fixes #13728 ## Solution - Add a new feature, `smaa_luts`. If it is enabled, it also enables `ktx2` and `zstd`; if it is not, the files are not loaded and placeholders are used instead. - Add all the needed resources in the same places where the systems that use them are added.
666 lines
25 KiB
Rust
666 lines
25 KiB
Rust
//! Batching functionality when GPU preprocessing is in use.
|
|
|
|
use bevy_app::{App, Plugin};
|
|
use bevy_derive::{Deref, DerefMut};
|
|
use bevy_ecs::{
|
|
entity::Entity,
|
|
query::{Has, With},
|
|
schedule::IntoSystemConfigs as _,
|
|
system::{Query, Res, ResMut, Resource, StaticSystemParam},
|
|
world::{FromWorld, World},
|
|
};
|
|
use bevy_encase_derive::ShaderType;
|
|
use bevy_utils::EntityHashMap;
|
|
use bytemuck::{Pod, Zeroable};
|
|
use nonmax::NonMaxU32;
|
|
use smallvec::smallvec;
|
|
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
|
|
|
|
use crate::{
|
|
render_phase::{
|
|
BinnedPhaseItem, BinnedRenderPhaseBatch, CachedRenderPipelinePhaseItem,
|
|
PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices,
|
|
ViewBinnedRenderPhases, ViewSortedRenderPhases,
|
|
},
|
|
render_resource::{BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
|
|
renderer::{RenderAdapter, RenderDevice, RenderQueue},
|
|
view::{GpuCulling, ViewTarget},
|
|
Render, RenderApp, RenderSet,
|
|
};
|
|
|
|
use super::{BatchMeta, GetBatchData, GetFullBatchData};
|
|
|
|
/// Plugin that sets up the batching resources in the render sub-app.
///
/// When the render sub-app exists, it inserts the
/// [`IndirectParametersBuffer`], schedules
/// [`write_indirect_parameters_buffer`], and (in `finish`) initializes
/// [`GpuPreprocessingSupport`].
pub struct BatchingPlugin;
|
|
|
|
impl Plugin for BatchingPlugin {
|
|
fn build(&self, app: &mut App) {
|
|
let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
|
|
return;
|
|
};
|
|
|
|
render_app
|
|
.insert_resource(IndirectParametersBuffer::new())
|
|
.add_systems(
|
|
Render,
|
|
write_indirect_parameters_buffer.in_set(RenderSet::PrepareResourcesFlush),
|
|
);
|
|
}
|
|
|
|
fn finish(&self, app: &mut App) {
|
|
let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
|
|
return;
|
|
};
|
|
|
|
render_app.init_resource::<GpuPreprocessingSupport>();
|
|
}
|
|
}
|
|
|
|
/// Records whether GPU preprocessing and/or GPU culling are supported on the
|
|
/// device.
|
|
///
|
|
/// No GPU preprocessing is supported on WebGL because of the lack of compute
|
|
/// shader support. GPU preprocessing is supported on DirectX 12, but due to [a
|
|
/// `wgpu` limitation] GPU culling is not.
|
|
///
|
|
/// [a `wgpu` limitation]: https://github.com/gfx-rs/wgpu/issues/2471
|
|
#[derive(Clone, Copy, PartialEq, Resource)]
|
|
pub enum GpuPreprocessingSupport {
|
|
/// No GPU preprocessing support is available at all.
|
|
None,
|
|
/// GPU preprocessing is available, but GPU culling isn't.
|
|
PreprocessingOnly,
|
|
/// Both GPU preprocessing and GPU culling are available.
|
|
Culling,
|
|
}
|
|
|
|
/// The GPU buffers holding the data needed to render batches.
|
|
///
|
|
/// For example, in the 3D PBR pipeline this holds `MeshUniform`s, which are the
|
|
/// `BD` type parameter in that mode.
|
|
///
|
|
/// We have a separate *buffer data input* type (`BDI`) here, which a compute
|
|
/// shader is expected to expand to the full buffer data (`BD`) type. GPU
|
|
/// uniform building is generally faster and uses less system RAM to VRAM bus
|
|
/// bandwidth, but only implemented for some pipelines (for example, not in the
|
|
/// 2D pipeline at present) and only when compute shader is available.
|
|
#[derive(Resource)]
|
|
pub struct BatchedInstanceBuffers<BD, BDI>
|
|
where
|
|
BD: GpuArrayBufferable + Sync + Send + 'static,
|
|
BDI: Pod,
|
|
{
|
|
/// A storage area for the buffer data that the GPU compute shader is
|
|
/// expected to write to.
|
|
///
|
|
/// There will be one entry for each index.
|
|
pub data_buffer: UninitBufferVec<BD>,
|
|
|
|
/// The index of the buffer data in the current input buffer that
|
|
/// corresponds to each instance.
|
|
///
|
|
/// This is keyed off each view. Each view has a separate buffer.
|
|
pub work_item_buffers: EntityHashMap<Entity, PreprocessWorkItemBuffer>,
|
|
|
|
/// The uniform data inputs for the current frame.
|
|
///
|
|
/// These are uploaded during the extraction phase.
|
|
pub current_input_buffer: RawBufferVec<BDI>,
|
|
|
|
/// The uniform data inputs for the previous frame.
|
|
///
|
|
/// The indices don't generally line up between `current_input_buffer`
|
|
/// and `previous_input_buffer`, because, among other reasons, entities
|
|
/// can spawn or despawn between frames. Instead, each current buffer
|
|
/// data input uniform is expected to contain the index of the
|
|
/// corresponding buffer data input uniform in this list.
|
|
pub previous_input_buffer: RawBufferVec<BDI>,
|
|
}
|
|
|
|
/// The buffer of GPU preprocessing work items for a single view.
|
|
pub struct PreprocessWorkItemBuffer {
|
|
/// The buffer of work items.
|
|
pub buffer: BufferVec<PreprocessWorkItem>,
|
|
/// True if we're using GPU culling.
|
|
pub gpu_culling: bool,
|
|
}
|
|
|
|
/// One invocation of the preprocessing shader: i.e. one mesh instance in a
|
|
/// view.
|
|
#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
|
|
#[repr(C)]
|
|
pub struct PreprocessWorkItem {
|
|
/// The index of the batch input data in the input buffer that the shader
|
|
/// reads from.
|
|
pub input_index: u32,
|
|
/// In direct mode, this is the index of the `MeshUniform` in the output
|
|
/// buffer that we write to. In indirect mode, this is the index of the
|
|
/// [`IndirectParameters`].
|
|
pub output_index: u32,
|
|
}
|
|
|
|
/// The `wgpu` indirect parameters structure.
|
|
///
|
|
/// This is actually a union of the two following structures:
|
|
///
|
|
/// ```
|
|
/// #[repr(C)]
|
|
/// struct ArrayIndirectParameters {
|
|
/// vertex_count: u32,
|
|
/// instance_count: u32,
|
|
/// first_vertex: u32,
|
|
/// first_instance: u32,
|
|
/// }
|
|
///
|
|
/// #[repr(C)]
|
|
/// struct ElementIndirectParameters {
|
|
/// index_count: u32,
|
|
/// instance_count: u32,
|
|
/// first_vertex: u32,
|
|
/// base_vertex: u32,
|
|
/// first_instance: u32,
|
|
/// }
|
|
/// ```
|
|
///
|
|
/// We actually generally treat these two variants identically in code. To do
|
|
/// that, we make the following two observations:
|
|
///
|
|
/// 1. `instance_count` is in the same place in both structures. So we can
|
|
/// access it regardless of the structure we're looking at.
|
|
///
|
|
/// 2. The second structure is one word larger than the first. Thus we need to
|
|
/// pad out the first structure by one word in order to place both structures in
|
|
/// an array. If we pad out `ArrayIndirectParameters` by copying the
|
|
/// `first_instance` field into the padding, then the resulting union structure
|
|
/// will always have a read-only copy of `first_instance` in the final word. We
|
|
/// take advantage of this in the shader to reduce branching.
|
|
#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
|
|
#[repr(C)]
|
|
pub struct IndirectParameters {
|
|
/// For `ArrayIndirectParameters`, `vertex_count`; for
|
|
/// `ElementIndirectParameters`, `index_count`.
|
|
pub vertex_or_index_count: u32,
|
|
|
|
/// The number of instances we're going to draw.
|
|
///
|
|
/// This field is in the same place in both structures.
|
|
pub instance_count: u32,
|
|
|
|
/// The index of the first vertex we're to draw.
|
|
pub first_vertex: u32,
|
|
|
|
/// For `ArrayIndirectParameters`, `first_instance`; for
|
|
/// `ElementIndirectParameters`, `base_vertex`.
|
|
pub base_vertex_or_first_instance: u32,
|
|
|
|
/// For `ArrayIndirectParameters`, this is padding; for
|
|
/// `ElementIndirectParameters`, this is `first_instance`.
|
|
///
|
|
/// Conventionally, we copy `first_instance` into this field when padding
|
|
/// out `ArrayIndirectParameters`. That way, shader code can read this value
|
|
/// at the same place, regardless of the specific structure this represents.
|
|
pub first_instance: u32,
|
|
}
|
|
|
|
/// The buffer containing the list of [`IndirectParameters`], for draw commands.
|
|
#[derive(Resource, Deref, DerefMut)]
|
|
pub struct IndirectParametersBuffer(pub BufferVec<IndirectParameters>);
|
|
|
|
impl IndirectParametersBuffer {
|
|
/// Creates the indirect parameters buffer.
|
|
pub fn new() -> IndirectParametersBuffer {
|
|
IndirectParametersBuffer(BufferVec::new(
|
|
BufferUsages::STORAGE | BufferUsages::INDIRECT,
|
|
))
|
|
}
|
|
}
|
|
|
|
impl Default for IndirectParametersBuffer {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl FromWorld for GpuPreprocessingSupport {
|
|
fn from_world(world: &mut World) -> Self {
|
|
let adapter = world.resource::<RenderAdapter>();
|
|
let device = world.resource::<RenderDevice>();
|
|
|
|
if device.limits().max_compute_workgroup_size_x == 0 ||
|
|
// filter lower end / older devices on Android as they crash when using GPU preprocessing
|
|
(cfg!(target_os = "android") && adapter.get_info().name.starts_with("Adreno (TM) 6"))
|
|
{
|
|
GpuPreprocessingSupport::None
|
|
} else if !device
|
|
.features()
|
|
.contains(Features::INDIRECT_FIRST_INSTANCE) ||
|
|
!adapter.get_downlevel_capabilities().flags.contains(
|
|
DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW)
|
|
{
|
|
GpuPreprocessingSupport::PreprocessingOnly
|
|
} else {
|
|
GpuPreprocessingSupport::Culling
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
|
|
where
|
|
BD: GpuArrayBufferable + Sync + Send + 'static,
|
|
BDI: Pod,
|
|
{
|
|
/// Creates new buffers.
|
|
pub fn new() -> Self {
|
|
BatchedInstanceBuffers {
|
|
data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
|
|
work_item_buffers: EntityHashMap::default(),
|
|
current_input_buffer: RawBufferVec::new(BufferUsages::STORAGE),
|
|
previous_input_buffer: RawBufferVec::new(BufferUsages::STORAGE),
|
|
}
|
|
}
|
|
|
|
/// Returns the binding of the buffer that contains the per-instance data.
|
|
///
|
|
/// This buffer needs to be filled in via a compute shader.
|
|
pub fn instance_data_binding(&self) -> Option<BindingResource> {
|
|
self.data_buffer
|
|
.buffer()
|
|
.map(|buffer| buffer.as_entire_binding())
|
|
}
|
|
|
|
/// Clears out the buffers in preparation for a new frame.
|
|
pub fn clear(&mut self) {
|
|
self.data_buffer.clear();
|
|
self.current_input_buffer.clear();
|
|
self.previous_input_buffer.clear();
|
|
for work_item_buffer in self.work_item_buffers.values_mut() {
|
|
work_item_buffer.buffer.clear();
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
|
|
where
|
|
BD: GpuArrayBufferable + Sync + Send + 'static,
|
|
BDI: Pod,
|
|
{
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
/// Information about a render batch that we're building up during a sorted
|
|
/// render phase.
|
|
struct SortedRenderBatch<F>
|
|
where
|
|
F: GetBatchData,
|
|
{
|
|
/// The index of the first phase item in this batch in the list of phase
|
|
/// items.
|
|
phase_item_start_index: u32,
|
|
|
|
/// The index of the first instance in this batch in the instance buffer.
|
|
instance_start_index: u32,
|
|
|
|
/// The index of the indirect parameters for this batch in the
|
|
/// [`IndirectParametersBuffer`].
|
|
///
|
|
/// If CPU culling is being used, then this will be `None`.
|
|
indirect_parameters_index: Option<NonMaxU32>,
|
|
|
|
/// Metadata that can be used to determine whether an instance can be placed
|
|
/// into this batch.
|
|
///
|
|
/// If `None`, the item inside is unbatchable.
|
|
meta: Option<BatchMeta<F::CompareData>>,
|
|
}
|
|
|
|
impl<F> SortedRenderBatch<F>
|
|
where
|
|
F: GetBatchData,
|
|
{
|
|
/// Finalizes this batch and updates the [`SortedRenderPhase`] with the
|
|
/// appropriate indices.
|
|
///
|
|
/// `instance_end_index` is the index of the last instance in this batch
|
|
/// plus one.
|
|
fn flush<I>(self, instance_end_index: u32, phase: &mut SortedRenderPhase<I>)
|
|
where
|
|
I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
|
|
{
|
|
let (batch_range, batch_extra_index) =
|
|
phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
|
|
*batch_range = self.instance_start_index..instance_end_index;
|
|
*batch_extra_index =
|
|
PhaseItemExtraIndex::maybe_indirect_parameters_index(self.indirect_parameters_index);
|
|
}
|
|
}
|
|
|
|
/// A system that runs early in extraction and clears out all the
|
|
/// [`BatchedInstanceBuffers`] for the frame.
|
|
///
|
|
/// We have to run this during extraction because, if GPU preprocessing is in
|
|
/// use, the extraction phase will write to the mesh input uniform buffers
|
|
/// directly, so the buffers need to be cleared before then.
|
|
pub fn clear_batched_gpu_instance_buffers<GFBD>(
|
|
gpu_batched_instance_buffers: Option<
|
|
ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
|
|
>,
|
|
) where
|
|
GFBD: GetFullBatchData,
|
|
{
|
|
if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers {
|
|
gpu_batched_instance_buffers.clear();
|
|
}
|
|
}
|
|
|
|
/// A system that removes GPU preprocessing work item buffers that correspond to
|
|
/// deleted [`ViewTarget`]s.
|
|
///
|
|
/// This is a separate system from [`clear_batched_gpu_instance_buffers`]
|
|
/// because [`ViewTarget`]s aren't created until after the extraction phase is
|
|
/// completed.
|
|
pub fn delete_old_work_item_buffers<GFBD>(
|
|
mut gpu_batched_instance_buffers: ResMut<
|
|
BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
|
|
>,
|
|
view_targets: Query<Entity, With<ViewTarget>>,
|
|
) where
|
|
GFBD: GetFullBatchData,
|
|
{
|
|
gpu_batched_instance_buffers
|
|
.work_item_buffers
|
|
.retain(|entity, _| view_targets.contains(*entity));
|
|
}
|
|
|
|
/// Batch the items in a sorted render phase, when GPU instance buffer building
|
|
/// is in use. This means comparing metadata needed to draw each phase item and
|
|
/// trying to combine the draws into a batch.
|
|
pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
|
|
gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
|
|
mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
|
|
mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
|
|
mut views: Query<(Entity, Has<GpuCulling>)>,
|
|
system_param_item: StaticSystemParam<GFBD::Param>,
|
|
) where
|
|
I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
|
|
GFBD: GetFullBatchData,
|
|
{
|
|
// We only process GPU-built batch data in this function.
|
|
let BatchedInstanceBuffers {
|
|
ref mut data_buffer,
|
|
ref mut work_item_buffers,
|
|
..
|
|
} = gpu_array_buffer.into_inner();
|
|
|
|
for (view, gpu_culling) in &mut views {
|
|
let Some(phase) = sorted_render_phases.get_mut(&view) else {
|
|
continue;
|
|
};
|
|
|
|
// Create the work item buffer if necessary.
|
|
let work_item_buffer =
|
|
work_item_buffers
|
|
.entry(view)
|
|
.or_insert_with(|| PreprocessWorkItemBuffer {
|
|
buffer: BufferVec::new(BufferUsages::STORAGE),
|
|
gpu_culling,
|
|
});
|
|
|
|
// Walk through the list of phase items, building up batches as we go.
|
|
let mut batch: Option<SortedRenderBatch<GFBD>> = None;
|
|
for current_index in 0..phase.items.len() {
|
|
// Get the index of the input data, and comparison metadata, for
|
|
// this entity.
|
|
let current_batch_input_index = GFBD::get_index_and_compare_data(
|
|
&system_param_item,
|
|
phase.items[current_index].entity(),
|
|
);
|
|
|
|
// Unpack that index and metadata. Note that it's possible for index
|
|
// and/or metadata to not be present, which signifies that this
|
|
// entity is unbatchable. In that case, we break the batch here.
|
|
let (mut current_input_index, mut current_meta) = (None, None);
|
|
if let Some((input_index, maybe_meta)) = current_batch_input_index {
|
|
current_input_index = Some(input_index);
|
|
current_meta =
|
|
maybe_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));
|
|
}
|
|
|
|
// Determine if this entity can be included in the batch we're
|
|
// building up.
|
|
let can_batch = batch.as_ref().is_some_and(|batch| {
|
|
// `None` for metadata indicates that the items are unbatchable.
|
|
match (¤t_meta, &batch.meta) {
|
|
(Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,
|
|
(_, _) => false,
|
|
}
|
|
});
|
|
|
|
// Make space in the data buffer for this instance.
|
|
let current_entity = phase.items[current_index].entity();
|
|
let output_index = data_buffer.add() as u32;
|
|
|
|
// If we can't batch, break the existing batch and make a new one.
|
|
if !can_batch {
|
|
// Break a batch if we need to.
|
|
if let Some(batch) = batch.take() {
|
|
batch.flush(output_index, phase);
|
|
}
|
|
|
|
// Start a new batch.
|
|
let indirect_parameters_index = if gpu_culling {
|
|
GFBD::get_batch_indirect_parameters_index(
|
|
&system_param_item,
|
|
&mut indirect_parameters_buffer,
|
|
current_entity,
|
|
output_index,
|
|
)
|
|
} else {
|
|
None
|
|
};
|
|
batch = Some(SortedRenderBatch {
|
|
phase_item_start_index: current_index as u32,
|
|
instance_start_index: output_index,
|
|
indirect_parameters_index,
|
|
meta: current_meta,
|
|
});
|
|
}
|
|
|
|
// Add a new preprocessing work item so that the preprocessing
|
|
// shader will copy the per-instance data over.
|
|
if let (Some(batch), Some(input_index)) = (batch.as_ref(), current_input_index.as_ref())
|
|
{
|
|
work_item_buffer.buffer.push(PreprocessWorkItem {
|
|
input_index: (*input_index).into(),
|
|
output_index: match batch.indirect_parameters_index {
|
|
Some(indirect_parameters_index) => indirect_parameters_index.into(),
|
|
None => output_index,
|
|
},
|
|
});
|
|
}
|
|
}
|
|
|
|
// Flush the final batch if necessary.
|
|
if let Some(batch) = batch.take() {
|
|
batch.flush(data_buffer.len() as u32, phase);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Creates batches for a render phase that uses bins.
|
|
pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
|
|
gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
|
|
mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
|
|
mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
|
|
mut views: Query<(Entity, Has<GpuCulling>)>,
|
|
param: StaticSystemParam<GFBD::Param>,
|
|
) where
|
|
BPI: BinnedPhaseItem,
|
|
GFBD: GetFullBatchData,
|
|
{
|
|
let system_param_item = param.into_inner();
|
|
|
|
let BatchedInstanceBuffers {
|
|
ref mut data_buffer,
|
|
ref mut work_item_buffers,
|
|
..
|
|
} = gpu_array_buffer.into_inner();
|
|
|
|
for (view, gpu_culling) in &mut views {
|
|
let Some(phase) = binned_render_phases.get_mut(&view) else {
|
|
continue;
|
|
};
|
|
|
|
// Create the work item buffer if necessary; otherwise, just mark it as
|
|
// used this frame.
|
|
let work_item_buffer =
|
|
work_item_buffers
|
|
.entry(view)
|
|
.or_insert_with(|| PreprocessWorkItemBuffer {
|
|
buffer: BufferVec::new(BufferUsages::STORAGE),
|
|
gpu_culling,
|
|
});
|
|
|
|
// Prepare batchables.
|
|
|
|
for key in &phase.batchable_keys {
|
|
let mut batch: Option<BinnedRenderPhaseBatch> = None;
|
|
for &entity in &phase.batchable_values[key] {
|
|
let Some(input_index) = GFBD::get_binned_index(&system_param_item, entity) else {
|
|
continue;
|
|
};
|
|
let output_index = data_buffer.add() as u32;
|
|
|
|
match batch {
|
|
Some(ref mut batch) => {
|
|
batch.instance_range.end = output_index + 1;
|
|
work_item_buffer.buffer.push(PreprocessWorkItem {
|
|
input_index: input_index.into(),
|
|
output_index: batch
|
|
.extra_index
|
|
.as_indirect_parameters_index()
|
|
.unwrap_or(output_index),
|
|
});
|
|
}
|
|
|
|
None if gpu_culling => {
|
|
let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
|
|
&system_param_item,
|
|
&mut indirect_parameters_buffer,
|
|
entity,
|
|
output_index,
|
|
);
|
|
work_item_buffer.buffer.push(PreprocessWorkItem {
|
|
input_index: input_index.into(),
|
|
output_index: indirect_parameters_index.unwrap_or_default().into(),
|
|
});
|
|
batch = Some(BinnedRenderPhaseBatch {
|
|
representative_entity: entity,
|
|
instance_range: output_index..output_index + 1,
|
|
extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(
|
|
indirect_parameters_index,
|
|
),
|
|
});
|
|
}
|
|
|
|
None => {
|
|
work_item_buffer.buffer.push(PreprocessWorkItem {
|
|
input_index: input_index.into(),
|
|
output_index,
|
|
});
|
|
batch = Some(BinnedRenderPhaseBatch {
|
|
representative_entity: entity,
|
|
instance_range: output_index..output_index + 1,
|
|
extra_index: PhaseItemExtraIndex::NONE,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(batch) = batch {
|
|
phase.batch_sets.push(smallvec![batch]);
|
|
}
|
|
}
|
|
|
|
// Prepare unbatchables.
|
|
for key in &phase.unbatchable_keys {
|
|
let unbatchables = phase.unbatchable_values.get_mut(key).unwrap();
|
|
for &entity in &unbatchables.entities {
|
|
let Some(input_index) = GFBD::get_binned_index(&system_param_item, entity) else {
|
|
continue;
|
|
};
|
|
let output_index = data_buffer.add() as u32;
|
|
|
|
if gpu_culling {
|
|
let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
|
|
&system_param_item,
|
|
&mut indirect_parameters_buffer,
|
|
entity,
|
|
output_index,
|
|
)
|
|
.unwrap_or_default();
|
|
work_item_buffer.buffer.push(PreprocessWorkItem {
|
|
input_index: input_index.into(),
|
|
output_index: indirect_parameters_index.into(),
|
|
});
|
|
unbatchables
|
|
.buffer_indices
|
|
.add(UnbatchableBinnedEntityIndices {
|
|
instance_index: indirect_parameters_index.into(),
|
|
extra_index: PhaseItemExtraIndex::indirect_parameters_index(
|
|
indirect_parameters_index.into(),
|
|
),
|
|
});
|
|
} else {
|
|
work_item_buffer.buffer.push(PreprocessWorkItem {
|
|
input_index: input_index.into(),
|
|
output_index,
|
|
});
|
|
unbatchables
|
|
.buffer_indices
|
|
.add(UnbatchableBinnedEntityIndices {
|
|
instance_index: output_index,
|
|
extra_index: PhaseItemExtraIndex::NONE,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A system that writes all instance buffers to the GPU.
|
|
pub fn write_batched_instance_buffers<GFBD>(
|
|
render_device: Res<RenderDevice>,
|
|
render_queue: Res<RenderQueue>,
|
|
gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
|
|
) where
|
|
GFBD: GetFullBatchData,
|
|
{
|
|
let BatchedInstanceBuffers {
|
|
ref mut data_buffer,
|
|
work_item_buffers: ref mut index_buffers,
|
|
ref mut current_input_buffer,
|
|
previous_input_buffer: _,
|
|
} = gpu_array_buffer.into_inner();
|
|
|
|
data_buffer.write_buffer(&render_device);
|
|
current_input_buffer.write_buffer(&render_device, &render_queue);
|
|
// There's no need to write `previous_input_buffer`, as we wrote
|
|
// that on the previous frame, and it hasn't changed.
|
|
|
|
for index_buffer in index_buffers.values_mut() {
|
|
index_buffer
|
|
.buffer
|
|
.write_buffer(&render_device, &render_queue);
|
|
}
|
|
}
|
|
|
|
pub fn write_indirect_parameters_buffer(
|
|
render_device: Res<RenderDevice>,
|
|
render_queue: Res<RenderQueue>,
|
|
mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
|
|
) {
|
|
indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
|
|
indirect_parameters_buffer.clear();
|
|
}
|