Add GpuArrayBuffer and BatchedUniformBuffer (#8204)
# Objective - Add a type for uploading a Rust `Vec<T>` to a GPU `array<T>`. - Makes progress towards https://github.com/bevyengine/bevy/issues/89. ## Solution - Port @superdump's `BatchedUniformBuffer` to bevy main, as a fallback for WebGL2, which doesn't support storage buffers. - Rather than getting an `array<T>` in a shader, you get an `array<T, N>`, and have to rebind every N elements via dynamic offsets. - Add `GpuArrayBuffer` to abstract over `StorageBuffer<Vec<T>>`/`BatchedUniformBuffer`. ## Future Work Add a shader macro kinda thing to abstract over the following automatically: https://github.com/bevyengine/bevy/pull/8204#pullrequestreview-1396911727 --- ## Changelog * Added `GpuArrayBuffer`, `GpuComponentArrayBufferPlugin`, `GpuArrayBufferable`, and `GpuArrayBufferIndex` types. * Added `DynamicUniformBuffer::new_with_alignment()`. --------- Co-authored-by: Robert Swain <robert.swain@gmail.com> Co-authored-by: François <mockersf@gmail.com> Co-authored-by: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com> Co-authored-by: IceSentry <IceSentry@users.noreply.github.com> Co-authored-by: Vincent <9408210+konsolas@users.noreply.github.com> Co-authored-by: robtfm <50659922+robtfm@users.noreply.github.com>
This commit is contained in:
parent
264195ed77
commit
ad011d0455
55
crates/bevy_render/src/gpu_component_array_buffer.rs
Normal file
55
crates/bevy_render/src/gpu_component_array_buffer.rs
Normal file
@ -0,0 +1,55 @@
|
||||
use crate::{
|
||||
render_resource::{GpuArrayBuffer, GpuArrayBufferable},
|
||||
renderer::{RenderDevice, RenderQueue},
|
||||
Render, RenderApp, RenderSet,
|
||||
};
|
||||
use bevy_app::{App, Plugin};
|
||||
use bevy_ecs::{
|
||||
prelude::{Component, Entity},
|
||||
schedule::IntoSystemConfigs,
|
||||
system::{Commands, Query, Res, ResMut},
|
||||
};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// This plugin prepares the components of the corresponding type for the GPU
|
||||
/// by storing them in a [`GpuArrayBuffer`].
|
||||
pub struct GpuComponentArrayBufferPlugin<C: Component + GpuArrayBufferable>(PhantomData<C>);
|
||||
|
||||
impl<C: Component + GpuArrayBufferable> Plugin for GpuComponentArrayBufferPlugin<C> {
|
||||
fn build(&self, app: &mut App) {
|
||||
if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
|
||||
render_app
|
||||
.insert_resource(GpuArrayBuffer::<C>::new(
|
||||
render_app.world.resource::<RenderDevice>(),
|
||||
))
|
||||
.add_systems(
|
||||
Render,
|
||||
prepare_gpu_component_array_buffers::<C>.in_set(RenderSet::Prepare),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Component + GpuArrayBufferable> Default for GpuComponentArrayBufferPlugin<C> {
|
||||
fn default() -> Self {
|
||||
Self(PhantomData::<C>)
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare_gpu_component_array_buffers<C: Component + GpuArrayBufferable>(
|
||||
mut commands: Commands,
|
||||
render_device: Res<RenderDevice>,
|
||||
render_queue: Res<RenderQueue>,
|
||||
mut gpu_array_buffer: ResMut<GpuArrayBuffer<C>>,
|
||||
components: Query<(Entity, &C)>,
|
||||
) {
|
||||
gpu_array_buffer.clear();
|
||||
|
||||
let entities = components
|
||||
.iter()
|
||||
.map(|(entity, component)| (entity, gpu_array_buffer.push(component.clone())))
|
||||
.collect::<Vec<_>>();
|
||||
commands.insert_or_spawn_batch(entities);
|
||||
|
||||
gpu_array_buffer.write_buffer(&render_device, &render_queue);
|
||||
}
|
||||
@ -11,6 +11,7 @@ pub mod extract_component;
|
||||
mod extract_param;
|
||||
pub mod extract_resource;
|
||||
pub mod globals;
|
||||
pub mod gpu_component_array_buffer;
|
||||
pub mod mesh;
|
||||
pub mod pipelined_rendering;
|
||||
pub mod primitives;
|
||||
|
||||
152
crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
Normal file
152
crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
Normal file
@ -0,0 +1,152 @@
|
||||
use super::{GpuArrayBufferIndex, GpuArrayBufferable};
|
||||
use crate::{
|
||||
render_resource::DynamicUniformBuffer,
|
||||
renderer::{RenderDevice, RenderQueue},
|
||||
};
|
||||
use encase::{
|
||||
private::{ArrayMetadata, BufferMut, Metadata, RuntimeSizedArray, WriteInto, Writer},
|
||||
ShaderType,
|
||||
};
|
||||
use std::{marker::PhantomData, num::NonZeroU64};
|
||||
use wgpu::{BindingResource, Limits};
|
||||
|
||||
// Cap the binding size at 1MB. macOS reports a very large
// `max_uniform_buffer_binding_size`; without this cap we would make really
// large arrays there, because this value ends up being the minimum size of the
// uniform buffer as well as the size of each chunk of data at a dynamic offset.
#[cfg(not(all(feature = "webgl", target_arch = "wasm32")))]
const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 1024 * 1024;

// WebGL2 quirk: uniform buffers larger than 4KB cause extremely long shader
// compilation times, so the cap has to be lower on WebGL2. Older shader
// compilers/GPUs do not support dynamically indexing uniform buffers and
// emulate it with large switch statements over buffer indices, which take a
// long time to compile.
#[cfg(all(feature = "webgl", target_arch = "wasm32"))]
const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 4 * 1024;
|
||||
|
||||
/// Similar to [`DynamicUniformBuffer`], except every N elements (depending on size)
|
||||
/// are grouped into a batch as an `array<T, N>` in WGSL.
|
||||
///
|
||||
/// This reduces the number of rebindings required due to having to pass dynamic
|
||||
/// offsets to bind group commands, and if indices into the array can be passed
|
||||
/// in via other means, it enables batching of draw commands.
|
||||
pub struct BatchedUniformBuffer<T: GpuArrayBufferable> {
|
||||
// Batches of fixed-size arrays of T are written to this buffer so that
|
||||
// each batch in a fixed-size array can be bound at a dynamic offset.
|
||||
uniforms: DynamicUniformBuffer<MaxCapacityArray<Vec<T>>>,
|
||||
// A batch of T are gathered into this `MaxCapacityArray` until it is full,
|
||||
// then it is written into the `DynamicUniformBuffer`, cleared, and new T
|
||||
// are gathered here, and so on for each batch.
|
||||
temp: MaxCapacityArray<Vec<T>>,
|
||||
current_offset: u32,
|
||||
dynamic_offset_alignment: u32,
|
||||
}
|
||||
|
||||
impl<T: GpuArrayBufferable> BatchedUniformBuffer<T> {
|
||||
pub fn batch_size(limits: &Limits) -> usize {
|
||||
(limits
|
||||
.max_uniform_buffer_binding_size
|
||||
.min(MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE) as u64
|
||||
/ T::min_size().get()) as usize
|
||||
}
|
||||
|
||||
pub fn new(limits: &Limits) -> Self {
|
||||
let capacity = Self::batch_size(limits);
|
||||
let alignment = limits.min_uniform_buffer_offset_alignment;
|
||||
|
||||
Self {
|
||||
uniforms: DynamicUniformBuffer::new_with_alignment(alignment as u64),
|
||||
temp: MaxCapacityArray(Vec::with_capacity(capacity), capacity),
|
||||
current_offset: 0,
|
||||
dynamic_offset_alignment: alignment,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn size(&self) -> NonZeroU64 {
|
||||
self.temp.size()
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
self.uniforms.clear();
|
||||
self.current_offset = 0;
|
||||
self.temp.0.clear();
|
||||
}
|
||||
|
||||
pub fn push(&mut self, component: T) -> GpuArrayBufferIndex<T> {
|
||||
let result = GpuArrayBufferIndex {
|
||||
index: self.temp.0.len() as u32,
|
||||
dynamic_offset: Some(self.current_offset),
|
||||
element_type: PhantomData,
|
||||
};
|
||||
self.temp.0.push(component);
|
||||
if self.temp.0.len() == self.temp.1 {
|
||||
self.flush();
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn flush(&mut self) {
|
||||
self.uniforms.push(self.temp.clone());
|
||||
|
||||
self.current_offset +=
|
||||
align_to_next(self.temp.size().get(), self.dynamic_offset_alignment as u64) as u32;
|
||||
|
||||
self.temp.0.clear();
|
||||
}
|
||||
|
||||
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
|
||||
if !self.temp.0.is_empty() {
|
||||
self.flush();
|
||||
}
|
||||
self.uniforms.write_buffer(device, queue);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn binding(&self) -> Option<BindingResource> {
|
||||
let mut binding = self.uniforms.binding();
|
||||
if let Some(BindingResource::Buffer(binding)) = &mut binding {
|
||||
// MaxCapacityArray is runtime-sized so can't use T::min_size()
|
||||
binding.size = Some(self.size());
|
||||
}
|
||||
binding
|
||||
}
|
||||
}
|
||||
|
||||
/// Rounds `value` up to the nearest multiple of `alignment`.
///
/// `alignment` must be a non-zero power of two; this is checked in debug
/// builds. A `value` that is already aligned, including `0`, is returned
/// unchanged.
#[inline]
fn align_to_next(value: u64, alignment: u64) -> u64 {
    // `is_power_of_two` is false for 0, so a zero alignment trips the assert
    // instead of underflowing in `alignment - 1`.
    debug_assert!(alignment.is_power_of_two());
    let mask = alignment - 1;
    // Adding the mask before clearing the low bits avoids the `value - 1`
    // underflow that the `((value - 1) | mask) + 1` form has for `value == 0`.
    (value + mask) & !mask
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// MaxCapacityArray was implemented by Teodor Tanasoaia for encase. It was
|
||||
// copied here as it was not yet included in an encase release and it is
|
||||
// unclear if it is the correct long-term solution for encase.
|
||||
|
||||
/// A runtime-sized array `T` paired with a maximum capacity.
///
/// Field `0` holds the elements; field `1` is the capacity, i.e. the largest
/// number of elements field `0` is expected to hold.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
struct MaxCapacityArray<T>(T, usize);
|
||||
|
||||
impl<T> ShaderType for MaxCapacityArray<T>
|
||||
where
|
||||
T: ShaderType<ExtraMetadata = ArrayMetadata>,
|
||||
{
|
||||
type ExtraMetadata = ArrayMetadata;
|
||||
|
||||
const METADATA: Metadata<Self::ExtraMetadata> = T::METADATA;
|
||||
|
||||
fn size(&self) -> ::core::num::NonZeroU64 {
|
||||
Self::METADATA.stride().mul(self.1.max(1) as u64).0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> WriteInto for MaxCapacityArray<T>
|
||||
where
|
||||
T: WriteInto + RuntimeSizedArray,
|
||||
{
|
||||
fn write_into<B: BufferMut>(&self, writer: &mut Writer<B>) {
|
||||
debug_assert!(self.0.len() <= self.1);
|
||||
self.0.write_into(writer);
|
||||
}
|
||||
}
|
||||
@ -21,9 +21,11 @@ use wgpu::BufferUsages;
|
||||
/// from system RAM to VRAM.
|
||||
///
|
||||
/// Other options for storing GPU-accessible data are:
|
||||
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
|
||||
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
|
||||
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
|
||||
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
|
||||
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
|
||||
/// * [`BufferVec`](crate::render_resource::BufferVec)
|
||||
/// * [`Texture`](crate::render_resource::Texture)
|
||||
pub struct BufferVec<T: Pod> {
|
||||
|
||||
129
crates/bevy_render/src/render_resource/gpu_array_buffer.rs
Normal file
129
crates/bevy_render/src/render_resource/gpu_array_buffer.rs
Normal file
@ -0,0 +1,129 @@
|
||||
use super::StorageBuffer;
|
||||
use crate::{
|
||||
render_resource::batched_uniform_buffer::BatchedUniformBuffer,
|
||||
renderer::{RenderDevice, RenderQueue},
|
||||
};
|
||||
use bevy_ecs::{prelude::Component, system::Resource};
|
||||
use encase::{private::WriteInto, ShaderSize, ShaderType};
|
||||
use std::{marker::PhantomData, mem};
|
||||
use wgpu::{BindGroupLayoutEntry, BindingResource, BindingType, BufferBindingType, ShaderStages};
|
||||
|
||||
/// Trait for types able to go in a [`GpuArrayBuffer`].
|
||||
pub trait GpuArrayBufferable: ShaderType + ShaderSize + WriteInto + Clone {}
|
||||
impl<T: ShaderType + ShaderSize + WriteInto + Clone> GpuArrayBufferable for T {}
|
||||
|
||||
/// Stores an array of elements to be transferred to the GPU and made accessible to shaders as a read-only array.
|
||||
///
|
||||
/// On platforms that support storage buffers, this is equivalent to [`StorageBuffer<Vec<T>>`].
|
||||
/// Otherwise, this falls back to a dynamic offset uniform buffer with the largest
|
||||
/// array of T that fits within a uniform buffer binding (within reasonable limits).
|
||||
///
|
||||
/// Other options for storing GPU-accessible data are:
|
||||
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
|
||||
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
|
||||
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
|
||||
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
|
||||
/// * [`BufferVec`](crate::render_resource::BufferVec)
|
||||
/// * [`Texture`](crate::render_resource::Texture)
|
||||
#[derive(Resource)]
|
||||
pub enum GpuArrayBuffer<T: GpuArrayBufferable> {
|
||||
Uniform(BatchedUniformBuffer<T>),
|
||||
Storage((StorageBuffer<Vec<T>>, Vec<T>)),
|
||||
}
|
||||
|
||||
impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
|
||||
pub fn new(device: &RenderDevice) -> Self {
|
||||
let limits = device.limits();
|
||||
if limits.max_storage_buffers_per_shader_stage == 0 {
|
||||
GpuArrayBuffer::Uniform(BatchedUniformBuffer::new(&limits))
|
||||
} else {
|
||||
GpuArrayBuffer::Storage((StorageBuffer::default(), Vec::new()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
match self {
|
||||
GpuArrayBuffer::Uniform(buffer) => buffer.clear(),
|
||||
GpuArrayBuffer::Storage((_, buffer)) => buffer.clear(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push(&mut self, value: T) -> GpuArrayBufferIndex<T> {
|
||||
match self {
|
||||
GpuArrayBuffer::Uniform(buffer) => buffer.push(value),
|
||||
GpuArrayBuffer::Storage((_, buffer)) => {
|
||||
let index = buffer.len() as u32;
|
||||
buffer.push(value);
|
||||
GpuArrayBufferIndex {
|
||||
index,
|
||||
dynamic_offset: None,
|
||||
element_type: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
|
||||
match self {
|
||||
GpuArrayBuffer::Uniform(buffer) => buffer.write_buffer(device, queue),
|
||||
GpuArrayBuffer::Storage((buffer, vec)) => {
|
||||
buffer.set(mem::take(vec));
|
||||
buffer.write_buffer(device, queue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn binding_layout(
|
||||
binding: u32,
|
||||
visibility: ShaderStages,
|
||||
device: &RenderDevice,
|
||||
) -> BindGroupLayoutEntry {
|
||||
BindGroupLayoutEntry {
|
||||
binding,
|
||||
visibility,
|
||||
ty: if device.limits().max_storage_buffers_per_shader_stage == 0 {
|
||||
BindingType::Buffer {
|
||||
ty: BufferBindingType::Uniform,
|
||||
has_dynamic_offset: true,
|
||||
// BatchedUniformBuffer uses a MaxCapacityArray that is runtime-sized, so we use
|
||||
// None here and let wgpu figure out the size.
|
||||
min_binding_size: None,
|
||||
}
|
||||
} else {
|
||||
BindingType::Buffer {
|
||||
ty: BufferBindingType::Storage { read_only: true },
|
||||
has_dynamic_offset: false,
|
||||
min_binding_size: Some(T::min_size()),
|
||||
}
|
||||
},
|
||||
count: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn binding(&self) -> Option<BindingResource> {
|
||||
match self {
|
||||
GpuArrayBuffer::Uniform(buffer) => buffer.binding(),
|
||||
GpuArrayBuffer::Storage((buffer, _)) => buffer.binding(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn batch_size(device: &RenderDevice) -> Option<u32> {
|
||||
let limits = device.limits();
|
||||
if limits.max_storage_buffers_per_shader_stage == 0 {
|
||||
Some(BatchedUniformBuffer::<T>::batch_size(&limits) as u32)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An index into a [`GpuArrayBuffer`] for a given element.
|
||||
#[derive(Component)]
|
||||
pub struct GpuArrayBufferIndex<T: GpuArrayBufferable> {
|
||||
/// The index to use in a shader into the array.
|
||||
pub index: u32,
|
||||
/// The dynamic offset to use when setting the bind group in a pass.
|
||||
/// Only used on platforms that don't support storage buffers.
|
||||
pub dynamic_offset: Option<u32>,
|
||||
pub element_type: PhantomData<T>,
|
||||
}
|
||||
@ -1,7 +1,9 @@
|
||||
mod batched_uniform_buffer;
|
||||
mod bind_group;
|
||||
mod bind_group_layout;
|
||||
mod buffer;
|
||||
mod buffer_vec;
|
||||
mod gpu_array_buffer;
|
||||
mod pipeline;
|
||||
mod pipeline_cache;
|
||||
mod pipeline_specializer;
|
||||
@ -15,6 +17,7 @@ pub use bind_group::*;
|
||||
pub use bind_group_layout::*;
|
||||
pub use buffer::*;
|
||||
pub use buffer_vec::*;
|
||||
pub use gpu_array_buffer::*;
|
||||
pub use pipeline::*;
|
||||
pub use pipeline_cache::*;
|
||||
pub use pipeline_specializer::*;
|
||||
|
||||
@ -25,6 +25,7 @@ use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferUsa
|
||||
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
|
||||
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
|
||||
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
|
||||
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
|
||||
/// * [`BufferVec`](crate::render_resource::BufferVec)
|
||||
/// * [`Texture`](crate::render_resource::Texture)
|
||||
///
|
||||
@ -154,6 +155,7 @@ impl<T: ShaderType + WriteInto> StorageBuffer<T> {
|
||||
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
|
||||
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
|
||||
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
|
||||
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
|
||||
/// * [`BufferVec`](crate::render_resource::BufferVec)
|
||||
/// * [`Texture`](crate::render_resource::Texture)
|
||||
///
|
||||
|
||||
@ -22,9 +22,10 @@ use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferUsa
|
||||
/// (vectors), or structures with fields that are vectors.
|
||||
///
|
||||
/// Other options for storing GPU-accessible data are:
|
||||
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
|
||||
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
|
||||
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
|
||||
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
|
||||
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
|
||||
/// * [`BufferVec`](crate::render_resource::BufferVec)
|
||||
/// * [`Texture`](crate::render_resource::Texture)
|
||||
///
|
||||
@ -151,6 +152,8 @@ impl<T: ShaderType + WriteInto> UniformBuffer<T> {
|
||||
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
|
||||
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
|
||||
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
|
||||
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
|
||||
/// * [`BufferVec`](crate::render_resource::BufferVec)
|
||||
/// * [`Texture`](crate::render_resource::Texture)
|
||||
///
|
||||
/// [std140 alignment/padding requirements]: https://www.w3.org/TR/WGSL/#address-spaces-uniform
|
||||
@ -177,6 +180,17 @@ impl<T: ShaderType> Default for DynamicUniformBuffer<T> {
|
||||
}
|
||||
|
||||
impl<T: ShaderType + WriteInto> DynamicUniformBuffer<T> {
|
||||
pub fn new_with_alignment(alignment: u64) -> Self {
|
||||
Self {
|
||||
scratch: DynamicUniformBufferWrapper::new_with_alignment(Vec::new(), alignment),
|
||||
buffer: None,
|
||||
label: None,
|
||||
changed: false,
|
||||
buffer_usage: BufferUsages::COPY_DST | BufferUsages::UNIFORM,
|
||||
_marker: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn buffer(&self) -> Option<&Buffer> {
|
||||
self.buffer.as_ref()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user