Directly copy data into uniform buffers (#9865)
# Objective

This is a minimally disruptive version of #8340. I attempted to update it, but failed due to the scope of the changes added in #8204.

Fixes #8307. Partially addresses #4642. As seen in https://github.com/bevyengine/bevy/issues/8284, we're actually copying data twice in Prepare stage systems: once into a CPU-side intermediate scratch buffer, and once again into a mapped buffer. This is inefficient and effectively doubles the time spent and memory allocated to run these systems.

## Solution

Skip the scratch buffer entirely and use `wgpu::Queue::write_buffer_with` to directly write data into mapped buffers.

Separately, this also directly uses `wgpu::Limits::min_uniform_buffer_offset_alignment` to set up the alignment when writing to the buffers, partially addressing the issue raised in #4642.

Storage buffers and the abstractions built on top of `DynamicUniformBuffer` will need to come in follow-up PRs.

This may not have a noticeable performance difference in this PR, as the only first-party systems affected by this are view-related, and they are unlikely to be particularly heavy.

---

## Changelog

Added: `DynamicUniformBuffer::get_writer`.
Added: `DynamicUniformBufferWriter`.
This commit is contained in:
parent
35de5e608e
commit
12032cd296
@ -667,9 +667,16 @@ pub fn prepare_previous_view_projection_uniforms(
|
|||||||
With<MotionVectorPrepass>,
|
With<MotionVectorPrepass>,
|
||||||
>,
|
>,
|
||||||
) {
|
) {
|
||||||
view_uniforms.uniforms.clear();
|
let views_iter = views.iter();
|
||||||
|
let view_count = views_iter.len();
|
||||||
for (entity, camera, maybe_previous_view_proj) in &views {
|
let Some(mut writer) =
|
||||||
|
view_uniforms
|
||||||
|
.uniforms
|
||||||
|
.get_writer(view_count, &render_device, &render_queue)
|
||||||
|
else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
for (entity, camera, maybe_previous_view_proj) in views_iter {
|
||||||
let view_projection = match maybe_previous_view_proj {
|
let view_projection = match maybe_previous_view_proj {
|
||||||
Some(previous_view) => previous_view.clone(),
|
Some(previous_view) => previous_view.clone(),
|
||||||
None => PreviousViewProjection {
|
None => PreviousViewProjection {
|
||||||
@ -679,13 +686,9 @@ pub fn prepare_previous_view_projection_uniforms(
|
|||||||
commands
|
commands
|
||||||
.entity(entity)
|
.entity(entity)
|
||||||
.insert(PreviousViewProjectionUniformOffset {
|
.insert(PreviousViewProjectionUniformOffset {
|
||||||
offset: view_uniforms.uniforms.push(view_projection),
|
offset: writer.write(&view_projection),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
view_uniforms
|
|
||||||
.uniforms
|
|
||||||
.write_buffer(&render_device, &render_queue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Resource)]
|
#[derive(Default, Resource)]
|
||||||
|
|||||||
@ -52,9 +52,15 @@ pub fn prepare_fog(
|
|||||||
mut fog_meta: ResMut<FogMeta>,
|
mut fog_meta: ResMut<FogMeta>,
|
||||||
views: Query<(Entity, Option<&FogSettings>), With<ExtractedView>>,
|
views: Query<(Entity, Option<&FogSettings>), With<ExtractedView>>,
|
||||||
) {
|
) {
|
||||||
fog_meta.gpu_fogs.clear();
|
let views_iter = views.iter();
|
||||||
|
let view_count = views_iter.len();
|
||||||
for (entity, fog) in &views {
|
let Some(mut writer) = fog_meta
|
||||||
|
.gpu_fogs
|
||||||
|
.get_writer(view_count, &render_device, &render_queue)
|
||||||
|
else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
for (entity, fog) in views_iter {
|
||||||
let gpu_fog = if let Some(fog) = fog {
|
let gpu_fog = if let Some(fog) = fog {
|
||||||
match &fog.falloff {
|
match &fog.falloff {
|
||||||
FogFalloff::Linear { start, end } => GpuFog {
|
FogFalloff::Linear { start, end } => GpuFog {
|
||||||
@ -103,13 +109,9 @@ pub fn prepare_fog(
|
|||||||
|
|
||||||
// This is later read by `SetMeshViewBindGroup<I>`
|
// This is later read by `SetMeshViewBindGroup<I>`
|
||||||
commands.entity(entity).insert(ViewFogUniformOffset {
|
commands.entity(entity).insert(ViewFogUniformOffset {
|
||||||
offset: fog_meta.gpu_fogs.push(gpu_fog),
|
offset: writer.write(&gpu_fog),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fog_meta
|
|
||||||
.gpu_fogs
|
|
||||||
.write_buffer(&render_device, &render_queue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Inserted on each `Entity` with an `ExtractedView` to keep track of its offset
|
/// Inserted on each `Entity` with an `ExtractedView` to keep track of its offset
|
||||||
|
|||||||
@ -667,7 +667,15 @@ pub fn prepare_lights(
|
|||||||
point_lights: Query<(Entity, &ExtractedPointLight)>,
|
point_lights: Query<(Entity, &ExtractedPointLight)>,
|
||||||
directional_lights: Query<(Entity, &ExtractedDirectionalLight)>,
|
directional_lights: Query<(Entity, &ExtractedDirectionalLight)>,
|
||||||
) {
|
) {
|
||||||
light_meta.view_gpu_lights.clear();
|
let views_iter = views.iter();
|
||||||
|
let views_count = views_iter.len();
|
||||||
|
let Some(mut view_gpu_lights_writer) =
|
||||||
|
light_meta
|
||||||
|
.view_gpu_lights
|
||||||
|
.get_writer(views_count, &render_device, &render_queue)
|
||||||
|
else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
// Pre-calculate for PointLights
|
// Pre-calculate for PointLights
|
||||||
let cube_face_projection =
|
let cube_face_projection =
|
||||||
@ -1198,14 +1206,10 @@ pub fn prepare_lights(
|
|||||||
lights: view_lights,
|
lights: view_lights,
|
||||||
},
|
},
|
||||||
ViewLightsUniformOffset {
|
ViewLightsUniformOffset {
|
||||||
offset: light_meta.view_gpu_lights.push(gpu_lights),
|
offset: view_gpu_lights_writer.write(&gpu_lights),
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
light_meta
|
|
||||||
.view_gpu_lights
|
|
||||||
.write_buffer(&render_device, &render_queue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// this must match CLUSTER_COUNT_SIZE in pbr.wgsl
|
// this must match CLUSTER_COUNT_SIZE in pbr.wgsl
|
||||||
|
|||||||
@ -132,24 +132,27 @@ fn prepare_uniform_components<C: Component>(
|
|||||||
) where
|
) where
|
||||||
C: ShaderType + WriteInto + Clone,
|
C: ShaderType + WriteInto + Clone,
|
||||||
{
|
{
|
||||||
component_uniforms.uniforms.clear();
|
let components_iter = components.iter();
|
||||||
let entities = components
|
let count = components_iter.len();
|
||||||
.iter()
|
let Some(mut writer) =
|
||||||
|
component_uniforms
|
||||||
|
.uniforms
|
||||||
|
.get_writer(count, &render_device, &render_queue)
|
||||||
|
else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
let entities = components_iter
|
||||||
.map(|(entity, component)| {
|
.map(|(entity, component)| {
|
||||||
(
|
(
|
||||||
entity,
|
entity,
|
||||||
DynamicUniformIndex::<C> {
|
DynamicUniformIndex::<C> {
|
||||||
index: component_uniforms.uniforms.push(component.clone()),
|
index: writer.write(component),
|
||||||
marker: PhantomData,
|
marker: PhantomData,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
commands.insert_or_spawn_batch(entities);
|
commands.insert_or_spawn_batch(entities);
|
||||||
|
|
||||||
component_uniforms
|
|
||||||
.uniforms
|
|
||||||
.write_buffer(&render_device, &render_queue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This plugin extracts the components into the "render world".
|
/// This plugin extracts the components into the "render world".
|
||||||
|
|||||||
@ -1,14 +1,17 @@
|
|||||||
use std::marker::PhantomData;
|
use std::{marker::PhantomData, num::NonZeroU64};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
render_resource::Buffer,
|
render_resource::Buffer,
|
||||||
renderer::{RenderDevice, RenderQueue},
|
renderer::{RenderDevice, RenderQueue},
|
||||||
};
|
};
|
||||||
use encase::{
|
use encase::{
|
||||||
internal::WriteInto, DynamicUniformBuffer as DynamicUniformBufferWrapper, ShaderType,
|
internal::{AlignmentValue, BufferMut, WriteInto},
|
||||||
|
DynamicUniformBuffer as DynamicUniformBufferWrapper, ShaderType,
|
||||||
UniformBuffer as UniformBufferWrapper,
|
UniformBuffer as UniformBufferWrapper,
|
||||||
};
|
};
|
||||||
use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferUsages};
|
use wgpu::{
|
||||||
|
util::BufferInitDescriptor, BindingResource, BufferBinding, BufferDescriptor, BufferUsages,
|
||||||
|
};
|
||||||
|
|
||||||
/// Stores data to be transferred to the GPU and made accessible to shaders as a uniform buffer.
|
/// Stores data to be transferred to the GPU and made accessible to shaders as a uniform buffer.
|
||||||
///
|
///
|
||||||
@ -240,6 +243,67 @@ impl<T: ShaderType + WriteInto> DynamicUniformBuffer<T> {
|
|||||||
self.changed = true;
|
self.changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a writer that can be used to directly write elements into the target buffer.
|
||||||
|
///
|
||||||
|
/// This method uses less memory and performs fewer memory copies than using [`push`] and [`write_buffer`].
|
||||||
|
///
|
||||||
|
/// `max_count` *must* be greater than or equal to the number of elements that are to be written to the buffer, or
|
||||||
|
/// the writer will panic while writing. Dropping the writer will schedule the buffer write into the provided
|
||||||
|
/// [`RenderQueue`](crate::renderer::RenderQueue).
|
||||||
|
///
|
||||||
|
/// If there is no GPU-side buffer allocated to hold the data currently stored, or if a GPU-side buffer previously
|
||||||
|
/// allocated does not have enough capacity to hold `max_count` elements, a new GPU-side buffer is created.
|
||||||
|
///
|
||||||
|
/// Returns `None` if there is no allocated GPU-side buffer, and `max_count` is 0.
|
||||||
|
///
|
||||||
|
/// [`push`]: Self::push
|
||||||
|
/// [`write_buffer`]: Self::write_buffer
|
||||||
|
#[inline]
|
||||||
|
pub fn get_writer<'a>(
|
||||||
|
&'a mut self,
|
||||||
|
max_count: usize,
|
||||||
|
device: &RenderDevice,
|
||||||
|
queue: &'a RenderQueue,
|
||||||
|
) -> Option<DynamicUniformBufferWriter<'a, T>> {
|
||||||
|
let alignment =
|
||||||
|
AlignmentValue::new(device.limits().min_uniform_buffer_offset_alignment as u64);
|
||||||
|
let mut capacity = self.buffer.as_deref().map(wgpu::Buffer::size).unwrap_or(0);
|
||||||
|
let size = alignment
|
||||||
|
.round_up(T::min_size().get())
|
||||||
|
.checked_mul(max_count as u64)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
if capacity < size || self.changed {
|
||||||
|
let buffer = device.create_buffer(&BufferDescriptor {
|
||||||
|
label: self.label.as_deref(),
|
||||||
|
usage: self.buffer_usage,
|
||||||
|
size,
|
||||||
|
mapped_at_creation: false,
|
||||||
|
});
|
||||||
|
capacity = buffer.size();
|
||||||
|
self.buffer = Some(buffer);
|
||||||
|
self.changed = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(buffer) = self.buffer.as_deref() {
|
||||||
|
let buffer_view = queue
|
||||||
|
.write_buffer_with(buffer, 0, NonZeroU64::new(buffer.size())?)
|
||||||
|
.unwrap();
|
||||||
|
Some(DynamicUniformBufferWriter {
|
||||||
|
buffer: encase::DynamicUniformBuffer::new_with_alignment(
|
||||||
|
QueueWriteBufferViewWrapper {
|
||||||
|
capacity: capacity as usize,
|
||||||
|
buffer_view,
|
||||||
|
},
|
||||||
|
alignment.get(),
|
||||||
|
),
|
||||||
|
_marker: PhantomData,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Queues writing of data from system RAM to VRAM using the [`RenderDevice`](crate::renderer::RenderDevice)
|
/// Queues writing of data from system RAM to VRAM using the [`RenderDevice`](crate::renderer::RenderDevice)
|
||||||
/// and the provided [`RenderQueue`](crate::renderer::RenderQueue).
|
/// and the provided [`RenderQueue`](crate::renderer::RenderQueue).
|
||||||
///
|
///
|
||||||
@ -268,3 +332,38 @@ impl<T: ShaderType + WriteInto> DynamicUniformBuffer<T> {
|
|||||||
self.scratch.set_offset(0);
|
self.scratch.set_offset(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A writer that can be used to directly write elements into the target buffer.
|
||||||
|
///
|
||||||
|
/// For more information, see [`DynamicUniformBuffer::get_writer`].
|
||||||
|
pub struct DynamicUniformBufferWriter<'a, T> {
|
||||||
|
buffer: encase::DynamicUniformBuffer<QueueWriteBufferViewWrapper<'a>>,
|
||||||
|
_marker: PhantomData<fn() -> T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T: ShaderType + WriteInto> DynamicUniformBufferWriter<'a, T> {
|
||||||
|
pub fn write(&mut self, value: &T) -> u32 {
|
||||||
|
self.buffer.write(value).unwrap() as u32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A wrapper to work around the orphan rule so that [`wgpu::QueueWriteBufferView`] can implement
|
||||||
|
/// [`encase::internal::BufferMut`].
|
||||||
|
struct QueueWriteBufferViewWrapper<'a> {
|
||||||
|
buffer_view: wgpu::QueueWriteBufferView<'a>,
|
||||||
|
// Must be kept separately and cannot be retrieved from buffer_view, as the read-only access will
|
||||||
|
// invoke a panic.
|
||||||
|
capacity: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> BufferMut for QueueWriteBufferViewWrapper<'a> {
|
||||||
|
#[inline]
|
||||||
|
fn capacity(&self) -> usize {
|
||||||
|
self.capacity
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn write<const N: usize>(&mut self, offset: usize, val: &[u8; N]) {
|
||||||
|
self.buffer_view.write(offset, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -356,8 +356,15 @@ pub fn prepare_view_uniforms(
|
|||||||
Option<&MipBias>,
|
Option<&MipBias>,
|
||||||
)>,
|
)>,
|
||||||
) {
|
) {
|
||||||
view_uniforms.uniforms.clear();
|
let view_iter = views.iter();
|
||||||
|
let view_count = view_iter.len();
|
||||||
|
let Some(mut writer) =
|
||||||
|
view_uniforms
|
||||||
|
.uniforms
|
||||||
|
.get_writer(view_count, &render_device, &render_queue)
|
||||||
|
else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
for (entity, camera, temporal_jitter, mip_bias) in &views {
|
for (entity, camera, temporal_jitter, mip_bias) in &views {
|
||||||
let viewport = camera.viewport.as_vec4();
|
let viewport = camera.viewport.as_vec4();
|
||||||
let unjittered_projection = camera.projection;
|
let unjittered_projection = camera.projection;
|
||||||
@ -380,7 +387,7 @@ pub fn prepare_view_uniforms(
|
|||||||
};
|
};
|
||||||
|
|
||||||
let view_uniforms = ViewUniformOffset {
|
let view_uniforms = ViewUniformOffset {
|
||||||
offset: view_uniforms.uniforms.push(ViewUniform {
|
offset: writer.write(&ViewUniform {
|
||||||
view_proj,
|
view_proj,
|
||||||
unjittered_view_proj: unjittered_projection * inverse_view,
|
unjittered_view_proj: unjittered_projection * inverse_view,
|
||||||
inverse_view_proj: view * inverse_projection,
|
inverse_view_proj: view * inverse_projection,
|
||||||
@ -397,10 +404,6 @@ pub fn prepare_view_uniforms(
|
|||||||
|
|
||||||
commands.entity(entity).insert(view_uniforms);
|
commands.entity(entity).insert(view_uniforms);
|
||||||
}
|
}
|
||||||
|
|
||||||
view_uniforms
|
|
||||||
.uniforms
|
|
||||||
.write_buffer(&render_device, &render_queue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user