Zero Copy Mesh (#15569)
# Objective

- Another step towards #15558

## Solution

- Instead of allocating a Vec and then having wgpu copy it into a staging buffer, write directly into the staging buffer.
- Gets rid of another hidden copy, in `pad_to_alignment`.

Future work:

- Why is there a gcd implementation in here? (And it's subpar; use binary_gcd. It's in the hot path, run twice for every mesh, every frame I think?) Make it better and put it in bevy_math.
- Zero-copy custom mesh API to avoid having to write out a Mesh from a custom rep.

## Testing

- lighting and many_cubes run fine (and slightly faster; haven't benchmarked though)

---

## Showcase

- look ma... no copies, at least when RenderAssetUsage is GPU only :3

---------

Co-authored-by: Alice Cecile <alice.i.cecile@gmail.com>
Co-authored-by: Kristoffer Søholm <k.soeholm@gmail.com>
This commit is contained in:
parent
8b0388c74a
commit
7eadc1d467
@ -1,9 +1,8 @@
|
|||||||
//! Manages mesh vertex and index buffers.
|
//! Manages mesh vertex and index buffers.
|
||||||
|
|
||||||
use alloc::{borrow::Cow, vec::Vec};
|
use alloc::vec::Vec;
|
||||||
use core::{
|
use core::{
|
||||||
fmt::{self, Display, Formatter},
|
fmt::{self, Display, Formatter},
|
||||||
iter,
|
|
||||||
ops::Range,
|
ops::Range,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -21,8 +20,8 @@ use bevy_utils::{
|
|||||||
};
|
};
|
||||||
use offset_allocator::{Allocation, Allocator};
|
use offset_allocator::{Allocation, Allocator};
|
||||||
use wgpu::{
|
use wgpu::{
|
||||||
util::BufferInitDescriptor, BufferDescriptor, BufferUsages, CommandEncoderDescriptor,
|
BufferDescriptor, BufferSize, BufferUsages, CommandEncoderDescriptor, DownlevelFlags,
|
||||||
DownlevelFlags, COPY_BUFFER_ALIGNMENT,
|
COPY_BUFFER_ALIGNMENT,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -427,7 +426,7 @@ impl MeshAllocator {
|
|||||||
if self.general_vertex_slabs_supported {
|
if self.general_vertex_slabs_supported {
|
||||||
self.allocate(
|
self.allocate(
|
||||||
mesh_id,
|
mesh_id,
|
||||||
mesh.get_vertex_size() * mesh.count_vertices() as u64,
|
mesh.get_vertex_buffer_size() as u64,
|
||||||
vertex_element_layout,
|
vertex_element_layout,
|
||||||
&mut slabs_to_grow,
|
&mut slabs_to_grow,
|
||||||
mesh_allocator_settings,
|
mesh_allocator_settings,
|
||||||
@ -474,12 +473,12 @@ impl MeshAllocator {
|
|||||||
let Some(&slab_id) = self.mesh_id_to_vertex_slab.get(mesh_id) else {
|
let Some(&slab_id) = self.mesh_id_to_vertex_slab.get(mesh_id) else {
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
let vertex_data = mesh.create_packed_vertex_buffer_data();
|
|
||||||
|
|
||||||
// Call the generic function.
|
// Call the generic function.
|
||||||
self.copy_element_data(
|
self.copy_element_data(
|
||||||
mesh_id,
|
mesh_id,
|
||||||
&vertex_data,
|
mesh.get_vertex_buffer_size(),
|
||||||
|
|slice| mesh.write_packed_vertex_buffer_data(slice),
|
||||||
BufferUsages::VERTEX,
|
BufferUsages::VERTEX,
|
||||||
slab_id,
|
slab_id,
|
||||||
render_device,
|
render_device,
|
||||||
@ -506,7 +505,8 @@ impl MeshAllocator {
|
|||||||
// Call the generic function.
|
// Call the generic function.
|
||||||
self.copy_element_data(
|
self.copy_element_data(
|
||||||
mesh_id,
|
mesh_id,
|
||||||
index_data,
|
index_data.len(),
|
||||||
|
|slice| slice.copy_from_slice(index_data),
|
||||||
BufferUsages::INDEX,
|
BufferUsages::INDEX,
|
||||||
slab_id,
|
slab_id,
|
||||||
render_device,
|
render_device,
|
||||||
@ -519,7 +519,8 @@ impl MeshAllocator {
|
|||||||
fn copy_element_data(
|
fn copy_element_data(
|
||||||
&mut self,
|
&mut self,
|
||||||
mesh_id: &AssetId<Mesh>,
|
mesh_id: &AssetId<Mesh>,
|
||||||
data: &[u8],
|
len: usize,
|
||||||
|
fill_data: impl Fn(&mut [u8]),
|
||||||
buffer_usages: BufferUsages,
|
buffer_usages: BufferUsages,
|
||||||
slab_id: SlabId,
|
slab_id: SlabId,
|
||||||
render_device: &RenderDevice,
|
render_device: &RenderDevice,
|
||||||
@ -540,12 +541,18 @@ impl MeshAllocator {
|
|||||||
|
|
||||||
let slot_size = general_slab.element_layout.slot_size();
|
let slot_size = general_slab.element_layout.slot_size();
|
||||||
|
|
||||||
// Write the data in.
|
// round up size to a multiple of the slot size to satisfy wgpu alignment requirements
|
||||||
render_queue.write_buffer(
|
if let Some(size) = BufferSize::new((len as u64).next_multiple_of(slot_size)) {
|
||||||
buffer,
|
// Write the data in.
|
||||||
allocated_range.allocation.offset as u64 * slot_size,
|
if let Some(mut buffer) = render_queue.write_buffer_with(
|
||||||
&pad_to_alignment(data, slot_size as usize),
|
buffer,
|
||||||
);
|
allocated_range.allocation.offset as u64 * slot_size,
|
||||||
|
size,
|
||||||
|
) {
|
||||||
|
let slice = &mut buffer.as_mut()[..len];
|
||||||
|
fill_data(slice);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Mark the allocation as resident.
|
// Mark the allocation as resident.
|
||||||
general_slab
|
general_slab
|
||||||
@ -557,17 +564,22 @@ impl MeshAllocator {
|
|||||||
debug_assert!(large_object_slab.buffer.is_none());
|
debug_assert!(large_object_slab.buffer.is_none());
|
||||||
|
|
||||||
// Create the buffer and its data in one go.
|
// Create the buffer and its data in one go.
|
||||||
large_object_slab.buffer = Some(render_device.create_buffer_with_data(
|
let buffer = render_device.create_buffer(&BufferDescriptor {
|
||||||
&BufferInitDescriptor {
|
label: Some(&format!(
|
||||||
label: Some(&format!(
|
"large mesh slab {} ({}buffer)",
|
||||||
"large mesh slab {} ({}buffer)",
|
slab_id,
|
||||||
slab_id,
|
buffer_usages_to_str(buffer_usages)
|
||||||
buffer_usages_to_str(buffer_usages)
|
)),
|
||||||
)),
|
size: len as u64,
|
||||||
contents: data,
|
usage: buffer_usages | BufferUsages::COPY_DST,
|
||||||
usage: buffer_usages | BufferUsages::COPY_DST,
|
mapped_at_creation: true,
|
||||||
},
|
});
|
||||||
));
|
{
|
||||||
|
let slice = &mut buffer.slice(..).get_mapped_range_mut()[..len];
|
||||||
|
fill_data(slice);
|
||||||
|
}
|
||||||
|
buffer.unmap();
|
||||||
|
large_object_slab.buffer = Some(buffer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1000,21 +1012,6 @@ fn gcd(mut a: u64, mut b: u64) -> u64 {
|
|||||||
a
|
a
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Ensures that the size of a buffer is a multiple of the given alignment by
|
|
||||||
/// padding it with zeroes if necessary.
|
|
||||||
///
|
|
||||||
/// If the buffer already has the required size, then this function doesn't
|
|
||||||
/// allocate. Otherwise, it copies the buffer into a new one and writes the
|
|
||||||
/// appropriate number of zeroes to the end.
|
|
||||||
fn pad_to_alignment(buffer: &[u8], align: usize) -> Cow<[u8]> {
|
|
||||||
if buffer.len() % align == 0 {
|
|
||||||
return Cow::Borrowed(buffer);
|
|
||||||
}
|
|
||||||
let mut buffer = buffer.to_vec();
|
|
||||||
buffer.extend(iter::repeat(0).take(align - buffer.len() % align));
|
|
||||||
Cow::Owned(buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns a string describing the given buffer usages.
|
/// Returns a string describing the given buffer usages.
|
||||||
fn buffer_usages_to_str(buffer_usages: BufferUsages) -> &'static str {
|
fn buffer_usages_to_str(buffer_usages: BufferUsages) -> &'static str {
|
||||||
if buffer_usages.contains(BufferUsages::VERTEX) {
|
if buffer_usages.contains(BufferUsages::VERTEX) {
|
||||||
|
@ -385,6 +385,13 @@ impl Mesh {
|
|||||||
.sum()
|
.sum()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the size required for the vertex buffer in bytes.
|
||||||
|
pub fn get_vertex_buffer_size(&self) -> usize {
|
||||||
|
let vertex_size = self.get_vertex_size() as usize;
|
||||||
|
let vertex_count = self.count_vertices();
|
||||||
|
vertex_count * vertex_size
|
||||||
|
}
|
||||||
|
|
||||||
/// Computes and returns the index data of the mesh as bytes.
|
/// Computes and returns the index data of the mesh as bytes.
|
||||||
/// This is used to transform the index data into a GPU friendly format.
|
/// This is used to transform the index data into a GPU friendly format.
|
||||||
pub fn get_index_buffer_bytes(&self) -> Option<&[u8]> {
|
pub fn get_index_buffer_bytes(&self) -> Option<&[u8]> {
|
||||||
@ -458,10 +465,24 @@ impl Mesh {
|
|||||||
///
|
///
|
||||||
/// If the vertex attributes have different lengths, they are all truncated to
|
/// If the vertex attributes have different lengths, they are all truncated to
|
||||||
/// the length of the smallest.
|
/// the length of the smallest.
|
||||||
|
///
|
||||||
|
/// This is a convenience method which allocates a Vec.
|
||||||
|
/// Prefer pre-allocating and using [`Mesh::write_packed_vertex_buffer_data`] when possible.
|
||||||
pub fn create_packed_vertex_buffer_data(&self) -> Vec<u8> {
|
pub fn create_packed_vertex_buffer_data(&self) -> Vec<u8> {
|
||||||
|
let mut attributes_interleaved_buffer = vec![0; self.get_vertex_buffer_size()];
|
||||||
|
self.write_packed_vertex_buffer_data(&mut attributes_interleaved_buffer);
|
||||||
|
attributes_interleaved_buffer
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes and writes the vertex data of the mesh into a mutable byte slice.
|
||||||
|
/// The attributes are located in the order of their [`MeshVertexAttribute::id`].
|
||||||
|
/// This is used to transform the vertex data into a GPU friendly format.
|
||||||
|
///
|
||||||
|
/// If the vertex attributes have different lengths, they are all truncated to
|
||||||
|
/// the length of the smallest.
|
||||||
|
pub fn write_packed_vertex_buffer_data(&self, slice: &mut [u8]) {
|
||||||
let vertex_size = self.get_vertex_size() as usize;
|
let vertex_size = self.get_vertex_size() as usize;
|
||||||
let vertex_count = self.count_vertices();
|
let vertex_count = self.count_vertices();
|
||||||
let mut attributes_interleaved_buffer = vec![0; vertex_count * vertex_size];
|
|
||||||
// bundle into interleaved buffers
|
// bundle into interleaved buffers
|
||||||
let mut attribute_offset = 0;
|
let mut attribute_offset = 0;
|
||||||
for attribute_data in self.attributes.values() {
|
for attribute_data in self.attributes.values() {
|
||||||
@ -473,14 +494,11 @@ impl Mesh {
|
|||||||
.enumerate()
|
.enumerate()
|
||||||
{
|
{
|
||||||
let offset = vertex_index * vertex_size + attribute_offset;
|
let offset = vertex_index * vertex_size + attribute_offset;
|
||||||
attributes_interleaved_buffer[offset..offset + attribute_size]
|
slice[offset..offset + attribute_size].copy_from_slice(attribute_bytes);
|
||||||
.copy_from_slice(attribute_bytes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
attribute_offset += attribute_size;
|
attribute_offset += attribute_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes_interleaved_buffer
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Duplicates the vertex attributes so that no vertices are shared.
|
/// Duplicates the vertex attributes so that no vertices are shared.
|
||||||
|
Loading…
Reference in New Issue
Block a user