Zero Copy Mesh (#15569)

# Objective

- Another step towards #15558

## Solution

- Instead of allocating a `Vec` and then having wgpu copy it into a
staging buffer, write directly into the staging buffer.
- Gets rid of another hidden copy, in `pad_to_alignment`.

Future work:
- Why is there a gcd implementation in here? It's subpar (use
binary_gcd), and it's in the hot path, run twice for every mesh, every
frame I think. Make it better and put it in bevy_math.
- Zero-copy custom mesh API to avoid having to write out a Mesh from a
custom representation.

## Testing

- lighting and many_cubes run fine (and slightly faster; I haven't
benchmarked though)

---

## Showcase

- look ma... no copies

at least when RenderAssetUsage is GPU only :3

---------

Co-authored-by: Alice Cecile <alice.i.cecile@gmail.com>
Co-authored-by: Kristoffer Søholm <k.soeholm@gmail.com>
This commit is contained in:
vero 2024-10-04 17:24:44 -04:00 committed by GitHub
parent 8b0388c74a
commit 7eadc1d467
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 61 additions and 46 deletions

View File

@ -1,9 +1,8 @@
//! Manages mesh vertex and index buffers. //! Manages mesh vertex and index buffers.
use alloc::{borrow::Cow, vec::Vec}; use alloc::vec::Vec;
use core::{ use core::{
fmt::{self, Display, Formatter}, fmt::{self, Display, Formatter},
iter,
ops::Range, ops::Range,
}; };
@ -21,8 +20,8 @@ use bevy_utils::{
}; };
use offset_allocator::{Allocation, Allocator}; use offset_allocator::{Allocation, Allocator};
use wgpu::{ use wgpu::{
util::BufferInitDescriptor, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, BufferDescriptor, BufferSize, BufferUsages, CommandEncoderDescriptor, DownlevelFlags,
DownlevelFlags, COPY_BUFFER_ALIGNMENT, COPY_BUFFER_ALIGNMENT,
}; };
use crate::{ use crate::{
@ -427,7 +426,7 @@ impl MeshAllocator {
if self.general_vertex_slabs_supported { if self.general_vertex_slabs_supported {
self.allocate( self.allocate(
mesh_id, mesh_id,
mesh.get_vertex_size() * mesh.count_vertices() as u64, mesh.get_vertex_buffer_size() as u64,
vertex_element_layout, vertex_element_layout,
&mut slabs_to_grow, &mut slabs_to_grow,
mesh_allocator_settings, mesh_allocator_settings,
@ -474,12 +473,12 @@ impl MeshAllocator {
let Some(&slab_id) = self.mesh_id_to_vertex_slab.get(mesh_id) else { let Some(&slab_id) = self.mesh_id_to_vertex_slab.get(mesh_id) else {
return; return;
}; };
let vertex_data = mesh.create_packed_vertex_buffer_data();
// Call the generic function. // Call the generic function.
self.copy_element_data( self.copy_element_data(
mesh_id, mesh_id,
&vertex_data, mesh.get_vertex_buffer_size(),
|slice| mesh.write_packed_vertex_buffer_data(slice),
BufferUsages::VERTEX, BufferUsages::VERTEX,
slab_id, slab_id,
render_device, render_device,
@ -506,7 +505,8 @@ impl MeshAllocator {
// Call the generic function. // Call the generic function.
self.copy_element_data( self.copy_element_data(
mesh_id, mesh_id,
index_data, index_data.len(),
|slice| slice.copy_from_slice(index_data),
BufferUsages::INDEX, BufferUsages::INDEX,
slab_id, slab_id,
render_device, render_device,
@ -519,7 +519,8 @@ impl MeshAllocator {
fn copy_element_data( fn copy_element_data(
&mut self, &mut self,
mesh_id: &AssetId<Mesh>, mesh_id: &AssetId<Mesh>,
data: &[u8], len: usize,
fill_data: impl Fn(&mut [u8]),
buffer_usages: BufferUsages, buffer_usages: BufferUsages,
slab_id: SlabId, slab_id: SlabId,
render_device: &RenderDevice, render_device: &RenderDevice,
@ -540,12 +541,18 @@ impl MeshAllocator {
let slot_size = general_slab.element_layout.slot_size(); let slot_size = general_slab.element_layout.slot_size();
// Write the data in. // round up size to a multiple of the slot size to satisfy wgpu alignment requirements
render_queue.write_buffer( if let Some(size) = BufferSize::new((len as u64).next_multiple_of(slot_size)) {
buffer, // Write the data in.
allocated_range.allocation.offset as u64 * slot_size, if let Some(mut buffer) = render_queue.write_buffer_with(
&pad_to_alignment(data, slot_size as usize), buffer,
); allocated_range.allocation.offset as u64 * slot_size,
size,
) {
let slice = &mut buffer.as_mut()[..len];
fill_data(slice);
}
}
// Mark the allocation as resident. // Mark the allocation as resident.
general_slab general_slab
@ -557,17 +564,22 @@ impl MeshAllocator {
debug_assert!(large_object_slab.buffer.is_none()); debug_assert!(large_object_slab.buffer.is_none());
// Create the buffer and its data in one go. // Create the buffer and its data in one go.
large_object_slab.buffer = Some(render_device.create_buffer_with_data( let buffer = render_device.create_buffer(&BufferDescriptor {
&BufferInitDescriptor { label: Some(&format!(
label: Some(&format!( "large mesh slab {} ({}buffer)",
"large mesh slab {} ({}buffer)", slab_id,
slab_id, buffer_usages_to_str(buffer_usages)
buffer_usages_to_str(buffer_usages) )),
)), size: len as u64,
contents: data, usage: buffer_usages | BufferUsages::COPY_DST,
usage: buffer_usages | BufferUsages::COPY_DST, mapped_at_creation: true,
}, });
)); {
let slice = &mut buffer.slice(..).get_mapped_range_mut()[..len];
fill_data(slice);
}
buffer.unmap();
large_object_slab.buffer = Some(buffer);
} }
} }
} }
@ -1000,21 +1012,6 @@ fn gcd(mut a: u64, mut b: u64) -> u64 {
a a
} }
/// Ensures that the size of a buffer is a multiple of the given alignment by
/// padding it with zeroes if necessary.
///
/// If the buffer already has the required size, then this function doesn't
/// allocate. Otherwise, it copies the buffer into a new one and writes the
/// appropriate number of zeroes to the end.
fn pad_to_alignment(buffer: &[u8], align: usize) -> Cow<[u8]> {
if buffer.len() % align == 0 {
return Cow::Borrowed(buffer);
}
let mut buffer = buffer.to_vec();
buffer.extend(iter::repeat(0).take(align - buffer.len() % align));
Cow::Owned(buffer)
}
/// Returns a string describing the given buffer usages. /// Returns a string describing the given buffer usages.
fn buffer_usages_to_str(buffer_usages: BufferUsages) -> &'static str { fn buffer_usages_to_str(buffer_usages: BufferUsages) -> &'static str {
if buffer_usages.contains(BufferUsages::VERTEX) { if buffer_usages.contains(BufferUsages::VERTEX) {

View File

@ -385,6 +385,13 @@ impl Mesh {
.sum() .sum()
} }
/// Returns the size required for the vertex buffer in bytes.
pub fn get_vertex_buffer_size(&self) -> usize {
let vertex_size = self.get_vertex_size() as usize;
let vertex_count = self.count_vertices();
vertex_count * vertex_size
}
/// Computes and returns the index data of the mesh as bytes. /// Computes and returns the index data of the mesh as bytes.
/// This is used to transform the index data into a GPU friendly format. /// This is used to transform the index data into a GPU friendly format.
pub fn get_index_buffer_bytes(&self) -> Option<&[u8]> { pub fn get_index_buffer_bytes(&self) -> Option<&[u8]> {
@ -458,10 +465,24 @@ impl Mesh {
/// ///
/// If the vertex attributes have different lengths, they are all truncated to /// If the vertex attributes have different lengths, they are all truncated to
/// the length of the smallest. /// the length of the smallest.
///
/// This is a convenience method which allocates a Vec.
/// Prefer pre-allocating and using [`Mesh::write_packed_vertex_buffer_data`] when possible.
pub fn create_packed_vertex_buffer_data(&self) -> Vec<u8> { pub fn create_packed_vertex_buffer_data(&self) -> Vec<u8> {
let mut attributes_interleaved_buffer = vec![0; self.get_vertex_buffer_size()];
self.write_packed_vertex_buffer_data(&mut attributes_interleaved_buffer);
attributes_interleaved_buffer
}
/// Computes and writes the vertex data of the mesh into a mutable byte slice.
/// The attributes are located in the order of their [`MeshVertexAttribute::id`].
/// This is used to transform the vertex data into a GPU friendly format.
///
/// If the vertex attributes have different lengths, they are all truncated to
/// the length of the smallest.
pub fn write_packed_vertex_buffer_data(&self, slice: &mut [u8]) {
let vertex_size = self.get_vertex_size() as usize; let vertex_size = self.get_vertex_size() as usize;
let vertex_count = self.count_vertices(); let vertex_count = self.count_vertices();
let mut attributes_interleaved_buffer = vec![0; vertex_count * vertex_size];
// bundle into interleaved buffers // bundle into interleaved buffers
let mut attribute_offset = 0; let mut attribute_offset = 0;
for attribute_data in self.attributes.values() { for attribute_data in self.attributes.values() {
@ -473,14 +494,11 @@ impl Mesh {
.enumerate() .enumerate()
{ {
let offset = vertex_index * vertex_size + attribute_offset; let offset = vertex_index * vertex_size + attribute_offset;
attributes_interleaved_buffer[offset..offset + attribute_size] slice[offset..offset + attribute_size].copy_from_slice(attribute_bytes);
.copy_from_slice(attribute_bytes);
} }
attribute_offset += attribute_size; attribute_offset += attribute_size;
} }
attributes_interleaved_buffer
} }
/// Duplicates the vertex attributes so that no vertices are shared. /// Duplicates the vertex attributes so that no vertices are shared.