Speed up CommandQueue by storing commands more densely (#6391)
# Objective * Speed up inserting and applying commands. * Halve the stack size of `CommandQueue` to 24 bytes. * Require fewer allocations. ## Solution Store commands and metadata densely within the same buffer. Each command takes up 1 `usize` of metadata, plus the bytes to store the command itself. Zero-sized types take up no space except for the metadata. # Benchmarks All of the benchmarks related to commands. | Bench | Time | % Change | p-value | |----------------------------------------|-----------|--------------|-----------------| | empty_commands/0_entities | 4.7780 ns | -18.381% | 0.00 | | spawn_commands/2000_entities | 233.11 us | -0.9961% | 0.00 | | spawn_commands/4000_entities | 448.38 us | -3.1466% | 0.00 | | spawn_commands/6000_entities | 693.12 us | -0.3978% | _0.52_ | | spawn_commands/8000_entities | 889.48 us | -2.8802% | 0.00 | | insert_commands/insert | 609.95 us | -4.8604% | 0.00 | | insert_commands/insert_batch | 355.54 us | -2.8165% | 0.00 | | fake_commands/2000_commands | 4.8018 us | **-17.802%** | 0.00 | | fake_commands/4000_commands | 9.5969 us | **-17.337%** | 0.00 | | fake_commands/6000_commands | 14.421 us | **-18.454%** | 0.00 | | fake_commands/8000_commands | 19.192 us | **-18.261%** | 0.00 | | sized_commands_0_bytes/2000_commands | 4.0593 us | -4.7145% | 0.00 | | sized_commands_0_bytes/4000_commands | 8.1541 us | -4.9470% | 0.00 | | sized_commands_0_bytes/6000_commands | 12.806 us | -12.017% | 0.00 | | sized_commands_0_bytes/8000_commands | 17.096 us | -14.070% | 0.00 | | sized_commands_12_bytes/2000_commands | 5.3425 us | **-27.632%** | 0.00 | | sized_commands_12_bytes/4000_commands | 10.283 us | **-31.158%** | 0.00 | | sized_commands_12_bytes/6000_commands | 15.339 us | **-31.418%** | 0.00 | | sized_commands_12_bytes/8000_commands | 20.206 us | **-33.133%** | 0.00 | | sized_commands_512_bytes/2000_commands | 99.118 us | -9.9655% | 0.00 | | sized_commands_512_bytes/4000_commands | 201.96 us | -8.8235% | 0.00 | | sized_commands_512_bytes/6000_commands | 300.95 us | -9.2344% | 0.00 | | sized_commands_512_bytes/8000_commands | 404.69 us | -8.4578% | 0.00 |
This commit is contained in:
parent
cbb4c26cad
commit
5d912a2f35
@ -1,4 +1,4 @@
|
||||
use std::{mem::MaybeUninit, ptr::NonNull};
|
||||
use std::mem::MaybeUninit;
|
||||
|
||||
use bevy_ptr::{OwningPtr, Unaligned};
|
||||
|
||||
@ -6,14 +6,14 @@ use super::Command;
|
||||
use crate::world::World;
|
||||
|
||||
struct CommandMeta {
|
||||
/// Offset from the start of `CommandQueue.bytes` at which the corresponding command is stored.
|
||||
offset: usize,
|
||||
/// SAFETY: The `value` must point to a value of type `T: Command`,
|
||||
/// where `T` is some specific type that was used to produce this metadata.
|
||||
apply_command: unsafe fn(value: OwningPtr<Unaligned>, world: &mut World),
|
||||
///
|
||||
/// Returns the size of `T` in bytes.
|
||||
apply_command_and_get_size: unsafe fn(value: OwningPtr<Unaligned>, world: &mut World) -> usize,
|
||||
}
|
||||
|
||||
/// A queue of [`Command`]s
|
||||
/// Densely and efficiently stores a queue of heterogenous types implementing [`Command`].
|
||||
//
|
||||
// NOTE: [`CommandQueue`] is implemented via a `Vec<MaybeUninit<u8>>` instead of a `Vec<Box<dyn Command>>`
|
||||
// as an optimization. Since commands are used frequently in systems as a way to spawn
|
||||
@ -22,12 +22,12 @@ struct CommandMeta {
|
||||
// preferred to simplicity of implementation.
|
||||
#[derive(Default)]
|
||||
pub struct CommandQueue {
|
||||
/// Densely stores the data for all commands in the queue.
|
||||
// This buffer densely stores all queued commands.
|
||||
//
|
||||
// For each command, one `CommandMeta` is stored, followed by zero or more bytes
|
||||
// to store the command itself. To interpret these bytes, a pointer must
|
||||
// be passed to the corresponding `CommandMeta.apply_command_and_get_size` fn pointer.
|
||||
bytes: Vec<MaybeUninit<u8>>,
|
||||
/// Metadata for each command stored in the queue.
|
||||
/// SAFETY: Each entry must have a corresponding value stored in `bytes`,
|
||||
/// stored at offset `CommandMeta.offset` and with an underlying type matching `CommandMeta.apply_command`.
|
||||
metas: Vec<CommandMeta>,
|
||||
}
|
||||
|
||||
// SAFETY: All commands [`Command`] implement [`Send`]
|
||||
@ -43,45 +43,50 @@ impl CommandQueue {
|
||||
where
|
||||
C: Command,
|
||||
{
|
||||
let old_len = self.bytes.len();
|
||||
// Stores a command alongside its metadata.
|
||||
// `repr(C)` prevents the compiler from reordering the fields,
|
||||
// while `repr(packed)` prevents the compiler from inserting padding bytes.
|
||||
#[repr(C, packed)]
|
||||
struct Packed<T: Command> {
|
||||
meta: CommandMeta,
|
||||
command: T,
|
||||
}
|
||||
|
||||
// SAFETY: After adding the metadata, we correctly write the corresponding `command`
|
||||
// of type `C` into `self.bytes`. Zero-sized commands do not get written into the buffer,
|
||||
// so we'll just use a dangling pointer, which is valid for zero-sized types.
|
||||
self.metas.push(CommandMeta {
|
||||
offset: old_len,
|
||||
apply_command: |command, world| {
|
||||
// SAFETY: According to the invariants of `CommandMeta.apply_command`,
|
||||
let meta = CommandMeta {
|
||||
apply_command_and_get_size: |command, world| {
|
||||
// SAFETY: According to the invariants of `CommandMeta.apply_command_and_get_size`,
|
||||
// `command` must point to a value of type `C`.
|
||||
let command: C = unsafe { command.read_unaligned() };
|
||||
command.write(world);
|
||||
std::mem::size_of::<C>()
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
let size = std::mem::size_of::<C>();
|
||||
if size > 0 {
|
||||
// Ensure that the buffer has enough space at the end to fit a value of type `C`.
|
||||
// Since `C` is non-zero sized, this also guarantees that the buffer is non-null.
|
||||
self.bytes.reserve(size);
|
||||
let old_len = self.bytes.len();
|
||||
|
||||
// SAFETY: The buffer must be at least as long as `old_len`, so this operation
|
||||
// will not overflow the pointer's original allocation.
|
||||
let ptr: *mut C = unsafe { self.bytes.as_mut_ptr().add(old_len).cast() };
|
||||
// Reserve enough bytes for both the metadata and the command itself.
|
||||
self.bytes.reserve(std::mem::size_of::<Packed<C>>());
|
||||
|
||||
// Transfer ownership of the command into the buffer.
|
||||
// SAFETY: `ptr` must be non-null, since it is within a non-null buffer.
|
||||
// The call to `reserve()` ensures that the buffer has enough space to fit a value of type `C`,
|
||||
// and it is valid to write any bit pattern since the underlying buffer is of type `MaybeUninit<u8>`.
|
||||
unsafe { ptr.write_unaligned(command) };
|
||||
// Pointer to the bytes at the end of the buffer.
|
||||
// SAFETY: We know it is within bounds of the allocation, due to the call to `.reserve()`.
|
||||
let ptr = unsafe { self.bytes.as_mut_ptr().add(old_len) };
|
||||
|
||||
// Grow the vector to include the command we just wrote.
|
||||
// SAFETY: Due to the call to `.reserve(size)` above,
|
||||
// this is guaranteed to fit in the vector's capacity.
|
||||
unsafe { self.bytes.set_len(old_len + size) };
|
||||
} else {
|
||||
// Instead of writing zero-sized types into the buffer, we'll just use a dangling pointer.
|
||||
// We must forget the command so it doesn't get double-dropped when the queue gets applied.
|
||||
std::mem::forget(command);
|
||||
// Write the metadata into the buffer, followed by the command.
|
||||
// We are using a packed struct to write them both as one operation.
|
||||
// SAFETY: `ptr` must be non-null, since it is within a non-null buffer.
|
||||
// The call to `reserve()` ensures that the buffer has enough space to fit a value of type `C`,
|
||||
// and it is valid to write any bit pattern since the underlying buffer is of type `MaybeUninit<u8>`.
|
||||
unsafe {
|
||||
ptr.cast::<Packed<C>>()
|
||||
.write_unaligned(Packed { meta, command });
|
||||
}
|
||||
|
||||
// Extend the length of the buffer to include the data we just wrote.
|
||||
// SAFETY: The new length is guaranteed to fit in the vector's capacity,
|
||||
// due to the call to `.reserve()` above.
|
||||
unsafe {
|
||||
self.bytes
|
||||
.set_len(old_len + std::mem::size_of::<Packed<C>>());
|
||||
}
|
||||
}
|
||||
|
||||
@ -92,23 +97,43 @@ impl CommandQueue {
|
||||
// flush the previously queued entities
|
||||
world.flush();
|
||||
|
||||
// Pointer that will iterate over the entries of the buffer.
|
||||
let mut cursor = self.bytes.as_mut_ptr();
|
||||
|
||||
// The address of the end of the buffer.
|
||||
let end_addr = cursor as usize + self.bytes.len();
|
||||
|
||||
// Reset the buffer, so it can be reused after this function ends.
|
||||
// In the loop below, ownership of each command will be transferred into user code.
|
||||
// SAFETY: `set_len(0)` is always valid.
|
||||
unsafe { self.bytes.set_len(0) };
|
||||
|
||||
for meta in self.metas.drain(..) {
|
||||
// SAFETY: `CommandQueue` guarantees that each metadata must have a corresponding value stored in `self.bytes`,
|
||||
// so this addition will not overflow its original allocation.
|
||||
let cmd = unsafe { self.bytes.as_mut_ptr().add(meta.offset) };
|
||||
while (cursor as usize) < end_addr {
|
||||
// SAFETY: The cursor is either at the start of the buffer, or just after the previous command.
|
||||
// Since we know that the cursor is in bounds, it must point to the start of a new command.
|
||||
let meta = unsafe { cursor.cast::<CommandMeta>().read_unaligned() };
|
||||
// Advance to the bytes just after `meta`, which represent a type-erased command.
|
||||
// SAFETY: For most types of `Command`, the pointer immediately following the metadata
|
||||
// is guaranteed to be in bounds. If the command is a zero-sized type (ZST), then the cursor
|
||||
// might be 1 byte past the end of the buffer, which is safe.
|
||||
cursor = unsafe { cursor.add(std::mem::size_of::<CommandMeta>()) };
|
||||
// Construct an owned pointer to the command.
|
||||
// SAFETY: It is safe to transfer ownership out of `self.bytes`, since the call to `set_len(0)` above
|
||||
// guarantees that nothing stored in the buffer will get observed after this function ends.
|
||||
// `cmd` points to a valid address of a stored command, so it must be non-null.
|
||||
let cmd = unsafe { OwningPtr::new(NonNull::new_unchecked(cmd.cast())) };
|
||||
// SAFETY: The underlying type of `cmd` matches the type expected by `meta.apply_command`.
|
||||
unsafe {
|
||||
(meta.apply_command)(cmd, world);
|
||||
}
|
||||
let cmd = unsafe {
|
||||
OwningPtr::<Unaligned>::new(std::ptr::NonNull::new_unchecked(cursor.cast()))
|
||||
};
|
||||
// SAFETY: The data underneath the cursor must correspond to the type erased in metadata,
|
||||
// since they were stored next to each other by `.push()`.
|
||||
// For ZSTs, the type doesn't matter as long as the pointer is non-null.
|
||||
let size = unsafe { (meta.apply_command_and_get_size)(cmd, world) };
|
||||
// Advance the cursor past the command. For ZSTs, the cursor will not move.
|
||||
// At this point, it will either point to the next `CommandMeta`,
|
||||
// or the cursor will be out of bounds and the loop will end.
|
||||
// SAFETY: The address just past the command is either within the buffer,
|
||||
// or 1 byte past the end, so this addition will not overflow the pointer's allocation.
|
||||
cursor = unsafe { cursor.add(size) };
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -217,7 +242,6 @@ mod test {
|
||||
// even though the first command panicking.
|
||||
// the `bytes`/`metas` vectors were cleared.
|
||||
assert_eq!(queue.bytes.len(), 0);
|
||||
assert_eq!(queue.metas.len(), 0);
|
||||
|
||||
// Even though the first command panicked, it's still ok to push
|
||||
// more commands.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user