diff --git a/crates/bevy_pbr/src/lightmap/mod.rs b/crates/bevy_pbr/src/lightmap/mod.rs index fbb5ea2731..913f86a812 100644 --- a/crates/bevy_pbr/src/lightmap/mod.rs +++ b/crates/bevy_pbr/src/lightmap/mod.rs @@ -40,7 +40,7 @@ use bevy_ecs::{ }; use bevy_math::{uvec2, vec4, Rect, UVec2}; use bevy_reflect::{std_traits::ReflectDefault, Reflect}; -use bevy_render::mesh::GpuMesh; +use bevy_render::mesh::RenderMesh; use bevy_render::texture::GpuImage; use bevy_render::{ mesh::Mesh, render_asset::RenderAssets, render_resource::Shader, texture::Image, @@ -145,7 +145,7 @@ fn extract_lightmaps( lightmaps: Extract>, render_mesh_instances: Res, images: Res>, - meshes: Res>, + meshes: Res>, ) { // Clear out the old frame's data. render_lightmaps.render_lightmaps.clear(); diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index a1c07e7413..ba25fa30cd 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -25,7 +25,7 @@ use bevy_render::{ camera::TemporalJitter, extract_instances::{ExtractInstancesPlugin, ExtractedInstances}, extract_resource::ExtractResource, - mesh::{GpuMesh, MeshVertexBufferLayoutRef}, + mesh::{MeshVertexBufferLayoutRef, RenderMesh}, render_asset::{PrepareAssetError, RenderAsset, RenderAssetPlugin, RenderAssets}, render_phase::*, render_resource::*, @@ -537,7 +537,7 @@ pub fn queue_material_meshes( mut pipelines: ResMut>>, pipeline_cache: Res, msaa: Res, - render_meshes: Res>, + render_meshes: Res>, render_materials: Res>>, render_mesh_instances: Res, render_material_instances: Res>, diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 0d5757a687..80c6b31121 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -1,6 +1,6 @@ mod prepass_bindings; -use bevy_render::mesh::{GpuMesh, MeshVertexBufferLayoutRef}; +use bevy_render::mesh::{MeshVertexBufferLayoutRef, RenderMesh}; use bevy_render::render_resource::binding_types::uniform_buffer; use bevy_render::view::WithMesh; pub use prepass_bindings::*; @@ -680,7 +680,7 @@ pub fn queue_prepass_material_meshes( mut pipelines: ResMut>>, pipeline_cache: Res, msaa: Res, - render_meshes: Res>, + render_meshes: Res>, render_mesh_instances: Res, render_materials: Res>>, render_material_instances: Res>, diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index 66ddd2b5e0..436cbb51fa 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -7,7 +7,7 @@ use bevy_ecs::{entity::EntityHashMap, system::lifetimeless::Read}; use bevy_math::{Mat4, UVec4, Vec2, Vec3, Vec3Swizzles, Vec4, Vec4Swizzles}; use bevy_render::{ diagnostic::RecordDiagnostics, - mesh::GpuMesh, + mesh::RenderMesh, primitives::{CascadesFrusta, CubemapFrusta, Frustum, HalfSpace}, render_asset::RenderAssets, render_graph::{Node, NodeRunError, RenderGraphContext}, @@ -1162,7 +1162,7 @@ pub fn prepare_lights( pub fn queue_shadows( shadow_draw_functions: Res>, prepass_pipeline: Res>, - render_meshes: Res>, + render_meshes: Res>, render_mesh_instances: Res, render_materials: Res>>, render_material_instances: Res>, diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 8432cbd97d..6df3849cab 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -1,5 +1,6 @@ use std::mem; +use allocator::MeshAllocator; use bevy_asset::{load_internal_asset, AssetId}; use bevy_core_pipeline::{ core_3d::{AlphaMask3d, Opaque3d, Transmissive3d, 
Transparent3d, CORE_3D_DEPTH_FORMAT}, @@ -1209,7 +1210,8 @@ impl GetBatchData for MeshPipeline { type Param = ( SRes, SRes, - SRes>, + SRes>, + SRes, ); // The material bind group ID, the mesh ID, and the lightmap ID, // respectively. @@ -1218,7 +1220,7 @@ impl GetBatchData for MeshPipeline { type BufferData = MeshUniform; fn get_batch_data( - (mesh_instances, lightmaps, _): &SystemParamItem, + (mesh_instances, lightmaps, _, _): &SystemParamItem, entity: Entity, ) -> Option<(Self::BufferData, Option)> { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { @@ -1249,7 +1251,7 @@ impl GetFullBatchData for MeshPipeline { type BufferInputData = MeshInputUniform; fn get_index_and_compare_data( - (mesh_instances, lightmaps, _): &SystemParamItem, + (mesh_instances, lightmaps, _, _): &SystemParamItem, entity: Entity, ) -> Option<(NonMaxU32, Option)> { // This should only be called during GPU building. @@ -1275,7 +1277,7 @@ impl GetFullBatchData for MeshPipeline { } fn get_binned_batch_data( - (mesh_instances, lightmaps, _): &SystemParamItem, + (mesh_instances, lightmaps, _, _): &SystemParamItem, entity: Entity, ) -> Option { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { @@ -1294,7 +1296,7 @@ impl GetFullBatchData for MeshPipeline { } fn get_binned_index( - (mesh_instances, _, _): &SystemParamItem, + (mesh_instances, _, _, _): &SystemParamItem, entity: Entity, ) -> Option { // This should only be called during GPU building. @@ -1312,7 +1314,7 @@ impl GetFullBatchData for MeshPipeline { } fn get_batch_indirect_parameters_index( - (mesh_instances, _, meshes): &SystemParamItem, + (mesh_instances, _, meshes, mesh_allocator): &SystemParamItem, indirect_parameters_buffer: &mut IndirectParametersBuffer, entity: Entity, instance_index: u32, @@ -1320,6 +1322,7 @@ impl GetFullBatchData for MeshPipeline { get_batch_indirect_parameters_index( mesh_instances, meshes, + mesh_allocator, indirect_parameters_buffer, entity, instance_index, @@ -1332,7 +1335,8 @@ impl GetFullBatchData for MeshPipeline { /// parameters. fn get_batch_indirect_parameters_index( mesh_instances: &RenderMeshInstances, - meshes: &RenderAssets, + meshes: &RenderAssets, + mesh_allocator: &MeshAllocator, indirect_parameters_buffer: &mut IndirectParametersBuffer, entity: Entity, instance_index: u32, @@ -1348,24 +1352,29 @@ fn get_batch_indirect_parameters_index( let mesh_instance = mesh_instances.get(&entity)?; let mesh = meshes.get(mesh_instance.mesh_asset_id)?; + let vertex_buffer_slice = mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id)?; // Note that `IndirectParameters` covers both of these structures, even // though they actually have distinct layouts. See the comment above that // type for more information. let indirect_parameters = match mesh.buffer_info { - GpuBufferInfo::Indexed { + RenderMeshBufferInfo::Indexed { count: index_count, .. 
- } => IndirectParameters { - vertex_or_index_count: index_count, - instance_count: 0, - first_vertex: 0, - base_vertex_or_first_instance: 0, - first_instance: instance_index, - }, - GpuBufferInfo::NonIndexed => IndirectParameters { + } => { + let index_buffer_slice = + mesh_allocator.mesh_index_slice(&mesh_instance.mesh_asset_id)?; + IndirectParameters { + vertex_or_index_count: index_count, + instance_count: 0, + first_vertex_or_first_index: index_buffer_slice.range.start, + base_vertex_or_first_instance: vertex_buffer_slice.range.start, + first_instance: instance_index, + } + } + RenderMeshBufferInfo::NonIndexed => IndirectParameters { vertex_or_index_count: mesh.vertex_count, instance_count: 0, - first_vertex: 0, + first_vertex_or_first_index: vertex_buffer_slice.range.start, base_vertex_or_first_instance: instance_index, first_instance: instance_index, }, @@ -1945,7 +1954,7 @@ impl MeshBindGroups { self.morph_targets.clear(); self.lightmaps.clear(); } - /// Get the `BindGroup` for `GpuMesh` with given `handle_id` and lightmap + /// Get the `BindGroup` for `RenderMesh` with given `handle_id` and lightmap /// key `lightmap`. pub fn get( &self, @@ -1982,7 +1991,7 @@ impl MeshBindGroupPair { #[allow(clippy::too_many_arguments)] pub fn prepare_mesh_bind_group( - meshes: Res>, + meshes: Res>, images: Res>, mut groups: ResMut, mesh_pipeline: Res, @@ -2238,10 +2247,11 @@ impl RenderCommand
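// For reference, summarizing how `get_batch_indirect_parameters_index` above now
// fills the unified `IndirectParameters` struct (the "Array" layout is used for
// non-indexed draws, the "Element" layout for indexed draws):
//
//                                    ArrayIndirectParameters       ElementIndirectParameters
//   vertex_or_index_count            mesh.vertex_count             index count
//   first_vertex_or_first_index      vertex slice start in slab    index slice start in slab
//   base_vertex_or_first_instance    first instance                vertex slice start (base vertex)
//   first_instance                   first instance                first instance
//
// Both layouts mirror the GPU's indirect draw argument structures, overlaid on a
// single struct; the slab-relative slice starts are what the mesh allocator
// introduces here.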

 for SetMeshBindGroup<I> {
 pub struct DrawMesh;

 impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
     type Param = (
-        SRes<RenderAssets<GpuMesh>>,
+        SRes<RenderAssets<RenderMesh>>,
         SRes<RenderMeshInstances>,
         SRes<IndirectParametersBuffer>,
         SRes<PipelineCache>,
+        SRes<MeshAllocator>,
         Option<SRes<PreprocessPipelines>>,
     );
     type ViewQuery = Has<PreprocessBindGroup>;
@@ -2251,7 +2261,14 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
         item: &P,
         has_preprocess_bind_group: ROQueryItem<Self::ViewQuery>,
         _item_query: Option<()>,
-        (meshes, mesh_instances, indirect_parameters_buffer, pipeline_cache, preprocess_pipelines): SystemParamItem<'w, '_, Self::Param>,
+        (
+            meshes,
+            mesh_instances,
+            indirect_parameters_buffer,
+            pipeline_cache,
+            mesh_allocator,
+            preprocess_pipelines,
+        ): SystemParamItem<'w, '_, Self::Param>,
         pass: &mut TrackedRenderPass<'w>,
     ) -> RenderCommandResult {
         // If we're using GPU preprocessing, then we're dependent on that
@@ -2268,6 +2285,7 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
         let meshes = meshes.into_inner();
         let mesh_instances = mesh_instances.into_inner();
         let indirect_parameters_buffer = indirect_parameters_buffer.into_inner();
+        let mesh_allocator = mesh_allocator.into_inner();

         let Some(mesh_asset_id) = mesh_instances.mesh_asset_id(item.entity()) else {
             return RenderCommandResult::Failure;
@@ -2275,6 +2293,9 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
         let Some(gpu_mesh) = meshes.get(mesh_asset_id) else {
             return RenderCommandResult::Failure;
         };
+        let Some(vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(&mesh_asset_id) else {
+            return RenderCommandResult::Failure;
+        };

         // Calculate the indirect offset, and look up the buffer.
         let indirect_parameters = match item.extra_index().as_indirect_parameters_index() {
@@ -2291,21 +2312,31 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
             },
         };

-        pass.set_vertex_buffer(0, gpu_mesh.vertex_buffer.slice(..));
+        pass.set_vertex_buffer(0, vertex_buffer_slice.buffer.slice(..));

         let batch_range = item.batch_range();

         // Draw either directly or indirectly, as appropriate.
         match &gpu_mesh.buffer_info {
-            GpuBufferInfo::Indexed {
-                buffer,
+            RenderMeshBufferInfo::Indexed {
                 index_format,
                 count,
             } => {
-                pass.set_index_buffer(buffer.slice(..), 0, *index_format);
+                let Some(index_buffer_slice) = mesh_allocator.mesh_index_slice(&mesh_asset_id)
+                else {
+                    return RenderCommandResult::Failure;
+                };
+
+                pass.set_index_buffer(index_buffer_slice.buffer.slice(..), 0, *index_format);
+
                 match indirect_parameters {
                     None => {
-                        pass.draw_indexed(0..*count, 0, batch_range.clone());
+                        pass.draw_indexed(
+                            index_buffer_slice.range.start
+                                ..(index_buffer_slice.range.start + *count),
+                            vertex_buffer_slice.range.start as i32,
+                            batch_range.clone(),
+                        );
                     }
                     Some((indirect_parameters_offset, indirect_parameters_buffer)) => pass
                         .draw_indexed_indirect(
@@ -2314,7 +2345,7 @@ impl<P: PhaseItem> RenderCommand<P>

for DrawMesh { ), } } - GpuBufferInfo::NonIndexed => match indirect_parameters { + RenderMeshBufferInfo::NonIndexed => match indirect_parameters { None => { pass.draw(0..gpu_mesh.vertex_count, batch_range.clone()); } diff --git a/crates/bevy_pbr/src/volumetric_fog/render.rs b/crates/bevy_pbr/src/volumetric_fog/render.rs index bb3a56dc03..1c2cfc788c 100644 --- a/crates/bevy_pbr/src/volumetric_fog/render.rs +++ b/crates/bevy_pbr/src/volumetric_fog/render.rs @@ -18,7 +18,9 @@ use bevy_ecs::{ }; use bevy_math::{vec4, Mat3A, Mat4, Vec3, Vec3A, Vec4, Vec4Swizzles as _}; use bevy_render::{ - mesh::{GpuBufferInfo, GpuMesh, Mesh, MeshVertexBufferLayoutRef}, + mesh::{ + allocator::MeshAllocator, Mesh, MeshVertexBufferLayoutRef, RenderMesh, RenderMeshBufferInfo, + }, render_asset::RenderAssets, render_graph::{NodeRunError, RenderGraphContext, ViewNode}, render_resource::{ @@ -329,6 +331,7 @@ impl ViewNode for VolumetricFogNode { let volumetric_lighting_uniform_buffers = world.resource::(); let image_assets = world.resource::>(); let msaa = world.resource::(); + let mesh_allocator = world.resource::(); // Fetch the uniform buffer and binding. let ( @@ -344,7 +347,7 @@ impl ViewNode for VolumetricFogNode { return Ok(()); }; - let gpu_meshes = world.resource::>(); + let render_meshes = world.resource::>(); for view_fog_volume in view_fog_volumes.iter() { // If the camera is outside the fog volume, pick the cube mesh; @@ -356,6 +359,11 @@ impl ViewNode for VolumetricFogNode { PLANE_MESH.clone() }; + let Some(vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(&mesh_handle.id()) + else { + continue; + }; + let density_image = view_fog_volume .density_texture .and_then(|density_texture| image_assets.get(density_texture)); @@ -370,7 +378,7 @@ impl ViewNode for VolumetricFogNode { // This should always succeed, but if the asset was unloaded don't // panic. - let Some(gpu_mesh) = gpu_meshes.get(&mesh_handle) else { + let Some(render_mesh) = render_meshes.get(&mesh_handle) else { return Ok(()); }; @@ -426,7 +434,7 @@ impl ViewNode for VolumetricFogNode { .command_encoder() .begin_render_pass(&render_pass_descriptor); - render_pass.set_vertex_buffer(0, *gpu_mesh.vertex_buffer.slice(..)); + render_pass.set_vertex_buffer(0, *vertex_buffer_slice.buffer.slice(..)); render_pass.set_pipeline(pipeline); render_pass.set_bind_group( 0, @@ -446,17 +454,23 @@ impl ViewNode for VolumetricFogNode { ); // Draw elements or arrays, as appropriate. 
- match &gpu_mesh.buffer_info { - GpuBufferInfo::Indexed { - buffer, + match &render_mesh.buffer_info { + RenderMeshBufferInfo::Indexed { index_format, count, } => { - render_pass.set_index_buffer(*buffer.slice(..), *index_format); + let Some(index_buffer_slice) = + mesh_allocator.mesh_index_slice(&mesh_handle.id()) + else { + continue; + }; + + render_pass + .set_index_buffer(*index_buffer_slice.buffer.slice(..), *index_format); render_pass.draw_indexed(0..*count, 0, 0..1); } - GpuBufferInfo::NonIndexed => { - render_pass.draw(0..gpu_mesh.vertex_count, 0..1); + RenderMeshBufferInfo::NonIndexed => { + render_pass.draw(0..render_mesh.vertex_count, 0..1); } } } @@ -584,7 +598,7 @@ pub fn prepare_volumetric_fog_pipelines( With, >, msaa: Res, - meshes: Res>, + meshes: Res>, ) { let plane_mesh = meshes.get(&PLANE_MESH).expect("Plane mesh not found!"); diff --git a/crates/bevy_render/Cargo.toml b/crates/bevy_render/Cargo.toml index 4da6baaf9d..a4554f0bc2 100644 --- a/crates/bevy_render/Cargo.toml +++ b/crates/bevy_render/Cargo.toml @@ -101,6 +101,7 @@ profiling = { version = "1", features = [ async-channel = "2.2.0" nonmax = "0.5" smallvec = { version = "1.11", features = ["const_new"] } +offset-allocator = "0.2" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] # Omit the `glsl` feature in non-WebAssembly by default. diff --git a/crates/bevy_render/src/batching/gpu_preprocessing.rs b/crates/bevy_render/src/batching/gpu_preprocessing.rs index 60794636b4..35ce9464df 100644 --- a/crates/bevy_render/src/batching/gpu_preprocessing.rs +++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs @@ -185,8 +185,9 @@ pub struct IndirectParameters { /// This field is in the same place in both structures. pub instance_count: u32, - /// The index of the first vertex we're to draw. - pub first_vertex: u32, + /// For `ArrayIndirectParameters`, `first_vertex`; for + /// `ElementIndirectParameters`, `first_index`. + pub first_vertex_or_first_index: u32, /// For `ArrayIndirectParameters`, `first_instance`; for /// `ElementIndirectParameters`, `base_vertex`. diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs index 49f25530b1..d03a7a6e01 100644 --- a/crates/bevy_render/src/lib.rs +++ b/crates/bevy_render/src/lib.rs @@ -64,7 +64,7 @@ use globals::GlobalsPlugin; use render_asset::RenderAssetBytesPerFrame; use renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue}; -use crate::mesh::GpuMesh; +use crate::mesh::RenderMesh; use crate::renderer::WgpuWrapper; use crate::{ camera::CameraPlugin, @@ -115,7 +115,7 @@ pub enum RenderSet { /// Queue drawable entities as phase items in render phases ready for /// sorting (if necessary) Queue, - /// A sub-set within [`Queue`](RenderSet::Queue) where mesh entity queue systems are executed. Ensures `prepare_assets::` is completed. + /// A sub-set within [`Queue`](RenderSet::Queue) where mesh entity queue systems are executed. Ensures `prepare_assets::` is completed. 
QueueMeshes, // TODO: This could probably be moved in favor of a system ordering // abstraction in `Render` or `Queue` @@ -165,7 +165,11 @@ impl Render { ); schedule.configure_sets((ExtractCommands, PrepareAssets, Prepare).chain()); - schedule.configure_sets(QueueMeshes.in_set(Queue).after(prepare_assets::)); + schedule.configure_sets( + QueueMeshes + .in_set(Queue) + .after(prepare_assets::), + ); schedule.configure_sets( (PrepareResources, PrepareResourcesFlush, PrepareBindGroups) .chain() diff --git a/crates/bevy_render/src/mesh/allocator.rs b/crates/bevy_render/src/mesh/allocator.rs new file mode 100644 index 0000000000..218e19c475 --- /dev/null +++ b/crates/bevy_render/src/mesh/allocator.rs @@ -0,0 +1,1025 @@ +//! Manages mesh vertex and index buffers. + +use std::{ + borrow::Cow, + fmt::{self, Display, Formatter}, + iter, + ops::Range, + vec::Vec, +}; + +use bevy_app::{App, Plugin}; +use bevy_asset::AssetId; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + schedule::IntoSystemConfigs as _, + system::{Res, ResMut, Resource}, + world::{FromWorld, World}, +}; +use bevy_utils::{ + hashbrown::{HashMap, HashSet}, + tracing::error, +}; +use offset_allocator::{Allocation, Allocator}; +use wgpu::{ + util::BufferInitDescriptor, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, + DownlevelFlags, COPY_BUFFER_ALIGNMENT, +}; + +use crate::{ + mesh::{Indices, Mesh, MeshVertexBufferLayouts, RenderMesh}, + render_asset::{prepare_assets, ExtractedAssets}, + render_resource::Buffer, + renderer::{RenderAdapter, RenderDevice, RenderQueue}, + Render, RenderApp, RenderSet, +}; + +/// A plugin that manages GPU memory for mesh data. +pub struct MeshAllocatorPlugin; + +/// Manages the assignment of mesh data to GPU buffers. +/// +/// The Bevy renderer tries to pack vertex and index data for multiple meshes +/// together so that multiple meshes can be drawn back-to-back without any +/// rebinding. This resource manages these buffers. +/// +/// Within each slab, or hardware buffer, the underlying allocation algorithm is +/// [`offset-allocator`], a Rust port of Sebastian Aaltonen's hard-real-time C++ +/// `OffsetAllocator`. Slabs start small and then grow as their contents fill +/// up, up to a maximum size limit. To reduce fragmentation, vertex and index +/// buffers that are too large bypass this system and receive their own buffers. +/// +/// The [`MeshAllocatorSettings`] allows you to tune the behavior of the +/// allocator for better performance with your application. Most applications +/// won't need to change the settings from their default values. +#[derive(Resource)] +pub struct MeshAllocator { + /// Holds all buffers and allocators. + slabs: HashMap, + + /// Maps a layout to the slabs that hold elements of that layout. + /// + /// This is used when allocating, so that we can find the appropriate slab + /// to place an object in. + slab_layouts: HashMap>, + + /// Maps mesh asset IDs to the ID of the slabs that hold their vertex data. + mesh_id_to_vertex_slab: HashMap, SlabId>, + + /// Maps mesh asset IDs to the ID of the slabs that hold their index data. + mesh_id_to_index_slab: HashMap, SlabId>, + + /// The next slab ID to assign. + next_slab_id: SlabId, + + /// Whether we can pack multiple vertex arrays into a single slab on this + /// platform. + /// + /// This corresponds to [`DownlevelFlags::BASE_VERTEX`], which is unset on + /// WebGL 2. On this platform, we must give each vertex array its own + /// buffer, because we can't adjust the first vertex when we perform a draw. 
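// Illustrative sketch (not from the patch) of the suballocation model described
// above, using only the `offset_allocator` calls this module relies on; the slot
// counts are made up:
//
//     use offset_allocator::Allocator;
//
//     // One general slab: a single GPU buffer plus an allocator that hands out
//     // slot ranges inside it.
//     let mut allocator = Allocator::new(1024);
//     let mesh_a = allocator.allocate(300).expect("slab has room"); // starts at mesh_a.offset
//     let mesh_b = allocator.allocate(200).expect("slab has room"); // packed into the same buffer
//     allocator.free(mesh_a); // mesh B stays where it is; A's slots can be reused
//
// A mesh's byte offset inside the slab's buffer is then
// `allocation.offset as u64 * element_layout.slot_size()`, which is how
// `copy_element_data` and `mesh_slice_in_slab` below compute positions.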
+ general_vertex_slabs_supported: bool, +} + +/// Tunable parameters that customize the behavior of the allocator. +/// +/// Generally, these parameters adjust the tradeoff between memory fragmentation +/// and performance. You can adjust them as desired for your application. Most +/// applications can stick with the default values. +#[derive(Resource)] +pub struct MeshAllocatorSettings { + /// The minimum size of a slab (hardware buffer), in bytes. + /// + /// The default value is 1 MiB. + pub min_slab_size: u64, + + /// The maximum size of a slab (hardware buffer), in bytes. + /// + /// When a slab reaches this limit, a new slab is created. + /// + /// The default value is 512 MiB. + pub max_slab_size: u64, + + /// The maximum size of vertex or index data that can be placed in a general + /// slab, in bytes. + /// + /// If a mesh has vertex or index data that exceeds this size limit, that + /// data is placed in its own slab. This reduces fragmentation, but incurs + /// more CPU-side binding overhead when drawing the mesh. + /// + /// The default value is 256 MiB. + pub large_threshold: u64, + + /// The factor by which we scale a slab when growing it. + /// + /// This value must be greater than 1. Higher values result in more + /// fragmentation but fewer expensive copy operations when growing the + /// buffer. + /// + /// The default value is 1.5. + pub growth_factor: f64, +} + +impl Default for MeshAllocatorSettings { + fn default() -> Self { + Self { + // 1 MiB + min_slab_size: 1024 * 1024, + // 512 MiB + max_slab_size: 1024 * 1024 * 512, + // 256 MiB + large_threshold: 1024 * 1024 * 256, + // 1.5× growth + growth_factor: 1.5, + } + } +} + +/// The hardware buffer that mesh data lives in, as well as the range within +/// that buffer. +pub struct MeshBufferSlice<'a> { + /// The buffer that the mesh data resides in. + pub buffer: &'a Buffer, + + /// The range of elements within this buffer that the mesh data resides in, + /// measured in elements. + /// + /// This is not a byte range; it's an element range. For vertex data, this + /// is measured in increments of a single vertex. (Thus, if a vertex is 32 + /// bytes long, then this range is in units of 32 bytes each.) For index + /// data, this is measured in increments of a single index value (2 or 4 + /// bytes). Draw commands generally take their ranges in elements, not + /// bytes, so this is the most convenient unit in this case. + pub range: Range, +} + +/// The index of a single slab. +#[derive(Clone, Copy, Default, PartialEq, Eq, Hash, Debug)] +#[repr(transparent)] +struct SlabId(u32); + +/// Data for a single slab. +#[allow(clippy::large_enum_variant)] +enum Slab { + /// A slab that can contain multiple objects. + General(GeneralSlab), + /// A slab that contains a single object. + LargeObject(LargeObjectSlab), +} + +/// A resizable slab that can contain multiple objects. +/// +/// This is the normal type of slab used for objects that are below the +/// [`MeshAllocatorSettings::large_threshold`]. Slabs are divided into *slots*, +/// which are described in detail in the [`ElementLayout`] documentation. +struct GeneralSlab { + /// The [`Allocator`] that manages the objects in this slab. + allocator: Allocator, + + /// The GPU buffer that backs this slab. + /// + /// This may be `None` if the buffer hasn't been created yet. We delay + /// creation of buffers until allocating all the meshes for a single frame, + /// so that we don't needlessly create and resize buffers when many meshes + /// load all at once. 
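// A hypothetical tuning sketch (not part of the patch): `MeshAllocatorSettings`
// is a render-world resource initialized by `MeshAllocatorPlugin`, so an app that
// wants different limits would override it on the render sub-app after the
// plugins are added, e.g.:
//
//     let mut app = App::new();
//     app.add_plugins(DefaultPlugins);
//     if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
//         render_app.insert_resource(MeshAllocatorSettings {
//             // Give very large meshes their own buffers sooner than the
//             // 256 MiB default.
//             large_threshold: 64 * 1024 * 1024,
//             ..Default::default()
//         });
//     }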
+ buffer: Option, + + /// Allocations that are on the GPU. + /// + /// The range is in slots. + resident_allocations: HashMap, SlabAllocation>, + + /// Allocations that are waiting to be uploaded to the GPU. + /// + /// The range is in slots. + pending_allocations: HashMap, SlabAllocation>, + + /// The layout of a single element (vertex or index). + element_layout: ElementLayout, + + /// The size of this slab in slots. + slot_capacity: u32, +} + +/// A slab that contains a single object. +/// +/// Typically, this is for objects that exceed the +/// [`MeshAllocatorSettings::large_threshold`]. This is also for objects that +/// would ordinarily receive their own slab but can't because of platform +/// limitations, most notably vertex arrays on WebGL 2. +struct LargeObjectSlab { + /// The GPU buffer that backs this slab. + /// + /// This may be `None` if the buffer hasn't been created yet. + buffer: Option, + + /// The layout of a single element (vertex or index). + element_layout: ElementLayout, +} + +/// The type of element that a slab can store. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +enum ElementClass { + /// Data for a vertex. + Vertex, + /// A vertex index. + Index, +} + +/// Information about the size of individual elements (vertices or indices) +/// within a slab. +/// +/// Slab objects are allocated in units of *slots*. Usually, each element takes +/// up one slot, and so elements and slots are equivalent. Occasionally, +/// however, a slot may consist of 2 or even 4 elements. This occurs when the +/// size of an element isn't divisible by [`COPY_BUFFER_ALIGNMENT`]. When we +/// resize buffers, we perform GPU-to-GPU copies to shuffle the existing +/// elements into their new positions, and such copies must be on +/// [`COPY_BUFFER_ALIGNMENT`] boundaries. Slots solve this problem by +/// guaranteeing that the size of an allocation quantum is divisible by both the +/// size of an element and [`COPY_BUFFER_ALIGNMENT`], so we can relocate it +/// freely. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +struct ElementLayout { + /// Either a vertex or an index. + class: ElementClass, + + /// The size in bytes of a single element (vertex or index). + size: u64, + + /// The number of elements that make up a single slot. + /// + /// Usually, this is 1, but it can be different if [`ElementLayout::size`] + /// isn't divisible by 4. See the comment in [`ElementLayout`] for more + /// details. + elements_per_slot: u32, +} + +/// The location of an allocation and the slab it's contained in. +struct MeshAllocation { + /// The ID of the slab. + slab_id: SlabId, + /// Holds the actual allocation. + slab_allocation: SlabAllocation, +} + +/// An allocation within a slab. +#[derive(Clone)] +struct SlabAllocation { + /// The actual [`Allocator`] handle, needed to free the allocation. + allocation: Allocation, + /// The number of slots that this allocation takes up. + slot_count: u32, +} + +/// Holds information about all slabs scheduled to be allocated or reallocated. +#[derive(Default, Deref, DerefMut)] +struct SlabsToReallocate(HashMap); + +/// Holds information about a slab that's scheduled to be allocated or +/// reallocated. +#[derive(Default)] +struct SlabToReallocate { + /// Maps all allocations that need to be relocated to their positions within + /// the *new* slab. 
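// Worked example of the slot scheme described for `ElementLayout` above (using
// the computation in `ElementLayout::new` below, with `COPY_BUFFER_ALIGNMENT` = 4 bytes):
//
//   * 32-byte vertex: gcd(32, 4) = 4, so elements_per_slot = 1 and a slot is 32 bytes.
//   * 4-byte u32 index: gcd(4, 4) = 4, so elements_per_slot = 1 and a slot is 4 bytes.
//   * 2-byte u16 index: gcd(2, 4) = 2, so elements_per_slot = 2 and a slot is 4 bytes;
//     allocations are rounded up to an even number of indices, which keeps every
//     slot boundary on a 4-byte boundary so GPU-to-GPU copies stay legal.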
+ allocations_to_copy: HashMap, SlabAllocation>, +} + +impl Display for SlabId { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl Plugin for MeshAllocatorPlugin { + fn build(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .init_resource::() + .add_systems( + Render, + allocate_and_free_meshes + .in_set(RenderSet::PrepareAssets) + .before(prepare_assets::), + ); + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + // The `RenderAdapter` isn't available until now, so we can't do this in + // [`Plugin::build`]. + render_app.init_resource::(); + } +} + +impl FromWorld for MeshAllocator { + fn from_world(world: &mut World) -> Self { + // Note whether we're on WebGL 2. In this case, we must give every + // vertex array its own slab. + let render_adapter = world.resource::(); + let general_vertex_slabs_supported = render_adapter + .get_downlevel_capabilities() + .flags + .contains(DownlevelFlags::BASE_VERTEX); + + Self { + slabs: HashMap::new(), + slab_layouts: HashMap::new(), + mesh_id_to_vertex_slab: HashMap::new(), + mesh_id_to_index_slab: HashMap::new(), + next_slab_id: SlabId(0), + general_vertex_slabs_supported, + } + } +} + +/// A system that processes newly-extracted or newly-removed meshes and writes +/// their data into buffers or frees their data as appropriate. +pub fn allocate_and_free_meshes( + mut mesh_allocator: ResMut, + mesh_allocator_settings: Res, + extracted_meshes: Res>, + mut mesh_vertex_buffer_layouts: ResMut, + render_device: Res, + render_queue: Res, +) { + // Process newly-added meshes. + mesh_allocator.allocate_meshes( + &mesh_allocator_settings, + &extracted_meshes, + &mut mesh_vertex_buffer_layouts, + &render_device, + &render_queue, + ); + + // Process removed meshes. + mesh_allocator.free_meshes(&extracted_meshes); +} + +impl MeshAllocator { + /// Returns the buffer and range within that buffer of the vertex data for + /// the mesh with the given ID. + /// + /// If the mesh wasn't allocated, returns None. + pub fn mesh_vertex_slice(&self, mesh_id: &AssetId) -> Option { + self.mesh_slice_in_slab(mesh_id, *self.mesh_id_to_vertex_slab.get(mesh_id)?) + } + + /// Returns the buffer and range within that buffer of the index data for + /// the mesh with the given ID. + /// + /// If the mesh has no index data or wasn't allocated, returns None. + pub fn mesh_index_slice(&self, mesh_id: &AssetId) -> Option { + self.mesh_slice_in_slab(mesh_id, *self.mesh_id_to_index_slab.get(mesh_id)?) + } + + /// Given a slab and a mesh with data located with it, returns the buffer + /// and range of that mesh data within the slab. + fn mesh_slice_in_slab( + &self, + mesh_id: &AssetId, + slab_id: SlabId, + ) -> Option { + match self.slabs.get(&slab_id)? 
{ + Slab::General(ref general_slab) => { + let slab_allocation = general_slab.resident_allocations.get(mesh_id)?; + Some(MeshBufferSlice { + buffer: general_slab.buffer.as_ref()?, + range: (slab_allocation.allocation.offset + * general_slab.element_layout.elements_per_slot) + ..((slab_allocation.allocation.offset + slab_allocation.slot_count) + * general_slab.element_layout.elements_per_slot), + }) + } + + Slab::LargeObject(ref large_object_slab) => { + let buffer = large_object_slab.buffer.as_ref()?; + Some(MeshBufferSlice { + buffer, + range: 0..((buffer.size() / large_object_slab.element_layout.size) as u32), + }) + } + } + } + + /// Processes newly-loaded meshes, allocating room in the slabs for their + /// mesh data and performing upload operations as appropriate. + fn allocate_meshes( + &mut self, + mesh_allocator_settings: &MeshAllocatorSettings, + extracted_meshes: &ExtractedAssets, + mesh_vertex_buffer_layouts: &mut MeshVertexBufferLayouts, + render_device: &RenderDevice, + render_queue: &RenderQueue, + ) { + let mut slabs_to_grow = SlabsToReallocate::default(); + + // Allocate. + for (mesh_id, mesh) in &extracted_meshes.extracted { + // Allocate vertex data. Note that we can only pack mesh vertex data + // together if the platform supports it. + let vertex_element_layout = ElementLayout::vertex(mesh_vertex_buffer_layouts, mesh); + if self.general_vertex_slabs_supported { + self.allocate( + mesh_id, + mesh.get_vertex_buffer_data().len() as u64, + vertex_element_layout, + &mut slabs_to_grow, + mesh_allocator_settings, + ); + } else { + self.allocate_large(mesh_id, vertex_element_layout); + } + + // Allocate index data. + if let (Some(index_buffer_data), Some(index_element_layout)) = + (mesh.get_index_buffer_bytes(), ElementLayout::index(mesh)) + { + self.allocate( + mesh_id, + index_buffer_data.len() as u64, + index_element_layout, + &mut slabs_to_grow, + mesh_allocator_settings, + ); + } + } + + // Perform growth. + for (slab_id, slab_to_grow) in slabs_to_grow.0 { + self.reallocate_slab(render_device, render_queue, slab_id, slab_to_grow); + } + + // Copy new mesh data in. + for (mesh_id, mesh) in &extracted_meshes.extracted { + self.copy_mesh_vertex_data(mesh_id, mesh, render_device, render_queue); + self.copy_mesh_index_data(mesh_id, mesh, render_device, render_queue); + } + } + + /// Copies vertex array data from a mesh into the appropriate spot in the + /// slab. + fn copy_mesh_vertex_data( + &mut self, + mesh_id: &AssetId, + mesh: &Mesh, + render_device: &RenderDevice, + render_queue: &RenderQueue, + ) { + let Some(&slab_id) = self.mesh_id_to_vertex_slab.get(mesh_id) else { + return; + }; + let vertex_data = mesh.get_vertex_buffer_data(); + + // Call the generic function. + self.copy_element_data( + mesh_id, + mesh, + &vertex_data, + BufferUsages::VERTEX, + slab_id, + render_device, + render_queue, + ); + } + + /// Copies index array data from a mesh into the appropriate spot in the + /// slab. + fn copy_mesh_index_data( + &mut self, + mesh_id: &AssetId, + mesh: &Mesh, + render_device: &RenderDevice, + render_queue: &RenderQueue, + ) { + let Some(&slab_id) = self.mesh_id_to_index_slab.get(mesh_id) else { + return; + }; + let Some(index_data) = mesh.get_index_buffer_bytes() else { + return; + }; + + // Call the generic function. + self.copy_element_data( + mesh_id, + mesh, + index_data, + BufferUsages::INDEX, + slab_id, + render_device, + render_queue, + ); + } + + /// A generic function that copies either vertex or index data into a slab. 
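// Note on the function below: in its large-object branch the buffer contents are
// taken from `mesh.get_vertex_buffer_data()` rather than from the `data`
// parameter, so for an index allocation that was routed to its own slab the
// uploaded bytes appear to be the vertex data; presumably `data` is what is
// intended there.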
+ #[allow(clippy::too_many_arguments)] + fn copy_element_data( + &mut self, + mesh_id: &AssetId, + mesh: &Mesh, + data: &[u8], + buffer_usages: BufferUsages, + slab_id: SlabId, + render_device: &RenderDevice, + render_queue: &RenderQueue, + ) { + let Some(slab) = self.slabs.get_mut(&slab_id) else { + return; + }; + + match *slab { + Slab::General(ref mut general_slab) => { + let (Some(ref buffer), Some(allocated_range)) = ( + &general_slab.buffer, + general_slab.pending_allocations.remove(mesh_id), + ) else { + return; + }; + + let slot_size = general_slab.element_layout.slot_size(); + + // Write the data in. + render_queue.write_buffer( + buffer, + allocated_range.allocation.offset as u64 * slot_size, + &pad_to_alignment(data, slot_size as usize), + ); + + // Mark the allocation as resident. + general_slab + .resident_allocations + .insert(*mesh_id, allocated_range); + } + + Slab::LargeObject(ref mut large_object_slab) => { + debug_assert!(large_object_slab.buffer.is_none()); + + // Create the buffer and its data in one go. + large_object_slab.buffer = Some(render_device.create_buffer_with_data( + &BufferInitDescriptor { + label: Some(&format!( + "large mesh slab {} ({}buffer)", + slab_id, + buffer_usages_to_str(buffer_usages) + )), + contents: &mesh.get_vertex_buffer_data(), + usage: buffer_usages | BufferUsages::COPY_DST, + }, + )); + } + } + } + + fn free_meshes(&mut self, extracted_meshes: &ExtractedAssets) { + let mut empty_slabs = HashSet::new(); + for mesh_id in &extracted_meshes.removed { + if let Some(slab_id) = self.mesh_id_to_vertex_slab.remove(mesh_id) { + self.free_allocation_in_slab(mesh_id, slab_id, &mut empty_slabs); + } + if let Some(slab_id) = self.mesh_id_to_index_slab.remove(mesh_id) { + self.free_allocation_in_slab(mesh_id, slab_id, &mut empty_slabs); + } + } + + for empty_slab in empty_slabs { + self.slabs.remove(&empty_slab); + } + } + + /// Given a slab and the ID of a mesh containing data in it, marks the + /// allocation as free. + /// + /// If this results in the slab becoming empty, this function adds the slab + /// to the `empty_slabs` set. + fn free_allocation_in_slab( + &mut self, + mesh_id: &AssetId, + slab_id: SlabId, + empty_slabs: &mut HashSet, + ) { + let Some(slab) = self.slabs.get_mut(&slab_id) else { + return; + }; + + match *slab { + Slab::General(ref mut general_slab) => { + let Some(slab_allocation) = general_slab + .resident_allocations + .remove(mesh_id) + .or_else(|| general_slab.pending_allocations.remove(mesh_id)) + else { + return; + }; + + general_slab.allocator.free(slab_allocation.allocation); + + if general_slab.is_empty() { + empty_slabs.insert(slab_id); + } + } + Slab::LargeObject(_) => { + empty_slabs.insert(slab_id); + } + } + } + + /// Allocates space for mesh data with the given byte size and layout in the + /// appropriate slab, creating that slab if necessary. + fn allocate( + &mut self, + mesh_id: &AssetId, + data_byte_len: u64, + layout: ElementLayout, + slabs_to_grow: &mut SlabsToReallocate, + settings: &MeshAllocatorSettings, + ) { + let data_element_count = data_byte_len.div_ceil(layout.size) as u32; + let data_slot_count = data_element_count.div_ceil(layout.elements_per_slot); + + // If the mesh data is too large for a slab, give it a slab of its own. 
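// For scale (using the default settings documented above): a mesh with 100_000
// vertices at a 48-byte stride needs 4_800_000 bytes = 100_000 one-element slots
// of 48 bytes, roughly 4.6 MiB -- far below the 256 MiB `large_threshold`, so it
// is packed into a shared general slab. Only data whose slot footprint reaches
// `large_threshold` (or `max_slab_size`, whichever is smaller) takes the
// dedicated-buffer path.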
+ if data_slot_count as u64 * layout.slot_size() + >= settings.large_threshold.min(settings.max_slab_size) + { + self.allocate_large(mesh_id, layout); + } else { + self.allocate_general(mesh_id, data_slot_count, layout, slabs_to_grow, settings); + } + } + + /// Allocates space for mesh data with the given slot size and layout in the + /// appropriate general slab. + fn allocate_general( + &mut self, + mesh_id: &AssetId, + data_slot_count: u32, + layout: ElementLayout, + slabs_to_grow: &mut SlabsToReallocate, + settings: &MeshAllocatorSettings, + ) { + let candidate_slabs = self.slab_layouts.entry(layout).or_default(); + + // Loop through the slabs that accept elements of the appropriate type + // and try to allocate the mesh inside them. We go with the first one + // that succeeds. + let mut mesh_allocation = None; + 'slab: for &slab_id in &*candidate_slabs { + loop { + let Some(Slab::General(ref mut slab)) = self.slabs.get_mut(&slab_id) else { + unreachable!("Slab not found") + }; + + if let Some(allocation) = slab.allocator.allocate(data_slot_count) { + mesh_allocation = Some(MeshAllocation { + slab_id, + slab_allocation: SlabAllocation { + allocation, + slot_count: data_slot_count, + }, + }); + break 'slab; + } + + // Try to grow the slab. If this fails, the slab is full; go on + // to the next slab. + match slab.try_grow(settings) { + Ok(new_mesh_allocation_records) => { + slabs_to_grow.insert(slab_id, new_mesh_allocation_records); + } + Err(()) => continue 'slab, + } + } + } + + // If we still have no allocation, make a new slab. + if mesh_allocation.is_none() { + let new_slab_id = self.next_slab_id; + self.next_slab_id.0 += 1; + + let new_slab = GeneralSlab::new( + new_slab_id, + &mut mesh_allocation, + settings, + layout, + data_slot_count, + ); + + self.slabs.insert(new_slab_id, Slab::General(new_slab)); + candidate_slabs.push(new_slab_id); + slabs_to_grow.insert(new_slab_id, SlabToReallocate::default()); + } + + let mesh_allocation = mesh_allocation.expect("Should have been able to allocate"); + + // Mark the allocation as pending. Don't copy it in just yet; further + // meshes loaded this frame may result in its final allocation location + // changing. + if let Some(Slab::General(ref mut general_slab)) = + self.slabs.get_mut(&mesh_allocation.slab_id) + { + general_slab + .pending_allocations + .insert(*mesh_id, mesh_allocation.slab_allocation); + }; + + self.record_allocation(mesh_id, mesh_allocation.slab_id, layout.class); + } + + /// Allocates an object into its own dedicated slab. + fn allocate_large(&mut self, mesh_id: &AssetId, layout: ElementLayout) { + let new_slab_id = self.next_slab_id; + self.next_slab_id.0 += 1; + + self.record_allocation(mesh_id, new_slab_id, layout.class); + + self.slabs.insert( + new_slab_id, + Slab::LargeObject(LargeObjectSlab { + buffer: None, + element_layout: layout, + }), + ); + } + + /// Reallocates a slab that needs to be resized, or allocates a new slab. + /// + /// This performs the actual growth operation that [`GeneralSlab::try_grow`] + /// scheduled. We do the growth in two phases so that, if a slab grows + /// multiple times in the same frame, only one new buffer is reallocated, + /// rather than reallocating the buffer multiple times. 
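// With the default settings, growth is geometric: a slab's capacity is scaled by
// `growth_factor` (1.5) each time it runs out of room, starting from the 1 MiB
// minimum and capped at the 512 MiB maximum, and all relocations scheduled during
// one frame are folded into the single buffer reallocation performed by
// `reallocate_slab` below.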
+ fn reallocate_slab( + &mut self, + render_device: &RenderDevice, + render_queue: &RenderQueue, + slab_id: SlabId, + slab_to_grow: SlabToReallocate, + ) { + let Some(Slab::General(slab)) = self.slabs.get_mut(&slab_id) else { + error!("Couldn't find slab {:?} to grow", slab_id); + return; + }; + + let old_buffer = slab.buffer.take(); + + let mut buffer_usages = BufferUsages::COPY_SRC | BufferUsages::COPY_DST; + match slab.element_layout.class { + ElementClass::Vertex => buffer_usages |= BufferUsages::VERTEX, + ElementClass::Index => buffer_usages |= BufferUsages::INDEX, + }; + + // Create the buffer. + let new_buffer = render_device.create_buffer(&BufferDescriptor { + label: Some(&format!( + "general mesh slab {} ({}buffer)", + slab_id, + buffer_usages_to_str(buffer_usages) + )), + size: slab.slot_capacity as u64 * slab.element_layout.slot_size(), + usage: buffer_usages, + mapped_at_creation: false, + }); + + slab.buffer = Some(new_buffer.clone()); + + // In order to do buffer copies, we need a command encoder. + let mut encoder = render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("slab resize encoder"), + }); + + // If we have no objects to copy over, we're done. + let Some(old_buffer) = old_buffer else { + return; + }; + + for (mesh_id, src_slab_allocation) in &mut slab.resident_allocations { + let Some(dest_slab_allocation) = slab_to_grow.allocations_to_copy.get(mesh_id) else { + continue; + }; + + encoder.copy_buffer_to_buffer( + &old_buffer, + src_slab_allocation.allocation.offset as u64 * slab.element_layout.slot_size(), + &new_buffer, + dest_slab_allocation.allocation.offset as u64 * slab.element_layout.slot_size(), + dest_slab_allocation.slot_count as u64 * slab.element_layout.slot_size(), + ); + // Now that we've done the copy, we can update the allocation record. + *src_slab_allocation = dest_slab_allocation.clone(); + } + + let command_buffer = encoder.finish(); + render_queue.submit([command_buffer]); + } + + /// Records the location of the given newly-allocated mesh data in the + /// [`Self::mesh_id_to_vertex_slab`] or [`Self::mesh_id_to_index_slab`] + /// tables as appropriate. + fn record_allocation( + &mut self, + mesh_id: &AssetId, + slab_id: SlabId, + element_class: ElementClass, + ) { + match element_class { + ElementClass::Vertex => { + self.mesh_id_to_vertex_slab.insert(*mesh_id, slab_id); + } + ElementClass::Index => { + self.mesh_id_to_index_slab.insert(*mesh_id, slab_id); + } + } + } +} + +impl GeneralSlab { + /// Creates a new growable slab big enough to hold an single element of + /// `data_slot_count` size with the given `layout`. + fn new( + new_slab_id: SlabId, + mesh_allocation: &mut Option, + settings: &MeshAllocatorSettings, + layout: ElementLayout, + data_slot_count: u32, + ) -> GeneralSlab { + let slab_slot_capacity = (settings.min_slab_size.div_ceil(layout.slot_size()) as u32) + .max(offset_allocator::ext::min_allocator_size(data_slot_count)); + + let mut new_slab = GeneralSlab { + allocator: Allocator::new(slab_slot_capacity), + buffer: None, + resident_allocations: HashMap::new(), + pending_allocations: HashMap::new(), + element_layout: layout, + slot_capacity: slab_slot_capacity, + }; + + // This should never fail. 
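// (The capacity chosen above is at least
// `offset_allocator::ext::min_allocator_size(data_slot_count)` slots, so this
// first allocation into the freshly created slab is expected to succeed; the
// `if let` is purely defensive.)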
+ if let Some(allocation) = new_slab.allocator.allocate(data_slot_count) { + *mesh_allocation = Some(MeshAllocation { + slab_id: new_slab_id, + slab_allocation: SlabAllocation { + slot_count: data_slot_count, + allocation, + }, + }); + } + + new_slab + } + + /// Attempts to grow a slab that's just run out of space. + /// + /// Returns a structure the allocations that need to be relocated if the + /// growth succeeded. If the slab is full, returns `Err`. + fn try_grow(&mut self, settings: &MeshAllocatorSettings) -> Result { + // In extremely rare cases due to allocator fragmentation, it may happen + // that we fail to re-insert every object that was in the slab after + // growing it. Even though this will likely never happen, we use this + // loop to handle this unlikely event properly if it does. + 'grow: loop { + let new_slab_slot_capacity = ((self.slot_capacity as f64 * settings.growth_factor) + .ceil() as u32) + .min((settings.max_slab_size / self.element_layout.slot_size()) as u32); + if new_slab_slot_capacity == self.slot_capacity { + // The slab is full. + return Err(()); + } + + // Grow the slab. + self.allocator = Allocator::new(new_slab_slot_capacity); + self.slot_capacity = new_slab_slot_capacity; + + let mut slab_to_grow = SlabToReallocate::default(); + + // Place every resident allocation that was in the old slab in the + // new slab. + for (allocated_mesh_id, old_allocation_range) in &self.resident_allocations { + let allocation_size = old_allocation_range.slot_count; + match self.allocator.allocate(allocation_size) { + Some(allocation) => { + slab_to_grow.allocations_to_copy.insert( + *allocated_mesh_id, + SlabAllocation { + allocation, + slot_count: allocation_size, + }, + ); + } + None => { + // We failed to insert one of the allocations that we + // had before. + continue 'grow; + } + } + } + + // Move every allocation that was pending in the old slab to the new + // slab. + for slab_allocation in self.pending_allocations.values_mut() { + let allocation_size = slab_allocation.slot_count; + match self.allocator.allocate(allocation_size) { + Some(allocation) => slab_allocation.allocation = allocation, + None => { + // We failed to insert one of the allocations that we + // had before. + continue 'grow; + } + } + } + + return Ok(slab_to_grow); + } + } +} + +impl ElementLayout { + /// Creates an [`ElementLayout`] for mesh data of the given class (vertex or + /// index) with the given byte size. + fn new(class: ElementClass, size: u64) -> ElementLayout { + ElementLayout { + class, + size, + // Make sure that slot boundaries begin and end on + // `COPY_BUFFER_ALIGNMENT`-byte (4-byte) boundaries. + elements_per_slot: (COPY_BUFFER_ALIGNMENT / gcd(size, COPY_BUFFER_ALIGNMENT)) as u32, + } + } + + fn slot_size(&self) -> u64 { + self.size * self.elements_per_slot as u64 + } + + /// Creates the appropriate [`ElementLayout`] for the given mesh's vertex + /// data. + fn vertex( + mesh_vertex_buffer_layouts: &mut MeshVertexBufferLayouts, + mesh: &Mesh, + ) -> ElementLayout { + let mesh_vertex_buffer_layout = + mesh.get_mesh_vertex_buffer_layout(mesh_vertex_buffer_layouts); + ElementLayout::new( + ElementClass::Vertex, + mesh_vertex_buffer_layout.0.layout().array_stride, + ) + } + + /// Creates the appropriate [`ElementLayout`] for the given mesh's index + /// data. + fn index(mesh: &Mesh) -> Option { + let size = match mesh.indices()? 
{ + Indices::U16(_) => 2, + Indices::U32(_) => 4, + }; + Some(ElementLayout::new(ElementClass::Index, size)) + } +} + +impl GeneralSlab { + /// Returns true if this slab is empty. + fn is_empty(&self) -> bool { + self.resident_allocations.is_empty() && self.pending_allocations.is_empty() + } +} + +/// Returns the greatest common divisor of the two numbers. +/// +/// +fn gcd(mut a: u64, mut b: u64) -> u64 { + while b != 0 { + let t = b; + b = a % b; + a = t; + } + a +} + +/// Ensures that the size of a buffer is a multiple of the given alignment by +/// padding it with zeroes if necessary. +/// +/// If the buffer already has the required size, then this function doesn't +/// allocate. Otherwise, it copies the buffer into a new one and writes the +/// appropriate number of zeroes to the end. +fn pad_to_alignment(buffer: &[u8], align: usize) -> Cow<[u8]> { + if buffer.len() % align == 0 { + return Cow::Borrowed(buffer); + } + let mut buffer = buffer.to_vec(); + buffer.extend(iter::repeat(0).take(align - buffer.len() % align)); + Cow::Owned(buffer) +} + +/// Returns a string describing the given buffer usages. +fn buffer_usages_to_str(buffer_usages: BufferUsages) -> &'static str { + if buffer_usages.contains(BufferUsages::VERTEX) { + "vertex " + } else if buffer_usages.contains(BufferUsages::INDEX) { + "index " + } else { + "" + } +} diff --git a/crates/bevy_render/src/mesh/mesh/mod.rs b/crates/bevy_render/src/mesh/mesh/mod.rs index 02dd99e15a..1238f761c2 100644 --- a/crates/bevy_render/src/mesh/mesh/mod.rs +++ b/crates/bevy_render/src/mesh/mesh/mod.rs @@ -8,8 +8,7 @@ use crate::{ prelude::Image, primitives::Aabb, render_asset::{PrepareAssetError, RenderAsset, RenderAssetUsages, RenderAssets}, - render_resource::{Buffer, TextureView, VertexBufferLayout}, - renderer::RenderDevice, + render_resource::{TextureView, VertexBufferLayout}, texture::GpuImage, }; use bevy_asset::{Asset, Handle}; @@ -24,10 +23,7 @@ use bevy_utils::tracing::{error, warn}; use bytemuck::cast_slice; use std::{collections::BTreeMap, hash::Hash, iter::FusedIterator}; use thiserror::Error; -use wgpu::{ - util::BufferInitDescriptor, BufferUsages, IndexFormat, VertexAttribute, VertexFormat, - VertexStepMode, -}; +use wgpu::{IndexFormat, VertexAttribute, VertexFormat, VertexStepMode}; use super::{MeshVertexBufferLayoutRef, MeshVertexBufferLayouts}; @@ -1660,42 +1656,51 @@ impl BaseMeshPipelineKey { } } -/// The GPU-representation of a [`Mesh`]. -/// Consists of a vertex data buffer and an optional index data buffer. +/// The render world representation of a [`Mesh`]. #[derive(Debug, Clone)] -pub struct GpuMesh { - /// Contains all attribute data for each vertex. - pub vertex_buffer: Buffer, +pub struct RenderMesh { + /// The number of vertices in the mesh. pub vertex_count: u32, + + /// Morph targets for the mesh, if present. pub morph_targets: Option, - pub buffer_info: GpuBufferInfo, + + /// Information about the mesh data buffers, including whether the mesh uses + /// indices or not. + pub buffer_info: RenderMeshBufferInfo, + + /// Precomputed pipeline key bits for this mesh. pub key_bits: BaseMeshPipelineKey, + + /// A reference to the vertex buffer layout. + /// + /// Combined with [`RenderMesh::buffer_info`], this specifies the complete + /// layout of the buffers associated with this mesh. pub layout: MeshVertexBufferLayoutRef, } -impl GpuMesh { +impl RenderMesh { + /// Returns the primitive topology of this mesh (triangles, triangle strips, + /// etc.) 
#[inline] pub fn primitive_topology(&self) -> PrimitiveTopology { self.key_bits.primitive_topology() } } -/// The index/vertex buffer info of a [`GpuMesh`]. +/// The index/vertex buffer info of a [`RenderMesh`]. #[derive(Debug, Clone)] -pub enum GpuBufferInfo { +pub enum RenderMeshBufferInfo { Indexed { - /// Contains all index data of a mesh. - buffer: Buffer, count: u32, index_format: IndexFormat, }, NonIndexed, } -impl RenderAsset for GpuMesh { +impl RenderAsset for RenderMesh { type SourceAsset = Mesh; type Param = ( - SRes, SRes>, SResMut, ); @@ -1717,12 +1722,10 @@ impl RenderAsset for GpuMesh { Some(vertex_size * vertex_count + index_bytes) } - /// Converts the extracted mesh a into [`GpuMesh`]. + /// Converts the extracted mesh into a [`RenderMesh`]. fn prepare_asset( mesh: Self::SourceAsset, - (render_device, images, ref mut mesh_vertex_buffer_layouts): &mut SystemParamItem< - Self::Param, - >, + (images, ref mut mesh_vertex_buffer_layouts): &mut SystemParamItem, ) -> Result> { let morph_targets = match mesh.morph_targets.as_ref() { Some(mt) => { @@ -1734,25 +1737,12 @@ impl RenderAsset for GpuMesh { None => None, }; - let vertex_buffer_data = mesh.get_vertex_buffer_data(); - let vertex_buffer = render_device.create_buffer_with_data(&BufferInitDescriptor { - usage: BufferUsages::VERTEX, - label: Some("Mesh Vertex Buffer"), - contents: &vertex_buffer_data, - }); - - let buffer_info = if let Some(data) = mesh.get_index_buffer_bytes() { - GpuBufferInfo::Indexed { - buffer: render_device.create_buffer_with_data(&BufferInitDescriptor { - usage: BufferUsages::INDEX, - contents: data, - label: Some("Mesh Index Buffer"), - }), - count: mesh.indices().unwrap().len() as u32, - index_format: mesh.indices().unwrap().into(), - } - } else { - GpuBufferInfo::NonIndexed + let buffer_info = match mesh.indices() { + Some(indices) => RenderMeshBufferInfo::Indexed { + count: indices.len() as u32, + index_format: indices.into(), + }, + None => RenderMeshBufferInfo::NonIndexed, }; let mesh_vertex_buffer_layout = @@ -1764,8 +1754,7 @@ impl RenderAsset for GpuMesh { mesh.morph_targets.is_some(), ); - Ok(GpuMesh { - vertex_buffer, + Ok(RenderMesh { vertex_count: mesh.count_vertices() as u32, buffer_info, key_bits, diff --git a/crates/bevy_render/src/mesh/mod.rs b/crates/bevy_render/src/mesh/mod.rs index df33640716..84accac658 100644 --- a/crates/bevy_render/src/mesh/mod.rs +++ b/crates/bevy_render/src/mesh/mod.rs @@ -1,8 +1,11 @@ #[allow(clippy::module_inception)] mod mesh; + +pub mod allocator; pub mod morph; pub mod primitives; +use allocator::MeshAllocatorPlugin; use bevy_utils::HashSet; pub use mesh::*; pub use primitives::*; @@ -27,7 +30,8 @@ impl Plugin for MeshPlugin { .register_type::() .register_type::>() // 'Mesh' must be prepared after 'Image' as meshes rely on the morph target image being ready - .add_plugins(RenderAssetPlugin::::default()); + .add_plugins(RenderAssetPlugin::::default()) + .add_plugins(MeshAllocatorPlugin); let Some(render_app) = app.get_sub_app_mut(RenderApp) else { return; diff --git a/crates/bevy_render/src/render_asset.rs b/crates/bevy_render/src/render_asset.rs index e3a6aab5fb..5c9be59de3 100644 --- a/crates/bevy_render/src/render_asset.rs +++ b/crates/bevy_render/src/render_asset.rs @@ -114,7 +114,7 @@ impl Default for RenderAssetUsages { /// The `AFTER` generic parameter can be used to specify that `A::prepare_asset` should not be run until /// `prepare_assets::` has completed. 
This allows the `prepare_asset` function to depend on another /// prepared [`RenderAsset`], for example `Mesh::prepare_asset` relies on `RenderAssets::` for morph -/// targets, so the plugin is created as `RenderAssetPlugin::::default()`. +/// targets, so the plugin is created as `RenderAssetPlugin::::default()`. pub struct RenderAssetPlugin { phantom: PhantomData (A, AFTER)>, } @@ -168,9 +168,16 @@ impl RenderAssetDependency for A { /// Temporarily stores the extracted and removed assets of the current frame. #[derive(Resource)] pub struct ExtractedAssets { - extracted: Vec<(AssetId, A::SourceAsset)>, - removed: HashSet>, - added: HashSet>, + /// The assets extracted this frame. + pub extracted: Vec<(AssetId, A::SourceAsset)>, + + /// IDs of the assets removed this frame. + /// + /// These assets will not be present in [`ExtractedAssets::extracted`]. + pub removed: HashSet>, + + /// IDs of the assets added this frame. + pub added: HashSet>, } impl Default for ExtractedAssets { @@ -238,7 +245,10 @@ impl FromWorld for CachedExtractRenderAssetSystemState { /// This system extracts all created or modified assets of the corresponding [`RenderAsset::SourceAsset`] type /// into the "render world". -fn extract_render_asset(mut commands: Commands, mut main_world: ResMut) { +pub(crate) fn extract_render_asset( + mut commands: Commands, + mut main_world: ResMut, +) { main_world.resource_scope( |world, mut cached_state: Mut>| { let (mut events, mut assets) = cached_state.state.get_mut(world); diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs index d61ca002bf..1ef8ca1eaa 100644 --- a/crates/bevy_sprite/src/mesh2d/material.rs +++ b/crates/bevy_sprite/src/mesh2d/material.rs @@ -12,7 +12,7 @@ use bevy_ecs::{ }; use bevy_math::FloatOrd; use bevy_render::{ - mesh::{GpuMesh, MeshVertexBufferLayoutRef}, + mesh::{MeshVertexBufferLayoutRef, RenderMesh}, render_asset::{ prepare_assets, PrepareAssetError, RenderAsset, RenderAssetPlugin, RenderAssets, }, @@ -370,7 +370,7 @@ pub fn queue_material2d_meshes( mut pipelines: ResMut>>, pipeline_cache: Res, msaa: Res, - render_meshes: Res>, + render_meshes: Res>, render_materials: Res>>, mut render_mesh_instances: ResMut, render_material_instances: Res>, diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs index 16aee3f529..b2b69c081e 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh.rs +++ b/crates/bevy_sprite/src/mesh2d/mesh.rs @@ -18,12 +18,13 @@ use bevy_render::batching::no_gpu_preprocessing::{ self, batch_and_prepare_sorted_render_phase, write_batched_instance_buffer, BatchedInstanceBuffer, }; -use bevy_render::mesh::{GpuMesh, MeshVertexBufferLayoutRef}; +use bevy_render::mesh::allocator::MeshAllocator; +use bevy_render::mesh::{MeshVertexBufferLayoutRef, RenderMesh}; use bevy_render::texture::FallbackImage; use bevy_render::{ batching::{GetBatchData, NoAutomaticBatching}, globals::{GlobalsBuffer, GlobalsUniform}, - mesh::{GpuBufferInfo, Mesh}, + mesh::{Mesh, RenderMeshBufferInfo}, render_asset::RenderAssets, render_phase::{PhaseItem, RenderCommand, RenderCommandResult, TrackedRenderPass}, render_resource::{binding_types::uniform_buffer, *}, @@ -694,7 +695,11 @@ impl RenderCommand

 for SetMesh2dBindGroup<I> {
 pub struct DrawMesh2d;

 impl<P: PhaseItem> RenderCommand<P> for DrawMesh2d {
-    type Param = (SRes<RenderAssets<GpuMesh>>, SRes<RenderMesh2dInstances>);
+    type Param = (
+        SRes<RenderAssets<RenderMesh>>,
+        SRes<RenderMesh2dInstances>,
+        SRes<MeshAllocator>,
+    );
     type ViewQuery = ();
     type ItemQuery = ();
@@ -703,11 +708,12 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh2d {
         item: &P,
         _view: (),
         _item_query: Option<()>,
-        (meshes, render_mesh2d_instances): SystemParamItem<'w, '_, Self::Param>,
+        (meshes, render_mesh2d_instances, mesh_allocator): SystemParamItem<'w, '_, Self::Param>,
         pass: &mut TrackedRenderPass<'w>,
     ) -> RenderCommandResult {
         let meshes = meshes.into_inner();
         let render_mesh2d_instances = render_mesh2d_instances.into_inner();
+        let mesh_allocator = mesh_allocator.into_inner();

         let Some(RenderMesh2dInstance { mesh_asset_id, .. }) =
             render_mesh2d_instances.get(&item.entity())
@@ -717,20 +723,32 @@ impl<P: PhaseItem> RenderCommand<P>

for DrawMesh2d { let Some(gpu_mesh) = meshes.get(*mesh_asset_id) else { return RenderCommandResult::Failure; }; + let Some(vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(mesh_asset_id) else { + return RenderCommandResult::Failure; + }; - pass.set_vertex_buffer(0, gpu_mesh.vertex_buffer.slice(..)); + pass.set_vertex_buffer(0, vertex_buffer_slice.buffer.slice(..)); let batch_range = item.batch_range(); match &gpu_mesh.buffer_info { - GpuBufferInfo::Indexed { - buffer, + RenderMeshBufferInfo::Indexed { index_format, count, } => { - pass.set_index_buffer(buffer.slice(..), 0, *index_format); - pass.draw_indexed(0..*count, 0, batch_range.clone()); + let Some(index_buffer_slice) = mesh_allocator.mesh_index_slice(mesh_asset_id) + else { + return RenderCommandResult::Failure; + }; + + pass.set_index_buffer(index_buffer_slice.buffer.slice(..), 0, *index_format); + + pass.draw_indexed( + index_buffer_slice.range.start..(index_buffer_slice.range.start + count), + vertex_buffer_slice.range.start as i32, + batch_range.clone(), + ); } - GpuBufferInfo::NonIndexed => { + RenderMeshBufferInfo::NonIndexed => { pass.draw(0..gpu_mesh.vertex_count, batch_range.clone()); } } diff --git a/examples/2d/mesh2d_manual.rs b/examples/2d/mesh2d_manual.rs index 75b018c368..01195bc39e 100644 --- a/examples/2d/mesh2d_manual.rs +++ b/examples/2d/mesh2d_manual.rs @@ -11,7 +11,7 @@ use bevy::{ math::FloatOrd, prelude::*, render::{ - mesh::{GpuMesh, Indices, MeshVertexAttribute}, + mesh::{Indices, MeshVertexAttribute, RenderMesh}, render_asset::{RenderAssetUsages, RenderAssets}, render_phase::{ AddRenderCommand, DrawFunctions, PhaseItemExtraIndex, SetItemPipeline, @@ -352,7 +352,7 @@ pub fn queue_colored_mesh2d( mut pipelines: ResMut>, pipeline_cache: Res, msaa: Res, - render_meshes: Res>, + render_meshes: Res>, render_mesh_instances: Res, mut transparent_render_phases: ResMut>, mut views: Query<(Entity, &VisibleEntities, &ExtractedView)>, diff --git a/examples/shader/shader_instancing.rs b/examples/shader/shader_instancing.rs index 63b0c17908..9ae798b2a7 100644 --- a/examples/shader/shader_instancing.rs +++ b/examples/shader/shader_instancing.rs @@ -12,7 +12,9 @@ use bevy::{ prelude::*, render::{ extract_component::{ExtractComponent, ExtractComponentPlugin}, - mesh::{GpuBufferInfo, GpuMesh, MeshVertexBufferLayoutRef}, + mesh::{ + allocator::MeshAllocator, MeshVertexBufferLayoutRef, RenderMesh, RenderMeshBufferInfo, + }, render_asset::RenderAssets, render_phase::{ AddRenderCommand, DrawFunctions, PhaseItem, PhaseItemExtraIndex, RenderCommand, @@ -117,7 +119,7 @@ fn queue_custom( msaa: Res, mut pipelines: ResMut>, pipeline_cache: Res, - meshes: Res>, + meshes: Res>, render_mesh_instances: Res, material_meshes: Query>, mut transparent_render_phases: ResMut>, @@ -241,7 +243,11 @@ type DrawCustom = ( struct DrawMeshInstanced; impl RenderCommand

 for DrawMeshInstanced {
-    type Param = (SRes<RenderAssets<GpuMesh>>, SRes<RenderMeshInstances>);
+    type Param = (
+        SRes<RenderAssets<RenderMesh>>,
+        SRes<RenderMeshInstances>,
+        SRes<MeshAllocator>,
+    );
     type ViewQuery = ();
     type ItemQuery = Read<InstanceBuffer>;
@@ -250,9 +256,12 @@ impl<P: PhaseItem> RenderCommand<P>

for DrawMeshInstanced { item: &P, _view: (), instance_buffer: Option<&'w InstanceBuffer>, - (meshes, render_mesh_instances): SystemParamItem<'w, '_, Self::Param>, + (meshes, render_mesh_instances, mesh_allocator): SystemParamItem<'w, '_, Self::Param>, pass: &mut TrackedRenderPass<'w>, ) -> RenderCommandResult { + // A borrow check workaround. + let mesh_allocator = mesh_allocator.into_inner(); + let Some(mesh_instance) = render_mesh_instances.render_mesh_queue_data(item.entity()) else { return RenderCommandResult::Failure; @@ -263,20 +272,34 @@ impl RenderCommand

for DrawMeshInstanced { let Some(instance_buffer) = instance_buffer else { return RenderCommandResult::Failure; }; + let Some(vertex_buffer_slice) = + mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) + else { + return RenderCommandResult::Failure; + }; - pass.set_vertex_buffer(0, gpu_mesh.vertex_buffer.slice(..)); + pass.set_vertex_buffer(0, vertex_buffer_slice.buffer.slice(..)); pass.set_vertex_buffer(1, instance_buffer.buffer.slice(..)); match &gpu_mesh.buffer_info { - GpuBufferInfo::Indexed { - buffer, + RenderMeshBufferInfo::Indexed { index_format, count, } => { - pass.set_index_buffer(buffer.slice(..), 0, *index_format); - pass.draw_indexed(0..*count, 0, 0..instance_buffer.length as u32); + let Some(index_buffer_slice) = + mesh_allocator.mesh_index_slice(&mesh_instance.mesh_asset_id) + else { + return RenderCommandResult::Failure; + }; + + pass.set_index_buffer(index_buffer_slice.buffer.slice(..), 0, *index_format); + pass.draw_indexed( + index_buffer_slice.range.start..(index_buffer_slice.range.start + count), + vertex_buffer_slice.range.start as i32, + 0..instance_buffer.length as u32, + ); } - GpuBufferInfo::NonIndexed => { + RenderMeshBufferInfo::NonIndexed => { pass.draw(0..gpu_mesh.vertex_count, 0..instance_buffer.length as u32); } }
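For orientation, the drawing pattern that `DrawMesh`, `DrawMesh2d`, and `DrawMeshInstanced` converge on after this change can be summarized in one sketch. The helper below is illustrative only (the function and its parameters are not an API added by the patch): the buffers come from the `MeshAllocator`, and the draw ranges are rebased onto the mesh's slice within its slab.

use bevy::asset::AssetId;
use bevy::render::{
    mesh::{allocator::MeshAllocator, Mesh, RenderMesh, RenderMeshBufferInfo},
    render_asset::RenderAssets,
    render_phase::TrackedRenderPass,
};

/// Illustrative: issue a single (non-instanced) draw for `mesh_id` using the
/// allocator-managed slabs. Returns `None` if the mesh isn't resident yet.
fn draw_one_mesh<'w>(
    mesh_id: AssetId<Mesh>,
    meshes: &'w RenderAssets<RenderMesh>,
    mesh_allocator: &'w MeshAllocator,
    pass: &mut TrackedRenderPass<'w>,
) -> Option<()> {
    let render_mesh = meshes.get(mesh_id)?;
    let vertex_slice = mesh_allocator.mesh_vertex_slice(&mesh_id)?;

    // The slab buffer is shared between meshes, so bind it whole and offset the
    // draw instead of binding a per-mesh buffer.
    pass.set_vertex_buffer(0, vertex_slice.buffer.slice(..));

    match &render_mesh.buffer_info {
        RenderMeshBufferInfo::Indexed {
            index_format,
            count,
        } => {
            let index_slice = mesh_allocator.mesh_index_slice(&mesh_id)?;
            pass.set_index_buffer(index_slice.buffer.slice(..), 0, *index_format);
            // Index range within the shared index slab, rebased onto the mesh's
            // first vertex within the shared vertex slab.
            pass.draw_indexed(
                index_slice.range.start..index_slice.range.start + *count,
                vertex_slice.range.start as i32,
                0..1,
            );
        }
        RenderMeshBufferInfo::NonIndexed => {
            // Non-indexed direct draws are unchanged by the patch.
            pass.draw(0..render_mesh.vertex_count, 0..1);
        }
    }
    Some(())
}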