Retain skins from frame to frame. (#17818)
Currently, Bevy rebuilds the buffer containing all the transforms for joints every frame, during the extraction phase. This is inefficient in cases in which many skins are present in the scene and their joints don't move, such as the Caldera test scene. To address this problem, this commit switches skin extraction to use a set of retained GPU buffers with allocations managed by the offset allocator. I use fine-grained change detection in order to determine which skins need updating. Note that the granularity is on the level of an entire skin, not individual joints. Using change detection at the level of individual joints would yield poor performance in common cases in which an entire skin is animated at once. Also, this patch yields additional performance from the fact that changing joint transforms no longer requires the skinned mesh to be re-extracted. Note that this optimization can be a double-edged sword. In `many_foxes`, fine-grained change detection regressed the performance of `extract_skins` by 3.4x. This is because every joint is updated every frame in that example, so change detection is pointless and is pure overhead. Because the `many_foxes` workload is actually representative of animated scenes, this patch includes a heuristic that disables fine-grained change detection if the number of transformed entities in the frame exceeds a certain fraction of the total number of joints. Currently, this threshold is set to 25%. Note that this is a crude heuristic, because it doesn't distinguish between the number of transformed *joints* and the number of transformed *entities*; however, it should be good enough to yield the optimum code path most of the time. Finally, this patch fixes a bug whereby skinned meshes are actually being incorrectly retained if the buffer offsets of the joints of those skinned meshes change from frame to frame. To fix this without retaining skins, we would have to re-extract every skinned mesh every frame. 
Doing this was a significant regression on Caldera. With this PR, by contrast, mesh joints stay at the same buffer offset, so we don't have to update the `MeshInputUniform` containing the buffer offset every frame. This also makes PR #17717 easier to implement, because that PR uses the buffer offset from the previous frame, and the logic for calculating that is simplified if the previous frame's buffer offset is guaranteed to be identical to that of the current frame. On Caldera, this patch reduces the time spent in `extract_skins` from 1.79 ms to near zero. On `many_foxes`, this patch regresses the performance of `extract_skins` by approximately 10%-25%, depending on the number of foxes. This has only a small impact on frame rate.
This commit is contained in:
parent
8f36106f9e
commit
8976a45199
@ -1,4 +1,4 @@
|
||||
use bevy_asset::{Asset, Handle};
|
||||
use bevy_asset::{AsAssetId, Asset, AssetId, Handle};
|
||||
use bevy_ecs::{component::Component, entity::Entity, prelude::ReflectComponent};
|
||||
use bevy_math::Mat4;
|
||||
use bevy_reflect::prelude::*;
|
||||
@ -12,6 +12,16 @@ pub struct SkinnedMesh {
|
||||
pub joints: Vec<Entity>,
|
||||
}
|
||||
|
||||
// Exposes the inverse-bindposes asset referenced by a `SkinnedMesh` as the
// component's asset ID.
impl AsAssetId for SkinnedMesh {
|
||||
// The asset type whose ID this component reports.
type Asset = SkinnedMeshInverseBindposes;
|
||||
|
||||
// We implement this so that `AssetChanged` will work to pick up any changes
|
||||
// to `SkinnedMeshInverseBindposes`.
// Returns the ID of the `inverse_bindposes` handle stored on this component.
|
||||
fn as_asset_id(&self) -> AssetId<Self::Asset> {
|
||||
self.inverse_bindposes.id()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Asset, TypePath, Debug)]
|
||||
pub struct SkinnedMeshInverseBindposes(Box<[Mat4]>);
|
||||
|
||||
|
@ -73,6 +73,7 @@ smallvec = "1.6"
|
||||
nonmax = "0.5"
|
||||
static_assertions = "1"
|
||||
tracing = { version = "0.1", default-features = false, features = ["std"] }
|
||||
offset-allocator = "0.2"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
@ -125,7 +125,6 @@ impl InstanceManager {
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
// Append instance data
|
||||
|
@ -26,7 +26,7 @@ use bevy_render::{
|
||||
no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching,
|
||||
},
|
||||
camera::Camera,
|
||||
mesh::*,
|
||||
mesh::{skinning::SkinnedMesh, *},
|
||||
primitives::Aabb,
|
||||
render_asset::RenderAssets,
|
||||
render_phase::{
|
||||
@ -47,7 +47,7 @@ use bevy_utils::{default, Parallel, TypeIdMap};
|
||||
use core::any::TypeId;
|
||||
use core::mem::size_of;
|
||||
use material_bind_groups::MaterialBindingId;
|
||||
use render::skin::{self, SkinIndex};
|
||||
use render::skin;
|
||||
use tracing::{error, warn};
|
||||
|
||||
use self::irradiance_volume::IRRADIANCE_VOLUMES_ARE_USABLE;
|
||||
@ -189,7 +189,6 @@ impl Plugin for MeshRenderPlugin {
|
||||
|
||||
if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
|
||||
render_app
|
||||
.init_resource::<SkinIndices>()
|
||||
.init_resource::<MorphUniforms>()
|
||||
.init_resource::<MorphIndices>()
|
||||
.init_resource::<MeshCullingDataBuffer>()
|
||||
@ -491,8 +490,6 @@ pub struct MeshUniform {
|
||||
pub first_vertex_index: u32,
|
||||
/// The current skin index, or `u32::MAX` if there's no skin.
|
||||
pub current_skin_index: u32,
|
||||
/// The previous skin index, or `u32::MAX` if there's no previous skin.
|
||||
pub previous_skin_index: u32,
|
||||
/// The material and lightmap indices, packed into 32 bits.
|
||||
///
|
||||
/// Low 16 bits: index of the material inside the bind group data.
|
||||
@ -500,6 +497,8 @@ pub struct MeshUniform {
|
||||
pub material_and_lightmap_bind_group_slot: u32,
|
||||
/// User supplied tag to identify this mesh instance.
|
||||
pub tag: u32,
|
||||
/// Padding.
|
||||
pub pad: u32,
|
||||
}
|
||||
|
||||
/// Information that has to be transferred from CPU to GPU in order to produce
|
||||
@ -549,8 +548,6 @@ pub struct MeshInputUniform {
|
||||
pub index_count: u32,
|
||||
/// The current skin index, or `u32::MAX` if there's no skin.
|
||||
pub current_skin_index: u32,
|
||||
/// The previous skin index, or `u32::MAX` if there's no previous skin.
|
||||
pub previous_skin_index: u32,
|
||||
/// The material and lightmap indices, packed into 32 bits.
|
||||
///
|
||||
/// Low 16 bits: index of the material inside the bind group data.
|
||||
@ -559,7 +556,9 @@ pub struct MeshInputUniform {
|
||||
/// User supplied tag to identify this mesh instance.
|
||||
pub tag: u32,
|
||||
/// Padding.
|
||||
pub pad: u32,
|
||||
pub pad_a: u32,
|
||||
/// Padding.
|
||||
pub pad_b: u32,
|
||||
}
|
||||
|
||||
/// Information about each mesh instance needed to cull it on GPU.
|
||||
@ -592,7 +591,6 @@ impl MeshUniform {
|
||||
material_bind_group_slot: MaterialBindGroupSlot,
|
||||
maybe_lightmap: Option<(LightmapSlotIndex, Rect)>,
|
||||
current_skin_index: Option<u32>,
|
||||
previous_skin_index: Option<u32>,
|
||||
tag: Option<u32>,
|
||||
) -> Self {
|
||||
let (local_from_world_transpose_a, local_from_world_transpose_b) =
|
||||
@ -611,10 +609,10 @@ impl MeshUniform {
|
||||
flags: mesh_transforms.flags,
|
||||
first_vertex_index,
|
||||
current_skin_index: current_skin_index.unwrap_or(u32::MAX),
|
||||
previous_skin_index: previous_skin_index.unwrap_or(u32::MAX),
|
||||
material_and_lightmap_bind_group_slot: u32::from(material_bind_group_slot)
|
||||
| ((lightmap_bind_group_slot as u32) << 16),
|
||||
tag: tag.unwrap_or(0),
|
||||
pad: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1120,7 +1118,7 @@ impl RenderMeshInstanceGpuBuilder {
|
||||
mesh_material_ids: &RenderMeshMaterialIds,
|
||||
render_material_bindings: &RenderMaterialBindings,
|
||||
render_lightmaps: &RenderLightmaps,
|
||||
skin_indices: &SkinIndices,
|
||||
skin_uniforms: &SkinUniforms,
|
||||
) -> u32 {
|
||||
let (first_vertex_index, vertex_count) =
|
||||
match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) {
|
||||
@ -1139,13 +1137,8 @@ impl RenderMeshInstanceGpuBuilder {
|
||||
),
|
||||
None => (false, 0, 0),
|
||||
};
|
||||
|
||||
let current_skin_index = match skin_indices.current.get(&entity) {
|
||||
Some(skin_indices) => skin_indices.index(),
|
||||
None => u32::MAX,
|
||||
};
|
||||
let previous_skin_index = match skin_indices.prev.get(&entity) {
|
||||
Some(skin_indices) => skin_indices.index(),
|
||||
let current_skin_index = match skin_uniforms.skin_byte_offset(entity) {
|
||||
Some(skin_index) => skin_index.index(),
|
||||
None => u32::MAX,
|
||||
};
|
||||
|
||||
@ -1181,12 +1174,12 @@ impl RenderMeshInstanceGpuBuilder {
|
||||
vertex_count
|
||||
},
|
||||
current_skin_index,
|
||||
previous_skin_index,
|
||||
material_and_lightmap_bind_group_slot: u32::from(
|
||||
self.shared.material_bindings_index.slot,
|
||||
) | ((lightmap_slot as u32) << 16),
|
||||
tag: self.shared.tag,
|
||||
pad: 0,
|
||||
pad_a: 0,
|
||||
pad_b: 0,
|
||||
};
|
||||
|
||||
// Did the last frame contain this entity as well?
|
||||
@ -1454,6 +1447,7 @@ pub fn extract_meshes_for_gpu_building(
|
||||
Changed<NotShadowCaster>,
|
||||
Changed<NoAutomaticBatching>,
|
||||
Changed<VisibilityRange>,
|
||||
Changed<SkinnedMesh>,
|
||||
)>,
|
||||
>,
|
||||
>,
|
||||
@ -1588,10 +1582,10 @@ pub fn extract_meshes_for_gpu_building(
|
||||
/// loop.
|
||||
fn set_mesh_motion_vector_flags(
|
||||
mut render_mesh_instances: ResMut<RenderMeshInstances>,
|
||||
skin_indices: Res<SkinIndices>,
|
||||
skin_uniforms: Res<SkinUniforms>,
|
||||
morph_indices: Res<MorphIndices>,
|
||||
) {
|
||||
for &entity in skin_indices.prev.keys() {
|
||||
for &entity in skin_uniforms.all_skins() {
|
||||
render_mesh_instances
|
||||
.insert_mesh_instance_flags(entity, RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN);
|
||||
}
|
||||
@ -1614,7 +1608,7 @@ pub fn collect_meshes_for_gpu_building(
|
||||
mesh_material_ids: Res<RenderMeshMaterialIds>,
|
||||
render_material_bindings: Res<RenderMaterialBindings>,
|
||||
render_lightmaps: Res<RenderLightmaps>,
|
||||
skin_indices: Res<SkinIndices>,
|
||||
skin_uniforms: Res<SkinUniforms>,
|
||||
) {
|
||||
let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) =
|
||||
render_mesh_instances.into_inner()
|
||||
@ -1653,7 +1647,7 @@ pub fn collect_meshes_for_gpu_building(
|
||||
&mesh_material_ids,
|
||||
&render_material_bindings,
|
||||
&render_lightmaps,
|
||||
&skin_indices,
|
||||
&skin_uniforms,
|
||||
);
|
||||
}
|
||||
|
||||
@ -1680,7 +1674,7 @@ pub fn collect_meshes_for_gpu_building(
|
||||
&mesh_material_ids,
|
||||
&render_material_bindings,
|
||||
&render_lightmaps,
|
||||
&skin_indices,
|
||||
&skin_uniforms,
|
||||
);
|
||||
mesh_culling_builder
|
||||
.update(&mut mesh_culling_data_buffer, instance_data_index as usize);
|
||||
@ -1830,7 +1824,7 @@ impl GetBatchData for MeshPipeline {
|
||||
SRes<RenderLightmaps>,
|
||||
SRes<RenderAssets<RenderMesh>>,
|
||||
SRes<MeshAllocator>,
|
||||
SRes<SkinIndices>,
|
||||
SRes<SkinUniforms>,
|
||||
);
|
||||
// The material bind group ID, the mesh ID, and the lightmap ID,
|
||||
// respectively.
|
||||
@ -1843,7 +1837,9 @@ impl GetBatchData for MeshPipeline {
|
||||
type BufferData = MeshUniform;
|
||||
|
||||
fn get_batch_data(
|
||||
(mesh_instances, lightmaps, _, mesh_allocator, skin_indices): &SystemParamItem<Self::Param>,
|
||||
(mesh_instances, lightmaps, _, mesh_allocator, skin_uniforms): &SystemParamItem<
|
||||
Self::Param,
|
||||
>,
|
||||
(_entity, main_entity): (Entity, MainEntity),
|
||||
) -> Option<(Self::BufferData, Option<Self::CompareData>)> {
|
||||
let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else {
|
||||
@ -1861,9 +1857,7 @@ impl GetBatchData for MeshPipeline {
|
||||
};
|
||||
let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity);
|
||||
|
||||
let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index);
|
||||
let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index);
|
||||
|
||||
let current_skin_index = skin_uniforms.skin_index(main_entity);
|
||||
let material_bind_group_index = mesh_instance.material_bindings_index;
|
||||
|
||||
Some((
|
||||
@ -1873,7 +1867,6 @@ impl GetBatchData for MeshPipeline {
|
||||
material_bind_group_index.slot,
|
||||
maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)),
|
||||
current_skin_index,
|
||||
previous_skin_index,
|
||||
Some(mesh_instance.tag),
|
||||
),
|
||||
mesh_instance.should_batch().then_some((
|
||||
@ -1915,7 +1908,9 @@ impl GetFullBatchData for MeshPipeline {
|
||||
}
|
||||
|
||||
fn get_binned_batch_data(
|
||||
(mesh_instances, lightmaps, _, mesh_allocator, skin_indices): &SystemParamItem<Self::Param>,
|
||||
(mesh_instances, lightmaps, _, mesh_allocator, skin_uniforms): &SystemParamItem<
|
||||
Self::Param,
|
||||
>,
|
||||
main_entity: MainEntity,
|
||||
) -> Option<Self::BufferData> {
|
||||
let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else {
|
||||
@ -1932,8 +1927,7 @@ impl GetFullBatchData for MeshPipeline {
|
||||
};
|
||||
let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity);
|
||||
|
||||
let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index);
|
||||
let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index);
|
||||
let current_skin_index = skin_uniforms.skin_index(main_entity);
|
||||
|
||||
Some(MeshUniform::new(
|
||||
&mesh_instance.transforms,
|
||||
@ -1941,7 +1935,6 @@ impl GetFullBatchData for MeshPipeline {
|
||||
mesh_instance.material_bindings_index.slot,
|
||||
maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)),
|
||||
current_skin_index,
|
||||
previous_skin_index,
|
||||
Some(mesh_instance.tag),
|
||||
))
|
||||
}
|
||||
@ -2741,16 +2734,12 @@ fn prepare_mesh_bind_groups_for_phase(
|
||||
};
|
||||
|
||||
// Create the skinned mesh bind group with the current and previous buffers
|
||||
// (the latter being for motion vector computation). If there's no previous
|
||||
// buffer, just use the current one as the shader will ignore it.
|
||||
let skin = skins_uniform.current_buffer.buffer();
|
||||
if let Some(skin) = skin {
|
||||
let prev_skin = skins_uniform.prev_buffer.buffer().unwrap_or(skin);
|
||||
groups.skinned = Some(MeshBindGroupPair {
|
||||
motion_vectors: layouts.skinned_motion(render_device, &model, skin, prev_skin),
|
||||
no_motion_vectors: layouts.skinned(render_device, &model, skin),
|
||||
});
|
||||
}
|
||||
// (the latter being for motion vector computation).
|
||||
let (skin, prev_skin) = (&skins_uniform.current_buffer, &skins_uniform.prev_buffer);
|
||||
groups.skinned = Some(MeshBindGroupPair {
|
||||
motion_vectors: layouts.skinned_motion(render_device, &model, skin, prev_skin),
|
||||
no_motion_vectors: layouts.skinned(render_device, &model, skin),
|
||||
});
|
||||
|
||||
// Create the morphed bind groups just like we did for the skinned bind
|
||||
// group.
|
||||
@ -2758,29 +2747,28 @@ fn prepare_mesh_bind_groups_for_phase(
|
||||
let prev_weights = weights_uniform.prev_buffer.buffer().unwrap_or(weights);
|
||||
for (id, gpu_mesh) in meshes.iter() {
|
||||
if let Some(targets) = gpu_mesh.morph_targets.as_ref() {
|
||||
let bind_group_pair = match skin.filter(|_| is_skinned(&gpu_mesh.layout)) {
|
||||
Some(skin) => {
|
||||
let prev_skin = skins_uniform.prev_buffer.buffer().unwrap_or(skin);
|
||||
MeshBindGroupPair {
|
||||
motion_vectors: layouts.morphed_skinned_motion(
|
||||
render_device,
|
||||
&model,
|
||||
skin,
|
||||
weights,
|
||||
targets,
|
||||
prev_skin,
|
||||
prev_weights,
|
||||
),
|
||||
no_motion_vectors: layouts.morphed_skinned(
|
||||
render_device,
|
||||
&model,
|
||||
skin,
|
||||
weights,
|
||||
targets,
|
||||
),
|
||||
}
|
||||
let bind_group_pair = if is_skinned(&gpu_mesh.layout) {
|
||||
let prev_skin = &skins_uniform.prev_buffer;
|
||||
MeshBindGroupPair {
|
||||
motion_vectors: layouts.morphed_skinned_motion(
|
||||
render_device,
|
||||
&model,
|
||||
skin,
|
||||
weights,
|
||||
targets,
|
||||
prev_skin,
|
||||
prev_weights,
|
||||
),
|
||||
no_motion_vectors: layouts.morphed_skinned(
|
||||
render_device,
|
||||
&model,
|
||||
skin,
|
||||
weights,
|
||||
targets,
|
||||
),
|
||||
}
|
||||
None => MeshBindGroupPair {
|
||||
} else {
|
||||
MeshBindGroupPair {
|
||||
motion_vectors: layouts.morphed_motion(
|
||||
render_device,
|
||||
&model,
|
||||
@ -2789,7 +2777,7 @@ fn prepare_mesh_bind_groups_for_phase(
|
||||
prev_weights,
|
||||
),
|
||||
no_motion_vectors: layouts.morphed(render_device, &model, weights, targets),
|
||||
},
|
||||
}
|
||||
};
|
||||
groups.morph_targets.insert(id, bind_group_pair);
|
||||
}
|
||||
@ -2863,7 +2851,7 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
|
||||
SRes<RenderDevice>,
|
||||
SRes<MeshBindGroups>,
|
||||
SRes<RenderMeshInstances>,
|
||||
SRes<SkinIndices>,
|
||||
SRes<SkinUniforms>,
|
||||
SRes<MorphIndices>,
|
||||
SRes<RenderLightmaps>,
|
||||
);
|
||||
@ -2879,7 +2867,7 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
|
||||
render_device,
|
||||
bind_groups,
|
||||
mesh_instances,
|
||||
skin_indices,
|
||||
skin_uniforms,
|
||||
morph_indices,
|
||||
lightmaps,
|
||||
): SystemParamItem<'w, '_, Self::Param>,
|
||||
@ -2887,7 +2875,7 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
|
||||
) -> RenderCommandResult {
|
||||
let bind_groups = bind_groups.into_inner();
|
||||
let mesh_instances = mesh_instances.into_inner();
|
||||
let skin_indices = skin_indices.into_inner();
|
||||
let skin_uniforms = skin_uniforms.into_inner();
|
||||
let morph_indices = morph_indices.into_inner();
|
||||
|
||||
let entity = &item.main_entity();
|
||||
@ -2896,12 +2884,11 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
|
||||
return RenderCommandResult::Success;
|
||||
};
|
||||
|
||||
let current_skin_index = skin_indices.current.get(entity);
|
||||
let prev_skin_index = skin_indices.prev.get(entity);
|
||||
let current_skin_byte_offset = skin_uniforms.skin_byte_offset(*entity);
|
||||
let current_morph_index = morph_indices.current.get(entity);
|
||||
let prev_morph_index = morph_indices.prev.get(entity);
|
||||
|
||||
let is_skinned = current_skin_index.is_some();
|
||||
let is_skinned = current_skin_byte_offset.is_some();
|
||||
let is_morphed = current_morph_index.is_some();
|
||||
|
||||
let lightmap_slab_index = lightmaps
|
||||
@ -2942,7 +2929,7 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
|
||||
dynamic_offsets[offset_count] = dynamic_offset;
|
||||
offset_count += 1;
|
||||
}
|
||||
if let Some(current_skin_index) = current_skin_index {
|
||||
if let Some(current_skin_index) = current_skin_byte_offset {
|
||||
if skin::skins_use_uniform_buffers(&render_device) {
|
||||
dynamic_offsets[offset_count] = current_skin_index.byte_offset;
|
||||
offset_count += 1;
|
||||
@ -2955,16 +2942,12 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
|
||||
|
||||
// Attach motion vectors if needed.
|
||||
if has_motion_vector_prepass {
|
||||
// Attach the previous skin index for motion vector computation. If
|
||||
// there isn't one, just use zero as the shader will ignore it.
|
||||
if current_skin_index.is_some() && skin::skins_use_uniform_buffers(&render_device) {
|
||||
match prev_skin_index {
|
||||
Some(prev_skin_index) => {
|
||||
dynamic_offsets[offset_count] = prev_skin_index.byte_offset;
|
||||
}
|
||||
None => dynamic_offsets[offset_count] = 0,
|
||||
// Attach the previous skin index for motion vector computation.
|
||||
if skin::skins_use_uniform_buffers(&render_device) {
|
||||
if let Some(current_skin_byte_offset) = current_skin_byte_offset {
|
||||
dynamic_offsets[offset_count] = current_skin_byte_offset.byte_offset;
|
||||
offset_count += 1;
|
||||
}
|
||||
offset_count += 1;
|
||||
}
|
||||
|
||||
// Attach the previous morph index for motion vector computation. If
|
||||
|
@ -359,7 +359,6 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
|
||||
output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect;
|
||||
output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index;
|
||||
output[mesh_output_index].current_skin_index = current_input[input_index].current_skin_index;
|
||||
output[mesh_output_index].previous_skin_index = current_input[input_index].previous_skin_index;
|
||||
output[mesh_output_index].material_and_lightmap_bind_group_slot =
|
||||
current_input[input_index].material_and_lightmap_bind_group_slot;
|
||||
output[mesh_output_index].tag = current_input[input_index].tag;
|
||||
|
@ -18,12 +18,12 @@ struct Mesh {
|
||||
// The index of the mesh's first vertex in the vertex buffer.
|
||||
first_vertex_index: u32,
|
||||
current_skin_index: u32,
|
||||
previous_skin_index: u32,
|
||||
// Low 16 bits: index of the material inside the bind group data.
|
||||
// High 16 bits: index of the lightmap in the binding array.
|
||||
material_and_lightmap_bind_group_slot: u32,
|
||||
// User supplied index to identify the mesh instance
|
||||
tag: u32,
|
||||
pad: u32,
|
||||
};
|
||||
|
||||
#ifdef SKINNED
|
||||
|
@ -13,4 +13,4 @@ pub use light::*;
|
||||
pub use mesh::*;
|
||||
pub use mesh_bindings::MeshLayouts;
|
||||
pub use mesh_view_bindings::*;
|
||||
pub use skin::{extract_skins, prepare_skins, SkinIndices, SkinUniforms, MAX_JOINTS};
|
||||
pub use skin::{extract_skins, prepare_skins, SkinUniforms, MAX_JOINTS};
|
||||
|
@ -1,19 +1,24 @@
|
||||
use core::mem::{self, size_of};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use bevy_asset::Assets;
|
||||
use bevy_asset::{prelude::AssetChanged, Assets};
|
||||
use bevy_ecs::prelude::*;
|
||||
use bevy_math::Mat4;
|
||||
use bevy_render::sync_world::MainEntityHashMap;
|
||||
use bevy_platform_support::collections::hash_map::Entry;
|
||||
use bevy_render::render_resource::{Buffer, BufferDescriptor};
|
||||
use bevy_render::sync_world::{MainEntity, MainEntityHashMap, MainEntityHashSet};
|
||||
use bevy_render::{
|
||||
batching::NoAutomaticBatching,
|
||||
mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
|
||||
render_resource::{BufferUsages, RawBufferVec},
|
||||
render_resource::BufferUsages,
|
||||
renderer::{RenderDevice, RenderQueue},
|
||||
view::ViewVisibility,
|
||||
Extract,
|
||||
};
|
||||
use bevy_transform::prelude::GlobalTransform;
|
||||
use offset_allocator::{Allocation, Allocator};
|
||||
use smallvec::SmallVec;
|
||||
use tracing::error;
|
||||
|
||||
/// Maximum number of joints supported for skinned meshes.
|
||||
///
|
||||
@ -24,18 +29,40 @@ use bevy_transform::prelude::GlobalTransform;
|
||||
/// of the GPU at runtime, which would mean not using consts anymore.
|
||||
pub const MAX_JOINTS: usize = 256;
|
||||
|
||||
/// The total number of joints we support.
|
||||
///
|
||||
/// This is 256 GiB worth of joint matrices, which we will never hit under any
|
||||
/// reasonable circumstances.
///
/// NOTE(review): this value is passed to `Allocator::new`, and
/// `SkinUniformInfo::offset` multiplies allocator offsets by
/// `JOINTS_PER_ALLOCATION_UNIT`, so it appears to count allocation units
/// rather than individual joints. The 256 GiB figure matches that reading
/// (2^30 units of 256 bytes each); 2^30 individual 64-byte matrices would be
/// 64 GiB. Confirm, and consider renaming or rewording.
|
||||
const MAX_TOTAL_JOINTS: u32 = 1024 * 1024 * 1024;
|
||||
|
||||
/// The number of joints that we allocate at a time.
|
||||
///
|
||||
/// Some hardware requires that uniforms be allocated on 256-byte boundaries, so
|
||||
/// we need to allocate 4 64-byte matrices at a time to satisfy alignment
|
||||
/// requirements.
|
||||
const JOINTS_PER_ALLOCATION_UNIT: u32 = (256 / size_of::<Mat4>()) as u32;
|
||||
|
||||
/// The maximum ratio of the number of entities whose transforms changed to the
|
||||
/// total number of joints before we re-extract all joints.
|
||||
///
|
||||
/// We use this as a heuristic to decide whether it's worth switching over to
|
||||
/// fine-grained detection to determine which skins need extraction. If the
|
||||
/// number of changed entities is over this threshold, we skip change detection
|
||||
/// and simply re-extract the transforms of all joints.
|
||||
const JOINT_EXTRACTION_THRESHOLD_FACTOR: f64 = 0.25;
|
||||
|
||||
/// The location of the first joint matrix in the skin uniform buffer.
|
||||
#[derive(Component)]
|
||||
pub struct SkinIndex {
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct SkinByteOffset {
|
||||
/// The byte offset of the first joint matrix.
|
||||
pub byte_offset: u32,
|
||||
}
|
||||
|
||||
impl SkinIndex {
|
||||
impl SkinByteOffset {
|
||||
/// Index to be in address space based on the size of a skin uniform.
|
||||
const fn new(start: usize) -> Self {
|
||||
SkinIndex {
|
||||
byte_offset: (start * size_of::<Mat4>()) as u32,
|
||||
const fn from_index(index: usize) -> Self {
|
||||
SkinByteOffset {
|
||||
byte_offset: (index * size_of::<Mat4>()) as u32,
|
||||
}
|
||||
}
|
||||
|
||||
@ -47,22 +74,6 @@ impl SkinIndex {
|
||||
}
|
||||
}
|
||||
|
||||
/// Maps each skinned mesh to the applicable offset within the [`SkinUniforms`]
|
||||
/// buffer.
|
||||
///
|
||||
/// We store both the current frame's joint matrices and the previous frame's
|
||||
/// joint matrices for the purposes of motion vector calculation.
|
||||
#[derive(Default, Resource)]
|
||||
pub struct SkinIndices {
|
||||
/// Maps each skinned mesh to the applicable offset within
|
||||
/// [`SkinUniforms::current_buffer`].
|
||||
pub current: MainEntityHashMap<SkinIndex>,
|
||||
|
||||
/// Maps each skinned mesh to the applicable offset within
|
||||
/// [`SkinUniforms::prev_buffer`].
|
||||
pub prev: MainEntityHashMap<SkinIndex>,
|
||||
}
|
||||
|
||||
/// The GPU buffers containing joint matrices for all skinned meshes.
|
||||
///
|
||||
/// This is double-buffered: we store the joint matrices of each mesh for the
|
||||
@ -74,28 +85,109 @@ pub struct SkinIndices {
|
||||
/// Notes on implementation: see comment on top of the `extract_skins` system.
|
||||
#[derive(Resource)]
|
||||
pub struct SkinUniforms {
|
||||
/// Stores all the joint matrices for skinned meshes in the current frame.
|
||||
pub current_buffer: RawBufferVec<Mat4>,
|
||||
/// Stores all the joint matrices for skinned meshes in the previous frame.
|
||||
pub prev_buffer: RawBufferVec<Mat4>,
|
||||
/// The CPU-side buffer that stores the joint matrices for skinned meshes in
|
||||
/// the current frame.
|
||||
pub current_staging_buffer: Vec<Mat4>,
|
||||
/// The GPU-side buffer that stores the joint matrices for skinned meshes in
|
||||
/// the current frame.
|
||||
pub current_buffer: Buffer,
|
||||
/// The GPU-side buffer that stores the joint matrices for skinned meshes in
|
||||
/// the previous frame.
|
||||
pub prev_buffer: Buffer,
|
||||
/// The offset allocator that manages the placement of the joints within the
|
||||
/// [`Self::current_buffer`].
|
||||
allocator: Allocator,
|
||||
/// Allocation information that we keep about each skin.
|
||||
skin_uniform_info: MainEntityHashMap<SkinUniformInfo>,
|
||||
/// Maps each joint entity to the skins it's associated with.
|
||||
///
|
||||
/// We use this in conjunction with change detection to only update the
|
||||
/// skins that need updating each frame.
|
||||
///
|
||||
/// Note that conceptually this is a hash map of sets, but we use a
|
||||
/// [`SmallVec`] to avoid allocations for the vast majority of the cases in
|
||||
/// which each bone belongs to exactly one skin.
|
||||
joint_to_skins: MainEntityHashMap<SmallVec<[MainEntity; 1]>>,
|
||||
/// The total number of joints in the scene.
|
||||
///
|
||||
/// We use this as part of our heuristic to decide whether to use
|
||||
/// fine-grained change detection.
|
||||
total_joints: usize,
|
||||
}
|
||||
|
||||
impl FromWorld for SkinUniforms {
|
||||
fn from_world(world: &mut World) -> Self {
|
||||
let device = world.resource::<RenderDevice>();
|
||||
let buffer_usages = if skins_use_uniform_buffers(device) {
|
||||
let buffer_usages = (if skins_use_uniform_buffers(device) {
|
||||
BufferUsages::UNIFORM
|
||||
} else {
|
||||
BufferUsages::STORAGE
|
||||
};
|
||||
}) | BufferUsages::COPY_DST;
|
||||
|
||||
// Create the current and previous buffer with the minimum sizes.
|
||||
//
|
||||
// These will be swapped every frame.
|
||||
let current_buffer = device.create_buffer(&BufferDescriptor {
|
||||
label: Some("skin uniform buffer"),
|
||||
size: MAX_JOINTS as u64 * size_of::<Mat4>() as u64,
|
||||
usage: buffer_usages,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
let prev_buffer = device.create_buffer(&BufferDescriptor {
|
||||
label: Some("skin uniform buffer"),
|
||||
size: MAX_JOINTS as u64 * size_of::<Mat4>() as u64,
|
||||
usage: buffer_usages,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
|
||||
Self {
|
||||
current_buffer: RawBufferVec::new(buffer_usages),
|
||||
prev_buffer: RawBufferVec::new(buffer_usages),
|
||||
current_staging_buffer: vec![],
|
||||
current_buffer,
|
||||
prev_buffer,
|
||||
allocator: Allocator::new(MAX_TOTAL_JOINTS),
|
||||
skin_uniform_info: MainEntityHashMap::default(),
|
||||
joint_to_skins: MainEntityHashMap::default(),
|
||||
total_joints: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read-only accessors over the per-skin allocation table
// (`skin_uniform_info`).
impl SkinUniforms {
|
||||
/// Returns the current offset in joints of the skin in the buffer.
///
/// The value is measured in joints (matrices), not bytes. Returns `None`
/// if `skin` has no entry in `skin_uniform_info`.
|
||||
pub fn skin_index(&self, skin: MainEntity) -> Option<u32> {
|
||||
self.skin_uniform_info
|
||||
.get(&skin)
|
||||
.map(SkinUniformInfo::offset)
|
||||
}
|
||||
|
||||
/// Returns the current offset in bytes of the skin in the buffer.
///
/// This is the joint offset of the skin converted through
/// [`SkinByteOffset::from_index`]. Returns `None` if `skin` has no entry
/// in `skin_uniform_info`.
|
||||
pub fn skin_byte_offset(&self, skin: MainEntity) -> Option<SkinByteOffset> {
|
||||
self.skin_uniform_info.get(&skin).map(|skin_uniform_info| {
|
||||
SkinByteOffset::from_index(skin_uniform_info.offset() as usize)
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns an iterator over all skins in the scene.
///
/// Concretely, this yields the keys of `skin_uniform_info`, i.e. every
/// skin that currently has allocation information recorded.
// NOTE(review): presumably the iteration order is unspecified because the
// backing store is a hash map; confirm before relying on ordering.
|
||||
pub fn all_skins(&self) -> impl Iterator<Item = &MainEntity> {
|
||||
self.skin_uniform_info.keys()
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocation information about each skin.
|
||||
struct SkinUniformInfo {
|
||||
/// The allocation of the joints within the [`SkinUniforms::current_buffer`].
|
||||
allocation: Allocation,
|
||||
/// The entities that comprise the joints.
|
||||
joints: Vec<MainEntity>,
|
||||
}
|
||||
|
||||
impl SkinUniformInfo {
|
||||
/// The offset in joints within the [`SkinUniforms::current_staging_buffer`].
///
/// The allocator hands out offsets in allocation units, so the raw
/// `allocation.offset` is scaled by `JOINTS_PER_ALLOCATION_UNIT` to obtain
/// an index measured in joints.
|
||||
fn offset(&self) -> u32 {
|
||||
self.allocation.offset * JOINTS_PER_ALLOCATION_UNIT
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if skinning must use uniforms (and dynamic offsets) because
|
||||
/// storage buffers aren't supported on the current platform.
|
||||
pub fn skins_use_uniform_buffers(render_device: &RenderDevice) -> bool {
|
||||
@ -104,20 +196,54 @@ pub fn skins_use_uniform_buffers(render_device: &RenderDevice) -> bool {
|
||||
.get_or_init(|| render_device.limits().max_storage_buffers_per_shader_stage == 0)
|
||||
}
|
||||
|
||||
/// Uploads the buffers containing the joints to the GPU.
|
||||
pub fn prepare_skins(
|
||||
render_device: Res<RenderDevice>,
|
||||
render_queue: Res<RenderQueue>,
|
||||
mut uniform: ResMut<SkinUniforms>,
|
||||
uniform: ResMut<SkinUniforms>,
|
||||
) {
|
||||
if uniform.current_buffer.is_empty() {
|
||||
let uniform = uniform.into_inner();
|
||||
|
||||
if uniform.current_staging_buffer.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let len = uniform.current_buffer.len();
|
||||
uniform.current_buffer.reserve(len, &render_device);
|
||||
uniform
|
||||
.current_buffer
|
||||
.write_buffer(&render_device, &render_queue);
|
||||
// Swap current and previous buffers.
|
||||
mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer);
|
||||
|
||||
// Resize the buffer if necessary. Include extra space equal to `MAX_JOINTS`
|
||||
// because we need to be able to bind a full uniform buffer's worth of data
|
||||
// if skins use uniform buffers on this platform.
|
||||
let needed_size = (uniform.current_staging_buffer.len() as u64 + MAX_JOINTS as u64)
|
||||
* size_of::<Mat4>() as u64;
|
||||
if uniform.current_buffer.size() < needed_size {
|
||||
let mut new_size = uniform.current_buffer.size();
|
||||
while new_size < needed_size {
|
||||
// 1.5× growth factor.
|
||||
new_size += new_size / 2;
|
||||
}
|
||||
|
||||
// Create a new buffer.
|
||||
let buffer_usages = if skins_use_uniform_buffers(&render_device) {
|
||||
BufferUsages::UNIFORM
|
||||
} else {
|
||||
BufferUsages::STORAGE
|
||||
} | BufferUsages::COPY_DST;
|
||||
uniform.current_buffer = render_device.create_buffer(&BufferDescriptor {
|
||||
label: Some("skin uniform buffer"),
|
||||
usage: buffer_usages,
|
||||
size: new_size,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
}
|
||||
|
||||
// Write the data from `uniform.current_staging_buffer` into
|
||||
// `uniform.current_buffer`.
|
||||
render_queue.write_buffer(
|
||||
&uniform.current_buffer,
|
||||
0,
|
||||
bytemuck::must_cast_slice(&uniform.current_staging_buffer[..]),
|
||||
);
|
||||
|
||||
// We don't need to write `uniform.prev_buffer` because we already wrote it
|
||||
// last frame, and the data should still be on the GPU.
|
||||
@ -150,71 +276,320 @@ pub fn prepare_skins(
|
||||
// which normally only support fixed size arrays. You just have to make sure
|
||||
// in the shader that you only read the values that are valid for that binding.
|
||||
pub fn extract_skins(
|
||||
skin_indices: ResMut<SkinIndices>,
|
||||
uniform: ResMut<SkinUniforms>,
|
||||
query: Extract<Query<(Entity, &ViewVisibility, &SkinnedMesh)>>,
|
||||
inverse_bindposes: Extract<Res<Assets<SkinnedMeshInverseBindposes>>>,
|
||||
skin_uniforms: ResMut<SkinUniforms>,
|
||||
skinned_meshes: Extract<Query<(Entity, &SkinnedMesh)>>,
|
||||
changed_skinned_meshes: Extract<
|
||||
Query<
|
||||
(Entity, &ViewVisibility, &SkinnedMesh),
|
||||
Or<(
|
||||
Changed<ViewVisibility>,
|
||||
Changed<SkinnedMesh>,
|
||||
AssetChanged<SkinnedMesh>,
|
||||
)>,
|
||||
>,
|
||||
>,
|
||||
skinned_mesh_inverse_bindposes: Extract<Res<Assets<SkinnedMeshInverseBindposes>>>,
|
||||
changed_transforms: Extract<Query<(Entity, &GlobalTransform), Changed<GlobalTransform>>>,
|
||||
joints: Extract<Query<&GlobalTransform>>,
|
||||
render_device: Res<RenderDevice>,
|
||||
mut removed_visibilities_query: Extract<RemovedComponents<ViewVisibility>>,
|
||||
mut removed_skinned_meshes_query: Extract<RemovedComponents<SkinnedMesh>>,
|
||||
) {
|
||||
let skins_use_uniform_buffers = skins_use_uniform_buffers(&render_device);
|
||||
let skin_uniforms = skin_uniforms.into_inner();
|
||||
|
||||
// Borrow check workaround.
|
||||
let (skin_indices, uniform) = (skin_indices.into_inner(), uniform.into_inner());
|
||||
// Find skins that have become visible or invisible on this frame. Allocate,
|
||||
// reallocate, or free space for them as necessary.
|
||||
add_or_delete_skins(
|
||||
skin_uniforms,
|
||||
&changed_skinned_meshes,
|
||||
&skinned_mesh_inverse_bindposes,
|
||||
&joints,
|
||||
);
|
||||
|
||||
// Swap buffers. We need to keep the previous frame's buffer around for the
|
||||
// purposes of motion vector computation.
|
||||
mem::swap(&mut skin_indices.current, &mut skin_indices.prev);
|
||||
mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer);
|
||||
skin_indices.current.clear();
|
||||
uniform.current_buffer.clear();
|
||||
// Extract the transforms for all joints from the scene, and write them into
|
||||
// the staging buffer at the appropriate spot.
|
||||
extract_joints(
|
||||
skin_uniforms,
|
||||
&skinned_meshes,
|
||||
&changed_skinned_meshes,
|
||||
&skinned_mesh_inverse_bindposes,
|
||||
&changed_transforms,
|
||||
&joints,
|
||||
);
|
||||
|
||||
let mut last_start = 0;
|
||||
// Delete skins that became invisible.
|
||||
for skinned_mesh_entity in removed_visibilities_query
|
||||
.read()
|
||||
.chain(removed_skinned_meshes_query.read())
|
||||
{
|
||||
// Only remove a skin if we didn't pick it up in `add_or_delete_skins`.
|
||||
// It's possible that a necessary component was removed and re-added in
|
||||
// the same frame.
|
||||
if !changed_skinned_meshes.contains(skinned_mesh_entity) {
|
||||
remove_skin(skin_uniforms, skinned_mesh_entity.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PERF: This can be expensive, can we move this to prepare?
|
||||
for (entity, view_visibility, skin) in &query {
|
||||
if !view_visibility.get() {
|
||||
/// Searches for all skins that have become visible or invisible this frame and
|
||||
/// allocations for them as necessary.
|
||||
fn add_or_delete_skins(
|
||||
skin_uniforms: &mut SkinUniforms,
|
||||
changed_skinned_meshes: &Query<
|
||||
(Entity, &ViewVisibility, &SkinnedMesh),
|
||||
Or<(
|
||||
Changed<ViewVisibility>,
|
||||
Changed<SkinnedMesh>,
|
||||
AssetChanged<SkinnedMesh>,
|
||||
)>,
|
||||
>,
|
||||
skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
|
||||
joints: &Query<&GlobalTransform>,
|
||||
) {
|
||||
// Find every skinned mesh that changed one of (1) visibility; (2) joint
|
||||
// entities (part of `SkinnedMesh`); (3) the associated
|
||||
// `SkinnedMeshInverseBindposes` asset.
|
||||
for (skinned_mesh_entity, skinned_mesh_view_visibility, skinned_mesh) in changed_skinned_meshes
|
||||
{
|
||||
// Remove the skin if it existed last frame.
|
||||
let skinned_mesh_entity = MainEntity::from(skinned_mesh_entity);
|
||||
remove_skin(skin_uniforms, skinned_mesh_entity);
|
||||
|
||||
// If the skin is invisible, we're done.
|
||||
if !(*skinned_mesh_view_visibility).get() {
|
||||
continue;
|
||||
}
|
||||
let buffer = &mut uniform.current_buffer;
|
||||
let Some(inverse_bindposes) = inverse_bindposes.get(&skin.inverse_bindposes) else {
|
||||
|
||||
// Initialize the skin.
|
||||
add_skin(
|
||||
skinned_mesh_entity,
|
||||
skinned_mesh,
|
||||
skin_uniforms,
|
||||
skinned_mesh_inverse_bindposes,
|
||||
joints,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts the global transforms of all joints and updates the staging buffer
|
||||
/// as necessary.
|
||||
fn extract_joints(
|
||||
skin_uniforms: &mut SkinUniforms,
|
||||
skinned_meshes: &Query<(Entity, &SkinnedMesh)>,
|
||||
changed_skinned_meshes: &Query<
|
||||
(Entity, &ViewVisibility, &SkinnedMesh),
|
||||
Or<(
|
||||
Changed<ViewVisibility>,
|
||||
Changed<SkinnedMesh>,
|
||||
AssetChanged<SkinnedMesh>,
|
||||
)>,
|
||||
>,
|
||||
skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
|
||||
changed_transforms: &Query<(Entity, &GlobalTransform), Changed<GlobalTransform>>,
|
||||
joints: &Query<&GlobalTransform>,
|
||||
) {
|
||||
// If the number of entities that changed transforms exceeds a certain
|
||||
// fraction (currently 25%) of the total joints in the scene, then skip
|
||||
// fine-grained change detection.
|
||||
//
|
||||
// Note that this is a crude heuristic, for performance reasons. It doesn't
|
||||
// consider the ratio of modified *joints* to total joints, only the ratio
|
||||
// of modified *entities* to total joints. Thus in the worst case we might
|
||||
// end up re-extracting all skins even though none of the joints changed.
|
||||
// But making the heuristic finer-grained would make it slower to evaluate,
|
||||
// and we don't want to lose performance.
|
||||
let threshold =
|
||||
(skin_uniforms.total_joints as f64 * JOINT_EXTRACTION_THRESHOLD_FACTOR).floor() as usize;
|
||||
|
||||
if changed_transforms.iter().nth(threshold).is_some() {
|
||||
// Go ahead and re-extract all skins in the scene.
|
||||
for (skin_entity, skin) in skinned_meshes {
|
||||
extract_joints_for_skin(
|
||||
skin_entity.into(),
|
||||
skin,
|
||||
skin_uniforms,
|
||||
changed_skinned_meshes,
|
||||
skinned_mesh_inverse_bindposes,
|
||||
joints,
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Use fine-grained change detection to figure out only the skins that need
|
||||
// to have their joints re-extracted.
|
||||
let dirty_skins: MainEntityHashSet = changed_transforms
|
||||
.iter()
|
||||
.flat_map(|(joint, _)| skin_uniforms.joint_to_skins.get(&MainEntity::from(joint)))
|
||||
.flat_map(|skin_joint_mappings| skin_joint_mappings.iter())
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
// Re-extract the joints for only those skins.
|
||||
for skin_entity in dirty_skins {
|
||||
let Ok((_, skin)) = skinned_meshes.get(*skin_entity) else {
|
||||
continue;
|
||||
};
|
||||
let start = buffer.len();
|
||||
|
||||
let target = start + skin.joints.len().min(MAX_JOINTS);
|
||||
buffer.extend(
|
||||
joints
|
||||
.iter_many(&skin.joints)
|
||||
.zip(inverse_bindposes.iter())
|
||||
.take(MAX_JOINTS)
|
||||
.map(|(joint, bindpose)| joint.affine() * *bindpose),
|
||||
extract_joints_for_skin(
|
||||
skin_entity,
|
||||
skin,
|
||||
skin_uniforms,
|
||||
changed_skinned_meshes,
|
||||
skinned_mesh_inverse_bindposes,
|
||||
joints,
|
||||
);
|
||||
// iter_many will skip any failed fetches. This will cause it to assign the wrong bones,
|
||||
// so just bail by truncating to the start.
|
||||
if buffer.len() != target {
|
||||
buffer.truncate(start);
|
||||
continue;
|
||||
}
|
||||
last_start = last_start.max(start);
|
||||
}
|
||||
}
|
||||
|
||||
// Pad to 256 byte alignment if we're using a uniform buffer.
|
||||
// There's no need to do this if we're using storage buffers, though.
|
||||
if skins_use_uniform_buffers {
|
||||
while buffer.len() % 4 != 0 {
|
||||
buffer.push(Mat4::ZERO);
|
||||
/// Extracts all joints for a single skin and writes their transforms into the
|
||||
/// CPU staging buffer.
|
||||
fn extract_joints_for_skin(
|
||||
skin_entity: MainEntity,
|
||||
skin: &SkinnedMesh,
|
||||
skin_uniforms: &mut SkinUniforms,
|
||||
changed_skinned_meshes: &Query<
|
||||
(Entity, &ViewVisibility, &SkinnedMesh),
|
||||
Or<(
|
||||
Changed<ViewVisibility>,
|
||||
Changed<SkinnedMesh>,
|
||||
AssetChanged<SkinnedMesh>,
|
||||
)>,
|
||||
>,
|
||||
skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
|
||||
joints: &Query<&GlobalTransform>,
|
||||
) {
|
||||
// If we initialized the skin this frame, we already populated all
|
||||
// the joints, so there's no need to populate them again.
|
||||
if changed_skinned_meshes.contains(*skin_entity) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Fetch information about the skin.
|
||||
let Some(skin_uniform_info) = skin_uniforms.skin_uniform_info.get(&skin_entity) else {
|
||||
return;
|
||||
};
|
||||
let Some(skinned_mesh_inverse_bindposes) =
|
||||
skinned_mesh_inverse_bindposes.get(&skin.inverse_bindposes)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
|
||||
// Calculate and write in the new joint matrices.
|
||||
for (joint_index, (&joint, skinned_mesh_inverse_bindpose)) in skin
|
||||
.joints
|
||||
.iter()
|
||||
.zip(skinned_mesh_inverse_bindposes.iter())
|
||||
.enumerate()
|
||||
{
|
||||
let Ok(joint_transform) = joints.get(joint) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let joint_matrix = joint_transform.affine() * *skinned_mesh_inverse_bindpose;
|
||||
skin_uniforms.current_staging_buffer[skin_uniform_info.offset() as usize + joint_index] =
|
||||
joint_matrix;
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocates space for a new skin in the buffers, and populates its joints.
|
||||
fn add_skin(
|
||||
skinned_mesh_entity: MainEntity,
|
||||
skinned_mesh: &SkinnedMesh,
|
||||
skin_uniforms: &mut SkinUniforms,
|
||||
skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
|
||||
joints: &Query<&GlobalTransform>,
|
||||
) {
|
||||
// Allocate space for the joints.
|
||||
let Some(allocation) = skin_uniforms.allocator.allocate(
|
||||
skinned_mesh
|
||||
.joints
|
||||
.len()
|
||||
.div_ceil(JOINTS_PER_ALLOCATION_UNIT as usize) as u32,
|
||||
) else {
|
||||
error!(
|
||||
"Out of space for skin: {:?}. Tried to allocate space for {:?} joints.",
|
||||
skinned_mesh_entity,
|
||||
skinned_mesh.joints.len()
|
||||
);
|
||||
return;
|
||||
};
|
||||
|
||||
// Store that allocation.
|
||||
let skin_uniform_info = SkinUniformInfo {
|
||||
allocation,
|
||||
joints: skinned_mesh
|
||||
.joints
|
||||
.iter()
|
||||
.map(|entity| MainEntity::from(*entity))
|
||||
.collect(),
|
||||
};
|
||||
|
||||
let skinned_mesh_inverse_bindposes =
|
||||
skinned_mesh_inverse_bindposes.get(&skinned_mesh.inverse_bindposes);
|
||||
|
||||
for (joint_index, &joint) in skinned_mesh.joints.iter().enumerate() {
|
||||
// Calculate the initial joint matrix.
|
||||
let skinned_mesh_inverse_bindpose =
|
||||
skinned_mesh_inverse_bindposes.and_then(|skinned_mesh_inverse_bindposes| {
|
||||
skinned_mesh_inverse_bindposes.get(joint_index)
|
||||
});
|
||||
let joint_matrix = match (skinned_mesh_inverse_bindpose, joints.get(joint)) {
|
||||
(Some(skinned_mesh_inverse_bindpose), Ok(transform)) => {
|
||||
transform.affine() * *skinned_mesh_inverse_bindpose
|
||||
}
|
||||
_ => Mat4::IDENTITY,
|
||||
};
|
||||
|
||||
// Write in the new joint matrix, growing the staging buffer if
|
||||
// necessary.
|
||||
let buffer_index = skin_uniform_info.offset() as usize + joint_index;
|
||||
if skin_uniforms.current_staging_buffer.len() < buffer_index + 1 {
|
||||
skin_uniforms
|
||||
.current_staging_buffer
|
||||
.resize(buffer_index + 1, Mat4::IDENTITY);
|
||||
}
|
||||
skin_uniforms.current_staging_buffer[buffer_index] = joint_matrix;
|
||||
|
||||
// Record the inverse mapping from the joint back to the skin. We use
|
||||
// this in order to perform fine-grained joint extraction.
|
||||
skin_uniforms
|
||||
.joint_to_skins
|
||||
.entry(MainEntity::from(joint))
|
||||
.or_default()
|
||||
.push(skinned_mesh_entity);
|
||||
}
|
||||
|
||||
// Record the number of joints.
|
||||
skin_uniforms.total_joints += skinned_mesh.joints.len();
|
||||
|
||||
skin_uniforms
|
||||
.skin_uniform_info
|
||||
.insert(skinned_mesh_entity, skin_uniform_info);
|
||||
}
|
||||
|
||||
/// Deallocates a skin and removes it from the [`SkinUniforms`].
|
||||
fn remove_skin(skin_uniforms: &mut SkinUniforms, skinned_mesh_entity: MainEntity) {
|
||||
let Some(old_skin_uniform_info) = skin_uniforms.skin_uniform_info.remove(&skinned_mesh_entity)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
|
||||
// Free the allocation.
|
||||
skin_uniforms
|
||||
.allocator
|
||||
.free(old_skin_uniform_info.allocation);
|
||||
|
||||
// Remove the inverse mapping from each joint back to the skin.
|
||||
for &joint in &old_skin_uniform_info.joints {
|
||||
if let Entry::Occupied(mut entry) = skin_uniforms.joint_to_skins.entry(joint) {
|
||||
entry.get_mut().retain(|skin| *skin != skinned_mesh_entity);
|
||||
if entry.get_mut().is_empty() {
|
||||
entry.remove();
|
||||
}
|
||||
}
|
||||
|
||||
skin_indices
|
||||
.current
|
||||
.insert(entity.into(), SkinIndex::new(start));
|
||||
}
|
||||
|
||||
// Pad out the buffer to ensure that there's enough space for bindings
|
||||
while uniform.current_buffer.len() - last_start < MAX_JOINTS {
|
||||
uniform.current_buffer.push(Mat4::ZERO);
|
||||
}
|
||||
// Update the total number of joints.
|
||||
skin_uniforms.total_joints -= old_skin_uniform_info.joints.len();
|
||||
}
|
||||
|
||||
// NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per
|
||||
|
@ -57,7 +57,7 @@ fn skin_prev_model(
|
||||
+ weights.z * prev_joint_matrices.data[indexes.z]
|
||||
+ weights.w * prev_joint_matrices.data[indexes.w];
|
||||
#else // SKINS_USE_UNIFORM_BUFFERS
|
||||
let skin_index = mesh[instance_index].previous_skin_index;
|
||||
let skin_index = mesh[instance_index].current_skin_index;
|
||||
return weights.x * prev_joint_matrices[skin_index + indexes.x]
|
||||
+ weights.y * prev_joint_matrices[skin_index + indexes.y]
|
||||
+ weights.z * prev_joint_matrices[skin_index + indexes.z]
|
||||
|
@ -15,13 +15,13 @@ struct MeshInput {
|
||||
first_index_index: u32,
|
||||
index_count: u32,
|
||||
current_skin_index: u32,
|
||||
previous_skin_index: u32,
|
||||
// Low 16 bits: index of the material inside the bind group data.
|
||||
// High 16 bits: index of the lightmap in the binding array.
|
||||
material_and_lightmap_bind_group_slot: u32,
|
||||
// User supplied index to identify the mesh instance
|
||||
tag: u32,
|
||||
pad: u32,
|
||||
pad_a: u32,
|
||||
pad_b: u32,
|
||||
}
|
||||
|
||||
// The `wgpu` indirect parameters structure. This is a union of two structures.
|
||||
|
@ -374,9 +374,9 @@ impl GetBatchData for StencilPipeline {
|
||||
flags: mesh_transforms.flags,
|
||||
first_vertex_index,
|
||||
current_skin_index: u32::MAX,
|
||||
previous_skin_index: u32::MAX,
|
||||
material_and_lightmap_bind_group_slot: 0,
|
||||
tag: 0,
|
||||
pad: 0,
|
||||
}
|
||||
};
|
||||
Some((mesh_uniform, None))
|
||||
@ -430,7 +430,6 @@ impl GetFullBatchData for StencilPipeline {
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user