Transform Propagation Optimization: Static Subtree Marking (#18589)
# Objective - Optimize static scene performance by marking unchanged subtrees. - [bef0209
](bef0209de1
) fixes #18255 and #18363. - Closes #18365 - Includes change from #18321 ## Solution - Mark hierarchy subtrees with dirty bits to avoid transform propagation where not needed - This causes a performance regression when spawning many entities, or when the scene is entirely dynamic. - This results in massive speedups for largely static scenes. - In the future we could allow the user to change this behavior, or add some threshold based on how dynamic the scene is? ## Testing - Caldera Hotel scene
This commit is contained in:
parent
4519ff677a
commit
8130b229be
@ -63,7 +63,11 @@ fn assert_is_normalized(message: &str, length_squared: f32) {
|
||||
/// [transform_example]: https://github.com/bevyengine/bevy/blob/latest/examples/transforms/transform.rs
|
||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||
#[cfg_attr(feature = "serialize", derive(serde::Serialize, serde::Deserialize))]
|
||||
#[cfg_attr(feature = "bevy-support", derive(Component), require(GlobalTransform))]
|
||||
#[cfg_attr(
|
||||
feature = "bevy-support",
|
||||
derive(Component),
|
||||
require(GlobalTransform, TransformTreeChanged)
|
||||
)]
|
||||
#[cfg_attr(
|
||||
feature = "bevy_reflect",
|
||||
derive(Reflect),
|
||||
@ -644,3 +648,20 @@ impl Mul<Vec3> for Transform {
|
||||
self.transform_point(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// An optimization for transform propagation. This ZST marker component uses change detection to
|
||||
/// mark all entities of the hierarchy as "dirty" if any of their descendants have a changed
|
||||
/// `Transform`. If this component is *not* marked `is_changed()`, propagation will halt.
|
||||
#[derive(Clone, Copy, Default, PartialEq, Debug)]
|
||||
#[cfg_attr(feature = "serialize", derive(serde::Serialize, serde::Deserialize))]
|
||||
#[cfg_attr(feature = "bevy-support", derive(Component))]
|
||||
#[cfg_attr(
|
||||
feature = "bevy_reflect",
|
||||
derive(Reflect),
|
||||
reflect(Component, Default, PartialEq, Debug)
|
||||
)]
|
||||
#[cfg_attr(
|
||||
all(feature = "bevy_reflect", feature = "serialize"),
|
||||
reflect(Serialize, Deserialize)
|
||||
)]
|
||||
pub struct TransformTreeChanged;
|
||||
|
@ -1,6 +1,4 @@
|
||||
use crate::systems::{
|
||||
compute_transform_leaves, propagate_parent_transforms, sync_simple_transforms,
|
||||
};
|
||||
use crate::systems::{mark_dirty_trees, propagate_parent_transforms, sync_simple_transforms};
|
||||
use bevy_app::{App, Plugin, PostStartup, PostUpdate};
|
||||
use bevy_ecs::schedule::{IntoScheduleConfigs, SystemSet};
|
||||
|
||||
@ -17,43 +15,33 @@ pub struct TransformPlugin;
|
||||
|
||||
impl Plugin for TransformPlugin {
|
||||
fn build(&self, app: &mut App) {
|
||||
// A set for `propagate_transforms` to mark it as ambiguous with `sync_simple_transforms`.
|
||||
// Used instead of the `SystemTypeSet` as that would not allow multiple instances of the system.
|
||||
#[derive(Debug, Hash, PartialEq, Eq, Clone, SystemSet)]
|
||||
struct PropagateTransformsSet;
|
||||
|
||||
#[cfg(feature = "bevy_reflect")]
|
||||
app.register_type::<crate::components::Transform>()
|
||||
.register_type::<crate::components::TransformTreeChanged>()
|
||||
.register_type::<crate::components::GlobalTransform>();
|
||||
|
||||
app.configure_sets(
|
||||
PostStartup,
|
||||
PropagateTransformsSet.in_set(TransformSystem::TransformPropagate),
|
||||
)
|
||||
// add transform systems to startup so the first update is "correct"
|
||||
.add_systems(
|
||||
PostStartup,
|
||||
(
|
||||
propagate_parent_transforms,
|
||||
(compute_transform_leaves, sync_simple_transforms)
|
||||
.ambiguous_with(TransformSystem::TransformPropagate),
|
||||
app
|
||||
// add transform systems to startup so the first update is "correct"
|
||||
.add_systems(
|
||||
PostStartup,
|
||||
(
|
||||
mark_dirty_trees,
|
||||
propagate_parent_transforms,
|
||||
sync_simple_transforms,
|
||||
)
|
||||
.chain()
|
||||
.in_set(TransformSystem::TransformPropagate),
|
||||
)
|
||||
.chain()
|
||||
.in_set(PropagateTransformsSet),
|
||||
)
|
||||
.configure_sets(
|
||||
PostUpdate,
|
||||
PropagateTransformsSet.in_set(TransformSystem::TransformPropagate),
|
||||
)
|
||||
.add_systems(
|
||||
PostUpdate,
|
||||
(
|
||||
propagate_parent_transforms,
|
||||
(compute_transform_leaves, sync_simple_transforms) // TODO: Adjust the internal parallel queries to make these parallel systems more efficiently share and fill CPU time.
|
||||
.ambiguous_with(TransformSystem::TransformPropagate),
|
||||
)
|
||||
.chain()
|
||||
.in_set(PropagateTransformsSet),
|
||||
);
|
||||
.add_systems(
|
||||
PostUpdate,
|
||||
(
|
||||
mark_dirty_trees,
|
||||
propagate_parent_transforms,
|
||||
// TODO: Adjust the internal parallel queries to make this system more efficiently share and fill CPU time.
|
||||
sync_simple_transforms,
|
||||
)
|
||||
.chain()
|
||||
.in_set(TransformSystem::TransformPropagate),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
use crate::components::{GlobalTransform, Transform};
|
||||
use crate::components::{GlobalTransform, Transform, TransformTreeChanged};
|
||||
use bevy_ecs::prelude::*;
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
pub use parallel::propagate_parent_transforms;
|
||||
#[cfg(not(feature = "std"))]
|
||||
@ -9,7 +8,7 @@ pub use serial::propagate_parent_transforms;
|
||||
/// Update [`GlobalTransform`] component of entities that aren't in the hierarchy
|
||||
///
|
||||
/// Third party plugins should ensure that this is used in concert with
|
||||
/// [`propagate_parent_transforms`] and [`compute_transform_leaves`].
|
||||
/// [`propagate_parent_transforms`] and [`mark_dirty_trees`].
|
||||
pub fn sync_simple_transforms(
|
||||
mut query: ParamSet<(
|
||||
Query<
|
||||
@ -41,28 +40,33 @@ pub fn sync_simple_transforms(
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute leaf [`GlobalTransform`]s in parallel.
|
||||
///
|
||||
/// This is run after [`propagate_parent_transforms`], to ensure the parents' [`GlobalTransform`]s
|
||||
/// have been computed. This makes computing leaf nodes at different levels of the hierarchy much
|
||||
/// more cache friendly, because data can be iterated over densely from the same archetype.
|
||||
pub fn compute_transform_leaves(
|
||||
parents: Query<Ref<GlobalTransform>, With<Children>>,
|
||||
mut leaves: Query<(Ref<Transform>, &mut GlobalTransform, &ChildOf), Without<Children>>,
|
||||
/// Optimization for static scenes. Propagates a "dirty bit" up the hierarchy towards ancestors.
|
||||
/// Transform propagation can ignore entire subtrees of the hierarchy if it encounters an entity
|
||||
/// without the dirty bit.
|
||||
pub fn mark_dirty_trees(
|
||||
changed_transforms: Query<
|
||||
Entity,
|
||||
Or<(Changed<Transform>, Changed<ChildOf>, Added<GlobalTransform>)>,
|
||||
>,
|
||||
mut orphaned: RemovedComponents<ChildOf>,
|
||||
mut transforms: Query<(Option<&ChildOf>, &mut TransformTreeChanged)>,
|
||||
) {
|
||||
leaves
|
||||
.par_iter_mut()
|
||||
.for_each(|(transform, mut global_transform, child_of)| {
|
||||
let Ok(parent_transform) = parents.get(child_of.parent) else {
|
||||
return;
|
||||
};
|
||||
if parent_transform.is_changed()
|
||||
|| transform.is_changed()
|
||||
|| global_transform.is_added()
|
||||
{
|
||||
*global_transform = parent_transform.mul_transform(*transform);
|
||||
for entity in changed_transforms.iter().chain(orphaned.read()) {
|
||||
let mut next = entity;
|
||||
while let Ok((parent, mut tree)) = transforms.get_mut(next) {
|
||||
if tree.is_changed() && !tree.is_added() {
|
||||
// If the component was changed, this part of the tree has already been processed.
|
||||
// Ignore this if the change was caused by the component being added.
|
||||
break;
|
||||
}
|
||||
});
|
||||
tree.set_changed();
|
||||
if let Some(parent) = parent.map(|p| p.parent) {
|
||||
next = parent;
|
||||
} else {
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: This serial implementation isn't actually serial, it parallelizes across the roots.
|
||||
@ -91,7 +95,7 @@ mod serial {
|
||||
///
|
||||
/// Third party plugins should ensure that this is used in concert with
|
||||
/// [`sync_simple_transforms`](super::sync_simple_transforms) and
|
||||
/// [`compute_transform_leaves`](super::compute_transform_leaves).
|
||||
/// [`mark_dirty_trees`](super::mark_dirty_trees).
|
||||
pub fn propagate_parent_transforms(
|
||||
mut root_query: Query<
|
||||
(Entity, &Children, Ref<Transform>, &mut GlobalTransform),
|
||||
@ -100,7 +104,7 @@ mod serial {
|
||||
mut orphaned: RemovedComponents<ChildOf>,
|
||||
transform_query: Query<
|
||||
(Ref<Transform>, &mut GlobalTransform, Option<&Children>),
|
||||
(With<ChildOf>, With<Children>),
|
||||
With<ChildOf>,
|
||||
>,
|
||||
child_query: Query<(Entity, Ref<ChildOf>), With<GlobalTransform>>,
|
||||
mut orphaned_entities: Local<Vec<Entity>>,
|
||||
@ -168,7 +172,7 @@ mod serial {
|
||||
parent: &GlobalTransform,
|
||||
transform_query: &Query<
|
||||
(Ref<Transform>, &mut GlobalTransform, Option<&Children>),
|
||||
(With<ChildOf>, With<Children>),
|
||||
With<ChildOf>,
|
||||
>,
|
||||
child_query: &Query<(Entity, Ref<ChildOf>), With<GlobalTransform>>,
|
||||
entity: Entity,
|
||||
@ -245,12 +249,12 @@ mod serial {
|
||||
#[cfg(feature = "std")]
|
||||
mod parallel {
|
||||
use crate::prelude::*;
|
||||
// TODO: this implementation could be used in no_std if there are equivalents of these.
|
||||
use alloc::{sync::Arc, vec::Vec};
|
||||
use bevy_ecs::{entity::UniqueEntityIter, prelude::*, system::lifetimeless::Read};
|
||||
use bevy_tasks::{ComputeTaskPool, TaskPool};
|
||||
use bevy_utils::Parallel;
|
||||
use core::sync::atomic::{AtomicI32, Ordering};
|
||||
// TODO: this implementation could be used in no_std if there are equivalents of these.
|
||||
use std::sync::{
|
||||
mpsc::{Receiver, Sender},
|
||||
Mutex,
|
||||
@ -261,32 +265,20 @@ mod parallel {
|
||||
///
|
||||
/// Third party plugins should ensure that this is used in concert with
|
||||
/// [`sync_simple_transforms`](super::sync_simple_transforms) and
|
||||
/// [`compute_transform_leaves`](super::compute_transform_leaves).
|
||||
/// [`mark_dirty_trees`](super::mark_dirty_trees).
|
||||
pub fn propagate_parent_transforms(
|
||||
mut queue: Local<WorkQueue>,
|
||||
mut orphaned: RemovedComponents<ChildOf>,
|
||||
mut orphans: Local<Vec<Entity>>,
|
||||
mut roots: Query<
|
||||
(Entity, Ref<Transform>, &mut GlobalTransform, &Children),
|
||||
Without<ChildOf>,
|
||||
(Without<ChildOf>, Changed<TransformTreeChanged>),
|
||||
>,
|
||||
nodes: NodeQuery,
|
||||
) {
|
||||
// Orphans
|
||||
orphans.clear();
|
||||
orphans.extend(orphaned.read());
|
||||
orphans.sort_unstable();
|
||||
|
||||
// Process roots in parallel, seeding the work queue
|
||||
roots.par_iter_mut().for_each_init(
|
||||
|| queue.local_queue.borrow_local_mut(),
|
||||
|outbox, (parent, transform, mut parent_transform, children)| {
|
||||
if transform.is_changed()
|
||||
|| parent_transform.is_added()
|
||||
|| orphans.binary_search(&parent).is_ok()
|
||||
{
|
||||
*parent_transform = GlobalTransform::from(*transform);
|
||||
}
|
||||
*parent_transform = GlobalTransform::from(*transform);
|
||||
|
||||
// SAFETY: the parent entities passed into this function are taken from iterating
|
||||
// over the root entity query. Queries iterate over disjoint entities, preventing
|
||||
@ -315,6 +307,18 @@ mod parallel {
|
||||
// number of channel sends by avoiding sending partial batches.
|
||||
queue.send_batches();
|
||||
|
||||
if let Ok(rx) = queue.receiver.try_lock() {
|
||||
if let Some(task) = rx.try_iter().next() {
|
||||
// This is a bit silly, but the only way to see if there is any work is to grab a
|
||||
// task. Peeking will remove the task even if you don't call `next`, resulting in
|
||||
// dropping a task. What we do here is grab the first task if there is one, then
|
||||
// immediately send it to the back of the queue.
|
||||
queue.sender.send(task).ok();
|
||||
} else {
|
||||
return; // No work, don't bother spawning any tasks
|
||||
}
|
||||
}
|
||||
|
||||
// Spawn workers on the task pool to recursively propagate the hierarchy in parallel.
|
||||
let task_pool = ComputeTaskPool::get_or_init(TaskPool::default);
|
||||
task_pool.scope(|s| {
|
||||
@ -373,12 +377,12 @@ mod parallel {
|
||||
// the hierarchy, guaranteeing unique access.
|
||||
#[expect(unsafe_code, reason = "Mutating disjoint entities in parallel")]
|
||||
unsafe {
|
||||
let (_, (_, p_global_transform), (p_children, _)) =
|
||||
let (_, (_, p_global_transform, _), (p_children, _)) =
|
||||
nodes.get_unchecked(parent).unwrap();
|
||||
propagate_descendants_unchecked(
|
||||
parent,
|
||||
p_global_transform,
|
||||
p_children,
|
||||
p_children.unwrap(), // All entities in the queue should have children
|
||||
nodes,
|
||||
&mut outbox,
|
||||
queue,
|
||||
@ -395,12 +399,8 @@ mod parallel {
|
||||
}
|
||||
}
|
||||
|
||||
/// Propagate transforms from `parent` to its non-leaf `children`, pushing updated child
|
||||
/// entities to the `outbox`. Propagation does not visit leaf nodes; instead, they are computed
|
||||
/// in [`compute_transform_leaves`](super::compute_transform_leaves), which can optimize much
|
||||
/// more efficiently.
|
||||
///
|
||||
/// This function will continue propagating transforms to descendants in a depth-first
|
||||
/// Propagate transforms from `parent` to its `children`, pushing updated child entities to the
|
||||
/// `outbox`. This function will continue propagating transforms to descendants in a depth-first
|
||||
/// traversal, while simultaneously pushing unvisited branches to the outbox, for other threads
|
||||
/// to take when idle.
|
||||
///
|
||||
@ -440,38 +440,29 @@ mod parallel {
|
||||
// visiting disjoint entities in parallel, which is safe.
|
||||
#[expect(unsafe_code, reason = "Mutating disjoint entities in parallel")]
|
||||
let children_iter = unsafe {
|
||||
// Performance note: iter_many tests every child to see if it meets the query. For
|
||||
// leaf nodes, this unfortunately means we have the pay the price of checking every
|
||||
// child, even if it is a leaf node and is skipped.
|
||||
//
|
||||
// To ensure this is still the fastest design, I tried removing the second pass
|
||||
// (`compute_transform_leaves`) and instead simply doing that here. However, that
|
||||
// proved to be much slower than two pass for a few reasons:
|
||||
// - it's less cache friendly and is outright slower than the tight loop in the
|
||||
// second pass
|
||||
// - it prevents parallelism, as all children must be iterated in series
|
||||
//
|
||||
// The only way I can see to make this faster when there are many leaf nodes is to
|
||||
// speed up archetype checking to make the iterator skip leaf entities more quickly,
|
||||
// or encoding the hierarchy level as a component. That, or use some kind of change
|
||||
// detection to mark dirty subtrees when the transform is mutated.
|
||||
nodes.iter_many_unique_unsafe(UniqueEntityIter::from_iterator_unchecked(
|
||||
p_children.iter(),
|
||||
))
|
||||
};
|
||||
|
||||
let mut last_child = None;
|
||||
let new_children = children_iter.map(
|
||||
|(child, (transform, mut global_transform), (children, child_of))| {
|
||||
assert_eq!(child_of.parent, parent);
|
||||
if p_global_transform.is_changed()
|
||||
|| transform.is_changed()
|
||||
|| global_transform.is_added()
|
||||
{
|
||||
*global_transform = p_global_transform.mul_transform(*transform);
|
||||
let new_children = children_iter.filter_map(
|
||||
|(child, (transform, mut global_transform, tree), (children, child_of))| {
|
||||
if !tree.is_changed() && !p_global_transform.is_changed() {
|
||||
// Static scene optimization
|
||||
return None;
|
||||
}
|
||||
last_child = Some((child, global_transform, children));
|
||||
child
|
||||
assert_eq!(child_of.parent, parent);
|
||||
|
||||
// Transform prop is expensive - this helps avoid updating entire subtrees if
|
||||
// the GlobalTransform is unchanged, at the cost of an added equality check.
|
||||
global_transform.set_if_neq(p_global_transform.mul_transform(*transform));
|
||||
|
||||
children.map(|children| {
|
||||
// Only continue propagation if the entity has children.
|
||||
last_child = Some((child, global_transform, children));
|
||||
child
|
||||
})
|
||||
},
|
||||
);
|
||||
outbox.extend(new_children);
|
||||
@ -497,14 +488,18 @@ mod parallel {
|
||||
}
|
||||
|
||||
/// Alias for a large, repeatedly used query. Queries for transform entities that have both a
|
||||
/// parent and children, thus they are neither roots nor leaves.
|
||||
/// parent and possibly children, thus they are not roots.
|
||||
type NodeQuery<'w, 's> = Query<
|
||||
'w,
|
||||
's,
|
||||
(
|
||||
Entity,
|
||||
(Ref<'static, Transform>, Mut<'static, GlobalTransform>),
|
||||
(Read<Children>, Read<ChildOf>),
|
||||
(
|
||||
Ref<'static, Transform>,
|
||||
Mut<'static, GlobalTransform>,
|
||||
Ref<'static, TransformTreeChanged>,
|
||||
),
|
||||
(Option<Read<Children>>, Read<ChildOf>),
|
||||
),
|
||||
>;
|
||||
|
||||
@ -579,9 +574,9 @@ mod test {
|
||||
let mut schedule = Schedule::default();
|
||||
schedule.add_systems(
|
||||
(
|
||||
mark_dirty_trees,
|
||||
sync_simple_transforms,
|
||||
propagate_parent_transforms,
|
||||
compute_transform_leaves,
|
||||
)
|
||||
.chain(),
|
||||
);
|
||||
@ -637,9 +632,9 @@ mod test {
|
||||
let mut schedule = Schedule::default();
|
||||
schedule.add_systems(
|
||||
(
|
||||
mark_dirty_trees,
|
||||
sync_simple_transforms,
|
||||
propagate_parent_transforms,
|
||||
compute_transform_leaves,
|
||||
)
|
||||
.chain(),
|
||||
);
|
||||
@ -674,9 +669,9 @@ mod test {
|
||||
let mut schedule = Schedule::default();
|
||||
schedule.add_systems(
|
||||
(
|
||||
mark_dirty_trees,
|
||||
sync_simple_transforms,
|
||||
propagate_parent_transforms,
|
||||
compute_transform_leaves,
|
||||
)
|
||||
.chain(),
|
||||
);
|
||||
@ -713,9 +708,9 @@ mod test {
|
||||
let mut schedule = Schedule::default();
|
||||
schedule.add_systems(
|
||||
(
|
||||
mark_dirty_trees,
|
||||
sync_simple_transforms,
|
||||
propagate_parent_transforms,
|
||||
compute_transform_leaves,
|
||||
)
|
||||
.chain(),
|
||||
);
|
||||
@ -793,9 +788,9 @@ mod test {
|
||||
app.add_systems(
|
||||
Update,
|
||||
(
|
||||
mark_dirty_trees,
|
||||
sync_simple_transforms,
|
||||
propagate_parent_transforms,
|
||||
compute_transform_leaves,
|
||||
)
|
||||
.chain(),
|
||||
);
|
||||
@ -847,12 +842,10 @@ mod test {
|
||||
|
||||
app.add_systems(
|
||||
Update,
|
||||
(
|
||||
propagate_parent_transforms,
|
||||
sync_simple_transforms,
|
||||
compute_transform_leaves,
|
||||
)
|
||||
.chain(),
|
||||
// It is unsound for this unsafe system to encounter a cycle without panicking. This
|
||||
// requirement only applies to systems with unsafe parallel traversal that result in
|
||||
// aliased mutability during a cycle.
|
||||
propagate_parent_transforms,
|
||||
);
|
||||
|
||||
fn setup_world(world: &mut World) -> (Entity, Entity) {
|
||||
@ -912,11 +905,14 @@ mod test {
|
||||
|
||||
// Create transform propagation schedule
|
||||
let mut schedule = Schedule::default();
|
||||
schedule.add_systems((
|
||||
sync_simple_transforms,
|
||||
propagate_parent_transforms,
|
||||
compute_transform_leaves,
|
||||
));
|
||||
schedule.add_systems(
|
||||
(
|
||||
mark_dirty_trees,
|
||||
propagate_parent_transforms,
|
||||
sync_simple_transforms,
|
||||
)
|
||||
.chain(),
|
||||
);
|
||||
|
||||
// Spawn a `Transform` entity with a local translation of `Vec3::ONE`
|
||||
let mut spawn_transform_bundle =
|
||||
|
@ -353,9 +353,10 @@ mod tests {
|
||||
use bevy_math::{Rect, UVec2, Vec2};
|
||||
use bevy_platform_support::collections::HashMap;
|
||||
use bevy_render::{camera::ManualTextureViews, prelude::Camera};
|
||||
use bevy_transform::systems::mark_dirty_trees;
|
||||
use bevy_transform::{
|
||||
prelude::GlobalTransform,
|
||||
systems::{compute_transform_leaves, propagate_parent_transforms, sync_simple_transforms},
|
||||
systems::{propagate_parent_transforms, sync_simple_transforms},
|
||||
};
|
||||
use bevy_utils::prelude::default;
|
||||
use bevy_window::{
|
||||
@ -408,9 +409,9 @@ mod tests {
|
||||
update_ui_context_system,
|
||||
ApplyDeferred,
|
||||
ui_layout_system,
|
||||
mark_dirty_trees,
|
||||
sync_simple_transforms,
|
||||
propagate_parent_transforms,
|
||||
compute_transform_leaves,
|
||||
)
|
||||
.chain(),
|
||||
);
|
||||
|
Loading…
Reference in New Issue
Block a user