From bbe88d5e1dc9712b541b778acbcca0eee004bafe Mon Sep 17 00:00:00 2001 From: Aevyrie Roessler Date: Fri, 28 Feb 2025 11:59:23 -0800 Subject: [PATCH] ZST dirty bits go brrrrrr --- .../src/components/transform.rs | 23 +++- crates/bevy_transform/src/plugins.rs | 14 +-- crates/bevy_transform/src/systems.rs | 117 +++++++----------- 3 files changed, 69 insertions(+), 85 deletions(-) diff --git a/crates/bevy_transform/src/components/transform.rs b/crates/bevy_transform/src/components/transform.rs index 2949015848..b112fee98e 100644 --- a/crates/bevy_transform/src/components/transform.rs +++ b/crates/bevy_transform/src/components/transform.rs @@ -60,7 +60,11 @@ fn assert_is_normalized(message: &str, length_squared: f32) { /// [transform_example]: https://github.com/bevyengine/bevy/blob/latest/examples/transforms/transform.rs #[derive(Debug, PartialEq, Clone, Copy)] #[cfg_attr(feature = "serialize", derive(serde::Serialize, serde::Deserialize))] -#[cfg_attr(feature = "bevy-support", derive(Component), require(GlobalTransform))] +#[cfg_attr( + feature = "bevy-support", + derive(Component), + require(GlobalTransform, TransformTreeChanged) +)] #[cfg_attr( feature = "bevy_reflect", derive(Reflect), @@ -641,3 +645,20 @@ impl Mul for Transform { self.transform_point(value) } } + +/// An optimization for transform propagation. This ZST marker component uses change detection to +/// mark all entities of the hierarchy as "dirty" if any of their descendants have a changed +/// `Transform`. If this component is *not* marked `is_changed()`, propagation will halt. +#[derive(Clone, Copy, Default, PartialEq, Debug)] +#[cfg_attr(feature = "serialize", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "bevy-support", derive(Component))] +#[cfg_attr( + feature = "bevy_reflect", + derive(Reflect), + reflect(Component, Default, PartialEq, Debug) +)] +#[cfg_attr( + all(feature = "bevy_reflect", feature = "serialize"), + reflect(Serialize, Deserialize) +)] +pub struct TransformTreeChanged; diff --git a/crates/bevy_transform/src/plugins.rs b/crates/bevy_transform/src/plugins.rs index e578002bcb..a79b947d09 100644 --- a/crates/bevy_transform/src/plugins.rs +++ b/crates/bevy_transform/src/plugins.rs @@ -1,7 +1,4 @@ -use crate::systems::{ - compute_transform_leaves, mark_dirty_trees, propagate_parent_transforms, - sync_simple_transforms, ChangedTransforms, -}; +use crate::systems::{mark_dirty_trees, propagate_parent_transforms, sync_simple_transforms}; use bevy_app::{App, Plugin, PostStartup, PostUpdate}; use bevy_ecs::schedule::{IntoSystemConfigs, IntoSystemSetConfigs, SystemSet}; @@ -25,21 +22,20 @@ impl Plugin for TransformPlugin { #[cfg(feature = "bevy_reflect")] app.register_type::() + .register_type::() .register_type::(); app.configure_sets( PostStartup, PropagateTransformsSet.in_set(TransformSystem::TransformPropagate), ) - .init_resource::() // add transform systems to startup so the first update is "correct" .add_systems( PostStartup, ( mark_dirty_trees, propagate_parent_transforms, - (compute_transform_leaves, sync_simple_transforms) - .ambiguous_with(TransformSystem::TransformPropagate), + sync_simple_transforms, ) .chain() .in_set(PropagateTransformsSet), @@ -53,8 +49,8 @@ impl Plugin for TransformPlugin { ( mark_dirty_trees, propagate_parent_transforms, - (compute_transform_leaves, sync_simple_transforms) // TODO: Adjust the internal parallel queries to make these parallel systems more efficiently share and fill CPU time. - .ambiguous_with(TransformSystem::TransformPropagate), + // TODO: Adjust the internal parallel queries to make this system more efficiently share and fill CPU time. + sync_simple_transforms, ) .chain() .in_set(PropagateTransformsSet), diff --git a/crates/bevy_transform/src/systems.rs b/crates/bevy_transform/src/systems.rs index 53d0ce5eb7..7e59a11588 100644 --- a/crates/bevy_transform/src/systems.rs +++ b/crates/bevy_transform/src/systems.rs @@ -1,8 +1,6 @@ -use crate::components::{GlobalTransform, Transform}; -use bevy_ecs::entity::hash_set::EntityHashSet; +use crate::components::{GlobalTransform, Transform, TransformTreeChanged}; use bevy_ecs::prelude::*; -use derive_more::Deref; - +use derive_more::From; #[cfg(feature = "std")] pub use parallel::propagate_parent_transforms; #[cfg(not(feature = "std"))] @@ -43,55 +41,28 @@ pub fn sync_simple_transforms( } } -#[derive(Default, Resource, Deref)] -pub struct ChangedTransforms(EntityHashSet); - +/// Optimization for static scenes. Propagates a "dirty bit" up the hierarchy towards ancestors. +/// Transform propagation can ignore entire subtrees of the hierarchy if it encounters an entity +/// without the dirty bit. pub fn mark_dirty_trees( - mut changed: ResMut, - transforms: Query<(Entity, Option<&ChildOf>, Ref)>, + changed_transforms: Query>, + mut transforms: Query<(Option<&ChildOf>, &mut TransformTreeChanged)>, ) { - changed.0.clear(); - 'outer: for (entity, parent, _) in transforms - .iter() - .filter(|(.., transform)| transform.is_changed()) - { - if !changed.0.insert(entity) { - continue; // Tree has already been processed - } - let mut next = parent.map(ChildOf::get); - while let Some((entity, parent, _)) = next.and_then(|entity| transforms.get(entity).ok()) { - if !changed.0.insert(entity) { + for entity in changed_transforms.iter() { + let mut next = entity; + while let Ok((parent, mut tree)) = transforms.get_mut(next) { + if tree.is_changed() { break; // Tree has already been processed + } else { + tree.set_changed(); } - next = parent.map(ChildOf::get); + if let Some(parent) = parent.map(ChildOf::get) { + next = parent; + }; } } } -/// Compute leaf [`GlobalTransform`]s in parallel. -/// -/// This is run after [`propagate_parent_transforms`], to ensure the parents' [`GlobalTransform`]s -/// have been computed. This makes computing leaf nodes at different levels of the hierarchy much -/// more cache friendly, because data can be iterated over densely from the same archetype. -pub fn compute_transform_leaves( - parents: Query, With>, - mut leaves: Query<(Ref, &mut GlobalTransform, &ChildOf), Without>, -) { - leaves - .par_iter_mut() - .for_each(|(transform, mut global_transform, parent)| { - let Ok(parent_transform) = parents.get(parent.get()) else { - return; - }; - if parent_transform.is_changed() - || transform.is_changed() - || global_transform.is_added() - { - *global_transform = parent_transform.mul_transform(*transform); - } - }); -} - // TODO: This serial implementation isn't actually serial, it parallelizes across the roots. // Additionally, this couples "no_std" with "single_threaded" when these two features should be // independent. @@ -195,7 +166,7 @@ mod serial { parent: &GlobalTransform, transform_query: &Query< (Ref, &mut GlobalTransform, Option<&Children>), - (With, With), + With, >, parent_query: &Query<(Entity, Ref), With>, entity: Entity, @@ -273,7 +244,6 @@ mod serial { mod parallel { use crate::prelude::*; // TODO: this implementation could be used in no_std if there are equivalents of these. - use crate::systems::ChangedTransforms; use alloc::{sync::Arc, vec::Vec}; use bevy_ecs::{entity::UniqueEntityIter, prelude::*, system::lifetimeless::Read}; use bevy_tasks::{ComputeTaskPool, TaskPool}; @@ -291,13 +261,12 @@ mod parallel { /// [`sync_simple_transforms`](super::sync_simple_transforms) and /// [`compute_transform_leaves`](super::compute_transform_leaves). pub fn propagate_parent_transforms( - changed_transforms: Res, mut queue: Local, mut orphaned: RemovedComponents, mut orphans: Local>, mut roots: Query< (Entity, Ref, &mut GlobalTransform, &Children), - Without, + (Without, Changed), >, nodes: NodeQuery, ) { @@ -310,11 +279,6 @@ mod parallel { roots.par_iter_mut().for_each_init( || queue.local_queue.borrow_local_mut(), |outbox, (parent, transform, mut parent_transform, children)| { - // If the parent isn't in this resource, no transforms in the subtree have changed. - if !changed_transforms.contains(&parent) { - return; - } - if transform.is_changed() || parent_transform.is_added() || orphans.binary_search(&parent).is_ok() @@ -332,7 +296,6 @@ mod parallel { parent_transform, children, &nodes, - &changed_transforms, outbox, &queue, // Need to revisit this single-max-depth by profiling more representative @@ -350,25 +313,31 @@ mod parallel { // number of channel sends by avoiding sending partial batches. queue.send_batches(); + if let Ok(rx) = queue.receiver.try_lock() { + if let Some(task) = rx.try_iter().next() { + // This is a bit silly, but the only way to see if there is any work is to grab a + // task. Peeking will remove the task even if you don't call `next`, resulting in + // dropping a task. What we do here is grab the first task if there is one, then + // immediately send it to the back of the queue. + queue.sender.send(task).ok(); + } else { + return; // No work, don't bother spawning any tasks + } + } + // Spawn workers on the task pool to recursively propagate the hierarchy in parallel. let task_pool = ComputeTaskPool::get_or_init(TaskPool::default); task_pool.scope(|s| { (1..task_pool.thread_num()) // First worker is run locally instead of the task pool. - .for_each(|_| { - s.spawn(async { propagation_worker(&queue, &nodes, &changed_transforms) }) - }); - propagation_worker(&queue, &nodes, &changed_transforms); + .for_each(|_| s.spawn(async { propagation_worker(&queue, &nodes) })); + propagation_worker(&queue, &nodes); }); } /// A parallel worker that will consume processed parent entities from the queue, and push /// children to the queue once it has propagated their [`GlobalTransform`]. #[inline] - fn propagation_worker( - queue: &WorkQueue, - nodes: &NodeQuery, - changed_transforms: &ChangedTransforms, - ) { + fn propagation_worker(queue: &WorkQueue, nodes: &NodeQuery) { #[cfg(feature = "std")] let _span = bevy_log::info_span!("transform propagation worker").entered(); @@ -419,9 +388,8 @@ mod parallel { propagate_descendants_unchecked( parent, p_global_transform, - p_children, + p_children.unwrap(), // All entities in the queue should have children nodes, - changed_transforms, &mut outbox, queue, // Only affects performance. Trees deeper than this will still be fully @@ -466,7 +434,6 @@ mod parallel { p_global_transform: Mut, p_children: &Children, nodes: &NodeQuery, - changed_transforms: &ChangedTransforms, outbox: &mut Vec, queue: &WorkQueue, max_depth: usize, @@ -477,10 +444,6 @@ mod parallel { // See the optimization note at the end to understand why this loop is here. for depth in 1..=max_depth { - if !changed_transforms.contains(&parent) { - break; - } - // Safety: traversing the entity tree from the roots, we assert that the childof and // children pointers match in both directions (see assert below) to ensure the hierarchy // does not have any cycles. Because the hierarchy does not have cycles, we know we are @@ -508,7 +471,7 @@ mod parallel { }; let mut last_child = None; - let new_children = children_iter.map( + let new_children = children_iter.filter_map( |(child, (transform, mut global_transform), (children, child_of))| { assert_eq!(child_of.get(), parent); if p_global_transform.is_changed() @@ -517,8 +480,11 @@ mod parallel { { *global_transform = p_global_transform.mul_transform(*transform); } - last_child = Some((child, global_transform, children)); - child + children.map(|children| { + // Only continue propagation if the entity has children. + last_child = Some((child, global_transform, children)); + child + }) }, ); outbox.extend(new_children); @@ -551,8 +517,9 @@ mod parallel { ( Entity, (Ref<'static, Transform>, Mut<'static, GlobalTransform>), - (Read, Read), + (Option>, Read), ), + Changed, >; /// A queue shared between threads for transform propagation.