From f0bdce7425cdf4bb744ff8ed91a367d64c254dfa Mon Sep 17 00:00:00 2001 From: re0312 <45868716+re0312@users.noreply.github.com> Date: Wed, 26 Jun 2024 20:46:41 +0800 Subject: [PATCH] Fair Change Detection Benchmarking (#11173) # Objective - #4972 introduced a benchmark to measure change detection performance. - However, it uses `iter_batched`, which causes a lot of overhead from cloning data into each routine closure (it feels like a bug in `iter_batched`), and it constructs a new query in every iteration. This overhead masks the real change detection throughput we want to measure. Instead of evaluating raw change detection, the benchmark ends up dominated by data cloning and allocation costs. ## Solution - Use `iter_batched_ref` to reduce the benchmark overhead. - Use a cached query to better reflect real-world usage scenarios. - Add more benchmarks. --- ## Changelog --- benches/benches/bevy_ecs/change_detection.rs | 174 ++++++++++++++++--- 1 file changed, 152 insertions(+), 22 deletions(-) diff --git a/benches/benches/bevy_ecs/change_detection.rs b/benches/benches/bevy_ecs/change_detection.rs index 7fc3f5a2a9..ae602738fe 100644 --- a/benches/benches/bevy_ecs/change_detection.rs +++ b/benches/benches/bevy_ecs/change_detection.rs @@ -1,7 +1,8 @@ use bevy_ecs::{ component::Component, entity::Entity, - prelude::{Added, Changed}, + prelude::{Added, Changed, EntityWorldMut, QueryState}, + query::QueryFilter, world::World, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; @@ -14,15 +15,28 @@ criterion_group!( all_changed_detection, few_changed_detection, none_changed_detection, + multiple_archetype_none_changed_detection ); criterion_main!(benches); +macro_rules! modify { + ($components:ident;$($index:tt),*) => { + $( + $components.$index.map(|mut v| { + v.0+=1. 
+ }); + )* + }; +} #[derive(Component, Default)] #[component(storage = "Table")] struct Table(f32); #[derive(Component, Default)] #[component(storage = "SparseSet")] struct Sparse(f32); +#[derive(Component, Default)] +#[component(storage = "Table")] +struct Data(f32); trait BenchModify { fn bench_modify(&mut self) -> f32; @@ -41,7 +55,7 @@ impl BenchModify for Sparse { } } -const RANGE_ENTITIES_TO_BENCH_COUNT: std::ops::Range = 5..7; +const ENTITIES_TO_BENCH_COUNT: &[u32] = &[5000, 50000]; type BenchGroup<'a> = criterion::BenchmarkGroup<'a, criterion::measurement::WallTime>; @@ -55,6 +69,11 @@ fn setup(entity_count: u32) -> World { black_box(world) } +// create a cached query in setup to avoid extra costs in each iter +fn generic_filter_query(world: &mut World) -> QueryState { + world.query_filtered::() +} + fn generic_bench( bench_group: &mut BenchGroup, mut benches: Vec>, @@ -69,11 +88,14 @@ fn all_added_detection_generic(group: &mut BenchGroup, e group.bench_function( format!("{}_entities_{}", entity_count, std::any::type_name::()), |bencher| { - bencher.iter_batched( - || setup::(entity_count), - |mut world| { + bencher.iter_batched_ref( + || { + let mut world = setup::(entity_count); + let query = generic_filter_query::>(&mut world); + (world, query) + }, + |(ref mut world, ref mut query)| { let mut count = 0; - let mut query = world.query_filtered::>(); for entity in query.iter(&world) { black_box(entity); count += 1; @@ -90,7 +112,7 @@ fn all_added_detection(criterion: &mut Criterion) { let mut group = criterion.benchmark_group("all_added_detection"); group.warm_up_time(std::time::Duration::from_millis(500)); group.measurement_time(std::time::Duration::from_secs(4)); - for entity_count in RANGE_ENTITIES_TO_BENCH_COUNT.map(|i| i * 10_000) { + for &entity_count in ENTITIES_TO_BENCH_COUNT { generic_bench( &mut group, vec![ @@ -109,7 +131,7 @@ fn all_changed_detection_generic( group.bench_function( format!("{}_entities_{}", entity_count, 
std::any::type_name::()), |bencher| { - bencher.iter_batched( + bencher.iter_batched_ref( || { let mut world = setup::(entity_count); world.clear_trackers(); @@ -117,11 +139,11 @@ fn all_changed_detection_generic( for mut component in query.iter_mut(&mut world) { black_box(component.bench_modify()); } - world + let query = generic_filter_query::>(&mut world); + (world, query) }, - |mut world| { + |(ref mut world, ref mut query)| { let mut count = 0; - let mut query = world.query_filtered::>(); for entity in query.iter(&world) { black_box(entity); count += 1; @@ -138,7 +160,7 @@ fn all_changed_detection(criterion: &mut Criterion) { let mut group = criterion.benchmark_group("all_changed_detection"); group.warm_up_time(std::time::Duration::from_millis(500)); group.measurement_time(std::time::Duration::from_secs(4)); - for entity_count in RANGE_ENTITIES_TO_BENCH_COUNT.map(|i| i * 10_000) { + for &entity_count in ENTITIES_TO_BENCH_COUNT { generic_bench( &mut group, vec![ @@ -159,7 +181,7 @@ fn few_changed_detection_generic( group.bench_function( format!("{}_entities_{}", entity_count, std::any::type_name::()), |bencher| { - bencher.iter_batched( + bencher.iter_batched_ref( || { let mut world = setup::(entity_count); world.clear_trackers(); @@ -170,10 +192,10 @@ fn few_changed_detection_generic( for component in to_modify[0..amount_to_modify].iter_mut() { black_box(component.bench_modify()); } - world + let query = generic_filter_query::>(&mut world); + (world, query) }, - |mut world| { - let mut query = world.query_filtered::>(); + |(ref mut world, ref mut query)| { for entity in query.iter(&world) { black_box(entity); } @@ -188,7 +210,7 @@ fn few_changed_detection(criterion: &mut Criterion) { let mut group = criterion.benchmark_group("few_changed_detection"); group.warm_up_time(std::time::Duration::from_millis(500)); group.measurement_time(std::time::Duration::from_secs(4)); - for entity_count in RANGE_ENTITIES_TO_BENCH_COUNT.map(|i| i * 10_000) { + for &entity_count 
in ENTITIES_TO_BENCH_COUNT { generic_bench( &mut group, vec![ @@ -207,15 +229,15 @@ fn none_changed_detection_generic( group.bench_function( format!("{}_entities_{}", entity_count, std::any::type_name::()), |bencher| { - bencher.iter_batched( + bencher.iter_batched_ref( || { let mut world = setup::(entity_count); world.clear_trackers(); - world + let query = generic_filter_query::>(&mut world); + (world, query) }, - |mut world| { + |(ref mut world, ref mut query)| { let mut count = 0; - let mut query = world.query_filtered::>(); for entity in query.iter(&world) { black_box(entity); count += 1; @@ -232,7 +254,7 @@ fn none_changed_detection(criterion: &mut Criterion) { let mut group = criterion.benchmark_group("none_changed_detection"); group.warm_up_time(std::time::Duration::from_millis(500)); group.measurement_time(std::time::Duration::from_secs(4)); - for entity_count in RANGE_ENTITIES_TO_BENCH_COUNT.map(|i| i * 10_000) { + for &entity_count in ENTITIES_TO_BENCH_COUNT { generic_bench( &mut group, vec![ @@ -243,3 +265,111 @@ fn none_changed_detection(criterion: &mut Criterion) { ); } } +fn insert_if_bit_enabled(entity: &mut EntityWorldMut, i: u16) { + if i & 1 << B != 0 { + entity.insert(Data::(1.0)); + } +} + +fn add_archetypes_entities( + world: &mut World, + archetype_count: u16, + entity_count: u32, +) { + for i in 0..archetype_count { + for _j in 0..entity_count { + let mut e = world.spawn(T::default()); + insert_if_bit_enabled::<0>(&mut e, i); + insert_if_bit_enabled::<1>(&mut e, i); + insert_if_bit_enabled::<2>(&mut e, i); + insert_if_bit_enabled::<3>(&mut e, i); + insert_if_bit_enabled::<4>(&mut e, i); + insert_if_bit_enabled::<5>(&mut e, i); + insert_if_bit_enabled::<6>(&mut e, i); + insert_if_bit_enabled::<7>(&mut e, i); + insert_if_bit_enabled::<8>(&mut e, i); + insert_if_bit_enabled::<9>(&mut e, i); + insert_if_bit_enabled::<10>(&mut e, i); + insert_if_bit_enabled::<11>(&mut e, i); + insert_if_bit_enabled::<12>(&mut e, i); + 
insert_if_bit_enabled::<13>(&mut e, i); + insert_if_bit_enabled::<14>(&mut e, i); + insert_if_bit_enabled::<15>(&mut e, i); + } + } +} +fn multiple_archetype_none_changed_detection_generic( + group: &mut BenchGroup, + archetype_count: u16, + entity_count: u32, +) { + group.bench_function( + format!( + "{}_archetypes_{}_entities_{}", + archetype_count, + entity_count, + std::any::type_name::() + ), + |bencher| { + bencher.iter_batched_ref( + || { + let mut world = World::new(); + add_archetypes_entities::(&mut world, archetype_count, entity_count); + world.clear_trackers(); + let mut query = world.query::<( + Option<&mut Data<0>>, + Option<&mut Data<1>>, + Option<&mut Data<2>>, + Option<&mut Data<3>>, + Option<&mut Data<4>>, + Option<&mut Data<5>>, + Option<&mut Data<6>>, + Option<&mut Data<7>>, + Option<&mut Data<8>>, + Option<&mut Data<9>>, + Option<&mut Data<10>>, + Option<&mut Data<11>>, + Option<&mut Data<12>>, + Option<&mut Data<13>>, + Option<&mut Data<14>>, + )>(); + for components in query.iter_mut(&mut world) { + // change Data while keeping T unchanged + modify!(components;0,1,2,3,4,5,6,7,8,9,10,11,12,13,14); + } + let query = generic_filter_query::>(&mut world); + (world, query) + }, + |(ref mut world, ref mut query)| { + let mut count = 0; + for entity in query.iter(&world) { + black_box(entity); + count += 1; + } + assert_eq!(0, count); + }, + criterion::BatchSize::LargeInput, + ) + }, + ); +} + +fn multiple_archetype_none_changed_detection(criterion: &mut Criterion) { + let mut group = criterion.benchmark_group("multiple_archetypes_none_changed_detection"); + group.warm_up_time(std::time::Duration::from_millis(800)); + group.measurement_time(std::time::Duration::from_secs(8)); + for archetype_count in [5, 20, 100] { + for entity_count in [10, 100, 1000, 10000] { + multiple_archetype_none_changed_detection_generic::( + &mut group, + archetype_count, + entity_count, + ); + multiple_archetype_none_changed_detection_generic::( + &mut group, + 
archetype_count, + entity_count, + ); + } + } +}