Opportunistically use dense iter for archetypal iteration in Par_iter (#14673)
# Objective - follow of #14049 ,we could use it on our Parallel Iterator,this pr also unified the used function in both regular iter and parallel iterations. ## Performance  no performance regression for regular itertaion 3.5X faster in hybrid parallel iteraion,this number is far greater than the benefits obtained in regular iteration(~1.81) because mutable iterations on continuous memory can effectively reduce the cost of mataining core cache coherence
This commit is contained in:
		
							parent
							
								
									ec728c31c1
								
							
						
					
					
						commit
						739007f148
					
				| @ -20,7 +20,7 @@ impl<'w> Benchmark<'w> { | ||||
| 
 | ||||
|         let mut v = vec![]; | ||||
|         for _ in 0..10000 { | ||||
|             world.spawn((TableData(0.0), SparseData(0.0))).id(); | ||||
|             world.spawn((TableData(0.0), SparseData(0.0))); | ||||
|             v.push(world.spawn(TableData(0.)).id()); | ||||
|         } | ||||
| 
 | ||||
|  | ||||
| @ -20,6 +20,7 @@ mod iter_simple_system; | ||||
| mod iter_simple_wide; | ||||
| mod iter_simple_wide_sparse_set; | ||||
| mod par_iter_simple; | ||||
| mod par_iter_simple_foreach_hybrid; | ||||
| 
 | ||||
| use heavy_compute::*; | ||||
| 
 | ||||
| @ -135,4 +136,8 @@ fn par_iter_simple(c: &mut Criterion) { | ||||
|             b.iter(move || bench.run()); | ||||
|         }); | ||||
|     } | ||||
|     group.bench_function(format!("hybrid"), |b| { | ||||
|         let mut bench = par_iter_simple_foreach_hybrid::Benchmark::new(); | ||||
|         b.iter(move || bench.run()); | ||||
|     }); | ||||
| } | ||||
|  | ||||
| @ -0,0 +1,45 @@ | ||||
| use bevy_ecs::prelude::*; | ||||
| use bevy_tasks::{ComputeTaskPool, TaskPool}; | ||||
| use rand::{prelude::SliceRandom, SeedableRng}; | ||||
| use rand_chacha::ChaCha8Rng; | ||||
| 
 | ||||
| #[derive(Component, Copy, Clone)] | ||||
| struct TableData(f32); | ||||
| 
 | ||||
| #[derive(Component, Copy, Clone)] | ||||
| #[component(storage = "SparseSet")] | ||||
| struct SparseData(f32); | ||||
| 
 | ||||
| fn deterministic_rand() -> ChaCha8Rng { | ||||
|     ChaCha8Rng::seed_from_u64(42) | ||||
| } | ||||
| pub struct Benchmark<'w>(World, QueryState<(&'w mut TableData, &'w SparseData)>); | ||||
| 
 | ||||
| impl<'w> Benchmark<'w> { | ||||
|     pub fn new() -> Self { | ||||
|         let mut world = World::new(); | ||||
|         ComputeTaskPool::get_or_init(TaskPool::default); | ||||
| 
 | ||||
|         let mut v = vec![]; | ||||
|         for _ in 0..100000 { | ||||
|             world.spawn((TableData(0.0), SparseData(0.0))); | ||||
|             v.push(world.spawn(TableData(0.)).id()); | ||||
|         } | ||||
| 
 | ||||
|         // by shuffling ,randomize the archetype iteration order to significantly deviate from the table order. This maximizes the loss of cache locality during archetype-based iteration.
 | ||||
|         v.shuffle(&mut deterministic_rand()); | ||||
|         for e in v.into_iter() { | ||||
|             world.entity_mut(e).despawn(); | ||||
|         } | ||||
| 
 | ||||
|         let query = world.query::<(&mut TableData, &SparseData)>(); | ||||
|         Self(world, query) | ||||
|     } | ||||
| 
 | ||||
|     #[inline(never)] | ||||
|     pub fn run(&mut self) { | ||||
|         self.1 | ||||
|             .par_iter_mut(&mut self.0) | ||||
|             .for_each(|(mut v1, v2)| v1.0 += v2.0) | ||||
|     } | ||||
| } | ||||
| @ -122,6 +122,67 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> { | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Executes the equivalent of [`Iterator::fold`] over a contiguous segment
 | ||||
|     /// from an storage.
 | ||||
|     ///
 | ||||
|     ///  # Safety
 | ||||
|     ///  - `range` must be in `[0, storage::entity_count)` or None.
 | ||||
|     #[inline] | ||||
|     pub(super) unsafe fn fold_over_storage_range<B, Func>( | ||||
|         &mut self, | ||||
|         mut accum: B, | ||||
|         func: &mut Func, | ||||
|         storage: StorageId, | ||||
|         range: Option<Range<usize>>, | ||||
|     ) -> B | ||||
|     where | ||||
|         Func: FnMut(B, D::Item<'w>) -> B, | ||||
|     { | ||||
|         if self.cursor.is_dense { | ||||
|             // SAFETY: `self.cursor.is_dense` is true, so storage ids are guaranteed to be table ids.
 | ||||
|             let table_id = unsafe { storage.table_id }; | ||||
|             // SAFETY: Matched table IDs are guaranteed to still exist.
 | ||||
|             let table = unsafe { self.tables.get(table_id).debug_checked_unwrap() }; | ||||
| 
 | ||||
|             let range = range.unwrap_or(0..table.entity_count()); | ||||
|             accum = | ||||
|                 // SAFETY: 
 | ||||
|                 // - The fetched table matches both D and F
 | ||||
|                 // - caller ensures `range` is within `[0, table.entity_count)`
 | ||||
|                 // - The if block ensures that the query iteration is dense
 | ||||
|                 unsafe { self.fold_over_table_range(accum, func, table, range) }; | ||||
|         } else { | ||||
|             // SAFETY: `self.cursor.is_dense` is false, so storage ids are guaranteed to be archetype ids.
 | ||||
|             let archetype_id = unsafe { storage.archetype_id }; | ||||
|             // SAFETY: Matched archetype IDs are guaranteed to still exist.
 | ||||
|             let archetype = unsafe { self.archetypes.get(archetype_id).debug_checked_unwrap() }; | ||||
|             // SAFETY: Matched table IDs are guaranteed to still exist.
 | ||||
|             let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() }; | ||||
| 
 | ||||
|             let range = range.unwrap_or(0..archetype.len()); | ||||
| 
 | ||||
|             // When an archetype and its table have equal entity counts, dense iteration can be safely used.
 | ||||
|             // this leverages cache locality to optimize performance.
 | ||||
|             if table.entity_count() == archetype.len() { | ||||
|                 accum = | ||||
|                 // SAFETY:
 | ||||
|                 // - The fetched archetype matches both D and F
 | ||||
|                 // - The provided archetype and its' table have the same length.
 | ||||
|                 // - caller ensures `range` is within `[0, archetype.len)`
 | ||||
|                 // - The if block ensures that the query iteration is not dense.
 | ||||
|                 unsafe { self.fold_over_dense_archetype_range(accum, func, archetype,range) }; | ||||
|             } else { | ||||
|                 accum = | ||||
|                 // SAFETY:
 | ||||
|                 // - The fetched archetype matches both D and F
 | ||||
|                 // - caller ensures `range` is within `[0, archetype.len)`
 | ||||
|                 // - The if block ensures that the query iteration is not dense.
 | ||||
|                 unsafe { self.fold_over_archetype_range(accum, func, archetype,range) }; | ||||
|             } | ||||
|         } | ||||
|         accum | ||||
|     } | ||||
| 
 | ||||
|     /// Executes the equivalent of [`Iterator::fold`] over a contiguous segment
 | ||||
|     /// from an table.
 | ||||
|     ///
 | ||||
| @ -143,7 +204,7 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> { | ||||
|         if table.is_empty() { | ||||
|             return accum; | ||||
|         } | ||||
|         assert!( | ||||
|         debug_assert!( | ||||
|             rows.end <= u32::MAX as usize, | ||||
|             "TableRow is only valid up to u32::MAX" | ||||
|         ); | ||||
| @ -267,12 +328,11 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> { | ||||
|         if archetype.is_empty() { | ||||
|             return accum; | ||||
|         } | ||||
|         assert!( | ||||
|         debug_assert!( | ||||
|             rows.end <= u32::MAX as usize, | ||||
|             "TableRow is only valid up to u32::MAX" | ||||
|         ); | ||||
|         let table = self.tables.get(archetype.table_id()).debug_checked_unwrap(); | ||||
| 
 | ||||
|         debug_assert!( | ||||
|             archetype.len() == table.entity_count(), | ||||
|             "archetype and it's table must have the same length. " | ||||
| @ -1032,48 +1092,10 @@ impl<'w, 's, D: QueryData, F: QueryFilter> Iterator for QueryIter<'w, 's, D, F> | ||||
|             accum = func(accum, item); | ||||
|         } | ||||
| 
 | ||||
|         if self.cursor.is_dense { | ||||
|             for id in self.cursor.storage_id_iter.clone() { | ||||
|                 // SAFETY: `self.cursor.is_dense` is true, so storage ids are guaranteed to be table ids.
 | ||||
|                 let table_id = unsafe { id.table_id }; | ||||
|                 // SAFETY: Matched table IDs are guaranteed to still exist.
 | ||||
|                 let table = unsafe { self.tables.get(table_id).debug_checked_unwrap() }; | ||||
| 
 | ||||
|                 accum = | ||||
|                     // SAFETY: 
 | ||||
|                     // - The fetched table matches both D and F
 | ||||
|                     // - The provided range is equivalent to [0, table.entity_count)
 | ||||
|                     // - The if block ensures that the query iteration is dense
 | ||||
|                     unsafe { self.fold_over_table_range(accum, &mut func, table, 0..table.entity_count()) }; | ||||
|             } | ||||
|         } else { | ||||
|             for id in self.cursor.storage_id_iter.clone() { | ||||
|                 // SAFETY: `self.cursor.is_dense` is false, so storage ids are guaranteed to be archetype ids.
 | ||||
|                 let archetype_id = unsafe { id.archetype_id }; | ||||
|                 // SAFETY: Matched archetype IDs are guaranteed to still exist.
 | ||||
|                 let archetype = unsafe { self.archetypes.get(archetype_id).debug_checked_unwrap() }; | ||||
|                 // SAFETY: Matched table IDs are guaranteed to still exist.
 | ||||
|                 let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() }; | ||||
| 
 | ||||
|                 // When an archetype and its table have equal entity counts, dense iteration can be safely used.
 | ||||
|                 // this leverages cache locality to optimize performance.
 | ||||
|                 if table.entity_count() == archetype.len() { | ||||
|                     accum = | ||||
|                         // SAFETY:
 | ||||
|                         // - The fetched archetype matches both D and F
 | ||||
|                         // - The provided archetype and its' table have the same length.
 | ||||
|                         // - The provided range is equivalent to [0, archetype.len)
 | ||||
|                         // - The if block ensures that the query iteration is not dense.
 | ||||
|                         unsafe { self.fold_over_dense_archetype_range(accum, &mut func, archetype, 0..archetype.len()) }; | ||||
|                 } else { | ||||
|                     accum = | ||||
|                         // SAFETY:
 | ||||
|                         // - The fetched archetype matches both D and F
 | ||||
|                         // - The provided range is equivalent to [0, archetype.len)
 | ||||
|                         // - The if block ensures that the query iteration is not dense.
 | ||||
|                         unsafe { self.fold_over_archetype_range(accum, &mut func, archetype, 0..archetype.len()) }; | ||||
|                 } | ||||
|             } | ||||
|         for id in self.cursor.storage_id_iter.clone().copied() { | ||||
|             // SAFETY:
 | ||||
|             // - The range(None) is equivalent to [0, storage.entity_count)
 | ||||
|             accum = unsafe { self.fold_over_storage_range(accum, &mut func, id, None) }; | ||||
|         } | ||||
|         accum | ||||
|     } | ||||
|  | ||||
| @ -1505,25 +1505,7 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> { | ||||
|                     let mut iter = self.iter_unchecked_manual(world, last_run, this_run); | ||||
|                     let mut accum = init_accum(); | ||||
|                     for storage_id in queue { | ||||
|                         if self.is_dense { | ||||
|                             let id = storage_id.table_id; | ||||
|                             let table = &world.storages().tables.get(id).debug_checked_unwrap(); | ||||
|                             accum = iter.fold_over_table_range( | ||||
|                                 accum, | ||||
|                                 &mut func, | ||||
|                                 table, | ||||
|                                 0..table.entity_count(), | ||||
|                             ); | ||||
|                         } else { | ||||
|                             let id = storage_id.archetype_id; | ||||
|                             let archetype = world.archetypes().get(id).debug_checked_unwrap(); | ||||
|                             accum = iter.fold_over_archetype_range( | ||||
|                                 accum, | ||||
|                                 &mut func, | ||||
|                                 archetype, | ||||
|                                 0..archetype.len(), | ||||
|                             ); | ||||
|                         } | ||||
|                         accum = iter.fold_over_storage_range(accum, &mut func, storage_id, None); | ||||
|                     } | ||||
|                 }); | ||||
|             }; | ||||
| @ -1539,17 +1521,8 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> { | ||||
|                         #[cfg(feature = "trace")] | ||||
|                         let _span = self.par_iter_span.enter(); | ||||
|                         let accum = init_accum(); | ||||
|                         if self.is_dense { | ||||
|                             let id = storage_id.table_id; | ||||
|                             let table = world.storages().tables.get(id).debug_checked_unwrap(); | ||||
|                             self.iter_unchecked_manual(world, last_run, this_run) | ||||
|                                 .fold_over_table_range(accum, &mut func, table, batch); | ||||
|                         } else { | ||||
|                             let id = storage_id.archetype_id; | ||||
|                             let archetype = world.archetypes().get(id).debug_checked_unwrap(); | ||||
|                             self.iter_unchecked_manual(world, last_run, this_run) | ||||
|                                 .fold_over_archetype_range(accum, &mut func, archetype, batch); | ||||
|                         } | ||||
|                         self.iter_unchecked_manual(world, last_run, this_run) | ||||
|                             .fold_over_storage_range(accum, &mut func, storage_id, Some(batch)); | ||||
|                     }); | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 re0312
						re0312