Store only the IDs needed for Query iteration (#12476)

# Objective
Aside from the exposed functions for reading matched tables and
archetypes, a `QueryState` does not actually need both internal `Vec`s for
storing matched archetypes and tables. In practice, it only uses one of
the two, depending on whether it uses dense or archetypal iteration.

This is in the same vein as #12474. The goal is to reduce the memory
overhead of queries, which Bevy itself, ecosystem plugins, and end users
already use fairly liberally.

## Solution
Add `StorageId`, a union over `TableId` and `ArchetypeId`, and store only
one of the two kinds of ID at runtime. The stored slice is then read as one
kind of ID or the other, depending on whether the query is dense.

This follows in the same vein as #5085; however, this one directly
impacts heap memory usage at runtime, while #5085 primarily targeted
transient pointers that might not actually exist at runtime.
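
As a rough, self-contained sketch of the idea (assumptions: the `TableId` and
`ArchetypeId` newtypes below are simplified stand-ins rather than Bevy's actual
types, and `IS_DENSE` stands in for the compile-time `D::IS_DENSE && F::IS_DENSE`
check):

```rust
// Simplified stand-in ID types, only for illustration.
#[derive(Clone, Copy, Debug)]
struct TableId(u32);
#[derive(Clone, Copy, Debug)]
struct ArchetypeId(u32);

/// One ID with no discriminant: a given query is either dense (tables) or
/// archetypal (archetypes) for its whole lifetime, so the "variant" is known
/// statically and never needs to be stored per element.
#[derive(Clone, Copy)]
union StorageId {
    table_id: TableId,
    archetype_id: ArchetypeId,
}

fn main() {
    // Known at compile time for a concrete query type.
    const IS_DENSE: bool = true;

    // Because IS_DENSE is true, every element is initialized as a `table_id`.
    let ids = vec![
        StorageId { table_id: TableId(0) },
        StorageId { table_id: TableId(3) },
    ];

    for id in &ids {
        if IS_DENSE {
            // SAFETY: every element above was initialized as a `table_id`.
            let table = unsafe { id.table_id };
            println!("dense iteration over {table:?}");
        } else {
            // SAFETY: this branch would require every element to be an
            // `archetype_id`; it is unreachable here because IS_DENSE is true.
            let archetype = unsafe { id.archetype_id };
            println!("archetypal iteration over {archetype:?}");
        }
    }
}
```

Compared to an enum, this drops the discriminant (plus padding) per stored ID and
avoids a per-element branch on it, at the cost of the safety invariant documented
on the type.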

---

## Changelog
- Changed: `QueryState::matched_tables` now returns an iterator instead of
a reference to a slice.
- Changed: `QueryState::matched_archetypes` now returns an iterator
instead of a reference to a slice.

## Migration Guide
`QueryState::matched_tables` and `QueryState::matched_archetypes` no longer
return a reference to a slice; they now return an iterator. You may need to
use iterator combinators or collect the results into a `Vec` to use them as
a slice.
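
For example, a hypothetical helper that previously borrowed the returned slice
can collect the iterator instead (the `Health` component and the exact module
paths below are illustrative, not taken from this PR):

```rust
use bevy_ecs::component::Component;
use bevy_ecs::query::QueryState;
use bevy_ecs::storage::TableId;

#[derive(Component)]
struct Health(f32);

// Previously `matched_tables()` returned `&[TableId]` that could be used directly.
// It now returns an iterator, so collect it when a slice (or `Vec`) is needed;
// if you only iterate, consume the iterator directly and skip the allocation.
fn matched_table_ids(state: &QueryState<&'static Health>) -> Vec<TableId> {
    state.matched_tables().collect()
}
```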

---------

Co-authored-by: Alice Cecile <alice.i.cecile@gmail.com>
Authored by James Liu on 2024-03-30 03:15:55 -07:00; committed by GitHub.
commit 286bc8cce5 (parent 24030d2a0c)
3 changed files with 109 additions and 87 deletions


```diff
@@ -1,9 +1,9 @@
 use crate::{
-    archetype::{Archetype, ArchetypeEntity, ArchetypeId, Archetypes},
+    archetype::{Archetype, ArchetypeEntity, Archetypes},
     component::Tick,
     entity::{Entities, Entity},
-    query::{ArchetypeFilter, DebugCheckedUnwrap, QueryState},
-    storage::{Table, TableId, TableRow, Tables},
+    query::{ArchetypeFilter, DebugCheckedUnwrap, QueryState, StorageId},
+    storage::{Table, TableRow, Tables},
     world::unsafe_world_cell::UnsafeWorldCell,
 };
 use std::{borrow::Borrow, iter::FusedIterator, mem::MaybeUninit, ops::Range};
@@ -239,22 +239,20 @@ impl<'w, 's, D: QueryData, F: QueryFilter> Iterator for QueryIter<'w, 's, D, F>
             let Some(item) = self.next() else { break };
             accum = func(accum, item);
         }
-        if D::IS_DENSE && F::IS_DENSE {
-            for table_id in self.cursor.table_id_iter.clone() {
+        for id in self.cursor.storage_id_iter.clone() {
+            if D::IS_DENSE && F::IS_DENSE {
                 // SAFETY: Matched table IDs are guaranteed to still exist.
-                let table = unsafe { self.tables.get(*table_id).debug_checked_unwrap() };
+                let table = unsafe { self.tables.get(id.table_id).debug_checked_unwrap() };
                 accum =
                     // SAFETY:
                     // - The fetched table matches both D and F
                     // - The provided range is equivalent to [0, table.entity_count)
                     // - The if block ensures that D::IS_DENSE and F::IS_DENSE are both true
                     unsafe { self.fold_over_table_range(accum, &mut func, table, 0..table.entity_count()) };
-            }
-        } else {
-            for archetype_id in self.cursor.archetype_id_iter.clone() {
+            } else {
                 let archetype =
                     // SAFETY: Matched archetype IDs are guaranteed to still exist.
-                    unsafe { self.archetypes.get(*archetype_id).debug_checked_unwrap() };
+                    unsafe { self.archetypes.get(id.archetype_id).debug_checked_unwrap() };
                 accum =
                     // SAFETY:
                     // - The fetched archetype matches both D and F
@@ -650,8 +648,7 @@ impl<'w, 's, D: ReadOnlyQueryData, F: QueryFilter, const K: usize> FusedIterator
 }
 
 struct QueryIterationCursor<'w, 's, D: QueryData, F: QueryFilter> {
-    table_id_iter: std::slice::Iter<'s, TableId>,
-    archetype_id_iter: std::slice::Iter<'s, ArchetypeId>,
+    storage_id_iter: std::slice::Iter<'s, StorageId>,
     table_entities: &'w [Entity],
    archetype_entities: &'w [ArchetypeEntity],
     fetch: D::Fetch<'w>,
@@ -665,8 +662,7 @@ struct QueryIterationCursor<'w, 's, D: QueryData, F: QueryFilter> {
 impl<D: QueryData, F: QueryFilter> Clone for QueryIterationCursor<'_, '_, D, F> {
     fn clone(&self) -> Self {
         Self {
-            table_id_iter: self.table_id_iter.clone(),
-            archetype_id_iter: self.archetype_id_iter.clone(),
+            storage_id_iter: self.storage_id_iter.clone(),
             table_entities: self.table_entities,
             archetype_entities: self.archetype_entities,
             fetch: self.fetch.clone(),
@@ -687,8 +683,7 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> {
         this_run: Tick,
     ) -> Self {
         QueryIterationCursor {
-            table_id_iter: [].iter(),
-            archetype_id_iter: [].iter(),
+            storage_id_iter: [].iter(),
             ..Self::init(world, query_state, last_run, this_run)
         }
     }
@@ -709,8 +704,7 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> {
             filter,
             table_entities: &[],
             archetype_entities: &[],
-            table_id_iter: query_state.matched_table_ids.iter(),
-            archetype_id_iter: query_state.matched_archetype_ids.iter(),
+            storage_id_iter: query_state.matched_storage_ids.iter(),
             current_len: 0,
             current_row: 0,
         }
@@ -746,12 +740,13 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> {
     /// Note that if `F::IS_ARCHETYPAL`, the return value
     /// will be **the exact count of remaining values**.
     fn max_remaining(&self, tables: &'w Tables, archetypes: &'w Archetypes) -> usize {
+        let ids = self.storage_id_iter.clone();
         let remaining_matched: usize = if Self::IS_DENSE {
-            let ids = self.table_id_iter.clone();
-            ids.map(|id| tables[*id].entity_count()).sum()
+            // SAFETY: The if check ensures that storage_id_iter stores TableIds
+            unsafe { ids.map(|id| tables[id.table_id].entity_count()).sum() }
         } else {
-            let ids = self.archetype_id_iter.clone();
-            ids.map(|id| archetypes[*id].len()).sum()
+            // SAFETY: The if check ensures that storage_id_iter stores ArchetypeIds
+            unsafe { ids.map(|id| archetypes[id.archetype_id].len()).sum() }
         };
         remaining_matched + self.current_len - self.current_row
     }
@@ -773,8 +768,8 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> {
         loop {
             // we are on the beginning of the query, or finished processing a table, so skip to the next
             if self.current_row == self.current_len {
-                let table_id = self.table_id_iter.next()?;
-                let table = tables.get(*table_id).debug_checked_unwrap();
+                let table_id = self.storage_id_iter.next()?.table_id;
+                let table = tables.get(table_id).debug_checked_unwrap();
                 // SAFETY: `table` is from the world that `fetch/filter` were created for,
                 // `fetch_state`/`filter_state` are the states that `fetch/filter` were initialized with
                 unsafe {
@@ -809,8 +804,8 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> {
         } else {
             loop {
                 if self.current_row == self.current_len {
-                    let archetype_id = self.archetype_id_iter.next()?;
-                    let archetype = archetypes.get(*archetype_id).debug_checked_unwrap();
+                    let archetype_id = self.storage_id_iter.next()?.archetype_id;
+                    let archetype = archetypes.get(archetype_id).debug_checked_unwrap();
                     let table = tables.get(archetype.table_id()).debug_checked_unwrap();
                     // SAFETY: `archetype` and `tables` are from the world that `fetch/filter` were created for,
                     // `fetch_state`/`filter_state` are the states that `fetch/filter` were initialized with
```


```diff
@@ -160,24 +160,22 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryParIter<'w, 's, D, F> {
                 thread_count > 0,
                 "Attempted to run parallel iteration over a query with an empty TaskPool"
             );
+            let id_iter = self.state.matched_storage_ids.iter();
             let max_size = if D::IS_DENSE && F::IS_DENSE {
                 // SAFETY: We only access table metadata.
                 let tables = unsafe { &self.world.world_metadata().storages().tables };
-                self.state
-                    .matched_table_ids
-                    .iter()
-                    .map(|id| tables[*id].entity_count())
+                id_iter
+                    // SAFETY: The if check ensures that matched_storage_ids stores TableIds
+                    .map(|id| unsafe { tables[id.table_id].entity_count() })
                     .max()
-                    .unwrap_or(0)
             } else {
                 let archetypes = &self.world.archetypes();
-                self.state
-                    .matched_archetype_ids
-                    .iter()
-                    .map(|id| archetypes[*id].len())
+                id_iter
+                    // SAFETY: The if check ensures that matched_storage_ids stores ArchetypeIds
+                    .map(|id| unsafe { archetypes[id.archetype_id].len() })
                     .max()
-                    .unwrap_or(0)
             };
+            let max_size = max_size.unwrap_or(0);
 
             let batches = thread_count * self.batching_strategy.batches_per_thread;
             // Round up to the nearest batch size.
```


```diff
@@ -21,6 +21,25 @@ use super::{
     QuerySingleError, ROQueryItem,
 };
 
+/// An ID for either a table or an archetype. Used for Query iteration.
+///
+/// Query iteration is exclusively dense (over tables) or archetypal (over archetypes) based on whether
+/// both `D::IS_DENSE` and `F::IS_DENSE` are true or not.
+///
+/// This is a union instead of an enum as the usage is determined at compile time, as all [`StorageId`]s for
+/// a [`QueryState`] will be all [`TableId`]s or all [`ArchetypeId`]s, and not a mixture of both. This
+/// removes the need for discriminator to minimize memory usage and branching during iteration, but requires
+/// a safety invariant be verified when disambiguating them.
+///
+/// # Safety
+/// Must be initialized and accessed as a [`TableId`], if both generic parameters to the query are dense.
+/// Must be initialized and accessed as an [`ArchetypeId`] otherwise.
+#[derive(Clone, Copy)]
+pub(super) union StorageId {
+    pub(super) table_id: TableId,
+    pub(super) archetype_id: ArchetypeId,
+}
+
 /// Provides scoped access to a [`World`] state according to a given [`QueryData`] and [`QueryFilter`].
 ///
 /// This data is cached between system runs, and is used to:
@@ -47,10 +66,8 @@ pub struct QueryState<D: QueryData, F: QueryFilter = ()> {
     /// [`FilteredAccess`] computed by combining the `D` and `F` access. Used to check which other queries
     /// this query can run in parallel with.
     pub(crate) component_access: FilteredAccess<ComponentId>,
-    // NOTE: we maintain both a TableId bitset and a vec because iterating the vec is faster
-    pub(crate) matched_table_ids: Vec<TableId>,
-    // NOTE: we maintain both a ArchetypeId bitset and a vec because iterating the vec is faster
-    pub(crate) matched_archetype_ids: Vec<ArchetypeId>,
+    // NOTE: we maintain both a bitset and a vec because iterating the vec is faster
+    pub(super) matched_storage_ids: Vec<StorageId>,
     pub(crate) fetch_state: D::State,
     pub(crate) filter_state: F::State,
     #[cfg(feature = "trace")]
@@ -61,8 +78,11 @@ impl<D: QueryData, F: QueryFilter> fmt::Debug for QueryState<D, F> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("QueryState")
             .field("world_id", &self.world_id)
-            .field("matched_table_count", &self.matched_table_ids.len())
-            .field("matched_archetype_count", &self.matched_archetype_ids.len())
+            .field("matched_table_count", &self.matched_tables.count_ones(..))
+            .field(
+                "matched_archetype_count",
+                &self.matched_archetypes.count_ones(..),
+            )
             .finish_non_exhaustive()
     }
 }
@@ -116,13 +136,13 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
     }
 
     /// Returns the tables matched by this query.
-    pub fn matched_tables(&self) -> &[TableId] {
-        &self.matched_table_ids
+    pub fn matched_tables(&self) -> impl Iterator<Item = TableId> + '_ {
+        self.matched_tables.ones().map(TableId::from_usize)
     }
 
     /// Returns the archetypes matched by this query.
-    pub fn matched_archetypes(&self) -> &[ArchetypeId] {
-        &self.matched_archetype_ids
+    pub fn matched_archetypes(&self) -> impl Iterator<Item = ArchetypeId> + '_ {
+        self.matched_archetypes.ones().map(ArchetypeId::new)
     }
 }
@@ -173,8 +193,7 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
         Self {
             world_id: world.id(),
             archetype_generation: ArchetypeGeneration::initial(),
-            matched_table_ids: Vec::new(),
-            matched_archetype_ids: Vec::new(),
+            matched_storage_ids: Vec::new(),
             fetch_state,
             filter_state,
             component_access,
@@ -198,8 +217,7 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
         let mut state = Self {
             world_id: builder.world().id(),
             archetype_generation: ArchetypeGeneration::initial(),
-            matched_table_ids: Vec::new(),
-            matched_archetype_ids: Vec::new(),
+            matched_storage_ids: Vec::new(),
             fetch_state,
             filter_state,
             component_access: builder.access().clone(),
@@ -376,12 +394,20 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
             let archetype_index = archetype.id().index();
             if !self.matched_archetypes.contains(archetype_index) {
                 self.matched_archetypes.grow_and_insert(archetype_index);
-                self.matched_archetype_ids.push(archetype.id());
+                if !D::IS_DENSE || !F::IS_DENSE {
+                    self.matched_storage_ids.push(StorageId {
+                        archetype_id: archetype.id(),
+                    });
+                }
             }
             let table_index = archetype.table_id().as_usize();
             if !self.matched_tables.contains(table_index) {
                 self.matched_tables.grow_and_insert(table_index);
-                self.matched_table_ids.push(archetype.table_id());
+                if D::IS_DENSE && F::IS_DENSE {
+                    self.matched_storage_ids.push(StorageId {
+                        table_id: archetype.table_id(),
+                    });
+                }
             }
             true
         } else {
@@ -462,8 +488,7 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
         QueryState {
             world_id: self.world_id,
             archetype_generation: self.archetype_generation,
-            matched_table_ids: self.matched_table_ids.clone(),
-            matched_archetype_ids: self.matched_archetype_ids.clone(),
+            matched_storage_ids: self.matched_storage_ids.clone(),
             fetch_state,
             filter_state,
             component_access: self.component_access.clone(),
@@ -553,24 +578,30 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
         }
 
         // take the intersection of the matched ids
-        let matched_tables: FixedBitSet = self
-            .matched_tables
-            .intersection(&other.matched_tables)
-            .collect();
-        let matched_table_ids: Vec<TableId> =
-            matched_tables.ones().map(TableId::from_usize).collect();
-        let matched_archetypes: FixedBitSet = self
-            .matched_archetypes
-            .intersection(&other.matched_archetypes)
-            .collect();
-        let matched_archetype_ids: Vec<ArchetypeId> =
-            matched_archetypes.ones().map(ArchetypeId::new).collect();
+        let mut matched_tables = self.matched_tables.clone();
+        let mut matched_archetypes = self.matched_archetypes.clone();
+        matched_tables.intersect_with(&other.matched_tables);
+        matched_archetypes.intersect_with(&other.matched_archetypes);
+        let matched_storage_ids = if NewD::IS_DENSE && NewF::IS_DENSE {
+            matched_tables
+                .ones()
+                .map(|id| StorageId {
+                    table_id: TableId::from_usize(id),
+                })
+                .collect()
+        } else {
+            matched_archetypes
+                .ones()
+                .map(|id| StorageId {
+                    archetype_id: ArchetypeId::new(id),
+                })
+                .collect()
+        };
 
         QueryState {
             world_id: self.world_id,
             archetype_generation: self.archetype_generation,
-            matched_table_ids,
-            matched_archetype_ids,
+            matched_storage_ids,
             fetch_state: new_fetch_state,
             filter_state: new_filter_state,
             component_access: joined_component_access,
@@ -1356,12 +1387,15 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
     ) {
         // NOTE: If you are changing query iteration code, remember to update the following places, where relevant:
         // QueryIter, QueryIterationCursor, QueryManyIter, QueryCombinationIter, QueryState::for_each_unchecked_manual, QueryState::par_for_each_unchecked_manual
         bevy_tasks::ComputeTaskPool::get().scope(|scope| {
-            if D::IS_DENSE && F::IS_DENSE {
-                // SAFETY: We only access table data that has been registered in `self.archetype_component_access`.
-                let tables = unsafe { &world.storages().tables };
-                for table_id in &self.matched_table_ids {
-                    let table = &tables[*table_id];
+            // SAFETY: We only access table data that has been registered in `self.archetype_component_access`.
+            let tables = unsafe { &world.storages().tables };
+            let archetypes = world.archetypes();
+            for storage_id in &self.matched_storage_ids {
+                if D::IS_DENSE && F::IS_DENSE {
+                    let table_id = storage_id.table_id;
+                    let table = &tables[table_id];
                     if table.is_empty() {
                         continue;
                     }
@@ -1370,39 +1404,34 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
                     while offset < table.entity_count() {
                         let mut func = func.clone();
                         let len = batch_size.min(table.entity_count() - offset);
+                        let batch = offset..offset + len;
                         scope.spawn(async move {
                             #[cfg(feature = "trace")]
                             let _span = self.par_iter_span.enter();
-                            let table = &world
-                                .storages()
-                                .tables
-                                .get(*table_id)
-                                .debug_checked_unwrap();
-                            let batch = offset..offset + len;
+                            let table =
                                &world.storages().tables.get(table_id).debug_checked_unwrap();
                             self.iter_unchecked_manual(world, last_run, this_run)
                                 .for_each_in_table_range(&mut func, table, batch);
                         });
                         offset += batch_size;
                     }
-                }
-            } else {
-                let archetypes = world.archetypes();
-                for archetype_id in &self.matched_archetype_ids {
-                    let mut offset = 0;
-                    let archetype = &archetypes[*archetype_id];
+                } else {
+                    let archetype_id = storage_id.archetype_id;
+                    let archetype = &archetypes[archetype_id];
                     if archetype.is_empty() {
                         continue;
                     }
+                    let mut offset = 0;
                     while offset < archetype.len() {
                         let mut func = func.clone();
                         let len = batch_size.min(archetype.len() - offset);
+                        let batch = offset..offset + len;
                         scope.spawn(async move {
                             #[cfg(feature = "trace")]
                             let _span = self.par_iter_span.enter();
                             let archetype =
-                                world.archetypes().get(*archetype_id).debug_checked_unwrap();
-                            let batch = offset..offset + len;
+                                world.archetypes().get(archetype_id).debug_checked_unwrap();
                             self.iter_unchecked_manual(world, last_run, this_run)
                                 .for_each_in_archetype_range(&mut func, archetype, batch);
                         });
```