feat(ecs): configurable error handling for fallible systems (#17753)

You can now configure error handlers for fallible systems. These can be
configured on several levels:

- Globally via `App::set_system_error_handler`
- Per-schedule via `Schedule::set_error_handler`
- Per-system via a piped system (this is existing functionality)

The default handler panics on error, which preserves the behavior from
before this commit.

The "fallible_systems" example demonstrates the new functionality.

This builds on top of #17731, #16589, #17051.

---------

Signed-off-by: Jean Mertz <git@jeanmertz.com>
This commit is contained in:
Jean Mertz 2025-02-11 19:36:08 +01:00 committed by GitHub
parent c896ad6146
commit fd67ca7eb0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 307 additions and 47 deletions

View File

@ -13,13 +13,13 @@ use bevy_ecs::{
event::{event_update_system, EventCursor},
intern::Interned,
prelude::*,
result::{Error, SystemErrorContext},
schedule::{ScheduleBuildSettings, ScheduleLabel},
system::{IntoObserverSystem, SystemId, SystemInput},
};
use bevy_platform_support::collections::HashMap;
use core::{fmt::Debug, num::NonZero, panic::AssertUnwindSafe};
use log::debug;
use thiserror::Error;
#[cfg(feature = "trace")]
use tracing::info_span;
@ -44,7 +44,7 @@ pub use bevy_ecs::label::DynEq;
/// A shorthand for `Interned<dyn AppLabel>`.
pub type InternedAppLabel = Interned<dyn AppLabel>;
#[derive(Debug, Error)]
#[derive(Debug, thiserror::Error)]
pub(crate) enum AppError {
#[error("duplicate plugin {plugin_name:?}")]
DuplicatePlugin { plugin_name: String },
@ -1274,6 +1274,18 @@ impl App {
self
}
/// Installs `error_handler` as the global error handler for systems that return a [`Result`].
///
/// The call is delegated to `set_system_error_handler` on the value returned by
/// [`App::main_mut`]. Returns `self` so that calls can be chained.
///
/// See the [`bevy_ecs::result` module-level documentation](../../bevy_ecs/result/index.html)
/// for more information.
pub fn set_system_error_handler(
    &mut self,
    error_handler: fn(Error, SystemErrorContext),
) -> &mut Self {
    let main_app = self.main_mut();
    main_app.set_system_error_handler(error_handler);
    self
}
/// Attempts to determine if an [`AppExit`] was raised since the last update.
///
/// Will attempt to return the first [`Error`](AppExit::Error) it encounters.

View File

@ -3,6 +3,7 @@ use alloc::{boxed::Box, string::String, vec::Vec};
use bevy_ecs::{
event::EventRegistry,
prelude::*,
result::{DefaultSystemErrorHandler, SystemErrorContext},
schedule::{InternedScheduleLabel, ScheduleBuildSettings, ScheduleLabel},
system::{SystemId, SystemInput},
};
@ -335,6 +336,22 @@ impl SubApp {
self
}
/// Installs `error_handler` as the global error handler for systems that return a [`Result`].
///
/// The handler is written into the [`DefaultSystemErrorHandler`] resource of this sub-app's
/// world, which is fetched via `get_resource_or_init`. Returns `self` so that calls can be
/// chained.
///
/// See the [`bevy_ecs::result` module-level documentation](../../bevy_ecs/result/index.html)
/// for more information.
pub fn set_system_error_handler(
    &mut self,
    error_handler: fn(Error, SystemErrorContext),
) -> &mut Self {
    self.world_mut()
        .get_resource_or_init::<DefaultSystemErrorHandler>()
        .0 = error_handler;
    self
}
/// See [`App::add_event`].
pub fn add_event<T>(&mut self) -> &mut Self
where

View File

@ -1,9 +1,153 @@
//! Contains error and result helpers for use in fallible systems.
//! Error handling for "fallible" systems.
//!
//! When a system is added to a [`Schedule`] and its return type is [`Result`], Bevy considers
//! that system to be "fallible", and the ECS scheduler will special-case the [`Err`] variant of
//! the returned `Result`.
//!
//! All [`Error`]s returned by a system are handled by an "error handler". By default, the
//! [`panic`] error handler function is used, resulting in a panic with the error message attached.
//!
//! You can change the default behavior by registering a custom error handler, either globally or
//! per `Schedule`:
//!
//! - [`App::set_system_error_handler`] sets the global error handler for all systems of the
//! current [`World`].
//! - [`Schedule::set_error_handler`] sets the error handler for all systems of that schedule.
//!
//! Bevy provides a number of pre-built error-handlers for you to use:
//!
//! - [`panic`] panics with the system error
//! - [`error`] logs the system error at the `error` level
//! - [`warn`] logs the system error at the `warn` level
//! - [`info`] logs the system error at the `info` level
//! - [`debug`] logs the system error at the `debug` level
//! - [`trace`] logs the system error at the `trace` level
//! - [`ignore`] ignores the system error
//!
//! However, you can use any custom error handler logic by providing your own function (or
//! non-capturing closure that coerces to the function signature) as long as it matches the
//! signature:
//!
//! ```rust,ignore
//! fn(Error, SystemErrorContext)
//! ```
//!
//! The [`SystemErrorContext`] gives you access to additional details about the failed system,
//! such as the system's [`name`], which you can include in your error messages.
//!
//! For example:
//!
//! ```rust
//! # use bevy_ecs::prelude::*;
//! # use bevy_ecs::schedule::ScheduleLabel;
//! # use log::trace;
//! # fn update() -> Result { Ok(()) }
//! # #[derive(ScheduleLabel, Hash, Debug, PartialEq, Eq, Clone, Copy)]
//! # struct MySchedule;
//! # fn main() {
//! let mut schedule = Schedule::new(MySchedule);
//! schedule.add_systems(update);
//! schedule.set_error_handler(|error, ctx| {
//! if ctx.name.ends_with("update") {
//! trace!("Nothing to see here, move along.");
//! return;
//! }
//!
//! bevy_ecs::result::error(error, ctx);
//! });
//! # }
//! ```
//!
//! If you need special handling of individual fallible systems, you can use Bevy's [`system piping
//! feature`] to capture the `Result` output of the system and handle it accordingly.
//!
//! [`Schedule`]: crate::schedule::Schedule
//! [`panic`]: panic()
//! [`World`]: crate::world::World
//! [`Schedule::set_error_handler`]: crate::schedule::Schedule::set_error_handler
//! [`System`]: crate::system::System
//! [`name`]: crate::system::System::name
//! [`App::set_system_error_handler`]: ../../bevy_app/struct.App.html#method.set_system_error_handler
//! [`system piping feature`]: crate::system::In
use alloc::boxed::Box;
use crate::{component::Tick, resource::Resource};
use alloc::{borrow::Cow, boxed::Box};
/// A dynamic error type for use in fallible systems.
pub type Error = Box<dyn core::error::Error + Send + Sync + 'static>;
/// A result type for use in fallible systems.
pub type Result<T = (), E = Error> = core::result::Result<T, E>;
/// Additional context for a failed system run.
///
/// A value of this type is passed as the second argument to every system error handler, whose
/// signature is `fn(Error, SystemErrorContext)`.
pub struct SystemErrorContext {
/// The name of the system that failed.
pub name: Cow<'static, str>,
/// The last tick that the system was run.
pub last_run: Tick,
}
/// The default system error handler stored as a resource in the [`World`](crate::world::World).
///
/// The wrapped function pointer is public, so the handler can be swapped by assigning to `.0`.
pub struct DefaultSystemErrorHandler(pub fn(Error, SystemErrorContext));

impl Resource for DefaultSystemErrorHandler {}

impl Default for DefaultSystemErrorHandler {
    /// Uses the [`panic()`] handler unless another handler is configured.
    fn default() -> Self {
        DefaultSystemErrorHandler(panic)
    }
}
// Shared body of the built-in error handlers.
//
// Invokes the macro named by `$call` (for example `panic` or `log::error`) with a message that
// contains the name of the failed system (`$c.name`) followed by the `Debug` representation of
// the error `$e`.
macro_rules! inner {
($call:path, $e:ident, $c:ident) => {
$call!("Encountered an error in system `{}`: {:?}", $c.name, $e);
};
}
/// Error handler that panics with the system error.
///
/// The panic message contains the name of the failed system (`ctx.name`) and the `Debug`
/// representation of `error`.
#[track_caller]
#[inline]
pub fn panic(error: Error, ctx: SystemErrorContext) {
inner!(panic, error, ctx);
}
/// Error handler that logs the system error at the `error` level.
///
/// The log message contains the name of the failed system (`ctx.name`) and the `Debug`
/// representation of `error`.
#[track_caller]
#[inline]
pub fn error(error: Error, ctx: SystemErrorContext) {
inner!(log::error, error, ctx);
}
/// Error handler that logs the system error at the `warn` level.
///
/// The log message contains the name of the failed system (`ctx.name`) and the `Debug`
/// representation of `error`.
#[track_caller]
#[inline]
pub fn warn(error: Error, ctx: SystemErrorContext) {
inner!(log::warn, error, ctx);
}
/// Error handler that logs the system error at the `info` level.
///
/// The log message contains the name of the failed system (`ctx.name`) and the `Debug`
/// representation of `error`.
#[track_caller]
#[inline]
pub fn info(error: Error, ctx: SystemErrorContext) {
inner!(log::info, error, ctx);
}
/// Error handler that logs the system error at the `debug` level.
///
/// The log message contains the name of the failed system (`ctx.name`) and the `Debug`
/// representation of `error`.
#[track_caller]
#[inline]
pub fn debug(error: Error, ctx: SystemErrorContext) {
inner!(log::debug, error, ctx);
}
/// Error handler that logs the system error at the `trace` level.
///
/// The log message contains the name of the failed system (`ctx.name`) and the `Debug`
/// representation of `error`.
#[track_caller]
#[inline]
pub fn trace(error: Error, ctx: SystemErrorContext) {
inner!(log::trace, error, ctx);
}
/// Error handler that ignores the system error.
///
/// Both the error and its context are dropped without being reported anywhere.
#[track_caller]
#[inline]
pub fn ignore(_: Error, _: SystemErrorContext) {}

View File

@ -18,7 +18,7 @@ use crate::{
component::{ComponentId, Tick},
prelude::{IntoSystemSet, SystemSet},
query::Access,
result::Result,
result::{Error, Result, SystemErrorContext},
schedule::{BoxedCondition, InternedSystemSet, NodeId, SystemTypeSet},
system::{ScheduleSystem, System, SystemIn},
world::{unsafe_world_cell::UnsafeWorldCell, DeferredWorld, World},
@ -33,6 +33,7 @@ pub(super) trait SystemExecutor: Send + Sync {
schedule: &mut SystemSchedule,
world: &mut World,
skip_systems: Option<&FixedBitSet>,
error_handler: fn(Error, SystemErrorContext),
);
fn set_apply_final_deferred(&mut self, value: bool);
}

View File

@ -17,6 +17,7 @@ use crate::{
archetype::ArchetypeComponentId,
prelude::Resource,
query::Access,
result::{Error, Result, SystemErrorContext},
schedule::{is_apply_deferred, BoxedCondition, ExecutorKind, SystemExecutor, SystemSchedule},
system::ScheduleSystem,
world::{unsafe_world_cell::UnsafeWorldCell, World},
@ -131,6 +132,7 @@ pub struct ExecutorState {
struct Context<'scope, 'env, 'sys> {
environment: &'env Environment<'env, 'sys>,
scope: &'scope Scope<'scope, 'env, ()>,
/// Called with the error and a [`SystemErrorContext`] when a system run returns `Err`.
error_handler: fn(Error, SystemErrorContext),
}
impl Default for MultiThreadedExecutor {
@ -181,6 +183,7 @@ impl SystemExecutor for MultiThreadedExecutor {
schedule: &mut SystemSchedule,
world: &mut World,
_skip_systems: Option<&FixedBitSet>,
error_handler: fn(Error, SystemErrorContext),
) {
let state = self.state.get_mut().unwrap();
// reset counts
@ -220,7 +223,11 @@ impl SystemExecutor for MultiThreadedExecutor {
false,
thread_executor,
|scope| {
let context = Context { environment, scope };
let context = Context {
environment,
scope,
error_handler,
};
// The first tick won't need to process finished systems, but we still need to run the loop in
// tick_executor() in case a system completes while the first tick still holds the mutex.
@ -601,17 +608,18 @@ impl ExecutorState {
// access the world data used by the system.
// - `update_archetype_component_access` has been called.
unsafe {
// TODO: implement an error-handling API instead of panicking.
if let Err(err) = __rust_begin_short_backtrace::run_unsafe(
system,
context.environment.world_cell,
) {
panic!(
"Encountered an error in system `{}`: {:?}",
&*system.name(),
err
(context.error_handler)(
err,
SystemErrorContext {
name: system.name(),
last_run: system.get_last_run(),
},
);
};
}
};
}));
context.system_completed(system_index, res, system);
@ -655,14 +663,15 @@ impl ExecutorState {
// that no other systems currently have access to the world.
let world = unsafe { context.environment.world_cell.world_mut() };
let res = std::panic::catch_unwind(AssertUnwindSafe(|| {
// TODO: implement an error-handling API instead of panicking.
if let Err(err) = __rust_begin_short_backtrace::run(system, world) {
panic!(
"Encountered an error in system `{}`: {:?}",
&*system.name(),
err
(context.error_handler)(
err,
SystemErrorContext {
name: system.name(),
last_run: system.get_last_run(),
},
);
};
}
}));
context.system_completed(system_index, res, system);
};

View File

@ -8,6 +8,7 @@ use tracing::info_span;
use std::eprintln;
use crate::{
result::{Error, SystemErrorContext},
schedule::{
executor::is_apply_deferred, BoxedCondition, ExecutorKind, SystemExecutor, SystemSchedule,
},
@ -43,6 +44,7 @@ impl SystemExecutor for SimpleExecutor {
schedule: &mut SystemSchedule,
world: &mut World,
_skip_systems: Option<&FixedBitSet>,
error_handler: fn(Error, SystemErrorContext),
) {
// If stepping is enabled, make sure we skip those systems that should
// not be run.
@ -104,12 +106,13 @@ impl SystemExecutor for SimpleExecutor {
}
let f = AssertUnwindSafe(|| {
// TODO: implement an error-handling API instead of panicking.
if let Err(err) = __rust_begin_short_backtrace::run(system, world) {
panic!(
"Encountered an error in system `{}`: {:?}",
&*system.name(),
err
error_handler(
err,
SystemErrorContext {
name: system.name(),
last_run: system.get_last_run(),
},
);
}
});

View File

@ -8,6 +8,7 @@ use tracing::info_span;
use std::eprintln;
use crate::{
result::{Error, SystemErrorContext},
schedule::{is_apply_deferred, BoxedCondition, ExecutorKind, SystemExecutor, SystemSchedule},
world::World,
};
@ -49,6 +50,7 @@ impl SystemExecutor for SingleThreadedExecutor {
schedule: &mut SystemSchedule,
world: &mut World,
_skip_systems: Option<&FixedBitSet>,
error_handler: fn(Error, SystemErrorContext),
) {
// If stepping is enabled, make sure we skip those systems that should
// not be run.
@ -112,12 +114,13 @@ impl SystemExecutor for SingleThreadedExecutor {
let f = AssertUnwindSafe(|| {
if system.is_exclusive() {
// TODO: implement an error-handling API instead of panicking.
if let Err(err) = __rust_begin_short_backtrace::run(system, world) {
panic!(
"Encountered an error in system `{}`: {:?}",
&*system.name(),
err
error_handler(
err,
SystemErrorContext {
name: system.name(),
last_run: system.get_last_run(),
},
);
}
} else {
@ -127,12 +130,13 @@ impl SystemExecutor for SingleThreadedExecutor {
// SAFETY: We have exclusive, single-threaded access to the world and
// update_archetype_component_access is being called immediately before this.
unsafe {
// TODO: implement an error-handling API instead of panicking.
if let Err(err) = __rust_begin_short_backtrace::run_unsafe(system, world) {
panic!(
"Encountered an error in system `{}`: {:?}",
&*system.name(),
err
error_handler(
err,
SystemErrorContext {
name: system.name(),
last_run: system.get_last_run(),
},
);
}
};

View File

@ -28,7 +28,7 @@ use crate::{
component::{ComponentId, Components, Tick},
prelude::Component,
resource::Resource,
result::Result,
result::{DefaultSystemErrorHandler, Error, SystemErrorContext},
schedule::*,
system::ScheduleSystem,
world::World,
@ -49,10 +49,7 @@ pub struct Schedules {
impl Schedules {
/// Constructs an empty `Schedules` with zero initial capacity.
pub fn new() -> Self {
Self {
inner: HashMap::default(),
ignored_scheduling_ambiguities: BTreeSet::new(),
}
Self::default()
}
/// Inserts a labeled schedule into the map.
@ -299,6 +296,7 @@ pub struct Schedule {
executable: SystemSchedule,
executor: Box<dyn SystemExecutor>,
executor_initialized: bool,
error_handler: Option<fn(Error, SystemErrorContext)>,
}
#[derive(ScheduleLabel, Hash, PartialEq, Eq, Debug, Clone)]
@ -323,6 +321,7 @@ impl Schedule {
executable: SystemSchedule::new(),
executor: make_executor(ExecutorKind::default()),
executor_initialized: false,
error_handler: None,
};
// Call `set_build_settings` to add any default build passes
this.set_build_settings(Default::default());
@ -400,6 +399,13 @@ impl Schedule {
self
}
/// Sets the error handler this schedule uses for systems that return a
/// [`Result`](crate::result::Result).
///
/// When no handler has been set by the time the schedule is initialized, the handler stored in
/// the [`DefaultSystemErrorHandler`] resource is used instead.
///
/// See the [`result` module-level documentation](crate::result) for more information.
pub fn set_error_handler(&mut self, error_handler: fn(Error, SystemErrorContext)) {
    let handler = Some(error_handler);
    self.error_handler = handler;
}
/// Returns the schedule's current `ScheduleBuildSettings`.
pub fn get_build_settings(&self) -> ScheduleBuildSettings {
self.graph.settings.clone()
@ -437,8 +443,11 @@ impl Schedule {
self.initialize(world)
.unwrap_or_else(|e| panic!("Error when initializing schedule {:?}: {e}", self.label));
let error_handler = self.error_handler.expect("schedule initialized");
#[cfg(not(feature = "bevy_debug_stepping"))]
self.executor.run(&mut self.executable, world, None);
self.executor
.run(&mut self.executable, world, None, error_handler);
#[cfg(feature = "bevy_debug_stepping")]
{
@ -447,8 +456,12 @@ impl Schedule {
Some(mut stepping) => stepping.skipped_systems(self),
};
self.executor
.run(&mut self.executable, world, skip_systems.as_ref());
self.executor.run(
&mut self.executable,
world,
skip_systems.as_ref(),
error_handler,
);
}
}
@ -473,6 +486,10 @@ impl Schedule {
self.executor_initialized = false;
}
if self.error_handler.is_none() {
self.error_handler = Some(world.get_resource_or_init::<DefaultSystemErrorHandler>().0);
}
if !self.executor_initialized {
self.executor.init(&self.executable);
self.executor_initialized = true;

View File

@ -48,7 +48,6 @@ use crate::{
query::{DebugCheckedUnwrap, QueryData, QueryFilter, QueryState},
removal_detection::RemovedComponentEvents,
resource::Resource,
result::Result,
schedule::{Schedule, ScheduleLabel, Schedules},
storage::{ResourceData, Storages},
system::Commands,

View File

@ -6,13 +6,53 @@ use bevy::prelude::*;
use rand::distributions::Distribution;
fn main() {
App::new()
.add_plugins(DefaultPlugins)
.add_systems(Startup, setup)
.run();
let mut app = App::new();
app.add_plugins(DefaultPlugins);
// Fallible systems can be used the same way as regular systems. The only difference is they
// return a `Result<(), Box<dyn Error>>` instead of a `()` (unit) type. Bevy will handle both
// types of systems the same way, except for the error handling.
app.add_systems(Startup, (setup, failing_system));
// By default, fallible systems that return an error will panic.
//
// We can change this by setting a custom error handler. This can be done globally for all
// systems in a given `App`. Here we set the global error handler using one of the built-in
// error handlers. Bevy provides built-in handlers for `panic`, `error`, `warn`, `info`,
// `debug`, `trace` and `ignore`.
app.set_system_error_handler(bevy::ecs::result::warn);
// Additionally, you can set a custom error handler per `Schedule`. This will take precedence
// over the global error handler.
//
// In this instance we provide our own non-capturing closure that coerces to the expected error
// handler function pointer:
//
// fn(bevy_ecs::result::Error, bevy_ecs::result::SystemErrorContext)
//
app.add_systems(PostStartup, failing_system)
.get_schedule_mut(PostStartup)
.unwrap()
.set_error_handler(|err, ctx| error!("{} failed: {err}", ctx.name));
// Individual systems can also be handled by piping the output result:
app.add_systems(
PostStartup,
failing_system.pipe(|result: In<Result>| {
let _ = result.0.inspect_err(|err| info!("captured error: {err}"));
}),
);
// If we run the app, we'll see the following output at startup:
//
// WARN Encountered an error in system `fallible_systems::failing_system`: "Resource not initialized"
// ERROR fallible_systems::failing_system failed: Resource not initialized
// INFO captured error: Resource not initialized
app.run();
}
/// An example of a system that calls several fallible functions with the questionmark operator.
/// An example of a system that calls several fallible functions with the question mark operator.
fn setup(
mut commands: Commands,
mut meshes: ResMut<Assets<Mesh>>,
@ -77,3 +117,17 @@ fn setup(
// Indicate the system completed successfully:
Ok(())
}
/// A marker resource that `failing_system` looks up in the world; the lookup failing is what
/// produces the "Resource not initialized" error shown in this example.
#[derive(Resource)]
struct UninitializedResource;
/// An example of a fallible system: it returns an error when [`UninitializedResource`] is not
/// present in the world.
fn failing_system(world: &mut World) -> Result {
    // `get_resource` returns an `Option<T>`.
    let resource = world.get_resource::<UninitializedResource>();

    // `ok_or` converts the `Option` into a `Result`, on which we can call `?` to propagate the
    // error. We can provide a `str` here because `Box<dyn Error>` implements `From<&str>`.
    resource.ok_or("Resource not initialized")?;

    Ok(())
}