Stageless: prettier cycle reporting (#7463)

Graph theory make head hurty. Closes #7367. Basically, when we topologically sort the dependency graph, we already find its strongly-connected components (a really [neat algorithm][1]). This PR adds an algorithm that can dissect those into simple cycles, giving us more useful error reports. test: `system_build_errors::dependency_cycle` ``` schedule has 1 before/after cycle(s): cycle 1: system set 'A' must run before itself system set 'A' ... which must run before system set 'B' ... which must run before system set 'A' ``` ``` schedule has 1 before/after cycle(s): cycle 1: system 'foo' must run before itself system 'foo' ... which must run before system 'bar' ... which must run before system 'foo' ``` test: `system_build_errors::hierarchy_cycle` ``` schedule has 1 in_set cycle(s): cycle 1: system set 'A' contains itself system set 'A' ... which contains system set 'B' ... which contains system set 'A' ``` [1]: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
2023-02-21 13:42:20 +00:00 · 2023-02-21 13:42:20 +00:00 · 3ec87e49ca
commit 3ec87e49ca
parent 5bd1907517
2 changed files with 204 additions and 35 deletions
--- a/crates/bevy_ecs/src/schedule/graph_utils.rs
+++ b/crates/bevy_ecs/src/schedule/graph_utils.rs
@ -1,7 +1,7 @@
 use std::fmt::Debug;

 use bevy_utils::{
-    petgraph::{graphmap::NodeTrait, prelude::*},
+    petgraph::{algo::TarjanScc, graphmap::NodeTrait, prelude::*},
    HashMap, HashSet,
 };
 use fixedbitset::FixedBitSet;
@ -274,3 +274,119 @@ where
        transitive_closure,
    }
 }
+
+/// Returns the simple cycles in a strongly-connected component of a directed graph.
+///
+/// The algorithm implemented comes from
+/// ["Finding all the elementary circuits of a directed graph"][1] by D. B. Johnson.
+///
+/// [1]: https://doi.org/10.1137/0204007
+pub fn simple_cycles_in_component<N>(graph: &DiGraphMap<N, ()>, scc: &[N]) -> Vec<Vec<N>>
+where
+    N: NodeTrait + Debug,
+{
+    let mut cycles = vec![];
+    let mut sccs = vec![scc.to_vec()];
+
+    while let Some(mut scc) = sccs.pop() {
+        // only look at nodes and edges in this strongly-connected component
+        let mut subgraph = DiGraphMap::new();
+        for &node in &scc {
+            subgraph.add_node(node);
+        }
+
+        for &node in &scc {
+            for successor in graph.neighbors(node) {
+                if subgraph.contains_node(successor) {
+                    subgraph.add_edge(node, successor, ());
+                }
+            }
+        }
+
+        // path of nodes that may form a cycle
+        let mut path = Vec::with_capacity(subgraph.node_count());
+        // we mark nodes as "blocked" to avoid finding permutations of the same cycles
+        let mut blocked = HashSet::with_capacity(subgraph.node_count());
+        // connects nodes along path segments that can't be part of a cycle (given current root)
+        // those nodes can be unblocked at the same time
+        let mut unblock_together: HashMap<N, HashSet<N>> =
+            HashMap::with_capacity(subgraph.node_count());
+        // stack for unblocking nodes
+        let mut unblock_stack = Vec::with_capacity(subgraph.node_count());
+        // nodes can be involved in multiple cycles
+        let mut maybe_in_more_cycles: HashSet<N> = HashSet::with_capacity(subgraph.node_count());
+        // stack for DFS
+        let mut stack = Vec::with_capacity(subgraph.node_count());
+
+        // we're going to look for all cycles that begin and end at this node
+        let root = scc.pop().unwrap();
+        // start a path at the root
+        path.clear();
+        path.push(root);
+        // mark this node as blocked
+        blocked.insert(root);
+
+        // DFS
+        stack.clear();
+        stack.push((root, subgraph.neighbors(root)));
+        while !stack.is_empty() {
+            let (ref node, successors) = stack.last_mut().unwrap();
+            if let Some(next) = successors.next() {
+                if next == root {
+                    // found a cycle
+                    maybe_in_more_cycles.extend(path.iter());
+                    cycles.push(path.clone());
+                } else if !blocked.contains(&next) {
+                    // first time seeing `next` on this path
+                    maybe_in_more_cycles.remove(&next);
+                    path.push(next);
+                    blocked.insert(next);
+                    stack.push((next, subgraph.neighbors(next)));
+                    continue;
+                } else {
+                    // not first time seeing `next` on this path
+                }
+            }
+
+            if successors.peekable().peek().is_none() {
+                if maybe_in_more_cycles.contains(node) {
+                    unblock_stack.push(*node);
+                    // unblock this node's ancestors
+                    while let Some(n) = unblock_stack.pop() {
+                        if blocked.remove(&n) {
+                            let unblock_predecessors =
+                                unblock_together.entry(n).or_insert_with(HashSet::new);
+                            unblock_stack.extend(unblock_predecessors.iter());
+                            unblock_predecessors.clear();
+                        }
+                    }
+                } else {
+                    // if its descendants can be unblocked later, this node will be too
+                    for successor in subgraph.neighbors(*node) {
+                        unblock_together
+                            .entry(successor)
+                            .or_insert_with(HashSet::new)
+                            .insert(*node);
+                    }
+                }
+
+                // remove node from path and DFS stack
+                path.pop();
+                stack.pop();
+            }
+        }
+
+        // remove node from subgraph
+        subgraph.remove_node(root);
+
+        // divide remainder into smaller SCCs
+        let mut tarjan_scc = TarjanScc::new();
+        tarjan_scc.run(&subgraph, |scc| {
+            if scc.len() > 1 {
+                sccs.push(scc.to_vec());
+            }
+        });
+    }
+
+    cycles
+}
--- a/crates/bevy_ecs/src/schedule/schedule.rs
+++ b/crates/bevy_ecs/src/schedule/schedule.rs
@ -7,7 +7,7 @@ use bevy_utils::default;
 #[cfg(feature = "trace")]
 use bevy_utils::tracing::info_span;
 use bevy_utils::{
-    petgraph::prelude::*,
+    petgraph::{algo::TarjanScc, prelude::*},
    thiserror::Error,
    tracing::{error, warn},
    HashMap, HashSet,
@ -942,7 +942,7 @@ impl ScheduleGraph {

        // check hierarchy for cycles
        self.hierarchy.topsort = self
-            .topsort_graph(&self.hierarchy.graph)
+            .topsort_graph(&self.hierarchy.graph, ReportCycles::Hierarchy)
            .map_err(|_| ScheduleBuildError::HierarchyCycle)?;

        let hier_results = check_graph(&self.hierarchy.graph, &self.hierarchy.topsort);
@ -960,7 +960,7 @@ impl ScheduleGraph {

        // check dependencies for cycles
        self.dependency.topsort = self
-            .topsort_graph(&self.dependency.graph)
+            .topsort_graph(&self.dependency.graph, ReportCycles::Dependency)
            .map_err(|_| ScheduleBuildError::DependencyCycle)?;

        // check for systems or system sets depending on sets they belong to
@ -1083,7 +1083,7 @@ impl ScheduleGraph {

        // topsort
        self.dependency_flattened.topsort = self
-            .topsort_graph(&dependency_flattened)
+            .topsort_graph(&dependency_flattened, ReportCycles::Dependency)
            .map_err(|_| ScheduleBuildError::DependencyCycle)?;
        self.dependency_flattened.graph = dependency_flattened;

@ -1318,6 +1318,12 @@ impl ScheduleGraph {
    }
 }

+/// Used to select the appropriate reporting function.
+enum ReportCycles {
+    Hierarchy,
+    Dependency,
+}
+
 // methods for reporting errors
 impl ScheduleGraph {
    fn get_node_name(&self, id: &NodeId) -> String {
@ -1345,7 +1351,7 @@ impl ScheduleGraph {
        name
    }

-    fn get_node_kind(id: &NodeId) -> &'static str {
+    fn get_node_kind(&self, id: &NodeId) -> &'static str {
        match id {
            NodeId::System(_) => "system",
            NodeId::Set(_) => "system set",
@ -1366,7 +1372,7 @@ impl ScheduleGraph {
            writeln!(
                message,
                " -- {:?} '{:?}' cannot be child of set '{:?}', longer path exists",
-                Self::get_node_kind(child),
+                self.get_node_kind(child),
                self.get_node_name(child),
                self.get_node_name(parent),
            )
@ -1376,48 +1382,95 @@ impl ScheduleGraph {
        error!("{}", message);
    }

-    /// Get topology sorted [`NodeId`], also ensures the graph contains no cycle
-    /// returns Err(()) if there are cycles
-    fn topsort_graph(&self, graph: &DiGraphMap<NodeId, ()>) -> Result<Vec<NodeId>, ()> {
-        // tarjan_scc's run order is reverse topological order
-        let mut rev_top_sorted_nodes = Vec::<NodeId>::with_capacity(graph.node_count());
-        let mut tarjan_scc = bevy_utils::petgraph::algo::TarjanScc::new();
-        let mut sccs_with_cycle = Vec::<Vec<NodeId>>::new();
+    /// Tries to topologically sort `graph`.
+    ///
+    /// If the graph is acyclic, returns [`Ok`] with the list of [`NodeId`] in a valid
+    /// topological order. If the graph contains cycles, returns [`Err`] with the list of
+    /// strongly-connected components that contain cycles (also in a valid topological order).
+    ///
+    /// # Errors
+    ///
+    /// If the graph contain cycles, then an error is returned.
+    fn topsort_graph(
+        &self,
+        graph: &DiGraphMap<NodeId, ()>,
+        report: ReportCycles,
+    ) -> Result<Vec<NodeId>, Vec<Vec<NodeId>>> {
+        // Tarjan's SCC algorithm returns elements in *reverse* topological order.
+        let mut tarjan_scc = TarjanScc::new();
+        let mut top_sorted_nodes = Vec::with_capacity(graph.node_count());
+        let mut sccs_with_cycles = Vec::new();

        tarjan_scc.run(graph, |scc| {
-            // by scc's definition, each scc is the cluster of nodes that they can reach each other
-            // so scc with size larger than 1, means there is/are cycle(s).
+            // A strongly-connected component is a group of nodes who can all reach each other
+            // through one or more paths. If an SCC contains more than one node, there must be
+            // at least one cycle within them.
            if scc.len() > 1 {
-                sccs_with_cycle.push(scc.to_vec());
+                sccs_with_cycles.push(scc.to_vec());
            }
-            rev_top_sorted_nodes.extend_from_slice(scc);
+            top_sorted_nodes.extend_from_slice(scc);
        });

-        if sccs_with_cycle.is_empty() {
-            // reverse the reverted to get topological order
-            let mut top_sorted_nodes = rev_top_sorted_nodes;
+        if sccs_with_cycles.is_empty() {
+            // reverse to get topological order
            top_sorted_nodes.reverse();
            Ok(top_sorted_nodes)
        } else {
-            self.report_cycles(&sccs_with_cycle);
-            Err(())
+            let mut cycles = Vec::new();
+            for scc in &sccs_with_cycles {
+                cycles.append(&mut simple_cycles_in_component(graph, scc));
+            }
+
+            match report {
+                ReportCycles::Hierarchy => self.report_hierarchy_cycles(&cycles),
+                ReportCycles::Dependency => self.report_dependency_cycles(&cycles),
+            }
+
+            Err(sccs_with_cycles)
        }
    }

-    /// Print detailed cycle messages
-    fn report_cycles(&self, sccs_with_cycles: &[Vec<NodeId>]) {
-        let mut message = format!(
-            "schedule contains at least {} cycle(s)",
-            sccs_with_cycles.len()
-        );
+    /// Logs details of cycles in the hierarchy graph.
+    fn report_hierarchy_cycles(&self, cycles: &[Vec<NodeId>]) {
+        let mut message = format!("schedule has {} in_set cycle(s):\n", cycles.len());
+        for (i, cycle) in cycles.iter().enumerate() {
+            let mut names = cycle.iter().map(|id| self.get_node_name(id));
+            let first_name = names.next().unwrap();
+            writeln!(
+                message,
+                "cycle {}: set '{first_name}' contains itself",
+                i + 1,
+            )
+            .unwrap();
+            writeln!(message, "set '{first_name}'").unwrap();
+            for name in names.chain(std::iter::once(first_name)) {
+                writeln!(message, " ... which contains set '{name}'").unwrap();
+            }
+            writeln!(message).unwrap();
+        }

-        writeln!(message, " -- cycle(s) found within:").unwrap();
-        for (i, scc) in sccs_with_cycles.iter().enumerate() {
-            let names = scc
+        error!("{}", message);
+    }
+
+    /// Logs details of cycles in the dependency graph.
+    fn report_dependency_cycles(&self, cycles: &[Vec<NodeId>]) {
+        let mut message = format!("schedule has {} before/after cycle(s):\n", cycles.len());
+        for (i, cycle) in cycles.iter().enumerate() {
+            let mut names = cycle
                .iter()
-                .map(|id| self.get_node_name(id))
-                .collect::<Vec<_>>();
-            writeln!(message, " ---- {i}: {names:?}").unwrap();
+                .map(|id| (self.get_node_kind(id), self.get_node_name(id)));
+            let (first_kind, first_name) = names.next().unwrap();
+            writeln!(
+                message,
+                "cycle {}: {first_kind} '{first_name}' must run before itself",
+                i + 1,
+            )
+            .unwrap();
+            writeln!(message, "{first_kind} '{first_name}'").unwrap();
+            for (kind, name) in names.chain(std::iter::once((first_kind, first_name))) {
+                writeln!(message, " ... which must run before {kind} '{name}'").unwrap();
+            }
+            writeln!(message).unwrap();
        }

        error!("{}", message);