compiler/rustc_data_structures/src/graph/scc/mod.rs

   1 //! Routine to compute the strongly connected components (SCCs) of a graph.
   2 //!
   3 //! Also computes as the resulting DAG if each SCC is replaced with a
   4 //! node in the graph. This uses [Tarjan's algorithm](
   5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
   6 //! that completes in *O*(*n*) time.
   7
   8 use crate::fx::FxHashSet;
   9 use crate::graph::vec_graph::VecGraph;
  10 use crate::graph::{DirectedGraph, NumEdges, Successors};
  11 use rustc_index::{Idx, IndexSlice, IndexVec};
  12 use std::ops::Range;
  13 use tracing::{debug, instrument};
  14
  15 #[cfg(test)]
  16 mod tests;
  17
  18 /// Strongly connected components (SCC) of a graph. The type `N` is
  19 /// the index type for the graph nodes and `S` is the index type for
  20 /// the SCCs. We can map from each node to the SCC that it
  21 /// participates in, and we also have the successors of each SCC.
  22 pub struct Sccs<N: Idx, S: Idx> {
  23     /// For each node, what is the SCC index of the SCC to which it
  24     /// belongs.
  25     scc_indices: IndexVec<N, S>,
  26
  27     /// Data about each SCC.
  28     scc_data: SccData<S>,
  29 }
  30
  31 pub struct SccData<S: Idx> {
  32     /// For each SCC, the range of `all_successors` where its
  33     /// successors can be found.
  34     ranges: IndexVec<S, Range<usize>>,
  35
  36     /// Contains the successors for all the Sccs, concatenated. The
  37     /// range of indices corresponding to a given SCC is found in its
  38     /// SccData.
  39     all_successors: Vec<S>,
  40 }
  41
  42 impl<N: Idx, S: Idx + Ord> Sccs<N, S> {
  43     pub fn new(graph: &impl Successors<Node = N>) -> Self {
  44         SccsConstruction::construct(graph)
  45     }
  46
  47     pub fn scc_indices(&self) -> &IndexSlice<N, S> {
  48         &self.scc_indices
  49     }
  50
  51     pub fn scc_data(&self) -> &SccData<S> {
  52         &self.scc_data
  53     }
  54
  55     /// Returns the number of SCCs in the graph.
  56     pub fn num_sccs(&self) -> usize {
  57         self.scc_data.len()
  58     }
  59
  60     /// Returns an iterator over the SCCs in the graph.
  61     ///
  62     /// The SCCs will be iterated in **dependency order** (or **post order**),
  63     /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
  64     /// This is convenient when the edges represent dependencies: when you visit
  65     /// `S1`, the value for `S2` will already have been computed.
  66     pub fn all_sccs(&self) -> impl Iterator<Item = S> {
  67         (0..self.scc_data.len()).map(S::new)
  68     }
  69
  70     /// Returns the SCC to which a node `r` belongs.
  71     pub fn scc(&self, r: N) -> S {
  72         self.scc_indices[r]
  73     }
  74
  75     /// Returns the successors of the given SCC.
  76     pub fn successors(&self, scc: S) -> &[S] {
  77         self.scc_data.successors(scc)
  78     }
  79
  80     /// Construct the reverse graph of the SCC graph.
  81     pub fn reverse(&self) -> VecGraph<S> {
  82         VecGraph::new(
  83             self.num_sccs(),
  84             self.all_sccs()
  85                 .flat_map(|source| {
  86                     self.successors(source).iter().map(move |&target| (target, source))
  87                 })
  88                 .collect(),
  89         )
  90     }
  91 }
  92
  93 impl<N: Idx, S: Idx + Ord> DirectedGraph for Sccs<N, S> {
  94     type Node = S;
  95
  96     fn num_nodes(&self) -> usize {
  97         self.num_sccs()
  98     }
  99 }
 100
 101 impl<N: Idx, S: Idx + Ord> NumEdges for Sccs<N, S> {
 102     fn num_edges(&self) -> usize {
 103         self.scc_data.all_successors.len()
 104     }
 105 }
 106
 107 impl<N: Idx, S: Idx + Ord> Successors for Sccs<N, S> {
 108     fn successors(&self, node: S) -> impl Iterator<Item = Self::Node> {
 109         self.successors(node).iter().cloned()
 110     }
 111 }
 112
 113 impl<S: Idx> SccData<S> {
 114     /// Number of SCCs,
 115     fn len(&self) -> usize {
 116         self.ranges.len()
 117     }
 118
 119     pub fn ranges(&self) -> &IndexSlice<S, Range<usize>> {
 120         &self.ranges
 121     }
 122
 123     pub fn all_successors(&self) -> &Vec<S> {
 124         &self.all_successors
 125     }
 126
 127     /// Returns the successors of the given SCC.
 128     fn successors(&self, scc: S) -> &[S] {
 129         // Annoyingly, `range` does not implement `Copy`, so we have
 130         // to do `range.start..range.end`:
 131         let range = &self.ranges[scc];
 132         &self.all_successors[range.start..range.end]
 133     }
 134
 135     /// Creates a new SCC with `successors` as its successors and
 136     /// returns the resulting index.
 137     fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
 138         // Store the successors on `scc_successors_vec`, remembering
 139         // the range of indices.
 140         let all_successors_start = self.all_successors.len();
 141         self.all_successors.extend(successors);
 142         let all_successors_end = self.all_successors.len();
 143
 144         debug!(
 145             "create_scc({:?}) successors={:?}",
 146             self.ranges.len(),
 147             &self.all_successors[all_successors_start..all_successors_end],
 148         );
 149
 150         self.ranges.push(all_successors_start..all_successors_end)
 151     }
 152 }
 153
 154 struct SccsConstruction<'c, G: DirectedGraph + Successors, S: Idx> {
 155     graph: &'c G,
 156
 157     /// The state of each node; used during walk to record the stack
 158     /// and after walk to record what cycle each node ended up being
 159     /// in.
 160     node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
 161
 162     /// The stack of nodes that we are visiting as part of the DFS.
 163     node_stack: Vec<G::Node>,
 164
 165     /// The stack of successors: as we visit a node, we mark our
 166     /// position in this stack, and when we encounter a successor SCC,
 167     /// we push it on the stack. When we complete an SCC, we can pop
 168     /// everything off the stack that was found along the way.
 169     successors_stack: Vec<S>,
 170
 171     /// A set used to strip duplicates. As we accumulate successors
 172     /// into the successors_stack, we sometimes get duplicate entries.
 173     /// We use this set to remove those -- we also keep its storage
 174     /// around between successors to amortize memory allocation costs.
 175     duplicate_set: FxHashSet<S>,
 176
 177     scc_data: SccData<S>,
 178 }
 179
 180 #[derive(Copy, Clone, Debug)]
 181 enum NodeState<N, S> {
 182     /// This node has not yet been visited as part of the DFS.
 183     ///
 184     /// After SCC construction is complete, this state ought to be
 185     /// impossible.
 186     NotVisited,
 187
 188     /// This node is currently being walk as part of our DFS. It is on
 189     /// the stack at the depth `depth`.
 190     ///
 191     /// After SCC construction is complete, this state ought to be
 192     /// impossible.
 193     BeingVisited { depth: usize },
 194
 195     /// Indicates that this node is a member of the given cycle.
 196     InCycle { scc_index: S },
 197
 198     /// Indicates that this node is a member of whatever cycle
 199     /// `parent` is a member of. This state is transient: whenever we
 200     /// see it, we try to overwrite it with the current state of
 201     /// `parent` (this is the "path compression" step of a union-find
 202     /// algorithm).
 203     InCycleWith { parent: N },
 204 }
 205
 206 #[derive(Copy, Clone, Debug)]
 207 enum WalkReturn<S> {
 208     Cycle { min_depth: usize },
 209     Complete { scc_index: S },
 210 }
 211
 212 impl<'c, G, S> SccsConstruction<'c, G, S>
 213 where
 214     G: DirectedGraph + Successors,
 215     S: Idx,
 216 {
 217     /// Identifies SCCs in the graph `G` and computes the resulting
 218     /// DAG. This uses a variant of [Tarjan's
 219     /// algorithm][wikipedia]. The high-level summary of the algorithm
 220     /// is that we do a depth-first search. Along the way, we keep a
 221     /// stack of each node whose successors are being visited. We
 222     /// track the depth of each node on this stack (there is no depth
 223     /// if the node is not on the stack). When we find that some node
 224     /// N with depth D can reach some other node N' with lower depth
 225     /// D' (i.e., D' < D), we know that N, N', and all nodes in
 226     /// between them on the stack are part of an SCC.
 227     ///
 228     /// [wikipedia]: https://bit.ly/2EZIx84
 229     fn construct(graph: &'c G) -> Sccs<G::Node, S> {
 230         let num_nodes = graph.num_nodes();
 231
 232         let mut this = Self {
 233             graph,
 234             node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
 235             node_stack: Vec::with_capacity(num_nodes),
 236             successors_stack: Vec::new(),
 237             scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() },
 238             duplicate_set: FxHashSet::default(),
 239         };
 240
 241         let scc_indices = (0..num_nodes)
 242             .map(G::Node::new)
 243             .map(|node| match this.start_walk_from(node) {
 244                 WalkReturn::Complete { scc_index } => scc_index,
 245                 WalkReturn::Cycle { min_depth } => {
 246                     panic!("`start_walk_node({node:?})` returned cycle with depth {min_depth:?}")
 247                 }
 248             })
 249             .collect();
 250
 251         Sccs { scc_indices, scc_data: this.scc_data }
 252     }
 253
 254     fn start_walk_from(&mut self, node: G::Node) -> WalkReturn<S> {
 255         if let Some(result) = self.inspect_node(node) {
 256             result
 257         } else {
 258             self.walk_unvisited_node(node)
 259         }
 260     }
 261
 262     /// Inspect a node during the DFS. We first examine its current
 263     /// state -- if it is not yet visited (`NotVisited`), return `None` so
 264     /// that the caller might push it onto the stack and start walking its
 265     /// successors.
 266     ///
 267     /// If it is already on the DFS stack it will be in the state
 268     /// `BeingVisited`. In that case, we have found a cycle and we
 269     /// return the depth from the stack.
 270     ///
 271     /// Otherwise, we are looking at a node that has already been
 272     /// completely visited. We therefore return `WalkReturn::Complete`
 273     /// with its associated SCC index.
 274     fn inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>> {
 275         Some(match self.find_state(node) {
 276             NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
 277
 278             NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
 279
 280             NodeState::NotVisited => return None,
 281
 282             NodeState::InCycleWith { parent } => panic!(
 283                 "`find_state` returned `InCycleWith({parent:?})`, which ought to be impossible"
 284             ),
 285         })
 286     }
 287
 288     /// Fetches the state of the node `r`. If `r` is recorded as being
 289     /// in a cycle with some other node `r2`, then fetches the state
 290     /// of `r2` (and updates `r` to reflect current result). This is
 291     /// basically the "find" part of a standard union-find algorithm
 292     /// (with path compression).
 293     fn find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S> {
 294         // To avoid recursion we temporarily reuse the `parent` of each
 295         // InCycleWith link to encode a downwards link while compressing
 296         // the path. After we have found the root or deepest node being
 297         // visited, we traverse the reverse links and correct the node
 298         // states on the way.
 299         //
 300         // **Note**: This mutation requires that this is a leaf function
 301         // or at least that none of the called functions inspects the
 302         // current node states. Luckily, we are a leaf.
 303
 304         // Remember one previous link. The termination condition when
 305         // following links downwards is then simply as soon as we have
 306         // found the initial self-loop.
 307         let mut previous_node = node;
 308
 309         // Ultimately assigned by the parent when following
 310         // `InCycleWith` upwards.
 311         let node_state = loop {
 312             debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]);
 313             match self.node_states[node] {
 314                 NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index },
 315                 NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth },
 316                 NodeState::NotVisited => break NodeState::NotVisited,
 317                 NodeState::InCycleWith { parent } => {
 318                     // We test this, to be extremely sure that we never
 319                     // ever break our termination condition for the
 320                     // reverse iteration loop.
 321                     assert!(node != parent, "Node can not be in cycle with itself");
 322                     // Store the previous node as an inverted list link
 323                     self.node_states[node] = NodeState::InCycleWith { parent: previous_node };
 324                     // Update to parent node.
 325                     previous_node = node;
 326                     node = parent;
 327                 }
 328             }
 329         };
 330
 331         // The states form a graph where up to one outgoing link is stored at
 332         // each node. Initially in general,
 333         //
 334         //                                                  E
 335         //                                                  ^
 336         //                                                  |
 337         //                                InCycleWith/BeingVisited/NotVisited
 338         //                                                  |
 339         //   A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+
 340         //   |
 341         //   = node, previous_node
 342         //
 343         // After the first loop, this will look like
 344         //                                                  E
 345         //                                                  ^
 346         //                                                  |
 347         //                                InCycleWith/BeingVisited/NotVisited
 348         //                                                  |
 349         // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+
 350         // | |                             |              |
 351         // | InCycleWith                   |              = node
 352         // +-+                             =previous_node
 353         //
 354         // Note in particular that A will be linked to itself in a self-cycle
 355         // and no other self-cycles occur due to how InCycleWith is assigned in
 356         // the find phase implemented by `walk_unvisited_node`.
 357         //
 358         // We now want to compress the path, that is assign the state of the
 359         // link D-E to all other links.
 360         //
 361         // We can then walk backwards, starting from `previous_node`, and assign
 362         // each node in the list with the updated state. The loop terminates
 363         // when we reach the self-cycle.
 364
 365         // Move backwards until we found the node where we started. We
 366         // will know when we hit the state where previous_node == node.
 367         loop {
 368             // Back at the beginning, we can return.
 369             if previous_node == node {
 370                 return node_state;
 371             }
 372             // Update to previous node in the link.
 373             match self.node_states[previous_node] {
 374                 NodeState::InCycleWith { parent: previous } => {
 375                     node = previous_node;
 376                     previous_node = previous;
 377                 }
 378                 // Only InCycleWith nodes were added to the reverse linked list.
 379                 other => panic!("Invalid previous link while compressing cycle: {other:?}"),
 380             }
 381
 382             debug!("find_state: parent_state = {:?}", node_state);
 383
 384             // Update the node state from the parent state. The assigned
 385             // state is actually a loop invariant but it will only be
 386             // evaluated if there is at least one backlink to follow.
 387             // Fully trusting llvm here to find this loop optimization.
 388             match node_state {
 389                 // Path compression, make current node point to the same root.
 390                 NodeState::InCycle { .. } => {
 391                     self.node_states[node] = node_state;
 392                 }
 393                 // Still visiting nodes, compress to cycle to the node
 394                 // at that depth.
 395                 NodeState::BeingVisited { depth } => {
 396                     self.node_states[node] =
 397                         NodeState::InCycleWith { parent: self.node_stack[depth] };
 398                 }
 399                 // These are never allowed as parent nodes. InCycleWith
 400                 // should have been followed to a real parent and
 401                 // NotVisited can not be part of a cycle since it should
 402                 // have instead gotten explored.
 403                 NodeState::NotVisited | NodeState::InCycleWith { .. } => {
 404                     panic!("invalid parent state: {node_state:?}")
 405                 }
 406             }
 407         }
 408     }
 409
 410     /// Walks a node that has never been visited before.
 411     ///
 412     /// Call this method when `inspect_node` has returned `None`. Having the
 413     /// caller decide avoids mutual recursion between the two methods and allows
 414     /// us to maintain an allocated stack for nodes on the path between calls.
 415     #[instrument(skip(self, initial), level = "debug")]
 416     fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S> {
 417         struct VisitingNodeFrame<G: DirectedGraph, Successors> {
 418             node: G::Node,
 419             iter: Option<Successors>,
 420             depth: usize,
 421             min_depth: usize,
 422             successors_len: usize,
 423             min_cycle_root: G::Node,
 424             successor_node: G::Node,
 425         }
 426
 427         // Move the stack to a local variable. We want to utilize the existing allocation and
 428         // mutably borrow it without borrowing self at the same time.
 429         let mut successors_stack = core::mem::take(&mut self.successors_stack);
 430         debug_assert_eq!(successors_stack.len(), 0);
 431
 432         let mut stack: Vec<VisitingNodeFrame<G, _>> = vec![VisitingNodeFrame {
 433             node: initial,
 434             depth: 0,
 435             min_depth: 0,
 436             iter: None,
 437             successors_len: 0,
 438             min_cycle_root: initial,
 439             successor_node: initial,
 440         }];
 441
 442         let mut return_value = None;
 443
 444         'recurse: while let Some(frame) = stack.last_mut() {
 445             let VisitingNodeFrame {
 446                 node,
 447                 depth,
 448                 iter,
 449                 successors_len,
 450                 min_depth,
 451                 min_cycle_root,
 452                 successor_node,
 453             } = frame;
 454
 455             let node = *node;
 456             let depth = *depth;
 457
 458             let successors = match iter {
 459                 Some(iter) => iter,
 460                 None => {
 461                     // This None marks that we still have the initialize this node's frame.
 462                     debug!(?depth, ?node);
 463
 464                     debug_assert!(matches!(self.node_states[node], NodeState::NotVisited));
 465
 466                     // Push `node` onto the stack.
 467                     self.node_states[node] = NodeState::BeingVisited { depth };
 468                     self.node_stack.push(node);
 469
 470                     // Walk each successor of the node, looking to see if any of
 471                     // them can reach a node that is presently on the stack. If
 472                     // so, that means they can also reach us.
 473                     *successors_len = successors_stack.len();
 474                     // Set and return a reference, this is currently empty.
 475                     iter.get_or_insert(self.graph.successors(node))
 476                 }
 477             };
 478
 479             // Now that iter is initialized, this is a constant for this frame.
 480             let successors_len = *successors_len;
 481
 482             // Construct iterators for the nodes and walk results. There are two cases:
 483             // * The walk of a successor node returned.
 484             // * The remaining successor nodes.
 485             let returned_walk =
 486                 return_value.take().into_iter().map(|walk| (*successor_node, Some(walk)));
 487
 488             let successor_walk = successors.map(|successor_node| {
 489                 debug!(?node, ?successor_node);
 490                 (successor_node, self.inspect_node(successor_node))
 491             });
 492
 493             for (successor_node, walk) in returned_walk.chain(successor_walk) {
 494                 match walk {
 495                     Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => {
 496                         // Track the minimum depth we can reach.
 497                         assert!(successor_min_depth <= depth);
 498                         if successor_min_depth < *min_depth {
 499                             debug!(?node, ?successor_min_depth);
 500                             *min_depth = successor_min_depth;
 501                             *min_cycle_root = successor_node;
 502                         }
 503                     }
 504
 505                     Some(WalkReturn::Complete { scc_index: successor_scc_index }) => {
 506                         // Push the completed SCC indices onto
 507                         // the `successors_stack` for later.
 508                         debug!(?node, ?successor_scc_index);
 509                         successors_stack.push(successor_scc_index);
 510                     }
 511
 512                     None => {
 513                         let depth = depth + 1;
 514                         debug!(?depth, ?successor_node);
 515                         // Remember which node the return value will come from.
 516                         frame.successor_node = successor_node;
 517                         // Start a new stack frame the step into it.
 518                         stack.push(VisitingNodeFrame {
 519                             node: successor_node,
 520                             depth,
 521                             iter: None,
 522                             successors_len: 0,
 523                             min_depth: depth,
 524                             min_cycle_root: successor_node,
 525                             successor_node,
 526                         });
 527                         continue 'recurse;
 528                     }
 529                 }
 530             }
 531
 532             // Completed walk, remove `node` from the stack.
 533             let r = self.node_stack.pop();
 534             debug_assert_eq!(r, Some(node));
 535
 536             // Remove the frame, it's done.
 537             let frame = stack.pop().unwrap();
 538
 539             // If `min_depth == depth`, then we are the root of the
 540             // cycle: we can't reach anyone further down the stack.
 541
 542             // Pass the 'return value' down the stack.
 543             // We return one frame at a time so there can't be another return value.
 544             debug_assert!(return_value.is_none());
 545             return_value = Some(if frame.min_depth == depth {
 546                 // Note that successor stack may have duplicates, so we
 547                 // want to remove those:
 548                 let deduplicated_successors = {
 549                     let duplicate_set = &mut self.duplicate_set;
 550                     duplicate_set.clear();
 551                     successors_stack
 552                         .drain(successors_len..)
 553                         .filter(move |&i| duplicate_set.insert(i))
 554                 };
 555                 let scc_index = self.scc_data.create_scc(deduplicated_successors);
 556                 self.node_states[node] = NodeState::InCycle { scc_index };
 557                 WalkReturn::Complete { scc_index }
 558             } else {
 559                 // We are not the head of the cycle. Return back to our
 560                 // caller. They will take ownership of the
 561                 // `self.successors` data that we pushed.
 562                 self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root };
 563                 WalkReturn::Cycle { min_depth: frame.min_depth }
 564             });
 565         }
 566
 567         // Keep the allocation we used for successors_stack.
 568         self.successors_stack = successors_stack;
 569         debug_assert_eq!(self.successors_stack.len(), 0);
 570
 571         return_value.unwrap()
 572     }
 573 }