compiler/rustc_data_structures/src/graph/scc/mod.rs

   1 //! Routine to compute the strongly connected components (SCCs) of a graph.
   2 //!
   3 //! Also computes as the resulting DAG if each SCC is replaced with a
   4 //! node in the graph. This uses [Tarjan's algorithm](
   5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
   6 //! that completes in *O*(*n*) time.
   7
   8 use crate::fx::FxHashSet;
   9 use crate::graph::vec_graph::VecGraph;
  10 use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors};
  11 use rustc_index::vec::{Idx, IndexVec};
  12 use std::ops::Range;
  13
  14 #[cfg(test)]
  15 mod tests;
  16
  17 /// Strongly connected components (SCC) of a graph. The type `N` is
  18 /// the index type for the graph nodes and `S` is the index type for
  19 /// the SCCs. We can map from each node to the SCC that it
  20 /// participates in, and we also have the successors of each SCC.
  21 pub struct Sccs<N: Idx, S: Idx> {
  22     /// For each node, what is the SCC index of the SCC to which it
  23     /// belongs.
  24     scc_indices: IndexVec<N, S>,
  25
  26     /// Data about each SCC.
  27     scc_data: SccData<S>,
  28 }
  29
  30 struct SccData<S: Idx> {
  31     /// For each SCC, the range of `all_successors` where its
  32     /// successors can be found.
  33     ranges: IndexVec<S, Range<usize>>,
  34
  35     /// Contains the successors for all the Sccs, concatenated. The
  36     /// range of indices corresponding to a given SCC is found in its
  37     /// SccData.
  38     all_successors: Vec<S>,
  39 }
  40
  41 impl<N: Idx, S: Idx> Sccs<N, S> {
  42     pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
  43         SccsConstruction::construct(graph)
  44     }
  45
  46     /// Returns the number of SCCs in the graph.
  47     pub fn num_sccs(&self) -> usize {
  48         self.scc_data.len()
  49     }
  50
  51     /// Returns an iterator over the SCCs in the graph.
  52     ///
  53     /// The SCCs will be iterated in **dependency order** (or **post order**),
  54     /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
  55     /// This is convenient when the edges represent dependencies: when you visit
  56     /// `S1`, the value for `S2` will already have been computed.
  57     pub fn all_sccs(&self) -> impl Iterator<Item = S> {
  58         (0..self.scc_data.len()).map(S::new)
  59     }
  60
  61     /// Returns the SCC to which a node `r` belongs.
  62     pub fn scc(&self, r: N) -> S {
  63         self.scc_indices[r]
  64     }
  65
  66     /// Returns the successors of the given SCC.
  67     pub fn successors(&self, scc: S) -> &[S] {
  68         self.scc_data.successors(scc)
  69     }
  70
  71     /// Construct the reverse graph of the SCC graph.
  72     pub fn reverse(&self) -> VecGraph<S> {
  73         VecGraph::new(
  74             self.num_sccs(),
  75             self.all_sccs()
  76                 .flat_map(|source| {
  77                     self.successors(source).iter().map(move |&target| (target, source))
  78                 })
  79                 .collect(),
  80         )
  81     }
  82 }
  83
  84 impl<N: Idx, S: Idx> DirectedGraph for Sccs<N, S> {
  85     type Node = S;
  86 }
  87
  88 impl<N: Idx, S: Idx> WithNumNodes for Sccs<N, S> {
  89     fn num_nodes(&self) -> usize {
  90         self.num_sccs()
  91     }
  92 }
  93
  94 impl<N: Idx, S: Idx> WithNumEdges for Sccs<N, S> {
  95     fn num_edges(&self) -> usize {
  96         self.scc_data.all_successors.len()
  97     }
  98 }
  99
 100 impl<N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs<N, S> {
 101     type Item = S;
 102
 103     type Iter = std::iter::Cloned<std::slice::Iter<'graph, S>>;
 104 }
 105
 106 impl<N: Idx, S: Idx> WithSuccessors for Sccs<N, S> {
 107     fn successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter {
 108         self.successors(node).iter().cloned()
 109     }
 110 }
 111
 112 impl<S: Idx> SccData<S> {
 113     /// Number of SCCs,
 114     fn len(&self) -> usize {
 115         self.ranges.len()
 116     }
 117
 118     /// Returns the successors of the given SCC.
 119     fn successors(&self, scc: S) -> &[S] {
 120         // Annoyingly, `range` does not implement `Copy`, so we have
 121         // to do `range.start..range.end`:
 122         let range = &self.ranges[scc];
 123         &self.all_successors[range.start..range.end]
 124     }
 125
 126     /// Creates a new SCC with `successors` as its successors and
 127     /// returns the resulting index.
 128     fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
 129         // Store the successors on `scc_successors_vec`, remembering
 130         // the range of indices.
 131         let all_successors_start = self.all_successors.len();
 132         self.all_successors.extend(successors);
 133         let all_successors_end = self.all_successors.len();
 134
 135         debug!(
 136             "create_scc({:?}) successors={:?}",
 137             self.ranges.len(),
 138             &self.all_successors[all_successors_start..all_successors_end],
 139         );
 140
 141         self.ranges.push(all_successors_start..all_successors_end)
 142     }
 143 }
 144
 145 struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> {
 146     graph: &'c G,
 147
 148     /// The state of each node; used during walk to record the stack
 149     /// and after walk to record what cycle each node ended up being
 150     /// in.
 151     node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
 152
 153     /// The stack of nodes that we are visiting as part of the DFS.
 154     node_stack: Vec<G::Node>,
 155
 156     /// The stack of successors: as we visit a node, we mark our
 157     /// position in this stack, and when we encounter a successor SCC,
 158     /// we push it on the stack. When we complete an SCC, we can pop
 159     /// everything off the stack that was found along the way.
 160     successors_stack: Vec<S>,
 161
 162     /// A set used to strip duplicates. As we accumulate successors
 163     /// into the successors_stack, we sometimes get duplicate entries.
 164     /// We use this set to remove those -- we also keep its storage
 165     /// around between successors to amortize memory allocation costs.
 166     duplicate_set: FxHashSet<S>,
 167
 168     scc_data: SccData<S>,
 169 }
 170
 171 #[derive(Copy, Clone, Debug)]
 172 enum NodeState<N, S> {
 173     /// This node has not yet been visited as part of the DFS.
 174     ///
 175     /// After SCC construction is complete, this state ought to be
 176     /// impossible.
 177     NotVisited,
 178
 179     /// This node is currently being walk as part of our DFS. It is on
 180     /// the stack at the depth `depth`.
 181     ///
 182     /// After SCC construction is complete, this state ought to be
 183     /// impossible.
 184     BeingVisited { depth: usize },
 185
 186     /// Indicates that this node is a member of the given cycle.
 187     InCycle { scc_index: S },
 188
 189     /// Indicates that this node is a member of whatever cycle
 190     /// `parent` is a member of. This state is transient: whenever we
 191     /// see it, we try to overwrite it with the current state of
 192     /// `parent` (this is the "path compression" step of a union-find
 193     /// algorithm).
 194     InCycleWith { parent: N },
 195 }
 196
 197 #[derive(Copy, Clone, Debug)]
 198 enum WalkReturn<S> {
 199     Cycle { min_depth: usize },
 200     Complete { scc_index: S },
 201 }
 202
 203 impl<'c, G, S> SccsConstruction<'c, G, S>
 204 where
 205     G: DirectedGraph + WithNumNodes + WithSuccessors,
 206     S: Idx,
 207 {
 208     /// Identifies SCCs in the graph `G` and computes the resulting
 209     /// DAG. This uses a variant of [Tarjan's
 210     /// algorithm][wikipedia]. The high-level summary of the algorithm
 211     /// is that we do a depth-first search. Along the way, we keep a
 212     /// stack of each node whose successors are being visited. We
 213     /// track the depth of each node on this stack (there is no depth
 214     /// if the node is not on the stack). When we find that some node
 215     /// N with depth D can reach some other node N' with lower depth
 216     /// D' (i.e., D' < D), we know that N, N', and all nodes in
 217     /// between them on the stack are part of an SCC.
 218     ///
 219     /// [wikipedia]: https://bit.ly/2EZIx84
 220     fn construct(graph: &'c G) -> Sccs<G::Node, S> {
 221         let num_nodes = graph.num_nodes();
 222
 223         let mut this = Self {
 224             graph,
 225             node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
 226             node_stack: Vec::with_capacity(num_nodes),
 227             successors_stack: Vec::new(),
 228             scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() },
 229             duplicate_set: FxHashSet::default(),
 230         };
 231
 232         let scc_indices = (0..num_nodes)
 233             .map(G::Node::new)
 234             .map(|node| match this.start_walk_from(node) {
 235                 WalkReturn::Complete { scc_index } => scc_index,
 236                 WalkReturn::Cycle { min_depth } => panic!(
 237                     "`start_walk_node({:?})` returned cycle with depth {:?}",
 238                     node, min_depth
 239                 ),
 240             })
 241             .collect();
 242
 243         Sccs { scc_indices, scc_data: this.scc_data }
 244     }
 245
 246     fn start_walk_from(&mut self, node: G::Node) -> WalkReturn<S> {
 247         if let Some(result) = self.inspect_node(node) {
 248             result
 249         } else {
 250             self.walk_unvisited_node(node)
 251         }
 252     }
 253
 254     /// Inspect a node during the DFS. We first examine its current
 255     /// state -- if it is not yet visited (`NotVisited`), return `None` so
 256     /// that the caller might push it onto the stack and start walking its
 257     /// successors.
 258     ///
 259     /// If it is already on the DFS stack it will be in the state
 260     /// `BeingVisited`. In that case, we have found a cycle and we
 261     /// return the depth from the stack.
 262     ///
 263     /// Otherwise, we are looking at a node that has already been
 264     /// completely visited. We therefore return `WalkReturn::Complete`
 265     /// with its associated SCC index.
 266     fn inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>> {
 267         Some(match self.find_state(node) {
 268             NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
 269
 270             NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
 271
 272             NodeState::NotVisited => return None,
 273
 274             NodeState::InCycleWith { parent } => panic!(
 275                 "`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
 276                 parent
 277             ),
 278         })
 279     }
 280
 281     /// Fetches the state of the node `r`. If `r` is recorded as being
 282     /// in a cycle with some other node `r2`, then fetches the state
 283     /// of `r2` (and updates `r` to reflect current result). This is
 284     /// basically the "find" part of a standard union-find algorithm
 285     /// (with path compression).
 286     fn find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S> {
 287         // To avoid recursion we temporarily reuse the `parent` of each
 288         // InCycleWith link to encode a downwards link while compressing
 289         // the path. After we have found the root or deepest node being
 290         // visited, we traverse the reverse links and correct the node
 291         // states on the way.
 292         //
 293         // **Note**: This mutation requires that this is a leaf function
 294         // or at least that none of the called functions inspects the
 295         // current node states. Luckily, we are a leaf.
 296
 297         // Remember one previous link. The termination condition when
 298         // following links downwards is then simply as soon as we have
 299         // found the initial self-loop.
 300         let mut previous_node = node;
 301
 302         // Ultimately assigned by the parent when following
 303         // `InCycleWith` upwards.
 304         let node_state = loop {
 305             debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]);
 306             match self.node_states[node] {
 307                 NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index },
 308                 NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth },
 309                 NodeState::NotVisited => break NodeState::NotVisited,
 310                 NodeState::InCycleWith { parent } => {
 311                     // We test this, to be extremely sure that we never
 312                     // ever break our termination condition for the
 313                     // reverse iteration loop.
 314                     assert!(node != parent, "Node can not be in cycle with itself");
 315                     // Store the previous node as an inverted list link
 316                     self.node_states[node] = NodeState::InCycleWith { parent: previous_node };
 317                     // Update to parent node.
 318                     previous_node = node;
 319                     node = parent;
 320                 }
 321             }
 322         };
 323
 324         // The states form a graph where up to one outgoing link is stored at
 325         // each node. Initially in general,
 326         //
 327         //                                                  E
 328         //                                                  ^
 329         //                                                  |
 330         //                                InCycleWith/BeingVisited/NotVisited
 331         //                                                  |
 332         //   A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+
 333         //   |
 334         //   = node, previous_node
 335         //
 336         // After the first loop, this will look like
 337         //                                                  E
 338         //                                                  ^
 339         //                                                  |
 340         //                                InCycleWith/BeingVisited/NotVisited
 341         //                                                  |
 342         // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+
 343         // | |                             |              |
 344         // | InCycleWith                   |              = node
 345         // +-+                             =previous_node
 346         //
 347         // Note in particular that A will be linked to itself in a self-cycle
 348         // and no other self-cycles occur due to how InCycleWith is assigned in
 349         // the find phase implemented by `walk_unvisited_node`.
 350         //
 351         // We now want to compress the path, that is assign the state of the
 352         // link D-E to all other links.
 353         //
 354         // We can then walk backwards, starting from `previous_node`, and assign
 355         // each node in the list with the updated state. The loop terminates
 356         // when we reach the self-cycle.
 357
 358         // Move backwards until we found the node where we started. We
 359         // will know when we hit the state where previous_node == node.
 360         loop {
 361             // Back at the beginning, we can return.
 362             if previous_node == node {
 363                 return node_state;
 364             }
 365             // Update to previous node in the link.
 366             match self.node_states[previous_node] {
 367                 NodeState::InCycleWith { parent: previous } => {
 368                     node = previous_node;
 369                     previous_node = previous;
 370                 }
 371                 // Only InCycleWith nodes were added to the reverse linked list.
 372                 other => panic!("Invalid previous link while compressing cycle: {:?}", other),
 373             }
 374
 375             debug!("find_state: parent_state = {:?}", node_state);
 376
 377             // Update the node state from the parent state. The assigned
 378             // state is actually a loop invariant but it will only be
 379             // evaluated if there is at least one backlink to follow.
 380             // Fully trusting llvm here to find this loop optimization.
 381             match node_state {
 382                 // Path compression, make current node point to the same root.
 383                 NodeState::InCycle { .. } => {
 384                     self.node_states[node] = node_state;
 385                 }
 386                 // Still visiting nodes, compress to cycle to the node
 387                 // at that depth.
 388                 NodeState::BeingVisited { depth } => {
 389                     self.node_states[node] =
 390                         NodeState::InCycleWith { parent: self.node_stack[depth] };
 391                 }
 392                 // These are never allowed as parent nodes. InCycleWith
 393                 // should have been followed to a real parent and
 394                 // NotVisited can not be part of a cycle since it should
 395                 // have instead gotten explored.
 396                 NodeState::NotVisited | NodeState::InCycleWith { .. } => {
 397                     panic!("invalid parent state: {:?}", node_state)
 398                 }
 399             }
 400         }
 401     }
 402
 403     /// Walks a node that has never been visited before.
 404     ///
 405     /// Call this method when `inspect_node` has returned `None`. Having the
 406     /// caller decide avoids mutual recursion between the two methods and allows
 407     /// us to maintain an allocated stack for nodes on the path between calls.
 408     #[instrument(skip(self, initial), level = "debug")]
 409     fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S> {
 410         struct VisitingNodeFrame<G: DirectedGraph, Successors> {
 411             node: G::Node,
 412             iter: Option<Successors>,
 413             depth: usize,
 414             min_depth: usize,
 415             successors_len: usize,
 416             min_cycle_root: G::Node,
 417             successor_node: G::Node,
 418         }
 419
 420         // Move the stack to a local variable. We want to utilize the existing allocation and
 421         // mutably borrow it without borrowing self at the same time.
 422         let mut successors_stack = core::mem::take(&mut self.successors_stack);
 423         debug_assert_eq!(successors_stack.len(), 0);
 424
 425         let mut stack: Vec<VisitingNodeFrame<G, _>> = vec![VisitingNodeFrame {
 426             node: initial,
 427             depth: 0,
 428             min_depth: 0,
 429             iter: None,
 430             successors_len: 0,
 431             min_cycle_root: initial,
 432             successor_node: initial,
 433         }];
 434
 435         let mut return_value = None;
 436
 437         'recurse: while let Some(frame) = stack.last_mut() {
 438             let VisitingNodeFrame {
 439                 node,
 440                 depth,
 441                 iter,
 442                 successors_len,
 443                 min_depth,
 444                 min_cycle_root,
 445                 successor_node,
 446             } = frame;
 447
 448             let node = *node;
 449             let depth = *depth;
 450
 451             let successors = match iter {
 452                 Some(iter) => iter,
 453                 None => {
 454                     // This None marks that we still have the initialize this node's frame.
 455                     debug!(?depth, ?node);
 456
 457                     debug_assert!(matches!(self.node_states[node], NodeState::NotVisited));
 458
 459                     // Push `node` onto the stack.
 460                     self.node_states[node] = NodeState::BeingVisited { depth };
 461                     self.node_stack.push(node);
 462
 463                     // Walk each successor of the node, looking to see if any of
 464                     // them can reach a node that is presently on the stack. If
 465                     // so, that means they can also reach us.
 466                     *successors_len = successors_stack.len();
 467                     // Set and return a reference, this is currently empty.
 468                     iter.get_or_insert(self.graph.successors(node))
 469                 }
 470             };
 471
 472             // Now that iter is initialized, this is a constant for this frame.
 473             let successors_len = *successors_len;
 474
 475             // Construct iterators for the nodes and walk results. There are two cases:
 476             // * The walk of a successor node returned.
 477             // * The remaining successor nodes.
 478             let returned_walk =
 479                 return_value.take().into_iter().map(|walk| (*successor_node, Some(walk)));
 480
 481             let successor_walk = successors.by_ref().map(|successor_node| {
 482                 debug!(?node, ?successor_node);
 483                 (successor_node, self.inspect_node(successor_node))
 484             });
 485
 486             for (successor_node, walk) in returned_walk.chain(successor_walk) {
 487                 match walk {
 488                     Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => {
 489                         // Track the minimum depth we can reach.
 490                         assert!(successor_min_depth <= depth);
 491                         if successor_min_depth < *min_depth {
 492                             debug!(?node, ?successor_min_depth);
 493                             *min_depth = successor_min_depth;
 494                             *min_cycle_root = successor_node;
 495                         }
 496                     }
 497
 498                     Some(WalkReturn::Complete { scc_index: successor_scc_index }) => {
 499                         // Push the completed SCC indices onto
 500                         // the `successors_stack` for later.
 501                         debug!(?node, ?successor_scc_index);
 502                         successors_stack.push(successor_scc_index);
 503                     }
 504
 505                     None => {
 506                         let depth = depth + 1;
 507                         debug!(?depth, ?successor_node);
 508                         // Remember which node the return value will come from.
 509                         frame.successor_node = successor_node;
 510                         // Start a new stack frame the step into it.
 511                         stack.push(VisitingNodeFrame {
 512                             node: successor_node,
 513                             depth,
 514                             iter: None,
 515                             successors_len: 0,
 516                             min_depth: depth,
 517                             min_cycle_root: successor_node,
 518                             successor_node,
 519                         });
 520                         continue 'recurse;
 521                     }
 522                 }
 523             }
 524
 525             // Completed walk, remove `node` from the stack.
 526             let r = self.node_stack.pop();
 527             debug_assert_eq!(r, Some(node));
 528
 529             // Remove the frame, it's done.
 530             let frame = stack.pop().unwrap();
 531
 532             // If `min_depth == depth`, then we are the root of the
 533             // cycle: we can't reach anyone further down the stack.
 534
 535             // Pass the 'return value' down the stack.
 536             // We return one frame at a time so there can't be another return value.
 537             debug_assert!(return_value.is_none());
 538             return_value = Some(if frame.min_depth == depth {
 539                 // Note that successor stack may have duplicates, so we
 540                 // want to remove those:
 541                 let deduplicated_successors = {
 542                     let duplicate_set = &mut self.duplicate_set;
 543                     duplicate_set.clear();
 544                     successors_stack
 545                         .drain(successors_len..)
 546                         .filter(move |&i| duplicate_set.insert(i))
 547                 };
 548                 let scc_index = self.scc_data.create_scc(deduplicated_successors);
 549                 self.node_states[node] = NodeState::InCycle { scc_index };
 550                 WalkReturn::Complete { scc_index }
 551             } else {
 552                 // We are not the head of the cycle. Return back to our
 553                 // caller. They will take ownership of the
 554                 // `self.successors` data that we pushed.
 555                 self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root };
 556                 WalkReturn::Cycle { min_depth: frame.min_depth }
 557             });
 558         }
 559
 560         // Keep the allocation we used for successors_stack.
 561         self.successors_stack = successors_stack;
 562         debug_assert_eq!(self.successors_stack.len(), 0);
 563
 564         return_value.unwrap()
 565     }
 566 }