compiler/rustc_data_structures/src/graph/scc/mod.rs

   1 //! Routine to compute the strongly connected components (SCCs) of a graph.
   2 //!
   3 //! Also computes as the resulting DAG if each SCC is replaced with a
   4 //! node in the graph. This uses [Tarjan's algorithm](
   5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
   6 //! that completes in *O(n)* time.
   7
   8 use crate::fx::FxHashSet;
   9 use crate::graph::vec_graph::VecGraph;
  10 use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors};
  11 use rustc_index::vec::{Idx, IndexVec};
  12 use std::ops::Range;
  13
  14 #[cfg(test)]
  15 mod tests;
  16
  17 /// Strongly connected components (SCC) of a graph. The type `N` is
  18 /// the index type for the graph nodes and `S` is the index type for
  19 /// the SCCs. We can map from each node to the SCC that it
  20 /// participates in, and we also have the successors of each SCC.
  21 pub struct Sccs<N: Idx, S: Idx> {
  22     /// For each node, what is the SCC index of the SCC to which it
  23     /// belongs.
  24     scc_indices: IndexVec<N, S>,
  25
  26     /// Data about each SCC.
  27     scc_data: SccData<S>,
  28 }
  29
  30 struct SccData<S: Idx> {
  31     /// For each SCC, the range of `all_successors` where its
  32     /// successors can be found.
  33     ranges: IndexVec<S, Range<usize>>,
  34
  35     /// Contains the successors for all the Sccs, concatenated. The
  36     /// range of indices corresponding to a given SCC is found in its
  37     /// SccData.
  38     all_successors: Vec<S>,
  39 }
  40
  41 impl<N: Idx, S: Idx> Sccs<N, S> {
  42     pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
  43         SccsConstruction::construct(graph)
  44     }
  45
  46     /// Returns the number of SCCs in the graph.
  47     pub fn num_sccs(&self) -> usize {
  48         self.scc_data.len()
  49     }
  50
  51     /// Returns an iterator over the SCCs in the graph.
  52     ///
  53     /// The SCCs will be iterated in **dependency order** (or **post order**),
  54     /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
  55     /// This is convenient when the edges represent dependencies: when you visit
  56     /// `S1`, the value for `S2` will already have been computed.
  57     pub fn all_sccs(&self) -> impl Iterator<Item = S> {
  58         (0..self.scc_data.len()).map(S::new)
  59     }
  60
  61     /// Returns the SCC to which a node `r` belongs.
  62     pub fn scc(&self, r: N) -> S {
  63         self.scc_indices[r]
  64     }
  65
  66     /// Returns the successors of the given SCC.
  67     pub fn successors(&self, scc: S) -> &[S] {
  68         self.scc_data.successors(scc)
  69     }
  70
  71     /// Construct the reverse graph of the SCC graph.
  72     pub fn reverse(&self) -> VecGraph<S> {
  73         VecGraph::new(
  74             self.num_sccs(),
  75             self.all_sccs()
  76                 .flat_map(|source| {
  77                     self.successors(source).iter().map(move |&target| (target, source))
  78                 })
  79                 .collect(),
  80         )
  81     }
  82 }
  83
  84 impl<N: Idx, S: Idx> DirectedGraph for Sccs<N, S> {
  85     type Node = S;
  86 }
  87
  88 impl<N: Idx, S: Idx> WithNumNodes for Sccs<N, S> {
  89     fn num_nodes(&self) -> usize {
  90         self.num_sccs()
  91     }
  92 }
  93
  94 impl<N: Idx, S: Idx> WithNumEdges for Sccs<N, S> {
  95     fn num_edges(&self) -> usize {
  96         self.scc_data.all_successors.len()
  97     }
  98 }
  99
 100 impl<N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs<N, S> {
 101     type Item = S;
 102
 103     type Iter = std::iter::Cloned<std::slice::Iter<'graph, S>>;
 104 }
 105
 106 impl<N: Idx, S: Idx> WithSuccessors for Sccs<N, S> {
 107     fn successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter {
 108         self.successors(node).iter().cloned()
 109     }
 110 }
 111
 112 impl<S: Idx> SccData<S> {
 113     /// Number of SCCs,
 114     fn len(&self) -> usize {
 115         self.ranges.len()
 116     }
 117
 118     /// Returns the successors of the given SCC.
 119     fn successors(&self, scc: S) -> &[S] {
 120         // Annoyingly, `range` does not implement `Copy`, so we have
 121         // to do `range.start..range.end`:
 122         let range = &self.ranges[scc];
 123         &self.all_successors[range.start..range.end]
 124     }
 125
 126     /// Creates a new SCC with `successors` as its successors and
 127     /// returns the resulting index.
 128     fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
 129         // Store the successors on `scc_successors_vec`, remembering
 130         // the range of indices.
 131         let all_successors_start = self.all_successors.len();
 132         self.all_successors.extend(successors);
 133         let all_successors_end = self.all_successors.len();
 134
 135         debug!(
 136             "create_scc({:?}) successors={:?}",
 137             self.ranges.len(),
 138             &self.all_successors[all_successors_start..all_successors_end],
 139         );
 140
 141         self.ranges.push(all_successors_start..all_successors_end)
 142     }
 143 }
 144
 145 struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> {
 146     graph: &'c G,
 147
 148     /// The state of each node; used during walk to record the stack
 149     /// and after walk to record what cycle each node ended up being
 150     /// in.
 151     node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
 152
 153     /// The stack of nodes that we are visiting as part of the DFS.
 154     node_stack: Vec<G::Node>,
 155
 156     /// The stack of successors: as we visit a node, we mark our
 157     /// position in this stack, and when we encounter a successor SCC,
 158     /// we push it on the stack. When we complete an SCC, we can pop
 159     /// everything off the stack that was found along the way.
 160     successors_stack: Vec<S>,
 161
 162     /// A set used to strip duplicates. As we accumulate successors
 163     /// into the successors_stack, we sometimes get duplicate entries.
 164     /// We use this set to remove those -- we also keep its storage
 165     /// around between successors to amortize memory allocation costs.
 166     duplicate_set: FxHashSet<S>,
 167
 168     scc_data: SccData<S>,
 169 }
 170
 171 #[derive(Copy, Clone, Debug)]
 172 enum NodeState<N, S> {
 173     /// This node has not yet been visited as part of the DFS.
 174     ///
 175     /// After SCC construction is complete, this state ought to be
 176     /// impossible.
 177     NotVisited,
 178
 179     /// This node is currently being walk as part of our DFS. It is on
 180     /// the stack at the depth `depth`.
 181     ///
 182     /// After SCC construction is complete, this state ought to be
 183     /// impossible.
 184     BeingVisited { depth: usize },
 185
 186     /// Indicates that this node is a member of the given cycle.
 187     InCycle { scc_index: S },
 188
 189     /// Indicates that this node is a member of whatever cycle
 190     /// `parent` is a member of. This state is transient: whenever we
 191     /// see it, we try to overwrite it with the current state of
 192     /// `parent` (this is the "path compression" step of a union-find
 193     /// algorithm).
 194     InCycleWith { parent: N },
 195 }
 196
 197 #[derive(Copy, Clone, Debug)]
 198 enum WalkReturn<S> {
 199     Cycle { min_depth: usize },
 200     Complete { scc_index: S },
 201 }
 202
 203 impl<'c, G, S> SccsConstruction<'c, G, S>
 204 where
 205     G: DirectedGraph + WithNumNodes + WithSuccessors,
 206     S: Idx,
 207 {
 208     /// Identifies SCCs in the graph `G` and computes the resulting
 209     /// DAG. This uses a variant of [Tarjan's
 210     /// algorithm][wikipedia]. The high-level summary of the algorithm
 211     /// is that we do a depth-first search. Along the way, we keep a
 212     /// stack of each node whose successors are being visited. We
 213     /// track the depth of each node on this stack (there is no depth
 214     /// if the node is not on the stack). When we find that some node
 215     /// N with depth D can reach some other node N' with lower depth
 216     /// D' (i.e., D' < D), we know that N, N', and all nodes in
 217     /// between them on the stack are part of an SCC.
 218     ///
 219     /// [wikipedia]: https://bit.ly/2EZIx84
 220     fn construct(graph: &'c G) -> Sccs<G::Node, S> {
 221         let num_nodes = graph.num_nodes();
 222
 223         let mut this = Self {
 224             graph,
 225             node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
 226             node_stack: Vec::with_capacity(num_nodes),
 227             successors_stack: Vec::new(),
 228             scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() },
 229             duplicate_set: FxHashSet::default(),
 230         };
 231
 232         let scc_indices = (0..num_nodes)
 233             .map(G::Node::new)
 234             .map(|node| match this.walk_node(0, node) {
 235                 WalkReturn::Complete { scc_index } => scc_index,
 236                 WalkReturn::Cycle { min_depth } => {
 237                     panic!("`walk_node(0, {:?})` returned cycle with depth {:?}", node, min_depth)
 238                 }
 239             })
 240             .collect();
 241
 242         Sccs { scc_indices, scc_data: this.scc_data }
 243     }
 244
 245     /// Visits a node during the DFS. We first examine its current
 246     /// state -- if it is not yet visited (`NotVisited`), we can push
 247     /// it onto the stack and start walking its successors.
 248     ///
 249     /// If it is already on the DFS stack it will be in the state
 250     /// `BeingVisited`. In that case, we have found a cycle and we
 251     /// return the depth from the stack.
 252     ///
 253     /// Otherwise, we are looking at a node that has already been
 254     /// completely visited. We therefore return `WalkReturn::Complete`
 255     /// with its associated SCC index.
 256     fn walk_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
 257         debug!("walk_node(depth = {:?}, node = {:?})", depth, node);
 258         match self.find_state(node) {
 259             NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
 260
 261             NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
 262
 263             NodeState::NotVisited => self.walk_unvisited_node(depth, node),
 264
 265             NodeState::InCycleWith { parent } => panic!(
 266                 "`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
 267                 parent
 268             ),
 269         }
 270     }
 271
 272     /// Fetches the state of the node `r`. If `r` is recorded as being
 273     /// in a cycle with some other node `r2`, then fetches the state
 274     /// of `r2` (and updates `r` to reflect current result). This is
 275     /// basically the "find" part of a standard union-find algorithm
 276     /// (with path compression).
 277     fn find_state(&mut self, r: G::Node) -> NodeState<G::Node, S> {
 278         debug!("find_state(r = {:?} in state {:?})", r, self.node_states[r]);
 279         match self.node_states[r] {
 280             NodeState::InCycle { scc_index } => NodeState::InCycle { scc_index },
 281             NodeState::BeingVisited { depth } => NodeState::BeingVisited { depth },
 282             NodeState::NotVisited => NodeState::NotVisited,
 283             NodeState::InCycleWith { parent } => {
 284                 let parent_state = self.find_state(parent);
 285                 debug!("find_state: parent_state = {:?}", parent_state);
 286                 match parent_state {
 287                     NodeState::InCycle { .. } => {
 288                         self.node_states[r] = parent_state;
 289                         parent_state
 290                     }
 291
 292                     NodeState::BeingVisited { depth } => {
 293                         self.node_states[r] =
 294                             NodeState::InCycleWith { parent: self.node_stack[depth] };
 295                         parent_state
 296                     }
 297
 298                     NodeState::NotVisited | NodeState::InCycleWith { .. } => {
 299                         panic!("invalid parent state: {:?}", parent_state)
 300                     }
 301                 }
 302             }
 303         }
 304     }
 305
 306     /// Walks a node that has never been visited before.
 307     fn walk_unvisited_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
 308         debug!("walk_unvisited_node(depth = {:?}, node = {:?})", depth, node);
 309
 310         debug_assert!(match self.node_states[node] {
 311             NodeState::NotVisited => true,
 312             _ => false,
 313         });
 314
 315         // Push `node` onto the stack.
 316         self.node_states[node] = NodeState::BeingVisited { depth };
 317         self.node_stack.push(node);
 318
 319         // Walk each successor of the node, looking to see if any of
 320         // them can reach a node that is presently on the stack. If
 321         // so, that means they can also reach us.
 322         let mut min_depth = depth;
 323         let mut min_cycle_root = node;
 324         let successors_len = self.successors_stack.len();
 325         for successor_node in self.graph.successors(node) {
 326             debug!("walk_unvisited_node: node = {:?} successor_ode = {:?}", node, successor_node);
 327             match self.walk_node(depth + 1, successor_node) {
 328                 WalkReturn::Cycle { min_depth: successor_min_depth } => {
 329                     // Track the minimum depth we can reach.
 330                     assert!(successor_min_depth <= depth);
 331                     if successor_min_depth < min_depth {
 332                         debug!(
 333                             "walk_unvisited_node: node = {:?} successor_min_depth = {:?}",
 334                             node, successor_min_depth
 335                         );
 336                         min_depth = successor_min_depth;
 337                         min_cycle_root = successor_node;
 338                     }
 339                 }
 340
 341                 WalkReturn::Complete { scc_index: successor_scc_index } => {
 342                     // Push the completed SCC indices onto
 343                     // the `successors_stack` for later.
 344                     debug!(
 345                         "walk_unvisited_node: node = {:?} successor_scc_index = {:?}",
 346                         node, successor_scc_index
 347                     );
 348                     self.successors_stack.push(successor_scc_index);
 349                 }
 350             }
 351         }
 352
 353         // Completed walk, remove `node` from the stack.
 354         let r = self.node_stack.pop();
 355         debug_assert_eq!(r, Some(node));
 356
 357         // If `min_depth == depth`, then we are the root of the
 358         // cycle: we can't reach anyone further down the stack.
 359         if min_depth == depth {
 360             // Note that successor stack may have duplicates, so we
 361             // want to remove those:
 362             let deduplicated_successors = {
 363                 let duplicate_set = &mut self.duplicate_set;
 364                 duplicate_set.clear();
 365                 self.successors_stack
 366                     .drain(successors_len..)
 367                     .filter(move |&i| duplicate_set.insert(i))
 368             };
 369             let scc_index = self.scc_data.create_scc(deduplicated_successors);
 370             self.node_states[node] = NodeState::InCycle { scc_index };
 371             WalkReturn::Complete { scc_index }
 372         } else {
 373             // We are not the head of the cycle. Return back to our
 374             // caller. They will take ownership of the
 375             // `self.successors` data that we pushed.
 376             self.node_states[node] = NodeState::InCycleWith { parent: min_cycle_root };
 377             WalkReturn::Cycle { min_depth }
 378         }
 379     }
 380 }