]> git.proxmox.com Git - rustc.git/blob - compiler/rustc_data_structures/src/graph/scc/mod.rs
New upstream version 1.57.0+dfsg1
[rustc.git] / compiler / rustc_data_structures / src / graph / scc / mod.rs
1 //! Routine to compute the strongly connected components (SCCs) of a graph.
2 //!
3 //! Also computes as the resulting DAG if each SCC is replaced with a
4 //! node in the graph. This uses [Tarjan's algorithm](
5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
6 //! that completes in *O*(*n*) time.
7
8 use crate::fx::FxHashSet;
9 use crate::graph::vec_graph::VecGraph;
10 use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors};
11 use rustc_index::vec::{Idx, IndexVec};
12 use std::ops::Range;
13
14 #[cfg(test)]
15 mod tests;
16
17 /// Strongly connected components (SCC) of a graph. The type `N` is
18 /// the index type for the graph nodes and `S` is the index type for
19 /// the SCCs. We can map from each node to the SCC that it
20 /// participates in, and we also have the successors of each SCC.
21 pub struct Sccs<N: Idx, S: Idx> {
22 /// For each node, what is the SCC index of the SCC to which it
23 /// belongs.
24 scc_indices: IndexVec<N, S>,
25
26 /// Data about each SCC.
27 scc_data: SccData<S>,
28 }
29
30 struct SccData<S: Idx> {
31 /// For each SCC, the range of `all_successors` where its
32 /// successors can be found.
33 ranges: IndexVec<S, Range<usize>>,
34
35 /// Contains the successors for all the Sccs, concatenated. The
36 /// range of indices corresponding to a given SCC is found in its
37 /// SccData.
38 all_successors: Vec<S>,
39 }
40
41 impl<N: Idx, S: Idx> Sccs<N, S> {
42 pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
43 SccsConstruction::construct(graph)
44 }
45
46 /// Returns the number of SCCs in the graph.
47 pub fn num_sccs(&self) -> usize {
48 self.scc_data.len()
49 }
50
51 /// Returns an iterator over the SCCs in the graph.
52 ///
53 /// The SCCs will be iterated in **dependency order** (or **post order**),
54 /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
55 /// This is convenient when the edges represent dependencies: when you visit
56 /// `S1`, the value for `S2` will already have been computed.
57 pub fn all_sccs(&self) -> impl Iterator<Item = S> {
58 (0..self.scc_data.len()).map(S::new)
59 }
60
61 /// Returns the SCC to which a node `r` belongs.
62 pub fn scc(&self, r: N) -> S {
63 self.scc_indices[r]
64 }
65
66 /// Returns the successors of the given SCC.
67 pub fn successors(&self, scc: S) -> &[S] {
68 self.scc_data.successors(scc)
69 }
70
71 /// Construct the reverse graph of the SCC graph.
72 pub fn reverse(&self) -> VecGraph<S> {
73 VecGraph::new(
74 self.num_sccs(),
75 self.all_sccs()
76 .flat_map(|source| {
77 self.successors(source).iter().map(move |&target| (target, source))
78 })
79 .collect(),
80 )
81 }
82 }
83
84 impl<N: Idx, S: Idx> DirectedGraph for Sccs<N, S> {
85 type Node = S;
86 }
87
88 impl<N: Idx, S: Idx> WithNumNodes for Sccs<N, S> {
89 fn num_nodes(&self) -> usize {
90 self.num_sccs()
91 }
92 }
93
94 impl<N: Idx, S: Idx> WithNumEdges for Sccs<N, S> {
95 fn num_edges(&self) -> usize {
96 self.scc_data.all_successors.len()
97 }
98 }
99
100 impl<N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs<N, S> {
101 type Item = S;
102
103 type Iter = std::iter::Cloned<std::slice::Iter<'graph, S>>;
104 }
105
106 impl<N: Idx, S: Idx> WithSuccessors for Sccs<N, S> {
107 fn successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter {
108 self.successors(node).iter().cloned()
109 }
110 }
111
112 impl<S: Idx> SccData<S> {
113 /// Number of SCCs,
114 fn len(&self) -> usize {
115 self.ranges.len()
116 }
117
118 /// Returns the successors of the given SCC.
119 fn successors(&self, scc: S) -> &[S] {
120 // Annoyingly, `range` does not implement `Copy`, so we have
121 // to do `range.start..range.end`:
122 let range = &self.ranges[scc];
123 &self.all_successors[range.start..range.end]
124 }
125
126 /// Creates a new SCC with `successors` as its successors and
127 /// returns the resulting index.
128 fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
129 // Store the successors on `scc_successors_vec`, remembering
130 // the range of indices.
131 let all_successors_start = self.all_successors.len();
132 self.all_successors.extend(successors);
133 let all_successors_end = self.all_successors.len();
134
135 debug!(
136 "create_scc({:?}) successors={:?}",
137 self.ranges.len(),
138 &self.all_successors[all_successors_start..all_successors_end],
139 );
140
141 self.ranges.push(all_successors_start..all_successors_end)
142 }
143 }
144
145 struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> {
146 graph: &'c G,
147
148 /// The state of each node; used during walk to record the stack
149 /// and after walk to record what cycle each node ended up being
150 /// in.
151 node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
152
153 /// The stack of nodes that we are visiting as part of the DFS.
154 node_stack: Vec<G::Node>,
155
156 /// The stack of successors: as we visit a node, we mark our
157 /// position in this stack, and when we encounter a successor SCC,
158 /// we push it on the stack. When we complete an SCC, we can pop
159 /// everything off the stack that was found along the way.
160 successors_stack: Vec<S>,
161
162 /// A set used to strip duplicates. As we accumulate successors
163 /// into the successors_stack, we sometimes get duplicate entries.
164 /// We use this set to remove those -- we also keep its storage
165 /// around between successors to amortize memory allocation costs.
166 duplicate_set: FxHashSet<S>,
167
168 scc_data: SccData<S>,
169 }
170
171 #[derive(Copy, Clone, Debug)]
172 enum NodeState<N, S> {
173 /// This node has not yet been visited as part of the DFS.
174 ///
175 /// After SCC construction is complete, this state ought to be
176 /// impossible.
177 NotVisited,
178
179 /// This node is currently being walk as part of our DFS. It is on
180 /// the stack at the depth `depth`.
181 ///
182 /// After SCC construction is complete, this state ought to be
183 /// impossible.
184 BeingVisited { depth: usize },
185
186 /// Indicates that this node is a member of the given cycle.
187 InCycle { scc_index: S },
188
189 /// Indicates that this node is a member of whatever cycle
190 /// `parent` is a member of. This state is transient: whenever we
191 /// see it, we try to overwrite it with the current state of
192 /// `parent` (this is the "path compression" step of a union-find
193 /// algorithm).
194 InCycleWith { parent: N },
195 }
196
197 #[derive(Copy, Clone, Debug)]
198 enum WalkReturn<S> {
199 Cycle { min_depth: usize },
200 Complete { scc_index: S },
201 }
202
203 impl<'c, G, S> SccsConstruction<'c, G, S>
204 where
205 G: DirectedGraph + WithNumNodes + WithSuccessors,
206 S: Idx,
207 {
208 /// Identifies SCCs in the graph `G` and computes the resulting
209 /// DAG. This uses a variant of [Tarjan's
210 /// algorithm][wikipedia]. The high-level summary of the algorithm
211 /// is that we do a depth-first search. Along the way, we keep a
212 /// stack of each node whose successors are being visited. We
213 /// track the depth of each node on this stack (there is no depth
214 /// if the node is not on the stack). When we find that some node
215 /// N with depth D can reach some other node N' with lower depth
216 /// D' (i.e., D' < D), we know that N, N', and all nodes in
217 /// between them on the stack are part of an SCC.
218 ///
219 /// [wikipedia]: https://bit.ly/2EZIx84
220 fn construct(graph: &'c G) -> Sccs<G::Node, S> {
221 let num_nodes = graph.num_nodes();
222
223 let mut this = Self {
224 graph,
225 node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
226 node_stack: Vec::with_capacity(num_nodes),
227 successors_stack: Vec::new(),
228 scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() },
229 duplicate_set: FxHashSet::default(),
230 };
231
232 let scc_indices = (0..num_nodes)
233 .map(G::Node::new)
234 .map(|node| match this.start_walk_from(node) {
235 WalkReturn::Complete { scc_index } => scc_index,
236 WalkReturn::Cycle { min_depth } => panic!(
237 "`start_walk_node({:?})` returned cycle with depth {:?}",
238 node, min_depth
239 ),
240 })
241 .collect();
242
243 Sccs { scc_indices, scc_data: this.scc_data }
244 }
245
246 fn start_walk_from(&mut self, node: G::Node) -> WalkReturn<S> {
247 if let Some(result) = self.inspect_node(node) {
248 result
249 } else {
250 self.walk_unvisited_node(node)
251 }
252 }
253
254 /// Inspect a node during the DFS. We first examine its current
255 /// state -- if it is not yet visited (`NotVisited`), return `None` so
256 /// that the caller might push it onto the stack and start walking its
257 /// successors.
258 ///
259 /// If it is already on the DFS stack it will be in the state
260 /// `BeingVisited`. In that case, we have found a cycle and we
261 /// return the depth from the stack.
262 ///
263 /// Otherwise, we are looking at a node that has already been
264 /// completely visited. We therefore return `WalkReturn::Complete`
265 /// with its associated SCC index.
266 fn inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>> {
267 Some(match self.find_state(node) {
268 NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
269
270 NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
271
272 NodeState::NotVisited => return None,
273
274 NodeState::InCycleWith { parent } => panic!(
275 "`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
276 parent
277 ),
278 })
279 }
280
281 /// Fetches the state of the node `r`. If `r` is recorded as being
282 /// in a cycle with some other node `r2`, then fetches the state
283 /// of `r2` (and updates `r` to reflect current result). This is
284 /// basically the "find" part of a standard union-find algorithm
285 /// (with path compression).
286 fn find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S> {
287 // To avoid recursion we temporarily reuse the `parent` of each
288 // InCycleWith link to encode a downwards link while compressing
289 // the path. After we have found the root or deepest node being
290 // visited, we traverse the reverse links and correct the node
291 // states on the way.
292 //
293 // **Note**: This mutation requires that this is a leaf function
294 // or at least that none of the called functions inspects the
295 // current node states. Luckily, we are a leaf.
296
297 // Remember one previous link. The termination condition when
298 // following links downwards is then simply as soon as we have
299 // found the initial self-loop.
300 let mut previous_node = node;
301
302 // Ultimately assigned by the parent when following
303 // `InCycleWith` upwards.
304 let node_state = loop {
305 debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]);
306 match self.node_states[node] {
307 NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index },
308 NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth },
309 NodeState::NotVisited => break NodeState::NotVisited,
310 NodeState::InCycleWith { parent } => {
311 // We test this, to be extremely sure that we never
312 // ever break our termination condition for the
313 // reverse iteration loop.
314 assert!(node != parent, "Node can not be in cycle with itself");
315 // Store the previous node as an inverted list link
316 self.node_states[node] = NodeState::InCycleWith { parent: previous_node };
317 // Update to parent node.
318 previous_node = node;
319 node = parent;
320 }
321 }
322 };
323
324 // The states form a graph where up to one outgoing link is stored at
325 // each node. Initially in general,
326 //
327 // E
328 // ^
329 // |
330 // InCycleWith/BeingVisited/NotVisited
331 // |
332 // A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+
333 // |
334 // = node, previous_node
335 //
336 // After the first loop, this will look like
337 // E
338 // ^
339 // |
340 // InCycleWith/BeingVisited/NotVisited
341 // |
342 // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+
343 // | | | |
344 // | InCycleWith | = node
345 // +-+ =previous_node
346 //
347 // Note in particular that A will be linked to itself in a self-cycle
348 // and no other self-cycles occur due to how InCycleWith is assigned in
349 // the find phase implemented by `walk_unvisited_node`.
350 //
351 // We now want to compress the path, that is assign the state of the
352 // link D-E to all other links.
353 //
354 // We can then walk backwards, starting from `previous_node`, and assign
355 // each node in the list with the updated state. The loop terminates
356 // when we reach the self-cycle.
357
358 // Move backwards until we found the node where we started. We
359 // will know when we hit the state where previous_node == node.
360 loop {
361 // Back at the beginning, we can return.
362 if previous_node == node {
363 return node_state;
364 }
365 // Update to previous node in the link.
366 match self.node_states[previous_node] {
367 NodeState::InCycleWith { parent: previous } => {
368 node = previous_node;
369 previous_node = previous;
370 }
371 // Only InCycleWith nodes were added to the reverse linked list.
372 other => panic!("Invalid previous link while compressing cycle: {:?}", other),
373 }
374
375 debug!("find_state: parent_state = {:?}", node_state);
376
377 // Update the node state from the parent state. The assigned
378 // state is actually a loop invariant but it will only be
379 // evaluated if there is at least one backlink to follow.
380 // Fully trusting llvm here to find this loop optimization.
381 match node_state {
382 // Path compression, make current node point to the same root.
383 NodeState::InCycle { .. } => {
384 self.node_states[node] = node_state;
385 }
386 // Still visiting nodes, compress to cycle to the node
387 // at that depth.
388 NodeState::BeingVisited { depth } => {
389 self.node_states[node] =
390 NodeState::InCycleWith { parent: self.node_stack[depth] };
391 }
392 // These are never allowed as parent nodes. InCycleWith
393 // should have been followed to a real parent and
394 // NotVisited can not be part of a cycle since it should
395 // have instead gotten explored.
396 NodeState::NotVisited | NodeState::InCycleWith { .. } => {
397 panic!("invalid parent state: {:?}", node_state)
398 }
399 }
400 }
401 }
402
403 /// Walks a node that has never been visited before.
404 ///
405 /// Call this method when `inspect_node` has returned `None`. Having the
406 /// caller decide avoids mutual recursion between the two methods and allows
407 /// us to maintain an allocated stack for nodes on the path between calls.
408 #[instrument(skip(self, initial), level = "debug")]
409 fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S> {
410 struct VisitingNodeFrame<G: DirectedGraph, Successors> {
411 node: G::Node,
412 iter: Option<Successors>,
413 depth: usize,
414 min_depth: usize,
415 successors_len: usize,
416 min_cycle_root: G::Node,
417 successor_node: G::Node,
418 }
419
420 // Move the stack to a local variable. We want to utilize the existing allocation and
421 // mutably borrow it without borrowing self at the same time.
422 let mut successors_stack = core::mem::take(&mut self.successors_stack);
423 debug_assert_eq!(successors_stack.len(), 0);
424
425 let mut stack: Vec<VisitingNodeFrame<G, _>> = vec![VisitingNodeFrame {
426 node: initial,
427 depth: 0,
428 min_depth: 0,
429 iter: None,
430 successors_len: 0,
431 min_cycle_root: initial,
432 successor_node: initial,
433 }];
434
435 let mut return_value = None;
436
437 'recurse: while let Some(frame) = stack.last_mut() {
438 let VisitingNodeFrame {
439 node,
440 depth,
441 iter,
442 successors_len,
443 min_depth,
444 min_cycle_root,
445 successor_node,
446 } = frame;
447
448 let node = *node;
449 let depth = *depth;
450
451 let successors = match iter {
452 Some(iter) => iter,
453 None => {
454 // This None marks that we still have the initialize this node's frame.
455 debug!(?depth, ?node);
456
457 debug_assert!(matches!(self.node_states[node], NodeState::NotVisited));
458
459 // Push `node` onto the stack.
460 self.node_states[node] = NodeState::BeingVisited { depth };
461 self.node_stack.push(node);
462
463 // Walk each successor of the node, looking to see if any of
464 // them can reach a node that is presently on the stack. If
465 // so, that means they can also reach us.
466 *successors_len = successors_stack.len();
467 // Set and return a reference, this is currently empty.
468 iter.get_or_insert(self.graph.successors(node))
469 }
470 };
471
472 // Now that iter is initialized, this is a constant for this frame.
473 let successors_len = *successors_len;
474
475 // Construct iterators for the nodes and walk results. There are two cases:
476 // * The walk of a successor node returned.
477 // * The remaining successor nodes.
478 let returned_walk =
479 return_value.take().into_iter().map(|walk| (*successor_node, Some(walk)));
480
481 let successor_walk = successors.by_ref().map(|successor_node| {
482 debug!(?node, ?successor_node);
483 (successor_node, self.inspect_node(successor_node))
484 });
485
486 for (successor_node, walk) in returned_walk.chain(successor_walk) {
487 match walk {
488 Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => {
489 // Track the minimum depth we can reach.
490 assert!(successor_min_depth <= depth);
491 if successor_min_depth < *min_depth {
492 debug!(?node, ?successor_min_depth);
493 *min_depth = successor_min_depth;
494 *min_cycle_root = successor_node;
495 }
496 }
497
498 Some(WalkReturn::Complete { scc_index: successor_scc_index }) => {
499 // Push the completed SCC indices onto
500 // the `successors_stack` for later.
501 debug!(?node, ?successor_scc_index);
502 successors_stack.push(successor_scc_index);
503 }
504
505 None => {
506 let depth = depth + 1;
507 debug!(?depth, ?successor_node);
508 // Remember which node the return value will come from.
509 frame.successor_node = successor_node;
510 // Start a new stack frame the step into it.
511 stack.push(VisitingNodeFrame {
512 node: successor_node,
513 depth,
514 iter: None,
515 successors_len: 0,
516 min_depth: depth,
517 min_cycle_root: successor_node,
518 successor_node,
519 });
520 continue 'recurse;
521 }
522 }
523 }
524
525 // Completed walk, remove `node` from the stack.
526 let r = self.node_stack.pop();
527 debug_assert_eq!(r, Some(node));
528
529 // Remove the frame, it's done.
530 let frame = stack.pop().unwrap();
531
532 // If `min_depth == depth`, then we are the root of the
533 // cycle: we can't reach anyone further down the stack.
534
535 // Pass the 'return value' down the stack.
536 // We return one frame at a time so there can't be another return value.
537 debug_assert!(return_value.is_none());
538 return_value = Some(if frame.min_depth == depth {
539 // Note that successor stack may have duplicates, so we
540 // want to remove those:
541 let deduplicated_successors = {
542 let duplicate_set = &mut self.duplicate_set;
543 duplicate_set.clear();
544 successors_stack
545 .drain(successors_len..)
546 .filter(move |&i| duplicate_set.insert(i))
547 };
548 let scc_index = self.scc_data.create_scc(deduplicated_successors);
549 self.node_states[node] = NodeState::InCycle { scc_index };
550 WalkReturn::Complete { scc_index }
551 } else {
552 // We are not the head of the cycle. Return back to our
553 // caller. They will take ownership of the
554 // `self.successors` data that we pushed.
555 self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root };
556 WalkReturn::Cycle { min_depth: frame.min_depth }
557 });
558 }
559
560 // Keep the allocation we used for successors_stack.
561 self.successors_stack = successors_stack;
562 debug_assert_eq!(self.successors_stack.len(), 0);
563
564 return_value.unwrap()
565 }
566 }