]> git.proxmox.com Git - rustc.git/blob - compiler/rustc_data_structures/src/graph/scc/mod.rs
bump version to 1.80.1+dfsg1-1~bpo12+pve1
[rustc.git] / compiler / rustc_data_structures / src / graph / scc / mod.rs
1 //! Routine to compute the strongly connected components (SCCs) of a graph.
2 //!
3 //! Also computes as the resulting DAG if each SCC is replaced with a
4 //! node in the graph. This uses [Tarjan's algorithm](
5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
6 //! that completes in *O*(*n*) time.
7
8 use crate::fx::FxHashSet;
9 use crate::graph::vec_graph::VecGraph;
10 use crate::graph::{DirectedGraph, NumEdges, Successors};
11 use rustc_index::{Idx, IndexSlice, IndexVec};
12 use std::ops::Range;
13 use tracing::{debug, instrument};
14
15 #[cfg(test)]
16 mod tests;
17
18 /// Strongly connected components (SCC) of a graph. The type `N` is
19 /// the index type for the graph nodes and `S` is the index type for
20 /// the SCCs. We can map from each node to the SCC that it
21 /// participates in, and we also have the successors of each SCC.
22 pub struct Sccs<N: Idx, S: Idx> {
23 /// For each node, what is the SCC index of the SCC to which it
24 /// belongs.
25 scc_indices: IndexVec<N, S>,
26
27 /// Data about each SCC.
28 scc_data: SccData<S>,
29 }
30
31 pub struct SccData<S: Idx> {
32 /// For each SCC, the range of `all_successors` where its
33 /// successors can be found.
34 ranges: IndexVec<S, Range<usize>>,
35
36 /// Contains the successors for all the Sccs, concatenated. The
37 /// range of indices corresponding to a given SCC is found in its
38 /// SccData.
39 all_successors: Vec<S>,
40 }
41
42 impl<N: Idx, S: Idx + Ord> Sccs<N, S> {
43 pub fn new(graph: &impl Successors<Node = N>) -> Self {
44 SccsConstruction::construct(graph)
45 }
46
47 pub fn scc_indices(&self) -> &IndexSlice<N, S> {
48 &self.scc_indices
49 }
50
51 pub fn scc_data(&self) -> &SccData<S> {
52 &self.scc_data
53 }
54
55 /// Returns the number of SCCs in the graph.
56 pub fn num_sccs(&self) -> usize {
57 self.scc_data.len()
58 }
59
60 /// Returns an iterator over the SCCs in the graph.
61 ///
62 /// The SCCs will be iterated in **dependency order** (or **post order**),
63 /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
64 /// This is convenient when the edges represent dependencies: when you visit
65 /// `S1`, the value for `S2` will already have been computed.
66 pub fn all_sccs(&self) -> impl Iterator<Item = S> {
67 (0..self.scc_data.len()).map(S::new)
68 }
69
70 /// Returns the SCC to which a node `r` belongs.
71 pub fn scc(&self, r: N) -> S {
72 self.scc_indices[r]
73 }
74
75 /// Returns the successors of the given SCC.
76 pub fn successors(&self, scc: S) -> &[S] {
77 self.scc_data.successors(scc)
78 }
79
80 /// Construct the reverse graph of the SCC graph.
81 pub fn reverse(&self) -> VecGraph<S> {
82 VecGraph::new(
83 self.num_sccs(),
84 self.all_sccs()
85 .flat_map(|source| {
86 self.successors(source).iter().map(move |&target| (target, source))
87 })
88 .collect(),
89 )
90 }
91 }
92
93 impl<N: Idx, S: Idx + Ord> DirectedGraph for Sccs<N, S> {
94 type Node = S;
95
96 fn num_nodes(&self) -> usize {
97 self.num_sccs()
98 }
99 }
100
101 impl<N: Idx, S: Idx + Ord> NumEdges for Sccs<N, S> {
102 fn num_edges(&self) -> usize {
103 self.scc_data.all_successors.len()
104 }
105 }
106
107 impl<N: Idx, S: Idx + Ord> Successors for Sccs<N, S> {
108 fn successors(&self, node: S) -> impl Iterator<Item = Self::Node> {
109 self.successors(node).iter().cloned()
110 }
111 }
112
113 impl<S: Idx> SccData<S> {
114 /// Number of SCCs,
115 fn len(&self) -> usize {
116 self.ranges.len()
117 }
118
119 pub fn ranges(&self) -> &IndexSlice<S, Range<usize>> {
120 &self.ranges
121 }
122
123 pub fn all_successors(&self) -> &Vec<S> {
124 &self.all_successors
125 }
126
127 /// Returns the successors of the given SCC.
128 fn successors(&self, scc: S) -> &[S] {
129 // Annoyingly, `range` does not implement `Copy`, so we have
130 // to do `range.start..range.end`:
131 let range = &self.ranges[scc];
132 &self.all_successors[range.start..range.end]
133 }
134
135 /// Creates a new SCC with `successors` as its successors and
136 /// returns the resulting index.
137 fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
138 // Store the successors on `scc_successors_vec`, remembering
139 // the range of indices.
140 let all_successors_start = self.all_successors.len();
141 self.all_successors.extend(successors);
142 let all_successors_end = self.all_successors.len();
143
144 debug!(
145 "create_scc({:?}) successors={:?}",
146 self.ranges.len(),
147 &self.all_successors[all_successors_start..all_successors_end],
148 );
149
150 self.ranges.push(all_successors_start..all_successors_end)
151 }
152 }
153
154 struct SccsConstruction<'c, G: DirectedGraph + Successors, S: Idx> {
155 graph: &'c G,
156
157 /// The state of each node; used during walk to record the stack
158 /// and after walk to record what cycle each node ended up being
159 /// in.
160 node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
161
162 /// The stack of nodes that we are visiting as part of the DFS.
163 node_stack: Vec<G::Node>,
164
165 /// The stack of successors: as we visit a node, we mark our
166 /// position in this stack, and when we encounter a successor SCC,
167 /// we push it on the stack. When we complete an SCC, we can pop
168 /// everything off the stack that was found along the way.
169 successors_stack: Vec<S>,
170
171 /// A set used to strip duplicates. As we accumulate successors
172 /// into the successors_stack, we sometimes get duplicate entries.
173 /// We use this set to remove those -- we also keep its storage
174 /// around between successors to amortize memory allocation costs.
175 duplicate_set: FxHashSet<S>,
176
177 scc_data: SccData<S>,
178 }
179
180 #[derive(Copy, Clone, Debug)]
181 enum NodeState<N, S> {
182 /// This node has not yet been visited as part of the DFS.
183 ///
184 /// After SCC construction is complete, this state ought to be
185 /// impossible.
186 NotVisited,
187
188 /// This node is currently being walk as part of our DFS. It is on
189 /// the stack at the depth `depth`.
190 ///
191 /// After SCC construction is complete, this state ought to be
192 /// impossible.
193 BeingVisited { depth: usize },
194
195 /// Indicates that this node is a member of the given cycle.
196 InCycle { scc_index: S },
197
198 /// Indicates that this node is a member of whatever cycle
199 /// `parent` is a member of. This state is transient: whenever we
200 /// see it, we try to overwrite it with the current state of
201 /// `parent` (this is the "path compression" step of a union-find
202 /// algorithm).
203 InCycleWith { parent: N },
204 }
205
206 #[derive(Copy, Clone, Debug)]
207 enum WalkReturn<S> {
208 Cycle { min_depth: usize },
209 Complete { scc_index: S },
210 }
211
212 impl<'c, G, S> SccsConstruction<'c, G, S>
213 where
214 G: DirectedGraph + Successors,
215 S: Idx,
216 {
217 /// Identifies SCCs in the graph `G` and computes the resulting
218 /// DAG. This uses a variant of [Tarjan's
219 /// algorithm][wikipedia]. The high-level summary of the algorithm
220 /// is that we do a depth-first search. Along the way, we keep a
221 /// stack of each node whose successors are being visited. We
222 /// track the depth of each node on this stack (there is no depth
223 /// if the node is not on the stack). When we find that some node
224 /// N with depth D can reach some other node N' with lower depth
225 /// D' (i.e., D' < D), we know that N, N', and all nodes in
226 /// between them on the stack are part of an SCC.
227 ///
228 /// [wikipedia]: https://bit.ly/2EZIx84
229 fn construct(graph: &'c G) -> Sccs<G::Node, S> {
230 let num_nodes = graph.num_nodes();
231
232 let mut this = Self {
233 graph,
234 node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
235 node_stack: Vec::with_capacity(num_nodes),
236 successors_stack: Vec::new(),
237 scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() },
238 duplicate_set: FxHashSet::default(),
239 };
240
241 let scc_indices = (0..num_nodes)
242 .map(G::Node::new)
243 .map(|node| match this.start_walk_from(node) {
244 WalkReturn::Complete { scc_index } => scc_index,
245 WalkReturn::Cycle { min_depth } => {
246 panic!("`start_walk_node({node:?})` returned cycle with depth {min_depth:?}")
247 }
248 })
249 .collect();
250
251 Sccs { scc_indices, scc_data: this.scc_data }
252 }
253
254 fn start_walk_from(&mut self, node: G::Node) -> WalkReturn<S> {
255 if let Some(result) = self.inspect_node(node) {
256 result
257 } else {
258 self.walk_unvisited_node(node)
259 }
260 }
261
262 /// Inspect a node during the DFS. We first examine its current
263 /// state -- if it is not yet visited (`NotVisited`), return `None` so
264 /// that the caller might push it onto the stack and start walking its
265 /// successors.
266 ///
267 /// If it is already on the DFS stack it will be in the state
268 /// `BeingVisited`. In that case, we have found a cycle and we
269 /// return the depth from the stack.
270 ///
271 /// Otherwise, we are looking at a node that has already been
272 /// completely visited. We therefore return `WalkReturn::Complete`
273 /// with its associated SCC index.
274 fn inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>> {
275 Some(match self.find_state(node) {
276 NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
277
278 NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
279
280 NodeState::NotVisited => return None,
281
282 NodeState::InCycleWith { parent } => panic!(
283 "`find_state` returned `InCycleWith({parent:?})`, which ought to be impossible"
284 ),
285 })
286 }
287
288 /// Fetches the state of the node `r`. If `r` is recorded as being
289 /// in a cycle with some other node `r2`, then fetches the state
290 /// of `r2` (and updates `r` to reflect current result). This is
291 /// basically the "find" part of a standard union-find algorithm
292 /// (with path compression).
293 fn find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S> {
294 // To avoid recursion we temporarily reuse the `parent` of each
295 // InCycleWith link to encode a downwards link while compressing
296 // the path. After we have found the root or deepest node being
297 // visited, we traverse the reverse links and correct the node
298 // states on the way.
299 //
300 // **Note**: This mutation requires that this is a leaf function
301 // or at least that none of the called functions inspects the
302 // current node states. Luckily, we are a leaf.
303
304 // Remember one previous link. The termination condition when
305 // following links downwards is then simply as soon as we have
306 // found the initial self-loop.
307 let mut previous_node = node;
308
309 // Ultimately assigned by the parent when following
310 // `InCycleWith` upwards.
311 let node_state = loop {
312 debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]);
313 match self.node_states[node] {
314 NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index },
315 NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth },
316 NodeState::NotVisited => break NodeState::NotVisited,
317 NodeState::InCycleWith { parent } => {
318 // We test this, to be extremely sure that we never
319 // ever break our termination condition for the
320 // reverse iteration loop.
321 assert!(node != parent, "Node can not be in cycle with itself");
322 // Store the previous node as an inverted list link
323 self.node_states[node] = NodeState::InCycleWith { parent: previous_node };
324 // Update to parent node.
325 previous_node = node;
326 node = parent;
327 }
328 }
329 };
330
331 // The states form a graph where up to one outgoing link is stored at
332 // each node. Initially in general,
333 //
334 // E
335 // ^
336 // |
337 // InCycleWith/BeingVisited/NotVisited
338 // |
339 // A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+
340 // |
341 // = node, previous_node
342 //
343 // After the first loop, this will look like
344 // E
345 // ^
346 // |
347 // InCycleWith/BeingVisited/NotVisited
348 // |
349 // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+
350 // | | | |
351 // | InCycleWith | = node
352 // +-+ =previous_node
353 //
354 // Note in particular that A will be linked to itself in a self-cycle
355 // and no other self-cycles occur due to how InCycleWith is assigned in
356 // the find phase implemented by `walk_unvisited_node`.
357 //
358 // We now want to compress the path, that is assign the state of the
359 // link D-E to all other links.
360 //
361 // We can then walk backwards, starting from `previous_node`, and assign
362 // each node in the list with the updated state. The loop terminates
363 // when we reach the self-cycle.
364
365 // Move backwards until we found the node where we started. We
366 // will know when we hit the state where previous_node == node.
367 loop {
368 // Back at the beginning, we can return.
369 if previous_node == node {
370 return node_state;
371 }
372 // Update to previous node in the link.
373 match self.node_states[previous_node] {
374 NodeState::InCycleWith { parent: previous } => {
375 node = previous_node;
376 previous_node = previous;
377 }
378 // Only InCycleWith nodes were added to the reverse linked list.
379 other => panic!("Invalid previous link while compressing cycle: {other:?}"),
380 }
381
382 debug!("find_state: parent_state = {:?}", node_state);
383
384 // Update the node state from the parent state. The assigned
385 // state is actually a loop invariant but it will only be
386 // evaluated if there is at least one backlink to follow.
387 // Fully trusting llvm here to find this loop optimization.
388 match node_state {
389 // Path compression, make current node point to the same root.
390 NodeState::InCycle { .. } => {
391 self.node_states[node] = node_state;
392 }
393 // Still visiting nodes, compress to cycle to the node
394 // at that depth.
395 NodeState::BeingVisited { depth } => {
396 self.node_states[node] =
397 NodeState::InCycleWith { parent: self.node_stack[depth] };
398 }
399 // These are never allowed as parent nodes. InCycleWith
400 // should have been followed to a real parent and
401 // NotVisited can not be part of a cycle since it should
402 // have instead gotten explored.
403 NodeState::NotVisited | NodeState::InCycleWith { .. } => {
404 panic!("invalid parent state: {node_state:?}")
405 }
406 }
407 }
408 }
409
410 /// Walks a node that has never been visited before.
411 ///
412 /// Call this method when `inspect_node` has returned `None`. Having the
413 /// caller decide avoids mutual recursion between the two methods and allows
414 /// us to maintain an allocated stack for nodes on the path between calls.
415 #[instrument(skip(self, initial), level = "debug")]
416 fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S> {
417 struct VisitingNodeFrame<G: DirectedGraph, Successors> {
418 node: G::Node,
419 iter: Option<Successors>,
420 depth: usize,
421 min_depth: usize,
422 successors_len: usize,
423 min_cycle_root: G::Node,
424 successor_node: G::Node,
425 }
426
427 // Move the stack to a local variable. We want to utilize the existing allocation and
428 // mutably borrow it without borrowing self at the same time.
429 let mut successors_stack = core::mem::take(&mut self.successors_stack);
430 debug_assert_eq!(successors_stack.len(), 0);
431
432 let mut stack: Vec<VisitingNodeFrame<G, _>> = vec![VisitingNodeFrame {
433 node: initial,
434 depth: 0,
435 min_depth: 0,
436 iter: None,
437 successors_len: 0,
438 min_cycle_root: initial,
439 successor_node: initial,
440 }];
441
442 let mut return_value = None;
443
444 'recurse: while let Some(frame) = stack.last_mut() {
445 let VisitingNodeFrame {
446 node,
447 depth,
448 iter,
449 successors_len,
450 min_depth,
451 min_cycle_root,
452 successor_node,
453 } = frame;
454
455 let node = *node;
456 let depth = *depth;
457
458 let successors = match iter {
459 Some(iter) => iter,
460 None => {
461 // This None marks that we still have the initialize this node's frame.
462 debug!(?depth, ?node);
463
464 debug_assert!(matches!(self.node_states[node], NodeState::NotVisited));
465
466 // Push `node` onto the stack.
467 self.node_states[node] = NodeState::BeingVisited { depth };
468 self.node_stack.push(node);
469
470 // Walk each successor of the node, looking to see if any of
471 // them can reach a node that is presently on the stack. If
472 // so, that means they can also reach us.
473 *successors_len = successors_stack.len();
474 // Set and return a reference, this is currently empty.
475 iter.get_or_insert(self.graph.successors(node))
476 }
477 };
478
479 // Now that iter is initialized, this is a constant for this frame.
480 let successors_len = *successors_len;
481
482 // Construct iterators for the nodes and walk results. There are two cases:
483 // * The walk of a successor node returned.
484 // * The remaining successor nodes.
485 let returned_walk =
486 return_value.take().into_iter().map(|walk| (*successor_node, Some(walk)));
487
488 let successor_walk = successors.map(|successor_node| {
489 debug!(?node, ?successor_node);
490 (successor_node, self.inspect_node(successor_node))
491 });
492
493 for (successor_node, walk) in returned_walk.chain(successor_walk) {
494 match walk {
495 Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => {
496 // Track the minimum depth we can reach.
497 assert!(successor_min_depth <= depth);
498 if successor_min_depth < *min_depth {
499 debug!(?node, ?successor_min_depth);
500 *min_depth = successor_min_depth;
501 *min_cycle_root = successor_node;
502 }
503 }
504
505 Some(WalkReturn::Complete { scc_index: successor_scc_index }) => {
506 // Push the completed SCC indices onto
507 // the `successors_stack` for later.
508 debug!(?node, ?successor_scc_index);
509 successors_stack.push(successor_scc_index);
510 }
511
512 None => {
513 let depth = depth + 1;
514 debug!(?depth, ?successor_node);
515 // Remember which node the return value will come from.
516 frame.successor_node = successor_node;
517 // Start a new stack frame the step into it.
518 stack.push(VisitingNodeFrame {
519 node: successor_node,
520 depth,
521 iter: None,
522 successors_len: 0,
523 min_depth: depth,
524 min_cycle_root: successor_node,
525 successor_node,
526 });
527 continue 'recurse;
528 }
529 }
530 }
531
532 // Completed walk, remove `node` from the stack.
533 let r = self.node_stack.pop();
534 debug_assert_eq!(r, Some(node));
535
536 // Remove the frame, it's done.
537 let frame = stack.pop().unwrap();
538
539 // If `min_depth == depth`, then we are the root of the
540 // cycle: we can't reach anyone further down the stack.
541
542 // Pass the 'return value' down the stack.
543 // We return one frame at a time so there can't be another return value.
544 debug_assert!(return_value.is_none());
545 return_value = Some(if frame.min_depth == depth {
546 // Note that successor stack may have duplicates, so we
547 // want to remove those:
548 let deduplicated_successors = {
549 let duplicate_set = &mut self.duplicate_set;
550 duplicate_set.clear();
551 successors_stack
552 .drain(successors_len..)
553 .filter(move |&i| duplicate_set.insert(i))
554 };
555 let scc_index = self.scc_data.create_scc(deduplicated_successors);
556 self.node_states[node] = NodeState::InCycle { scc_index };
557 WalkReturn::Complete { scc_index }
558 } else {
559 // We are not the head of the cycle. Return back to our
560 // caller. They will take ownership of the
561 // `self.successors` data that we pushed.
562 self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root };
563 WalkReturn::Cycle { min_depth: frame.min_depth }
564 });
565 }
566
567 // Keep the allocation we used for successors_stack.
568 self.successors_stack = successors_stack;
569 debug_assert_eq!(self.successors_stack.len(), 0);
570
571 return_value.unwrap()
572 }
573 }