1 //! Routine to compute the strongly connected components (SCCs) of a graph.
3 //! Also computes as the resulting DAG if each SCC is replaced with a
4 //! node in the graph. This uses [Tarjan's algorithm](
5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
6 //! that completes in *O*(*n*) time.
8 use crate::fx
::FxHashSet
;
9 use crate::graph
::vec_graph
::VecGraph
;
10 use crate::graph
::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}
;
11 use rustc_index
::vec
::{Idx, IndexVec}
;
17 /// Strongly connected components (SCC) of a graph. The type `N` is
18 /// the index type for the graph nodes and `S` is the index type for
19 /// the SCCs. We can map from each node to the SCC that it
20 /// participates in, and we also have the successors of each SCC.
21 pub struct Sccs
<N
: Idx
, S
: Idx
> {
22 /// For each node, what is the SCC index of the SCC to which it
24 scc_indices
: IndexVec
<N
, S
>,
26 /// Data about each SCC.
30 struct SccData
<S
: Idx
> {
31 /// For each SCC, the range of `all_successors` where its
32 /// successors can be found.
33 ranges
: IndexVec
<S
, Range
<usize>>,
35 /// Contains the successors for all the Sccs, concatenated. The
36 /// range of indices corresponding to a given SCC is found in its
38 all_successors
: Vec
<S
>,
41 impl<N
: Idx
, S
: Idx
> Sccs
<N
, S
> {
42 pub fn new(graph
: &(impl DirectedGraph
<Node
= N
> + WithNumNodes
+ WithSuccessors
)) -> Self {
43 SccsConstruction
::construct(graph
)
46 /// Returns the number of SCCs in the graph.
47 pub fn num_sccs(&self) -> usize {
51 /// Returns an iterator over the SCCs in the graph.
53 /// The SCCs will be iterated in **dependency order** (or **post order**),
54 /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
55 /// This is convenient when the edges represent dependencies: when you visit
56 /// `S1`, the value for `S2` will already have been computed.
57 pub fn all_sccs(&self) -> impl Iterator
<Item
= S
> {
58 (0..self.scc_data
.len()).map(S
::new
)
61 /// Returns the SCC to which a node `r` belongs.
62 pub fn scc(&self, r
: N
) -> S
{
66 /// Returns the successors of the given SCC.
67 pub fn successors(&self, scc
: S
) -> &[S
] {
68 self.scc_data
.successors(scc
)
71 /// Construct the reverse graph of the SCC graph.
72 pub fn reverse(&self) -> VecGraph
<S
> {
77 self.successors(source
).iter().map(move |&target
| (target
, source
))
84 impl<N
: Idx
, S
: Idx
> DirectedGraph
for Sccs
<N
, S
> {
88 impl<N
: Idx
, S
: Idx
> WithNumNodes
for Sccs
<N
, S
> {
89 fn num_nodes(&self) -> usize {
94 impl<N
: Idx
, S
: Idx
> WithNumEdges
for Sccs
<N
, S
> {
95 fn num_edges(&self) -> usize {
96 self.scc_data
.all_successors
.len()
100 impl<N
: Idx
, S
: Idx
> GraphSuccessors
<'graph
> for Sccs
<N
, S
> {
103 type Iter
= std
::iter
::Cloned
<std
::slice
::Iter
<'graph
, S
>>;
106 impl<N
: Idx
, S
: Idx
> WithSuccessors
for Sccs
<N
, S
> {
107 fn successors(&self, node
: S
) -> <Self as GraphSuccessors
<'_
>>::Iter
{
108 self.successors(node
).iter().cloned()
112 impl<S
: Idx
> SccData
<S
> {
114 fn len(&self) -> usize {
118 /// Returns the successors of the given SCC.
119 fn successors(&self, scc
: S
) -> &[S
] {
120 // Annoyingly, `range` does not implement `Copy`, so we have
121 // to do `range.start..range.end`:
122 let range
= &self.ranges
[scc
];
123 &self.all_successors
[range
.start
..range
.end
]
126 /// Creates a new SCC with `successors` as its successors and
127 /// returns the resulting index.
128 fn create_scc(&mut self, successors
: impl IntoIterator
<Item
= S
>) -> S
{
129 // Store the successors on `scc_successors_vec`, remembering
130 // the range of indices.
131 let all_successors_start
= self.all_successors
.len();
132 self.all_successors
.extend(successors
);
133 let all_successors_end
= self.all_successors
.len();
136 "create_scc({:?}) successors={:?}",
138 &self.all_successors
[all_successors_start
..all_successors_end
],
141 self.ranges
.push(all_successors_start
..all_successors_end
)
145 struct SccsConstruction
<'c
, G
: DirectedGraph
+ WithNumNodes
+ WithSuccessors
, S
: Idx
> {
148 /// The state of each node; used during walk to record the stack
149 /// and after walk to record what cycle each node ended up being
151 node_states
: IndexVec
<G
::Node
, NodeState
<G
::Node
, S
>>,
153 /// The stack of nodes that we are visiting as part of the DFS.
154 node_stack
: Vec
<G
::Node
>,
156 /// The stack of successors: as we visit a node, we mark our
157 /// position in this stack, and when we encounter a successor SCC,
158 /// we push it on the stack. When we complete an SCC, we can pop
159 /// everything off the stack that was found along the way.
160 successors_stack
: Vec
<S
>,
162 /// A set used to strip duplicates. As we accumulate successors
163 /// into the successors_stack, we sometimes get duplicate entries.
164 /// We use this set to remove those -- we also keep its storage
165 /// around between successors to amortize memory allocation costs.
166 duplicate_set
: FxHashSet
<S
>,
168 scc_data
: SccData
<S
>,
171 #[derive(Copy, Clone, Debug)]
172 enum NodeState
<N
, S
> {
173 /// This node has not yet been visited as part of the DFS.
175 /// After SCC construction is complete, this state ought to be
179 /// This node is currently being walk as part of our DFS. It is on
180 /// the stack at the depth `depth`.
182 /// After SCC construction is complete, this state ought to be
184 BeingVisited { depth: usize }
,
186 /// Indicates that this node is a member of the given cycle.
187 InCycle { scc_index: S }
,
189 /// Indicates that this node is a member of whatever cycle
190 /// `parent` is a member of. This state is transient: whenever we
191 /// see it, we try to overwrite it with the current state of
192 /// `parent` (this is the "path compression" step of a union-find
194 InCycleWith { parent: N }
,
197 #[derive(Copy, Clone, Debug)]
199 Cycle { min_depth: usize }
,
200 Complete { scc_index: S }
,
203 impl<'c
, G
, S
> SccsConstruction
<'c
, G
, S
>
205 G
: DirectedGraph
+ WithNumNodes
+ WithSuccessors
,
208 /// Identifies SCCs in the graph `G` and computes the resulting
209 /// DAG. This uses a variant of [Tarjan's
210 /// algorithm][wikipedia]. The high-level summary of the algorithm
211 /// is that we do a depth-first search. Along the way, we keep a
212 /// stack of each node whose successors are being visited. We
213 /// track the depth of each node on this stack (there is no depth
214 /// if the node is not on the stack). When we find that some node
215 /// N with depth D can reach some other node N' with lower depth
216 /// D' (i.e., D' < D), we know that N, N', and all nodes in
217 /// between them on the stack are part of an SCC.
219 /// [wikipedia]: https://bit.ly/2EZIx84
220 fn construct(graph
: &'c G
) -> Sccs
<G
::Node
, S
> {
221 let num_nodes
= graph
.num_nodes();
223 let mut this
= Self {
225 node_states
: IndexVec
::from_elem_n(NodeState
::NotVisited
, num_nodes
),
226 node_stack
: Vec
::with_capacity(num_nodes
),
227 successors_stack
: Vec
::new(),
228 scc_data
: SccData { ranges: IndexVec::new(), all_successors: Vec::new() }
,
229 duplicate_set
: FxHashSet
::default(),
232 let scc_indices
= (0..num_nodes
)
234 .map(|node
| match this
.start_walk_from(node
) {
235 WalkReturn
::Complete { scc_index }
=> scc_index
,
236 WalkReturn
::Cycle { min_depth }
=> panic
!(
237 "`start_walk_node({:?})` returned cycle with depth {:?}",
243 Sccs { scc_indices, scc_data: this.scc_data }
246 fn start_walk_from(&mut self, node
: G
::Node
) -> WalkReturn
<S
> {
247 if let Some(result
) = self.inspect_node(node
) {
250 self.walk_unvisited_node(node
)
254 /// Inspect a node during the DFS. We first examine its current
255 /// state -- if it is not yet visited (`NotVisited`), return `None` so
256 /// that the caller might push it onto the stack and start walking its
259 /// If it is already on the DFS stack it will be in the state
260 /// `BeingVisited`. In that case, we have found a cycle and we
261 /// return the depth from the stack.
263 /// Otherwise, we are looking at a node that has already been
264 /// completely visited. We therefore return `WalkReturn::Complete`
265 /// with its associated SCC index.
266 fn inspect_node(&mut self, node
: G
::Node
) -> Option
<WalkReturn
<S
>> {
267 Some(match self.find_state(node
) {
268 NodeState
::InCycle { scc_index }
=> WalkReturn
::Complete { scc_index }
,
270 NodeState
::BeingVisited { depth: min_depth }
=> WalkReturn
::Cycle { min_depth }
,
272 NodeState
::NotVisited
=> return None
,
274 NodeState
::InCycleWith { parent }
=> panic
!(
275 "`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
281 /// Fetches the state of the node `r`. If `r` is recorded as being
282 /// in a cycle with some other node `r2`, then fetches the state
283 /// of `r2` (and updates `r` to reflect current result). This is
284 /// basically the "find" part of a standard union-find algorithm
285 /// (with path compression).
286 fn find_state(&mut self, mut node
: G
::Node
) -> NodeState
<G
::Node
, S
> {
287 // To avoid recursion we temporarily reuse the `parent` of each
288 // InCycleWith link to encode a downwards link while compressing
289 // the path. After we have found the root or deepest node being
290 // visited, we traverse the reverse links and correct the node
291 // states on the way.
293 // **Note**: This mutation requires that this is a leaf function
294 // or at least that none of the called functions inspects the
295 // current node states. Luckily, we are a leaf.
297 // Remember one previous link. The termination condition when
298 // following links downwards is then simply as soon as we have
299 // found the initial self-loop.
300 let mut previous_node
= node
;
302 // Ultimately assigned by the parent when following
303 // `InCycleWith` upwards.
304 let node_state
= loop {
305 debug
!("find_state(r = {:?} in state {:?})", node
, self.node_states
[node
]);
306 match self.node_states
[node
] {
307 NodeState
::InCycle { scc_index }
=> break NodeState
::InCycle { scc_index }
,
308 NodeState
::BeingVisited { depth }
=> break NodeState
::BeingVisited { depth }
,
309 NodeState
::NotVisited
=> break NodeState
::NotVisited
,
310 NodeState
::InCycleWith { parent }
=> {
311 // We test this, to be extremely sure that we never
312 // ever break our termination condition for the
313 // reverse iteration loop.
314 assert
!(node
!= parent
, "Node can not be in cycle with itself");
315 // Store the previous node as an inverted list link
316 self.node_states
[node
] = NodeState
::InCycleWith { parent: previous_node }
;
317 // Update to parent node.
318 previous_node
= node
;
324 // The states form a graph where up to one outgoing link is stored at
325 // each node. Initially in general,
330 // InCycleWith/BeingVisited/NotVisited
332 // A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+
334 // = node, previous_node
336 // After the first loop, this will look like
340 // InCycleWith/BeingVisited/NotVisited
342 // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+
344 // | InCycleWith | = node
345 // +-+ =previous_node
347 // Note in particular that A will be linked to itself in a self-cycle
348 // and no other self-cycles occur due to how InCycleWith is assigned in
349 // the find phase implemented by `walk_unvisited_node`.
351 // We now want to compress the path, that is assign the state of the
352 // link D-E to all other links.
354 // We can then walk backwards, starting from `previous_node`, and assign
355 // each node in the list with the updated state. The loop terminates
356 // when we reach the self-cycle.
358 // Move backwards until we found the node where we started. We
359 // will know when we hit the state where previous_node == node.
361 // Back at the beginning, we can return.
362 if previous_node
== node
{
365 // Update to previous node in the link.
366 match self.node_states
[previous_node
] {
367 NodeState
::InCycleWith { parent: previous }
=> {
368 node
= previous_node
;
369 previous_node
= previous
;
371 // Only InCycleWith nodes were added to the reverse linked list.
372 other
=> panic
!("Invalid previous link while compressing cycle: {:?}", other
),
375 debug
!("find_state: parent_state = {:?}", node_state
);
377 // Update the node state from the parent state. The assigned
378 // state is actually a loop invariant but it will only be
379 // evaluated if there is at least one backlink to follow.
380 // Fully trusting llvm here to find this loop optimization.
382 // Path compression, make current node point to the same root.
383 NodeState
::InCycle { .. }
=> {
384 self.node_states
[node
] = node_state
;
386 // Still visiting nodes, compress to cycle to the node
388 NodeState
::BeingVisited { depth }
=> {
389 self.node_states
[node
] =
390 NodeState
::InCycleWith { parent: self.node_stack[depth] }
;
392 // These are never allowed as parent nodes. InCycleWith
393 // should have been followed to a real parent and
394 // NotVisited can not be part of a cycle since it should
395 // have instead gotten explored.
396 NodeState
::NotVisited
| NodeState
::InCycleWith { .. }
=> {
397 panic
!("invalid parent state: {:?}", node_state
)
403 /// Walks a node that has never been visited before.
405 /// Call this method when `inspect_node` has returned `None`. Having the
406 /// caller decide avoids mutual recursion between the two methods and allows
407 /// us to maintain an allocated stack for nodes on the path between calls.
408 #[instrument(skip(self, initial), level = "debug")]
409 fn walk_unvisited_node(&mut self, initial
: G
::Node
) -> WalkReturn
<S
> {
410 struct VisitingNodeFrame
<G
: DirectedGraph
, Successors
> {
412 iter
: Option
<Successors
>,
415 successors_len
: usize,
416 min_cycle_root
: G
::Node
,
417 successor_node
: G
::Node
,
420 // Move the stack to a local variable. We want to utilize the existing allocation and
421 // mutably borrow it without borrowing self at the same time.
422 let mut successors_stack
= core
::mem
::take(&mut self.successors_stack
);
423 debug_assert_eq
!(successors_stack
.len(), 0);
425 let mut stack
: Vec
<VisitingNodeFrame
<G
, _
>> = vec
![VisitingNodeFrame
{
431 min_cycle_root
: initial
,
432 successor_node
: initial
,
435 let mut return_value
= None
;
437 'recurse
: while let Some(frame
) = stack
.last_mut() {
438 let VisitingNodeFrame
{
451 let successors
= match iter
{
454 // This None marks that we still have the initialize this node's frame.
455 debug
!(?depth
, ?node
);
457 debug_assert
!(matches
!(self.node_states
[node
], NodeState
::NotVisited
));
459 // Push `node` onto the stack.
460 self.node_states
[node
] = NodeState
::BeingVisited { depth }
;
461 self.node_stack
.push(node
);
463 // Walk each successor of the node, looking to see if any of
464 // them can reach a node that is presently on the stack. If
465 // so, that means they can also reach us.
466 *successors_len
= successors_stack
.len();
467 // Set and return a reference, this is currently empty.
468 iter
.get_or_insert(self.graph
.successors(node
))
472 // Now that iter is initialized, this is a constant for this frame.
473 let successors_len
= *successors_len
;
475 // Construct iterators for the nodes and walk results. There are two cases:
476 // * The walk of a successor node returned.
477 // * The remaining successor nodes.
479 return_value
.take().into_iter().map(|walk
| (*successor_node
, Some(walk
)));
481 let successor_walk
= successors
.by_ref().map(|successor_node
| {
482 debug
!(?node
, ?successor_node
);
483 (successor_node
, self.inspect_node(successor_node
))
486 for (successor_node
, walk
) in returned_walk
.chain(successor_walk
) {
488 Some(WalkReturn
::Cycle { min_depth: successor_min_depth }
) => {
489 // Track the minimum depth we can reach.
490 assert
!(successor_min_depth
<= depth
);
491 if successor_min_depth
< *min_depth
{
492 debug
!(?node
, ?successor_min_depth
);
493 *min_depth
= successor_min_depth
;
494 *min_cycle_root
= successor_node
;
498 Some(WalkReturn
::Complete { scc_index: successor_scc_index }
) => {
499 // Push the completed SCC indices onto
500 // the `successors_stack` for later.
501 debug
!(?node
, ?successor_scc_index
);
502 successors_stack
.push(successor_scc_index
);
506 let depth
= depth
+ 1;
507 debug
!(?depth
, ?successor_node
);
508 // Remember which node the return value will come from.
509 frame
.successor_node
= successor_node
;
510 // Start a new stack frame the step into it.
511 stack
.push(VisitingNodeFrame
{
512 node
: successor_node
,
517 min_cycle_root
: successor_node
,
525 // Completed walk, remove `node` from the stack.
526 let r
= self.node_stack
.pop();
527 debug_assert_eq
!(r
, Some(node
));
529 // Remove the frame, it's done.
530 let frame
= stack
.pop().unwrap();
532 // If `min_depth == depth`, then we are the root of the
533 // cycle: we can't reach anyone further down the stack.
535 // Pass the 'return value' down the stack.
536 // We return one frame at a time so there can't be another return value.
537 debug_assert
!(return_value
.is_none());
538 return_value
= Some(if frame
.min_depth
== depth
{
539 // Note that successor stack may have duplicates, so we
540 // want to remove those:
541 let deduplicated_successors
= {
542 let duplicate_set
= &mut self.duplicate_set
;
543 duplicate_set
.clear();
545 .drain(successors_len
..)
546 .filter(move |&i
| duplicate_set
.insert(i
))
548 let scc_index
= self.scc_data
.create_scc(deduplicated_successors
);
549 self.node_states
[node
] = NodeState
::InCycle { scc_index }
;
550 WalkReturn
::Complete { scc_index }
552 // We are not the head of the cycle. Return back to our
553 // caller. They will take ownership of the
554 // `self.successors` data that we pushed.
555 self.node_states
[node
] = NodeState
::InCycleWith { parent: frame.min_cycle_root }
;
556 WalkReturn
::Cycle { min_depth: frame.min_depth }
560 // Keep the allocation we used for successors_stack.
561 self.successors_stack
= successors_stack
;
562 debug_assert_eq
!(self.successors_stack
.len(), 0);
564 return_value
.unwrap()