1 //! Routine to compute the strongly connected components (SCCs) of a graph.
3 //! Also computes as the resulting DAG if each SCC is replaced with a
4 //! node in the graph. This uses [Tarjan's algorithm](
5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
6 //! that completes in *O*(*n*) time.
8 use crate::fx
::FxHashSet
;
9 use crate::graph
::vec_graph
::VecGraph
;
10 use crate::graph
::{DirectedGraph, NumEdges, Successors}
;
11 use rustc_index
::{Idx, IndexSlice, IndexVec}
;
13 use tracing
::{debug, instrument}
;
18 /// Strongly connected components (SCC) of a graph. The type `N` is
19 /// the index type for the graph nodes and `S` is the index type for
20 /// the SCCs. We can map from each node to the SCC that it
21 /// participates in, and we also have the successors of each SCC.
22 pub struct Sccs
<N
: Idx
, S
: Idx
> {
23 /// For each node, what is the SCC index of the SCC to which it
25 scc_indices
: IndexVec
<N
, S
>,
27 /// Data about each SCC.
31 pub struct SccData
<S
: Idx
> {
32 /// For each SCC, the range of `all_successors` where its
33 /// successors can be found.
34 ranges
: IndexVec
<S
, Range
<usize>>,
36 /// Contains the successors for all the Sccs, concatenated. The
37 /// range of indices corresponding to a given SCC is found in its
39 all_successors
: Vec
<S
>,
42 impl<N
: Idx
, S
: Idx
+ Ord
> Sccs
<N
, S
> {
43 pub fn new(graph
: &impl Successors
<Node
= N
>) -> Self {
44 SccsConstruction
::construct(graph
)
47 pub fn scc_indices(&self) -> &IndexSlice
<N
, S
> {
51 pub fn scc_data(&self) -> &SccData
<S
> {
55 /// Returns the number of SCCs in the graph.
56 pub fn num_sccs(&self) -> usize {
60 /// Returns an iterator over the SCCs in the graph.
62 /// The SCCs will be iterated in **dependency order** (or **post order**),
63 /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
64 /// This is convenient when the edges represent dependencies: when you visit
65 /// `S1`, the value for `S2` will already have been computed.
66 pub fn all_sccs(&self) -> impl Iterator
<Item
= S
> {
67 (0..self.scc_data
.len()).map(S
::new
)
70 /// Returns the SCC to which a node `r` belongs.
71 pub fn scc(&self, r
: N
) -> S
{
75 /// Returns the successors of the given SCC.
76 pub fn successors(&self, scc
: S
) -> &[S
] {
77 self.scc_data
.successors(scc
)
80 /// Construct the reverse graph of the SCC graph.
81 pub fn reverse(&self) -> VecGraph
<S
> {
86 self.successors(source
).iter().map(move |&target
| (target
, source
))
93 impl<N
: Idx
, S
: Idx
+ Ord
> DirectedGraph
for Sccs
<N
, S
> {
96 fn num_nodes(&self) -> usize {
101 impl<N
: Idx
, S
: Idx
+ Ord
> NumEdges
for Sccs
<N
, S
> {
102 fn num_edges(&self) -> usize {
103 self.scc_data
.all_successors
.len()
107 impl<N
: Idx
, S
: Idx
+ Ord
> Successors
for Sccs
<N
, S
> {
108 fn successors(&self, node
: S
) -> impl Iterator
<Item
= Self::Node
> {
109 self.successors(node
).iter().cloned()
113 impl<S
: Idx
> SccData
<S
> {
115 fn len(&self) -> usize {
119 pub fn ranges(&self) -> &IndexSlice
<S
, Range
<usize>> {
123 pub fn all_successors(&self) -> &Vec
<S
> {
127 /// Returns the successors of the given SCC.
128 fn successors(&self, scc
: S
) -> &[S
] {
129 // Annoyingly, `range` does not implement `Copy`, so we have
130 // to do `range.start..range.end`:
131 let range
= &self.ranges
[scc
];
132 &self.all_successors
[range
.start
..range
.end
]
135 /// Creates a new SCC with `successors` as its successors and
136 /// returns the resulting index.
137 fn create_scc(&mut self, successors
: impl IntoIterator
<Item
= S
>) -> S
{
138 // Store the successors on `scc_successors_vec`, remembering
139 // the range of indices.
140 let all_successors_start
= self.all_successors
.len();
141 self.all_successors
.extend(successors
);
142 let all_successors_end
= self.all_successors
.len();
145 "create_scc({:?}) successors={:?}",
147 &self.all_successors
[all_successors_start
..all_successors_end
],
150 self.ranges
.push(all_successors_start
..all_successors_end
)
154 struct SccsConstruction
<'c
, G
: DirectedGraph
+ Successors
, S
: Idx
> {
157 /// The state of each node; used during walk to record the stack
158 /// and after walk to record what cycle each node ended up being
160 node_states
: IndexVec
<G
::Node
, NodeState
<G
::Node
, S
>>,
162 /// The stack of nodes that we are visiting as part of the DFS.
163 node_stack
: Vec
<G
::Node
>,
165 /// The stack of successors: as we visit a node, we mark our
166 /// position in this stack, and when we encounter a successor SCC,
167 /// we push it on the stack. When we complete an SCC, we can pop
168 /// everything off the stack that was found along the way.
169 successors_stack
: Vec
<S
>,
171 /// A set used to strip duplicates. As we accumulate successors
172 /// into the successors_stack, we sometimes get duplicate entries.
173 /// We use this set to remove those -- we also keep its storage
174 /// around between successors to amortize memory allocation costs.
175 duplicate_set
: FxHashSet
<S
>,
177 scc_data
: SccData
<S
>,
180 #[derive(Copy, Clone, Debug)]
181 enum NodeState
<N
, S
> {
182 /// This node has not yet been visited as part of the DFS.
184 /// After SCC construction is complete, this state ought to be
188 /// This node is currently being walk as part of our DFS. It is on
189 /// the stack at the depth `depth`.
191 /// After SCC construction is complete, this state ought to be
193 BeingVisited { depth: usize }
,
195 /// Indicates that this node is a member of the given cycle.
196 InCycle { scc_index: S }
,
198 /// Indicates that this node is a member of whatever cycle
199 /// `parent` is a member of. This state is transient: whenever we
200 /// see it, we try to overwrite it with the current state of
201 /// `parent` (this is the "path compression" step of a union-find
203 InCycleWith { parent: N }
,
206 #[derive(Copy, Clone, Debug)]
208 Cycle { min_depth: usize }
,
209 Complete { scc_index: S }
,
212 impl<'c
, G
, S
> SccsConstruction
<'c
, G
, S
>
214 G
: DirectedGraph
+ Successors
,
217 /// Identifies SCCs in the graph `G` and computes the resulting
218 /// DAG. This uses a variant of [Tarjan's
219 /// algorithm][wikipedia]. The high-level summary of the algorithm
220 /// is that we do a depth-first search. Along the way, we keep a
221 /// stack of each node whose successors are being visited. We
222 /// track the depth of each node on this stack (there is no depth
223 /// if the node is not on the stack). When we find that some node
224 /// N with depth D can reach some other node N' with lower depth
225 /// D' (i.e., D' < D), we know that N, N', and all nodes in
226 /// between them on the stack are part of an SCC.
228 /// [wikipedia]: https://bit.ly/2EZIx84
229 fn construct(graph
: &'c G
) -> Sccs
<G
::Node
, S
> {
230 let num_nodes
= graph
.num_nodes();
232 let mut this
= Self {
234 node_states
: IndexVec
::from_elem_n(NodeState
::NotVisited
, num_nodes
),
235 node_stack
: Vec
::with_capacity(num_nodes
),
236 successors_stack
: Vec
::new(),
237 scc_data
: SccData { ranges: IndexVec::new(), all_successors: Vec::new() }
,
238 duplicate_set
: FxHashSet
::default(),
241 let scc_indices
= (0..num_nodes
)
243 .map(|node
| match this
.start_walk_from(node
) {
244 WalkReturn
::Complete { scc_index }
=> scc_index
,
245 WalkReturn
::Cycle { min_depth }
=> {
246 panic
!("`start_walk_node({node:?})` returned cycle with depth {min_depth:?}")
251 Sccs { scc_indices, scc_data: this.scc_data }
254 fn start_walk_from(&mut self, node
: G
::Node
) -> WalkReturn
<S
> {
255 if let Some(result
) = self.inspect_node(node
) {
258 self.walk_unvisited_node(node
)
262 /// Inspect a node during the DFS. We first examine its current
263 /// state -- if it is not yet visited (`NotVisited`), return `None` so
264 /// that the caller might push it onto the stack and start walking its
267 /// If it is already on the DFS stack it will be in the state
268 /// `BeingVisited`. In that case, we have found a cycle and we
269 /// return the depth from the stack.
271 /// Otherwise, we are looking at a node that has already been
272 /// completely visited. We therefore return `WalkReturn::Complete`
273 /// with its associated SCC index.
274 fn inspect_node(&mut self, node
: G
::Node
) -> Option
<WalkReturn
<S
>> {
275 Some(match self.find_state(node
) {
276 NodeState
::InCycle { scc_index }
=> WalkReturn
::Complete { scc_index }
,
278 NodeState
::BeingVisited { depth: min_depth }
=> WalkReturn
::Cycle { min_depth }
,
280 NodeState
::NotVisited
=> return None
,
282 NodeState
::InCycleWith { parent }
=> panic
!(
283 "`find_state` returned `InCycleWith({parent:?})`, which ought to be impossible"
288 /// Fetches the state of the node `r`. If `r` is recorded as being
289 /// in a cycle with some other node `r2`, then fetches the state
290 /// of `r2` (and updates `r` to reflect current result). This is
291 /// basically the "find" part of a standard union-find algorithm
292 /// (with path compression).
293 fn find_state(&mut self, mut node
: G
::Node
) -> NodeState
<G
::Node
, S
> {
294 // To avoid recursion we temporarily reuse the `parent` of each
295 // InCycleWith link to encode a downwards link while compressing
296 // the path. After we have found the root or deepest node being
297 // visited, we traverse the reverse links and correct the node
298 // states on the way.
300 // **Note**: This mutation requires that this is a leaf function
301 // or at least that none of the called functions inspects the
302 // current node states. Luckily, we are a leaf.
304 // Remember one previous link. The termination condition when
305 // following links downwards is then simply as soon as we have
306 // found the initial self-loop.
307 let mut previous_node
= node
;
309 // Ultimately assigned by the parent when following
310 // `InCycleWith` upwards.
311 let node_state
= loop {
312 debug
!("find_state(r = {:?} in state {:?})", node
, self.node_states
[node
]);
313 match self.node_states
[node
] {
314 NodeState
::InCycle { scc_index }
=> break NodeState
::InCycle { scc_index }
,
315 NodeState
::BeingVisited { depth }
=> break NodeState
::BeingVisited { depth }
,
316 NodeState
::NotVisited
=> break NodeState
::NotVisited
,
317 NodeState
::InCycleWith { parent }
=> {
318 // We test this, to be extremely sure that we never
319 // ever break our termination condition for the
320 // reverse iteration loop.
321 assert
!(node
!= parent
, "Node can not be in cycle with itself");
322 // Store the previous node as an inverted list link
323 self.node_states
[node
] = NodeState
::InCycleWith { parent: previous_node }
;
324 // Update to parent node.
325 previous_node
= node
;
331 // The states form a graph where up to one outgoing link is stored at
332 // each node. Initially in general,
337 // InCycleWith/BeingVisited/NotVisited
339 // A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+
341 // = node, previous_node
343 // After the first loop, this will look like
347 // InCycleWith/BeingVisited/NotVisited
349 // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+
351 // | InCycleWith | = node
352 // +-+ =previous_node
354 // Note in particular that A will be linked to itself in a self-cycle
355 // and no other self-cycles occur due to how InCycleWith is assigned in
356 // the find phase implemented by `walk_unvisited_node`.
358 // We now want to compress the path, that is assign the state of the
359 // link D-E to all other links.
361 // We can then walk backwards, starting from `previous_node`, and assign
362 // each node in the list with the updated state. The loop terminates
363 // when we reach the self-cycle.
365 // Move backwards until we found the node where we started. We
366 // will know when we hit the state where previous_node == node.
368 // Back at the beginning, we can return.
369 if previous_node
== node
{
372 // Update to previous node in the link.
373 match self.node_states
[previous_node
] {
374 NodeState
::InCycleWith { parent: previous }
=> {
375 node
= previous_node
;
376 previous_node
= previous
;
378 // Only InCycleWith nodes were added to the reverse linked list.
379 other
=> panic
!("Invalid previous link while compressing cycle: {other:?}"),
382 debug
!("find_state: parent_state = {:?}", node_state
);
384 // Update the node state from the parent state. The assigned
385 // state is actually a loop invariant but it will only be
386 // evaluated if there is at least one backlink to follow.
387 // Fully trusting llvm here to find this loop optimization.
389 // Path compression, make current node point to the same root.
390 NodeState
::InCycle { .. }
=> {
391 self.node_states
[node
] = node_state
;
393 // Still visiting nodes, compress to cycle to the node
395 NodeState
::BeingVisited { depth }
=> {
396 self.node_states
[node
] =
397 NodeState
::InCycleWith { parent: self.node_stack[depth] }
;
399 // These are never allowed as parent nodes. InCycleWith
400 // should have been followed to a real parent and
401 // NotVisited can not be part of a cycle since it should
402 // have instead gotten explored.
403 NodeState
::NotVisited
| NodeState
::InCycleWith { .. }
=> {
404 panic
!("invalid parent state: {node_state:?}")
410 /// Walks a node that has never been visited before.
412 /// Call this method when `inspect_node` has returned `None`. Having the
413 /// caller decide avoids mutual recursion between the two methods and allows
414 /// us to maintain an allocated stack for nodes on the path between calls.
415 #[instrument(skip(self, initial), level = "debug")]
416 fn walk_unvisited_node(&mut self, initial
: G
::Node
) -> WalkReturn
<S
> {
417 struct VisitingNodeFrame
<G
: DirectedGraph
, Successors
> {
419 iter
: Option
<Successors
>,
422 successors_len
: usize,
423 min_cycle_root
: G
::Node
,
424 successor_node
: G
::Node
,
427 // Move the stack to a local variable. We want to utilize the existing allocation and
428 // mutably borrow it without borrowing self at the same time.
429 let mut successors_stack
= core
::mem
::take(&mut self.successors_stack
);
430 debug_assert_eq
!(successors_stack
.len(), 0);
432 let mut stack
: Vec
<VisitingNodeFrame
<G
, _
>> = vec
![VisitingNodeFrame
{
438 min_cycle_root
: initial
,
439 successor_node
: initial
,
442 let mut return_value
= None
;
444 'recurse
: while let Some(frame
) = stack
.last_mut() {
445 let VisitingNodeFrame
{
458 let successors
= match iter
{
461 // This None marks that we still have the initialize this node's frame.
462 debug
!(?depth
, ?node
);
464 debug_assert
!(matches
!(self.node_states
[node
], NodeState
::NotVisited
));
466 // Push `node` onto the stack.
467 self.node_states
[node
] = NodeState
::BeingVisited { depth }
;
468 self.node_stack
.push(node
);
470 // Walk each successor of the node, looking to see if any of
471 // them can reach a node that is presently on the stack. If
472 // so, that means they can also reach us.
473 *successors_len
= successors_stack
.len();
474 // Set and return a reference, this is currently empty.
475 iter
.get_or_insert(self.graph
.successors(node
))
479 // Now that iter is initialized, this is a constant for this frame.
480 let successors_len
= *successors_len
;
482 // Construct iterators for the nodes and walk results. There are two cases:
483 // * The walk of a successor node returned.
484 // * The remaining successor nodes.
486 return_value
.take().into_iter().map(|walk
| (*successor_node
, Some(walk
)));
488 let successor_walk
= successors
.map(|successor_node
| {
489 debug
!(?node
, ?successor_node
);
490 (successor_node
, self.inspect_node(successor_node
))
493 for (successor_node
, walk
) in returned_walk
.chain(successor_walk
) {
495 Some(WalkReturn
::Cycle { min_depth: successor_min_depth }
) => {
496 // Track the minimum depth we can reach.
497 assert
!(successor_min_depth
<= depth
);
498 if successor_min_depth
< *min_depth
{
499 debug
!(?node
, ?successor_min_depth
);
500 *min_depth
= successor_min_depth
;
501 *min_cycle_root
= successor_node
;
505 Some(WalkReturn
::Complete { scc_index: successor_scc_index }
) => {
506 // Push the completed SCC indices onto
507 // the `successors_stack` for later.
508 debug
!(?node
, ?successor_scc_index
);
509 successors_stack
.push(successor_scc_index
);
513 let depth
= depth
+ 1;
514 debug
!(?depth
, ?successor_node
);
515 // Remember which node the return value will come from.
516 frame
.successor_node
= successor_node
;
517 // Start a new stack frame the step into it.
518 stack
.push(VisitingNodeFrame
{
519 node
: successor_node
,
524 min_cycle_root
: successor_node
,
532 // Completed walk, remove `node` from the stack.
533 let r
= self.node_stack
.pop();
534 debug_assert_eq
!(r
, Some(node
));
536 // Remove the frame, it's done.
537 let frame
= stack
.pop().unwrap();
539 // If `min_depth == depth`, then we are the root of the
540 // cycle: we can't reach anyone further down the stack.
542 // Pass the 'return value' down the stack.
543 // We return one frame at a time so there can't be another return value.
544 debug_assert
!(return_value
.is_none());
545 return_value
= Some(if frame
.min_depth
== depth
{
546 // Note that successor stack may have duplicates, so we
547 // want to remove those:
548 let deduplicated_successors
= {
549 let duplicate_set
= &mut self.duplicate_set
;
550 duplicate_set
.clear();
552 .drain(successors_len
..)
553 .filter(move |&i
| duplicate_set
.insert(i
))
555 let scc_index
= self.scc_data
.create_scc(deduplicated_successors
);
556 self.node_states
[node
] = NodeState
::InCycle { scc_index }
;
557 WalkReturn
::Complete { scc_index }
559 // We are not the head of the cycle. Return back to our
560 // caller. They will take ownership of the
561 // `self.successors` data that we pushed.
562 self.node_states
[node
] = NodeState
::InCycleWith { parent: frame.min_cycle_root }
;
563 WalkReturn
::Cycle { min_depth: frame.min_depth }
567 // Keep the allocation we used for successors_stack.
568 self.successors_stack
= successors_stack
;
569 debug_assert_eq
!(self.successors_stack
.len(), 0);
571 return_value
.unwrap()