]>
Commit | Line | Data |
---|---|---|
8faf50e0 XL |
1 | //! Routine to compute the strongly connected components (SCCs) of a |
2 | //! graph, as well as the resulting DAG if each SCC is replaced with a | |
3 | //! node in the graph. This uses Tarjan's algorithm that completes in | |
4 | //! O(n) time. | |
5 | ||
9fa01778 | 6 | use crate::fx::FxHashSet; |
dc9dc135 | 7 | use crate::graph::vec_graph::VecGraph; |
dfeec247 | 8 | use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}; |
e74abb32 | 9 | use rustc_index::vec::{Idx, IndexVec}; |
8faf50e0 XL |
10 | use std::ops::Range; |
11 | ||
416331ca XL |
12 | #[cfg(test)] |
13 | mod tests; | |
8faf50e0 XL |
14 | |
15 | /// Strongly connected components (SCC) of a graph. The type `N` is | |
16 | /// the index type for the graph nodes and `S` is the index type for | |
17 | /// the SCCs. We can map from each node to the SCC that it | |
18 | /// participates in, and we also have the successors of each SCC. | |
19 | pub struct Sccs<N: Idx, S: Idx> { | |
20 | /// For each node, what is the SCC index of the SCC to which it | |
21 | /// belongs. | |
22 | scc_indices: IndexVec<N, S>, | |
23 | ||
24 | /// Data about each SCC. | |
25 | scc_data: SccData<S>, | |
26 | } | |
27 | ||
28 | struct SccData<S: Idx> { | |
29 | /// For each SCC, the range of `all_successors` where its | |
30 | /// successors can be found. | |
31 | ranges: IndexVec<S, Range<usize>>, | |
32 | ||
a1dfa0c6 | 33 | /// Contains the successors for all the Sccs, concatenated. The |
8faf50e0 XL |
34 | /// range of indices corresponding to a given SCC is found in its |
35 | /// SccData. | |
36 | all_successors: Vec<S>, | |
37 | } | |
38 | ||
39 | impl<N: Idx, S: Idx> Sccs<N, S> { | |
40 | pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self { | |
41 | SccsConstruction::construct(graph) | |
42 | } | |
43 | ||
44 | /// Returns the number of SCCs in the graph. | |
45 | pub fn num_sccs(&self) -> usize { | |
46 | self.scc_data.len() | |
47 | } | |
48 | ||
49 | /// Returns an iterator over the SCCs in the graph. | |
50 | pub fn all_sccs(&self) -> impl Iterator<Item = S> { | |
dfeec247 | 51 | (0..self.scc_data.len()).map(S::new) |
8faf50e0 XL |
52 | } |
53 | ||
54 | /// Returns the SCC to which a node `r` belongs. | |
55 | pub fn scc(&self, r: N) -> S { | |
56 | self.scc_indices[r] | |
57 | } | |
58 | ||
59 | /// Returns the successors of the given SCC. | |
60 | pub fn successors(&self, scc: S) -> &[S] { | |
61 | self.scc_data.successors(scc) | |
62 | } | |
dc9dc135 XL |
63 | |
64 | /// Construct the reverse graph of the SCC graph. | |
65 | pub fn reverse(&self) -> VecGraph<S> { | |
66 | VecGraph::new( | |
67 | self.num_sccs(), | |
68 | self.all_sccs() | |
dfeec247 XL |
69 | .flat_map(|source| { |
70 | self.successors(source).iter().map(move |&target| (target, source)) | |
71 | }) | |
dc9dc135 XL |
72 | .collect(), |
73 | ) | |
74 | } | |
75 | } | |
76 | ||
77 | impl<N: Idx, S: Idx> DirectedGraph for Sccs<N, S> { | |
78 | type Node = S; | |
79 | } | |
80 | ||
81 | impl<N: Idx, S: Idx> WithNumNodes for Sccs<N, S> { | |
82 | fn num_nodes(&self) -> usize { | |
83 | self.num_sccs() | |
84 | } | |
85 | } | |
86 | ||
87 | impl<N: Idx, S: Idx> WithNumEdges for Sccs<N, S> { | |
88 | fn num_edges(&self) -> usize { | |
89 | self.scc_data.all_successors.len() | |
90 | } | |
91 | } | |
92 | ||
93 | impl<N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs<N, S> { | |
94 | type Item = S; | |
95 | ||
96 | type Iter = std::iter::Cloned<std::slice::Iter<'graph, S>>; | |
97 | } | |
98 | ||
99 | impl<N: Idx, S: Idx> WithSuccessors for Sccs<N, S> { | |
ba9703b0 | 100 | fn successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter { |
dc9dc135 XL |
101 | self.successors(node).iter().cloned() |
102 | } | |
8faf50e0 XL |
103 | } |
104 | ||
105 | impl<S: Idx> SccData<S> { | |
106 | /// Number of SCCs, | |
107 | fn len(&self) -> usize { | |
108 | self.ranges.len() | |
109 | } | |
110 | ||
111 | /// Returns the successors of the given SCC. | |
112 | fn successors(&self, scc: S) -> &[S] { | |
113 | // Annoyingly, `range` does not implement `Copy`, so we have | |
114 | // to do `range.start..range.end`: | |
115 | let range = &self.ranges[scc]; | |
116 | &self.all_successors[range.start..range.end] | |
117 | } | |
118 | ||
119 | /// Creates a new SCC with `successors` as its successors and | |
120 | /// returns the resulting index. | |
121 | fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S { | |
122 | // Store the successors on `scc_successors_vec`, remembering | |
123 | // the range of indices. | |
124 | let all_successors_start = self.all_successors.len(); | |
125 | self.all_successors.extend(successors); | |
126 | let all_successors_end = self.all_successors.len(); | |
127 | ||
128 | debug!( | |
129 | "create_scc({:?}) successors={:?}", | |
130 | self.ranges.len(), | |
131 | &self.all_successors[all_successors_start..all_successors_end], | |
132 | ); | |
133 | ||
134 | self.ranges.push(all_successors_start..all_successors_end) | |
135 | } | |
136 | } | |
137 | ||
9fa01778 | 138 | struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> { |
8faf50e0 XL |
139 | graph: &'c G, |
140 | ||
141 | /// The state of each node; used during walk to record the stack | |
142 | /// and after walk to record what cycle each node ended up being | |
143 | /// in. | |
144 | node_states: IndexVec<G::Node, NodeState<G::Node, S>>, | |
145 | ||
146 | /// The stack of nodes that we are visiting as part of the DFS. | |
147 | node_stack: Vec<G::Node>, | |
148 | ||
149 | /// The stack of successors: as we visit a node, we mark our | |
150 | /// position in this stack, and when we encounter a successor SCC, | |
151 | /// we push it on the stack. When we complete an SCC, we can pop | |
152 | /// everything off the stack that was found along the way. | |
153 | successors_stack: Vec<S>, | |
154 | ||
155 | /// A set used to strip duplicates. As we accumulate successors | |
156 | /// into the successors_stack, we sometimes get duplicate entries. | |
157 | /// We use this set to remove those -- we also keep its storage | |
158 | /// around between successors to amortize memory allocation costs. | |
159 | duplicate_set: FxHashSet<S>, | |
160 | ||
161 | scc_data: SccData<S>, | |
162 | } | |
163 | ||
164 | #[derive(Copy, Clone, Debug)] | |
165 | enum NodeState<N, S> { | |
166 | /// This node has not yet been visited as part of the DFS. | |
167 | /// | |
168 | /// After SCC construction is complete, this state ought to be | |
169 | /// impossible. | |
170 | NotVisited, | |
171 | ||
172 | /// This node is currently being walk as part of our DFS. It is on | |
173 | /// the stack at the depth `depth`. | |
174 | /// | |
175 | /// After SCC construction is complete, this state ought to be | |
176 | /// impossible. | |
177 | BeingVisited { depth: usize }, | |
178 | ||
179 | /// Indicates that this node is a member of the given cycle. | |
180 | InCycle { scc_index: S }, | |
181 | ||
182 | /// Indicates that this node is a member of whatever cycle | |
183 | /// `parent` is a member of. This state is transient: whenever we | |
184 | /// see it, we try to overwrite it with the current state of | |
185 | /// `parent` (this is the "path compression" step of a union-find | |
186 | /// algorithm). | |
187 | InCycleWith { parent: N }, | |
188 | } | |
189 | ||
190 | #[derive(Copy, Clone, Debug)] | |
191 | enum WalkReturn<S> { | |
192 | Cycle { min_depth: usize }, | |
193 | Complete { scc_index: S }, | |
194 | } | |
195 | ||
196 | impl<'c, G, S> SccsConstruction<'c, G, S> | |
197 | where | |
198 | G: DirectedGraph + WithNumNodes + WithSuccessors, | |
199 | S: Idx, | |
200 | { | |
201 | /// Identifies SCCs in the graph `G` and computes the resulting | |
202 | /// DAG. This uses a variant of [Tarjan's | |
203 | /// algorithm][wikipedia]. The high-level summary of the algorithm | |
204 | /// is that we do a depth-first search. Along the way, we keep a | |
205 | /// stack of each node whose successors are being visited. We | |
206 | /// track the depth of each node on this stack (there is no depth | |
207 | /// if the node is not on the stack). When we find that some node | |
208 | /// N with depth D can reach some other node N' with lower depth | |
209 | /// D' (i.e., D' < D), we know that N, N', and all nodes in | |
210 | /// between them on the stack are part of an SCC. | |
211 | /// | |
212 | /// [wikipedia]: https://bit.ly/2EZIx84 | |
213 | fn construct(graph: &'c G) -> Sccs<G::Node, S> { | |
214 | let num_nodes = graph.num_nodes(); | |
215 | ||
216 | let mut this = Self { | |
217 | graph, | |
218 | node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes), | |
219 | node_stack: Vec::with_capacity(num_nodes), | |
220 | successors_stack: Vec::new(), | |
dfeec247 | 221 | scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() }, |
8faf50e0 XL |
222 | duplicate_set: FxHashSet::default(), |
223 | }; | |
224 | ||
225 | let scc_indices = (0..num_nodes) | |
226 | .map(G::Node::new) | |
227 | .map(|node| match this.walk_node(0, node) { | |
228 | WalkReturn::Complete { scc_index } => scc_index, | |
dfeec247 XL |
229 | WalkReturn::Cycle { min_depth } => { |
230 | panic!("`walk_node(0, {:?})` returned cycle with depth {:?}", node, min_depth) | |
231 | } | |
8faf50e0 XL |
232 | }) |
233 | .collect(); | |
234 | ||
dfeec247 | 235 | Sccs { scc_indices, scc_data: this.scc_data } |
8faf50e0 XL |
236 | } |
237 | ||
9fa01778 | 238 | /// Visits a node during the DFS. We first examine its current |
8faf50e0 XL |
239 | /// state -- if it is not yet visited (`NotVisited`), we can push |
240 | /// it onto the stack and start walking its successors. | |
241 | /// | |
242 | /// If it is already on the DFS stack it will be in the state | |
243 | /// `BeingVisited`. In that case, we have found a cycle and we | |
244 | /// return the depth from the stack. | |
245 | /// | |
246 | /// Otherwise, we are looking at a node that has already been | |
247 | /// completely visited. We therefore return `WalkReturn::Complete` | |
248 | /// with its associated SCC index. | |
249 | fn walk_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> { | |
250 | debug!("walk_node(depth = {:?}, node = {:?})", depth, node); | |
251 | match self.find_state(node) { | |
252 | NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index }, | |
253 | ||
254 | NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth }, | |
255 | ||
256 | NodeState::NotVisited => self.walk_unvisited_node(depth, node), | |
257 | ||
258 | NodeState::InCycleWith { parent } => panic!( | |
259 | "`find_state` returned `InCycleWith({:?})`, which ought to be impossible", | |
260 | parent | |
261 | ), | |
262 | } | |
263 | } | |
264 | ||
265 | /// Fetches the state of the node `r`. If `r` is recorded as being | |
266 | /// in a cycle with some other node `r2`, then fetches the state | |
267 | /// of `r2` (and updates `r` to reflect current result). This is | |
268 | /// basically the "find" part of a standard union-find algorithm | |
269 | /// (with path compression). | |
270 | fn find_state(&mut self, r: G::Node) -> NodeState<G::Node, S> { | |
271 | debug!("find_state(r = {:?} in state {:?})", r, self.node_states[r]); | |
272 | match self.node_states[r] { | |
273 | NodeState::InCycle { scc_index } => NodeState::InCycle { scc_index }, | |
274 | NodeState::BeingVisited { depth } => NodeState::BeingVisited { depth }, | |
275 | NodeState::NotVisited => NodeState::NotVisited, | |
276 | NodeState::InCycleWith { parent } => { | |
277 | let parent_state = self.find_state(parent); | |
278 | debug!("find_state: parent_state = {:?}", parent_state); | |
279 | match parent_state { | |
280 | NodeState::InCycle { .. } => { | |
281 | self.node_states[r] = parent_state; | |
282 | parent_state | |
283 | } | |
284 | ||
285 | NodeState::BeingVisited { depth } => { | |
dfeec247 XL |
286 | self.node_states[r] = |
287 | NodeState::InCycleWith { parent: self.node_stack[depth] }; | |
8faf50e0 XL |
288 | parent_state |
289 | } | |
290 | ||
291 | NodeState::NotVisited | NodeState::InCycleWith { .. } => { | |
292 | panic!("invalid parent state: {:?}", parent_state) | |
293 | } | |
294 | } | |
295 | } | |
296 | } | |
297 | } | |
298 | ||
299 | /// Walks a node that has never been visited before. | |
300 | fn walk_unvisited_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> { | |
dfeec247 | 301 | debug!("walk_unvisited_node(depth = {:?}, node = {:?})", depth, node); |
8faf50e0 XL |
302 | |
303 | debug_assert!(match self.node_states[node] { | |
304 | NodeState::NotVisited => true, | |
305 | _ => false, | |
306 | }); | |
307 | ||
308 | // Push `node` onto the stack. | |
309 | self.node_states[node] = NodeState::BeingVisited { depth }; | |
310 | self.node_stack.push(node); | |
311 | ||
312 | // Walk each successor of the node, looking to see if any of | |
313 | // them can reach a node that is presently on the stack. If | |
314 | // so, that means they can also reach us. | |
315 | let mut min_depth = depth; | |
316 | let mut min_cycle_root = node; | |
317 | let successors_len = self.successors_stack.len(); | |
318 | for successor_node in self.graph.successors(node) { | |
dfeec247 | 319 | debug!("walk_unvisited_node: node = {:?} successor_ode = {:?}", node, successor_node); |
8faf50e0 | 320 | match self.walk_node(depth + 1, successor_node) { |
dfeec247 | 321 | WalkReturn::Cycle { min_depth: successor_min_depth } => { |
8faf50e0 XL |
322 | // Track the minimum depth we can reach. |
323 | assert!(successor_min_depth <= depth); | |
324 | if successor_min_depth < min_depth { | |
325 | debug!( | |
326 | "walk_unvisited_node: node = {:?} successor_min_depth = {:?}", | |
327 | node, successor_min_depth | |
328 | ); | |
329 | min_depth = successor_min_depth; | |
330 | min_cycle_root = successor_node; | |
331 | } | |
332 | } | |
333 | ||
dfeec247 | 334 | WalkReturn::Complete { scc_index: successor_scc_index } => { |
8faf50e0 XL |
335 | // Push the completed SCC indices onto |
336 | // the `successors_stack` for later. | |
337 | debug!( | |
338 | "walk_unvisited_node: node = {:?} successor_scc_index = {:?}", | |
339 | node, successor_scc_index | |
340 | ); | |
341 | self.successors_stack.push(successor_scc_index); | |
342 | } | |
343 | } | |
344 | } | |
345 | ||
346 | // Completed walk, remove `node` from the stack. | |
347 | let r = self.node_stack.pop(); | |
348 | debug_assert_eq!(r, Some(node)); | |
349 | ||
350 | // If `min_depth == depth`, then we are the root of the | |
351 | // cycle: we can't reach anyone further down the stack. | |
352 | if min_depth == depth { | |
353 | // Note that successor stack may have duplicates, so we | |
354 | // want to remove those: | |
355 | let deduplicated_successors = { | |
356 | let duplicate_set = &mut self.duplicate_set; | |
357 | duplicate_set.clear(); | |
358 | self.successors_stack | |
359 | .drain(successors_len..) | |
360 | .filter(move |&i| duplicate_set.insert(i)) | |
361 | }; | |
362 | let scc_index = self.scc_data.create_scc(deduplicated_successors); | |
363 | self.node_states[node] = NodeState::InCycle { scc_index }; | |
364 | WalkReturn::Complete { scc_index } | |
365 | } else { | |
366 | // We are not the head of the cycle. Return back to our | |
367 | // caller. They will take ownership of the | |
368 | // `self.successors` data that we pushed. | |
dfeec247 | 369 | self.node_states[node] = NodeState::InCycleWith { parent: min_cycle_root }; |
8faf50e0 XL |
370 | WalkReturn::Cycle { min_depth } |
371 | } | |
372 | } | |
373 | } |