]> git.proxmox.com Git - rustc.git/blob - compiler/rustc_data_structures/src/graph/scc/mod.rs
New upstream version 1.48.0~beta.8+dfsg1
[rustc.git] / compiler / rustc_data_structures / src / graph / scc / mod.rs
1 //! Routine to compute the strongly connected components (SCCs) of a graph.
2 //!
3 //! Also computes as the resulting DAG if each SCC is replaced with a
4 //! node in the graph. This uses [Tarjan's algorithm](
5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
6 //! that completes in *O(n)* time.
7
8 use crate::fx::FxHashSet;
9 use crate::graph::vec_graph::VecGraph;
10 use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors};
11 use rustc_index::vec::{Idx, IndexVec};
12 use std::ops::Range;
13
14 #[cfg(test)]
15 mod tests;
16
17 /// Strongly connected components (SCC) of a graph. The type `N` is
18 /// the index type for the graph nodes and `S` is the index type for
19 /// the SCCs. We can map from each node to the SCC that it
20 /// participates in, and we also have the successors of each SCC.
21 pub struct Sccs<N: Idx, S: Idx> {
22 /// For each node, what is the SCC index of the SCC to which it
23 /// belongs.
24 scc_indices: IndexVec<N, S>,
25
26 /// Data about each SCC.
27 scc_data: SccData<S>,
28 }
29
30 struct SccData<S: Idx> {
31 /// For each SCC, the range of `all_successors` where its
32 /// successors can be found.
33 ranges: IndexVec<S, Range<usize>>,
34
35 /// Contains the successors for all the Sccs, concatenated. The
36 /// range of indices corresponding to a given SCC is found in its
37 /// SccData.
38 all_successors: Vec<S>,
39 }
40
41 impl<N: Idx, S: Idx> Sccs<N, S> {
42 pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
43 SccsConstruction::construct(graph)
44 }
45
46 /// Returns the number of SCCs in the graph.
47 pub fn num_sccs(&self) -> usize {
48 self.scc_data.len()
49 }
50
51 /// Returns an iterator over the SCCs in the graph.
52 ///
53 /// The SCCs will be iterated in **dependency order** (or **post order**),
54 /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
55 /// This is convenient when the edges represent dependencies: when you visit
56 /// `S1`, the value for `S2` will already have been computed.
57 pub fn all_sccs(&self) -> impl Iterator<Item = S> {
58 (0..self.scc_data.len()).map(S::new)
59 }
60
61 /// Returns the SCC to which a node `r` belongs.
62 pub fn scc(&self, r: N) -> S {
63 self.scc_indices[r]
64 }
65
66 /// Returns the successors of the given SCC.
67 pub fn successors(&self, scc: S) -> &[S] {
68 self.scc_data.successors(scc)
69 }
70
71 /// Construct the reverse graph of the SCC graph.
72 pub fn reverse(&self) -> VecGraph<S> {
73 VecGraph::new(
74 self.num_sccs(),
75 self.all_sccs()
76 .flat_map(|source| {
77 self.successors(source).iter().map(move |&target| (target, source))
78 })
79 .collect(),
80 )
81 }
82 }
83
84 impl<N: Idx, S: Idx> DirectedGraph for Sccs<N, S> {
85 type Node = S;
86 }
87
88 impl<N: Idx, S: Idx> WithNumNodes for Sccs<N, S> {
89 fn num_nodes(&self) -> usize {
90 self.num_sccs()
91 }
92 }
93
94 impl<N: Idx, S: Idx> WithNumEdges for Sccs<N, S> {
95 fn num_edges(&self) -> usize {
96 self.scc_data.all_successors.len()
97 }
98 }
99
100 impl<N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs<N, S> {
101 type Item = S;
102
103 type Iter = std::iter::Cloned<std::slice::Iter<'graph, S>>;
104 }
105
106 impl<N: Idx, S: Idx> WithSuccessors for Sccs<N, S> {
107 fn successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter {
108 self.successors(node).iter().cloned()
109 }
110 }
111
112 impl<S: Idx> SccData<S> {
113 /// Number of SCCs,
114 fn len(&self) -> usize {
115 self.ranges.len()
116 }
117
118 /// Returns the successors of the given SCC.
119 fn successors(&self, scc: S) -> &[S] {
120 // Annoyingly, `range` does not implement `Copy`, so we have
121 // to do `range.start..range.end`:
122 let range = &self.ranges[scc];
123 &self.all_successors[range.start..range.end]
124 }
125
126 /// Creates a new SCC with `successors` as its successors and
127 /// returns the resulting index.
128 fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
129 // Store the successors on `scc_successors_vec`, remembering
130 // the range of indices.
131 let all_successors_start = self.all_successors.len();
132 self.all_successors.extend(successors);
133 let all_successors_end = self.all_successors.len();
134
135 debug!(
136 "create_scc({:?}) successors={:?}",
137 self.ranges.len(),
138 &self.all_successors[all_successors_start..all_successors_end],
139 );
140
141 self.ranges.push(all_successors_start..all_successors_end)
142 }
143 }
144
145 struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> {
146 graph: &'c G,
147
148 /// The state of each node; used during walk to record the stack
149 /// and after walk to record what cycle each node ended up being
150 /// in.
151 node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
152
153 /// The stack of nodes that we are visiting as part of the DFS.
154 node_stack: Vec<G::Node>,
155
156 /// The stack of successors: as we visit a node, we mark our
157 /// position in this stack, and when we encounter a successor SCC,
158 /// we push it on the stack. When we complete an SCC, we can pop
159 /// everything off the stack that was found along the way.
160 successors_stack: Vec<S>,
161
162 /// A set used to strip duplicates. As we accumulate successors
163 /// into the successors_stack, we sometimes get duplicate entries.
164 /// We use this set to remove those -- we also keep its storage
165 /// around between successors to amortize memory allocation costs.
166 duplicate_set: FxHashSet<S>,
167
168 scc_data: SccData<S>,
169 }
170
171 #[derive(Copy, Clone, Debug)]
172 enum NodeState<N, S> {
173 /// This node has not yet been visited as part of the DFS.
174 ///
175 /// After SCC construction is complete, this state ought to be
176 /// impossible.
177 NotVisited,
178
179 /// This node is currently being walk as part of our DFS. It is on
180 /// the stack at the depth `depth`.
181 ///
182 /// After SCC construction is complete, this state ought to be
183 /// impossible.
184 BeingVisited { depth: usize },
185
186 /// Indicates that this node is a member of the given cycle.
187 InCycle { scc_index: S },
188
189 /// Indicates that this node is a member of whatever cycle
190 /// `parent` is a member of. This state is transient: whenever we
191 /// see it, we try to overwrite it with the current state of
192 /// `parent` (this is the "path compression" step of a union-find
193 /// algorithm).
194 InCycleWith { parent: N },
195 }
196
197 #[derive(Copy, Clone, Debug)]
198 enum WalkReturn<S> {
199 Cycle { min_depth: usize },
200 Complete { scc_index: S },
201 }
202
203 impl<'c, G, S> SccsConstruction<'c, G, S>
204 where
205 G: DirectedGraph + WithNumNodes + WithSuccessors,
206 S: Idx,
207 {
208 /// Identifies SCCs in the graph `G` and computes the resulting
209 /// DAG. This uses a variant of [Tarjan's
210 /// algorithm][wikipedia]. The high-level summary of the algorithm
211 /// is that we do a depth-first search. Along the way, we keep a
212 /// stack of each node whose successors are being visited. We
213 /// track the depth of each node on this stack (there is no depth
214 /// if the node is not on the stack). When we find that some node
215 /// N with depth D can reach some other node N' with lower depth
216 /// D' (i.e., D' < D), we know that N, N', and all nodes in
217 /// between them on the stack are part of an SCC.
218 ///
219 /// [wikipedia]: https://bit.ly/2EZIx84
220 fn construct(graph: &'c G) -> Sccs<G::Node, S> {
221 let num_nodes = graph.num_nodes();
222
223 let mut this = Self {
224 graph,
225 node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
226 node_stack: Vec::with_capacity(num_nodes),
227 successors_stack: Vec::new(),
228 scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() },
229 duplicate_set: FxHashSet::default(),
230 };
231
232 let scc_indices = (0..num_nodes)
233 .map(G::Node::new)
234 .map(|node| match this.walk_node(0, node) {
235 WalkReturn::Complete { scc_index } => scc_index,
236 WalkReturn::Cycle { min_depth } => {
237 panic!("`walk_node(0, {:?})` returned cycle with depth {:?}", node, min_depth)
238 }
239 })
240 .collect();
241
242 Sccs { scc_indices, scc_data: this.scc_data }
243 }
244
245 /// Visits a node during the DFS. We first examine its current
246 /// state -- if it is not yet visited (`NotVisited`), we can push
247 /// it onto the stack and start walking its successors.
248 ///
249 /// If it is already on the DFS stack it will be in the state
250 /// `BeingVisited`. In that case, we have found a cycle and we
251 /// return the depth from the stack.
252 ///
253 /// Otherwise, we are looking at a node that has already been
254 /// completely visited. We therefore return `WalkReturn::Complete`
255 /// with its associated SCC index.
256 fn walk_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
257 debug!("walk_node(depth = {:?}, node = {:?})", depth, node);
258 match self.find_state(node) {
259 NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
260
261 NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
262
263 NodeState::NotVisited => self.walk_unvisited_node(depth, node),
264
265 NodeState::InCycleWith { parent } => panic!(
266 "`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
267 parent
268 ),
269 }
270 }
271
272 /// Fetches the state of the node `r`. If `r` is recorded as being
273 /// in a cycle with some other node `r2`, then fetches the state
274 /// of `r2` (and updates `r` to reflect current result). This is
275 /// basically the "find" part of a standard union-find algorithm
276 /// (with path compression).
277 fn find_state(&mut self, r: G::Node) -> NodeState<G::Node, S> {
278 debug!("find_state(r = {:?} in state {:?})", r, self.node_states[r]);
279 match self.node_states[r] {
280 NodeState::InCycle { scc_index } => NodeState::InCycle { scc_index },
281 NodeState::BeingVisited { depth } => NodeState::BeingVisited { depth },
282 NodeState::NotVisited => NodeState::NotVisited,
283 NodeState::InCycleWith { parent } => {
284 let parent_state = self.find_state(parent);
285 debug!("find_state: parent_state = {:?}", parent_state);
286 match parent_state {
287 NodeState::InCycle { .. } => {
288 self.node_states[r] = parent_state;
289 parent_state
290 }
291
292 NodeState::BeingVisited { depth } => {
293 self.node_states[r] =
294 NodeState::InCycleWith { parent: self.node_stack[depth] };
295 parent_state
296 }
297
298 NodeState::NotVisited | NodeState::InCycleWith { .. } => {
299 panic!("invalid parent state: {:?}", parent_state)
300 }
301 }
302 }
303 }
304 }
305
306 /// Walks a node that has never been visited before.
307 fn walk_unvisited_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
308 debug!("walk_unvisited_node(depth = {:?}, node = {:?})", depth, node);
309
310 debug_assert!(match self.node_states[node] {
311 NodeState::NotVisited => true,
312 _ => false,
313 });
314
315 // Push `node` onto the stack.
316 self.node_states[node] = NodeState::BeingVisited { depth };
317 self.node_stack.push(node);
318
319 // Walk each successor of the node, looking to see if any of
320 // them can reach a node that is presently on the stack. If
321 // so, that means they can also reach us.
322 let mut min_depth = depth;
323 let mut min_cycle_root = node;
324 let successors_len = self.successors_stack.len();
325 for successor_node in self.graph.successors(node) {
326 debug!("walk_unvisited_node: node = {:?} successor_ode = {:?}", node, successor_node);
327 match self.walk_node(depth + 1, successor_node) {
328 WalkReturn::Cycle { min_depth: successor_min_depth } => {
329 // Track the minimum depth we can reach.
330 assert!(successor_min_depth <= depth);
331 if successor_min_depth < min_depth {
332 debug!(
333 "walk_unvisited_node: node = {:?} successor_min_depth = {:?}",
334 node, successor_min_depth
335 );
336 min_depth = successor_min_depth;
337 min_cycle_root = successor_node;
338 }
339 }
340
341 WalkReturn::Complete { scc_index: successor_scc_index } => {
342 // Push the completed SCC indices onto
343 // the `successors_stack` for later.
344 debug!(
345 "walk_unvisited_node: node = {:?} successor_scc_index = {:?}",
346 node, successor_scc_index
347 );
348 self.successors_stack.push(successor_scc_index);
349 }
350 }
351 }
352
353 // Completed walk, remove `node` from the stack.
354 let r = self.node_stack.pop();
355 debug_assert_eq!(r, Some(node));
356
357 // If `min_depth == depth`, then we are the root of the
358 // cycle: we can't reach anyone further down the stack.
359 if min_depth == depth {
360 // Note that successor stack may have duplicates, so we
361 // want to remove those:
362 let deduplicated_successors = {
363 let duplicate_set = &mut self.duplicate_set;
364 duplicate_set.clear();
365 self.successors_stack
366 .drain(successors_len..)
367 .filter(move |&i| duplicate_set.insert(i))
368 };
369 let scc_index = self.scc_data.create_scc(deduplicated_successors);
370 self.node_states[node] = NodeState::InCycle { scc_index };
371 WalkReturn::Complete { scc_index }
372 } else {
373 // We are not the head of the cycle. Return back to our
374 // caller. They will take ownership of the
375 // `self.successors` data that we pushed.
376 self.node_states[node] = NodeState::InCycleWith { parent: min_cycle_root };
377 WalkReturn::Cycle { min_depth }
378 }
379 }
380 }