]>
Commit | Line | Data |
---|---|---|
e74abb32 | 1 | use crate::vec::{Idx, IndexVec}; |
3dfed10e | 2 | use arrayvec::ArrayVec; |
0bf4aa26 XL |
3 | use std::fmt; |
4 | use std::iter; | |
5 | use std::marker::PhantomData; | |
6 | use std::mem; | |
3c0e092e | 7 | use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Bound, Not, Range, RangeBounds, Shl}; |
0bf4aa26 | 8 | use std::slice; |
416331ca | 9 | |
3dfed10e XL |
10 | use rustc_macros::{Decodable, Encodable}; |
11 | ||
dc9dc135 | 12 | #[cfg(test)] |
416331ca | 13 | mod tests; |
0bf4aa26 XL |
14 | |
15 | pub type Word = u64; | |
16 | pub const WORD_BYTES: usize = mem::size_of::<Word>(); | |
17 | pub const WORD_BITS: usize = WORD_BYTES * 8; | |
18 | ||
94222f64 XL |
19 | pub trait BitRelations<Rhs> { |
20 | fn union(&mut self, other: &Rhs) -> bool; | |
21 | fn subtract(&mut self, other: &Rhs) -> bool; | |
22 | fn intersect(&mut self, other: &Rhs) -> bool; | |
23 | } | |
24 | ||
3c0e092e XL |
25 | #[inline] |
26 | fn inclusive_start_end<T: Idx>( | |
27 | range: impl RangeBounds<T>, | |
28 | domain: usize, | |
29 | ) -> Option<(usize, usize)> { | |
30 | // Both start and end are inclusive. | |
31 | let start = match range.start_bound().cloned() { | |
32 | Bound::Included(start) => start.index(), | |
33 | Bound::Excluded(start) => start.index() + 1, | |
34 | Bound::Unbounded => 0, | |
35 | }; | |
36 | let end = match range.end_bound().cloned() { | |
37 | Bound::Included(end) => end.index(), | |
38 | Bound::Excluded(end) => end.index().checked_sub(1)?, | |
39 | Bound::Unbounded => domain - 1, | |
40 | }; | |
41 | assert!(end < domain); | |
42 | if start > end { | |
43 | return None; | |
44 | } | |
45 | Some((start, end)) | |
46 | } | |
47 | ||
94222f64 XL |
48 | macro_rules! bit_relations_inherent_impls { |
49 | () => { | |
50 | /// Sets `self = self | other` and returns `true` if `self` changed | |
51 | /// (i.e., if new bits were added). | |
52 | pub fn union<Rhs>(&mut self, other: &Rhs) -> bool | |
53 | where | |
54 | Self: BitRelations<Rhs>, | |
55 | { | |
56 | <Self as BitRelations<Rhs>>::union(self, other) | |
57 | } | |
58 | ||
59 | /// Sets `self = self - other` and returns `true` if `self` changed. | |
60 | /// (i.e., if any bits were removed). | |
61 | pub fn subtract<Rhs>(&mut self, other: &Rhs) -> bool | |
62 | where | |
63 | Self: BitRelations<Rhs>, | |
64 | { | |
65 | <Self as BitRelations<Rhs>>::subtract(self, other) | |
66 | } | |
67 | ||
68 | /// Sets `self = self & other` and return `true` if `self` changed. | |
69 | /// (i.e., if any bits were removed). | |
70 | pub fn intersect<Rhs>(&mut self, other: &Rhs) -> bool | |
71 | where | |
72 | Self: BitRelations<Rhs>, | |
73 | { | |
74 | <Self as BitRelations<Rhs>>::intersect(self, other) | |
75 | } | |
76 | }; | |
77 | } | |
78 | ||
e74abb32 XL |
79 | /// A fixed-size bitset type with a dense representation. |
80 | /// | |
81 | /// NOTE: Use [`GrowableBitSet`] if you need support for resizing after creation. | |
0bf4aa26 XL |
82 | /// |
83 | /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also | |
84 | /// just be `usize`. | |
85 | /// | |
86 | /// All operations that involve an element will panic if the element is equal | |
87 | /// to or greater than the domain size. All operations that involve two bitsets | |
88 | /// will panic if the bitsets have differing domain sizes. | |
e74abb32 | 89 | /// |
1b1a35ee XL |
90 | #[derive(Eq, PartialEq, Decodable, Encodable)] |
91 | pub struct BitSet<T> { | |
0bf4aa26 XL |
92 | domain_size: usize, |
93 | words: Vec<Word>, | |
94 | marker: PhantomData<T>, | |
95 | } | |
96 | ||
1b1a35ee XL |
97 | impl<T> BitSet<T> { |
98 | /// Gets the domain size. | |
99 | pub fn domain_size(&self) -> usize { | |
100 | self.domain_size | |
101 | } | |
102 | } | |
103 | ||
0bf4aa26 | 104 | impl<T: Idx> BitSet<T> { |
9fa01778 | 105 | /// Creates a new, empty bitset with a given `domain_size`. |
0bf4aa26 XL |
106 | #[inline] |
107 | pub fn new_empty(domain_size: usize) -> BitSet<T> { | |
108 | let num_words = num_words(domain_size); | |
dfeec247 | 109 | BitSet { domain_size, words: vec![0; num_words], marker: PhantomData } |
0bf4aa26 XL |
110 | } |
111 | ||
9fa01778 | 112 | /// Creates a new, filled bitset with a given `domain_size`. |
0bf4aa26 XL |
113 | #[inline] |
114 | pub fn new_filled(domain_size: usize) -> BitSet<T> { | |
115 | let num_words = num_words(domain_size); | |
dfeec247 | 116 | let mut result = BitSet { domain_size, words: vec![!0; num_words], marker: PhantomData }; |
0bf4aa26 XL |
117 | result.clear_excess_bits(); |
118 | result | |
119 | } | |
120 | ||
0bf4aa26 XL |
121 | /// Clear all elements. |
122 | #[inline] | |
123 | pub fn clear(&mut self) { | |
124 | for word in &mut self.words { | |
125 | *word = 0; | |
126 | } | |
127 | } | |
128 | ||
129 | /// Clear excess bits in the final word. | |
130 | fn clear_excess_bits(&mut self) { | |
131 | let num_bits_in_final_word = self.domain_size % WORD_BITS; | |
132 | if num_bits_in_final_word > 0 { | |
133 | let mask = (1 << num_bits_in_final_word) - 1; | |
134 | let final_word_idx = self.words.len() - 1; | |
135 | self.words[final_word_idx] &= mask; | |
136 | } | |
137 | } | |
138 | ||
0bf4aa26 XL |
139 | /// Count the number of set bits in the set. |
140 | pub fn count(&self) -> usize { | |
141 | self.words.iter().map(|e| e.count_ones() as usize).sum() | |
142 | } | |
143 | ||
9fa01778 | 144 | /// Returns `true` if `self` contains `elem`. |
0bf4aa26 XL |
145 | #[inline] |
146 | pub fn contains(&self, elem: T) -> bool { | |
147 | assert!(elem.index() < self.domain_size); | |
148 | let (word_index, mask) = word_index_and_mask(elem); | |
149 | (self.words[word_index] & mask) != 0 | |
150 | } | |
151 | ||
152 | /// Is `self` is a (non-strict) superset of `other`? | |
153 | #[inline] | |
154 | pub fn superset(&self, other: &BitSet<T>) -> bool { | |
155 | assert_eq!(self.domain_size, other.domain_size); | |
156 | self.words.iter().zip(&other.words).all(|(a, b)| (a & b) == *b) | |
157 | } | |
158 | ||
159 | /// Is the set empty? | |
160 | #[inline] | |
161 | pub fn is_empty(&self) -> bool { | |
162 | self.words.iter().all(|a| *a == 0) | |
163 | } | |
164 | ||
9fa01778 | 165 | /// Insert `elem`. Returns whether the set has changed. |
0bf4aa26 XL |
166 | #[inline] |
167 | pub fn insert(&mut self, elem: T) -> bool { | |
168 | assert!(elem.index() < self.domain_size); | |
169 | let (word_index, mask) = word_index_and_mask(elem); | |
170 | let word_ref = &mut self.words[word_index]; | |
171 | let word = *word_ref; | |
172 | let new_word = word | mask; | |
173 | *word_ref = new_word; | |
174 | new_word != word | |
175 | } | |
176 | ||
3c0e092e XL |
177 | #[inline] |
178 | pub fn insert_range(&mut self, elems: impl RangeBounds<T>) { | |
179 | let Some((start, end)) = inclusive_start_end(elems, self.domain_size) else { | |
180 | return; | |
181 | }; | |
182 | ||
183 | let (start_word_index, start_mask) = word_index_and_mask(start); | |
184 | let (end_word_index, end_mask) = word_index_and_mask(end); | |
185 | ||
186 | // Set all words in between start and end (exclusively of both). | |
187 | for word_index in (start_word_index + 1)..end_word_index { | |
188 | self.words[word_index] = !0; | |
189 | } | |
190 | ||
191 | if start_word_index != end_word_index { | |
192 | // Start and end are in different words, so we handle each in turn. | |
193 | // | |
194 | // We set all leading bits. This includes the start_mask bit. | |
195 | self.words[start_word_index] |= !(start_mask - 1); | |
196 | // And all trailing bits (i.e. from 0..=end) in the end word, | |
197 | // including the end. | |
198 | self.words[end_word_index] |= end_mask | end_mask - 1; | |
199 | } else { | |
200 | self.words[start_word_index] |= end_mask | (end_mask - start_mask); | |
201 | } | |
202 | } | |
203 | ||
0bf4aa26 XL |
204 | /// Sets all bits to true. |
205 | pub fn insert_all(&mut self) { | |
206 | for word in &mut self.words { | |
207 | *word = !0; | |
208 | } | |
209 | self.clear_excess_bits(); | |
210 | } | |
211 | ||
9fa01778 | 212 | /// Returns `true` if the set has changed. |
0bf4aa26 XL |
213 | #[inline] |
214 | pub fn remove(&mut self, elem: T) -> bool { | |
215 | assert!(elem.index() < self.domain_size); | |
216 | let (word_index, mask) = word_index_and_mask(elem); | |
217 | let word_ref = &mut self.words[word_index]; | |
218 | let word = *word_ref; | |
219 | let new_word = word & !mask; | |
220 | *word_ref = new_word; | |
221 | new_word != word | |
222 | } | |
223 | ||
9fa01778 | 224 | /// Gets a slice of the underlying words. |
0bf4aa26 XL |
225 | pub fn words(&self) -> &[Word] { |
226 | &self.words | |
227 | } | |
228 | ||
229 | /// Iterates over the indices of set bits in a sorted order. | |
230 | #[inline] | |
416331ca | 231 | pub fn iter(&self) -> BitIter<'_, T> { |
e74abb32 | 232 | BitIter::new(&self.words) |
0bf4aa26 XL |
233 | } |
234 | ||
235 | /// Duplicates the set as a hybrid set. | |
236 | pub fn to_hybrid(&self) -> HybridBitSet<T> { | |
237 | // Note: we currently don't bother trying to make a Sparse set. | |
238 | HybridBitSet::Dense(self.to_owned()) | |
239 | } | |
dc9dc135 XL |
240 | |
241 | /// Set `self = self | other`. In contrast to `union` returns `true` if the set contains at | |
242 | /// least one bit that is not in `other` (i.e. `other` is not a superset of `self`). | |
243 | /// | |
244 | /// This is an optimization for union of a hybrid bitset. | |
245 | fn reverse_union_sparse(&mut self, sparse: &SparseBitSet<T>) -> bool { | |
246 | assert!(sparse.domain_size == self.domain_size); | |
247 | self.clear_excess_bits(); | |
248 | ||
249 | let mut not_already = false; | |
250 | // Index of the current word not yet merged. | |
251 | let mut current_index = 0; | |
252 | // Mask of bits that came from the sparse set in the current word. | |
253 | let mut new_bit_mask = 0; | |
254 | for (word_index, mask) in sparse.iter().map(|x| word_index_and_mask(*x)) { | |
255 | // Next bit is in a word not inspected yet. | |
256 | if word_index > current_index { | |
257 | self.words[current_index] |= new_bit_mask; | |
258 | // Were there any bits in the old word that did not occur in the sparse set? | |
259 | not_already |= (self.words[current_index] ^ new_bit_mask) != 0; | |
260 | // Check all words we skipped for any set bit. | |
dfeec247 | 261 | not_already |= self.words[current_index + 1..word_index].iter().any(|&x| x != 0); |
dc9dc135 XL |
262 | // Update next word. |
263 | current_index = word_index; | |
264 | // Reset bit mask, no bits have been merged yet. | |
265 | new_bit_mask = 0; | |
266 | } | |
267 | // Add bit and mark it as coming from the sparse set. | |
268 | // self.words[word_index] |= mask; | |
269 | new_bit_mask |= mask; | |
270 | } | |
271 | self.words[current_index] |= new_bit_mask; | |
272 | // Any bits in the last inspected word that were not in the sparse set? | |
273 | not_already |= (self.words[current_index] ^ new_bit_mask) != 0; | |
274 | // Any bits in the tail? Note `clear_excess_bits` before. | |
dfeec247 | 275 | not_already |= self.words[current_index + 1..].iter().any(|&x| x != 0); |
dc9dc135 XL |
276 | |
277 | not_already | |
278 | } | |
94222f64 | 279 | |
3c0e092e XL |
280 | fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T> { |
281 | let (start, end) = inclusive_start_end(range, self.domain_size)?; | |
282 | let (start_word_index, _) = word_index_and_mask(start); | |
283 | let (end_word_index, end_mask) = word_index_and_mask(end); | |
284 | ||
285 | let end_word = self.words[end_word_index] & (end_mask | (end_mask - 1)); | |
286 | if end_word != 0 { | |
287 | let pos = max_bit(end_word) + WORD_BITS * end_word_index; | |
288 | if start <= pos { | |
289 | return Some(T::new(pos)); | |
290 | } | |
291 | } | |
292 | ||
293 | // We exclude end_word_index from the range here, because we don't want | |
294 | // to limit ourselves to *just* the last word: the bits set it in may be | |
295 | // after `end`, so it may not work out. | |
296 | if let Some(offset) = | |
297 | self.words[start_word_index..end_word_index].iter().rposition(|&w| w != 0) | |
298 | { | |
299 | let word_idx = start_word_index + offset; | |
300 | let start_word = self.words[word_idx]; | |
301 | let pos = max_bit(start_word) + WORD_BITS * word_idx; | |
302 | if start <= pos { | |
303 | return Some(T::new(pos)); | |
304 | } | |
305 | } | |
306 | ||
307 | None | |
308 | } | |
309 | ||
94222f64 | 310 | bit_relations_inherent_impls! {} |
0bf4aa26 XL |
311 | } |
312 | ||
94222f64 XL |
313 | // dense REL dense |
314 | impl<T: Idx> BitRelations<BitSet<T>> for BitSet<T> { | |
315 | fn union(&mut self, other: &BitSet<T>) -> bool { | |
316 | assert_eq!(self.domain_size, other.domain_size); | |
317 | bitwise(&mut self.words, &other.words, |a, b| a | b) | |
318 | } | |
319 | ||
320 | fn subtract(&mut self, other: &BitSet<T>) -> bool { | |
321 | assert_eq!(self.domain_size, other.domain_size); | |
322 | bitwise(&mut self.words, &other.words, |a, b| a & !b) | |
323 | } | |
324 | ||
325 | fn intersect(&mut self, other: &BitSet<T>) -> bool { | |
326 | assert_eq!(self.domain_size, other.domain_size); | |
327 | bitwise(&mut self.words, &other.words, |a, b| a & b) | |
328 | } | |
0bf4aa26 XL |
329 | } |
330 | ||
94222f64 XL |
331 | // Applies a function to mutate a bitset, and returns true if any |
332 | // of the applications return true | |
333 | fn sequential_update<T: Idx>( | |
334 | mut self_update: impl FnMut(T) -> bool, | |
335 | it: impl Iterator<Item = T>, | |
336 | ) -> bool { | |
337 | let mut changed = false; | |
338 | for elem in it { | |
339 | changed |= self_update(elem); | |
340 | } | |
341 | changed | |
0bf4aa26 XL |
342 | } |
343 | ||
94222f64 XL |
344 | // Optimization of intersection for SparseBitSet that's generic |
345 | // over the RHS | |
346 | fn sparse_intersect<T: Idx>( | |
347 | set: &mut SparseBitSet<T>, | |
348 | other_contains: impl Fn(&T) -> bool, | |
349 | ) -> bool { | |
350 | let size = set.elems.len(); | |
351 | set.elems.retain(|elem| other_contains(elem)); | |
352 | set.elems.len() != size | |
353 | } | |
354 | ||
355 | // Optimization of dense/sparse intersection. The resulting set is | |
356 | // guaranteed to be at most the size of the sparse set, and hence can be | |
357 | // represented as a sparse set. Therefore the sparse set is copied and filtered, | |
358 | // then returned as the new set. | |
359 | fn dense_sparse_intersect<T: Idx>( | |
360 | dense: &BitSet<T>, | |
361 | sparse: &SparseBitSet<T>, | |
362 | ) -> (SparseBitSet<T>, bool) { | |
363 | let mut sparse_copy = sparse.clone(); | |
364 | sparse_intersect(&mut sparse_copy, |el| dense.contains(*el)); | |
365 | let n = sparse_copy.len(); | |
366 | (sparse_copy, n != dense.count()) | |
367 | } | |
368 | ||
369 | // hybrid REL dense | |
370 | impl<T: Idx> BitRelations<BitSet<T>> for HybridBitSet<T> { | |
371 | fn union(&mut self, other: &BitSet<T>) -> bool { | |
372 | assert_eq!(self.domain_size(), other.domain_size); | |
373 | match self { | |
374 | HybridBitSet::Sparse(sparse) => { | |
375 | // `self` is sparse and `other` is dense. To | |
376 | // merge them, we have two available strategies: | |
377 | // * Densify `self` then merge other | |
378 | // * Clone other then integrate bits from `self` | |
379 | // The second strategy requires dedicated method | |
380 | // since the usual `union` returns the wrong | |
381 | // result. In the dedicated case the computation | |
382 | // is slightly faster if the bits of the sparse | |
383 | // bitset map to only few words of the dense | |
384 | // representation, i.e. indices are near each | |
385 | // other. | |
386 | // | |
387 | // Benchmarking seems to suggest that the second | |
388 | // option is worth it. | |
389 | let mut new_dense = other.clone(); | |
390 | let changed = new_dense.reverse_union_sparse(sparse); | |
391 | *self = HybridBitSet::Dense(new_dense); | |
392 | changed | |
393 | } | |
394 | ||
395 | HybridBitSet::Dense(dense) => dense.union(other), | |
396 | } | |
397 | } | |
398 | ||
399 | fn subtract(&mut self, other: &BitSet<T>) -> bool { | |
400 | assert_eq!(self.domain_size(), other.domain_size); | |
401 | match self { | |
402 | HybridBitSet::Sparse(sparse) => { | |
403 | sequential_update(|elem| sparse.remove(elem), other.iter()) | |
404 | } | |
405 | HybridBitSet::Dense(dense) => dense.subtract(other), | |
406 | } | |
407 | } | |
408 | ||
409 | fn intersect(&mut self, other: &BitSet<T>) -> bool { | |
410 | assert_eq!(self.domain_size(), other.domain_size); | |
411 | match self { | |
412 | HybridBitSet::Sparse(sparse) => sparse_intersect(sparse, |elem| other.contains(*elem)), | |
413 | HybridBitSet::Dense(dense) => dense.intersect(other), | |
414 | } | |
0bf4aa26 XL |
415 | } |
416 | } | |
417 | ||
94222f64 XL |
418 | // dense REL hybrid |
419 | impl<T: Idx> BitRelations<HybridBitSet<T>> for BitSet<T> { | |
420 | fn union(&mut self, other: &HybridBitSet<T>) -> bool { | |
421 | assert_eq!(self.domain_size, other.domain_size()); | |
422 | match other { | |
423 | HybridBitSet::Sparse(sparse) => { | |
424 | sequential_update(|elem| self.insert(elem), sparse.iter().cloned()) | |
425 | } | |
426 | HybridBitSet::Dense(dense) => self.union(dense), | |
427 | } | |
428 | } | |
429 | ||
430 | fn subtract(&mut self, other: &HybridBitSet<T>) -> bool { | |
431 | assert_eq!(self.domain_size, other.domain_size()); | |
432 | match other { | |
433 | HybridBitSet::Sparse(sparse) => { | |
434 | sequential_update(|elem| self.remove(elem), sparse.iter().cloned()) | |
435 | } | |
436 | HybridBitSet::Dense(dense) => self.subtract(dense), | |
437 | } | |
438 | } | |
439 | ||
440 | fn intersect(&mut self, other: &HybridBitSet<T>) -> bool { | |
441 | assert_eq!(self.domain_size, other.domain_size()); | |
442 | match other { | |
443 | HybridBitSet::Sparse(sparse) => { | |
444 | let (updated, changed) = dense_sparse_intersect(self, sparse); | |
445 | ||
446 | // We can't directly assign the SparseBitSet to the BitSet, and | |
447 | // doing `*self = updated.to_dense()` would cause a drop / reallocation. Instead, | |
448 | // the BitSet is cleared and `updated` is copied into `self`. | |
449 | self.clear(); | |
450 | for elem in updated.iter() { | |
451 | self.insert(*elem); | |
452 | } | |
453 | changed | |
454 | } | |
455 | HybridBitSet::Dense(dense) => self.intersect(dense), | |
456 | } | |
457 | } | |
458 | } | |
459 | ||
460 | // hybrid REL hybrid | |
461 | impl<T: Idx> BitRelations<HybridBitSet<T>> for HybridBitSet<T> { | |
462 | fn union(&mut self, other: &HybridBitSet<T>) -> bool { | |
463 | assert_eq!(self.domain_size(), other.domain_size()); | |
464 | match self { | |
465 | HybridBitSet::Sparse(_) => { | |
466 | match other { | |
467 | HybridBitSet::Sparse(other_sparse) => { | |
468 | // Both sets are sparse. Add the elements in | |
469 | // `other_sparse` to `self` one at a time. This | |
470 | // may or may not cause `self` to be densified. | |
471 | let mut changed = false; | |
472 | for elem in other_sparse.iter() { | |
473 | changed |= self.insert(*elem); | |
474 | } | |
475 | changed | |
476 | } | |
477 | ||
478 | HybridBitSet::Dense(other_dense) => self.union(other_dense), | |
479 | } | |
480 | } | |
481 | ||
482 | HybridBitSet::Dense(self_dense) => self_dense.union(other), | |
483 | } | |
484 | } | |
485 | ||
486 | fn subtract(&mut self, other: &HybridBitSet<T>) -> bool { | |
487 | assert_eq!(self.domain_size(), other.domain_size()); | |
488 | match self { | |
489 | HybridBitSet::Sparse(self_sparse) => { | |
490 | sequential_update(|elem| self_sparse.remove(elem), other.iter()) | |
491 | } | |
492 | HybridBitSet::Dense(self_dense) => self_dense.subtract(other), | |
493 | } | |
494 | } | |
495 | ||
496 | fn intersect(&mut self, other: &HybridBitSet<T>) -> bool { | |
497 | assert_eq!(self.domain_size(), other.domain_size()); | |
498 | match self { | |
499 | HybridBitSet::Sparse(self_sparse) => { | |
500 | sparse_intersect(self_sparse, |elem| other.contains(*elem)) | |
501 | } | |
502 | HybridBitSet::Dense(self_dense) => match other { | |
503 | HybridBitSet::Sparse(other_sparse) => { | |
504 | let (updated, changed) = dense_sparse_intersect(self_dense, other_sparse); | |
505 | *self = HybridBitSet::Sparse(updated); | |
506 | changed | |
507 | } | |
508 | HybridBitSet::Dense(other_dense) => self_dense.intersect(other_dense), | |
509 | }, | |
510 | } | |
0bf4aa26 XL |
511 | } |
512 | } | |
513 | ||
1b1a35ee XL |
514 | impl<T> Clone for BitSet<T> { |
515 | fn clone(&self) -> Self { | |
516 | BitSet { domain_size: self.domain_size, words: self.words.clone(), marker: PhantomData } | |
517 | } | |
518 | ||
519 | fn clone_from(&mut self, from: &Self) { | |
520 | if self.domain_size != from.domain_size { | |
521 | self.words.resize(from.domain_size, 0); | |
522 | self.domain_size = from.domain_size; | |
523 | } | |
524 | ||
525 | self.words.copy_from_slice(&from.words); | |
526 | } | |
527 | } | |
528 | ||
0bf4aa26 | 529 | impl<T: Idx> fmt::Debug for BitSet<T> { |
9fa01778 | 530 | fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { |
dfeec247 | 531 | w.debug_list().entries(self.iter()).finish() |
0bf4aa26 XL |
532 | } |
533 | } | |
534 | ||
535 | impl<T: Idx> ToString for BitSet<T> { | |
536 | fn to_string(&self) -> String { | |
537 | let mut result = String::new(); | |
538 | let mut sep = '['; | |
539 | ||
540 | // Note: this is a little endian printout of bytes. | |
541 | ||
542 | // i tracks how many bits we have printed so far. | |
543 | let mut i = 0; | |
544 | for word in &self.words { | |
545 | let mut word = *word; | |
dfeec247 XL |
546 | for _ in 0..WORD_BYTES { |
547 | // for each byte in `word`: | |
0bf4aa26 XL |
548 | let remain = self.domain_size - i; |
549 | // If less than a byte remains, then mask just that many bits. | |
550 | let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF }; | |
551 | assert!(mask <= 0xFF); | |
552 | let byte = word & mask; | |
553 | ||
554 | result.push_str(&format!("{}{:02x}", sep, byte)); | |
555 | ||
dfeec247 XL |
556 | if remain <= 8 { |
557 | break; | |
558 | } | |
0bf4aa26 XL |
559 | word >>= 8; |
560 | i += 8; | |
561 | sep = '-'; | |
562 | } | |
563 | sep = '|'; | |
564 | } | |
565 | result.push(']'); | |
566 | ||
567 | result | |
568 | } | |
569 | } | |
570 | ||
571 | pub struct BitIter<'a, T: Idx> { | |
e74abb32 XL |
572 | /// A copy of the current word, but with any already-visited bits cleared. |
573 | /// (This lets us use `trailing_zeros()` to find the next set bit.) When it | |
574 | /// is reduced to 0, we move onto the next word. | |
575 | word: Word, | |
576 | ||
577 | /// The offset (measured in bits) of the current word. | |
578 | offset: usize, | |
579 | ||
580 | /// Underlying iterator over the words. | |
581 | iter: slice::Iter<'a, Word>, | |
582 | ||
dfeec247 | 583 | marker: PhantomData<T>, |
0bf4aa26 XL |
584 | } |
585 | ||
e74abb32 XL |
586 | impl<'a, T: Idx> BitIter<'a, T> { |
587 | #[inline] | |
588 | fn new(words: &'a [Word]) -> BitIter<'a, T> { | |
589 | // We initialize `word` and `offset` to degenerate values. On the first | |
590 | // call to `next()` we will fall through to getting the first word from | |
591 | // `iter`, which sets `word` to the first word (if there is one) and | |
592 | // `offset` to 0. Doing it this way saves us from having to maintain | |
593 | // additional state about whether we have started. | |
594 | BitIter { | |
595 | word: 0, | |
ba9703b0 | 596 | offset: usize::MAX - (WORD_BITS - 1), |
e74abb32 XL |
597 | iter: words.iter(), |
598 | marker: PhantomData, | |
599 | } | |
600 | } | |
601 | } | |
602 | ||
0bf4aa26 XL |
603 | impl<'a, T: Idx> Iterator for BitIter<'a, T> { |
604 | type Item = T; | |
605 | fn next(&mut self) -> Option<T> { | |
606 | loop { | |
e74abb32 XL |
607 | if self.word != 0 { |
608 | // Get the position of the next set bit in the current word, | |
609 | // then clear the bit. | |
610 | let bit_pos = self.word.trailing_zeros() as usize; | |
611 | let bit = 1 << bit_pos; | |
612 | self.word ^= bit; | |
dfeec247 | 613 | return Some(T::new(bit_pos + self.offset)); |
0bf4aa26 XL |
614 | } |
615 | ||
e74abb32 XL |
616 | // Move onto the next word. `wrapping_add()` is needed to handle |
617 | // the degenerate initial value given to `offset` in `new()`. | |
618 | let word = self.iter.next()?; | |
619 | self.word = *word; | |
620 | self.offset = self.offset.wrapping_add(WORD_BITS); | |
0bf4aa26 XL |
621 | } |
622 | } | |
623 | } | |
624 | ||
0bf4aa26 XL |
625 | #[inline] |
626 | fn bitwise<Op>(out_vec: &mut [Word], in_vec: &[Word], op: Op) -> bool | |
dfeec247 XL |
627 | where |
628 | Op: Fn(Word, Word) -> Word, | |
0bf4aa26 XL |
629 | { |
630 | assert_eq!(out_vec.len(), in_vec.len()); | |
17df50a5 | 631 | let mut changed = 0; |
cdc7bbd5 | 632 | for (out_elem, in_elem) in iter::zip(out_vec, in_vec) { |
0bf4aa26 XL |
633 | let old_val = *out_elem; |
634 | let new_val = op(old_val, *in_elem); | |
635 | *out_elem = new_val; | |
17df50a5 XL |
636 | // This is essentially equivalent to a != with changed being a bool, but |
637 | // in practice this code gets auto-vectorized by the compiler for most | |
638 | // operators. Using != here causes us to generate quite poor code as the | |
639 | // compiler tries to go back to a boolean on each loop iteration. | |
640 | changed |= old_val ^ new_val; | |
0bf4aa26 | 641 | } |
17df50a5 | 642 | changed != 0 |
0bf4aa26 XL |
643 | } |
644 | ||
645 | const SPARSE_MAX: usize = 8; | |
646 | ||
647 | /// A fixed-size bitset type with a sparse representation and a maximum of | |
3dfed10e XL |
648 | /// `SPARSE_MAX` elements. The elements are stored as a sorted `ArrayVec` with |
649 | /// no duplicates. | |
0bf4aa26 XL |
650 | /// |
651 | /// This type is used by `HybridBitSet`; do not use directly. | |
652 | #[derive(Clone, Debug)] | |
1b1a35ee | 653 | pub struct SparseBitSet<T> { |
0bf4aa26 | 654 | domain_size: usize, |
cdc7bbd5 | 655 | elems: ArrayVec<T, SPARSE_MAX>, |
0bf4aa26 XL |
656 | } |
657 | ||
658 | impl<T: Idx> SparseBitSet<T> { | |
659 | fn new_empty(domain_size: usize) -> Self { | |
3dfed10e | 660 | SparseBitSet { domain_size, elems: ArrayVec::new() } |
0bf4aa26 XL |
661 | } |
662 | ||
663 | fn len(&self) -> usize { | |
664 | self.elems.len() | |
665 | } | |
666 | ||
667 | fn is_empty(&self) -> bool { | |
668 | self.elems.len() == 0 | |
669 | } | |
670 | ||
671 | fn contains(&self, elem: T) -> bool { | |
672 | assert!(elem.index() < self.domain_size); | |
673 | self.elems.contains(&elem) | |
674 | } | |
675 | ||
676 | fn insert(&mut self, elem: T) -> bool { | |
677 | assert!(elem.index() < self.domain_size); | |
678 | let changed = if let Some(i) = self.elems.iter().position(|&e| e >= elem) { | |
679 | if self.elems[i] == elem { | |
680 | // `elem` is already in the set. | |
681 | false | |
682 | } else { | |
683 | // `elem` is smaller than one or more existing elements. | |
684 | self.elems.insert(i, elem); | |
685 | true | |
686 | } | |
687 | } else { | |
688 | // `elem` is larger than all existing elements. | |
689 | self.elems.push(elem); | |
690 | true | |
691 | }; | |
692 | assert!(self.len() <= SPARSE_MAX); | |
693 | changed | |
694 | } | |
695 | ||
696 | fn remove(&mut self, elem: T) -> bool { | |
697 | assert!(elem.index() < self.domain_size); | |
698 | if let Some(i) = self.elems.iter().position(|&e| e == elem) { | |
699 | self.elems.remove(i); | |
700 | true | |
701 | } else { | |
702 | false | |
703 | } | |
704 | } | |
705 | ||
706 | fn to_dense(&self) -> BitSet<T> { | |
707 | let mut dense = BitSet::new_empty(self.domain_size); | |
708 | for elem in self.elems.iter() { | |
709 | dense.insert(*elem); | |
710 | } | |
711 | dense | |
712 | } | |
713 | ||
9fa01778 | 714 | fn iter(&self) -> slice::Iter<'_, T> { |
0bf4aa26 XL |
715 | self.elems.iter() |
716 | } | |
0bf4aa26 | 717 | |
3c0e092e XL |
718 | fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T> { |
719 | let mut last_leq = None; | |
720 | for e in self.iter() { | |
721 | if range.contains(e) { | |
722 | last_leq = Some(*e); | |
723 | } | |
724 | } | |
725 | last_leq | |
726 | } | |
727 | ||
94222f64 | 728 | bit_relations_inherent_impls! {} |
0bf4aa26 XL |
729 | } |
730 | ||
731 | /// A fixed-size bitset type with a hybrid representation: sparse when there | |
732 | /// are up to a `SPARSE_MAX` elements in the set, but dense when there are more | |
733 | /// than `SPARSE_MAX`. | |
734 | /// | |
735 | /// This type is especially efficient for sets that typically have a small | |
736 | /// number of elements, but a large `domain_size`, and are cleared frequently. | |
737 | /// | |
738 | /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also | |
739 | /// just be `usize`. | |
740 | /// | |
741 | /// All operations that involve an element will panic if the element is equal | |
742 | /// to or greater than the domain size. All operations that involve two bitsets | |
743 | /// will panic if the bitsets have differing domain sizes. | |
1b1a35ee XL |
744 | #[derive(Clone)] |
745 | pub enum HybridBitSet<T> { | |
0bf4aa26 XL |
746 | Sparse(SparseBitSet<T>), |
747 | Dense(BitSet<T>), | |
748 | } | |
749 | ||
1b1a35ee XL |
750 | impl<T: Idx> fmt::Debug for HybridBitSet<T> { |
751 | fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { | |
752 | match self { | |
753 | Self::Sparse(b) => b.fmt(w), | |
754 | Self::Dense(b) => b.fmt(w), | |
755 | } | |
756 | } | |
757 | } | |
758 | ||
0bf4aa26 XL |
759 | impl<T: Idx> HybridBitSet<T> { |
760 | pub fn new_empty(domain_size: usize) -> Self { | |
761 | HybridBitSet::Sparse(SparseBitSet::new_empty(domain_size)) | |
762 | } | |
763 | ||
1b1a35ee | 764 | pub fn domain_size(&self) -> usize { |
0bf4aa26 XL |
765 | match self { |
766 | HybridBitSet::Sparse(sparse) => sparse.domain_size, | |
767 | HybridBitSet::Dense(dense) => dense.domain_size, | |
768 | } | |
769 | } | |
770 | ||
771 | pub fn clear(&mut self) { | |
772 | let domain_size = self.domain_size(); | |
773 | *self = HybridBitSet::new_empty(domain_size); | |
774 | } | |
775 | ||
776 | pub fn contains(&self, elem: T) -> bool { | |
777 | match self { | |
778 | HybridBitSet::Sparse(sparse) => sparse.contains(elem), | |
779 | HybridBitSet::Dense(dense) => dense.contains(elem), | |
780 | } | |
781 | } | |
782 | ||
783 | pub fn superset(&self, other: &HybridBitSet<T>) -> bool { | |
784 | match (self, other) { | |
785 | (HybridBitSet::Dense(self_dense), HybridBitSet::Dense(other_dense)) => { | |
786 | self_dense.superset(other_dense) | |
787 | } | |
788 | _ => { | |
789 | assert!(self.domain_size() == other.domain_size()); | |
790 | other.iter().all(|elem| self.contains(elem)) | |
791 | } | |
792 | } | |
793 | } | |
794 | ||
795 | pub fn is_empty(&self) -> bool { | |
796 | match self { | |
797 | HybridBitSet::Sparse(sparse) => sparse.is_empty(), | |
798 | HybridBitSet::Dense(dense) => dense.is_empty(), | |
799 | } | |
800 | } | |
801 | ||
3c0e092e XL |
802 | /// Returns the previous element present in the bitset from `elem`, |
803 | /// inclusively of elem. That is, will return `Some(elem)` if elem is in the | |
804 | /// bitset. | |
805 | pub fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T> { | |
806 | match self { | |
807 | HybridBitSet::Sparse(sparse) => sparse.last_set_in(range), | |
808 | HybridBitSet::Dense(dense) => dense.last_set_in(range), | |
809 | } | |
810 | } | |
811 | ||
0bf4aa26 XL |
812 | pub fn insert(&mut self, elem: T) -> bool { |
813 | // No need to check `elem` against `self.domain_size` here because all | |
814 | // the match cases check it, one way or another. | |
815 | match self { | |
816 | HybridBitSet::Sparse(sparse) if sparse.len() < SPARSE_MAX => { | |
817 | // The set is sparse and has space for `elem`. | |
818 | sparse.insert(elem) | |
819 | } | |
820 | HybridBitSet::Sparse(sparse) if sparse.contains(elem) => { | |
821 | // The set is sparse and does not have space for `elem`, but | |
822 | // that doesn't matter because `elem` is already present. | |
823 | false | |
824 | } | |
825 | HybridBitSet::Sparse(sparse) => { | |
826 | // The set is sparse and full. Convert to a dense set. | |
827 | let mut dense = sparse.to_dense(); | |
828 | let changed = dense.insert(elem); | |
829 | assert!(changed); | |
830 | *self = HybridBitSet::Dense(dense); | |
831 | changed | |
832 | } | |
833 | HybridBitSet::Dense(dense) => dense.insert(elem), | |
834 | } | |
835 | } | |
836 | ||
3c0e092e XL |
837 | pub fn insert_range(&mut self, elems: impl RangeBounds<T>) { |
838 | // No need to check `elem` against `self.domain_size` here because all | |
839 | // the match cases check it, one way or another. | |
840 | let start = match elems.start_bound().cloned() { | |
841 | Bound::Included(start) => start.index(), | |
842 | Bound::Excluded(start) => start.index() + 1, | |
843 | Bound::Unbounded => 0, | |
844 | }; | |
845 | let end = match elems.end_bound().cloned() { | |
846 | Bound::Included(end) => end.index() + 1, | |
847 | Bound::Excluded(end) => end.index(), | |
848 | Bound::Unbounded => self.domain_size() - 1, | |
849 | }; | |
850 | let len = if let Some(l) = end.checked_sub(start) { | |
851 | l | |
852 | } else { | |
853 | return; | |
854 | }; | |
855 | match self { | |
856 | HybridBitSet::Sparse(sparse) if sparse.len() + len < SPARSE_MAX => { | |
857 | // The set is sparse and has space for `elems`. | |
858 | for elem in start..end { | |
859 | sparse.insert(T::new(elem)); | |
860 | } | |
861 | } | |
862 | HybridBitSet::Sparse(sparse) => { | |
863 | // The set is sparse and full. Convert to a dense set. | |
864 | let mut dense = sparse.to_dense(); | |
865 | dense.insert_range(elems); | |
866 | *self = HybridBitSet::Dense(dense); | |
867 | } | |
868 | HybridBitSet::Dense(dense) => dense.insert_range(elems), | |
869 | } | |
870 | } | |
871 | ||
0bf4aa26 XL |
872 | pub fn insert_all(&mut self) { |
873 | let domain_size = self.domain_size(); | |
874 | match self { | |
875 | HybridBitSet::Sparse(_) => { | |
876 | *self = HybridBitSet::Dense(BitSet::new_filled(domain_size)); | |
877 | } | |
878 | HybridBitSet::Dense(dense) => dense.insert_all(), | |
879 | } | |
880 | } | |
881 | ||
882 | pub fn remove(&mut self, elem: T) -> bool { | |
883 | // Note: we currently don't bother going from Dense back to Sparse. | |
884 | match self { | |
885 | HybridBitSet::Sparse(sparse) => sparse.remove(elem), | |
886 | HybridBitSet::Dense(dense) => dense.remove(elem), | |
887 | } | |
888 | } | |
889 | ||
0bf4aa26 XL |
890 | /// Converts to a dense set, consuming itself in the process. |
891 | pub fn to_dense(self) -> BitSet<T> { | |
892 | match self { | |
893 | HybridBitSet::Sparse(sparse) => sparse.to_dense(), | |
894 | HybridBitSet::Dense(dense) => dense, | |
895 | } | |
896 | } | |
897 | ||
9fa01778 | 898 | pub fn iter(&self) -> HybridIter<'_, T> { |
0bf4aa26 XL |
899 | match self { |
900 | HybridBitSet::Sparse(sparse) => HybridIter::Sparse(sparse.iter()), | |
901 | HybridBitSet::Dense(dense) => HybridIter::Dense(dense.iter()), | |
902 | } | |
903 | } | |
0bf4aa26 | 904 | |
94222f64 | 905 | bit_relations_inherent_impls! {} |
0bf4aa26 XL |
906 | } |
907 | ||
908 | pub enum HybridIter<'a, T: Idx> { | |
909 | Sparse(slice::Iter<'a, T>), | |
910 | Dense(BitIter<'a, T>), | |
911 | } | |
912 | ||
913 | impl<'a, T: Idx> Iterator for HybridIter<'a, T> { | |
914 | type Item = T; | |
915 | ||
916 | fn next(&mut self) -> Option<T> { | |
917 | match self { | |
e74abb32 | 918 | HybridIter::Sparse(sparse) => sparse.next().copied(), |
0bf4aa26 XL |
919 | HybridIter::Dense(dense) => dense.next(), |
920 | } | |
921 | } | |
922 | } | |
923 | ||
924 | /// A resizable bitset type with a dense representation. | |
925 | /// | |
926 | /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also | |
927 | /// just be `usize`. | |
928 | /// | |
929 | /// All operations that involve an element will panic if the element is equal | |
930 | /// to or greater than the domain size. | |
931 | #[derive(Clone, Debug, PartialEq)] | |
932 | pub struct GrowableBitSet<T: Idx> { | |
933 | bit_set: BitSet<T>, | |
934 | } | |
935 | ||
936 | impl<T: Idx> GrowableBitSet<T> { | |
937 | /// Ensure that the set can hold at least `min_domain_size` elements. | |
938 | pub fn ensure(&mut self, min_domain_size: usize) { | |
939 | if self.bit_set.domain_size < min_domain_size { | |
940 | self.bit_set.domain_size = min_domain_size; | |
941 | } | |
942 | ||
943 | let min_num_words = num_words(min_domain_size); | |
944 | if self.bit_set.words.len() < min_num_words { | |
945 | self.bit_set.words.resize(min_num_words, 0) | |
946 | } | |
947 | } | |
948 | ||
949 | pub fn new_empty() -> GrowableBitSet<T> { | |
950 | GrowableBitSet { bit_set: BitSet::new_empty(0) } | |
951 | } | |
952 | ||
48663c56 XL |
953 | pub fn with_capacity(capacity: usize) -> GrowableBitSet<T> { |
954 | GrowableBitSet { bit_set: BitSet::new_empty(capacity) } | |
0bf4aa26 XL |
955 | } |
956 | ||
9fa01778 | 957 | /// Returns `true` if the set has changed. |
0bf4aa26 XL |
958 | #[inline] |
959 | pub fn insert(&mut self, elem: T) -> bool { | |
960 | self.ensure(elem.index() + 1); | |
961 | self.bit_set.insert(elem) | |
962 | } | |
963 | ||
5869c6ff XL |
964 | /// Returns `true` if the set has changed. |
965 | #[inline] | |
966 | pub fn remove(&mut self, elem: T) -> bool { | |
967 | self.ensure(elem.index() + 1); | |
968 | self.bit_set.remove(elem) | |
969 | } | |
970 | ||
971 | #[inline] | |
972 | pub fn is_empty(&self) -> bool { | |
973 | self.bit_set.is_empty() | |
974 | } | |
975 | ||
0bf4aa26 XL |
976 | #[inline] |
977 | pub fn contains(&self, elem: T) -> bool { | |
978 | let (word_index, mask) = word_index_and_mask(elem); | |
c295e0f8 | 979 | self.bit_set.words.get(word_index).map_or(false, |word| (word & mask) != 0) |
0bf4aa26 XL |
980 | } |
981 | } | |
982 | ||
983 | /// A fixed-size 2D bit matrix type with a dense representation. | |
984 | /// | |
985 | /// `R` and `C` are index types used to identify rows and columns respectively; | |
986 | /// typically newtyped `usize` wrappers, but they can also just be `usize`. | |
987 | /// | |
988 | /// All operations that involve a row and/or column index will panic if the | |
989 | /// index exceeds the relevant bound. | |
3dfed10e | 990 | #[derive(Clone, Eq, PartialEq, Decodable, Encodable)] |
0bf4aa26 XL |
991 | pub struct BitMatrix<R: Idx, C: Idx> { |
992 | num_rows: usize, | |
993 | num_columns: usize, | |
994 | words: Vec<Word>, | |
995 | marker: PhantomData<(R, C)>, | |
996 | } | |
997 | ||
998 | impl<R: Idx, C: Idx> BitMatrix<R, C> { | |
9fa01778 | 999 | /// Creates a new `rows x columns` matrix, initially empty. |
0bf4aa26 XL |
1000 | pub fn new(num_rows: usize, num_columns: usize) -> BitMatrix<R, C> { |
1001 | // For every element, we need one bit for every other | |
1002 | // element. Round up to an even number of words. | |
1003 | let words_per_row = num_words(num_columns); | |
1004 | BitMatrix { | |
1005 | num_rows, | |
1006 | num_columns, | |
1007 | words: vec![0; num_rows * words_per_row], | |
1008 | marker: PhantomData, | |
1009 | } | |
1010 | } | |
1011 | ||
dc9dc135 XL |
1012 | /// Creates a new matrix, with `row` used as the value for every row. |
1013 | pub fn from_row_n(row: &BitSet<C>, num_rows: usize) -> BitMatrix<R, C> { | |
1014 | let num_columns = row.domain_size(); | |
1015 | let words_per_row = num_words(num_columns); | |
1016 | assert_eq!(words_per_row, row.words().len()); | |
1017 | BitMatrix { | |
1018 | num_rows, | |
1019 | num_columns, | |
1020 | words: iter::repeat(row.words()).take(num_rows).flatten().cloned().collect(), | |
1021 | marker: PhantomData, | |
1022 | } | |
1023 | } | |
1024 | ||
1025 | pub fn rows(&self) -> impl Iterator<Item = R> { | |
1026 | (0..self.num_rows).map(R::new) | |
1027 | } | |
1028 | ||
0bf4aa26 XL |
1029 | /// The range of bits for a given row. |
1030 | fn range(&self, row: R) -> (usize, usize) { | |
1031 | let words_per_row = num_words(self.num_columns); | |
1032 | let start = row.index() * words_per_row; | |
1033 | (start, start + words_per_row) | |
1034 | } | |
1035 | ||
1036 | /// Sets the cell at `(row, column)` to true. Put another way, insert | |
1037 | /// `column` to the bitset for `row`. | |
1038 | /// | |
9fa01778 | 1039 | /// Returns `true` if this changed the matrix. |
0bf4aa26 XL |
1040 | pub fn insert(&mut self, row: R, column: C) -> bool { |
1041 | assert!(row.index() < self.num_rows && column.index() < self.num_columns); | |
1042 | let (start, _) = self.range(row); | |
1043 | let (word_index, mask) = word_index_and_mask(column); | |
1044 | let words = &mut self.words[..]; | |
1045 | let word = words[start + word_index]; | |
1046 | let new_word = word | mask; | |
1047 | words[start + word_index] = new_word; | |
1048 | word != new_word | |
1049 | } | |
1050 | ||
1051 | /// Do the bits from `row` contain `column`? Put another way, is | |
1052 | /// the matrix cell at `(row, column)` true? Put yet another way, | |
1053 | /// if the matrix represents (transitive) reachability, can | |
1054 | /// `row` reach `column`? | |
1055 | pub fn contains(&self, row: R, column: C) -> bool { | |
1056 | assert!(row.index() < self.num_rows && column.index() < self.num_columns); | |
1057 | let (start, _) = self.range(row); | |
1058 | let (word_index, mask) = word_index_and_mask(column); | |
1059 | (self.words[start + word_index] & mask) != 0 | |
1060 | } | |
1061 | ||
9fa01778 | 1062 | /// Returns those indices that are true in rows `a` and `b`. This |
3dfed10e | 1063 | /// is an *O*(*n*) operation where *n* is the number of elements |
0bf4aa26 XL |
1064 | /// (somewhat independent from the actual size of the |
1065 | /// intersection, in particular). | |
1066 | pub fn intersect_rows(&self, row1: R, row2: R) -> Vec<C> { | |
1067 | assert!(row1.index() < self.num_rows && row2.index() < self.num_rows); | |
1068 | let (row1_start, row1_end) = self.range(row1); | |
1069 | let (row2_start, row2_end) = self.range(row2); | |
1070 | let mut result = Vec::with_capacity(self.num_columns); | |
1071 | for (base, (i, j)) in (row1_start..row1_end).zip(row2_start..row2_end).enumerate() { | |
1072 | let mut v = self.words[i] & self.words[j]; | |
1073 | for bit in 0..WORD_BITS { | |
1074 | if v == 0 { | |
1075 | break; | |
1076 | } | |
1077 | if v & 0x1 != 0 { | |
1078 | result.push(C::new(base * WORD_BITS + bit)); | |
1079 | } | |
1080 | v >>= 1; | |
1081 | } | |
1082 | } | |
1083 | result | |
1084 | } | |
1085 | ||
9fa01778 XL |
1086 | /// Adds the bits from row `read` to the bits from row `write`, and |
1087 | /// returns `true` if anything changed. | |
0bf4aa26 XL |
1088 | /// |
1089 | /// This is used when computing transitive reachability because if | |
1090 | /// you have an edge `write -> read`, because in that case | |
1091 | /// `write` can reach everything that `read` can (and | |
1092 | /// potentially more). | |
1093 | pub fn union_rows(&mut self, read: R, write: R) -> bool { | |
1094 | assert!(read.index() < self.num_rows && write.index() < self.num_rows); | |
1095 | let (read_start, read_end) = self.range(read); | |
1096 | let (write_start, write_end) = self.range(write); | |
1097 | let words = &mut self.words[..]; | |
1098 | let mut changed = false; | |
cdc7bbd5 | 1099 | for (read_index, write_index) in iter::zip(read_start..read_end, write_start..write_end) { |
0bf4aa26 XL |
1100 | let word = words[write_index]; |
1101 | let new_word = word | words[read_index]; | |
1102 | words[write_index] = new_word; | |
1103 | changed |= word != new_word; | |
1104 | } | |
1105 | changed | |
1106 | } | |
1107 | ||
dc9dc135 XL |
1108 | /// Adds the bits from `with` to the bits from row `write`, and |
1109 | /// returns `true` if anything changed. | |
1110 | pub fn union_row_with(&mut self, with: &BitSet<C>, write: R) -> bool { | |
1111 | assert!(write.index() < self.num_rows); | |
1112 | assert_eq!(with.domain_size(), self.num_columns); | |
1113 | let (write_start, write_end) = self.range(write); | |
1114 | let mut changed = false; | |
cdc7bbd5 | 1115 | for (read_index, write_index) in iter::zip(0..with.words().len(), write_start..write_end) { |
dc9dc135 XL |
1116 | let word = self.words[write_index]; |
1117 | let new_word = word | with.words()[read_index]; | |
1118 | self.words[write_index] = new_word; | |
1119 | changed |= word != new_word; | |
1120 | } | |
1121 | changed | |
1122 | } | |
1123 | ||
1124 | /// Sets every cell in `row` to true. | |
1125 | pub fn insert_all_into_row(&mut self, row: R) { | |
1126 | assert!(row.index() < self.num_rows); | |
1127 | let (start, end) = self.range(row); | |
3c0e092e XL |
1128 | let words = &mut self.words[..]; |
1129 | for index in start..end { | |
1130 | words[index] = !0; | |
dc9dc135 XL |
1131 | } |
1132 | self.clear_excess_bits(row); | |
1133 | } | |
1134 | ||
1135 | /// Clear excess bits in the final word of the row. | |
1136 | fn clear_excess_bits(&mut self, row: R) { | |
1137 | let num_bits_in_final_word = self.num_columns % WORD_BITS; | |
1138 | if num_bits_in_final_word > 0 { | |
1139 | let mask = (1 << num_bits_in_final_word) - 1; | |
1140 | let (_, end) = self.range(row); | |
1141 | let final_word_idx = end - 1; | |
1142 | self.words[final_word_idx] &= mask; | |
1143 | } | |
1144 | } | |
1145 | ||
1146 | /// Gets a slice of the underlying words. | |
1147 | pub fn words(&self) -> &[Word] { | |
1148 | &self.words | |
1149 | } | |
1150 | ||
0bf4aa26 XL |
1151 | /// Iterates through all the columns set to true in a given row of |
1152 | /// the matrix. | |
416331ca | 1153 | pub fn iter(&self, row: R) -> BitIter<'_, C> { |
0bf4aa26 XL |
1154 | assert!(row.index() < self.num_rows); |
1155 | let (start, end) = self.range(row); | |
e74abb32 | 1156 | BitIter::new(&self.words[start..end]) |
0bf4aa26 | 1157 | } |
dc9dc135 XL |
1158 | |
1159 | /// Returns the number of elements in `row`. | |
1160 | pub fn count(&self, row: R) -> usize { | |
1161 | let (start, end) = self.range(row); | |
1162 | self.words[start..end].iter().map(|e| e.count_ones() as usize).sum() | |
1163 | } | |
0bf4aa26 XL |
1164 | } |
1165 | ||
f035d41b XL |
1166 | impl<R: Idx, C: Idx> fmt::Debug for BitMatrix<R, C> { |
1167 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1168 | /// Forces its contents to print in regular mode instead of alternate mode. | |
1169 | struct OneLinePrinter<T>(T); | |
1170 | impl<T: fmt::Debug> fmt::Debug for OneLinePrinter<T> { | |
1171 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1172 | write!(fmt, "{:?}", self.0) | |
1173 | } | |
1174 | } | |
1175 | ||
1176 | write!(fmt, "BitMatrix({}x{}) ", self.num_rows, self.num_columns)?; | |
1177 | let items = self.rows().flat_map(|r| self.iter(r).map(move |c| (r, c))); | |
1178 | fmt.debug_set().entries(items.map(OneLinePrinter)).finish() | |
1179 | } | |
1180 | } | |
1181 | ||
0bf4aa26 XL |
1182 | /// A fixed-column-size, variable-row-size 2D bit matrix with a moderately |
1183 | /// sparse representation. | |
1184 | /// | |
1185 | /// Initially, every row has no explicit representation. If any bit within a | |
1186 | /// row is set, the entire row is instantiated as `Some(<HybridBitSet>)`. | |
1187 | /// Furthermore, any previously uninstantiated rows prior to it will be | |
1188 | /// instantiated as `None`. Those prior rows may themselves become fully | |
1189 | /// instantiated later on if any of their bits are set. | |
1190 | /// | |
1191 | /// `R` and `C` are index types used to identify rows and columns respectively; | |
1192 | /// typically newtyped `usize` wrappers, but they can also just be `usize`. | |
1193 | #[derive(Clone, Debug)] | |
1194 | pub struct SparseBitMatrix<R, C> | |
1195 | where | |
1196 | R: Idx, | |
1197 | C: Idx, | |
1198 | { | |
1199 | num_columns: usize, | |
1200 | rows: IndexVec<R, Option<HybridBitSet<C>>>, | |
1201 | } | |
1202 | ||
1203 | impl<R: Idx, C: Idx> SparseBitMatrix<R, C> { | |
9fa01778 | 1204 | /// Creates a new empty sparse bit matrix with no rows or columns. |
0bf4aa26 | 1205 | pub fn new(num_columns: usize) -> Self { |
dfeec247 | 1206 | Self { num_columns, rows: IndexVec::new() } |
0bf4aa26 XL |
1207 | } |
1208 | ||
1209 | fn ensure_row(&mut self, row: R) -> &mut HybridBitSet<C> { | |
c295e0f8 | 1210 | // Instantiate any missing rows up to and including row `row` with an empty HybridBitSet. |
0bf4aa26 | 1211 | // Then replace row `row` with a full HybridBitSet if necessary. |
c295e0f8 | 1212 | self.rows.get_or_insert_with(row, || HybridBitSet::new_empty(self.num_columns)) |
0bf4aa26 XL |
1213 | } |
1214 | ||
1215 | /// Sets the cell at `(row, column)` to true. Put another way, insert | |
1216 | /// `column` to the bitset for `row`. | |
1217 | /// | |
9fa01778 | 1218 | /// Returns `true` if this changed the matrix. |
0bf4aa26 XL |
1219 | pub fn insert(&mut self, row: R, column: C) -> bool { |
1220 | self.ensure_row(row).insert(column) | |
1221 | } | |
1222 | ||
94222f64 XL |
1223 | /// Sets the cell at `(row, column)` to false. Put another way, delete |
1224 | /// `column` from the bitset for `row`. Has no effect if `row` does not | |
1225 | /// exist. | |
1226 | /// | |
1227 | /// Returns `true` if this changed the matrix. | |
1228 | pub fn remove(&mut self, row: R, column: C) -> bool { | |
1229 | match self.rows.get_mut(row) { | |
1230 | Some(Some(row)) => row.remove(column), | |
1231 | _ => false, | |
1232 | } | |
1233 | } | |
1234 | ||
1235 | /// Sets all columns at `row` to false. Has no effect if `row` does | |
1236 | /// not exist. | |
1237 | pub fn clear(&mut self, row: R) { | |
1238 | if let Some(Some(row)) = self.rows.get_mut(row) { | |
1239 | row.clear(); | |
1240 | } | |
1241 | } | |
1242 | ||
0bf4aa26 XL |
1243 | /// Do the bits from `row` contain `column`? Put another way, is |
1244 | /// the matrix cell at `(row, column)` true? Put yet another way, | |
1245 | /// if the matrix represents (transitive) reachability, can | |
1246 | /// `row` reach `column`? | |
1247 | pub fn contains(&self, row: R, column: C) -> bool { | |
1248 | self.row(row).map_or(false, |r| r.contains(column)) | |
1249 | } | |
1250 | ||
9fa01778 XL |
1251 | /// Adds the bits from row `read` to the bits from row `write`, and |
1252 | /// returns `true` if anything changed. | |
0bf4aa26 XL |
1253 | /// |
1254 | /// This is used when computing transitive reachability because if | |
1255 | /// you have an edge `write -> read`, because in that case | |
1256 | /// `write` can reach everything that `read` can (and | |
1257 | /// potentially more). | |
1258 | pub fn union_rows(&mut self, read: R, write: R) -> bool { | |
1259 | if read == write || self.row(read).is_none() { | |
1260 | return false; | |
1261 | } | |
1262 | ||
1263 | self.ensure_row(write); | |
1264 | if let (Some(read_row), Some(write_row)) = self.rows.pick2_mut(read, write) { | |
1265 | write_row.union(read_row) | |
1266 | } else { | |
1267 | unreachable!() | |
1268 | } | |
1269 | } | |
1270 | ||
0bf4aa26 XL |
1271 | /// Insert all bits in the given row. |
1272 | pub fn insert_all_into_row(&mut self, row: R) { | |
1273 | self.ensure_row(row).insert_all(); | |
1274 | } | |
1275 | ||
1276 | pub fn rows(&self) -> impl Iterator<Item = R> { | |
1277 | self.rows.indices() | |
1278 | } | |
1279 | ||
1280 | /// Iterates through all the columns set to true in a given row of | |
1281 | /// the matrix. | |
3c0e092e | 1282 | pub fn iter<'a>(&'a self, row: R) -> impl Iterator<Item = C> + 'a { |
0bf4aa26 XL |
1283 | self.row(row).into_iter().flat_map(|r| r.iter()) |
1284 | } | |
1285 | ||
1286 | pub fn row(&self, row: R) -> Option<&HybridBitSet<C>> { | |
dfeec247 | 1287 | if let Some(Some(row)) = self.rows.get(row) { Some(row) } else { None } |
0bf4aa26 | 1288 | } |
94222f64 XL |
1289 | |
1290 | /// Interescts `row` with `set`. `set` can be either `BitSet` or | |
1291 | /// `HybridBitSet`. Has no effect if `row` does not exist. | |
1292 | /// | |
1293 | /// Returns true if the row was changed. | |
1294 | pub fn intersect_row<Set>(&mut self, row: R, set: &Set) -> bool | |
1295 | where | |
1296 | HybridBitSet<C>: BitRelations<Set>, | |
1297 | { | |
1298 | match self.rows.get_mut(row) { | |
1299 | Some(Some(row)) => row.intersect(set), | |
1300 | _ => false, | |
1301 | } | |
1302 | } | |
1303 | ||
1304 | /// Subtracts `set from `row`. `set` can be either `BitSet` or | |
1305 | /// `HybridBitSet`. Has no effect if `row` does not exist. | |
1306 | /// | |
1307 | /// Returns true if the row was changed. | |
1308 | pub fn subtract_row<Set>(&mut self, row: R, set: &Set) -> bool | |
1309 | where | |
1310 | HybridBitSet<C>: BitRelations<Set>, | |
1311 | { | |
1312 | match self.rows.get_mut(row) { | |
1313 | Some(Some(row)) => row.subtract(set), | |
1314 | _ => false, | |
1315 | } | |
1316 | } | |
1317 | ||
1318 | /// Unions `row` with `set`. `set` can be either `BitSet` or | |
1319 | /// `HybridBitSet`. | |
1320 | /// | |
1321 | /// Returns true if the row was changed. | |
1322 | pub fn union_row<Set>(&mut self, row: R, set: &Set) -> bool | |
1323 | where | |
1324 | HybridBitSet<C>: BitRelations<Set>, | |
1325 | { | |
1326 | self.ensure_row(row).union(set) | |
1327 | } | |
0bf4aa26 XL |
1328 | } |
1329 | ||
1330 | #[inline] | |
1331 | fn num_words<T: Idx>(domain_size: T) -> usize { | |
1332 | (domain_size.index() + WORD_BITS - 1) / WORD_BITS | |
1333 | } | |
1334 | ||
1335 | #[inline] | |
1336 | fn word_index_and_mask<T: Idx>(elem: T) -> (usize, Word) { | |
1337 | let elem = elem.index(); | |
1338 | let word_index = elem / WORD_BITS; | |
1339 | let mask = 1 << (elem % WORD_BITS); | |
1340 | (word_index, mask) | |
1341 | } | |
3dfed10e | 1342 | |
3c0e092e XL |
1343 | #[inline] |
1344 | fn max_bit(word: Word) -> usize { | |
1345 | WORD_BITS - 1 - word.leading_zeros() as usize | |
1346 | } | |
1347 | ||
3dfed10e XL |
1348 | /// Integral type used to represent the bit set. |
1349 | pub trait FiniteBitSetTy: | |
1350 | BitAnd<Output = Self> | |
1351 | + BitAndAssign | |
1352 | + BitOrAssign | |
1353 | + Clone | |
1354 | + Copy | |
1355 | + Shl | |
1356 | + Not<Output = Self> | |
1357 | + PartialEq | |
1358 | + Sized | |
1359 | { | |
1360 | /// Size of the domain representable by this type, e.g. 64 for `u64`. | |
1361 | const DOMAIN_SIZE: u32; | |
1362 | ||
1363 | /// Value which represents the `FiniteBitSet` having every bit set. | |
1364 | const FILLED: Self; | |
1365 | /// Value which represents the `FiniteBitSet` having no bits set. | |
1366 | const EMPTY: Self; | |
1367 | ||
1368 | /// Value for one as the integral type. | |
1369 | const ONE: Self; | |
1370 | /// Value for zero as the integral type. | |
1371 | const ZERO: Self; | |
1372 | ||
1373 | /// Perform a checked left shift on the integral type. | |
1374 | fn checked_shl(self, rhs: u32) -> Option<Self>; | |
1375 | /// Perform a checked right shift on the integral type. | |
1376 | fn checked_shr(self, rhs: u32) -> Option<Self>; | |
1377 | } | |
1378 | ||
1379 | impl FiniteBitSetTy for u32 { | |
1380 | const DOMAIN_SIZE: u32 = 32; | |
1381 | ||
1382 | const FILLED: Self = Self::MAX; | |
1383 | const EMPTY: Self = Self::MIN; | |
1384 | ||
1385 | const ONE: Self = 1u32; | |
1386 | const ZERO: Self = 0u32; | |
1387 | ||
1388 | fn checked_shl(self, rhs: u32) -> Option<Self> { | |
1389 | self.checked_shl(rhs) | |
1390 | } | |
1391 | ||
1392 | fn checked_shr(self, rhs: u32) -> Option<Self> { | |
1393 | self.checked_shr(rhs) | |
1394 | } | |
1395 | } | |
1396 | ||
1397 | impl std::fmt::Debug for FiniteBitSet<u32> { | |
1398 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1399 | write!(f, "{:032b}", self.0) | |
1400 | } | |
1401 | } | |
1402 | ||
1403 | impl FiniteBitSetTy for u64 { | |
1404 | const DOMAIN_SIZE: u32 = 64; | |
1405 | ||
1406 | const FILLED: Self = Self::MAX; | |
1407 | const EMPTY: Self = Self::MIN; | |
1408 | ||
1409 | const ONE: Self = 1u64; | |
1410 | const ZERO: Self = 0u64; | |
1411 | ||
1412 | fn checked_shl(self, rhs: u32) -> Option<Self> { | |
1413 | self.checked_shl(rhs) | |
1414 | } | |
1415 | ||
1416 | fn checked_shr(self, rhs: u32) -> Option<Self> { | |
1417 | self.checked_shr(rhs) | |
1418 | } | |
1419 | } | |
1420 | ||
1421 | impl std::fmt::Debug for FiniteBitSet<u64> { | |
1422 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1423 | write!(f, "{:064b}", self.0) | |
1424 | } | |
1425 | } | |
1426 | ||
1427 | impl FiniteBitSetTy for u128 { | |
1428 | const DOMAIN_SIZE: u32 = 128; | |
1429 | ||
1430 | const FILLED: Self = Self::MAX; | |
1431 | const EMPTY: Self = Self::MIN; | |
1432 | ||
1433 | const ONE: Self = 1u128; | |
1434 | const ZERO: Self = 0u128; | |
1435 | ||
1436 | fn checked_shl(self, rhs: u32) -> Option<Self> { | |
1437 | self.checked_shl(rhs) | |
1438 | } | |
1439 | ||
1440 | fn checked_shr(self, rhs: u32) -> Option<Self> { | |
1441 | self.checked_shr(rhs) | |
1442 | } | |
1443 | } | |
1444 | ||
1445 | impl std::fmt::Debug for FiniteBitSet<u128> { | |
1446 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1447 | write!(f, "{:0128b}", self.0) | |
1448 | } | |
1449 | } | |
1450 | ||
1451 | /// A fixed-sized bitset type represented by an integer type. Indices outwith than the range | |
1452 | /// representable by `T` are considered set. | |
1453 | #[derive(Copy, Clone, Eq, PartialEq, Decodable, Encodable)] | |
1454 | pub struct FiniteBitSet<T: FiniteBitSetTy>(pub T); | |
1455 | ||
1456 | impl<T: FiniteBitSetTy> FiniteBitSet<T> { | |
1457 | /// Creates a new, empty bitset. | |
1458 | pub fn new_empty() -> Self { | |
1459 | Self(T::EMPTY) | |
1460 | } | |
1461 | ||
1462 | /// Sets the `index`th bit. | |
1463 | pub fn set(&mut self, index: u32) { | |
1464 | self.0 |= T::ONE.checked_shl(index).unwrap_or(T::ZERO); | |
1465 | } | |
1466 | ||
1467 | /// Unsets the `index`th bit. | |
1468 | pub fn clear(&mut self, index: u32) { | |
1469 | self.0 &= !T::ONE.checked_shl(index).unwrap_or(T::ZERO); | |
1470 | } | |
1471 | ||
1472 | /// Sets the `i`th to `j`th bits. | |
1473 | pub fn set_range(&mut self, range: Range<u32>) { | |
1474 | let bits = T::FILLED | |
1475 | .checked_shl(range.end - range.start) | |
1476 | .unwrap_or(T::ZERO) | |
1477 | .not() | |
1478 | .checked_shl(range.start) | |
1479 | .unwrap_or(T::ZERO); | |
1480 | self.0 |= bits; | |
1481 | } | |
1482 | ||
1483 | /// Is the set empty? | |
1484 | pub fn is_empty(&self) -> bool { | |
1485 | self.0 == T::EMPTY | |
1486 | } | |
1487 | ||
1488 | /// Returns the domain size of the bitset. | |
1489 | pub fn within_domain(&self, index: u32) -> bool { | |
1490 | index < T::DOMAIN_SIZE | |
1491 | } | |
1492 | ||
1493 | /// Returns if the `index`th bit is set. | |
1494 | pub fn contains(&self, index: u32) -> Option<bool> { | |
1495 | self.within_domain(index) | |
1496 | .then(|| ((self.0.checked_shr(index).unwrap_or(T::ONE)) & T::ONE) == T::ONE) | |
1497 | } | |
1498 | } | |
1499 | ||
1500 | impl<T: FiniteBitSetTy> Default for FiniteBitSet<T> { | |
1501 | fn default() -> Self { | |
1502 | Self::new_empty() | |
1503 | } | |
1504 | } |