]>
Commit | Line | Data |
---|---|---|
e74abb32 | 1 | use crate::vec::{Idx, IndexVec}; |
3dfed10e | 2 | use arrayvec::ArrayVec; |
0bf4aa26 XL |
3 | use std::fmt; |
4 | use std::iter; | |
5 | use std::marker::PhantomData; | |
6 | use std::mem; | |
3dfed10e | 7 | use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Not, Range, Shl}; |
0bf4aa26 | 8 | use std::slice; |
416331ca | 9 | |
3dfed10e XL |
10 | use rustc_macros::{Decodable, Encodable}; |
11 | ||
dc9dc135 | 12 | #[cfg(test)] |
416331ca | 13 | mod tests; |
0bf4aa26 XL |
14 | |
15 | pub type Word = u64; | |
16 | pub const WORD_BYTES: usize = mem::size_of::<Word>(); | |
17 | pub const WORD_BITS: usize = WORD_BYTES * 8; | |
18 | ||
94222f64 XL |
19 | pub trait BitRelations<Rhs> { |
20 | fn union(&mut self, other: &Rhs) -> bool; | |
21 | fn subtract(&mut self, other: &Rhs) -> bool; | |
22 | fn intersect(&mut self, other: &Rhs) -> bool; | |
23 | } | |
24 | ||
25 | macro_rules! bit_relations_inherent_impls { | |
26 | () => { | |
27 | /// Sets `self = self | other` and returns `true` if `self` changed | |
28 | /// (i.e., if new bits were added). | |
29 | pub fn union<Rhs>(&mut self, other: &Rhs) -> bool | |
30 | where | |
31 | Self: BitRelations<Rhs>, | |
32 | { | |
33 | <Self as BitRelations<Rhs>>::union(self, other) | |
34 | } | |
35 | ||
36 | /// Sets `self = self - other` and returns `true` if `self` changed. | |
37 | /// (i.e., if any bits were removed). | |
38 | pub fn subtract<Rhs>(&mut self, other: &Rhs) -> bool | |
39 | where | |
40 | Self: BitRelations<Rhs>, | |
41 | { | |
42 | <Self as BitRelations<Rhs>>::subtract(self, other) | |
43 | } | |
44 | ||
45 | /// Sets `self = self & other` and return `true` if `self` changed. | |
46 | /// (i.e., if any bits were removed). | |
47 | pub fn intersect<Rhs>(&mut self, other: &Rhs) -> bool | |
48 | where | |
49 | Self: BitRelations<Rhs>, | |
50 | { | |
51 | <Self as BitRelations<Rhs>>::intersect(self, other) | |
52 | } | |
53 | }; | |
54 | } | |
55 | ||
e74abb32 XL |
56 | /// A fixed-size bitset type with a dense representation. |
57 | /// | |
58 | /// NOTE: Use [`GrowableBitSet`] if you need support for resizing after creation. | |
0bf4aa26 XL |
59 | /// |
60 | /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also | |
61 | /// just be `usize`. | |
62 | /// | |
63 | /// All operations that involve an element will panic if the element is equal | |
64 | /// to or greater than the domain size. All operations that involve two bitsets | |
65 | /// will panic if the bitsets have differing domain sizes. | |
e74abb32 | 66 | /// |
1b1a35ee XL |
67 | #[derive(Eq, PartialEq, Decodable, Encodable)] |
68 | pub struct BitSet<T> { | |
0bf4aa26 XL |
69 | domain_size: usize, |
70 | words: Vec<Word>, | |
71 | marker: PhantomData<T>, | |
72 | } | |
73 | ||
1b1a35ee XL |
74 | impl<T> BitSet<T> { |
75 | /// Gets the domain size. | |
76 | pub fn domain_size(&self) -> usize { | |
77 | self.domain_size | |
78 | } | |
79 | } | |
80 | ||
0bf4aa26 | 81 | impl<T: Idx> BitSet<T> { |
9fa01778 | 82 | /// Creates a new, empty bitset with a given `domain_size`. |
0bf4aa26 XL |
83 | #[inline] |
84 | pub fn new_empty(domain_size: usize) -> BitSet<T> { | |
85 | let num_words = num_words(domain_size); | |
dfeec247 | 86 | BitSet { domain_size, words: vec![0; num_words], marker: PhantomData } |
0bf4aa26 XL |
87 | } |
88 | ||
9fa01778 | 89 | /// Creates a new, filled bitset with a given `domain_size`. |
0bf4aa26 XL |
90 | #[inline] |
91 | pub fn new_filled(domain_size: usize) -> BitSet<T> { | |
92 | let num_words = num_words(domain_size); | |
dfeec247 | 93 | let mut result = BitSet { domain_size, words: vec![!0; num_words], marker: PhantomData }; |
0bf4aa26 XL |
94 | result.clear_excess_bits(); |
95 | result | |
96 | } | |
97 | ||
0bf4aa26 XL |
98 | /// Clear all elements. |
99 | #[inline] | |
100 | pub fn clear(&mut self) { | |
101 | for word in &mut self.words { | |
102 | *word = 0; | |
103 | } | |
104 | } | |
105 | ||
106 | /// Clear excess bits in the final word. | |
107 | fn clear_excess_bits(&mut self) { | |
108 | let num_bits_in_final_word = self.domain_size % WORD_BITS; | |
109 | if num_bits_in_final_word > 0 { | |
110 | let mask = (1 << num_bits_in_final_word) - 1; | |
111 | let final_word_idx = self.words.len() - 1; | |
112 | self.words[final_word_idx] &= mask; | |
113 | } | |
114 | } | |
115 | ||
0bf4aa26 XL |
116 | /// Count the number of set bits in the set. |
117 | pub fn count(&self) -> usize { | |
118 | self.words.iter().map(|e| e.count_ones() as usize).sum() | |
119 | } | |
120 | ||
9fa01778 | 121 | /// Returns `true` if `self` contains `elem`. |
0bf4aa26 XL |
122 | #[inline] |
123 | pub fn contains(&self, elem: T) -> bool { | |
124 | assert!(elem.index() < self.domain_size); | |
125 | let (word_index, mask) = word_index_and_mask(elem); | |
126 | (self.words[word_index] & mask) != 0 | |
127 | } | |
128 | ||
129 | /// Is `self` is a (non-strict) superset of `other`? | |
130 | #[inline] | |
131 | pub fn superset(&self, other: &BitSet<T>) -> bool { | |
132 | assert_eq!(self.domain_size, other.domain_size); | |
133 | self.words.iter().zip(&other.words).all(|(a, b)| (a & b) == *b) | |
134 | } | |
135 | ||
136 | /// Is the set empty? | |
137 | #[inline] | |
138 | pub fn is_empty(&self) -> bool { | |
139 | self.words.iter().all(|a| *a == 0) | |
140 | } | |
141 | ||
9fa01778 | 142 | /// Insert `elem`. Returns whether the set has changed. |
0bf4aa26 XL |
143 | #[inline] |
144 | pub fn insert(&mut self, elem: T) -> bool { | |
145 | assert!(elem.index() < self.domain_size); | |
146 | let (word_index, mask) = word_index_and_mask(elem); | |
147 | let word_ref = &mut self.words[word_index]; | |
148 | let word = *word_ref; | |
149 | let new_word = word | mask; | |
150 | *word_ref = new_word; | |
151 | new_word != word | |
152 | } | |
153 | ||
154 | /// Sets all bits to true. | |
155 | pub fn insert_all(&mut self) { | |
156 | for word in &mut self.words { | |
157 | *word = !0; | |
158 | } | |
159 | self.clear_excess_bits(); | |
160 | } | |
161 | ||
9fa01778 | 162 | /// Returns `true` if the set has changed. |
0bf4aa26 XL |
163 | #[inline] |
164 | pub fn remove(&mut self, elem: T) -> bool { | |
165 | assert!(elem.index() < self.domain_size); | |
166 | let (word_index, mask) = word_index_and_mask(elem); | |
167 | let word_ref = &mut self.words[word_index]; | |
168 | let word = *word_ref; | |
169 | let new_word = word & !mask; | |
170 | *word_ref = new_word; | |
171 | new_word != word | |
172 | } | |
173 | ||
9fa01778 | 174 | /// Gets a slice of the underlying words. |
0bf4aa26 XL |
175 | pub fn words(&self) -> &[Word] { |
176 | &self.words | |
177 | } | |
178 | ||
179 | /// Iterates over the indices of set bits in a sorted order. | |
180 | #[inline] | |
416331ca | 181 | pub fn iter(&self) -> BitIter<'_, T> { |
e74abb32 | 182 | BitIter::new(&self.words) |
0bf4aa26 XL |
183 | } |
184 | ||
185 | /// Duplicates the set as a hybrid set. | |
186 | pub fn to_hybrid(&self) -> HybridBitSet<T> { | |
187 | // Note: we currently don't bother trying to make a Sparse set. | |
188 | HybridBitSet::Dense(self.to_owned()) | |
189 | } | |
dc9dc135 XL |
190 | |
191 | /// Set `self = self | other`. In contrast to `union` returns `true` if the set contains at | |
192 | /// least one bit that is not in `other` (i.e. `other` is not a superset of `self`). | |
193 | /// | |
194 | /// This is an optimization for union of a hybrid bitset. | |
195 | fn reverse_union_sparse(&mut self, sparse: &SparseBitSet<T>) -> bool { | |
196 | assert!(sparse.domain_size == self.domain_size); | |
197 | self.clear_excess_bits(); | |
198 | ||
199 | let mut not_already = false; | |
200 | // Index of the current word not yet merged. | |
201 | let mut current_index = 0; | |
202 | // Mask of bits that came from the sparse set in the current word. | |
203 | let mut new_bit_mask = 0; | |
204 | for (word_index, mask) in sparse.iter().map(|x| word_index_and_mask(*x)) { | |
205 | // Next bit is in a word not inspected yet. | |
206 | if word_index > current_index { | |
207 | self.words[current_index] |= new_bit_mask; | |
208 | // Were there any bits in the old word that did not occur in the sparse set? | |
209 | not_already |= (self.words[current_index] ^ new_bit_mask) != 0; | |
210 | // Check all words we skipped for any set bit. | |
dfeec247 | 211 | not_already |= self.words[current_index + 1..word_index].iter().any(|&x| x != 0); |
dc9dc135 XL |
212 | // Update next word. |
213 | current_index = word_index; | |
214 | // Reset bit mask, no bits have been merged yet. | |
215 | new_bit_mask = 0; | |
216 | } | |
217 | // Add bit and mark it as coming from the sparse set. | |
218 | // self.words[word_index] |= mask; | |
219 | new_bit_mask |= mask; | |
220 | } | |
221 | self.words[current_index] |= new_bit_mask; | |
222 | // Any bits in the last inspected word that were not in the sparse set? | |
223 | not_already |= (self.words[current_index] ^ new_bit_mask) != 0; | |
224 | // Any bits in the tail? Note `clear_excess_bits` before. | |
dfeec247 | 225 | not_already |= self.words[current_index + 1..].iter().any(|&x| x != 0); |
dc9dc135 XL |
226 | |
227 | not_already | |
228 | } | |
94222f64 XL |
229 | |
230 | bit_relations_inherent_impls! {} | |
0bf4aa26 XL |
231 | } |
232 | ||
94222f64 XL |
233 | // dense REL dense |
234 | impl<T: Idx> BitRelations<BitSet<T>> for BitSet<T> { | |
235 | fn union(&mut self, other: &BitSet<T>) -> bool { | |
236 | assert_eq!(self.domain_size, other.domain_size); | |
237 | bitwise(&mut self.words, &other.words, |a, b| a | b) | |
238 | } | |
239 | ||
240 | fn subtract(&mut self, other: &BitSet<T>) -> bool { | |
241 | assert_eq!(self.domain_size, other.domain_size); | |
242 | bitwise(&mut self.words, &other.words, |a, b| a & !b) | |
243 | } | |
244 | ||
245 | fn intersect(&mut self, other: &BitSet<T>) -> bool { | |
246 | assert_eq!(self.domain_size, other.domain_size); | |
247 | bitwise(&mut self.words, &other.words, |a, b| a & b) | |
248 | } | |
0bf4aa26 XL |
249 | } |
250 | ||
94222f64 XL |
251 | // Applies a function to mutate a bitset, and returns true if any |
252 | // of the applications return true | |
253 | fn sequential_update<T: Idx>( | |
254 | mut self_update: impl FnMut(T) -> bool, | |
255 | it: impl Iterator<Item = T>, | |
256 | ) -> bool { | |
257 | let mut changed = false; | |
258 | for elem in it { | |
259 | changed |= self_update(elem); | |
260 | } | |
261 | changed | |
0bf4aa26 XL |
262 | } |
263 | ||
94222f64 XL |
264 | // Optimization of intersection for SparseBitSet that's generic |
265 | // over the RHS | |
266 | fn sparse_intersect<T: Idx>( | |
267 | set: &mut SparseBitSet<T>, | |
268 | other_contains: impl Fn(&T) -> bool, | |
269 | ) -> bool { | |
270 | let size = set.elems.len(); | |
271 | set.elems.retain(|elem| other_contains(elem)); | |
272 | set.elems.len() != size | |
273 | } | |
274 | ||
275 | // Optimization of dense/sparse intersection. The resulting set is | |
276 | // guaranteed to be at most the size of the sparse set, and hence can be | |
277 | // represented as a sparse set. Therefore the sparse set is copied and filtered, | |
278 | // then returned as the new set. | |
279 | fn dense_sparse_intersect<T: Idx>( | |
280 | dense: &BitSet<T>, | |
281 | sparse: &SparseBitSet<T>, | |
282 | ) -> (SparseBitSet<T>, bool) { | |
283 | let mut sparse_copy = sparse.clone(); | |
284 | sparse_intersect(&mut sparse_copy, |el| dense.contains(*el)); | |
285 | let n = sparse_copy.len(); | |
286 | (sparse_copy, n != dense.count()) | |
287 | } | |
288 | ||
289 | // hybrid REL dense | |
290 | impl<T: Idx> BitRelations<BitSet<T>> for HybridBitSet<T> { | |
291 | fn union(&mut self, other: &BitSet<T>) -> bool { | |
292 | assert_eq!(self.domain_size(), other.domain_size); | |
293 | match self { | |
294 | HybridBitSet::Sparse(sparse) => { | |
295 | // `self` is sparse and `other` is dense. To | |
296 | // merge them, we have two available strategies: | |
297 | // * Densify `self` then merge other | |
298 | // * Clone other then integrate bits from `self` | |
299 | // The second strategy requires dedicated method | |
300 | // since the usual `union` returns the wrong | |
301 | // result. In the dedicated case the computation | |
302 | // is slightly faster if the bits of the sparse | |
303 | // bitset map to only few words of the dense | |
304 | // representation, i.e. indices are near each | |
305 | // other. | |
306 | // | |
307 | // Benchmarking seems to suggest that the second | |
308 | // option is worth it. | |
309 | let mut new_dense = other.clone(); | |
310 | let changed = new_dense.reverse_union_sparse(sparse); | |
311 | *self = HybridBitSet::Dense(new_dense); | |
312 | changed | |
313 | } | |
314 | ||
315 | HybridBitSet::Dense(dense) => dense.union(other), | |
316 | } | |
317 | } | |
318 | ||
319 | fn subtract(&mut self, other: &BitSet<T>) -> bool { | |
320 | assert_eq!(self.domain_size(), other.domain_size); | |
321 | match self { | |
322 | HybridBitSet::Sparse(sparse) => { | |
323 | sequential_update(|elem| sparse.remove(elem), other.iter()) | |
324 | } | |
325 | HybridBitSet::Dense(dense) => dense.subtract(other), | |
326 | } | |
327 | } | |
328 | ||
329 | fn intersect(&mut self, other: &BitSet<T>) -> bool { | |
330 | assert_eq!(self.domain_size(), other.domain_size); | |
331 | match self { | |
332 | HybridBitSet::Sparse(sparse) => sparse_intersect(sparse, |elem| other.contains(*elem)), | |
333 | HybridBitSet::Dense(dense) => dense.intersect(other), | |
334 | } | |
0bf4aa26 XL |
335 | } |
336 | } | |
337 | ||
94222f64 XL |
338 | // dense REL hybrid |
339 | impl<T: Idx> BitRelations<HybridBitSet<T>> for BitSet<T> { | |
340 | fn union(&mut self, other: &HybridBitSet<T>) -> bool { | |
341 | assert_eq!(self.domain_size, other.domain_size()); | |
342 | match other { | |
343 | HybridBitSet::Sparse(sparse) => { | |
344 | sequential_update(|elem| self.insert(elem), sparse.iter().cloned()) | |
345 | } | |
346 | HybridBitSet::Dense(dense) => self.union(dense), | |
347 | } | |
348 | } | |
349 | ||
350 | fn subtract(&mut self, other: &HybridBitSet<T>) -> bool { | |
351 | assert_eq!(self.domain_size, other.domain_size()); | |
352 | match other { | |
353 | HybridBitSet::Sparse(sparse) => { | |
354 | sequential_update(|elem| self.remove(elem), sparse.iter().cloned()) | |
355 | } | |
356 | HybridBitSet::Dense(dense) => self.subtract(dense), | |
357 | } | |
358 | } | |
359 | ||
360 | fn intersect(&mut self, other: &HybridBitSet<T>) -> bool { | |
361 | assert_eq!(self.domain_size, other.domain_size()); | |
362 | match other { | |
363 | HybridBitSet::Sparse(sparse) => { | |
364 | let (updated, changed) = dense_sparse_intersect(self, sparse); | |
365 | ||
366 | // We can't directly assign the SparseBitSet to the BitSet, and | |
367 | // doing `*self = updated.to_dense()` would cause a drop / reallocation. Instead, | |
368 | // the BitSet is cleared and `updated` is copied into `self`. | |
369 | self.clear(); | |
370 | for elem in updated.iter() { | |
371 | self.insert(*elem); | |
372 | } | |
373 | changed | |
374 | } | |
375 | HybridBitSet::Dense(dense) => self.intersect(dense), | |
376 | } | |
377 | } | |
378 | } | |
379 | ||
380 | // hybrid REL hybrid | |
381 | impl<T: Idx> BitRelations<HybridBitSet<T>> for HybridBitSet<T> { | |
382 | fn union(&mut self, other: &HybridBitSet<T>) -> bool { | |
383 | assert_eq!(self.domain_size(), other.domain_size()); | |
384 | match self { | |
385 | HybridBitSet::Sparse(_) => { | |
386 | match other { | |
387 | HybridBitSet::Sparse(other_sparse) => { | |
388 | // Both sets are sparse. Add the elements in | |
389 | // `other_sparse` to `self` one at a time. This | |
390 | // may or may not cause `self` to be densified. | |
391 | let mut changed = false; | |
392 | for elem in other_sparse.iter() { | |
393 | changed |= self.insert(*elem); | |
394 | } | |
395 | changed | |
396 | } | |
397 | ||
398 | HybridBitSet::Dense(other_dense) => self.union(other_dense), | |
399 | } | |
400 | } | |
401 | ||
402 | HybridBitSet::Dense(self_dense) => self_dense.union(other), | |
403 | } | |
404 | } | |
405 | ||
406 | fn subtract(&mut self, other: &HybridBitSet<T>) -> bool { | |
407 | assert_eq!(self.domain_size(), other.domain_size()); | |
408 | match self { | |
409 | HybridBitSet::Sparse(self_sparse) => { | |
410 | sequential_update(|elem| self_sparse.remove(elem), other.iter()) | |
411 | } | |
412 | HybridBitSet::Dense(self_dense) => self_dense.subtract(other), | |
413 | } | |
414 | } | |
415 | ||
416 | fn intersect(&mut self, other: &HybridBitSet<T>) -> bool { | |
417 | assert_eq!(self.domain_size(), other.domain_size()); | |
418 | match self { | |
419 | HybridBitSet::Sparse(self_sparse) => { | |
420 | sparse_intersect(self_sparse, |elem| other.contains(*elem)) | |
421 | } | |
422 | HybridBitSet::Dense(self_dense) => match other { | |
423 | HybridBitSet::Sparse(other_sparse) => { | |
424 | let (updated, changed) = dense_sparse_intersect(self_dense, other_sparse); | |
425 | *self = HybridBitSet::Sparse(updated); | |
426 | changed | |
427 | } | |
428 | HybridBitSet::Dense(other_dense) => self_dense.intersect(other_dense), | |
429 | }, | |
430 | } | |
0bf4aa26 XL |
431 | } |
432 | } | |
433 | ||
1b1a35ee XL |
434 | impl<T> Clone for BitSet<T> { |
435 | fn clone(&self) -> Self { | |
436 | BitSet { domain_size: self.domain_size, words: self.words.clone(), marker: PhantomData } | |
437 | } | |
438 | ||
439 | fn clone_from(&mut self, from: &Self) { | |
440 | if self.domain_size != from.domain_size { | |
441 | self.words.resize(from.domain_size, 0); | |
442 | self.domain_size = from.domain_size; | |
443 | } | |
444 | ||
445 | self.words.copy_from_slice(&from.words); | |
446 | } | |
447 | } | |
448 | ||
0bf4aa26 | 449 | impl<T: Idx> fmt::Debug for BitSet<T> { |
9fa01778 | 450 | fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { |
dfeec247 | 451 | w.debug_list().entries(self.iter()).finish() |
0bf4aa26 XL |
452 | } |
453 | } | |
454 | ||
455 | impl<T: Idx> ToString for BitSet<T> { | |
456 | fn to_string(&self) -> String { | |
457 | let mut result = String::new(); | |
458 | let mut sep = '['; | |
459 | ||
460 | // Note: this is a little endian printout of bytes. | |
461 | ||
462 | // i tracks how many bits we have printed so far. | |
463 | let mut i = 0; | |
464 | for word in &self.words { | |
465 | let mut word = *word; | |
dfeec247 XL |
466 | for _ in 0..WORD_BYTES { |
467 | // for each byte in `word`: | |
0bf4aa26 XL |
468 | let remain = self.domain_size - i; |
469 | // If less than a byte remains, then mask just that many bits. | |
470 | let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF }; | |
471 | assert!(mask <= 0xFF); | |
472 | let byte = word & mask; | |
473 | ||
474 | result.push_str(&format!("{}{:02x}", sep, byte)); | |
475 | ||
dfeec247 XL |
476 | if remain <= 8 { |
477 | break; | |
478 | } | |
0bf4aa26 XL |
479 | word >>= 8; |
480 | i += 8; | |
481 | sep = '-'; | |
482 | } | |
483 | sep = '|'; | |
484 | } | |
485 | result.push(']'); | |
486 | ||
487 | result | |
488 | } | |
489 | } | |
490 | ||
491 | pub struct BitIter<'a, T: Idx> { | |
e74abb32 XL |
492 | /// A copy of the current word, but with any already-visited bits cleared. |
493 | /// (This lets us use `trailing_zeros()` to find the next set bit.) When it | |
494 | /// is reduced to 0, we move onto the next word. | |
495 | word: Word, | |
496 | ||
497 | /// The offset (measured in bits) of the current word. | |
498 | offset: usize, | |
499 | ||
500 | /// Underlying iterator over the words. | |
501 | iter: slice::Iter<'a, Word>, | |
502 | ||
dfeec247 | 503 | marker: PhantomData<T>, |
0bf4aa26 XL |
504 | } |
505 | ||
e74abb32 XL |
506 | impl<'a, T: Idx> BitIter<'a, T> { |
507 | #[inline] | |
508 | fn new(words: &'a [Word]) -> BitIter<'a, T> { | |
509 | // We initialize `word` and `offset` to degenerate values. On the first | |
510 | // call to `next()` we will fall through to getting the first word from | |
511 | // `iter`, which sets `word` to the first word (if there is one) and | |
512 | // `offset` to 0. Doing it this way saves us from having to maintain | |
513 | // additional state about whether we have started. | |
514 | BitIter { | |
515 | word: 0, | |
ba9703b0 | 516 | offset: usize::MAX - (WORD_BITS - 1), |
e74abb32 XL |
517 | iter: words.iter(), |
518 | marker: PhantomData, | |
519 | } | |
520 | } | |
521 | } | |
522 | ||
0bf4aa26 XL |
523 | impl<'a, T: Idx> Iterator for BitIter<'a, T> { |
524 | type Item = T; | |
525 | fn next(&mut self) -> Option<T> { | |
526 | loop { | |
e74abb32 XL |
527 | if self.word != 0 { |
528 | // Get the position of the next set bit in the current word, | |
529 | // then clear the bit. | |
530 | let bit_pos = self.word.trailing_zeros() as usize; | |
531 | let bit = 1 << bit_pos; | |
532 | self.word ^= bit; | |
dfeec247 | 533 | return Some(T::new(bit_pos + self.offset)); |
0bf4aa26 XL |
534 | } |
535 | ||
e74abb32 XL |
536 | // Move onto the next word. `wrapping_add()` is needed to handle |
537 | // the degenerate initial value given to `offset` in `new()`. | |
538 | let word = self.iter.next()?; | |
539 | self.word = *word; | |
540 | self.offset = self.offset.wrapping_add(WORD_BITS); | |
0bf4aa26 XL |
541 | } |
542 | } | |
543 | } | |
544 | ||
0bf4aa26 XL |
545 | #[inline] |
546 | fn bitwise<Op>(out_vec: &mut [Word], in_vec: &[Word], op: Op) -> bool | |
dfeec247 XL |
547 | where |
548 | Op: Fn(Word, Word) -> Word, | |
0bf4aa26 XL |
549 | { |
550 | assert_eq!(out_vec.len(), in_vec.len()); | |
17df50a5 | 551 | let mut changed = 0; |
cdc7bbd5 | 552 | for (out_elem, in_elem) in iter::zip(out_vec, in_vec) { |
0bf4aa26 XL |
553 | let old_val = *out_elem; |
554 | let new_val = op(old_val, *in_elem); | |
555 | *out_elem = new_val; | |
17df50a5 XL |
556 | // This is essentially equivalent to a != with changed being a bool, but |
557 | // in practice this code gets auto-vectorized by the compiler for most | |
558 | // operators. Using != here causes us to generate quite poor code as the | |
559 | // compiler tries to go back to a boolean on each loop iteration. | |
560 | changed |= old_val ^ new_val; | |
0bf4aa26 | 561 | } |
17df50a5 | 562 | changed != 0 |
0bf4aa26 XL |
563 | } |
564 | ||
565 | const SPARSE_MAX: usize = 8; | |
566 | ||
567 | /// A fixed-size bitset type with a sparse representation and a maximum of | |
3dfed10e XL |
568 | /// `SPARSE_MAX` elements. The elements are stored as a sorted `ArrayVec` with |
569 | /// no duplicates. | |
0bf4aa26 XL |
570 | /// |
571 | /// This type is used by `HybridBitSet`; do not use directly. | |
572 | #[derive(Clone, Debug)] | |
1b1a35ee | 573 | pub struct SparseBitSet<T> { |
0bf4aa26 | 574 | domain_size: usize, |
cdc7bbd5 | 575 | elems: ArrayVec<T, SPARSE_MAX>, |
0bf4aa26 XL |
576 | } |
577 | ||
578 | impl<T: Idx> SparseBitSet<T> { | |
579 | fn new_empty(domain_size: usize) -> Self { | |
3dfed10e | 580 | SparseBitSet { domain_size, elems: ArrayVec::new() } |
0bf4aa26 XL |
581 | } |
582 | ||
583 | fn len(&self) -> usize { | |
584 | self.elems.len() | |
585 | } | |
586 | ||
587 | fn is_empty(&self) -> bool { | |
588 | self.elems.len() == 0 | |
589 | } | |
590 | ||
591 | fn contains(&self, elem: T) -> bool { | |
592 | assert!(elem.index() < self.domain_size); | |
593 | self.elems.contains(&elem) | |
594 | } | |
595 | ||
596 | fn insert(&mut self, elem: T) -> bool { | |
597 | assert!(elem.index() < self.domain_size); | |
598 | let changed = if let Some(i) = self.elems.iter().position(|&e| e >= elem) { | |
599 | if self.elems[i] == elem { | |
600 | // `elem` is already in the set. | |
601 | false | |
602 | } else { | |
603 | // `elem` is smaller than one or more existing elements. | |
604 | self.elems.insert(i, elem); | |
605 | true | |
606 | } | |
607 | } else { | |
608 | // `elem` is larger than all existing elements. | |
609 | self.elems.push(elem); | |
610 | true | |
611 | }; | |
612 | assert!(self.len() <= SPARSE_MAX); | |
613 | changed | |
614 | } | |
615 | ||
616 | fn remove(&mut self, elem: T) -> bool { | |
617 | assert!(elem.index() < self.domain_size); | |
618 | if let Some(i) = self.elems.iter().position(|&e| e == elem) { | |
619 | self.elems.remove(i); | |
620 | true | |
621 | } else { | |
622 | false | |
623 | } | |
624 | } | |
625 | ||
626 | fn to_dense(&self) -> BitSet<T> { | |
627 | let mut dense = BitSet::new_empty(self.domain_size); | |
628 | for elem in self.elems.iter() { | |
629 | dense.insert(*elem); | |
630 | } | |
631 | dense | |
632 | } | |
633 | ||
9fa01778 | 634 | fn iter(&self) -> slice::Iter<'_, T> { |
0bf4aa26 XL |
635 | self.elems.iter() |
636 | } | |
0bf4aa26 | 637 | |
94222f64 | 638 | bit_relations_inherent_impls! {} |
0bf4aa26 XL |
639 | } |
640 | ||
641 | /// A fixed-size bitset type with a hybrid representation: sparse when there | |
642 | /// are up to a `SPARSE_MAX` elements in the set, but dense when there are more | |
643 | /// than `SPARSE_MAX`. | |
644 | /// | |
645 | /// This type is especially efficient for sets that typically have a small | |
646 | /// number of elements, but a large `domain_size`, and are cleared frequently. | |
647 | /// | |
648 | /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also | |
649 | /// just be `usize`. | |
650 | /// | |
651 | /// All operations that involve an element will panic if the element is equal | |
652 | /// to or greater than the domain size. All operations that involve two bitsets | |
653 | /// will panic if the bitsets have differing domain sizes. | |
1b1a35ee XL |
654 | #[derive(Clone)] |
655 | pub enum HybridBitSet<T> { | |
0bf4aa26 XL |
656 | Sparse(SparseBitSet<T>), |
657 | Dense(BitSet<T>), | |
658 | } | |
659 | ||
1b1a35ee XL |
660 | impl<T: Idx> fmt::Debug for HybridBitSet<T> { |
661 | fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { | |
662 | match self { | |
663 | Self::Sparse(b) => b.fmt(w), | |
664 | Self::Dense(b) => b.fmt(w), | |
665 | } | |
666 | } | |
667 | } | |
668 | ||
0bf4aa26 XL |
669 | impl<T: Idx> HybridBitSet<T> { |
670 | pub fn new_empty(domain_size: usize) -> Self { | |
671 | HybridBitSet::Sparse(SparseBitSet::new_empty(domain_size)) | |
672 | } | |
673 | ||
1b1a35ee | 674 | pub fn domain_size(&self) -> usize { |
0bf4aa26 XL |
675 | match self { |
676 | HybridBitSet::Sparse(sparse) => sparse.domain_size, | |
677 | HybridBitSet::Dense(dense) => dense.domain_size, | |
678 | } | |
679 | } | |
680 | ||
681 | pub fn clear(&mut self) { | |
682 | let domain_size = self.domain_size(); | |
683 | *self = HybridBitSet::new_empty(domain_size); | |
684 | } | |
685 | ||
686 | pub fn contains(&self, elem: T) -> bool { | |
687 | match self { | |
688 | HybridBitSet::Sparse(sparse) => sparse.contains(elem), | |
689 | HybridBitSet::Dense(dense) => dense.contains(elem), | |
690 | } | |
691 | } | |
692 | ||
693 | pub fn superset(&self, other: &HybridBitSet<T>) -> bool { | |
694 | match (self, other) { | |
695 | (HybridBitSet::Dense(self_dense), HybridBitSet::Dense(other_dense)) => { | |
696 | self_dense.superset(other_dense) | |
697 | } | |
698 | _ => { | |
699 | assert!(self.domain_size() == other.domain_size()); | |
700 | other.iter().all(|elem| self.contains(elem)) | |
701 | } | |
702 | } | |
703 | } | |
704 | ||
705 | pub fn is_empty(&self) -> bool { | |
706 | match self { | |
707 | HybridBitSet::Sparse(sparse) => sparse.is_empty(), | |
708 | HybridBitSet::Dense(dense) => dense.is_empty(), | |
709 | } | |
710 | } | |
711 | ||
712 | pub fn insert(&mut self, elem: T) -> bool { | |
713 | // No need to check `elem` against `self.domain_size` here because all | |
714 | // the match cases check it, one way or another. | |
715 | match self { | |
716 | HybridBitSet::Sparse(sparse) if sparse.len() < SPARSE_MAX => { | |
717 | // The set is sparse and has space for `elem`. | |
718 | sparse.insert(elem) | |
719 | } | |
720 | HybridBitSet::Sparse(sparse) if sparse.contains(elem) => { | |
721 | // The set is sparse and does not have space for `elem`, but | |
722 | // that doesn't matter because `elem` is already present. | |
723 | false | |
724 | } | |
725 | HybridBitSet::Sparse(sparse) => { | |
726 | // The set is sparse and full. Convert to a dense set. | |
727 | let mut dense = sparse.to_dense(); | |
728 | let changed = dense.insert(elem); | |
729 | assert!(changed); | |
730 | *self = HybridBitSet::Dense(dense); | |
731 | changed | |
732 | } | |
733 | HybridBitSet::Dense(dense) => dense.insert(elem), | |
734 | } | |
735 | } | |
736 | ||
737 | pub fn insert_all(&mut self) { | |
738 | let domain_size = self.domain_size(); | |
739 | match self { | |
740 | HybridBitSet::Sparse(_) => { | |
741 | *self = HybridBitSet::Dense(BitSet::new_filled(domain_size)); | |
742 | } | |
743 | HybridBitSet::Dense(dense) => dense.insert_all(), | |
744 | } | |
745 | } | |
746 | ||
747 | pub fn remove(&mut self, elem: T) -> bool { | |
748 | // Note: we currently don't bother going from Dense back to Sparse. | |
749 | match self { | |
750 | HybridBitSet::Sparse(sparse) => sparse.remove(elem), | |
751 | HybridBitSet::Dense(dense) => dense.remove(elem), | |
752 | } | |
753 | } | |
754 | ||
0bf4aa26 XL |
755 | /// Converts to a dense set, consuming itself in the process. |
756 | pub fn to_dense(self) -> BitSet<T> { | |
757 | match self { | |
758 | HybridBitSet::Sparse(sparse) => sparse.to_dense(), | |
759 | HybridBitSet::Dense(dense) => dense, | |
760 | } | |
761 | } | |
762 | ||
9fa01778 | 763 | pub fn iter(&self) -> HybridIter<'_, T> { |
0bf4aa26 XL |
764 | match self { |
765 | HybridBitSet::Sparse(sparse) => HybridIter::Sparse(sparse.iter()), | |
766 | HybridBitSet::Dense(dense) => HybridIter::Dense(dense.iter()), | |
767 | } | |
768 | } | |
0bf4aa26 | 769 | |
94222f64 | 770 | bit_relations_inherent_impls! {} |
0bf4aa26 XL |
771 | } |
772 | ||
773 | pub enum HybridIter<'a, T: Idx> { | |
774 | Sparse(slice::Iter<'a, T>), | |
775 | Dense(BitIter<'a, T>), | |
776 | } | |
777 | ||
778 | impl<'a, T: Idx> Iterator for HybridIter<'a, T> { | |
779 | type Item = T; | |
780 | ||
781 | fn next(&mut self) -> Option<T> { | |
782 | match self { | |
e74abb32 | 783 | HybridIter::Sparse(sparse) => sparse.next().copied(), |
0bf4aa26 XL |
784 | HybridIter::Dense(dense) => dense.next(), |
785 | } | |
786 | } | |
787 | } | |
788 | ||
789 | /// A resizable bitset type with a dense representation. | |
790 | /// | |
791 | /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also | |
792 | /// just be `usize`. | |
793 | /// | |
794 | /// All operations that involve an element will panic if the element is equal | |
795 | /// to or greater than the domain size. | |
796 | #[derive(Clone, Debug, PartialEq)] | |
797 | pub struct GrowableBitSet<T: Idx> { | |
798 | bit_set: BitSet<T>, | |
799 | } | |
800 | ||
801 | impl<T: Idx> GrowableBitSet<T> { | |
802 | /// Ensure that the set can hold at least `min_domain_size` elements. | |
803 | pub fn ensure(&mut self, min_domain_size: usize) { | |
804 | if self.bit_set.domain_size < min_domain_size { | |
805 | self.bit_set.domain_size = min_domain_size; | |
806 | } | |
807 | ||
808 | let min_num_words = num_words(min_domain_size); | |
809 | if self.bit_set.words.len() < min_num_words { | |
810 | self.bit_set.words.resize(min_num_words, 0) | |
811 | } | |
812 | } | |
813 | ||
814 | pub fn new_empty() -> GrowableBitSet<T> { | |
815 | GrowableBitSet { bit_set: BitSet::new_empty(0) } | |
816 | } | |
817 | ||
48663c56 XL |
818 | pub fn with_capacity(capacity: usize) -> GrowableBitSet<T> { |
819 | GrowableBitSet { bit_set: BitSet::new_empty(capacity) } | |
0bf4aa26 XL |
820 | } |
821 | ||
9fa01778 | 822 | /// Returns `true` if the set has changed. |
0bf4aa26 XL |
823 | #[inline] |
824 | pub fn insert(&mut self, elem: T) -> bool { | |
825 | self.ensure(elem.index() + 1); | |
826 | self.bit_set.insert(elem) | |
827 | } | |
828 | ||
5869c6ff XL |
829 | /// Returns `true` if the set has changed. |
830 | #[inline] | |
831 | pub fn remove(&mut self, elem: T) -> bool { | |
832 | self.ensure(elem.index() + 1); | |
833 | self.bit_set.remove(elem) | |
834 | } | |
835 | ||
836 | #[inline] | |
837 | pub fn is_empty(&self) -> bool { | |
838 | self.bit_set.is_empty() | |
839 | } | |
840 | ||
0bf4aa26 XL |
841 | #[inline] |
842 | pub fn contains(&self, elem: T) -> bool { | |
843 | let (word_index, mask) = word_index_and_mask(elem); | |
c295e0f8 | 844 | self.bit_set.words.get(word_index).map_or(false, |word| (word & mask) != 0) |
0bf4aa26 XL |
845 | } |
846 | } | |
847 | ||
848 | /// A fixed-size 2D bit matrix type with a dense representation. | |
849 | /// | |
850 | /// `R` and `C` are index types used to identify rows and columns respectively; | |
851 | /// typically newtyped `usize` wrappers, but they can also just be `usize`. | |
852 | /// | |
853 | /// All operations that involve a row and/or column index will panic if the | |
854 | /// index exceeds the relevant bound. | |
3dfed10e | 855 | #[derive(Clone, Eq, PartialEq, Decodable, Encodable)] |
0bf4aa26 XL |
856 | pub struct BitMatrix<R: Idx, C: Idx> { |
857 | num_rows: usize, | |
858 | num_columns: usize, | |
859 | words: Vec<Word>, | |
860 | marker: PhantomData<(R, C)>, | |
861 | } | |
862 | ||
863 | impl<R: Idx, C: Idx> BitMatrix<R, C> { | |
9fa01778 | 864 | /// Creates a new `rows x columns` matrix, initially empty. |
0bf4aa26 XL |
865 | pub fn new(num_rows: usize, num_columns: usize) -> BitMatrix<R, C> { |
866 | // For every element, we need one bit for every other | |
867 | // element. Round up to an even number of words. | |
868 | let words_per_row = num_words(num_columns); | |
869 | BitMatrix { | |
870 | num_rows, | |
871 | num_columns, | |
872 | words: vec![0; num_rows * words_per_row], | |
873 | marker: PhantomData, | |
874 | } | |
875 | } | |
876 | ||
dc9dc135 XL |
877 | /// Creates a new matrix, with `row` used as the value for every row. |
878 | pub fn from_row_n(row: &BitSet<C>, num_rows: usize) -> BitMatrix<R, C> { | |
879 | let num_columns = row.domain_size(); | |
880 | let words_per_row = num_words(num_columns); | |
881 | assert_eq!(words_per_row, row.words().len()); | |
882 | BitMatrix { | |
883 | num_rows, | |
884 | num_columns, | |
885 | words: iter::repeat(row.words()).take(num_rows).flatten().cloned().collect(), | |
886 | marker: PhantomData, | |
887 | } | |
888 | } | |
889 | ||
890 | pub fn rows(&self) -> impl Iterator<Item = R> { | |
891 | (0..self.num_rows).map(R::new) | |
892 | } | |
893 | ||
0bf4aa26 XL |
894 | /// The range of bits for a given row. |
895 | fn range(&self, row: R) -> (usize, usize) { | |
896 | let words_per_row = num_words(self.num_columns); | |
897 | let start = row.index() * words_per_row; | |
898 | (start, start + words_per_row) | |
899 | } | |
900 | ||
901 | /// Sets the cell at `(row, column)` to true. Put another way, insert | |
902 | /// `column` to the bitset for `row`. | |
903 | /// | |
9fa01778 | 904 | /// Returns `true` if this changed the matrix. |
0bf4aa26 XL |
905 | pub fn insert(&mut self, row: R, column: C) -> bool { |
906 | assert!(row.index() < self.num_rows && column.index() < self.num_columns); | |
907 | let (start, _) = self.range(row); | |
908 | let (word_index, mask) = word_index_and_mask(column); | |
909 | let words = &mut self.words[..]; | |
910 | let word = words[start + word_index]; | |
911 | let new_word = word | mask; | |
912 | words[start + word_index] = new_word; | |
913 | word != new_word | |
914 | } | |
915 | ||
916 | /// Do the bits from `row` contain `column`? Put another way, is | |
917 | /// the matrix cell at `(row, column)` true? Put yet another way, | |
918 | /// if the matrix represents (transitive) reachability, can | |
919 | /// `row` reach `column`? | |
920 | pub fn contains(&self, row: R, column: C) -> bool { | |
921 | assert!(row.index() < self.num_rows && column.index() < self.num_columns); | |
922 | let (start, _) = self.range(row); | |
923 | let (word_index, mask) = word_index_and_mask(column); | |
924 | (self.words[start + word_index] & mask) != 0 | |
925 | } | |
926 | ||
9fa01778 | 927 | /// Returns those indices that are true in rows `a` and `b`. This |
3dfed10e | 928 | /// is an *O*(*n*) operation where *n* is the number of elements |
0bf4aa26 XL |
929 | /// (somewhat independent from the actual size of the |
930 | /// intersection, in particular). | |
931 | pub fn intersect_rows(&self, row1: R, row2: R) -> Vec<C> { | |
932 | assert!(row1.index() < self.num_rows && row2.index() < self.num_rows); | |
933 | let (row1_start, row1_end) = self.range(row1); | |
934 | let (row2_start, row2_end) = self.range(row2); | |
935 | let mut result = Vec::with_capacity(self.num_columns); | |
936 | for (base, (i, j)) in (row1_start..row1_end).zip(row2_start..row2_end).enumerate() { | |
937 | let mut v = self.words[i] & self.words[j]; | |
938 | for bit in 0..WORD_BITS { | |
939 | if v == 0 { | |
940 | break; | |
941 | } | |
942 | if v & 0x1 != 0 { | |
943 | result.push(C::new(base * WORD_BITS + bit)); | |
944 | } | |
945 | v >>= 1; | |
946 | } | |
947 | } | |
948 | result | |
949 | } | |
950 | ||
9fa01778 XL |
951 | /// Adds the bits from row `read` to the bits from row `write`, and |
952 | /// returns `true` if anything changed. | |
0bf4aa26 XL |
953 | /// |
954 | /// This is used when computing transitive reachability because if | |
955 | /// you have an edge `write -> read`, because in that case | |
956 | /// `write` can reach everything that `read` can (and | |
957 | /// potentially more). | |
958 | pub fn union_rows(&mut self, read: R, write: R) -> bool { | |
959 | assert!(read.index() < self.num_rows && write.index() < self.num_rows); | |
960 | let (read_start, read_end) = self.range(read); | |
961 | let (write_start, write_end) = self.range(write); | |
962 | let words = &mut self.words[..]; | |
963 | let mut changed = false; | |
cdc7bbd5 | 964 | for (read_index, write_index) in iter::zip(read_start..read_end, write_start..write_end) { |
0bf4aa26 XL |
965 | let word = words[write_index]; |
966 | let new_word = word | words[read_index]; | |
967 | words[write_index] = new_word; | |
968 | changed |= word != new_word; | |
969 | } | |
970 | changed | |
971 | } | |
972 | ||
dc9dc135 XL |
973 | /// Adds the bits from `with` to the bits from row `write`, and |
974 | /// returns `true` if anything changed. | |
975 | pub fn union_row_with(&mut self, with: &BitSet<C>, write: R) -> bool { | |
976 | assert!(write.index() < self.num_rows); | |
977 | assert_eq!(with.domain_size(), self.num_columns); | |
978 | let (write_start, write_end) = self.range(write); | |
979 | let mut changed = false; | |
cdc7bbd5 | 980 | for (read_index, write_index) in iter::zip(0..with.words().len(), write_start..write_end) { |
dc9dc135 XL |
981 | let word = self.words[write_index]; |
982 | let new_word = word | with.words()[read_index]; | |
983 | self.words[write_index] = new_word; | |
984 | changed |= word != new_word; | |
985 | } | |
986 | changed | |
987 | } | |
988 | ||
989 | /// Sets every cell in `row` to true. | |
990 | pub fn insert_all_into_row(&mut self, row: R) { | |
991 | assert!(row.index() < self.num_rows); | |
992 | let (start, end) = self.range(row); | |
c295e0f8 XL |
993 | for word in self.words[start..end].iter_mut() { |
994 | *word = !0; | |
dc9dc135 XL |
995 | } |
996 | self.clear_excess_bits(row); | |
997 | } | |
998 | ||
999 | /// Clear excess bits in the final word of the row. | |
1000 | fn clear_excess_bits(&mut self, row: R) { | |
1001 | let num_bits_in_final_word = self.num_columns % WORD_BITS; | |
1002 | if num_bits_in_final_word > 0 { | |
1003 | let mask = (1 << num_bits_in_final_word) - 1; | |
1004 | let (_, end) = self.range(row); | |
1005 | let final_word_idx = end - 1; | |
1006 | self.words[final_word_idx] &= mask; | |
1007 | } | |
1008 | } | |
1009 | ||
1010 | /// Gets a slice of the underlying words. | |
1011 | pub fn words(&self) -> &[Word] { | |
1012 | &self.words | |
1013 | } | |
1014 | ||
0bf4aa26 XL |
1015 | /// Iterates through all the columns set to true in a given row of |
1016 | /// the matrix. | |
416331ca | 1017 | pub fn iter(&self, row: R) -> BitIter<'_, C> { |
0bf4aa26 XL |
1018 | assert!(row.index() < self.num_rows); |
1019 | let (start, end) = self.range(row); | |
e74abb32 | 1020 | BitIter::new(&self.words[start..end]) |
0bf4aa26 | 1021 | } |
dc9dc135 XL |
1022 | |
1023 | /// Returns the number of elements in `row`. | |
1024 | pub fn count(&self, row: R) -> usize { | |
1025 | let (start, end) = self.range(row); | |
1026 | self.words[start..end].iter().map(|e| e.count_ones() as usize).sum() | |
1027 | } | |
0bf4aa26 XL |
1028 | } |
1029 | ||
f035d41b XL |
1030 | impl<R: Idx, C: Idx> fmt::Debug for BitMatrix<R, C> { |
1031 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1032 | /// Forces its contents to print in regular mode instead of alternate mode. | |
1033 | struct OneLinePrinter<T>(T); | |
1034 | impl<T: fmt::Debug> fmt::Debug for OneLinePrinter<T> { | |
1035 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1036 | write!(fmt, "{:?}", self.0) | |
1037 | } | |
1038 | } | |
1039 | ||
1040 | write!(fmt, "BitMatrix({}x{}) ", self.num_rows, self.num_columns)?; | |
1041 | let items = self.rows().flat_map(|r| self.iter(r).map(move |c| (r, c))); | |
1042 | fmt.debug_set().entries(items.map(OneLinePrinter)).finish() | |
1043 | } | |
1044 | } | |
1045 | ||
0bf4aa26 XL |
1046 | /// A fixed-column-size, variable-row-size 2D bit matrix with a moderately |
1047 | /// sparse representation. | |
1048 | /// | |
1049 | /// Initially, every row has no explicit representation. If any bit within a | |
1050 | /// row is set, the entire row is instantiated as `Some(<HybridBitSet>)`. | |
1051 | /// Furthermore, any previously uninstantiated rows prior to it will be | |
1052 | /// instantiated as `None`. Those prior rows may themselves become fully | |
1053 | /// instantiated later on if any of their bits are set. | |
1054 | /// | |
1055 | /// `R` and `C` are index types used to identify rows and columns respectively; | |
1056 | /// typically newtyped `usize` wrappers, but they can also just be `usize`. | |
1057 | #[derive(Clone, Debug)] | |
1058 | pub struct SparseBitMatrix<R, C> | |
1059 | where | |
1060 | R: Idx, | |
1061 | C: Idx, | |
1062 | { | |
1063 | num_columns: usize, | |
1064 | rows: IndexVec<R, Option<HybridBitSet<C>>>, | |
1065 | } | |
1066 | ||
1067 | impl<R: Idx, C: Idx> SparseBitMatrix<R, C> { | |
9fa01778 | 1068 | /// Creates a new empty sparse bit matrix with no rows or columns. |
0bf4aa26 | 1069 | pub fn new(num_columns: usize) -> Self { |
dfeec247 | 1070 | Self { num_columns, rows: IndexVec::new() } |
0bf4aa26 XL |
1071 | } |
1072 | ||
1073 | fn ensure_row(&mut self, row: R) -> &mut HybridBitSet<C> { | |
c295e0f8 | 1074 | // Instantiate any missing rows up to and including row `row` with an empty HybridBitSet. |
0bf4aa26 | 1075 | // Then replace row `row` with a full HybridBitSet if necessary. |
c295e0f8 | 1076 | self.rows.get_or_insert_with(row, || HybridBitSet::new_empty(self.num_columns)) |
0bf4aa26 XL |
1077 | } |
1078 | ||
1079 | /// Sets the cell at `(row, column)` to true. Put another way, insert | |
1080 | /// `column` to the bitset for `row`. | |
1081 | /// | |
9fa01778 | 1082 | /// Returns `true` if this changed the matrix. |
0bf4aa26 XL |
1083 | pub fn insert(&mut self, row: R, column: C) -> bool { |
1084 | self.ensure_row(row).insert(column) | |
1085 | } | |
1086 | ||
94222f64 XL |
1087 | /// Sets the cell at `(row, column)` to false. Put another way, delete |
1088 | /// `column` from the bitset for `row`. Has no effect if `row` does not | |
1089 | /// exist. | |
1090 | /// | |
1091 | /// Returns `true` if this changed the matrix. | |
1092 | pub fn remove(&mut self, row: R, column: C) -> bool { | |
1093 | match self.rows.get_mut(row) { | |
1094 | Some(Some(row)) => row.remove(column), | |
1095 | _ => false, | |
1096 | } | |
1097 | } | |
1098 | ||
1099 | /// Sets all columns at `row` to false. Has no effect if `row` does | |
1100 | /// not exist. | |
1101 | pub fn clear(&mut self, row: R) { | |
1102 | if let Some(Some(row)) = self.rows.get_mut(row) { | |
1103 | row.clear(); | |
1104 | } | |
1105 | } | |
1106 | ||
0bf4aa26 XL |
1107 | /// Do the bits from `row` contain `column`? Put another way, is |
1108 | /// the matrix cell at `(row, column)` true? Put yet another way, | |
1109 | /// if the matrix represents (transitive) reachability, can | |
1110 | /// `row` reach `column`? | |
1111 | pub fn contains(&self, row: R, column: C) -> bool { | |
1112 | self.row(row).map_or(false, |r| r.contains(column)) | |
1113 | } | |
1114 | ||
9fa01778 XL |
1115 | /// Adds the bits from row `read` to the bits from row `write`, and |
1116 | /// returns `true` if anything changed. | |
0bf4aa26 XL |
1117 | /// |
1118 | /// This is used when computing transitive reachability because if | |
1119 | /// you have an edge `write -> read`, because in that case | |
1120 | /// `write` can reach everything that `read` can (and | |
1121 | /// potentially more). | |
1122 | pub fn union_rows(&mut self, read: R, write: R) -> bool { | |
1123 | if read == write || self.row(read).is_none() { | |
1124 | return false; | |
1125 | } | |
1126 | ||
1127 | self.ensure_row(write); | |
1128 | if let (Some(read_row), Some(write_row)) = self.rows.pick2_mut(read, write) { | |
1129 | write_row.union(read_row) | |
1130 | } else { | |
1131 | unreachable!() | |
1132 | } | |
1133 | } | |
1134 | ||
0bf4aa26 XL |
1135 | /// Insert all bits in the given row. |
1136 | pub fn insert_all_into_row(&mut self, row: R) { | |
1137 | self.ensure_row(row).insert_all(); | |
1138 | } | |
1139 | ||
1140 | pub fn rows(&self) -> impl Iterator<Item = R> { | |
1141 | self.rows.indices() | |
1142 | } | |
1143 | ||
1144 | /// Iterates through all the columns set to true in a given row of | |
1145 | /// the matrix. | |
c295e0f8 | 1146 | pub fn iter(&self, row: R) -> impl Iterator<Item = C> + '_ { |
0bf4aa26 XL |
1147 | self.row(row).into_iter().flat_map(|r| r.iter()) |
1148 | } | |
1149 | ||
1150 | pub fn row(&self, row: R) -> Option<&HybridBitSet<C>> { | |
dfeec247 | 1151 | if let Some(Some(row)) = self.rows.get(row) { Some(row) } else { None } |
0bf4aa26 | 1152 | } |
94222f64 XL |
1153 | |
1154 | /// Interescts `row` with `set`. `set` can be either `BitSet` or | |
1155 | /// `HybridBitSet`. Has no effect if `row` does not exist. | |
1156 | /// | |
1157 | /// Returns true if the row was changed. | |
1158 | pub fn intersect_row<Set>(&mut self, row: R, set: &Set) -> bool | |
1159 | where | |
1160 | HybridBitSet<C>: BitRelations<Set>, | |
1161 | { | |
1162 | match self.rows.get_mut(row) { | |
1163 | Some(Some(row)) => row.intersect(set), | |
1164 | _ => false, | |
1165 | } | |
1166 | } | |
1167 | ||
1168 | /// Subtracts `set from `row`. `set` can be either `BitSet` or | |
1169 | /// `HybridBitSet`. Has no effect if `row` does not exist. | |
1170 | /// | |
1171 | /// Returns true if the row was changed. | |
1172 | pub fn subtract_row<Set>(&mut self, row: R, set: &Set) -> bool | |
1173 | where | |
1174 | HybridBitSet<C>: BitRelations<Set>, | |
1175 | { | |
1176 | match self.rows.get_mut(row) { | |
1177 | Some(Some(row)) => row.subtract(set), | |
1178 | _ => false, | |
1179 | } | |
1180 | } | |
1181 | ||
1182 | /// Unions `row` with `set`. `set` can be either `BitSet` or | |
1183 | /// `HybridBitSet`. | |
1184 | /// | |
1185 | /// Returns true if the row was changed. | |
1186 | pub fn union_row<Set>(&mut self, row: R, set: &Set) -> bool | |
1187 | where | |
1188 | HybridBitSet<C>: BitRelations<Set>, | |
1189 | { | |
1190 | self.ensure_row(row).union(set) | |
1191 | } | |
0bf4aa26 XL |
1192 | } |
1193 | ||
1194 | #[inline] | |
1195 | fn num_words<T: Idx>(domain_size: T) -> usize { | |
1196 | (domain_size.index() + WORD_BITS - 1) / WORD_BITS | |
1197 | } | |
1198 | ||
1199 | #[inline] | |
1200 | fn word_index_and_mask<T: Idx>(elem: T) -> (usize, Word) { | |
1201 | let elem = elem.index(); | |
1202 | let word_index = elem / WORD_BITS; | |
1203 | let mask = 1 << (elem % WORD_BITS); | |
1204 | (word_index, mask) | |
1205 | } | |
3dfed10e XL |
1206 | |
1207 | /// Integral type used to represent the bit set. | |
1208 | pub trait FiniteBitSetTy: | |
1209 | BitAnd<Output = Self> | |
1210 | + BitAndAssign | |
1211 | + BitOrAssign | |
1212 | + Clone | |
1213 | + Copy | |
1214 | + Shl | |
1215 | + Not<Output = Self> | |
1216 | + PartialEq | |
1217 | + Sized | |
1218 | { | |
1219 | /// Size of the domain representable by this type, e.g. 64 for `u64`. | |
1220 | const DOMAIN_SIZE: u32; | |
1221 | ||
1222 | /// Value which represents the `FiniteBitSet` having every bit set. | |
1223 | const FILLED: Self; | |
1224 | /// Value which represents the `FiniteBitSet` having no bits set. | |
1225 | const EMPTY: Self; | |
1226 | ||
1227 | /// Value for one as the integral type. | |
1228 | const ONE: Self; | |
1229 | /// Value for zero as the integral type. | |
1230 | const ZERO: Self; | |
1231 | ||
1232 | /// Perform a checked left shift on the integral type. | |
1233 | fn checked_shl(self, rhs: u32) -> Option<Self>; | |
1234 | /// Perform a checked right shift on the integral type. | |
1235 | fn checked_shr(self, rhs: u32) -> Option<Self>; | |
1236 | } | |
1237 | ||
1238 | impl FiniteBitSetTy for u32 { | |
1239 | const DOMAIN_SIZE: u32 = 32; | |
1240 | ||
1241 | const FILLED: Self = Self::MAX; | |
1242 | const EMPTY: Self = Self::MIN; | |
1243 | ||
1244 | const ONE: Self = 1u32; | |
1245 | const ZERO: Self = 0u32; | |
1246 | ||
1247 | fn checked_shl(self, rhs: u32) -> Option<Self> { | |
1248 | self.checked_shl(rhs) | |
1249 | } | |
1250 | ||
1251 | fn checked_shr(self, rhs: u32) -> Option<Self> { | |
1252 | self.checked_shr(rhs) | |
1253 | } | |
1254 | } | |
1255 | ||
1256 | impl std::fmt::Debug for FiniteBitSet<u32> { | |
1257 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1258 | write!(f, "{:032b}", self.0) | |
1259 | } | |
1260 | } | |
1261 | ||
1262 | impl FiniteBitSetTy for u64 { | |
1263 | const DOMAIN_SIZE: u32 = 64; | |
1264 | ||
1265 | const FILLED: Self = Self::MAX; | |
1266 | const EMPTY: Self = Self::MIN; | |
1267 | ||
1268 | const ONE: Self = 1u64; | |
1269 | const ZERO: Self = 0u64; | |
1270 | ||
1271 | fn checked_shl(self, rhs: u32) -> Option<Self> { | |
1272 | self.checked_shl(rhs) | |
1273 | } | |
1274 | ||
1275 | fn checked_shr(self, rhs: u32) -> Option<Self> { | |
1276 | self.checked_shr(rhs) | |
1277 | } | |
1278 | } | |
1279 | ||
1280 | impl std::fmt::Debug for FiniteBitSet<u64> { | |
1281 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1282 | write!(f, "{:064b}", self.0) | |
1283 | } | |
1284 | } | |
1285 | ||
1286 | impl FiniteBitSetTy for u128 { | |
1287 | const DOMAIN_SIZE: u32 = 128; | |
1288 | ||
1289 | const FILLED: Self = Self::MAX; | |
1290 | const EMPTY: Self = Self::MIN; | |
1291 | ||
1292 | const ONE: Self = 1u128; | |
1293 | const ZERO: Self = 0u128; | |
1294 | ||
1295 | fn checked_shl(self, rhs: u32) -> Option<Self> { | |
1296 | self.checked_shl(rhs) | |
1297 | } | |
1298 | ||
1299 | fn checked_shr(self, rhs: u32) -> Option<Self> { | |
1300 | self.checked_shr(rhs) | |
1301 | } | |
1302 | } | |
1303 | ||
1304 | impl std::fmt::Debug for FiniteBitSet<u128> { | |
1305 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
1306 | write!(f, "{:0128b}", self.0) | |
1307 | } | |
1308 | } | |
1309 | ||
1310 | /// A fixed-sized bitset type represented by an integer type. Indices outwith than the range | |
1311 | /// representable by `T` are considered set. | |
1312 | #[derive(Copy, Clone, Eq, PartialEq, Decodable, Encodable)] | |
1313 | pub struct FiniteBitSet<T: FiniteBitSetTy>(pub T); | |
1314 | ||
1315 | impl<T: FiniteBitSetTy> FiniteBitSet<T> { | |
1316 | /// Creates a new, empty bitset. | |
1317 | pub fn new_empty() -> Self { | |
1318 | Self(T::EMPTY) | |
1319 | } | |
1320 | ||
1321 | /// Sets the `index`th bit. | |
1322 | pub fn set(&mut self, index: u32) { | |
1323 | self.0 |= T::ONE.checked_shl(index).unwrap_or(T::ZERO); | |
1324 | } | |
1325 | ||
1326 | /// Unsets the `index`th bit. | |
1327 | pub fn clear(&mut self, index: u32) { | |
1328 | self.0 &= !T::ONE.checked_shl(index).unwrap_or(T::ZERO); | |
1329 | } | |
1330 | ||
1331 | /// Sets the `i`th to `j`th bits. | |
1332 | pub fn set_range(&mut self, range: Range<u32>) { | |
1333 | let bits = T::FILLED | |
1334 | .checked_shl(range.end - range.start) | |
1335 | .unwrap_or(T::ZERO) | |
1336 | .not() | |
1337 | .checked_shl(range.start) | |
1338 | .unwrap_or(T::ZERO); | |
1339 | self.0 |= bits; | |
1340 | } | |
1341 | ||
1342 | /// Is the set empty? | |
1343 | pub fn is_empty(&self) -> bool { | |
1344 | self.0 == T::EMPTY | |
1345 | } | |
1346 | ||
1347 | /// Returns the domain size of the bitset. | |
1348 | pub fn within_domain(&self, index: u32) -> bool { | |
1349 | index < T::DOMAIN_SIZE | |
1350 | } | |
1351 | ||
1352 | /// Returns if the `index`th bit is set. | |
1353 | pub fn contains(&self, index: u32) -> Option<bool> { | |
1354 | self.within_domain(index) | |
1355 | .then(|| ((self.0.checked_shr(index).unwrap_or(T::ONE)) & T::ONE) == T::ONE) | |
1356 | } | |
1357 | } | |
1358 | ||
1359 | impl<T: FiniteBitSetTy> Default for FiniteBitSet<T> { | |
1360 | fn default() -> Self { | |
1361 | Self::new_empty() | |
1362 | } | |
1363 | } |