src/librustc_data_structures/bitvec.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 use indexed_vec::{Idx, IndexVec};
  12 use std::collections::btree_map::Entry;
  13 use std::collections::BTreeMap;
  14 use std::iter::FromIterator;
  15 use std::marker::PhantomData;
  16
  17 type Word = u128;
  18 const WORD_BITS: usize = 128;
  19
  20 /// A very simple BitVector type.
  21 #[derive(Clone, Debug, PartialEq)]
  22 pub struct BitVector {
  23     data: Vec<Word>,
  24 }
  25
  26 impl BitVector {
  27     #[inline]
  28     pub fn new(num_bits: usize) -> BitVector {
  29         let num_words = words(num_bits);
  30         BitVector {
  31             data: vec![0; num_words],
  32         }
  33     }
  34
  35     #[inline]
  36     pub fn clear(&mut self) {
  37         for p in &mut self.data {
  38             *p = 0;
  39         }
  40     }
  41
  42     pub fn count(&self) -> usize {
  43         self.data.iter().map(|e| e.count_ones() as usize).sum()
  44     }
  45
  46     #[inline]
  47     pub fn contains(&self, bit: usize) -> bool {
  48         let (word, mask) = word_mask(bit);
  49         (self.data[word] & mask) != 0
  50     }
  51
  52     /// Returns true if the bit has changed.
  53     #[inline]
  54     pub fn insert(&mut self, bit: usize) -> bool {
  55         let (word, mask) = word_mask(bit);
  56         let data = &mut self.data[word];
  57         let value = *data;
  58         let new_value = value | mask;
  59         *data = new_value;
  60         new_value != value
  61     }
  62
  63     /// Returns true if the bit has changed.
  64     #[inline]
  65     pub fn remove(&mut self, bit: usize) -> bool {
  66         let (word, mask) = word_mask(bit);
  67         let data = &mut self.data[word];
  68         let value = *data;
  69         let new_value = value & !mask;
  70         *data = new_value;
  71         new_value != value
  72     }
  73
  74     #[inline]
  75     pub fn insert_all(&mut self, all: &BitVector) -> bool {
  76         assert!(self.data.len() == all.data.len());
  77         let mut changed = false;
  78         for (i, j) in self.data.iter_mut().zip(&all.data) {
  79             let value = *i;
  80             *i = value | *j;
  81             if value != *i {
  82                 changed = true;
  83             }
  84         }
  85         changed
  86     }
  87
  88     #[inline]
  89     pub fn grow(&mut self, num_bits: usize) {
  90         let num_words = words(num_bits);
  91         if self.data.len() < num_words {
  92             self.data.resize(num_words, 0)
  93         }
  94     }
  95
  96     /// Iterates over indexes of set bits in a sorted order
  97     #[inline]
  98     pub fn iter<'a>(&'a self) -> BitVectorIter<'a> {
  99         BitVectorIter {
 100             iter: self.data.iter(),
 101             current: 0,
 102             idx: 0,
 103         }
 104     }
 105 }
 106
 107 pub struct BitVectorIter<'a> {
 108     iter: ::std::slice::Iter<'a, Word>,
 109     current: Word,
 110     idx: usize,
 111 }
 112
 113 impl<'a> Iterator for BitVectorIter<'a> {
 114     type Item = usize;
 115     fn next(&mut self) -> Option<usize> {
 116         while self.current == 0 {
 117             self.current = if let Some(&i) = self.iter.next() {
 118                 if i == 0 {
 119                     self.idx += WORD_BITS;
 120                     continue;
 121                 } else {
 122                     self.idx = words(self.idx) * WORD_BITS;
 123                     i
 124                 }
 125             } else {
 126                 return None;
 127             }
 128         }
 129         let offset = self.current.trailing_zeros() as usize;
 130         self.current >>= offset;
 131         self.current >>= 1; // shift otherwise overflows for 0b1000_0000_…_0000
 132         self.idx += offset + 1;
 133         return Some(self.idx - 1);
 134     }
 135
 136     fn size_hint(&self) -> (usize, Option<usize>) {
 137         let (_, upper) = self.iter.size_hint();
 138         (0, upper)
 139     }
 140 }
 141
 142 impl FromIterator<bool> for BitVector {
 143     fn from_iter<I>(iter: I) -> BitVector
 144     where
 145         I: IntoIterator<Item = bool>,
 146     {
 147         let iter = iter.into_iter();
 148         let (len, _) = iter.size_hint();
 149         // Make the minimum length for the bitvector WORD_BITS bits since that's
 150         // the smallest non-zero size anyway.
 151         let len = if len < WORD_BITS { WORD_BITS } else { len };
 152         let mut bv = BitVector::new(len);
 153         for (idx, val) in iter.enumerate() {
 154             if idx > len {
 155                 bv.grow(idx);
 156             }
 157             if val {
 158                 bv.insert(idx);
 159             }
 160         }
 161
 162         bv
 163     }
 164 }
 165
 166 /// A "bit matrix" is basically a matrix of booleans represented as
 167 /// one gigantic bitvector. In other words, it is as if you have
 168 /// `rows` bitvectors, each of length `columns`.
 169 #[derive(Clone, Debug)]
 170 pub struct BitMatrix {
 171     columns: usize,
 172     vector: Vec<Word>,
 173 }
 174
 175 impl BitMatrix {
 176     /// Create a new `rows x columns` matrix, initially empty.
 177     pub fn new(rows: usize, columns: usize) -> BitMatrix {
 178         // For every element, we need one bit for every other
 179         // element. Round up to an even number of words.
 180         let words_per_row = words(columns);
 181         BitMatrix {
 182             columns,
 183             vector: vec![0; rows * words_per_row],
 184         }
 185     }
 186
 187     /// The range of bits for a given row.
 188     fn range(&self, row: usize) -> (usize, usize) {
 189         let words_per_row = words(self.columns);
 190         let start = row * words_per_row;
 191         (start, start + words_per_row)
 192     }
 193
 194     /// Sets the cell at `(row, column)` to true. Put another way, add
 195     /// `column` to the bitset for `row`.
 196     ///
 197     /// Returns true if this changed the matrix, and false otherwise.
 198     pub fn add(&mut self, row: usize, column: usize) -> bool {
 199         let (start, _) = self.range(row);
 200         let (word, mask) = word_mask(column);
 201         let vector = &mut self.vector[..];
 202         let v1 = vector[start + word];
 203         let v2 = v1 | mask;
 204         vector[start + word] = v2;
 205         v1 != v2
 206     }
 207
 208     /// Do the bits from `row` contain `column`? Put another way, is
 209     /// the matrix cell at `(row, column)` true?  Put yet another way,
 210     /// if the matrix represents (transitive) reachability, can
 211     /// `row` reach `column`?
 212     pub fn contains(&self, row: usize, column: usize) -> bool {
 213         let (start, _) = self.range(row);
 214         let (word, mask) = word_mask(column);
 215         (self.vector[start + word] & mask) != 0
 216     }
 217
 218     /// Returns those indices that are true in rows `a` and `b`.  This
 219     /// is an O(n) operation where `n` is the number of elements
 220     /// (somewhat independent from the actual size of the
 221     /// intersection, in particular).
 222     pub fn intersection(&self, a: usize, b: usize) -> Vec<usize> {
 223         let (a_start, a_end) = self.range(a);
 224         let (b_start, b_end) = self.range(b);
 225         let mut result = Vec::with_capacity(self.columns);
 226         for (base, (i, j)) in (a_start..a_end).zip(b_start..b_end).enumerate() {
 227             let mut v = self.vector[i] & self.vector[j];
 228             for bit in 0..WORD_BITS {
 229                 if v == 0 {
 230                     break;
 231                 }
 232                 if v & 0x1 != 0 {
 233                     result.push(base * WORD_BITS + bit);
 234                 }
 235                 v >>= 1;
 236             }
 237         }
 238         result
 239     }
 240
 241     /// Add the bits from row `read` to the bits from row `write`,
 242     /// return true if anything changed.
 243     ///
 244     /// This is used when computing transitive reachability because if
 245     /// you have an edge `write -> read`, because in that case
 246     /// `write` can reach everything that `read` can (and
 247     /// potentially more).
 248     pub fn merge(&mut self, read: usize, write: usize) -> bool {
 249         let (read_start, read_end) = self.range(read);
 250         let (write_start, write_end) = self.range(write);
 251         let vector = &mut self.vector[..];
 252         let mut changed = false;
 253         for (read_index, write_index) in (read_start..read_end).zip(write_start..write_end) {
 254             let v1 = vector[write_index];
 255             let v2 = v1 | vector[read_index];
 256             vector[write_index] = v2;
 257             changed = changed | (v1 != v2);
 258         }
 259         changed
 260     }
 261
 262     /// Iterates through all the columns set to true in a given row of
 263     /// the matrix.
 264     pub fn iter<'a>(&'a self, row: usize) -> BitVectorIter<'a> {
 265         let (start, end) = self.range(row);
 266         BitVectorIter {
 267             iter: self.vector[start..end].iter(),
 268             current: 0,
 269             idx: 0,
 270         }
 271     }
 272 }
 273
 274 #[derive(Clone, Debug)]
 275 pub struct SparseBitMatrix<R, C>
 276 where
 277     R: Idx,
 278     C: Idx,
 279 {
 280     vector: IndexVec<R, SparseBitSet<C>>,
 281 }
 282
 283 impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
 284     /// Create a new `rows x columns` matrix, initially empty.
 285     pub fn new(rows: R, _columns: C) -> SparseBitMatrix<R, C> {
 286         SparseBitMatrix {
 287             vector: IndexVec::from_elem_n(SparseBitSet::new(), rows.index()),
 288         }
 289     }
 290
 291     /// Sets the cell at `(row, column)` to true. Put another way, insert
 292     /// `column` to the bitset for `row`.
 293     ///
 294     /// Returns true if this changed the matrix, and false otherwise.
 295     pub fn add(&mut self, row: R, column: C) -> bool {
 296         self.vector[row].insert(column)
 297     }
 298
 299     /// Do the bits from `row` contain `column`? Put another way, is
 300     /// the matrix cell at `(row, column)` true?  Put yet another way,
 301     /// if the matrix represents (transitive) reachability, can
 302     /// `row` reach `column`?
 303     pub fn contains(&self, row: R, column: C) -> bool {
 304         self.vector[row].contains(column)
 305     }
 306
 307     /// Add the bits from row `read` to the bits from row `write`,
 308     /// return true if anything changed.
 309     ///
 310     /// This is used when computing transitive reachability because if
 311     /// you have an edge `write -> read`, because in that case
 312     /// `write` can reach everything that `read` can (and
 313     /// potentially more).
 314     pub fn merge(&mut self, read: R, write: R) -> bool {
 315         let mut changed = false;
 316
 317         if read != write {
 318             let (bit_set_read, bit_set_write) = self.vector.pick2_mut(read, write);
 319
 320             for read_chunk in bit_set_read.chunks() {
 321                 changed = changed | bit_set_write.insert_chunk(read_chunk).any();
 322             }
 323         }
 324
 325         changed
 326     }
 327
 328     /// True if `sub` is a subset of `sup`
 329     pub fn is_subset(&self, sub: R, sup: R) -> bool {
 330         sub == sup || {
 331             let bit_set_sub = &self.vector[sub];
 332             let bit_set_sup = &self.vector[sup];
 333             bit_set_sub
 334                 .chunks()
 335                 .all(|read_chunk| read_chunk.bits_eq(bit_set_sup.contains_chunk(read_chunk)))
 336         }
 337     }
 338
 339     /// Iterates through all the columns set to true in a given row of
 340     /// the matrix.
 341     pub fn iter<'a>(&'a self, row: R) -> impl Iterator<Item = C> + 'a {
 342         self.vector[row].iter()
 343     }
 344 }
 345
 346 #[derive(Clone, Debug)]
 347 pub struct SparseBitSet<I: Idx> {
 348     chunk_bits: BTreeMap<u32, Word>,
 349     _marker: PhantomData<I>,
 350 }
 351
 352 #[derive(Copy, Clone)]
 353 pub struct SparseChunk<I> {
 354     key: u32,
 355     bits: Word,
 356     _marker: PhantomData<I>,
 357 }
 358
 359 impl<I: Idx> SparseChunk<I> {
 360     #[inline]
 361     pub fn one(index: I) -> Self {
 362         let index = index.index();
 363         let key_usize = index / 128;
 364         let key = key_usize as u32;
 365         assert_eq!(key as usize, key_usize);
 366         SparseChunk {
 367             key,
 368             bits: 1 << (index % 128),
 369             _marker: PhantomData,
 370         }
 371     }
 372
 373     #[inline]
 374     pub fn any(&self) -> bool {
 375         self.bits != 0
 376     }
 377
 378     #[inline]
 379     pub fn bits_eq(&self, other: SparseChunk<I>) -> bool {
 380         self.bits == other.bits
 381     }
 382
 383     pub fn iter(&self) -> impl Iterator<Item = I> {
 384         let base = self.key as usize * 128;
 385         let mut bits = self.bits;
 386         (0..128)
 387             .map(move |i| {
 388                 let current_bits = bits;
 389                 bits >>= 1;
 390                 (i, current_bits)
 391             })
 392             .take_while(|&(_, bits)| bits != 0)
 393             .filter_map(move |(i, bits)| {
 394                 if (bits & 1) != 0 {
 395                     Some(I::new(base + i))
 396                 } else {
 397                     None
 398                 }
 399             })
 400     }
 401 }
 402
 403 impl<I: Idx> SparseBitSet<I> {
 404     pub fn new() -> Self {
 405         SparseBitSet {
 406             chunk_bits: BTreeMap::new(),
 407             _marker: PhantomData,
 408         }
 409     }
 410
 411     pub fn capacity(&self) -> usize {
 412         self.chunk_bits.len() * 128
 413     }
 414
 415     /// Returns a chunk containing only those bits that are already
 416     /// present. You can test therefore if `self` contains all the
 417     /// bits in chunk already by doing `chunk ==
 418     /// self.contains_chunk(chunk)`.
 419     pub fn contains_chunk(&self, chunk: SparseChunk<I>) -> SparseChunk<I> {
 420         SparseChunk {
 421             bits: self.chunk_bits
 422                 .get(&chunk.key)
 423                 .map_or(0, |bits| bits & chunk.bits),
 424             ..chunk
 425         }
 426     }
 427
 428     /// Modifies `self` to contain all the bits from `chunk` (in
 429     /// addition to any pre-existing bits); returns a new chunk that
 430     /// contains only those bits that were newly added. You can test
 431     /// if anything was inserted by invoking `any()` on the returned
 432     /// value.
 433     pub fn insert_chunk(&mut self, chunk: SparseChunk<I>) -> SparseChunk<I> {
 434         if chunk.bits == 0 {
 435             return chunk;
 436         }
 437         let bits = self.chunk_bits.entry(chunk.key).or_insert(0);
 438         let old_bits = *bits;
 439         let new_bits = old_bits | chunk.bits;
 440         *bits = new_bits;
 441         let changed = new_bits ^ old_bits;
 442         SparseChunk {
 443             bits: changed,
 444             ..chunk
 445         }
 446     }
 447
 448     pub fn remove_chunk(&mut self, chunk: SparseChunk<I>) -> SparseChunk<I> {
 449         if chunk.bits == 0 {
 450             return chunk;
 451         }
 452         let changed = match self.chunk_bits.entry(chunk.key) {
 453             Entry::Occupied(mut bits) => {
 454                 let old_bits = *bits.get();
 455                 let new_bits = old_bits & !chunk.bits;
 456                 if new_bits == 0 {
 457                     bits.remove();
 458                 } else {
 459                     bits.insert(new_bits);
 460                 }
 461                 new_bits ^ old_bits
 462             }
 463             Entry::Vacant(_) => 0,
 464         };
 465         SparseChunk {
 466             bits: changed,
 467             ..chunk
 468         }
 469     }
 470
 471     pub fn clear(&mut self) {
 472         self.chunk_bits.clear();
 473     }
 474
 475     pub fn chunks<'a>(&'a self) -> impl Iterator<Item = SparseChunk<I>> + 'a {
 476         self.chunk_bits.iter().map(|(&key, &bits)| SparseChunk {
 477             key,
 478             bits,
 479             _marker: PhantomData,
 480         })
 481     }
 482
 483     pub fn contains(&self, index: I) -> bool {
 484         self.contains_chunk(SparseChunk::one(index)).any()
 485     }
 486
 487     pub fn insert(&mut self, index: I) -> bool {
 488         self.insert_chunk(SparseChunk::one(index)).any()
 489     }
 490
 491     pub fn remove(&mut self, index: I) -> bool {
 492         self.remove_chunk(SparseChunk::one(index)).any()
 493     }
 494
 495     pub fn iter<'a>(&'a self) -> impl Iterator<Item = I> + 'a {
 496         self.chunks().flat_map(|chunk| chunk.iter())
 497     }
 498 }
 499
 500 #[inline]
 501 fn words(elements: usize) -> usize {
 502     (elements + WORD_BITS - 1) / WORD_BITS
 503 }
 504
 505 #[inline]
 506 fn word_mask(index: usize) -> (usize, Word) {
 507     let word = index / WORD_BITS;
 508     let mask = 1 << (index % WORD_BITS);
 509     (word, mask)
 510 }
 511
 512 #[test]
 513 fn bitvec_iter_works() {
 514     let mut bitvec = BitVector::new(100);
 515     bitvec.insert(1);
 516     bitvec.insert(10);
 517     bitvec.insert(19);
 518     bitvec.insert(62);
 519     bitvec.insert(63);
 520     bitvec.insert(64);
 521     bitvec.insert(65);
 522     bitvec.insert(66);
 523     bitvec.insert(99);
 524     assert_eq!(
 525         bitvec.iter().collect::<Vec<_>>(),
 526         [1, 10, 19, 62, 63, 64, 65, 66, 99]
 527     );
 528 }
 529
 530 #[test]
 531 fn bitvec_iter_works_2() {
 532     let mut bitvec = BitVector::new(319);
 533     bitvec.insert(0);
 534     bitvec.insert(127);
 535     bitvec.insert(191);
 536     bitvec.insert(255);
 537     bitvec.insert(319);
 538     assert_eq!(bitvec.iter().collect::<Vec<_>>(), [0, 127, 191, 255, 319]);
 539 }
 540
 541 #[test]
 542 fn union_two_vecs() {
 543     let mut vec1 = BitVector::new(65);
 544     let mut vec2 = BitVector::new(65);
 545     assert!(vec1.insert(3));
 546     assert!(!vec1.insert(3));
 547     assert!(vec2.insert(5));
 548     assert!(vec2.insert(64));
 549     assert!(vec1.insert_all(&vec2));
 550     assert!(!vec1.insert_all(&vec2));
 551     assert!(vec1.contains(3));
 552     assert!(!vec1.contains(4));
 553     assert!(vec1.contains(5));
 554     assert!(!vec1.contains(63));
 555     assert!(vec1.contains(64));
 556 }
 557
 558 #[test]
 559 fn grow() {
 560     let mut vec1 = BitVector::new(65);
 561     for index in 0..65 {
 562         assert!(vec1.insert(index));
 563         assert!(!vec1.insert(index));
 564     }
 565     vec1.grow(128);
 566
 567     // Check if the bits set before growing are still set
 568     for index in 0..65 {
 569         assert!(vec1.contains(index));
 570     }
 571
 572     // Check if the new bits are all un-set
 573     for index in 65..128 {
 574         assert!(!vec1.contains(index));
 575     }
 576
 577     // Check that we can set all new bits without running out of bounds
 578     for index in 65..128 {
 579         assert!(vec1.insert(index));
 580         assert!(!vec1.insert(index));
 581     }
 582 }
 583
 584 #[test]
 585 fn matrix_intersection() {
 586     let mut vec1 = BitMatrix::new(200, 200);
 587
 588     // (*) Elements reachable from both 2 and 65.
 589
 590     vec1.add(2, 3);
 591     vec1.add(2, 6);
 592     vec1.add(2, 10); // (*)
 593     vec1.add(2, 64); // (*)
 594     vec1.add(2, 65);
 595     vec1.add(2, 130);
 596     vec1.add(2, 160); // (*)
 597
 598     vec1.add(64, 133);
 599
 600     vec1.add(65, 2);
 601     vec1.add(65, 8);
 602     vec1.add(65, 10); // (*)
 603     vec1.add(65, 64); // (*)
 604     vec1.add(65, 68);
 605     vec1.add(65, 133);
 606     vec1.add(65, 160); // (*)
 607
 608     let intersection = vec1.intersection(2, 64);
 609     assert!(intersection.is_empty());
 610
 611     let intersection = vec1.intersection(2, 65);
 612     assert_eq!(intersection, &[10, 64, 160]);
 613 }
 614
 615 #[test]
 616 fn matrix_iter() {
 617     let mut matrix = BitMatrix::new(64, 100);
 618     matrix.add(3, 22);
 619     matrix.add(3, 75);
 620     matrix.add(2, 99);
 621     matrix.add(4, 0);
 622     matrix.merge(3, 5);
 623
 624     let expected = [99];
 625     let mut iter = expected.iter();
 626     for i in matrix.iter(2) {
 627         let j = *iter.next().unwrap();
 628         assert_eq!(i, j);
 629     }
 630     assert!(iter.next().is_none());
 631
 632     let expected = [22, 75];
 633     let mut iter = expected.iter();
 634     for i in matrix.iter(3) {
 635         let j = *iter.next().unwrap();
 636         assert_eq!(i, j);
 637     }
 638     assert!(iter.next().is_none());
 639
 640     let expected = [0];
 641     let mut iter = expected.iter();
 642     for i in matrix.iter(4) {
 643         let j = *iter.next().unwrap();
 644         assert_eq!(i, j);
 645     }
 646     assert!(iter.next().is_none());
 647
 648     let expected = [22, 75];
 649     let mut iter = expected.iter();
 650     for i in matrix.iter(5) {
 651         let j = *iter.next().unwrap();
 652         assert_eq!(i, j);
 653     }
 654     assert!(iter.next().is_none());
 655 }