// rustc.git: src/libstd/collections/hash/table.rs (Imported Upstream version 1.0.0~beta.3)
// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// ignore-lexer-test FIXME #15883

use self::BucketState::*;

use clone::Clone;
use cmp;
use hash::{Hash, Hasher};
use iter::{Iterator, ExactSizeIterator};
use marker::{Copy, Send, Sync, Sized, self};
use mem::{min_align_of, size_of};
use mem;
use num::wrapping::OverflowingOps;
use ops::{Deref, DerefMut, Drop};
use option::Option;
use option::Option::{Some, None};
use ptr::{self, Unique};
use rt::heap::{allocate, deallocate, EMPTY};
use collections::hash_state::HashState;

const EMPTY_BUCKET: u64 = 0;

/// The raw hashtable, providing safe-ish access to the unzipped and highly
/// optimized arrays of hashes, keys, and values.
///
/// This design uses less memory and is a lot faster than the naive
/// `Vec<Option<(u64, K, V)>>`, because we don't pay for the overhead of an
/// option on every element, and we get a generally more cache-aware design.
///
/// Essential invariants of this structure:
///
///   - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw`
///     points to 'undefined' contents. Don't read from it. This invariant is
///     enforced outside this module with the `EmptyBucket`, `FullBucket`,
///     and `SafeHash` types.
///
///   - An `EmptyBucket` is only constructed at an index with
///     a hash of EMPTY_BUCKET.
///
///   - A `FullBucket` is only constructed at an index with a
///     non-EMPTY_BUCKET hash.
///
///   - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
///     around hashes of zero by changing them to 0x8000_0000_0000_0000,
///     which will likely map to the same bucket, while not being confused
///     with "empty".
///
///   - All three "arrays represented by pointers" are the same length:
///     `capacity`. This is set at creation and never changes. The arrays
///     are unzipped to save space (we don't have to pay for the padding
///     between odd sized elements, such as in a map from u64 to u8), and
///     to be more cache aware (scanning through 8 hashes brings in at most
///     2 cache lines, since they're all right beside each other).
///
/// You can kind of think of this module/data structure as a safe wrapper
/// around just the "table" part of the hashtable. It enforces some
/// invariants at the type level and employs some performance trickery,
/// but in general is just a tricked out `Vec<Option<(u64, K, V)>>`.
#[unsafe_no_drop_flag]
pub struct RawTable<K, V> {
    capacity: usize,
    size: usize,
    hashes: Unique<u64>,

    // Because K/V do not appear directly in any of the types in the struct,
    // inform rustc that in fact instances of K and V are reachable from here.
    marker: marker::PhantomData<(K,V)>,
}

unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {}
unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {}

struct RawBucket<K, V> {
    hash: *mut u64,
    key: *mut K,
    val: *mut V,
    _marker: marker::PhantomData<(K,V)>,
}

impl<K,V> Copy for RawBucket<K,V> {}
impl<K,V> Clone for RawBucket<K,V> {
    fn clone(&self) -> RawBucket<K, V> { *self }
}

pub struct Bucket<K, V, M> {
    raw: RawBucket<K, V>,
    idx: usize,
    table: M
}

impl<K,V,M:Copy> Copy for Bucket<K,V,M> {}
impl<K,V,M:Copy> Clone for Bucket<K,V,M> {
    fn clone(&self) -> Bucket<K,V,M> { *self }
}

pub struct EmptyBucket<K, V, M> {
    raw: RawBucket<K, V>,
    idx: usize,
    table: M
}

pub struct FullBucket<K, V, M> {
    raw: RawBucket<K, V>,
    idx: usize,
    table: M
}

pub type EmptyBucketImm<'table, K, V> = EmptyBucket<K, V, &'table RawTable<K, V>>;
pub type FullBucketImm<'table, K, V> = FullBucket<K, V, &'table RawTable<K, V>>;

pub type EmptyBucketMut<'table, K, V> = EmptyBucket<K, V, &'table mut RawTable<K, V>>;
pub type FullBucketMut<'table, K, V> = FullBucket<K, V, &'table mut RawTable<K, V>>;

pub enum BucketState<K, V, M> {
    Empty(EmptyBucket<K, V, M>),
    Full(FullBucket<K, V, M>),
}

// A GapThenFull encapsulates the state of two consecutive buckets at once.
// The first bucket, called the gap, is known to be empty.
// The second bucket is full.
struct GapThenFull<K, V, M> {
    gap: EmptyBucket<K, V, ()>,
    full: FullBucket<K, V, M>,
}

/// A hash that is not zero, since we use a hash of zero to represent empty
/// buckets.
#[derive(PartialEq, Copy, Clone)]
pub struct SafeHash {
    hash: u64,
}

impl SafeHash {
    /// Peek at the hash value, which is guaranteed to be non-zero.
    #[inline(always)]
    pub fn inspect(&self) -> u64 { self.hash }
}

/// We need to remove hashes of 0. That's reserved for empty buckets.
/// This function wraps up the hashing done with the given `HashState`,
/// and is the only way outside this module to generate a SafeHash.
pub fn make_hash<T: ?Sized, S>(hash_state: &S, t: &T) -> SafeHash
    where T: Hash, S: HashState
{
    let mut state = hash_state.hasher();
    t.hash(&mut state);
    // We need to avoid 0 in order to prevent collisions with
    // EMPTY_BUCKET. We can maintain our precious uniform distribution
    // of initial indexes by unconditionally setting the MSB,
    // effectively reducing 64-bit hashes to 63 bits.
    SafeHash { hash: 0x8000_0000_0000_0000 | state.finish() }
}
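
// Illustrative sketch (not part of the upstream file): the unconditional
// MSB-or in `make_hash` can never produce EMPTY_BUCKET, even when the
// underlying hasher returns 0, so a `SafeHash` is always distinguishable
// from an empty bucket.
#[test]
fn test_safehash_is_never_empty_bucket() {
    assert_eq!(0x8000_0000_0000_0000u64 | 0, 0x8000_0000_0000_0000);
    assert!((0x8000_0000_0000_0000u64 | 0x1234) != EMPTY_BUCKET);
}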

// `replace` casts a `*u64` to a `*SafeHash`. Since we statically
// ensure that a `FullBucket` points to an index with a non-zero hash,
// and a `SafeHash` is just a `u64` with a different name, this is
// safe.
//
// This test ensures that a `SafeHash` really IS the same size as a
// `u64`. If you need to change the size of `SafeHash` (and
// consequently make this test fail), `replace` needs to be
// modified to no longer assume this.
#[test]
fn can_alias_safehash_as_u64() {
    assert_eq!(size_of::<SafeHash>(), size_of::<u64>())
}

impl<K, V> RawBucket<K, V> {
    unsafe fn offset(self, count: isize) -> RawBucket<K, V> {
        RawBucket {
            hash: self.hash.offset(count),
            key: self.key.offset(count),
            val: self.val.offset(count),
            _marker: marker::PhantomData,
        }
    }
}

// Buckets hold references to the table.
impl<K, V, M> FullBucket<K, V, M> {
    /// Borrow a reference to the table.
    pub fn table(&self) -> &M {
        &self.table
    }
    /// Move out the reference to the table.
    pub fn into_table(self) -> M {
        self.table
    }
    /// Get the raw index.
    pub fn index(&self) -> usize {
        self.idx
    }
}

impl<K, V, M> EmptyBucket<K, V, M> {
    /// Borrow a reference to the table.
    pub fn table(&self) -> &M {
        &self.table
    }
    /// Move out the reference to the table.
    pub fn into_table(self) -> M {
        self.table
    }
}

impl<K, V, M> Bucket<K, V, M> {
    /// Move out the reference to the table.
    pub fn into_table(self) -> M {
        self.table
    }
    /// Get the raw index.
    pub fn index(&self) -> usize {
        self.idx
    }
}

impl<K, V, M: Deref<Target=RawTable<K, V>>> Bucket<K, V, M> {
    pub fn new(table: M, hash: SafeHash) -> Bucket<K, V, M> {
        Bucket::at_index(table, hash.inspect() as usize)
    }

    pub fn at_index(table: M, ib_index: usize) -> Bucket<K, V, M> {
        // if capacity is 0, then the RawBucket will be populated with bogus pointers.
        // This is an uncommon case though, so we only check for it in debug builds.
        debug_assert!(table.capacity() > 0, "Table should have capacity at this point");
        let ib_index = ib_index & (table.capacity() - 1);
        Bucket {
            raw: unsafe {
                table.first_bucket_raw().offset(ib_index as isize)
            },
            idx: ib_index,
            table: table
        }
    }

    pub fn first(table: M) -> Bucket<K, V, M> {
        Bucket {
            raw: table.first_bucket_raw(),
            idx: 0,
            table: table
        }
    }

    /// Reads a bucket at a given index, returning an enum indicating whether
    /// it's initialized or not. You need to match on this enum to get
    /// the appropriate types to call most of the other functions in
    /// this module.
    pub fn peek(self) -> BucketState<K, V, M> {
        match unsafe { *self.raw.hash } {
            EMPTY_BUCKET =>
                Empty(EmptyBucket {
                    raw: self.raw,
                    idx: self.idx,
                    table: self.table
                }),
            _ =>
                Full(FullBucket {
                    raw: self.raw,
                    idx: self.idx,
                    table: self.table
                })
        }
    }

    /// Modifies the bucket pointer in place to make it point to the next slot.
    pub fn next(&mut self) {
        // Branchless bucket iteration step.
        // As we reach the end of the table...
        // We take the current idx:          0111111b
        // Xor it by its increment:        ^ 1000000b
        //                                   ------------
        //                                   1111111b
        // Then AND with the capacity:     & 1000000b
        //                                   ------------
        // to get the backwards offset:      1000000b
        // ... and it's zero at all other times.
        let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity();
        // Finally, we obtain the offset 1 or the offset -cap + 1.
        let dist = 1 - (maybe_wraparound_dist as isize);

        self.idx += 1;

        unsafe {
            self.raw = self.raw.offset(dist);
        }
    }
}
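
// Illustrative sketch (not part of the upstream file): the branchless step
// in `Bucket::next` yields an offset of 1 in the middle of the table and an
// offset of `1 - capacity` when wrapping from the last slot back to the
// first (assumed capacity of 8 here).
#[test]
fn test_branchless_wraparound_step() {
    let cap = 8usize;
    // Mid-table: no wraparound, so the raw pointers advance by one slot.
    let idx = 3;
    assert_eq!(1 - (((idx ^ (idx + 1)) & cap) as isize), 1);
    // Last slot: the offset jumps back to the start of the table.
    let idx = cap - 1;
    assert_eq!(1 - (((idx ^ (idx + 1)) & cap) as isize), 1 - cap as isize);
}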

impl<K, V, M: Deref<Target=RawTable<K, V>>> EmptyBucket<K, V, M> {
    #[inline]
    pub fn next(self) -> Bucket<K, V, M> {
        let mut bucket = self.into_bucket();
        bucket.next();
        bucket
    }

    #[inline]
    pub fn into_bucket(self) -> Bucket<K, V, M> {
        Bucket {
            raw: self.raw,
            idx: self.idx,
            table: self.table
        }
    }

    pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> {
        let gap = EmptyBucket {
            raw: self.raw,
            idx: self.idx,
            table: ()
        };

        match self.next().peek() {
            Full(bucket) => {
                Some(GapThenFull {
                    gap: gap,
                    full: bucket
                })
            }
            Empty(..) => None
        }
    }
}

impl<K, V, M: Deref<Target=RawTable<K, V>> + DerefMut> EmptyBucket<K, V, M> {
    /// Puts the given key and value pair, along with the key's hash,
    /// into this bucket in the hashtable. Note how `self` is 'moved' into
    /// this function, because this slot will no longer be empty when
    /// we return! A `FullBucket` is returned for later use, pointing to
    /// the newly-filled slot in the hashtable.
    ///
    /// Use `make_hash` to construct a `SafeHash` to pass to this function.
    pub fn put(mut self, hash: SafeHash, key: K, value: V)
               -> FullBucket<K, V, M> {
        unsafe {
            *self.raw.hash = hash.inspect();
            ptr::write(self.raw.key, key);
            ptr::write(self.raw.val, value);
        }

        self.table.size += 1;

        FullBucket { raw: self.raw, idx: self.idx, table: self.table }
    }
}
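
// Illustrative sketch (not part of the upstream file): a minimal round trip
// through the bucket API, using `put` above and `take` defined further down.
// The hash value and capacity of 8 are arbitrary choices for the example.
#[test]
fn test_put_then_take_roundtrip() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    let hash = SafeHash { hash: 0x8000_0000_0000_0001 };
    {
        let full = match Bucket::new(&mut table, hash).peek() {
            Empty(empty) => empty.put(hash, 1, 10),
            Full(..) => panic!("a freshly created table should be empty"),
        };
        assert_eq!(full.read(), (&1, &10));
        let (_empty, k, v) = full.take();
        assert_eq!((k, v), (1, 10));
    }
    assert_eq!(table.size(), 0);
}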

impl<K, V, M: Deref<Target=RawTable<K, V>>> FullBucket<K, V, M> {
    #[inline]
    pub fn next(self) -> Bucket<K, V, M> {
        let mut bucket = self.into_bucket();
        bucket.next();
        bucket
    }

    #[inline]
    pub fn into_bucket(self) -> Bucket<K, V, M> {
        Bucket {
            raw: self.raw,
            idx: self.idx,
            table: self.table
        }
    }

    /// Get the distance between this bucket and the 'ideal' location
    /// as determined by the key's hash stored in it.
    ///
    /// In the terminology of Robin Hood hashing, this is called the
    /// "distance to initial bucket", or DIB. Also known as "probe count".
    pub fn distance(&self) -> usize {
        // Calculates the distance one has to travel when going from
        // `hash mod capacity` onwards to `idx mod capacity`, wrapping around
        // if the destination is not reached before the end of the table.
        (self.idx.wrapping_sub(self.hash().inspect() as usize)) & (self.table.capacity() - 1)
    }

    #[inline]
    pub fn hash(&self) -> SafeHash {
        unsafe {
            SafeHash {
                hash: *self.raw.hash
            }
        }
    }

    /// Gets references to the key and value at a given index.
    pub fn read(&self) -> (&K, &V) {
        unsafe {
            (&*self.raw.key,
             &*self.raw.val)
        }
    }
}
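
// Illustrative sketch (not part of the upstream file): the masked wrapping
// subtraction used by `distance` above, worked through for an assumed
// capacity of 8.
#[test]
fn test_probe_distance_arithmetic() {
    let cap = 8usize;
    // Ideal slot 5, actually stored in slot 7: probe count 2.
    assert_eq!(7usize.wrapping_sub(5) & (cap - 1), 2);
    // Ideal slot 6, wrapped around to slot 1: probe count 3.
    assert_eq!(1usize.wrapping_sub(6) & (cap - 1), 3);
}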

impl<K, V, M: Deref<Target=RawTable<K, V>> + DerefMut> FullBucket<K, V, M> {
    /// Removes this bucket's key and value from the hashtable.
    ///
    /// This works similarly to `put`, building an `EmptyBucket` out of the
    /// taken bucket.
    pub fn take(mut self) -> (EmptyBucket<K, V, M>, K, V) {
        self.table.size -= 1;

        unsafe {
            *self.raw.hash = EMPTY_BUCKET;
            (
                EmptyBucket {
                    raw: self.raw,
                    idx: self.idx,
                    table: self.table
                },
                ptr::read(self.raw.key),
                ptr::read(self.raw.val)
            )
        }
    }

    pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) {
        unsafe {
            let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h);
            let old_key = ptr::replace(self.raw.key, k);
            let old_val = ptr::replace(self.raw.val, v);

            (old_hash, old_key, old_val)
        }
    }

    /// Gets mutable references to the key and value at a given index.
    pub fn read_mut(&mut self) -> (&mut K, &mut V) {
        unsafe {
            (&mut *self.raw.key,
             &mut *self.raw.val)
        }
    }
}

impl<'t, K, V, M: Deref<Target=RawTable<K, V>> + 't> FullBucket<K, V, M> {
    /// Exchange a bucket state for immutable references into the table.
    /// Because the underlying reference to the table is also consumed,
    /// no further changes to the structure of the table are possible;
    /// in exchange for this, the returned references have a longer lifetime
    /// than the references returned by `read()`.
    pub fn into_refs(self) -> (&'t K, &'t V) {
        unsafe {
            (&*self.raw.key,
             &*self.raw.val)
        }
    }
}

impl<'t, K, V, M: Deref<Target=RawTable<K, V>> + DerefMut + 't> FullBucket<K, V, M> {
    /// This works similarly to `into_refs`, exchanging a bucket state
    /// for mutable references into the table.
    pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) {
        unsafe {
            (&mut *self.raw.key,
             &mut *self.raw.val)
        }
    }
}

impl<K, V, M> BucketState<K, V, M> {
    // For convenience.
    pub fn expect_full(self) -> FullBucket<K, V, M> {
        match self {
            Full(full) => full,
            Empty(..) => panic!("Expected full bucket")
        }
    }
}

impl<K, V, M: Deref<Target=RawTable<K, V>>> GapThenFull<K, V, M> {
    #[inline]
    pub fn full(&self) -> &FullBucket<K, V, M> {
        &self.full
    }

    pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> {
        unsafe {
            *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET);
            ptr::copy_nonoverlapping(self.full.raw.key, self.gap.raw.key, 1);
            ptr::copy_nonoverlapping(self.full.raw.val, self.gap.raw.val, 1);
        }

        let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full;

        match self.full.next().peek() {
            Full(bucket) => {
                self.gap.raw = prev_raw;
                self.gap.idx = prev_idx;

                self.full = bucket;

                Some(self)
            }
            Empty(..) => None
        }
    }
}
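
// Illustrative sketch (not part of the upstream file): `shift` moves the
// full bucket back into the gap directly before it, the kind of backward
// shift used when deleting under Robin Hood hashing. The hash value and
// capacity of 8 are arbitrary; the chosen hash maps to index 1, leaving
// index 0 as the gap.
#[test]
fn test_gap_then_full_shift() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    let hash = SafeHash { hash: 0x8000_0000_0000_0001 };
    {
        match Bucket::new(&mut table, hash).peek() {
            Empty(empty) => { empty.put(hash, 1, 10); }
            Full(..) => panic!("a freshly created table should be empty"),
        }
    }
    {
        let gap = match Bucket::at_index(&mut table, 0).peek() {
            Empty(empty) => empty,
            Full(..) => panic!("index 0 should still be empty"),
        };
        let gap_then_full = gap.gap_peek().expect("index 1 should be full");
        // Index 2 is empty, so the shift chain stops after a single move.
        assert!(gap_then_full.shift().is_none());
    }
    // The entry now lives in the previously empty bucket at index 0.
    match Bucket::at_index(&table, 0).peek() {
        Full(full) => assert_eq!(full.read(), (&1, &10)),
        Empty(..) => panic!("the entry should have shifted into index 0"),
    }
}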

/// Rounds up to a multiple of a power of two. Returns the closest multiple
/// of `target_alignment` that is greater than or equal to `unrounded`.
///
/// # Panics
///
/// Panics if `target_alignment` is not a power of two.
fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize {
    assert!(target_alignment.is_power_of_two());
    (unrounded + target_alignment - 1) & !(target_alignment - 1)
}

#[test]
fn test_rounding() {
    assert_eq!(round_up_to_next(0, 4), 0);
    assert_eq!(round_up_to_next(1, 4), 4);
    assert_eq!(round_up_to_next(2, 4), 4);
    assert_eq!(round_up_to_next(3, 4), 4);
    assert_eq!(round_up_to_next(4, 4), 4);
    assert_eq!(round_up_to_next(5, 4), 8);
}

// Returns a tuple of (key_offset, val_offset, oflo),
// from the start of a mallocated array. `oflo` reports overflow
// in the offset calculation.
fn calculate_offsets(hashes_size: usize,
                     keys_size: usize, keys_align: usize,
                     vals_align: usize)
                     -> (usize, usize, bool) {
    let keys_offset = round_up_to_next(hashes_size, keys_align);
    let (end_of_keys, oflo) = keys_offset.overflowing_add(keys_size);

    let vals_offset = round_up_to_next(end_of_keys, vals_align);

    (keys_offset, vals_offset, oflo)
}

// Returns a tuple of (minimum required malloc alignment, hash_offset,
// array_size, oflo), from the start of a mallocated array. `oflo` reports
// overflow in the size calculation.
fn calculate_allocation(hash_size: usize, hash_align: usize,
                        keys_size: usize, keys_align: usize,
                        vals_size: usize, vals_align: usize)
                        -> (usize, usize, usize, bool) {
    let hash_offset = 0;
    let (_, vals_offset, oflo) = calculate_offsets(hash_size,
                                                   keys_size, keys_align,
                                                   vals_align);
    let (end_of_vals, oflo2) = vals_offset.overflowing_add(vals_size);

    let min_align = cmp::max(hash_align, cmp::max(keys_align, vals_align));

    (min_align, hash_offset, end_of_vals, oflo || oflo2)
}

#[test]
fn test_offset_calculation() {
    assert_eq!(calculate_allocation(128, 8, 15, 1, 4, 4), (8, 0, 148, false));
    assert_eq!(calculate_allocation(3, 1, 2, 1, 1, 1), (1, 0, 6, false));
    assert_eq!(calculate_allocation(6, 2, 12, 4, 24, 8), (8, 0, 48, false));
    assert_eq!(calculate_offsets(128, 15, 1, 4), (128, 144, false));
    assert_eq!(calculate_offsets(3, 2, 1, 1), (3, 5, false));
    assert_eq!(calculate_offsets(6, 12, 4, 8), (8, 24, false));
}

impl<K, V> RawTable<K, V> {
    /// Does not initialize the buckets. The caller should ensure that,
    /// at the very least, every hash is set to EMPTY_BUCKET.
    unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> {
        if capacity == 0 {
            return RawTable {
                size: 0,
                capacity: 0,
                hashes: Unique::new(EMPTY as *mut u64),
                marker: marker::PhantomData,
            };
        }

        // No need for `checked_mul` before a more restrictive check performed
        // later in this method.
        let hashes_size = capacity * size_of::<u64>();
        let keys_size = capacity * size_of::<K>();
        let vals_size = capacity * size_of::<V>();

        // Allocating hashmaps is a little tricky. We need to allocate three
        // arrays, but since we know their sizes and alignments up front,
        // we just allocate a single array, and then have the subarrays
        // point into it.
        //
        // This is great in theory, but in practice getting the alignment
        // right is a little subtle. Therefore, calculating offsets has been
        // factored out into a different function.
        let (malloc_alignment, hash_offset, size, oflo) =
            calculate_allocation(
                hashes_size, min_align_of::<u64>(),
                keys_size, min_align_of::<K>(),
                vals_size, min_align_of::<V>());

        assert!(!oflo, "capacity overflow");

        // One check for overflow that covers calculation and rounding of size.
        let size_of_bucket = size_of::<u64>().checked_add(size_of::<K>()).unwrap()
                                             .checked_add(size_of::<V>()).unwrap();
        assert!(size >= capacity.checked_mul(size_of_bucket)
                                .expect("capacity overflow"),
                "capacity overflow");

        let buffer = allocate(size, malloc_alignment);
        if buffer.is_null() { ::alloc::oom() }

        let hashes = buffer.offset(hash_offset as isize) as *mut u64;

        RawTable {
            capacity: capacity,
            size: 0,
            hashes: Unique::new(hashes),
            marker: marker::PhantomData,
        }
    }

    fn first_bucket_raw(&self) -> RawBucket<K, V> {
        let hashes_size = self.capacity * size_of::<u64>();
        let keys_size = self.capacity * size_of::<K>();

        let buffer = *self.hashes as *mut u8;
        let (keys_offset, vals_offset, oflo) =
            calculate_offsets(hashes_size,
                              keys_size, min_align_of::<K>(),
                              min_align_of::<V>());
        debug_assert!(!oflo, "capacity overflow");
        unsafe {
            RawBucket {
                hash: *self.hashes,
                key: buffer.offset(keys_offset as isize) as *mut K,
                val: buffer.offset(vals_offset as isize) as *mut V,
                _marker: marker::PhantomData,
            }
        }
    }

    /// Creates a new raw table from a given capacity. All buckets are
    /// initially empty.
    pub fn new(capacity: usize) -> RawTable<K, V> {
        unsafe {
            let ret = RawTable::new_uninitialized(capacity);
            ptr::write_bytes(*ret.hashes, 0, capacity);
            ret
        }
    }

    /// The hashtable's capacity, similar to a vector's.
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// The number of elements ever `put` in the hashtable, minus the number
    /// of elements ever `take`n.
    pub fn size(&self) -> usize {
        self.size
    }

    fn raw_buckets(&self) -> RawBuckets<K, V> {
        RawBuckets {
            raw: self.first_bucket_raw(),
            hashes_end: unsafe {
                self.hashes.offset(self.capacity as isize)
            },
            marker: marker::PhantomData,
        }
    }

    pub fn iter(&self) -> Iter<K, V> {
        Iter {
            iter: self.raw_buckets(),
            elems_left: self.size(),
        }
    }

    pub fn iter_mut(&mut self) -> IterMut<K, V> {
        IterMut {
            iter: self.raw_buckets(),
            elems_left: self.size(),
        }
    }

    pub fn into_iter(self) -> IntoIter<K, V> {
        let RawBuckets { raw, hashes_end, .. } = self.raw_buckets();
        // Replace the marker regardless of lifetime bounds on parameters.
        IntoIter {
            iter: RawBuckets {
                raw: raw,
                hashes_end: hashes_end,
                marker: marker::PhantomData,
            },
            table: self,
        }
    }

    pub fn drain(&mut self) -> Drain<K, V> {
        let RawBuckets { raw, hashes_end, .. } = self.raw_buckets();
        // Replace the marker regardless of lifetime bounds on parameters.
        Drain {
            iter: RawBuckets {
                raw: raw,
                hashes_end: hashes_end,
                marker: marker::PhantomData,
            },
            table: self,
        }
    }

    /// Returns an iterator that copies out each entry. Used while the table
    /// is being dropped.
    unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> {
        let raw_bucket = self.first_bucket_raw();
        RevMoveBuckets {
            raw: raw_bucket.offset(self.capacity as isize),
            hashes_end: raw_bucket.hash,
            elems_left: self.size,
            marker: marker::PhantomData,
        }
    }
}
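
// Illustrative sketch (not part of the upstream file): `iter` visits only
// the full buckets, and `drain` yields every entry while bringing the
// table's size back to zero. The capacity and hash value are arbitrary.
#[test]
fn test_iter_and_drain() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    let hash = SafeHash { hash: 0x8000_0000_0000_0003 };
    {
        match Bucket::new(&mut table, hash).peek() {
            Empty(empty) => { empty.put(hash, 3, 30); }
            Full(..) => panic!("a freshly created table should be empty"),
        }
    }
    assert_eq!(table.iter().count(), 1);
    assert_eq!(table.drain().count(), 1);
    assert_eq!(table.size(), 0);
}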

/// A raw iterator. The basis for some other iterators in this module. Although
/// this interface is safe, it's not used outside this module.
struct RawBuckets<'a, K, V> {
    raw: RawBucket<K, V>,
    hashes_end: *mut u64,

    // Strictly speaking, this should be &'a (K,V), but that would
    // require that K:'a, and we often use RawBuckets<'static...> for
    // move iterations, so that messes up a lot of other things. So
    // just use `&'a ()` as this is not a publicly exposed type
    // anyway.
    marker: marker::PhantomData<&'a ()>,
}

// FIXME(#19839) Remove in favor of `#[derive(Clone)]`
impl<'a, K, V> Clone for RawBuckets<'a, K, V> {
    fn clone(&self) -> RawBuckets<'a, K, V> {
        RawBuckets {
            raw: self.raw,
            hashes_end: self.hashes_end,
            marker: marker::PhantomData,
        }
    }
}

impl<'a, K, V> Iterator for RawBuckets<'a, K, V> {
    type Item = RawBucket<K, V>;

    fn next(&mut self) -> Option<RawBucket<K, V>> {
        while self.raw.hash != self.hashes_end {
            unsafe {
                // We are swapping out the pointer to a bucket and replacing
                // it with the pointer to the next one.
                let prev = ptr::replace(&mut self.raw, self.raw.offset(1));
                if *prev.hash != EMPTY_BUCKET {
                    return Some(prev);
                }
            }
        }

        None
    }
}

/// An iterator that moves out buckets in reverse order. It leaves the table
/// in an inconsistent state and should only be used for dropping
/// the table's remaining entries. It's used in the implementation of Drop.
struct RevMoveBuckets<'a, K, V> {
    raw: RawBucket<K, V>,
    hashes_end: *mut u64,
    elems_left: usize,

    // As above, `&'a (K,V)` would seem better, but we often use
    // 'static for the lifetime, and this is not a publicly exposed
    // type.
    marker: marker::PhantomData<&'a ()>,
}

impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> {
    type Item = (K, V);

    fn next(&mut self) -> Option<(K, V)> {
        if self.elems_left == 0 {
            return None;
        }

        loop {
            debug_assert!(self.raw.hash != self.hashes_end);

            unsafe {
                self.raw = self.raw.offset(-1);

                if *self.raw.hash != EMPTY_BUCKET {
                    self.elems_left -= 1;
                    return Some((
                        ptr::read(self.raw.key),
                        ptr::read(self.raw.val)
                    ));
                }
            }
        }
    }
}

/// Iterator over shared references to entries in a table.
pub struct Iter<'a, K: 'a, V: 'a> {
    iter: RawBuckets<'a, K, V>,
    elems_left: usize,
}

// FIXME(#19839) Remove in favor of `#[derive(Clone)]`
impl<'a, K, V> Clone for Iter<'a, K, V> {
    fn clone(&self) -> Iter<'a, K, V> {
        Iter {
            iter: self.iter.clone(),
            elems_left: self.elems_left
        }
    }
}

/// Iterator over mutable references to entries in a table.
pub struct IterMut<'a, K: 'a, V: 'a> {
    iter: RawBuckets<'a, K, V>,
    elems_left: usize,
}

/// Iterator over the entries in a table, consuming the table.
pub struct IntoIter<K, V> {
    table: RawTable<K, V>,
    iter: RawBuckets<'static, K, V>
}

/// Iterator over the entries in a table, clearing the table.
pub struct Drain<'a, K: 'a, V: 'a> {
    table: &'a mut RawTable<K, V>,
    iter: RawBuckets<'static, K, V>,
}

impl<'a, K, V> Iterator for Iter<'a, K, V> {
    type Item = (&'a K, &'a V);

    fn next(&mut self) -> Option<(&'a K, &'a V)> {
        self.iter.next().map(|bucket| {
            self.elems_left -= 1;
            unsafe {
                (&*bucket.key,
                 &*bucket.val)
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.elems_left, Some(self.elems_left))
    }
}
impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> {
    fn len(&self) -> usize { self.elems_left }
}

impl<'a, K, V> Iterator for IterMut<'a, K, V> {
    type Item = (&'a K, &'a mut V);

    fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
        self.iter.next().map(|bucket| {
            self.elems_left -= 1;
            unsafe {
                (&*bucket.key,
                 &mut *bucket.val)
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.elems_left, Some(self.elems_left))
    }
}
impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> {
    fn len(&self) -> usize { self.elems_left }
}

impl<K, V> Iterator for IntoIter<K, V> {
    type Item = (SafeHash, K, V);

    fn next(&mut self) -> Option<(SafeHash, K, V)> {
        self.iter.next().map(|bucket| {
            self.table.size -= 1;
            unsafe {
                (
                    SafeHash {
                        hash: *bucket.hash,
                    },
                    ptr::read(bucket.key),
                    ptr::read(bucket.val)
                )
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let size = self.table.size();
        (size, Some(size))
    }
}
impl<K, V> ExactSizeIterator for IntoIter<K, V> {
    fn len(&self) -> usize { self.table.size() }
}

impl<'a, K, V> Iterator for Drain<'a, K, V> {
    type Item = (SafeHash, K, V);

    #[inline]
    fn next(&mut self) -> Option<(SafeHash, K, V)> {
        self.iter.next().map(|bucket| {
            self.table.size -= 1;
            unsafe {
                (
                    SafeHash {
                        hash: ptr::replace(bucket.hash, EMPTY_BUCKET),
                    },
                    ptr::read(bucket.key),
                    ptr::read(bucket.val)
                )
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let size = self.table.size();
        (size, Some(size))
    }
}
impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> {
    fn len(&self) -> usize { self.table.size() }
}

#[unsafe_destructor]
impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> {
    fn drop(&mut self) {
        for _ in self.by_ref() {}
    }
}

impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
    fn clone(&self) -> RawTable<K, V> {
        unsafe {
            let mut new_ht = RawTable::new_uninitialized(self.capacity());

            {
                let cap = self.capacity();
                let mut new_buckets = Bucket::first(&mut new_ht);
                let mut buckets = Bucket::first(self);
                while buckets.index() != cap {
                    match buckets.peek() {
                        Full(full) => {
                            let (h, k, v) = {
                                let (k, v) = full.read();
                                (full.hash(), k.clone(), v.clone())
                            };
                            *new_buckets.raw.hash = h.inspect();
                            ptr::write(new_buckets.raw.key, k);
                            ptr::write(new_buckets.raw.val, v);
                        }
                        Empty(..) => {
                            *new_buckets.raw.hash = EMPTY_BUCKET;
                        }
                    }
                    new_buckets.next();
                    buckets.next();
                }
            };

            new_ht.size = self.size();

            new_ht
        }
    }
}
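
// Illustrative sketch (not part of the upstream file): cloning copies the
// stored entries along with their hashes and preserves both size and
// capacity. The hash value and capacity of 8 are arbitrary.
#[test]
fn test_clone_preserves_entries() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    let hash = SafeHash { hash: 0x8000_0000_0000_0002 };
    {
        match Bucket::new(&mut table, hash).peek() {
            Empty(empty) => { empty.put(hash, 7, 70); }
            Full(..) => panic!("a freshly created table should be empty"),
        }
    }
    let copy = table.clone();
    assert_eq!(copy.size(), 1);
    assert_eq!(copy.capacity(), 8);
    assert_eq!(copy.iter().next(), Some((&7, &70)));
}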

#[unsafe_destructor]
impl<K, V> Drop for RawTable<K, V> {
    fn drop(&mut self) {
        if self.capacity == 0 || self.capacity == mem::POST_DROP_USIZE {
            return;
        }

        // This is done in reverse because we've likely partially taken
        // some elements out with `.into_iter()` from the front.
        // Check if the size is 0, so we don't do a useless scan when
        // dropping empty tables such as on resize.
        // Also avoid double drop of elements that have been already moved out.
        unsafe {
            for _ in self.rev_move_buckets() {}
        }

        let hashes_size = self.capacity * size_of::<u64>();
        let keys_size = self.capacity * size_of::<K>();
        let vals_size = self.capacity * size_of::<V>();
        let (align, _, size, oflo) =
            calculate_allocation(hashes_size, min_align_of::<u64>(),
                                 keys_size, min_align_of::<K>(),
                                 vals_size, min_align_of::<V>());

        debug_assert!(!oflo, "should be impossible");

        unsafe {
            deallocate(*self.hashes as *mut u8, size, align);
            // Remember how everything was allocated out of one buffer
            // during initialization? We only need one call to free here.
        }
    }
}