src/libstd/collections/hash/table.rs (rustc 1.14.0)
1 // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 use alloc::heap::{EMPTY, allocate, deallocate};
12
13 use cmp;
14 use hash::{BuildHasher, Hash, Hasher};
15 use intrinsics::needs_drop;
16 use marker;
17 use mem::{align_of, size_of};
18 use mem;
19 use ops::{Deref, DerefMut};
20 use ptr::{self, Unique, Shared};
21
22 use self::BucketState::*;
23
24 /// Integer type used for stored hash values.
25 ///
26 /// No more than bit_width(usize) bits are needed to select a bucket.
27 ///
28 /// The most significant bit is ours to use for tagging `SafeHash`.
29 ///
30 /// (Even if we could have usize::MAX bytes allocated for buckets,
31 /// each bucket stores at least a `HashUint`, so there can be no more than
32 /// usize::MAX / size_of(usize) buckets.)
33 type HashUint = usize;
34
35 const EMPTY_BUCKET: HashUint = 0;
36
37 /// The raw hashtable, providing safe-ish access to the unzipped and highly
38 /// optimized arrays of hashes, and key-value pairs.
39 ///
40 /// This design is a lot faster than the naive
41 /// `Vec<Option<(u64, K, V)>>`, because we don't pay for the overhead of an
42 /// option on every element, and we get a generally more cache-aware design.
43 ///
44 /// Essential invariants of this structure:
45 ///
46 /// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw`
47 /// points to 'undefined' contents. Don't read from it. This invariant is
48 /// enforced outside this module with the `EmptyBucket`, `FullBucket`,
49 /// and `SafeHash` types.
50 ///
51 /// - An `EmptyBucket` is only constructed at an index with
52 /// a hash of EMPTY_BUCKET.
53 ///
54 /// - A `FullBucket` is only constructed at an index with a
55 /// non-EMPTY_BUCKET hash.
56 ///
57 /// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
58 /// around hashes of zero by changing them to 0x8000_0000_0000_0000,
59 /// which will likely map to the same bucket, while not being confused
60 /// with "empty".
61 ///
62 /// - Both "arrays represented by pointers" are the same length:
63 /// `capacity`. This is set at creation and never changes. The arrays
64 /// are unzipped and are more cache aware (scanning through 8 hashes
65 /// brings in at most 2 cache lines, since they're all right beside each
66 /// other). This layout may waste space in padding such as in a map from
67 /// u64 to u8, but is a more cache conscious layout as the key-value pairs
68 /// are only very shortly probed and the desired value will be in the same
69 /// or next cache line.
70 ///
71 /// You can kind of think of this module/data structure as a safe wrapper
72 /// around just the "table" part of the hashtable. It enforces some
73 /// invariants at the type level and employs some performance trickery,
74 /// but in general is just a tricked out `Vec<Option<(u64, K, V)>>`.
75 pub struct RawTable<K, V> {
76 capacity: usize,
77 size: usize,
78 hashes: Unique<HashUint>,
79
80 // Because K/V do not appear directly in any of the types in the struct,
81 // inform rustc that in fact instances of K and V are reachable from here.
82 marker: marker::PhantomData<(K, V)>,
83 }
84
85 unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {}
86 unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {}
87
88 struct RawBucket<K, V> {
89 hash: *mut HashUint,
90 // We use *const to ensure covariance with respect to K and V
91 pair: *const (K, V),
92 _marker: marker::PhantomData<(K, V)>,
93 }
94
95 impl<K, V> Copy for RawBucket<K, V> {}
96 impl<K, V> Clone for RawBucket<K, V> {
97 fn clone(&self) -> RawBucket<K, V> {
98 *self
99 }
100 }
101
102 pub struct Bucket<K, V, M> {
103 raw: RawBucket<K, V>,
104 idx: usize,
105 table: M,
106 }
107
108 impl<K, V, M: Copy> Copy for Bucket<K, V, M> {}
109 impl<K, V, M: Copy> Clone for Bucket<K, V, M> {
110 fn clone(&self) -> Bucket<K, V, M> {
111 *self
112 }
113 }
114
115 pub struct EmptyBucket<K, V, M> {
116 raw: RawBucket<K, V>,
117 idx: usize,
118 table: M,
119 }
120
121 pub struct FullBucket<K, V, M> {
122 raw: RawBucket<K, V>,
123 idx: usize,
124 table: M,
125 }
126
127 pub type FullBucketMut<'table, K, V> = FullBucket<K, V, &'table mut RawTable<K, V>>;
128
129 pub enum BucketState<K, V, M> {
130 Empty(EmptyBucket<K, V, M>),
131 Full(FullBucket<K, V, M>),
132 }
133
134 // A GapThenFull encapsulates the state of two consecutive buckets at once.
135 // The first bucket, called the gap, is known to be empty.
136 // The second bucket is full.
137 pub struct GapThenFull<K, V, M> {
138 gap: EmptyBucket<K, V, ()>,
139 full: FullBucket<K, V, M>,
140 }
141
142 /// A hash that is not zero, since we use a hash of zero to represent empty
143 /// buckets.
144 #[derive(PartialEq, Copy, Clone)]
145 pub struct SafeHash {
146 hash: HashUint,
147 }
148
149 impl SafeHash {
150 /// Peek at the hash value, which is guaranteed to be non-zero.
151 #[inline(always)]
152 pub fn inspect(&self) -> HashUint {
153 self.hash
154 }
155
156 #[inline(always)]
157 pub fn new(hash: u64) -> Self {
158 // We need to avoid 0 in order to prevent collisions with
159 // EMPTY_BUCKET. We can maintain our precious uniform distribution
160 // of initial indexes by unconditionally setting the MSB,
161 // effectively reducing the hashes by one bit.
162 //
163 // Truncate hash to fit in `HashUint`.
164 let hash_bits = size_of::<HashUint>() * 8;
165 SafeHash { hash: (1 << (hash_bits - 1)) | (hash as HashUint) }
166 }
167 }
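// An illustrative sketch, not one of the original tests in this file: `new`
// unconditionally sets the most significant bit, so even an input hash of 0
// can never be confused with EMPTY_BUCKET, and non-zero inputs keep their low
// bits for bucket selection.
#[test]
fn safehash_is_never_empty_sketch() {
    let hash_bits = size_of::<HashUint>() * 8;
    let msb: HashUint = 1 << (hash_bits - 1);
    assert!(SafeHash::new(0).inspect() != EMPTY_BUCKET);
    assert_eq!(SafeHash::new(0).inspect(), msb);
    // Only the MSB is forced; the rest of the (truncated) hash survives.
    assert_eq!(SafeHash::new(5).inspect(), msb | 5);
}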
168
169 /// We need to remove hashes of 0. That's reserved for empty buckets.
170 /// This function wraps up hashing with the given `BuildHasher` so that it is
171 /// the only way outside this module to generate a `SafeHash`.
172 pub fn make_hash<T: ?Sized, S>(hash_state: &S, t: &T) -> SafeHash
173 where T: Hash,
174 S: BuildHasher
175 {
176 let mut state = hash_state.build_hasher();
177 t.hash(&mut state);
178 SafeHash::new(state.finish())
179 }
180
181 // `replace` casts a `*HashUint` to a `*SafeHash`. Since we statically
182 // ensure that a `FullBucket` points to an index with a non-zero hash,
183 // and a `SafeHash` is just a `HashUint` with a different name, this is
184 // safe.
185 //
186 // This test ensures that a `SafeHash` really IS the same size as a
187 // `HashUint`. If you need to change the size of `SafeHash` (and
188 // consequently make this test fail), `replace` needs to be
189 // modified to no longer assume this.
190 #[test]
191 fn can_alias_safehash_as_hash() {
192 assert_eq!(size_of::<SafeHash>(), size_of::<HashUint>())
193 }
194
195 impl<K, V> RawBucket<K, V> {
196 unsafe fn offset(self, count: isize) -> RawBucket<K, V> {
197 RawBucket {
198 hash: self.hash.offset(count),
199 pair: self.pair.offset(count),
200 _marker: marker::PhantomData,
201 }
202 }
203 }
204
205 // Buckets hold references to the table.
206 impl<K, V, M> FullBucket<K, V, M> {
207 /// Borrow a reference to the table.
208 pub fn table(&self) -> &M {
209 &self.table
210 }
211 /// Move out the reference to the table.
212 pub fn into_table(self) -> M {
213 self.table
214 }
215 /// Get the raw index.
216 pub fn index(&self) -> usize {
217 self.idx
218 }
219 }
220
221 impl<K, V, M> EmptyBucket<K, V, M> {
222 /// Borrow a reference to the table.
223 pub fn table(&self) -> &M {
224 &self.table
225 }
226 }
227
228 impl<K, V, M> Bucket<K, V, M> {
229 /// Get the raw index.
230 pub fn index(&self) -> usize {
231 self.idx
232 }
233 }
234
235 impl<K, V, M> Deref for FullBucket<K, V, M>
236 where M: Deref<Target = RawTable<K, V>>
237 {
238 type Target = RawTable<K, V>;
239 fn deref(&self) -> &RawTable<K, V> {
240 &self.table
241 }
242 }
243
244 /// `Put` is implemented for types which provide access to a table and cannot be invalidated
245 /// by filling a bucket. A similar implementation for `Take` is possible.
246 pub trait Put<K, V> {
247 unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V>;
248 }
249
250
251 impl<'t, K, V> Put<K, V> for &'t mut RawTable<K, V> {
252 unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V> {
253 *self
254 }
255 }
256
257 impl<K, V, M> Put<K, V> for Bucket<K, V, M>
258 where M: Put<K, V>
259 {
260 unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V> {
261 self.table.borrow_table_mut()
262 }
263 }
264
265 impl<K, V, M> Put<K, V> for FullBucket<K, V, M>
266 where M: Put<K, V>
267 {
268 unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V> {
269 self.table.borrow_table_mut()
270 }
271 }
272
273 impl<K, V, M: Deref<Target = RawTable<K, V>>> Bucket<K, V, M> {
274 pub fn new(table: M, hash: SafeHash) -> Bucket<K, V, M> {
275 Bucket::at_index(table, hash.inspect() as usize)
276 }
277
278 pub fn at_index(table: M, ib_index: usize) -> Bucket<K, V, M> {
279 // If capacity is 0, then the RawBucket will be populated with bogus pointers.
280 // This is an uncommon case though, so the check is only a debug assertion and is skipped in release builds.
281 debug_assert!(table.capacity() > 0,
282 "Table should have capacity at this point");
283 let ib_index = ib_index & (table.capacity() - 1);
284 Bucket {
285 raw: unsafe { table.first_bucket_raw().offset(ib_index as isize) },
286 idx: ib_index,
287 table: table,
288 }
289 }
290
291 pub fn first(table: M) -> Bucket<K, V, M> {
292 Bucket {
293 raw: table.first_bucket_raw(),
294 idx: 0,
295 table: table,
296 }
297 }
298
299 /// Reads a bucket at a given index, returning an enum indicating whether
300 /// it's initialized or not. You need to match on this enum to get
301 /// the appropriate types to call most of the other functions in
302 /// this module.
303 pub fn peek(self) -> BucketState<K, V, M> {
304 match unsafe { *self.raw.hash } {
305 EMPTY_BUCKET => {
306 Empty(EmptyBucket {
307 raw: self.raw,
308 idx: self.idx,
309 table: self.table,
310 })
311 }
312 _ => {
313 Full(FullBucket {
314 raw: self.raw,
315 idx: self.idx,
316 table: self.table,
317 })
318 }
319 }
320 }
321
322 /// Modifies the bucket pointer in place to make it point to the next slot.
323 pub fn next(&mut self) {
324 self.idx += 1;
325 let range = self.table.capacity();
326 // This code is branchless thanks to a conditional move.
327 let dist = if self.idx & (range - 1) == 0 {
328 1 - range as isize
329 } else {
330 1
331 };
332 unsafe {
333 self.raw = self.raw.offset(dist);
334 }
335 }
336 }
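// A sketch of the wrap-around arithmetic in `Bucket::next` above, using a
// hypothetical power-of-two capacity of 8 (the numbers are illustrative only):
// once the incremented index reaches a multiple of the capacity, the raw
// pointers are moved back by `capacity - 1` slots instead of forward by one.
#[test]
fn bucket_next_wraparound_sketch() {
    let range: usize = 8; // capacity is always a power of two
    let mut idx: usize = 7; // last in-range slot
    idx += 1;
    let dist = if idx & (range - 1) == 0 {
        1 - range as isize
    } else {
        1
    };
    assert_eq!(dist, -7); // the raw pointers jump back to the first bucket
    assert_eq!(idx & (range - 1), 0); // and the masked index wraps to 0
}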
337
338 impl<K, V, M: Deref<Target = RawTable<K, V>>> EmptyBucket<K, V, M> {
339 #[inline]
340 pub fn next(self) -> Bucket<K, V, M> {
341 let mut bucket = self.into_bucket();
342 bucket.next();
343 bucket
344 }
345
346 #[inline]
347 pub fn into_bucket(self) -> Bucket<K, V, M> {
348 Bucket {
349 raw: self.raw,
350 idx: self.idx,
351 table: self.table,
352 }
353 }
354
355 pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> {
356 let gap = EmptyBucket {
357 raw: self.raw,
358 idx: self.idx,
359 table: (),
360 };
361
362 match self.next().peek() {
363 Full(bucket) => {
364 Some(GapThenFull {
365 gap: gap,
366 full: bucket,
367 })
368 }
369 Empty(..) => None,
370 }
371 }
372 }
373
374 impl<K, V, M> EmptyBucket<K, V, M>
375 where M: Put<K, V>
376 {
377 /// Puts the given key and value pair, along with the key's hash,
378 /// into this bucket in the hashtable. Note how `self` is 'moved' into
379 /// this function, because this slot will no longer be empty when
380 /// we return! A `FullBucket` is returned for later use, pointing to
381 /// the newly-filled slot in the hashtable.
382 ///
383 /// Use `make_hash` to construct a `SafeHash` to pass to this function.
384 pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket<K, V, M> {
385 unsafe {
386 *self.raw.hash = hash.inspect();
387 ptr::write(self.raw.pair as *mut (K, V), (key, value));
388
389 self.table.borrow_table_mut().size += 1;
390 }
391
392 FullBucket {
393 raw: self.raw,
394 idx: self.idx,
395 table: self.table,
396 }
397 }
398 }
399
400 impl<K, V, M: Deref<Target = RawTable<K, V>>> FullBucket<K, V, M> {
401 #[inline]
402 pub fn next(self) -> Bucket<K, V, M> {
403 let mut bucket = self.into_bucket();
404 bucket.next();
405 bucket
406 }
407
408 #[inline]
409 pub fn into_bucket(self) -> Bucket<K, V, M> {
410 Bucket {
411 raw: self.raw,
412 idx: self.idx,
413 table: self.table,
414 }
415 }
416
417 /// Duplicates the current position. This can be useful for operations
418 /// on two or more buckets.
419 pub fn stash(self) -> FullBucket<K, V, Self> {
420 FullBucket {
421 raw: self.raw,
422 idx: self.idx,
423 table: self,
424 }
425 }
426
427 /// Get the distance between this bucket and the 'ideal' location
428 /// as determined by the key's hash stored in it.
429 ///
430 /// In the Robin Hood hashing literature, this is called the "distance to
431 /// initial bucket" (DIB), also known as the "probe count".
432 pub fn displacement(&self) -> usize {
433 // Calculates the distance one has to travel when going from
434 // `hash mod capacity` onwards to `idx mod capacity`, wrapping around
435 // if the destination is not reached before the end of the table.
436 (self.idx.wrapping_sub(self.hash().inspect() as usize)) & (self.table.capacity() - 1)
437 }
438
439 #[inline]
440 pub fn hash(&self) -> SafeHash {
441 unsafe { SafeHash { hash: *self.raw.hash } }
442 }
443
444 /// Gets references to the key and value stored in this bucket.
445 pub fn read(&self) -> (&K, &V) {
446 unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) }
447 }
448 }
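// A sketch of the `displacement` arithmetic above with a hypothetical capacity
// of 8 (illustrative numbers only): an entry whose ideal slot is 6 but which
// ended up in slot 1 wrapped past the end of the table, and the wrapping
// subtraction plus mask recovers a probe count of 3 (6 -> 7 -> 0 -> 1).
#[test]
fn displacement_arithmetic_sketch() {
    let capacity: usize = 8;
    let ideal: usize = 6; // hash-determined slot
    let idx: usize = 1; // slot the entry actually occupies
    assert_eq!(idx.wrapping_sub(ideal) & (capacity - 1), 3);
}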
449
450 // We take a mutable reference to the table instead of accepting anything that
451 // implements `DerefMut` to prevent fn `take` from being called on `stash`ed
452 // buckets.
453 impl<'t, K, V> FullBucket<K, V, &'t mut RawTable<K, V>> {
454 /// Removes this bucket's key and value from the hashtable.
455 ///
456 /// This works similarly to `put`, building an `EmptyBucket` out of the
457 /// taken bucket.
458 pub fn take(mut self) -> (EmptyBucket<K, V, &'t mut RawTable<K, V>>, K, V) {
459 self.table.size -= 1;
460
461 unsafe {
462 *self.raw.hash = EMPTY_BUCKET;
463 let (k, v) = ptr::read(self.raw.pair);
464 (EmptyBucket {
465 raw: self.raw,
466 idx: self.idx,
467 table: self.table,
468 },
469 k,
470 v)
471 }
472 }
473 }
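// An illustrative sketch, not one of the original tests in this file, of the
// workflow the bucket types enforce: probe with `Bucket::new`, match on
// `peek`, `put` through the resulting `EmptyBucket`, and `take` the entry back
// out of the `FullBucket`. The hash input and key/value numbers are arbitrary.
#[test]
fn put_then_take_roundtrip_sketch() {
    let mut table = RawTable::<u32, u32>::new(8);
    let hash = SafeHash::new(0x1234_5678);
    {
        let bucket = Bucket::new(&mut table, hash);
        let full = match bucket.peek() {
            Empty(empty) => empty.put(hash, 7, 70),
            Full(..) => panic!("a freshly created table has no full buckets"),
        };
        assert_eq!(full.read(), (&7, &70));
        let (_empty, k, v) = full.take();
        assert_eq!((k, v), (7, 70));
    }
    assert_eq!(table.size(), 0);
    assert_eq!(table.capacity(), 8);
}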
474
475 // This use of `Put` is misleading and restrictive, but safe and sufficient for our use cases
476 // where `M` is a full bucket or table reference type with mutable access to the table.
477 impl<K, V, M> FullBucket<K, V, M>
478 where M: Put<K, V>
479 {
480 pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) {
481 unsafe {
482 let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h);
483 let (old_key, old_val) = ptr::replace(self.raw.pair as *mut (K, V), (k, v));
484
485 (old_hash, old_key, old_val)
486 }
487 }
488 }
489
490 impl<K, V, M> FullBucket<K, V, M>
491 where M: Deref<Target = RawTable<K, V>> + DerefMut
492 {
493 /// Gets mutable references to the key and value stored in this bucket.
494 pub fn read_mut(&mut self) -> (&mut K, &mut V) {
495 let pair_mut = self.raw.pair as *mut (K, V);
496 unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) }
497 }
498 }
499
500 impl<'t, K, V, M> FullBucket<K, V, M>
501 where M: Deref<Target = RawTable<K, V>> + 't
502 {
503 /// Exchange a bucket state for immutable references into the table.
504 /// Because the underlying reference to the table is also consumed,
505 /// no further changes to the structure of the table are possible;
506 /// in exchange for this, the returned references have a longer lifetime
507 /// than the references returned by `read()`.
508 pub fn into_refs(self) -> (&'t K, &'t V) {
509 unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) }
510 }
511 }
512
513 impl<'t, K, V, M> FullBucket<K, V, M>
514 where M: Deref<Target = RawTable<K, V>> + DerefMut + 't
515 {
516 /// This works similarly to `into_refs`, exchanging a bucket state
517 /// for mutable references into the table.
518 pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) {
519 let pair_mut = self.raw.pair as *mut (K, V);
520 unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) }
521 }
522 }
523
524 impl<K, V, M> GapThenFull<K, V, M>
525 where M: Deref<Target = RawTable<K, V>>
526 {
527 #[inline]
528 pub fn full(&self) -> &FullBucket<K, V, M> {
529 &self.full
530 }
531
532 pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> {
533 unsafe {
534 *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET);
535 ptr::copy_nonoverlapping(self.full.raw.pair, self.gap.raw.pair as *mut (K, V), 1);
536 }
537
538 let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full;
539
540 match self.full.next().peek() {
541 Full(bucket) => {
542 self.gap.raw = prev_raw;
543 self.gap.idx = prev_idx;
544
545 self.full = bucket;
546
547 Some(self)
548 }
549 Empty(..) => None,
550 }
551 }
552 }
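// An illustrative sketch, not an original test, of the backward-shift step
// that `gap_peek` and `shift` implement for deletions: an entry sitting one
// slot past an empty bucket is moved back into the gap, and shifting stops as
// soon as the following slot is empty. Slot choices and values are arbitrary.
#[test]
fn gap_then_full_shift_sketch() {
    let mut table = RawTable::<u32, u32>::new(8);
    // This hash's ideal slot is 0 (the tagged hash is just the MSB, which
    // masks to index 0), but we store the entry one slot past, as a collision
    // would, leaving slot 0 empty.
    let hash = SafeHash::new(0);
    match Bucket::at_index(&mut table, 1).peek() {
        Empty(empty) => {
            empty.put(hash, 9, 90);
        }
        Full(..) => panic!("slot 1 should start out empty"),
    }
    // Pair the empty slot 0 with the full slot 1 and shift the entry back.
    {
        let gap = match Bucket::at_index(&mut table, 0).peek() {
            Empty(empty) => empty,
            Full(..) => panic!("slot 0 should be empty"),
        };
        let gap_then_full = gap.gap_peek().expect("slot 1 holds an entry");
        // Slot 2 is empty, so there is nothing further to shift.
        assert!(gap_then_full.shift().is_none());
    }
    match Bucket::at_index(&mut table, 0).peek() {
        Full(full) => assert_eq!(full.read(), (&9, &90)),
        Empty(..) => panic!("the entry should have moved into slot 0"),
    }
}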
553
554
555 /// Rounds up to a multiple of a power of two. Returns the closest multiple
556 /// of `target_alignment` that is greater than or equal to `unrounded`.
557 ///
558 /// # Panics
559 ///
560 /// Panics if `target_alignment` is not a power of two.
561 #[inline]
562 fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize {
563 assert!(target_alignment.is_power_of_two());
564 (unrounded + target_alignment - 1) & !(target_alignment - 1)
565 }
566
567 #[test]
568 fn test_rounding() {
569 assert_eq!(round_up_to_next(0, 4), 0);
570 assert_eq!(round_up_to_next(1, 4), 4);
571 assert_eq!(round_up_to_next(2, 4), 4);
572 assert_eq!(round_up_to_next(3, 4), 4);
573 assert_eq!(round_up_to_next(4, 4), 4);
574 assert_eq!(round_up_to_next(5, 4), 8);
575 }
576
577 // Returns a tuple of (pairs_offset, end_of_pairs_offset, overflow_flag),
578 // with offsets measured from the start of a mallocated array.
579 #[inline]
580 fn calculate_offsets(hashes_size: usize,
581 pairs_size: usize,
582 pairs_align: usize)
583 -> (usize, usize, bool) {
584 let pairs_offset = round_up_to_next(hashes_size, pairs_align);
585 let (end_of_pairs, oflo) = pairs_offset.overflowing_add(pairs_size);
586
587 (pairs_offset, end_of_pairs, oflo)
588 }
589
590 // Returns a tuple of (minimum required malloc alignment, hash_offset,
591 // array_size, overflow_flag), with offsets measured from the start of a mallocated array.
592 fn calculate_allocation(hash_size: usize,
593 hash_align: usize,
594 pairs_size: usize,
595 pairs_align: usize)
596 -> (usize, usize, usize, bool) {
597 let hash_offset = 0;
598 let (_, end_of_pairs, oflo) = calculate_offsets(hash_size, pairs_size, pairs_align);
599
600 let align = cmp::max(hash_align, pairs_align);
601
602 (align, hash_offset, end_of_pairs, oflo)
603 }
604
605 #[test]
606 fn test_offset_calculation() {
607 assert_eq!(calculate_allocation(128, 8, 16, 8), (8, 0, 144, false));
608 assert_eq!(calculate_allocation(3, 1, 2, 1), (1, 0, 5, false));
609 assert_eq!(calculate_allocation(6, 2, 12, 4), (4, 0, 20, false));
610 assert_eq!(calculate_offsets(128, 15, 4), (128, 143, false));
611 assert_eq!(calculate_offsets(3, 2, 4), (4, 6, false));
612 assert_eq!(calculate_offsets(6, 12, 4), (8, 20, false));
613 }
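// A sketch with hypothetical numbers connecting the padding remark in the
// `RawTable` docs to these helpers: on a target with an 8-byte `usize`, a
// capacity-4 table mapping u64 to u8 has a 32-byte hash array and 8-byte
// aligned 16-byte pairs, so the pairs begin right at offset 32 and the single
// allocation spans 96 bytes with 8-byte alignment.
#[test]
fn layout_example_sketch() {
    assert_eq!(calculate_offsets(4 * 8, 4 * 16, 8), (32, 96, false));
    assert_eq!(calculate_allocation(4 * 8, 8, 4 * 16, 8), (8, 0, 96, false));
}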
614
615 impl<K, V> RawTable<K, V> {
616 /// Does not initialize the buckets. The caller should ensure that,
617 /// at the very least, every hash is set to EMPTY_BUCKET.
618 unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> {
619 if capacity == 0 {
620 return RawTable {
621 size: 0,
622 capacity: 0,
623 hashes: Unique::new(EMPTY as *mut HashUint),
624 marker: marker::PhantomData,
625 };
626 }
627
628 // No need for `checked_mul` before a more restrictive check performed
629 // later in this method.
630 let hashes_size = capacity.wrapping_mul(size_of::<HashUint>());
631 let pairs_size = capacity.wrapping_mul(size_of::<(K, V)>());
632
633 // Allocating hashmaps is a little tricky. We need to allocate two
634 // arrays, but since we know their sizes and alignments up front,
635 // we just allocate a single array, and then have the subarrays
636 // point into it.
637 //
638 // This is great in theory, but in practice getting the alignment
639 // right is a little subtle. Therefore, calculating offsets has been
640 // factored out into a different function.
641 let (alignment, hash_offset, size, oflo) = calculate_allocation(hashes_size,
642 align_of::<HashUint>(),
643 pairs_size,
644 align_of::<(K, V)>());
645 assert!(!oflo, "capacity overflow");
646
647 // One check for overflow that covers calculation and rounding of size.
648 let size_of_bucket = size_of::<HashUint>().checked_add(size_of::<(K, V)>()).unwrap();
649 assert!(size >=
650 capacity.checked_mul(size_of_bucket)
651 .expect("capacity overflow"),
652 "capacity overflow");
653
654 let buffer = allocate(size, alignment);
655 if buffer.is_null() {
656 ::alloc::oom()
657 }
658
659 let hashes = buffer.offset(hash_offset as isize) as *mut HashUint;
660
661 RawTable {
662 capacity: capacity,
663 size: 0,
664 hashes: Unique::new(hashes),
665 marker: marker::PhantomData,
666 }
667 }
668
669 fn first_bucket_raw(&self) -> RawBucket<K, V> {
670 let hashes_size = self.capacity * size_of::<HashUint>();
671 let pairs_size = self.capacity * size_of::<(K, V)>();
672
673 let buffer = *self.hashes as *mut u8;
674 let (pairs_offset, _, oflo) =
675 calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>());
676 debug_assert!(!oflo, "capacity overflow");
677 unsafe {
678 RawBucket {
679 hash: *self.hashes,
680 pair: buffer.offset(pairs_offset as isize) as *const _,
681 _marker: marker::PhantomData,
682 }
683 }
684 }
685
686 /// Creates a new raw table from a given capacity. All buckets are
687 /// initially empty.
688 pub fn new(capacity: usize) -> RawTable<K, V> {
689 unsafe {
690 let ret = RawTable::new_uninitialized(capacity);
691 ptr::write_bytes(*ret.hashes, 0, capacity);
692 ret
693 }
694 }
695
696 /// The hashtable's capacity, similar to a vector's.
697 pub fn capacity(&self) -> usize {
698 self.capacity
699 }
700
701 /// The number of elements ever `put` in the hashtable, minus the number
702 /// of elements ever `take`n.
703 pub fn size(&self) -> usize {
704 self.size
705 }
706
707 fn raw_buckets(&self) -> RawBuckets<K, V> {
708 RawBuckets {
709 raw: self.first_bucket_raw(),
710 hashes_end: unsafe { self.hashes.offset(self.capacity as isize) },
711 marker: marker::PhantomData,
712 }
713 }
714
715 pub fn iter(&self) -> Iter<K, V> {
716 Iter {
717 iter: self.raw_buckets(),
718 elems_left: self.size(),
719 }
720 }
721
722 pub fn iter_mut(&mut self) -> IterMut<K, V> {
723 IterMut {
724 iter: self.raw_buckets(),
725 elems_left: self.size(),
726 _marker: marker::PhantomData,
727 }
728 }
729
730 pub fn into_iter(self) -> IntoIter<K, V> {
731 let RawBuckets { raw, hashes_end, .. } = self.raw_buckets();
732 // Replace the marker regardless of lifetime bounds on parameters.
733 IntoIter {
734 iter: RawBuckets {
735 raw: raw,
736 hashes_end: hashes_end,
737 marker: marker::PhantomData,
738 },
739 table: self,
740 }
741 }
742
743 pub fn drain(&mut self) -> Drain<K, V> {
744 let RawBuckets { raw, hashes_end, .. } = self.raw_buckets();
745 // Replace the marker regardless of lifetime bounds on parameters.
746 Drain {
747 iter: RawBuckets {
748 raw: raw,
749 hashes_end: hashes_end,
750 marker: marker::PhantomData,
751 },
752 table: unsafe { Shared::new(self) },
753 marker: marker::PhantomData,
754 }
755 }
756
757 /// Returns an iterator that copies out each entry. Used while the table
758 /// is being dropped.
759 unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> {
760 let raw_bucket = self.first_bucket_raw();
761 RevMoveBuckets {
762 raw: raw_bucket.offset(self.capacity as isize),
763 hashes_end: raw_bucket.hash,
764 elems_left: self.size,
765 marker: marker::PhantomData,
766 }
767 }
768 }
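// A small sketch, not an original test: `new` zeroes every hash word, so a
// fresh table reports the requested capacity, a size of zero, and yields
// nothing when iterated.
#[test]
fn new_table_starts_empty_sketch() {
    let table = RawTable::<u32, u32>::new(16);
    assert_eq!(table.capacity(), 16);
    assert_eq!(table.size(), 0);
    assert_eq!(table.iter().count(), 0);
}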
769
770 /// A raw iterator. The basis for some other iterators in this module. Although
771 /// this interface is safe, it's not used outside this module.
772 struct RawBuckets<'a, K, V> {
773 raw: RawBucket<K, V>,
774 hashes_end: *mut HashUint,
775
776 // Strictly speaking, this should be &'a (K,V), but that would
777 // require that K:'a, and we often use RawBuckets<'static...> for
778 // move iterations, so that messes up a lot of other things. So
779 // just use `&'a ()` as this is not a publicly exposed type
780 // anyway.
781 marker: marker::PhantomData<&'a ()>,
782 }
783
784 // FIXME(#19839) Remove in favor of `#[derive(Clone)]`
785 impl<'a, K, V> Clone for RawBuckets<'a, K, V> {
786 fn clone(&self) -> RawBuckets<'a, K, V> {
787 RawBuckets {
788 raw: self.raw,
789 hashes_end: self.hashes_end,
790 marker: marker::PhantomData,
791 }
792 }
793 }
794
795
796 impl<'a, K, V> Iterator for RawBuckets<'a, K, V> {
797 type Item = RawBucket<K, V>;
798
799 fn next(&mut self) -> Option<RawBucket<K, V>> {
800 while self.raw.hash != self.hashes_end {
801 unsafe {
802 // We are swapping out the pointer to a bucket and replacing
803 // it with the pointer to the next one.
804 let prev = ptr::replace(&mut self.raw, self.raw.offset(1));
805 if *prev.hash != EMPTY_BUCKET {
806 return Some(prev);
807 }
808 }
809 }
810
811 None
812 }
813 }
814
815 /// An iterator that moves out buckets in reverse order. It leaves the table
816 /// in an inconsistent state and should only be used for dropping
817 /// the table's remaining entries. It's used in the implementation of Drop.
818 struct RevMoveBuckets<'a, K, V> {
819 raw: RawBucket<K, V>,
820 hashes_end: *mut HashUint,
821 elems_left: usize,
822
823 // As above, `&'a (K,V)` would seem better, but we often use
824 // 'static for the lifetime, and this is not a publicly exposed
825 // type.
826 marker: marker::PhantomData<&'a ()>,
827 }
828
829 impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> {
830 type Item = (K, V);
831
832 fn next(&mut self) -> Option<(K, V)> {
833 if self.elems_left == 0 {
834 return None;
835 }
836
837 loop {
838 debug_assert!(self.raw.hash != self.hashes_end);
839
840 unsafe {
841 self.raw = self.raw.offset(-1);
842
843 if *self.raw.hash != EMPTY_BUCKET {
844 self.elems_left -= 1;
845 return Some(ptr::read(self.raw.pair));
846 }
847 }
848 }
849 }
850 }
851
852 /// Iterator over shared references to entries in a table.
853 pub struct Iter<'a, K: 'a, V: 'a> {
854 iter: RawBuckets<'a, K, V>,
855 elems_left: usize,
856 }
857
858 unsafe impl<'a, K: Sync, V: Sync> Sync for Iter<'a, K, V> {}
859 unsafe impl<'a, K: Sync, V: Sync> Send for Iter<'a, K, V> {}
860
861 // FIXME(#19839) Remove in favor of `#[derive(Clone)]`
862 impl<'a, K, V> Clone for Iter<'a, K, V> {
863 fn clone(&self) -> Iter<'a, K, V> {
864 Iter {
865 iter: self.iter.clone(),
866 elems_left: self.elems_left,
867 }
868 }
869 }
870
871
872 /// Iterator over mutable references to entries in a table.
873 pub struct IterMut<'a, K: 'a, V: 'a> {
874 iter: RawBuckets<'a, K, V>,
875 elems_left: usize,
876 // To ensure invariance with respect to V
877 _marker: marker::PhantomData<&'a mut V>,
878 }
879
880 unsafe impl<'a, K: Sync, V: Sync> Sync for IterMut<'a, K, V> {}
881 // Both K: Sync and K: Send are correct for IterMut's Send impl,
882 // but Send is the more useful bound
883 unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {}
884
885 /// Iterator over the entries in a table, consuming the table.
886 pub struct IntoIter<K, V> {
887 table: RawTable<K, V>,
888 iter: RawBuckets<'static, K, V>,
889 }
890
891 unsafe impl<K: Sync, V: Sync> Sync for IntoIter<K, V> {}
892 unsafe impl<K: Send, V: Send> Send for IntoIter<K, V> {}
893
894 /// Iterator over the entries in a table, clearing the table.
895 pub struct Drain<'a, K: 'a, V: 'a> {
896 table: Shared<RawTable<K, V>>,
897 iter: RawBuckets<'static, K, V>,
898 marker: marker::PhantomData<&'a RawTable<K, V>>,
899 }
900
901 unsafe impl<'a, K: Sync, V: Sync> Sync for Drain<'a, K, V> {}
902 unsafe impl<'a, K: Send, V: Send> Send for Drain<'a, K, V> {}
903
904 impl<'a, K, V> Iterator for Iter<'a, K, V> {
905 type Item = (&'a K, &'a V);
906
907 fn next(&mut self) -> Option<(&'a K, &'a V)> {
908 self.iter.next().map(|bucket| {
909 self.elems_left -= 1;
910 unsafe { (&(*bucket.pair).0, &(*bucket.pair).1) }
911 })
912 }
913
914 fn size_hint(&self) -> (usize, Option<usize>) {
915 (self.elems_left, Some(self.elems_left))
916 }
917 }
918 impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> {
919 fn len(&self) -> usize {
920 self.elems_left
921 }
922 }
923
924 impl<'a, K, V> Iterator for IterMut<'a, K, V> {
925 type Item = (&'a K, &'a mut V);
926
927 fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
928 self.iter.next().map(|bucket| {
929 self.elems_left -= 1;
930 let pair_mut = bucket.pair as *mut (K, V);
931 unsafe { (&(*pair_mut).0, &mut (*pair_mut).1) }
932 })
933 }
934
935 fn size_hint(&self) -> (usize, Option<usize>) {
936 (self.elems_left, Some(self.elems_left))
937 }
938 }
939 impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> {
940 fn len(&self) -> usize {
941 self.elems_left
942 }
943 }
944
945 impl<K, V> Iterator for IntoIter<K, V> {
946 type Item = (SafeHash, K, V);
947
948 fn next(&mut self) -> Option<(SafeHash, K, V)> {
949 self.iter.next().map(|bucket| {
950 self.table.size -= 1;
951 unsafe {
952 let (k, v) = ptr::read(bucket.pair);
953 (SafeHash { hash: *bucket.hash }, k, v)
954 }
955 })
956 }
957
958 fn size_hint(&self) -> (usize, Option<usize>) {
959 let size = self.table.size();
960 (size, Some(size))
961 }
962 }
963 impl<K, V> ExactSizeIterator for IntoIter<K, V> {
964 fn len(&self) -> usize {
965 self.table.size()
966 }
967 }
968
969 impl<'a, K, V> Iterator for Drain<'a, K, V> {
970 type Item = (SafeHash, K, V);
971
972 #[inline]
973 fn next(&mut self) -> Option<(SafeHash, K, V)> {
974 self.iter.next().map(|bucket| {
975 unsafe {
976 (**self.table).size -= 1;
977 let (k, v) = ptr::read(bucket.pair);
978 (SafeHash { hash: ptr::replace(bucket.hash, EMPTY_BUCKET) }, k, v)
979 }
980 })
981 }
982
983 fn size_hint(&self) -> (usize, Option<usize>) {
984 let size = unsafe { (**self.table).size() };
985 (size, Some(size))
986 }
987 }
988 impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> {
989 fn len(&self) -> usize {
990 unsafe { (**self.table).size() }
991 }
992 }
993
994 impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> {
995 fn drop(&mut self) {
996 for _ in self {}
997 }
998 }
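// An illustrative sketch, not an original test: `drain` yields
// `(SafeHash, K, V)` triples, resets each hash to EMPTY_BUCKET as it goes, and
// the `Drop` impl above exhausts whatever remains, so the table always ends up
// empty. The hash input and key/value numbers are arbitrary.
#[test]
fn drain_clears_table_sketch() {
    let mut table = RawTable::<u32, u32>::new(8);
    let hash = SafeHash::new(42);
    match Bucket::new(&mut table, hash).peek() {
        Empty(empty) => {
            empty.put(hash, 1, 10);
        }
        Full(..) => panic!("a freshly created table has no full buckets"),
    }
    assert_eq!(table.size(), 1);
    for (_, k, v) in table.drain() {
        assert_eq!((k, v), (1, 10));
    }
    assert_eq!(table.size(), 0);
}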
999
1000 impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
1001 fn clone(&self) -> RawTable<K, V> {
1002 unsafe {
1003 let mut new_ht = RawTable::new_uninitialized(self.capacity());
1004
1005 {
1006 let cap = self.capacity();
1007 let mut new_buckets = Bucket::first(&mut new_ht);
1008 let mut buckets = Bucket::first(self);
1009 while buckets.index() != cap {
1010 match buckets.peek() {
1011 Full(full) => {
1012 let (h, k, v) = {
1013 let (k, v) = full.read();
1014 (full.hash(), k.clone(), v.clone())
1015 };
1016 *new_buckets.raw.hash = h.inspect();
1017 ptr::write(new_buckets.raw.pair as *mut (K, V), (k, v));
1018 }
1019 Empty(..) => {
1020 *new_buckets.raw.hash = EMPTY_BUCKET;
1021 }
1022 }
1023 new_buckets.next();
1024 buckets.next();
1025 }
1026 };
1027
1028 new_ht.size = self.size();
1029
1030 new_ht
1031 }
1032 }
1033 }
1034
1035 impl<K, V> Drop for RawTable<K, V> {
1036 #[unsafe_destructor_blind_to_params]
1037 fn drop(&mut self) {
1038 if self.capacity == 0 {
1039 return;
1040 }
1041
1042 // This is done in reverse because we've likely partially taken
1043 // some elements out with `.into_iter()` from the front.
1044 // Check if the size is 0, so we don't do a useless scan when
1045 // dropping empty tables such as on resize.
1046 // Also avoid double drop of elements that have been already moved out.
1047 unsafe {
1048 if needs_drop::<(K, V)>() {
1049 // avoid linear runtime for types that don't need drop
1050 for _ in self.rev_move_buckets() {}
1051 }
1052 }
1053
1054 let hashes_size = self.capacity * size_of::<HashUint>();
1055 let pairs_size = self.capacity * size_of::<(K, V)>();
1056 let (align, _, size, oflo) = calculate_allocation(hashes_size,
1057 align_of::<HashUint>(),
1058 pairs_size,
1059 align_of::<(K, V)>());
1060
1061 debug_assert!(!oflo, "should be impossible");
1062
1063 unsafe {
1064 deallocate(*self.hashes as *mut u8, size, align);
1065 // Remember how everything was allocated out of one buffer
1066 // during initialization? We only need one call to free here.
1067 }
1068 }
1069 }