// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use alloc::heap::{EMPTY, allocate, deallocate};

use cmp;
use hash::{BuildHasher, Hash, Hasher};
use intrinsics::needs_drop;
use marker;
use mem::{align_of, size_of};
use mem;
use ops::{Deref, DerefMut};
use ptr::{self, Unique, Shared};

use self::BucketState::*;

/// Integer type used for stored hash values.
///
/// No more than bit_width(usize) bits are needed to select a bucket.
///
/// The most significant bit is ours to use for tagging `SafeHash`.
///
/// (Even if we could have usize::MAX bytes allocated for buckets,
/// each bucket stores at least a `HashUint`, so there can be no more than
/// usize::MAX / size_of(usize) buckets.)
type HashUint = usize;

const EMPTY_BUCKET: HashUint = 0;

/// The raw hashtable, providing safe-ish access to the unzipped and highly
/// optimized arrays of hashes and key-value pairs.
///
/// This design is a lot faster than the naive
/// `Vec<Option<(u64, K, V)>>`, because we don't pay for the overhead of an
/// option on every element, and we get a generally more cache-aware design.
///
/// Essential invariants of this structure:
///
///   - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw`
///     points to 'undefined' contents. Don't read from it. This invariant is
///     enforced outside this module with the `EmptyBucket`, `FullBucket`,
///     and `SafeHash` types.
///
///   - An `EmptyBucket` is only constructed at an index with
///     a hash of EMPTY_BUCKET.
///
///   - A `FullBucket` is only constructed at an index with a
///     non-EMPTY_BUCKET hash.
///
///   - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
///     around hashes of zero by changing them to 0x8000_0000_0000_0000,
///     which will likely map to the same bucket, while not being confused
///     with "empty".
///
///   - Both "arrays represented by pointers" are the same length:
///     `capacity`. This is set at creation and never changes. The arrays
///     are unzipped and are more cache aware (scanning through 8 hashes
///     brings in at most 2 cache lines, since they're all right beside each
///     other). This layout may waste space in padding such as in a map from
///     u64 to u8, but is a more cache conscious layout as the key-value pairs
///     are only very shortly probed and the desired value will be in the same
///     or next cache line.
///
/// You can kind of think of this module/data structure as a safe wrapper
/// around just the "table" part of the hashtable. It enforces some
/// invariants at the type level and employs some performance trickery,
/// but in general is just a tricked out `Vec<Option<(u64, K, V)>>`.
pub struct RawTable<K, V> {
    capacity: usize,
    size: usize,
    hashes: Unique<HashUint>,

    // Because K/V do not appear directly in any of the types in the struct,
    // inform rustc that in fact instances of K and V are reachable from here.
    marker: marker::PhantomData<(K, V)>,
}

unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {}
unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {}
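
// An illustrative sketch of the layout claim above: a naive
// `Option<(u64, K, V)>` slot pays for the `Option` discriminant and its
// padding, while the unzipped layout stores a bare hash in one dense array
// plus a bare pair in the other. The concrete key/value types here are
// arbitrary examples.
#[test]
fn test_unzipped_layout_is_denser() {
    // One naive slot: discriminant + (hash, key, value) tuple.
    let naive_slot = size_of::<Option<(u64, u64, u8)>>();
    // One unzipped slot: a hash in one array plus a pair in the other.
    let unzipped_slot = size_of::<HashUint>() + size_of::<(u64, u8)>();
    assert!(unzipped_slot <= naive_slot);
}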

struct RawBucket<K, V> {
    hash: *mut HashUint,
    // We use *const to ensure covariance with respect to K and V
    pair: *const (K, V),
    _marker: marker::PhantomData<(K, V)>,
}

impl<K, V> Copy for RawBucket<K, V> {}
impl<K, V> Clone for RawBucket<K, V> {
    fn clone(&self) -> RawBucket<K, V> {
        *self
    }
}

pub struct Bucket<K, V, M> {
    raw: RawBucket<K, V>,
    idx: usize,
    table: M,
}

impl<K, V, M: Copy> Copy for Bucket<K, V, M> {}
impl<K, V, M: Copy> Clone for Bucket<K, V, M> {
    fn clone(&self) -> Bucket<K, V, M> {
        *self
    }
}

pub struct EmptyBucket<K, V, M> {
    raw: RawBucket<K, V>,
    idx: usize,
    table: M,
}

pub struct FullBucket<K, V, M> {
    raw: RawBucket<K, V>,
    idx: usize,
    table: M,
}

pub type FullBucketMut<'table, K, V> = FullBucket<K, V, &'table mut RawTable<K, V>>;

pub enum BucketState<K, V, M> {
    Empty(EmptyBucket<K, V, M>),
    Full(FullBucket<K, V, M>),
}

// A GapThenFull encapsulates the state of two consecutive buckets at once.
// The first bucket, called the gap, is known to be empty.
// The second bucket is full.
pub struct GapThenFull<K, V, M> {
    gap: EmptyBucket<K, V, ()>,
    full: FullBucket<K, V, M>,
}

/// A hash that is not zero, since we use a hash of zero to represent empty
/// buckets.
#[derive(PartialEq, Copy, Clone)]
pub struct SafeHash {
    hash: HashUint,
}

impl SafeHash {
    /// Peek at the hash value, which is guaranteed to be non-zero.
    #[inline(always)]
    pub fn inspect(&self) -> HashUint {
        self.hash
    }

    #[inline(always)]
    pub fn new(hash: u64) -> Self {
        // We need to avoid 0 in order to prevent collisions with
        // EMPTY_BUCKET. We can maintain our precious uniform distribution
        // of initial indexes by unconditionally setting the MSB,
        // effectively reducing the hashes by one bit.
        //
        // Truncate hash to fit in `HashUint`.
        let hash_bits = size_of::<HashUint>() * 8;
        SafeHash { hash: (1 << (hash_bits - 1)) | (hash as HashUint) }
    }
}
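
// An illustrative check of the tagging scheme above: `SafeHash::new`
// unconditionally sets the most significant bit, so even a raw hash of zero
// cannot be confused with `EMPTY_BUCKET`. The sample values are arbitrary.
#[test]
fn test_safehash_is_never_zero() {
    let top_bit: HashUint = 1 << (size_of::<HashUint>() * 8 - 1);
    assert_eq!(SafeHash::new(0).inspect(), top_bit);
    assert!(SafeHash::new(12345).inspect() & top_bit != 0);
}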

/// We need to remove hashes of 0. That's reserved for empty buckets.
/// This function wraps up `build_hasher` and `Hash::hash` to be the only way
/// outside this module to generate a SafeHash.
pub fn make_hash<T: ?Sized, S>(hash_state: &S, t: &T) -> SafeHash
    where T: Hash,
          S: BuildHasher
{
    let mut state = hash_state.build_hasher();
    t.hash(&mut state);
    SafeHash::new(state.finish())
}
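
// A minimal usage sketch for `make_hash`, assuming a deliberately trivial
// `BuildHasher` (a real map would use a keyed, collision-resistant one).
// `()` hashes to 0 here, yet the resulting `SafeHash` is still non-zero.
#[test]
fn test_make_hash_is_never_zero() {
    struct TrivialHasher(u64);
    impl Hasher for TrivialHasher {
        fn finish(&self) -> u64 {
            self.0
        }
        fn write(&mut self, bytes: &[u8]) {
            for &b in bytes {
                self.0 = self.0.wrapping_mul(31).wrapping_add(b as u64);
            }
        }
    }
    struct TrivialBuilder;
    impl BuildHasher for TrivialBuilder {
        type Hasher = TrivialHasher;
        fn build_hasher(&self) -> TrivialHasher {
            TrivialHasher(0)
        }
    }
    assert!(make_hash(&TrivialBuilder, &()).inspect() != 0);
}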

// `replace` casts a `*HashUint` to a `*SafeHash`. Since we statically
// ensure that a `FullBucket` points to an index with a non-zero hash,
// and a `SafeHash` is just a `HashUint` with a different name, this is
// safe.
//
// This test ensures that a `SafeHash` really IS the same size as a
// `HashUint`. If you need to change the size of `SafeHash` (and
// consequently make this test fail), `replace` needs to be
// modified to no longer assume this.
#[test]
fn can_alias_safehash_as_hash() {
    assert_eq!(size_of::<SafeHash>(), size_of::<HashUint>())
}

impl<K, V> RawBucket<K, V> {
    unsafe fn offset(self, count: isize) -> RawBucket<K, V> {
        RawBucket {
            hash: self.hash.offset(count),
            pair: self.pair.offset(count),
            _marker: marker::PhantomData,
        }
    }
}

// Buckets hold references to the table.
impl<K, V, M> FullBucket<K, V, M> {
    /// Borrow a reference to the table.
    pub fn table(&self) -> &M {
        &self.table
    }
    /// Move out the reference to the table.
    pub fn into_table(self) -> M {
        self.table
    }
    /// Get the raw index.
    pub fn index(&self) -> usize {
        self.idx
    }
}

impl<K, V, M> EmptyBucket<K, V, M> {
    /// Borrow a reference to the table.
    pub fn table(&self) -> &M {
        &self.table
    }
}

impl<K, V, M> Bucket<K, V, M> {
    /// Get the raw index.
    pub fn index(&self) -> usize {
        self.idx
    }
}

impl<K, V, M> Deref for FullBucket<K, V, M>
    where M: Deref<Target = RawTable<K, V>>
{
    type Target = RawTable<K, V>;
    fn deref(&self) -> &RawTable<K, V> {
        &self.table
    }
}

/// `Put` is implemented for types which provide access to a table and cannot be invalidated
/// by filling a bucket. A similar implementation for `Take` is possible.
pub trait Put<K, V> {
    unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V>;
}


impl<'t, K, V> Put<K, V> for &'t mut RawTable<K, V> {
    unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V> {
        *self
    }
}

impl<K, V, M> Put<K, V> for Bucket<K, V, M>
    where M: Put<K, V>
{
    unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V> {
        self.table.borrow_table_mut()
    }
}

impl<K, V, M> Put<K, V> for FullBucket<K, V, M>
    where M: Put<K, V>
{
    unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V> {
        self.table.borrow_table_mut()
    }
}

impl<K, V, M: Deref<Target = RawTable<K, V>>> Bucket<K, V, M> {
    pub fn new(table: M, hash: SafeHash) -> Bucket<K, V, M> {
        Bucket::at_index(table, hash.inspect() as usize)
    }

    pub fn at_index(table: M, ib_index: usize) -> Bucket<K, V, M> {
        // if capacity is 0, then the RawBucket will be populated with bogus pointers.
        // This is an uncommon case though, so avoid it in release builds.
        debug_assert!(table.capacity() > 0,
                      "Table should have capacity at this point");
        let ib_index = ib_index & (table.capacity() - 1);
        Bucket {
            raw: unsafe { table.first_bucket_raw().offset(ib_index as isize) },
            idx: ib_index,
            table: table,
        }
    }

    pub fn first(table: M) -> Bucket<K, V, M> {
        Bucket {
            raw: table.first_bucket_raw(),
            idx: 0,
            table: table,
        }
    }

    /// Reads a bucket at a given index, returning an enum indicating whether
    /// it's initialized or not. You need to match on this enum to get
    /// the appropriate types to call most of the other functions in
    /// this module.
    pub fn peek(self) -> BucketState<K, V, M> {
        match unsafe { *self.raw.hash } {
            EMPTY_BUCKET => {
                Empty(EmptyBucket {
                    raw: self.raw,
                    idx: self.idx,
                    table: self.table,
                })
            }
            _ => {
                Full(FullBucket {
                    raw: self.raw,
                    idx: self.idx,
                    table: self.table,
                })
            }
        }
    }

    /// Modifies the bucket pointer in place to make it point to the next slot.
    pub fn next(&mut self) {
        self.idx += 1;
        let range = self.table.capacity();
        // This code is branchless thanks to a conditional move.
        let dist = if self.idx & (range - 1) == 0 {
            1 - range as isize
        } else {
            1
        };
        unsafe {
            self.raw = self.raw.offset(dist);
        }
    }
}
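
// An illustrative check of the wrap-around in `next` above: for a
// power-of-two capacity (4 here, an arbitrary choice), stepping past the
// last bucket must bring the raw pointer back to the first one, while the
// logical index keeps counting upwards.
#[test]
fn test_bucket_next_wraps_around() {
    let mut table: RawTable<u32, u32> = RawTable::new(4);
    let first_hash = table.first_bucket_raw().hash;
    let mut bucket = Bucket::at_index(&mut table, 3);
    bucket.next();
    assert_eq!(bucket.index(), 4);
    assert_eq!(bucket.raw.hash, first_hash);
}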

impl<K, V, M: Deref<Target = RawTable<K, V>>> EmptyBucket<K, V, M> {
    #[inline]
    pub fn next(self) -> Bucket<K, V, M> {
        let mut bucket = self.into_bucket();
        bucket.next();
        bucket
    }

    #[inline]
    pub fn into_bucket(self) -> Bucket<K, V, M> {
        Bucket {
            raw: self.raw,
            idx: self.idx,
            table: self.table,
        }
    }

    pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> {
        let gap = EmptyBucket {
            raw: self.raw,
            idx: self.idx,
            table: (),
        };

        match self.next().peek() {
            Full(bucket) => {
                Some(GapThenFull {
                    gap: gap,
                    full: bucket,
                })
            }
            Empty(..) => None,
        }
    }
}

impl<K, V, M> EmptyBucket<K, V, M>
    where M: Put<K, V>
{
    /// Puts the given key and value pair, along with the key's hash,
    /// into this bucket in the hashtable. Note how `self` is 'moved' into
    /// this function, because this slot will no longer be empty when
    /// we return! A `FullBucket` is returned for later use, pointing to
    /// the newly-filled slot in the hashtable.
    ///
    /// Use `make_hash` to construct a `SafeHash` to pass to this function.
    pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket<K, V, M> {
        unsafe {
            *self.raw.hash = hash.inspect();
            ptr::write(self.raw.pair as *mut (K, V), (key, value));

            self.table.borrow_table_mut().size += 1;
        }

        FullBucket {
            raw: self.raw,
            idx: self.idx,
            table: self.table,
        }
    }
}
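
// A minimal usage sketch of the `peek`/`put` round trip, with an arbitrary
// index, hash, and key/value pair: filling an empty bucket makes a later
// read of the same bucket observe the pair and bumps the table's size.
#[test]
fn test_put_makes_bucket_full() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    let full = match Bucket::at_index(&mut table, 0).peek() {
        Empty(empty) => empty.put(SafeHash::new(42), 1, 10),
        Full(..) => unreachable!("a fresh table has no full buckets"),
    };
    assert_eq!(full.read(), (&1, &10));
    assert_eq!(full.into_table().size(), 1);
}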

impl<K, V, M: Deref<Target = RawTable<K, V>>> FullBucket<K, V, M> {
    #[inline]
    pub fn next(self) -> Bucket<K, V, M> {
        let mut bucket = self.into_bucket();
        bucket.next();
        bucket
    }

    #[inline]
    pub fn into_bucket(self) -> Bucket<K, V, M> {
        Bucket {
            raw: self.raw,
            idx: self.idx,
            table: self.table,
        }
    }

    /// Duplicates the current position. This can be useful for operations
    /// on two or more buckets.
    pub fn stash(self) -> FullBucket<K, V, Self> {
        FullBucket {
            raw: self.raw,
            idx: self.idx,
            table: self,
        }
    }

    /// Get the distance between this bucket and the 'ideal' location
    /// as determined by the key's hash stored in it.
    ///
    /// In the Robin Hood hashing literature, this is called the "distance
    /// to initial bucket", or DIB. Also known as "probe count".
    pub fn displacement(&self) -> usize {
        // Calculates the distance one has to travel when going from
        // `hash mod capacity` onwards to `idx mod capacity`, wrapping around
        // if the destination is not reached before the end of the table.
        (self.idx.wrapping_sub(self.hash().inspect() as usize)) & (self.table.capacity() - 1)
    }

    #[inline]
    pub fn hash(&self) -> SafeHash {
        unsafe { SafeHash { hash: *self.raw.hash } }
    }

    /// Gets references to the key and value at a given index.
    pub fn read(&self) -> (&K, &V) {
        unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) }
    }
}
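
// A worked example of the displacement arithmetic above, with an arbitrary
// capacity of 8: an entry whose ideal slot is 6 but which sits at index 1
// has wrapped around the end of the table, for a probe count of 3
// (6 -> 7 -> 0 -> 1).
#[test]
fn test_displacement_arithmetic() {
    let capacity = 8usize;
    let ideal = 6usize;
    let idx = 1usize;
    assert_eq!(idx.wrapping_sub(ideal) & (capacity - 1), 3);
}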

// We take a mutable reference to the table instead of accepting anything that
// implements `DerefMut` to prevent fn `take` from being called on `stash`ed
// buckets.
impl<'t, K, V> FullBucket<K, V, &'t mut RawTable<K, V>> {
    /// Removes this bucket's key and value from the hashtable.
    ///
    /// This works similarly to `put`, building an `EmptyBucket` out of the
    /// taken bucket.
    pub fn take(mut self) -> (EmptyBucket<K, V, &'t mut RawTable<K, V>>, K, V) {
        self.table.size -= 1;

        unsafe {
            *self.raw.hash = EMPTY_BUCKET;
            let (k, v) = ptr::read(self.raw.pair);
            (EmptyBucket {
                raw: self.raw,
                idx: self.idx,
                table: self.table,
            },
             k,
             v)
        }
    }
}
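
// A minimal round-trip sketch for `take`, using arbitrary values: taking a
// freshly filled bucket returns the pair and restores the table's size.
#[test]
fn test_take_undoes_put() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    let full = match Bucket::at_index(&mut table, 0).peek() {
        Empty(empty) => empty.put(SafeHash::new(7), 2, 20),
        Full(..) => unreachable!("a fresh table has no full buckets"),
    };
    let (empty, k, v) = full.take();
    assert_eq!((k, v), (2, 20));
    assert_eq!(empty.table().size(), 0);
}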

// This use of `Put` is misleading and restrictive, but safe and sufficient for our use cases
// where `M` is a full bucket or table reference type with mutable access to the table.
impl<K, V, M> FullBucket<K, V, M>
    where M: Put<K, V>
{
    pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) {
        unsafe {
            let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h);
            let (old_key, old_val) = ptr::replace(self.raw.pair as *mut (K, V), (k, v));

            (old_hash, old_key, old_val)
        }
    }
}

impl<K, V, M> FullBucket<K, V, M>
    where M: Deref<Target = RawTable<K, V>> + DerefMut
{
    /// Gets mutable references to the key and value at a given index.
    pub fn read_mut(&mut self) -> (&mut K, &mut V) {
        let pair_mut = self.raw.pair as *mut (K, V);
        unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) }
    }
}

impl<'t, K, V, M> FullBucket<K, V, M>
    where M: Deref<Target = RawTable<K, V>> + 't
{
    /// Exchange a bucket state for immutable references into the table.
    /// Because the underlying reference to the table is also consumed,
    /// no further changes to the structure of the table are possible;
    /// in exchange for this, the returned references have a longer lifetime
    /// than the references returned by `read()`.
    pub fn into_refs(self) -> (&'t K, &'t V) {
        unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) }
    }
}

impl<'t, K, V, M> FullBucket<K, V, M>
    where M: Deref<Target = RawTable<K, V>> + DerefMut + 't
{
    /// This works similarly to `into_refs`, exchanging a bucket state
    /// for mutable references into the table.
    pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) {
        let pair_mut = self.raw.pair as *mut (K, V);
        unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) }
    }
}

impl<K, V, M> GapThenFull<K, V, M>
    where M: Deref<Target = RawTable<K, V>>
{
    #[inline]
    pub fn full(&self) -> &FullBucket<K, V, M> {
        &self.full
    }

    pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> {
        unsafe {
            *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET);
            ptr::copy_nonoverlapping(self.full.raw.pair, self.gap.raw.pair as *mut (K, V), 1);
        }

        let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full;

        match self.full.next().peek() {
            Full(bucket) => {
                self.gap.raw = prev_raw;
                self.gap.idx = prev_idx;

                self.full = bucket;

                Some(self)
            }
            Empty(..) => None,
        }
    }
}
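
// A minimal sketch of the backward-shift step used when removing entries:
// with an arbitrary pair placed at index 1 and a gap at index 0, one `shift`
// moves the pair back into the gap and stops at the empty bucket behind it.
#[test]
fn test_gap_then_full_shift() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    match Bucket::at_index(&mut table, 1).peek() {
        Empty(empty) => {
            empty.put(SafeHash::new(9), 5, 50);
        }
        Full(..) => unreachable!("a fresh table has no full buckets"),
    }
    {
        let gap = match Bucket::at_index(&mut table, 0).peek() {
            Empty(empty) => empty,
            Full(..) => unreachable!("bucket 0 was never filled"),
        };
        let gap_then_full = gap.gap_peek().expect("bucket 1 is full");
        // Bucket 2 is empty, so the chain ends after a single shift.
        assert!(gap_then_full.shift().is_none());
    }
    match Bucket::at_index(&mut table, 0).peek() {
        Full(full) => assert_eq!(full.read(), (&5, &50)),
        Empty(..) => panic!("the pair should have shifted into the gap"),
    }
}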

/// Rounds up to a multiple of a power of two. Returns the closest multiple
/// of `target_alignment` that is greater than or equal to `unrounded`.
///
/// # Panics
///
/// Panics if `target_alignment` is not a power of two.
#[inline]
fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize {
    assert!(target_alignment.is_power_of_two());
    (unrounded + target_alignment - 1) & !(target_alignment - 1)
}

#[test]
fn test_rounding() {
    assert_eq!(round_up_to_next(0, 4), 0);
    assert_eq!(round_up_to_next(1, 4), 4);
    assert_eq!(round_up_to_next(2, 4), 4);
    assert_eq!(round_up_to_next(3, 4), 4);
    assert_eq!(round_up_to_next(4, 4), 4);
    assert_eq!(round_up_to_next(5, 4), 8);
}

// Returns a tuple of (pairs_offset, end_of_pairs, oflo), measured from the
// start of a mallocated array; `oflo` reports overflow in the size
// computation.
#[inline]
fn calculate_offsets(hashes_size: usize,
                     pairs_size: usize,
                     pairs_align: usize)
                     -> (usize, usize, bool) {
    let pairs_offset = round_up_to_next(hashes_size, pairs_align);
    let (end_of_pairs, oflo) = pairs_offset.overflowing_add(pairs_size);

    (pairs_offset, end_of_pairs, oflo)
}

// Returns a tuple of (minimum required malloc alignment, hash_offset,
// array_size, oflo), measured from the start of a mallocated array; `oflo`
// reports overflow in the size computation.
fn calculate_allocation(hash_size: usize,
                        hash_align: usize,
                        pairs_size: usize,
                        pairs_align: usize)
                        -> (usize, usize, usize, bool) {
    let hash_offset = 0;
    let (_, end_of_pairs, oflo) = calculate_offsets(hash_size, pairs_size, pairs_align);

    let align = cmp::max(hash_align, pairs_align);

    (align, hash_offset, end_of_pairs, oflo)
}

#[test]
fn test_offset_calculation() {
    assert_eq!(calculate_allocation(128, 8, 16, 8), (8, 0, 144, false));
    assert_eq!(calculate_allocation(3, 1, 2, 1), (1, 0, 5, false));
    assert_eq!(calculate_allocation(6, 2, 12, 4), (4, 0, 20, false));
    assert_eq!(calculate_offsets(128, 15, 4), (128, 143, false));
    assert_eq!(calculate_offsets(3, 2, 4), (4, 6, false));
    assert_eq!(calculate_offsets(6, 12, 4), (8, 20, false));
}

impl<K, V> RawTable<K, V> {
    /// Does not initialize the buckets. The caller should ensure they,
    /// at the very least, set every hash to EMPTY_BUCKET.
    unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> {
        if capacity == 0 {
            return RawTable {
                size: 0,
                capacity: 0,
                hashes: Unique::new(EMPTY as *mut HashUint),
                marker: marker::PhantomData,
            };
        }

        // No need for `checked_mul` before a more restrictive check performed
        // later in this method.
        let hashes_size = capacity.wrapping_mul(size_of::<HashUint>());
        let pairs_size = capacity.wrapping_mul(size_of::<(K, V)>());

        // Allocating hashmaps is a little tricky. We need to allocate two
        // arrays, but since we know their sizes and alignments up front,
        // we just allocate a single array, and then have the subarrays
        // point into it.
        //
        // This is great in theory, but in practice getting the alignment
        // right is a little subtle. Therefore, calculating offsets has been
        // factored out into a different function.
        let (alignment, hash_offset, size, oflo) = calculate_allocation(hashes_size,
                                                                        align_of::<HashUint>(),
                                                                        pairs_size,
                                                                        align_of::<(K, V)>());
        assert!(!oflo, "capacity overflow");

        // One check for overflow that covers calculation and rounding of size.
        let size_of_bucket = size_of::<HashUint>().checked_add(size_of::<(K, V)>()).unwrap();
        assert!(size >=
                capacity.checked_mul(size_of_bucket)
                        .expect("capacity overflow"),
                "capacity overflow");

        let buffer = allocate(size, alignment);
        if buffer.is_null() {
            ::alloc::oom()
        }

        let hashes = buffer.offset(hash_offset as isize) as *mut HashUint;

        RawTable {
            capacity: capacity,
            size: 0,
            hashes: Unique::new(hashes),
            marker: marker::PhantomData,
        }
    }

    fn first_bucket_raw(&self) -> RawBucket<K, V> {
        let hashes_size = self.capacity * size_of::<HashUint>();
        let pairs_size = self.capacity * size_of::<(K, V)>();

        let buffer = *self.hashes as *mut u8;
        let (pairs_offset, _, oflo) =
            calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>());
        debug_assert!(!oflo, "capacity overflow");
        unsafe {
            RawBucket {
                hash: *self.hashes,
                pair: buffer.offset(pairs_offset as isize) as *const _,
                _marker: marker::PhantomData,
            }
        }
    }

    /// Creates a new raw table from a given capacity. All buckets are
    /// initially empty.
    pub fn new(capacity: usize) -> RawTable<K, V> {
        unsafe {
            let ret = RawTable::new_uninitialized(capacity);
            ptr::write_bytes(*ret.hashes, 0, capacity);
            ret
        }
    }

    /// The hashtable's capacity, similar to a vector's.
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// The number of elements ever `put` in the hashtable, minus the number
    /// of elements ever `take`n.
    pub fn size(&self) -> usize {
        self.size
    }

    fn raw_buckets(&self) -> RawBuckets<K, V> {
        RawBuckets {
            raw: self.first_bucket_raw(),
            hashes_end: unsafe { self.hashes.offset(self.capacity as isize) },
            marker: marker::PhantomData,
        }
    }

    pub fn iter(&self) -> Iter<K, V> {
        Iter {
            iter: self.raw_buckets(),
            elems_left: self.size(),
        }
    }

    pub fn iter_mut(&mut self) -> IterMut<K, V> {
        IterMut {
            iter: self.raw_buckets(),
            elems_left: self.size(),
            _marker: marker::PhantomData,
        }
    }

    pub fn into_iter(self) -> IntoIter<K, V> {
        let RawBuckets { raw, hashes_end, .. } = self.raw_buckets();
        // Replace the marker regardless of lifetime bounds on parameters.
        IntoIter {
            iter: RawBuckets {
                raw: raw,
                hashes_end: hashes_end,
                marker: marker::PhantomData,
            },
            table: self,
        }
    }

    pub fn drain(&mut self) -> Drain<K, V> {
        let RawBuckets { raw, hashes_end, .. } = self.raw_buckets();
        // Replace the marker regardless of lifetime bounds on parameters.
        Drain {
            iter: RawBuckets {
                raw: raw,
                hashes_end: hashes_end,
                marker: marker::PhantomData,
            },
            table: unsafe { Shared::new(self) },
            marker: marker::PhantomData,
        }
    }

    /// Returns an iterator that copies out each entry. Used while the table
    /// is being dropped.
    unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> {
        let raw_bucket = self.first_bucket_raw();
        RevMoveBuckets {
            raw: raw_bucket.offset(self.capacity as isize),
            hashes_end: raw_bucket.hash,
            elems_left: self.size,
            marker: marker::PhantomData,
        }
    }
}
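
// A minimal sketch of the iteration API, with one arbitrary pair: `iter`
// visits only the full buckets and reports an exact length.
#[test]
fn test_iter_visits_only_full_buckets() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    match Bucket::at_index(&mut table, 3).peek() {
        Empty(empty) => {
            empty.put(SafeHash::new(3), 7, 70);
        }
        Full(..) => unreachable!("a fresh table has no full buckets"),
    }
    assert_eq!(table.iter().len(), 1);
    assert_eq!(table.iter().next(), Some((&7, &70)));
}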

/// A raw iterator. The basis for some other iterators in this module. Although
/// this interface is safe, it's not used outside this module.
struct RawBuckets<'a, K, V> {
    raw: RawBucket<K, V>,
    hashes_end: *mut HashUint,

    // Strictly speaking, this should be &'a (K,V), but that would
    // require that K:'a, and we often use RawBuckets<'static...> for
    // move iterations, so that messes up a lot of other things. So
    // just use `&'a ()` as this is not a publicly exposed type
    // anyway.
    marker: marker::PhantomData<&'a ()>,
}

// FIXME(#19839) Remove in favor of `#[derive(Clone)]`
impl<'a, K, V> Clone for RawBuckets<'a, K, V> {
    fn clone(&self) -> RawBuckets<'a, K, V> {
        RawBuckets {
            raw: self.raw,
            hashes_end: self.hashes_end,
            marker: marker::PhantomData,
        }
    }
}


impl<'a, K, V> Iterator for RawBuckets<'a, K, V> {
    type Item = RawBucket<K, V>;

    fn next(&mut self) -> Option<RawBucket<K, V>> {
        while self.raw.hash != self.hashes_end {
            unsafe {
                // We are swapping out the pointer to a bucket and replacing
                // it with the pointer to the next one.
                let prev = ptr::replace(&mut self.raw, self.raw.offset(1));
                if *prev.hash != EMPTY_BUCKET {
                    return Some(prev);
                }
            }
        }

        None
    }
}

/// An iterator that moves out buckets in reverse order. It leaves the table
/// in an inconsistent state and should only be used for dropping
/// the table's remaining entries. It's used in the implementation of Drop.
struct RevMoveBuckets<'a, K, V> {
    raw: RawBucket<K, V>,
    hashes_end: *mut HashUint,
    elems_left: usize,

    // As above, `&'a (K,V)` would seem better, but we often use
    // 'static for the lifetime, and this is not a publicly exposed
    // type.
    marker: marker::PhantomData<&'a ()>,
}

impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> {
    type Item = (K, V);

    fn next(&mut self) -> Option<(K, V)> {
        if self.elems_left == 0 {
            return None;
        }

        loop {
            debug_assert!(self.raw.hash != self.hashes_end);

            unsafe {
                self.raw = self.raw.offset(-1);

                if *self.raw.hash != EMPTY_BUCKET {
                    self.elems_left -= 1;
                    return Some(ptr::read(self.raw.pair));
                }
            }
        }
    }
}

/// Iterator over shared references to entries in a table.
pub struct Iter<'a, K: 'a, V: 'a> {
    iter: RawBuckets<'a, K, V>,
    elems_left: usize,
}

unsafe impl<'a, K: Sync, V: Sync> Sync for Iter<'a, K, V> {}
unsafe impl<'a, K: Sync, V: Sync> Send for Iter<'a, K, V> {}

// FIXME(#19839) Remove in favor of `#[derive(Clone)]`
impl<'a, K, V> Clone for Iter<'a, K, V> {
    fn clone(&self) -> Iter<'a, K, V> {
        Iter {
            iter: self.iter.clone(),
            elems_left: self.elems_left,
        }
    }
}


/// Iterator over mutable references to entries in a table.
pub struct IterMut<'a, K: 'a, V: 'a> {
    iter: RawBuckets<'a, K, V>,
    elems_left: usize,
    // To ensure invariance with respect to V
    _marker: marker::PhantomData<&'a mut V>,
}

unsafe impl<'a, K: Sync, V: Sync> Sync for IterMut<'a, K, V> {}
// Both K: Sync and K: Send are correct for IterMut's Send impl,
// but Send is the more useful bound
unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {}

/// Iterator over the entries in a table, consuming the table.
pub struct IntoIter<K, V> {
    table: RawTable<K, V>,
    iter: RawBuckets<'static, K, V>,
}

unsafe impl<K: Sync, V: Sync> Sync for IntoIter<K, V> {}
unsafe impl<K: Send, V: Send> Send for IntoIter<K, V> {}

/// Iterator over the entries in a table, clearing the table.
pub struct Drain<'a, K: 'a, V: 'a> {
    table: Shared<RawTable<K, V>>,
    iter: RawBuckets<'static, K, V>,
    marker: marker::PhantomData<&'a RawTable<K, V>>,
}

unsafe impl<'a, K: Sync, V: Sync> Sync for Drain<'a, K, V> {}
unsafe impl<'a, K: Send, V: Send> Send for Drain<'a, K, V> {}

impl<'a, K, V> Iterator for Iter<'a, K, V> {
    type Item = (&'a K, &'a V);

    fn next(&mut self) -> Option<(&'a K, &'a V)> {
        self.iter.next().map(|bucket| {
            self.elems_left -= 1;
            unsafe { (&(*bucket.pair).0, &(*bucket.pair).1) }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.elems_left, Some(self.elems_left))
    }
}
impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> {
    fn len(&self) -> usize {
        self.elems_left
    }
}

impl<'a, K, V> Iterator for IterMut<'a, K, V> {
    type Item = (&'a K, &'a mut V);

    fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
        self.iter.next().map(|bucket| {
            self.elems_left -= 1;
            let pair_mut = bucket.pair as *mut (K, V);
            unsafe { (&(*pair_mut).0, &mut (*pair_mut).1) }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.elems_left, Some(self.elems_left))
    }
}
impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> {
    fn len(&self) -> usize {
        self.elems_left
    }
}

impl<K, V> Iterator for IntoIter<K, V> {
    type Item = (SafeHash, K, V);

    fn next(&mut self) -> Option<(SafeHash, K, V)> {
        self.iter.next().map(|bucket| {
            self.table.size -= 1;
            unsafe {
                let (k, v) = ptr::read(bucket.pair);
                (SafeHash { hash: *bucket.hash }, k, v)
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let size = self.table.size();
        (size, Some(size))
    }
}
impl<K, V> ExactSizeIterator for IntoIter<K, V> {
    fn len(&self) -> usize {
        self.table.size()
    }
}

impl<'a, K, V> Iterator for Drain<'a, K, V> {
    type Item = (SafeHash, K, V);

    #[inline]
    fn next(&mut self) -> Option<(SafeHash, K, V)> {
        self.iter.next().map(|bucket| {
            unsafe {
                (**self.table).size -= 1;
                let (k, v) = ptr::read(bucket.pair);
                (SafeHash { hash: ptr::replace(bucket.hash, EMPTY_BUCKET) }, k, v)
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let size = unsafe { (**self.table).size() };
        (size, Some(size))
    }
}
impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> {
    fn len(&self) -> usize {
        unsafe { (**self.table).size() }
    }
}

impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> {
    fn drop(&mut self) {
        for _ in self {}
    }
}
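
// A minimal usage sketch of `drain`, with one arbitrary pair: draining
// yields the stored hash and pair, and leaves the table empty afterwards.
#[test]
fn test_drain_empties_table() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    match Bucket::at_index(&mut table, 2).peek() {
        Empty(empty) => {
            empty.put(SafeHash::new(11), 3, 30);
        }
        Full(..) => unreachable!("a fresh table has no full buckets"),
    }
    assert_eq!(table.drain().count(), 1);
    assert_eq!(table.size(), 0);
}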

impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
    fn clone(&self) -> RawTable<K, V> {
        unsafe {
            let mut new_ht = RawTable::new_uninitialized(self.capacity());

            {
                let cap = self.capacity();
                let mut new_buckets = Bucket::first(&mut new_ht);
                let mut buckets = Bucket::first(self);
                while buckets.index() != cap {
                    match buckets.peek() {
                        Full(full) => {
                            let (h, k, v) = {
                                let (k, v) = full.read();
                                (full.hash(), k.clone(), v.clone())
                            };
                            *new_buckets.raw.hash = h.inspect();
                            ptr::write(new_buckets.raw.pair as *mut (K, V), (k, v));
                        }
                        Empty(..) => {
                            *new_buckets.raw.hash = EMPTY_BUCKET;
                        }
                    }
                    new_buckets.next();
                    buckets.next();
                }
            };

            new_ht.size = self.size();

            new_ht
        }
    }
}
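
// A minimal check of `Clone` above, with one arbitrary pair: the clone must
// reproduce both the size and the stored entries of the original table.
#[test]
fn test_raw_table_clone() {
    let mut table: RawTable<u32, u32> = RawTable::new(8);
    match Bucket::at_index(&mut table, 5).peek() {
        Empty(empty) => {
            empty.put(SafeHash::new(5), 8, 80);
        }
        Full(..) => unreachable!("a fresh table has no full buckets"),
    }
    let clone = table.clone();
    assert_eq!(clone.size(), 1);
    assert_eq!(clone.iter().next(), Some((&8, &80)));
}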

impl<K, V> Drop for RawTable<K, V> {
    #[unsafe_destructor_blind_to_params]
    fn drop(&mut self) {
        if self.capacity == 0 {
            return;
        }

        // This is done in reverse because we've likely partially taken
        // some elements out with `.into_iter()` from the front.
        // Check if the size is 0, so we don't do a useless scan when
        // dropping empty tables such as on resize.
        // Also avoid double drop of elements that have been already moved out.
        unsafe {
            if needs_drop::<(K, V)>() {
                // avoid linear runtime for types that don't need drop
                for _ in self.rev_move_buckets() {}
            }
        }

        let hashes_size = self.capacity * size_of::<HashUint>();
        let pairs_size = self.capacity * size_of::<(K, V)>();
        let (align, _, size, oflo) = calculate_allocation(hashes_size,
                                                          align_of::<HashUint>(),
                                                          pairs_size,
                                                          align_of::<(K, V)>());

        debug_assert!(!oflo, "should be impossible");

        unsafe {
            deallocate(*self.hashes as *mut u8, size, align);
            // Remember how everything was allocated out of one buffer
            // during initialization? We only need one call to free here.
        }
    }
}