// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// ignore-lexer-test FIXME #15883

use self::BucketState::*;

use clone::Clone;
use cmp;
use hash::{Hash, Hasher};
use iter::{Iterator, ExactSizeIterator};
use marker::{Copy, Send, Sync, Sized, self};
use mem::{min_align_of, size_of};
use mem;
use num::wrapping::{OverflowingOps, WrappingOps};
use ops::{Deref, DerefMut, Drop};
use option::Option;
use option::Option::{Some, None};
use ptr::{self, Unique};
use rt::heap::{allocate, deallocate, EMPTY};
use collections::hash_state::HashState;

const EMPTY_BUCKET: u64 = 0;

/// The raw hashtable, providing safe-ish access to the unzipped and highly
/// optimized arrays of hashes, keys, and values.
///
/// This design uses less memory and is a lot faster than the naive
/// `Vec<Option<(u64, K, V)>>`, because we don't pay for the overhead of an
/// option on every element, and we get a generally more cache-aware design.
///
/// Essential invariants of this structure:
///
/// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw`
///   points to 'undefined' contents. Don't read from it. This invariant is
///   enforced outside this module with the `EmptyBucket`, `FullBucket`,
///   and `SafeHash` types.
///
/// - An `EmptyBucket` is only constructed at an index with
///   a hash of EMPTY_BUCKET.
///
/// - A `FullBucket` is only constructed at an index with a
///   non-EMPTY_BUCKET hash.
///
/// - A `SafeHash` is only constructed for a non-`EMPTY_BUCKET` hash. We get
///   around hashes of zero by changing them to 0x8000_0000_0000_0000,
///   which will likely map to the same bucket, while not being confused
///   with "empty".
///
/// - All three "arrays represented by pointers" are the same length:
///   `capacity`. This is set at creation and never changes. The arrays
///   are unzipped to save space (we don't have to pay for the padding
///   between odd-sized elements, such as in a map from u64 to u8), and
///   to be more cache-aware (scanning through 8 hashes brings in at most
///   2 cache lines, since they're all right beside each other).
///
/// You can kind of think of this module/data structure as a safe wrapper
/// around just the "table" part of the hashtable. It enforces some
/// invariants at the type level and employs some performance trickery,
/// but in general is just a tricked-out `Vec<Option<(u64, K, V)>>`.
#[unsafe_no_drop_flag]
pub struct RawTable<K, V> {
    capacity: usize,
    size: usize,
    hashes: Unique<u64>,

    // Because K/V do not appear directly in any of the types in the struct,
    // inform rustc that in fact instances of K and V are reachable from here.
    marker: marker::PhantomData<(K,V)>,
}

unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {}
unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {}

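// An editorial, illustrative check of the space-saving claim in the doc
// comment above. It assumes `Option<(u64, u64, u8)>` gets no niche
// optimization, which holds for these payload types: for a map from u64
// to u8, one zipped `Option<(u64, K, V)>` element costs more than the
// 8 + 8 + 1 bytes per entry paid by the three unzipped arrays.
#[test]
fn test_unzipped_layout_is_smaller() {
    let zipped = size_of::<Option<(u64, u64, u8)>>();
    let unzipped = size_of::<u64>() + size_of::<u64>() + size_of::<u8>();
    assert!(unzipped < zipped);
}
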
struct RawBucket<K, V> {
    hash: *mut u64,
    key: *mut K,
    val: *mut V,
    _marker: marker::PhantomData<(K,V)>,
}

impl<K,V> Copy for RawBucket<K,V> {}
impl<K,V> Clone for RawBucket<K,V> {
    fn clone(&self) -> RawBucket<K, V> { *self }
}

pub struct Bucket<K, V, M> {
    raw: RawBucket<K, V>,
    idx: usize,
    table: M
}

impl<K,V,M:Copy> Copy for Bucket<K,V,M> {}
impl<K,V,M:Copy> Clone for Bucket<K,V,M> {
    fn clone(&self) -> Bucket<K,V,M> { *self }
}

pub struct EmptyBucket<K, V, M> {
    raw: RawBucket<K, V>,
    idx: usize,
    table: M
}

pub struct FullBucket<K, V, M> {
    raw: RawBucket<K, V>,
    idx: usize,
    table: M
}

pub type EmptyBucketImm<'table, K, V> = EmptyBucket<K, V, &'table RawTable<K, V>>;
pub type FullBucketImm<'table, K, V> = FullBucket<K, V, &'table RawTable<K, V>>;

pub type EmptyBucketMut<'table, K, V> = EmptyBucket<K, V, &'table mut RawTable<K, V>>;
pub type FullBucketMut<'table, K, V> = FullBucket<K, V, &'table mut RawTable<K, V>>;

pub enum BucketState<K, V, M> {
    Empty(EmptyBucket<K, V, M>),
    Full(FullBucket<K, V, M>),
}

// A GapThenFull encapsulates the state of two consecutive buckets at once.
// The first bucket, called the gap, is known to be empty.
// The second bucket is full.
struct GapThenFull<K, V, M> {
    gap: EmptyBucket<K, V, ()>,
    full: FullBucket<K, V, M>,
}

/// A hash that is not zero, since we use a hash of zero to represent empty
/// buckets.
#[derive(PartialEq, Copy, Clone)]
pub struct SafeHash {
    hash: u64,
}

impl SafeHash {
    /// Peek at the hash value, which is guaranteed to be non-zero.
    #[inline(always)]
    pub fn inspect(&self) -> u64 { self.hash }
}

/// We need to remove hashes of 0. That's reserved for empty buckets.
/// This function wraps up `hash_keyed` to be the only way outside this
/// module to generate a SafeHash.
pub fn make_hash<T: ?Sized, S>(hash_state: &S, t: &T) -> SafeHash
    where T: Hash, S: HashState
{
    let mut state = hash_state.hasher();
    t.hash(&mut state);
    // We need to avoid 0 in order to prevent collisions with
    // EMPTY_BUCKET. We can maintain our precious uniform distribution
    // of initial indexes by unconditionally setting the MSB,
    // effectively reducing 64-bit hashes to 63 bits.
    SafeHash { hash: 0x8000_0000_0000_0000 | state.finish() }
}
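
// A quick editorial sanity check of the MSB trick above (illustrative, not
// exhaustive): OR-ing in the top bit makes the result non-zero for any
// input, so a `SafeHash` can never collide with `EMPTY_BUCKET`.
#[test]
fn test_safehash_is_never_empty() {
    for &h in &[0u64, 1, 0xdead_beef, 0xffff_ffff_ffff_ffff] {
        assert!((0x8000_0000_0000_0000 | h) != EMPTY_BUCKET);
    }
}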

// `replace` casts a `*u64` to a `*SafeHash`. Since we statically
// ensure that a `FullBucket` points to an index with a non-zero hash,
// and a `SafeHash` is just a `u64` with a different name, this is
// safe.
//
// This test ensures that a `SafeHash` really IS the same size as a
// `u64`. If you need to change the size of `SafeHash` (and
// consequently make this test fail), `replace` needs to be
// modified to no longer assume this.
#[test]
fn can_alias_safehash_as_u64() {
    assert_eq!(size_of::<SafeHash>(), size_of::<u64>())
}

impl<K, V> RawBucket<K, V> {
    unsafe fn offset(self, count: isize) -> RawBucket<K, V> {
        RawBucket {
            hash: self.hash.offset(count),
            key: self.key.offset(count),
            val: self.val.offset(count),
            _marker: marker::PhantomData,
        }
    }
}

// Buckets hold references to the table.
impl<K, V, M> FullBucket<K, V, M> {
    /// Borrow a reference to the table.
    pub fn table(&self) -> &M {
        &self.table
    }
    /// Move out the reference to the table.
    pub fn into_table(self) -> M {
        self.table
    }
    /// Get the raw index.
    pub fn index(&self) -> usize {
        self.idx
    }
}

impl<K, V, M> EmptyBucket<K, V, M> {
    /// Borrow a reference to the table.
    pub fn table(&self) -> &M {
        &self.table
    }
    /// Move out the reference to the table.
    pub fn into_table(self) -> M {
        self.table
    }
}

impl<K, V, M> Bucket<K, V, M> {
    /// Move out the reference to the table.
    pub fn into_table(self) -> M {
        self.table
    }
    /// Get the raw index.
    pub fn index(&self) -> usize {
        self.idx
    }
}

impl<K, V, M: Deref<Target=RawTable<K, V>>> Bucket<K, V, M> {
    pub fn new(table: M, hash: SafeHash) -> Bucket<K, V, M> {
        Bucket::at_index(table, hash.inspect() as usize)
    }

    pub fn at_index(table: M, ib_index: usize) -> Bucket<K, V, M> {
        // If capacity is 0, then the RawBucket will be populated with bogus
        // pointers. This is an uncommon case though, so we only check for it
        // in debug builds.
        debug_assert!(table.capacity() > 0, "Table should have capacity at this point");
        let ib_index = ib_index & (table.capacity() - 1);
        Bucket {
            raw: unsafe {
                table.first_bucket_raw().offset(ib_index as isize)
            },
            idx: ib_index,
            table: table
        }
    }

    pub fn first(table: M) -> Bucket<K, V, M> {
        Bucket {
            raw: table.first_bucket_raw(),
            idx: 0,
            table: table
        }
    }

    /// Reads a bucket at a given index, returning an enum indicating whether
    /// it's initialized or not. You need to match on this enum to get
    /// the appropriate types to call most of the other functions in
    /// this module.
    pub fn peek(self) -> BucketState<K, V, M> {
        match unsafe { *self.raw.hash } {
            EMPTY_BUCKET =>
                Empty(EmptyBucket {
                    raw: self.raw,
                    idx: self.idx,
                    table: self.table
                }),
            _ =>
                Full(FullBucket {
                    raw: self.raw,
                    idx: self.idx,
                    table: self.table
                })
        }
    }

    /// Modifies the bucket pointer in place to make it point to the next slot.
    pub fn next(&mut self) {
        // Branchless bucket iteration step.
        // As we reach the end of the table...
        // We take the current idx:          0111111b
        // Xor it by its increment:        ^ 1000000b
        //                                  ------------
        //                                   1111111b
        // Then AND with the capacity:     & 1000000b
        //                                  ------------
        // to get the backwards offset:      1000000b
        // ... and it's zero at all other times.
        let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity();
        // Finally, we obtain the offset 1 or the offset -cap + 1.
        let dist = 1 - (maybe_wraparound_dist as isize);

        self.idx += 1;

        unsafe {
            self.raw = self.raw.offset(dist);
        }
    }
}
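
// An editorial check of the branchless step above (illustrative): with a
// power-of-two capacity of 8, advancing from the last index must step the
// raw pointer back by `capacity - 1` slots, and forward by 1 otherwise.
#[test]
fn test_branchless_wraparound_step() {
    let capacity = 8usize;
    for idx in 0..capacity {
        let maybe_wraparound_dist = (idx ^ (idx + 1)) & capacity;
        let dist = 1 - (maybe_wraparound_dist as isize);
        if idx == capacity - 1 {
            assert_eq!(dist, 1 - capacity as isize);
        } else {
            assert_eq!(dist, 1);
        }
    }
}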

impl<K, V, M: Deref<Target=RawTable<K, V>>> EmptyBucket<K, V, M> {
    #[inline]
    pub fn next(self) -> Bucket<K, V, M> {
        let mut bucket = self.into_bucket();
        bucket.next();
        bucket
    }

    #[inline]
    pub fn into_bucket(self) -> Bucket<K, V, M> {
        Bucket {
            raw: self.raw,
            idx: self.idx,
            table: self.table
        }
    }

    pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> {
        let gap = EmptyBucket {
            raw: self.raw,
            idx: self.idx,
            table: ()
        };

        match self.next().peek() {
            Full(bucket) => {
                Some(GapThenFull {
                    gap: gap,
                    full: bucket
                })
            }
            Empty(..) => None
        }
    }
}

impl<K, V, M: Deref<Target=RawTable<K, V>> + DerefMut> EmptyBucket<K, V, M> {
    /// Puts the given key and value pair, along with the key's hash,
    /// into this bucket in the hashtable. Note how `self` is 'moved' into
    /// this function, because this slot will no longer be empty when
    /// we return! A `FullBucket` is returned for later use, pointing to
    /// the newly-filled slot in the hashtable.
    ///
    /// Use `make_hash` to construct a `SafeHash` to pass to this function.
    pub fn put(mut self, hash: SafeHash, key: K, value: V)
               -> FullBucket<K, V, M> {
        unsafe {
            *self.raw.hash = hash.inspect();
            ptr::write(self.raw.key, key);
            ptr::write(self.raw.val, value);
        }

        self.table.size += 1;

        FullBucket { raw: self.raw, idx: self.idx, table: self.table }
    }
}

impl<K, V, M: Deref<Target=RawTable<K, V>>> FullBucket<K, V, M> {
    #[inline]
    pub fn next(self) -> Bucket<K, V, M> {
        let mut bucket = self.into_bucket();
        bucket.next();
        bucket
    }

    #[inline]
    pub fn into_bucket(self) -> Bucket<K, V, M> {
        Bucket {
            raw: self.raw,
            idx: self.idx,
            table: self.table
        }
    }

    /// Get the distance between this bucket and the 'ideal' location
    /// as determined by the key's hash stored in it.
    ///
    /// In the Robin Hood hashing literature, this is called the "distance
    /// to initial bucket", or DIB. Also known as "probe count".
    pub fn distance(&self) -> usize {
        // Calculates the distance one has to travel when going from
        // `hash mod capacity` onwards to `idx mod capacity`, wrapping around
        // if the destination is not reached before the end of the table.
        (self.idx.wrapping_sub(self.hash().inspect() as usize)) & (self.table.capacity() - 1)
    }

    #[inline]
    pub fn hash(&self) -> SafeHash {
        unsafe {
            SafeHash {
                hash: *self.raw.hash
            }
        }
    }

    /// Gets references to the key and value at a given index.
    pub fn read(&self) -> (&K, &V) {
        unsafe {
            (&*self.raw.key,
             &*self.raw.val)
        }
    }
}
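
// A worked editorial example of the DIB formula above (illustrative): with
// capacity 8, an entry whose ideal slot is `hash mod 8 == 6` but which
// actually sits at index 1 has wrapped past the end of the table, so its
// probe count is (1 - 6) mod 8 = 3.
#[test]
fn test_distance_formula() {
    let capacity = 8usize;
    // No displacement: the entry sits in its ideal slot.
    assert_eq!(5usize.wrapping_sub(5) & (capacity - 1), 0);
    // Simple displacement with no wraparound.
    assert_eq!(5usize.wrapping_sub(3) & (capacity - 1), 2);
    // Displacement that wraps around the end of the table.
    assert_eq!(1usize.wrapping_sub(6) & (capacity - 1), 3);
}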

impl<K, V, M: Deref<Target=RawTable<K, V>> + DerefMut> FullBucket<K, V, M> {
    /// Removes this bucket's key and value from the hashtable.
    ///
    /// This works similarly to `put`, building an `EmptyBucket` out of the
    /// taken bucket.
    pub fn take(mut self) -> (EmptyBucket<K, V, M>, K, V) {
        self.table.size -= 1;

        unsafe {
            *self.raw.hash = EMPTY_BUCKET;
            (
                EmptyBucket {
                    raw: self.raw,
                    idx: self.idx,
                    table: self.table
                },
                ptr::read(self.raw.key),
                ptr::read(self.raw.val)
            )
        }
    }

    pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) {
        unsafe {
            let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h);
            let old_key = ptr::replace(self.raw.key, k);
            let old_val = ptr::replace(self.raw.val, v);

            (old_hash, old_key, old_val)
        }
    }

    /// Gets mutable references to the key and value at a given index.
    pub fn read_mut(&mut self) -> (&mut K, &mut V) {
        unsafe {
            (&mut *self.raw.key,
             &mut *self.raw.val)
        }
    }
}

impl<'t, K, V, M: Deref<Target=RawTable<K, V>> + 't> FullBucket<K, V, M> {
    /// Exchange a bucket state for immutable references into the table.
    /// Because the underlying reference to the table is also consumed,
    /// no further changes to the structure of the table are possible;
    /// in exchange for this, the returned references have a longer lifetime
    /// than the references returned by `read()`.
    pub fn into_refs(self) -> (&'t K, &'t V) {
        unsafe {
            (&*self.raw.key,
             &*self.raw.val)
        }
    }
}

impl<'t, K, V, M: Deref<Target=RawTable<K, V>> + DerefMut + 't> FullBucket<K, V, M> {
    /// This works similarly to `into_refs`, exchanging a bucket state
    /// for mutable references into the table.
    pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) {
        unsafe {
            (&mut *self.raw.key,
             &mut *self.raw.val)
        }
    }
}

impl<K, V, M> BucketState<K, V, M> {
    // For convenience.
    pub fn expect_full(self) -> FullBucket<K, V, M> {
        match self {
            Full(full) => full,
            Empty(..) => panic!("Expected full bucket")
        }
    }
}

impl<K, V, M: Deref<Target=RawTable<K, V>>> GapThenFull<K, V, M> {
    #[inline]
    pub fn full(&self) -> &FullBucket<K, V, M> {
        &self.full
    }

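    /// Moves the full bucket's entry back into the gap, making the old
    /// full slot the new gap, and advances to the next (gap, full) pair.
    /// Returns `None` once the bucket after the moved entry is empty.
    /// (Editorial note: this is the backward-shift step that keeps probe
    /// sequences contiguous when entries are removed or displaced.)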
    pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> {
        unsafe {
            *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET);
            ptr::copy_nonoverlapping(self.full.raw.key, self.gap.raw.key, 1);
            ptr::copy_nonoverlapping(self.full.raw.val, self.gap.raw.val, 1);
        }

        let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full;

        match self.full.next().peek() {
            Full(bucket) => {
                self.gap.raw = prev_raw;
                self.gap.idx = prev_idx;

                self.full = bucket;

                Some(self)
            }
            Empty(..) => None
        }
    }
}

/// Rounds up to a multiple of a power of two. Returns the closest multiple
/// of `target_alignment` that is higher or equal to `unrounded`.
///
/// # Panics
///
/// Panics if `target_alignment` is not a power of two.
fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize {
    assert!(target_alignment.is_power_of_two());
    (unrounded + target_alignment - 1) & !(target_alignment - 1)
}

#[test]
fn test_rounding() {
    assert_eq!(round_up_to_next(0, 4), 0);
    assert_eq!(round_up_to_next(1, 4), 4);
    assert_eq!(round_up_to_next(2, 4), 4);
    assert_eq!(round_up_to_next(3, 4), 4);
    assert_eq!(round_up_to_next(4, 4), 4);
    assert_eq!(round_up_to_next(5, 4), 8);
}

// Returns a tuple of (key_offset, val_offset, oflo) from the start of a
// mallocated array, where `oflo` reports whether the offset arithmetic
// overflowed.
fn calculate_offsets(hashes_size: usize,
                     keys_size: usize, keys_align: usize,
                     vals_align: usize)
                     -> (usize, usize, bool) {
    let keys_offset = round_up_to_next(hashes_size, keys_align);
    let (end_of_keys, oflo) = keys_offset.overflowing_add(keys_size);

    let vals_offset = round_up_to_next(end_of_keys, vals_align);

    (keys_offset, vals_offset, oflo)
}

// Returns a tuple of (minimum required malloc alignment, hash_offset,
// array_size, oflo) from the start of a mallocated array, where `oflo`
// reports whether the size arithmetic overflowed.
fn calculate_allocation(hash_size: usize, hash_align: usize,
                        keys_size: usize, keys_align: usize,
                        vals_size: usize, vals_align: usize)
                        -> (usize, usize, usize, bool) {
    let hash_offset = 0;
    let (_, vals_offset, oflo) = calculate_offsets(hash_size,
                                                   keys_size, keys_align,
                                                   vals_align);
    let (end_of_vals, oflo2) = vals_offset.overflowing_add(vals_size);

    let min_align = cmp::max(hash_align, cmp::max(keys_align, vals_align));

    (min_align, hash_offset, end_of_vals, oflo || oflo2)
}

#[test]
fn test_offset_calculation() {
    assert_eq!(calculate_allocation(128, 8, 15, 1, 4, 4), (8, 0, 148, false));
    assert_eq!(calculate_allocation(3, 1, 2, 1, 1, 1), (1, 0, 6, false));
    assert_eq!(calculate_allocation(6, 2, 12, 4, 24, 8), (8, 0, 48, false));
    assert_eq!(calculate_offsets(128, 15, 1, 4), (128, 144, false));
    assert_eq!(calculate_offsets(3, 2, 1, 1), (3, 5, false));
    assert_eq!(calculate_offsets(6, 12, 4, 8), (8, 24, false));
}
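
// An editorial step-by-step trace of one case above (illustrative): with
// 6 bytes of hashes (align 2), 12 bytes of keys (align 4), and 24 bytes of
// values (align 8), the keys start at round_up_to_next(6, 4) = 8 and end
// at 20, the values start at round_up_to_next(20, 8) = 24 and end at 48,
// and the whole buffer needs alignment max(2, 4, 8) = 8, matching the
// (8, 0, 48, false) expected by `test_offset_calculation`.
#[test]
fn test_allocation_trace() {
    let keys_offset = round_up_to_next(6, 4);
    assert_eq!(keys_offset, 8);
    let vals_offset = round_up_to_next(keys_offset + 12, 8);
    assert_eq!(vals_offset, 24);
    assert_eq!(vals_offset + 24, 48);
    assert_eq!(cmp::max(2, cmp::max(4, 8)), 8);
}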

impl<K, V> RawTable<K, V> {
    /// Does not initialize the buckets. The caller should ensure they,
    /// at the very least, set every hash to EMPTY_BUCKET.
    unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> {
        if capacity == 0 {
            return RawTable {
                size: 0,
                capacity: 0,
                hashes: Unique::new(EMPTY as *mut u64),
                marker: marker::PhantomData,
            };
        }

        // No need for `checked_mul` before a more restrictive check performed
        // later in this method.
        let hashes_size = capacity * size_of::<u64>();
        let keys_size = capacity * size_of::<K>();
        let vals_size = capacity * size_of::<V>();

        // Allocating hashmaps is a little tricky. We need to allocate three
        // arrays, but since we know their sizes and alignments up front,
        // we just allocate a single array, and then have the subarrays
        // point into it.
        //
        // This is great in theory, but in practice getting the alignment
        // right is a little subtle. Therefore, calculating offsets has been
        // factored out into a different function.
        let (malloc_alignment, hash_offset, size, oflo) =
            calculate_allocation(
                hashes_size, min_align_of::<u64>(),
                keys_size, min_align_of::<K>(),
                vals_size, min_align_of::<V>());

        assert!(!oflo, "capacity overflow");

        // One check for overflow that covers calculation and rounding of size.
        let size_of_bucket = size_of::<u64>().checked_add(size_of::<K>()).unwrap()
                                             .checked_add(size_of::<V>()).unwrap();
        assert!(size >= capacity.checked_mul(size_of_bucket)
                                .expect("capacity overflow"),
                "capacity overflow");

        let buffer = allocate(size, malloc_alignment);
        if buffer.is_null() { ::alloc::oom() }

        let hashes = buffer.offset(hash_offset as isize) as *mut u64;

        RawTable {
            capacity: capacity,
            size: 0,
            hashes: Unique::new(hashes),
            marker: marker::PhantomData,
        }
    }

    fn first_bucket_raw(&self) -> RawBucket<K, V> {
        let hashes_size = self.capacity * size_of::<u64>();
        let keys_size = self.capacity * size_of::<K>();

        let buffer = *self.hashes as *mut u8;
        let (keys_offset, vals_offset, oflo) =
            calculate_offsets(hashes_size,
                              keys_size, min_align_of::<K>(),
                              min_align_of::<V>());
        debug_assert!(!oflo, "capacity overflow");
        unsafe {
            RawBucket {
                hash: *self.hashes,
                key: buffer.offset(keys_offset as isize) as *mut K,
                val: buffer.offset(vals_offset as isize) as *mut V,
                _marker: marker::PhantomData,
            }
        }
    }

    /// Creates a new raw table from a given capacity. All buckets are
    /// initially empty.
    pub fn new(capacity: usize) -> RawTable<K, V> {
        unsafe {
            let ret = RawTable::new_uninitialized(capacity);
            ptr::write_bytes(*ret.hashes, 0, capacity);
            ret
        }
    }

    /// The hashtable's capacity, similar to a vector's.
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// The number of elements ever `put` in the hashtable, minus the number
    /// of elements ever `take`n.
    pub fn size(&self) -> usize {
        self.size
    }

    fn raw_buckets(&self) -> RawBuckets<K, V> {
        RawBuckets {
            raw: self.first_bucket_raw(),
            hashes_end: unsafe {
                self.hashes.offset(self.capacity as isize)
            },
            marker: marker::PhantomData,
        }
    }

    pub fn iter(&self) -> Iter<K, V> {
        Iter {
            iter: self.raw_buckets(),
            elems_left: self.size(),
        }
    }

    pub fn iter_mut(&mut self) -> IterMut<K, V> {
        IterMut {
            iter: self.raw_buckets(),
            elems_left: self.size(),
        }
    }

    pub fn into_iter(self) -> IntoIter<K, V> {
        let RawBuckets { raw, hashes_end, .. } = self.raw_buckets();
        // Replace the marker regardless of lifetime bounds on parameters.
        IntoIter {
            iter: RawBuckets {
                raw: raw,
                hashes_end: hashes_end,
                marker: marker::PhantomData,
            },
            table: self,
        }
    }

    pub fn drain(&mut self) -> Drain<K, V> {
        let RawBuckets { raw, hashes_end, .. } = self.raw_buckets();
        // Replace the marker regardless of lifetime bounds on parameters.
        Drain {
            iter: RawBuckets {
                raw: raw,
                hashes_end: hashes_end,
                marker: marker::PhantomData,
            },
            table: self,
        }
    }

    /// Returns an iterator that copies out each entry. Used while the table
    /// is being dropped.
    unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> {
        let raw_bucket = self.first_bucket_raw();
        RevMoveBuckets {
            raw: raw_bucket.offset(self.capacity as isize),
            hashes_end: raw_bucket.hash,
            elems_left: self.size,
            marker: marker::PhantomData,
        }
    }
}
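
// Editorial sketches of the bucket and iteration APIs above (illustrative;
// they exercise only in-module items). First, a put/read/take round trip
// through a single bucket.
#[test]
fn test_put_and_take() {
    let mut table = RawTable::new(8);
    let hash = SafeHash { hash: 0x8000_0000_0000_0000 };
    let full = match Bucket::new(&mut table, hash).peek() {
        Empty(empty) => empty.put(hash, 1u64, 2u64),
        Full(..) => panic!("new table should be empty"),
    };
    assert_eq!(full.read(), (&1, &2));
    let (_, k, v) = full.take();
    assert_eq!((k, v), (1, 2));
}

// Second, `iter` should visit exactly the live entries, as promised by
// `size_hint`.
#[test]
fn test_iter_visits_live_entries() {
    let mut table = RawTable::new(8);
    for i in 0..2 {
        let hash = SafeHash { hash: 0x8000_0000_0000_0000 | i };
        match Bucket::new(&mut table, hash).peek() {
            Empty(empty) => { empty.put(hash, i, i * 10); }
            Full(..) => panic!("bucket should be empty"),
        }
    }
    assert_eq!(table.size(), 2);
    assert_eq!(table.iter().count(), 2);
}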

/// A raw iterator. The basis for some other iterators in this module. Although
/// this interface is safe, it's not used outside this module.
struct RawBuckets<'a, K, V> {
    raw: RawBucket<K, V>,
    hashes_end: *mut u64,

    // Strictly speaking, this should be &'a (K,V), but that would
    // require that K:'a, and we often use RawBuckets<'static...> for
    // move iterations, so that messes up a lot of other things. So
    // just use `&'a ()` as this is not a publicly exposed type
    // anyway.
    marker: marker::PhantomData<&'a ()>,
}

// FIXME(#19839) Remove in favor of `#[derive(Clone)]`
impl<'a, K, V> Clone for RawBuckets<'a, K, V> {
    fn clone(&self) -> RawBuckets<'a, K, V> {
        RawBuckets {
            raw: self.raw,
            hashes_end: self.hashes_end,
            marker: marker::PhantomData,
        }
    }
}

impl<'a, K, V> Iterator for RawBuckets<'a, K, V> {
    type Item = RawBucket<K, V>;

    fn next(&mut self) -> Option<RawBucket<K, V>> {
        while self.raw.hash != self.hashes_end {
            unsafe {
                // We are swapping out the pointer to a bucket and replacing
                // it with the pointer to the next one.
                let prev = ptr::replace(&mut self.raw, self.raw.offset(1));
                if *prev.hash != EMPTY_BUCKET {
                    return Some(prev);
                }
            }
        }

        None
    }
}

/// An iterator that moves out buckets in reverse order. It leaves the table
/// in an inconsistent state and should only be used for dropping
/// the table's remaining entries. It's used in the implementation of Drop.
struct RevMoveBuckets<'a, K, V> {
    raw: RawBucket<K, V>,
    hashes_end: *mut u64,
    elems_left: usize,

    // As above, `&'a (K,V)` would seem better, but we often use
    // 'static for the lifetime, and this is not a publicly exposed
    // type.
    marker: marker::PhantomData<&'a ()>,
}

impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> {
    type Item = (K, V);

    fn next(&mut self) -> Option<(K, V)> {
        if self.elems_left == 0 {
            return None;
        }

        loop {
            debug_assert!(self.raw.hash != self.hashes_end);

            unsafe {
                self.raw = self.raw.offset(-1);

                if *self.raw.hash != EMPTY_BUCKET {
                    self.elems_left -= 1;
                    return Some((
                        ptr::read(self.raw.key),
                        ptr::read(self.raw.val)
                    ));
                }
            }
        }
    }
}

/// Iterator over shared references to entries in a table.
pub struct Iter<'a, K: 'a, V: 'a> {
    iter: RawBuckets<'a, K, V>,
    elems_left: usize,
}

// FIXME(#19839) Remove in favor of `#[derive(Clone)]`
impl<'a, K, V> Clone for Iter<'a, K, V> {
    fn clone(&self) -> Iter<'a, K, V> {
        Iter {
            iter: self.iter.clone(),
            elems_left: self.elems_left
        }
    }
}

/// Iterator over mutable references to entries in a table.
pub struct IterMut<'a, K: 'a, V: 'a> {
    iter: RawBuckets<'a, K, V>,
    elems_left: usize,
}

/// Iterator over the entries in a table, consuming the table.
pub struct IntoIter<K, V> {
    table: RawTable<K, V>,
    iter: RawBuckets<'static, K, V>
}

/// Iterator over the entries in a table, clearing the table.
pub struct Drain<'a, K: 'a, V: 'a> {
    table: &'a mut RawTable<K, V>,
    iter: RawBuckets<'static, K, V>,
}

impl<'a, K, V> Iterator for Iter<'a, K, V> {
    type Item = (&'a K, &'a V);

    fn next(&mut self) -> Option<(&'a K, &'a V)> {
        self.iter.next().map(|bucket| {
            self.elems_left -= 1;
            unsafe {
                (&*bucket.key,
                 &*bucket.val)
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.elems_left, Some(self.elems_left))
    }
}
impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> {
    fn len(&self) -> usize { self.elems_left }
}

impl<'a, K, V> Iterator for IterMut<'a, K, V> {
    type Item = (&'a K, &'a mut V);

    fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
        self.iter.next().map(|bucket| {
            self.elems_left -= 1;
            unsafe {
                (&*bucket.key,
                 &mut *bucket.val)
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.elems_left, Some(self.elems_left))
    }
}
impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> {
    fn len(&self) -> usize { self.elems_left }
}

impl<K, V> Iterator for IntoIter<K, V> {
    type Item = (SafeHash, K, V);

    fn next(&mut self) -> Option<(SafeHash, K, V)> {
        self.iter.next().map(|bucket| {
            self.table.size -= 1;
            unsafe {
                (
                    SafeHash {
                        hash: *bucket.hash,
                    },
                    ptr::read(bucket.key),
                    ptr::read(bucket.val)
                )
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let size = self.table.size();
        (size, Some(size))
    }
}
impl<K, V> ExactSizeIterator for IntoIter<K, V> {
    fn len(&self) -> usize { self.table.size() }
}

impl<'a, K, V> Iterator for Drain<'a, K, V> {
    type Item = (SafeHash, K, V);

    #[inline]
    fn next(&mut self) -> Option<(SafeHash, K, V)> {
        self.iter.next().map(|bucket| {
            self.table.size -= 1;
            unsafe {
                (
                    SafeHash {
                        hash: ptr::replace(bucket.hash, EMPTY_BUCKET),
                    },
                    ptr::read(bucket.key),
                    ptr::read(bucket.val)
                )
            }
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let size = self.table.size();
        (size, Some(size))
    }
}
impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> {
    fn len(&self) -> usize { self.table.size() }
}

#[unsafe_destructor]
impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> {
    fn drop(&mut self) {
        for _ in self.by_ref() {}
    }
}

impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
    fn clone(&self) -> RawTable<K, V> {
        unsafe {
            let mut new_ht = RawTable::new_uninitialized(self.capacity());

            {
                let cap = self.capacity();
                let mut new_buckets = Bucket::first(&mut new_ht);
                let mut buckets = Bucket::first(self);
                while buckets.index() != cap {
                    match buckets.peek() {
                        Full(full) => {
                            let (h, k, v) = {
                                let (k, v) = full.read();
                                (full.hash(), k.clone(), v.clone())
                            };
                            *new_buckets.raw.hash = h.inspect();
                            ptr::write(new_buckets.raw.key, k);
                            ptr::write(new_buckets.raw.val, v);
                        }
                        Empty(..) => {
                            *new_buckets.raw.hash = EMPTY_BUCKET;
                        }
                    }
                    new_buckets.next();
                    buckets.next();
                }
            };

            new_ht.size = self.size();

            new_ht
        }
    }
}
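
// An editorial sketch of `Clone` (illustrative): the copy owns its own
// buffer, and deep-copies every live entry along with its cached hash.
#[test]
fn test_clone_preserves_entries() {
    let mut table = RawTable::new(8);
    let hash = SafeHash { hash: 0x8000_0000_0000_0000 };
    match Bucket::new(&mut table, hash).peek() {
        Empty(empty) => { empty.put(hash, 7u64, 8u64); }
        Full(..) => panic!("bucket should be empty"),
    }
    let copy = table.clone();
    assert_eq!(copy.size(), 1);
    assert_eq!(copy.iter().next(), Some((&7, &8)));
}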

#[unsafe_destructor]
impl<K, V> Drop for RawTable<K, V> {
    fn drop(&mut self) {
        if self.capacity == 0 || self.capacity == mem::POST_DROP_USIZE {
            return;
        }

        // This is done in reverse because we've likely partially taken
        // some elements out with `.into_iter()` from the front.
        // Check if the size is 0, so we don't do a useless scan when
        // dropping empty tables such as on resize.
        // Also avoid double drop of elements that have been already moved out.
        unsafe {
            for _ in self.rev_move_buckets() {}
        }

        let hashes_size = self.capacity * size_of::<u64>();
        let keys_size = self.capacity * size_of::<K>();
        let vals_size = self.capacity * size_of::<V>();
        let (align, _, size, oflo) =
            calculate_allocation(hashes_size, min_align_of::<u64>(),
                                 keys_size, min_align_of::<K>(),
                                 vals_size, min_align_of::<V>());

        debug_assert!(!oflo, "should be impossible");

        unsafe {
            deallocate(*self.hashes as *mut u8, size, align);
            // Remember how everything was allocated out of one buffer
            // during initialization? We only need one call to free here.
        }
    }
}