]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
1a4d82fc JJ |
10 | |
11 | use self::BucketState::*; | |
12 | ||
13 | use clone::Clone; | |
14 | use cmp; | |
15 | use hash::{Hash, Hasher}; | |
c34b1796 | 16 | use iter::{Iterator, ExactSizeIterator}; |
85aaf69f | 17 | use marker::{Copy, Send, Sync, Sized, self}; |
1a4d82fc JJ |
18 | use mem::{min_align_of, size_of}; |
19 | use mem; | |
9346a6ac | 20 | use num::wrapping::OverflowingOps; |
1a4d82fc JJ |
21 | use ops::{Deref, DerefMut, Drop}; |
22 | use option::Option; | |
23 | use option::Option::{Some, None}; | |
c34b1796 | 24 | use ptr::{self, Unique}; |
85aaf69f | 25 | use rt::heap::{allocate, deallocate, EMPTY}; |
1a4d82fc JJ |
26 | use collections::hash_state::HashState; |
27 | ||
c34b1796 | 28 | const EMPTY_BUCKET: u64 = 0; |
1a4d82fc JJ |
29 | |
30 | /// The raw hashtable, providing safe-ish access to the unzipped and highly | |
31 | /// optimized arrays of hashes, keys, and values. | |
32 | /// | |
33 | /// This design uses less memory and is a lot faster than the naive | |
34 | /// `Vec<Option<u64, K, V>>`, because we don't pay for the overhead of an | |
35 | /// option on every element, and we get a generally more cache-aware design. | |
36 | /// | |
37 | /// Essential invariants of this structure: | |
38 | /// | |
39 | /// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw` | |
40 | /// points to 'undefined' contents. Don't read from it. This invariant is | |
41 | /// enforced outside this module with the `EmptyBucket`, `FullBucket`, | |
42 | /// and `SafeHash` types. | |
43 | /// | |
44 | /// - An `EmptyBucket` is only constructed at an index with | |
45 | /// a hash of EMPTY_BUCKET. | |
46 | /// | |
47 | /// - A `FullBucket` is only constructed at an index with a | |
48 | /// non-EMPTY_BUCKET hash. | |
49 | /// | |
50 | /// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get | |
51 | /// around hashes of zero by changing them to 0x8000_0000_0000_0000, | |
52 | /// which will likely map to the same bucket, while not being confused | |
53 | /// with "empty". | |
54 | /// | |
55 | /// - All three "arrays represented by pointers" are the same length: | |
56 | /// `capacity`. This is set at creation and never changes. The arrays | |
57 | /// are unzipped to save space (we don't have to pay for the padding | |
58 | /// between odd sized elements, such as in a map from u64 to u8), and | |
59 | /// be more cache aware (scanning through 8 hashes brings in at most | |
60 | /// 2 cache lines, since they're all right beside each other). | |
61 | /// | |
62 | /// You can kind of think of this module/data structure as a safe wrapper | |
63 | /// around just the "table" part of the hashtable. It enforces some | |
64 | /// invariants at the type level and employs some performance trickery, | |
65 | /// but in general is just a tricked out `Vec<Option<u64, K, V>>`. | |
66 | #[unsafe_no_drop_flag] | |
67 | pub struct RawTable<K, V> { | |
85aaf69f SL |
68 | capacity: usize, |
69 | size: usize, | |
1a4d82fc | 70 | hashes: Unique<u64>, |
85aaf69f | 71 | |
1a4d82fc JJ |
72 | // Because K/V do not appear directly in any of the types in the struct, |
73 | // inform rustc that in fact instances of K and V are reachable from here. | |
85aaf69f | 74 | marker: marker::PhantomData<(K,V)>, |
1a4d82fc JJ |
75 | } |
76 | ||
85aaf69f SL |
77 | unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {} |
78 | unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {} | |
79 | ||
1a4d82fc JJ |
80 | struct RawBucket<K, V> { |
81 | hash: *mut u64, | |
82 | key: *mut K, | |
85aaf69f SL |
83 | val: *mut V, |
84 | _marker: marker::PhantomData<(K,V)>, | |
1a4d82fc JJ |
85 | } |
86 | ||
87 | impl<K,V> Copy for RawBucket<K,V> {} | |
c34b1796 AL |
88 | impl<K,V> Clone for RawBucket<K,V> { |
89 | fn clone(&self) -> RawBucket<K, V> { *self } | |
90 | } | |
1a4d82fc JJ |
91 | |
92 | pub struct Bucket<K, V, M> { | |
93 | raw: RawBucket<K, V>, | |
85aaf69f | 94 | idx: usize, |
1a4d82fc JJ |
95 | table: M |
96 | } | |
97 | ||
98 | impl<K,V,M:Copy> Copy for Bucket<K,V,M> {} | |
c34b1796 AL |
99 | impl<K,V,M:Copy> Clone for Bucket<K,V,M> { |
100 | fn clone(&self) -> Bucket<K,V,M> { *self } | |
101 | } | |
1a4d82fc JJ |
102 | |
103 | pub struct EmptyBucket<K, V, M> { | |
104 | raw: RawBucket<K, V>, | |
85aaf69f | 105 | idx: usize, |
1a4d82fc JJ |
106 | table: M |
107 | } | |
108 | ||
109 | pub struct FullBucket<K, V, M> { | |
110 | raw: RawBucket<K, V>, | |
85aaf69f | 111 | idx: usize, |
1a4d82fc JJ |
112 | table: M |
113 | } | |
114 | ||
115 | pub type EmptyBucketImm<'table, K, V> = EmptyBucket<K, V, &'table RawTable<K, V>>; | |
116 | pub type FullBucketImm<'table, K, V> = FullBucket<K, V, &'table RawTable<K, V>>; | |
117 | ||
118 | pub type EmptyBucketMut<'table, K, V> = EmptyBucket<K, V, &'table mut RawTable<K, V>>; | |
119 | pub type FullBucketMut<'table, K, V> = FullBucket<K, V, &'table mut RawTable<K, V>>; | |
120 | ||
121 | pub enum BucketState<K, V, M> { | |
122 | Empty(EmptyBucket<K, V, M>), | |
123 | Full(FullBucket<K, V, M>), | |
124 | } | |
125 | ||
126 | // A GapThenFull encapsulates the state of two consecutive buckets at once. | |
127 | // The first bucket, called the gap, is known to be empty. | |
128 | // The second bucket is full. | |
129 | struct GapThenFull<K, V, M> { | |
130 | gap: EmptyBucket<K, V, ()>, | |
131 | full: FullBucket<K, V, M>, | |
132 | } | |
133 | ||
134 | /// A hash that is not zero, since we use a hash of zero to represent empty | |
135 | /// buckets. | |
c34b1796 | 136 | #[derive(PartialEq, Copy, Clone)] |
1a4d82fc JJ |
137 | pub struct SafeHash { |
138 | hash: u64, | |
139 | } | |
140 | ||
141 | impl SafeHash { | |
142 | /// Peek at the hash value, which is guaranteed to be non-zero. | |
143 | #[inline(always)] | |
144 | pub fn inspect(&self) -> u64 { self.hash } | |
145 | } | |
146 | ||
147 | /// We need to remove hashes of 0. That's reserved for empty buckets. | |
148 | /// This function wraps up `hash_keyed` to be the only way outside this | |
149 | /// module to generate a SafeHash. | |
85aaf69f SL |
150 | pub fn make_hash<T: ?Sized, S>(hash_state: &S, t: &T) -> SafeHash |
151 | where T: Hash, S: HashState | |
152 | { | |
153 | let mut state = hash_state.hasher(); | |
154 | t.hash(&mut state); | |
c34b1796 | 155 | // We need to avoid 0 in order to prevent collisions with |
85aaf69f SL |
156 | // EMPTY_HASH. We can maintain our precious uniform distribution |
157 | // of initial indexes by unconditionally setting the MSB, | |
158 | // effectively reducing 64-bits hashes to 63 bits. | |
159 | SafeHash { hash: 0x8000_0000_0000_0000 | state.finish() } | |
160 | } | |
161 | ||
1a4d82fc JJ |
162 | // `replace` casts a `*u64` to a `*SafeHash`. Since we statically |
163 | // ensure that a `FullBucket` points to an index with a non-zero hash, | |
164 | // and a `SafeHash` is just a `u64` with a different name, this is | |
165 | // safe. | |
166 | // | |
167 | // This test ensures that a `SafeHash` really IS the same size as a | |
168 | // `u64`. If you need to change the size of `SafeHash` (and | |
169 | // consequently made this test fail), `replace` needs to be | |
170 | // modified to no longer assume this. | |
171 | #[test] | |
172 | fn can_alias_safehash_as_u64() { | |
173 | assert_eq!(size_of::<SafeHash>(), size_of::<u64>()) | |
174 | } | |
175 | ||
176 | impl<K, V> RawBucket<K, V> { | |
85aaf69f | 177 | unsafe fn offset(self, count: isize) -> RawBucket<K, V> { |
1a4d82fc JJ |
178 | RawBucket { |
179 | hash: self.hash.offset(count), | |
180 | key: self.key.offset(count), | |
181 | val: self.val.offset(count), | |
85aaf69f | 182 | _marker: marker::PhantomData, |
1a4d82fc JJ |
183 | } |
184 | } | |
185 | } | |
186 | ||
187 | // Buckets hold references to the table. | |
188 | impl<K, V, M> FullBucket<K, V, M> { | |
189 | /// Borrow a reference to the table. | |
190 | pub fn table(&self) -> &M { | |
191 | &self.table | |
192 | } | |
193 | /// Move out the reference to the table. | |
194 | pub fn into_table(self) -> M { | |
195 | self.table | |
196 | } | |
197 | /// Get the raw index. | |
85aaf69f | 198 | pub fn index(&self) -> usize { |
1a4d82fc JJ |
199 | self.idx |
200 | } | |
201 | } | |
202 | ||
203 | impl<K, V, M> EmptyBucket<K, V, M> { | |
204 | /// Borrow a reference to the table. | |
205 | pub fn table(&self) -> &M { | |
206 | &self.table | |
207 | } | |
208 | /// Move out the reference to the table. | |
209 | pub fn into_table(self) -> M { | |
210 | self.table | |
211 | } | |
212 | } | |
213 | ||
214 | impl<K, V, M> Bucket<K, V, M> { | |
215 | /// Move out the reference to the table. | |
216 | pub fn into_table(self) -> M { | |
217 | self.table | |
218 | } | |
219 | /// Get the raw index. | |
85aaf69f | 220 | pub fn index(&self) -> usize { |
1a4d82fc JJ |
221 | self.idx |
222 | } | |
223 | } | |
224 | ||
225 | impl<K, V, M: Deref<Target=RawTable<K, V>>> Bucket<K, V, M> { | |
226 | pub fn new(table: M, hash: SafeHash) -> Bucket<K, V, M> { | |
85aaf69f | 227 | Bucket::at_index(table, hash.inspect() as usize) |
1a4d82fc JJ |
228 | } |
229 | ||
85aaf69f | 230 | pub fn at_index(table: M, ib_index: usize) -> Bucket<K, V, M> { |
c34b1796 AL |
231 | // if capacity is 0, then the RawBucket will be populated with bogus pointers. |
232 | // This is an uncommon case though, so avoid it in release builds. | |
233 | debug_assert!(table.capacity() > 0, "Table should have capacity at this point"); | |
1a4d82fc JJ |
234 | let ib_index = ib_index & (table.capacity() - 1); |
235 | Bucket { | |
236 | raw: unsafe { | |
85aaf69f | 237 | table.first_bucket_raw().offset(ib_index as isize) |
1a4d82fc JJ |
238 | }, |
239 | idx: ib_index, | |
240 | table: table | |
241 | } | |
242 | } | |
243 | ||
244 | pub fn first(table: M) -> Bucket<K, V, M> { | |
245 | Bucket { | |
246 | raw: table.first_bucket_raw(), | |
247 | idx: 0, | |
248 | table: table | |
249 | } | |
250 | } | |
251 | ||
252 | /// Reads a bucket at a given index, returning an enum indicating whether | |
253 | /// it's initialized or not. You need to match on this enum to get | |
254 | /// the appropriate types to call most of the other functions in | |
255 | /// this module. | |
256 | pub fn peek(self) -> BucketState<K, V, M> { | |
257 | match unsafe { *self.raw.hash } { | |
258 | EMPTY_BUCKET => | |
259 | Empty(EmptyBucket { | |
260 | raw: self.raw, | |
261 | idx: self.idx, | |
262 | table: self.table | |
263 | }), | |
264 | _ => | |
265 | Full(FullBucket { | |
266 | raw: self.raw, | |
267 | idx: self.idx, | |
268 | table: self.table | |
269 | }) | |
270 | } | |
271 | } | |
272 | ||
273 | /// Modifies the bucket pointer in place to make it point to the next slot. | |
274 | pub fn next(&mut self) { | |
275 | // Branchless bucket iteration step. | |
276 | // As we reach the end of the table... | |
277 | // We take the current idx: 0111111b | |
278 | // Xor it by its increment: ^ 1000000b | |
279 | // ------------ | |
280 | // 1111111b | |
281 | // Then AND with the capacity: & 1000000b | |
282 | // ------------ | |
283 | // to get the backwards offset: 1000000b | |
284 | // ... and it's zero at all other times. | |
285 | let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity(); | |
286 | // Finally, we obtain the offset 1 or the offset -cap + 1. | |
85aaf69f | 287 | let dist = 1 - (maybe_wraparound_dist as isize); |
1a4d82fc JJ |
288 | |
289 | self.idx += 1; | |
290 | ||
291 | unsafe { | |
292 | self.raw = self.raw.offset(dist); | |
293 | } | |
294 | } | |
295 | } | |
296 | ||
297 | impl<K, V, M: Deref<Target=RawTable<K, V>>> EmptyBucket<K, V, M> { | |
298 | #[inline] | |
299 | pub fn next(self) -> Bucket<K, V, M> { | |
300 | let mut bucket = self.into_bucket(); | |
301 | bucket.next(); | |
302 | bucket | |
303 | } | |
304 | ||
305 | #[inline] | |
306 | pub fn into_bucket(self) -> Bucket<K, V, M> { | |
307 | Bucket { | |
308 | raw: self.raw, | |
309 | idx: self.idx, | |
310 | table: self.table | |
311 | } | |
312 | } | |
313 | ||
314 | pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> { | |
315 | let gap = EmptyBucket { | |
316 | raw: self.raw, | |
317 | idx: self.idx, | |
318 | table: () | |
319 | }; | |
320 | ||
321 | match self.next().peek() { | |
322 | Full(bucket) => { | |
323 | Some(GapThenFull { | |
324 | gap: gap, | |
325 | full: bucket | |
326 | }) | |
327 | } | |
328 | Empty(..) => None | |
329 | } | |
330 | } | |
331 | } | |
332 | ||
333 | impl<K, V, M: Deref<Target=RawTable<K, V>> + DerefMut> EmptyBucket<K, V, M> { | |
334 | /// Puts given key and value pair, along with the key's hash, | |
335 | /// into this bucket in the hashtable. Note how `self` is 'moved' into | |
336 | /// this function, because this slot will no longer be empty when | |
337 | /// we return! A `FullBucket` is returned for later use, pointing to | |
338 | /// the newly-filled slot in the hashtable. | |
339 | /// | |
340 | /// Use `make_hash` to construct a `SafeHash` to pass to this function. | |
341 | pub fn put(mut self, hash: SafeHash, key: K, value: V) | |
342 | -> FullBucket<K, V, M> { | |
343 | unsafe { | |
344 | *self.raw.hash = hash.inspect(); | |
345 | ptr::write(self.raw.key, key); | |
346 | ptr::write(self.raw.val, value); | |
347 | } | |
348 | ||
349 | self.table.size += 1; | |
350 | ||
351 | FullBucket { raw: self.raw, idx: self.idx, table: self.table } | |
352 | } | |
353 | } | |
354 | ||
355 | impl<K, V, M: Deref<Target=RawTable<K, V>>> FullBucket<K, V, M> { | |
356 | #[inline] | |
357 | pub fn next(self) -> Bucket<K, V, M> { | |
358 | let mut bucket = self.into_bucket(); | |
359 | bucket.next(); | |
360 | bucket | |
361 | } | |
362 | ||
363 | #[inline] | |
364 | pub fn into_bucket(self) -> Bucket<K, V, M> { | |
365 | Bucket { | |
366 | raw: self.raw, | |
367 | idx: self.idx, | |
368 | table: self.table | |
369 | } | |
370 | } | |
371 | ||
372 | /// Get the distance between this bucket and the 'ideal' location | |
373 | /// as determined by the key's hash stored in it. | |
374 | /// | |
375 | /// In the cited blog posts above, this is called the "distance to | |
376 | /// initial bucket", or DIB. Also known as "probe count". | |
85aaf69f | 377 | pub fn distance(&self) -> usize { |
1a4d82fc JJ |
378 | // Calculates the distance one has to travel when going from |
379 | // `hash mod capacity` onwards to `idx mod capacity`, wrapping around | |
380 | // if the destination is not reached before the end of the table. | |
c34b1796 | 381 | (self.idx.wrapping_sub(self.hash().inspect() as usize)) & (self.table.capacity() - 1) |
1a4d82fc JJ |
382 | } |
383 | ||
384 | #[inline] | |
385 | pub fn hash(&self) -> SafeHash { | |
386 | unsafe { | |
387 | SafeHash { | |
388 | hash: *self.raw.hash | |
389 | } | |
390 | } | |
391 | } | |
392 | ||
393 | /// Gets references to the key and value at a given index. | |
394 | pub fn read(&self) -> (&K, &V) { | |
395 | unsafe { | |
396 | (&*self.raw.key, | |
397 | &*self.raw.val) | |
398 | } | |
399 | } | |
400 | } | |
401 | ||
402 | impl<K, V, M: Deref<Target=RawTable<K, V>> + DerefMut> FullBucket<K, V, M> { | |
403 | /// Removes this bucket's key and value from the hashtable. | |
404 | /// | |
405 | /// This works similarly to `put`, building an `EmptyBucket` out of the | |
406 | /// taken bucket. | |
407 | pub fn take(mut self) -> (EmptyBucket<K, V, M>, K, V) { | |
1a4d82fc JJ |
408 | self.table.size -= 1; |
409 | ||
410 | unsafe { | |
411 | *self.raw.hash = EMPTY_BUCKET; | |
412 | ( | |
413 | EmptyBucket { | |
414 | raw: self.raw, | |
415 | idx: self.idx, | |
416 | table: self.table | |
417 | }, | |
85aaf69f SL |
418 | ptr::read(self.raw.key), |
419 | ptr::read(self.raw.val) | |
1a4d82fc JJ |
420 | ) |
421 | } | |
422 | } | |
423 | ||
424 | pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { | |
425 | unsafe { | |
426 | let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); | |
427 | let old_key = ptr::replace(self.raw.key, k); | |
428 | let old_val = ptr::replace(self.raw.val, v); | |
429 | ||
430 | (old_hash, old_key, old_val) | |
431 | } | |
432 | } | |
433 | ||
434 | /// Gets mutable references to the key and value at a given index. | |
435 | pub fn read_mut(&mut self) -> (&mut K, &mut V) { | |
436 | unsafe { | |
437 | (&mut *self.raw.key, | |
438 | &mut *self.raw.val) | |
439 | } | |
440 | } | |
441 | } | |
442 | ||
443 | impl<'t, K, V, M: Deref<Target=RawTable<K, V>> + 't> FullBucket<K, V, M> { | |
444 | /// Exchange a bucket state for immutable references into the table. | |
445 | /// Because the underlying reference to the table is also consumed, | |
446 | /// no further changes to the structure of the table are possible; | |
447 | /// in exchange for this, the returned references have a longer lifetime | |
448 | /// than the references returned by `read()`. | |
449 | pub fn into_refs(self) -> (&'t K, &'t V) { | |
450 | unsafe { | |
451 | (&*self.raw.key, | |
452 | &*self.raw.val) | |
453 | } | |
454 | } | |
455 | } | |
456 | ||
457 | impl<'t, K, V, M: Deref<Target=RawTable<K, V>> + DerefMut + 't> FullBucket<K, V, M> { | |
458 | /// This works similarly to `into_refs`, exchanging a bucket state | |
459 | /// for mutable references into the table. | |
460 | pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { | |
461 | unsafe { | |
462 | (&mut *self.raw.key, | |
463 | &mut *self.raw.val) | |
464 | } | |
465 | } | |
466 | } | |
467 | ||
468 | impl<K, V, M> BucketState<K, V, M> { | |
469 | // For convenience. | |
470 | pub fn expect_full(self) -> FullBucket<K, V, M> { | |
471 | match self { | |
472 | Full(full) => full, | |
473 | Empty(..) => panic!("Expected full bucket") | |
474 | } | |
475 | } | |
476 | } | |
477 | ||
478 | impl<K, V, M: Deref<Target=RawTable<K, V>>> GapThenFull<K, V, M> { | |
479 | #[inline] | |
480 | pub fn full(&self) -> &FullBucket<K, V, M> { | |
481 | &self.full | |
482 | } | |
483 | ||
484 | pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> { | |
485 | unsafe { | |
486 | *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); | |
c34b1796 AL |
487 | ptr::copy_nonoverlapping(self.full.raw.key, self.gap.raw.key, 1); |
488 | ptr::copy_nonoverlapping(self.full.raw.val, self.gap.raw.val, 1); | |
1a4d82fc JJ |
489 | } |
490 | ||
491 | let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full; | |
492 | ||
493 | match self.full.next().peek() { | |
494 | Full(bucket) => { | |
495 | self.gap.raw = prev_raw; | |
496 | self.gap.idx = prev_idx; | |
497 | ||
498 | self.full = bucket; | |
499 | ||
500 | Some(self) | |
501 | } | |
502 | Empty(..) => None | |
503 | } | |
504 | } | |
505 | } | |
506 | ||
507 | ||
508 | /// Rounds up to a multiple of a power of two. Returns the closest multiple | |
509 | /// of `target_alignment` that is higher or equal to `unrounded`. | |
510 | /// | |
511 | /// # Panics | |
512 | /// | |
513 | /// Panics if `target_alignment` is not a power of two. | |
85aaf69f | 514 | fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize { |
1a4d82fc JJ |
515 | assert!(target_alignment.is_power_of_two()); |
516 | (unrounded + target_alignment - 1) & !(target_alignment - 1) | |
517 | } | |
518 | ||
519 | #[test] | |
520 | fn test_rounding() { | |
521 | assert_eq!(round_up_to_next(0, 4), 0); | |
522 | assert_eq!(round_up_to_next(1, 4), 4); | |
523 | assert_eq!(round_up_to_next(2, 4), 4); | |
524 | assert_eq!(round_up_to_next(3, 4), 4); | |
525 | assert_eq!(round_up_to_next(4, 4), 4); | |
526 | assert_eq!(round_up_to_next(5, 4), 8); | |
527 | } | |
528 | ||
529 | // Returns a tuple of (key_offset, val_offset), | |
530 | // from the start of a mallocated array. | |
85aaf69f SL |
531 | fn calculate_offsets(hashes_size: usize, |
532 | keys_size: usize, keys_align: usize, | |
533 | vals_align: usize) | |
c34b1796 | 534 | -> (usize, usize, bool) { |
1a4d82fc | 535 | let keys_offset = round_up_to_next(hashes_size, keys_align); |
c34b1796 | 536 | let (end_of_keys, oflo) = keys_offset.overflowing_add(keys_size); |
1a4d82fc JJ |
537 | |
538 | let vals_offset = round_up_to_next(end_of_keys, vals_align); | |
539 | ||
c34b1796 | 540 | (keys_offset, vals_offset, oflo) |
1a4d82fc JJ |
541 | } |
542 | ||
543 | // Returns a tuple of (minimum required malloc alignment, hash_offset, | |
544 | // array_size), from the start of a mallocated array. | |
85aaf69f SL |
545 | fn calculate_allocation(hash_size: usize, hash_align: usize, |
546 | keys_size: usize, keys_align: usize, | |
547 | vals_size: usize, vals_align: usize) | |
c34b1796 | 548 | -> (usize, usize, usize, bool) { |
1a4d82fc | 549 | let hash_offset = 0; |
c34b1796 AL |
550 | let (_, vals_offset, oflo) = calculate_offsets(hash_size, |
551 | keys_size, keys_align, | |
552 | vals_align); | |
553 | let (end_of_vals, oflo2) = vals_offset.overflowing_add(vals_size); | |
1a4d82fc JJ |
554 | |
555 | let min_align = cmp::max(hash_align, cmp::max(keys_align, vals_align)); | |
556 | ||
c34b1796 | 557 | (min_align, hash_offset, end_of_vals, oflo || oflo2) |
1a4d82fc JJ |
558 | } |
559 | ||
560 | #[test] | |
561 | fn test_offset_calculation() { | |
c34b1796 AL |
562 | assert_eq!(calculate_allocation(128, 8, 15, 1, 4, 4), (8, 0, 148, false)); |
563 | assert_eq!(calculate_allocation(3, 1, 2, 1, 1, 1), (1, 0, 6, false)); | |
564 | assert_eq!(calculate_allocation(6, 2, 12, 4, 24, 8), (8, 0, 48, false)); | |
565 | assert_eq!(calculate_offsets(128, 15, 1, 4), (128, 144, false)); | |
566 | assert_eq!(calculate_offsets(3, 2, 1, 1), (3, 5, false)); | |
567 | assert_eq!(calculate_offsets(6, 12, 4, 8), (8, 24, false)); | |
1a4d82fc JJ |
568 | } |
569 | ||
570 | impl<K, V> RawTable<K, V> { | |
571 | /// Does not initialize the buckets. The caller should ensure they, | |
572 | /// at the very least, set every hash to EMPTY_BUCKET. | |
85aaf69f | 573 | unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> { |
1a4d82fc JJ |
574 | if capacity == 0 { |
575 | return RawTable { | |
576 | size: 0, | |
577 | capacity: 0, | |
85aaf69f SL |
578 | hashes: Unique::new(EMPTY as *mut u64), |
579 | marker: marker::PhantomData, | |
1a4d82fc JJ |
580 | }; |
581 | } | |
85aaf69f | 582 | |
1a4d82fc JJ |
583 | // No need for `checked_mul` before a more restrictive check performed |
584 | // later in this method. | |
585 | let hashes_size = capacity * size_of::<u64>(); | |
586 | let keys_size = capacity * size_of::< K >(); | |
587 | let vals_size = capacity * size_of::< V >(); | |
588 | ||
589 | // Allocating hashmaps is a little tricky. We need to allocate three | |
590 | // arrays, but since we know their sizes and alignments up front, | |
591 | // we just allocate a single array, and then have the subarrays | |
592 | // point into it. | |
593 | // | |
594 | // This is great in theory, but in practice getting the alignment | |
595 | // right is a little subtle. Therefore, calculating offsets has been | |
596 | // factored out into a different function. | |
c34b1796 | 597 | let (malloc_alignment, hash_offset, size, oflo) = |
1a4d82fc JJ |
598 | calculate_allocation( |
599 | hashes_size, min_align_of::<u64>(), | |
600 | keys_size, min_align_of::< K >(), | |
601 | vals_size, min_align_of::< V >()); | |
602 | ||
c34b1796 AL |
603 | assert!(!oflo, "capacity overflow"); |
604 | ||
1a4d82fc JJ |
605 | // One check for overflow that covers calculation and rounding of size. |
606 | let size_of_bucket = size_of::<u64>().checked_add(size_of::<K>()).unwrap() | |
607 | .checked_add(size_of::<V>()).unwrap(); | |
608 | assert!(size >= capacity.checked_mul(size_of_bucket) | |
609 | .expect("capacity overflow"), | |
610 | "capacity overflow"); | |
611 | ||
612 | let buffer = allocate(size, malloc_alignment); | |
613 | if buffer.is_null() { ::alloc::oom() } | |
614 | ||
85aaf69f | 615 | let hashes = buffer.offset(hash_offset as isize) as *mut u64; |
1a4d82fc JJ |
616 | |
617 | RawTable { | |
618 | capacity: capacity, | |
619 | size: 0, | |
85aaf69f SL |
620 | hashes: Unique::new(hashes), |
621 | marker: marker::PhantomData, | |
1a4d82fc JJ |
622 | } |
623 | } | |
624 | ||
625 | fn first_bucket_raw(&self) -> RawBucket<K, V> { | |
626 | let hashes_size = self.capacity * size_of::<u64>(); | |
627 | let keys_size = self.capacity * size_of::<K>(); | |
628 | ||
85aaf69f | 629 | let buffer = *self.hashes as *mut u8; |
c34b1796 AL |
630 | let (keys_offset, vals_offset, oflo) = |
631 | calculate_offsets(hashes_size, | |
632 | keys_size, min_align_of::<K>(), | |
633 | min_align_of::<V>()); | |
634 | debug_assert!(!oflo, "capacity overflow"); | |
1a4d82fc JJ |
635 | unsafe { |
636 | RawBucket { | |
85aaf69f SL |
637 | hash: *self.hashes, |
638 | key: buffer.offset(keys_offset as isize) as *mut K, | |
639 | val: buffer.offset(vals_offset as isize) as *mut V, | |
640 | _marker: marker::PhantomData, | |
1a4d82fc JJ |
641 | } |
642 | } | |
643 | } | |
644 | ||
645 | /// Creates a new raw table from a given capacity. All buckets are | |
646 | /// initially empty. | |
85aaf69f | 647 | pub fn new(capacity: usize) -> RawTable<K, V> { |
1a4d82fc JJ |
648 | unsafe { |
649 | let ret = RawTable::new_uninitialized(capacity); | |
c34b1796 | 650 | ptr::write_bytes(*ret.hashes, 0, capacity); |
1a4d82fc JJ |
651 | ret |
652 | } | |
653 | } | |
654 | ||
655 | /// The hashtable's capacity, similar to a vector's. | |
85aaf69f | 656 | pub fn capacity(&self) -> usize { |
1a4d82fc JJ |
657 | self.capacity |
658 | } | |
659 | ||
660 | /// The number of elements ever `put` in the hashtable, minus the number | |
661 | /// of elements ever `take`n. | |
85aaf69f | 662 | pub fn size(&self) -> usize { |
1a4d82fc JJ |
663 | self.size |
664 | } | |
665 | ||
666 | fn raw_buckets(&self) -> RawBuckets<K, V> { | |
667 | RawBuckets { | |
668 | raw: self.first_bucket_raw(), | |
669 | hashes_end: unsafe { | |
85aaf69f | 670 | self.hashes.offset(self.capacity as isize) |
1a4d82fc | 671 | }, |
85aaf69f | 672 | marker: marker::PhantomData, |
1a4d82fc JJ |
673 | } |
674 | } | |
675 | ||
676 | pub fn iter(&self) -> Iter<K, V> { | |
677 | Iter { | |
678 | iter: self.raw_buckets(), | |
679 | elems_left: self.size(), | |
680 | } | |
681 | } | |
682 | ||
683 | pub fn iter_mut(&mut self) -> IterMut<K, V> { | |
684 | IterMut { | |
685 | iter: self.raw_buckets(), | |
686 | elems_left: self.size(), | |
687 | } | |
688 | } | |
689 | ||
690 | pub fn into_iter(self) -> IntoIter<K, V> { | |
691 | let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); | |
692 | // Replace the marker regardless of lifetime bounds on parameters. | |
693 | IntoIter { | |
694 | iter: RawBuckets { | |
695 | raw: raw, | |
696 | hashes_end: hashes_end, | |
85aaf69f | 697 | marker: marker::PhantomData, |
1a4d82fc JJ |
698 | }, |
699 | table: self, | |
700 | } | |
701 | } | |
702 | ||
703 | pub fn drain(&mut self) -> Drain<K, V> { | |
704 | let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); | |
705 | // Replace the marker regardless of lifetime bounds on parameters. | |
706 | Drain { | |
707 | iter: RawBuckets { | |
708 | raw: raw, | |
709 | hashes_end: hashes_end, | |
85aaf69f | 710 | marker: marker::PhantomData, |
1a4d82fc JJ |
711 | }, |
712 | table: self, | |
713 | } | |
714 | } | |
715 | ||
716 | /// Returns an iterator that copies out each entry. Used while the table | |
717 | /// is being dropped. | |
718 | unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> { | |
719 | let raw_bucket = self.first_bucket_raw(); | |
720 | RevMoveBuckets { | |
85aaf69f | 721 | raw: raw_bucket.offset(self.capacity as isize), |
1a4d82fc JJ |
722 | hashes_end: raw_bucket.hash, |
723 | elems_left: self.size, | |
85aaf69f | 724 | marker: marker::PhantomData, |
1a4d82fc JJ |
725 | } |
726 | } | |
727 | } | |
728 | ||
729 | /// A raw iterator. The basis for some other iterators in this module. Although | |
730 | /// this interface is safe, it's not used outside this module. | |
731 | struct RawBuckets<'a, K, V> { | |
732 | raw: RawBucket<K, V>, | |
733 | hashes_end: *mut u64, | |
85aaf69f SL |
734 | |
735 | // Strictly speaking, this should be &'a (K,V), but that would | |
736 | // require that K:'a, and we often use RawBuckets<'static...> for | |
737 | // move iterations, so that messes up a lot of other things. So | |
738 | // just use `&'a (K,V)` as this is not a publicly exposed type | |
739 | // anyway. | |
740 | marker: marker::PhantomData<&'a ()>, | |
1a4d82fc JJ |
741 | } |
742 | ||
743 | // FIXME(#19839) Remove in favor of `#[derive(Clone)]` | |
744 | impl<'a, K, V> Clone for RawBuckets<'a, K, V> { | |
745 | fn clone(&self) -> RawBuckets<'a, K, V> { | |
746 | RawBuckets { | |
747 | raw: self.raw, | |
748 | hashes_end: self.hashes_end, | |
85aaf69f | 749 | marker: marker::PhantomData, |
1a4d82fc JJ |
750 | } |
751 | } | |
752 | } | |
753 | ||
754 | ||
755 | impl<'a, K, V> Iterator for RawBuckets<'a, K, V> { | |
756 | type Item = RawBucket<K, V>; | |
757 | ||
758 | fn next(&mut self) -> Option<RawBucket<K, V>> { | |
759 | while self.raw.hash != self.hashes_end { | |
760 | unsafe { | |
761 | // We are swapping out the pointer to a bucket and replacing | |
762 | // it with the pointer to the next one. | |
763 | let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); | |
764 | if *prev.hash != EMPTY_BUCKET { | |
765 | return Some(prev); | |
766 | } | |
767 | } | |
768 | } | |
769 | ||
770 | None | |
771 | } | |
772 | } | |
773 | ||
774 | /// An iterator that moves out buckets in reverse order. It leaves the table | |
775 | /// in an inconsistent state and should only be used for dropping | |
776 | /// the table's remaining entries. It's used in the implementation of Drop. | |
777 | struct RevMoveBuckets<'a, K, V> { | |
778 | raw: RawBucket<K, V>, | |
779 | hashes_end: *mut u64, | |
85aaf69f SL |
780 | elems_left: usize, |
781 | ||
782 | // As above, `&'a (K,V)` would seem better, but we often use | |
783 | // 'static for the lifetime, and this is not a publicly exposed | |
784 | // type. | |
785 | marker: marker::PhantomData<&'a ()>, | |
1a4d82fc JJ |
786 | } |
787 | ||
788 | impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> { | |
789 | type Item = (K, V); | |
790 | ||
791 | fn next(&mut self) -> Option<(K, V)> { | |
792 | if self.elems_left == 0 { | |
793 | return None; | |
794 | } | |
795 | ||
796 | loop { | |
797 | debug_assert!(self.raw.hash != self.hashes_end); | |
798 | ||
799 | unsafe { | |
800 | self.raw = self.raw.offset(-1); | |
801 | ||
802 | if *self.raw.hash != EMPTY_BUCKET { | |
803 | self.elems_left -= 1; | |
804 | return Some(( | |
85aaf69f SL |
805 | ptr::read(self.raw.key), |
806 | ptr::read(self.raw.val) | |
1a4d82fc JJ |
807 | )); |
808 | } | |
809 | } | |
810 | } | |
811 | } | |
812 | } | |
813 | ||
814 | /// Iterator over shared references to entries in a table. | |
815 | pub struct Iter<'a, K: 'a, V: 'a> { | |
816 | iter: RawBuckets<'a, K, V>, | |
85aaf69f | 817 | elems_left: usize, |
1a4d82fc JJ |
818 | } |
819 | ||
820 | // FIXME(#19839) Remove in favor of `#[derive(Clone)]` | |
821 | impl<'a, K, V> Clone for Iter<'a, K, V> { | |
822 | fn clone(&self) -> Iter<'a, K, V> { | |
823 | Iter { | |
824 | iter: self.iter.clone(), | |
825 | elems_left: self.elems_left | |
826 | } | |
827 | } | |
828 | } | |
829 | ||
830 | ||
831 | /// Iterator over mutable references to entries in a table. | |
832 | pub struct IterMut<'a, K: 'a, V: 'a> { | |
833 | iter: RawBuckets<'a, K, V>, | |
85aaf69f | 834 | elems_left: usize, |
1a4d82fc JJ |
835 | } |
836 | ||
837 | /// Iterator over the entries in a table, consuming the table. | |
838 | pub struct IntoIter<K, V> { | |
839 | table: RawTable<K, V>, | |
840 | iter: RawBuckets<'static, K, V> | |
841 | } | |
842 | ||
843 | /// Iterator over the entries in a table, clearing the table. | |
844 | pub struct Drain<'a, K: 'a, V: 'a> { | |
845 | table: &'a mut RawTable<K, V>, | |
846 | iter: RawBuckets<'static, K, V>, | |
847 | } | |
848 | ||
849 | impl<'a, K, V> Iterator for Iter<'a, K, V> { | |
850 | type Item = (&'a K, &'a V); | |
851 | ||
852 | fn next(&mut self) -> Option<(&'a K, &'a V)> { | |
853 | self.iter.next().map(|bucket| { | |
854 | self.elems_left -= 1; | |
855 | unsafe { | |
856 | (&*bucket.key, | |
857 | &*bucket.val) | |
858 | } | |
859 | }) | |
860 | } | |
861 | ||
85aaf69f | 862 | fn size_hint(&self) -> (usize, Option<usize>) { |
1a4d82fc JJ |
863 | (self.elems_left, Some(self.elems_left)) |
864 | } | |
865 | } | |
85aaf69f SL |
866 | impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { |
867 | fn len(&self) -> usize { self.elems_left } | |
868 | } | |
1a4d82fc JJ |
869 | |
870 | impl<'a, K, V> Iterator for IterMut<'a, K, V> { | |
871 | type Item = (&'a K, &'a mut V); | |
872 | ||
873 | fn next(&mut self) -> Option<(&'a K, &'a mut V)> { | |
874 | self.iter.next().map(|bucket| { | |
875 | self.elems_left -= 1; | |
876 | unsafe { | |
877 | (&*bucket.key, | |
878 | &mut *bucket.val) | |
879 | } | |
880 | }) | |
881 | } | |
882 | ||
85aaf69f | 883 | fn size_hint(&self) -> (usize, Option<usize>) { |
1a4d82fc JJ |
884 | (self.elems_left, Some(self.elems_left)) |
885 | } | |
886 | } | |
85aaf69f SL |
887 | impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { |
888 | fn len(&self) -> usize { self.elems_left } | |
889 | } | |
1a4d82fc JJ |
890 | |
891 | impl<K, V> Iterator for IntoIter<K, V> { | |
892 | type Item = (SafeHash, K, V); | |
893 | ||
894 | fn next(&mut self) -> Option<(SafeHash, K, V)> { | |
895 | self.iter.next().map(|bucket| { | |
896 | self.table.size -= 1; | |
897 | unsafe { | |
898 | ( | |
899 | SafeHash { | |
900 | hash: *bucket.hash, | |
901 | }, | |
85aaf69f SL |
902 | ptr::read(bucket.key), |
903 | ptr::read(bucket.val) | |
1a4d82fc JJ |
904 | ) |
905 | } | |
906 | }) | |
907 | } | |
908 | ||
85aaf69f | 909 | fn size_hint(&self) -> (usize, Option<usize>) { |
1a4d82fc JJ |
910 | let size = self.table.size(); |
911 | (size, Some(size)) | |
912 | } | |
913 | } | |
85aaf69f SL |
914 | impl<K, V> ExactSizeIterator for IntoIter<K, V> { |
915 | fn len(&self) -> usize { self.table.size() } | |
916 | } | |
1a4d82fc | 917 | |
85aaf69f | 918 | impl<'a, K, V> Iterator for Drain<'a, K, V> { |
1a4d82fc JJ |
919 | type Item = (SafeHash, K, V); |
920 | ||
921 | #[inline] | |
922 | fn next(&mut self) -> Option<(SafeHash, K, V)> { | |
923 | self.iter.next().map(|bucket| { | |
924 | self.table.size -= 1; | |
925 | unsafe { | |
926 | ( | |
927 | SafeHash { | |
928 | hash: ptr::replace(bucket.hash, EMPTY_BUCKET), | |
929 | }, | |
85aaf69f SL |
930 | ptr::read(bucket.key), |
931 | ptr::read(bucket.val) | |
1a4d82fc JJ |
932 | ) |
933 | } | |
934 | }) | |
935 | } | |
936 | ||
85aaf69f | 937 | fn size_hint(&self) -> (usize, Option<usize>) { |
1a4d82fc JJ |
938 | let size = self.table.size(); |
939 | (size, Some(size)) | |
940 | } | |
941 | } | |
85aaf69f SL |
942 | impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { |
943 | fn len(&self) -> usize { self.table.size() } | |
944 | } | |
1a4d82fc | 945 | |
1a4d82fc JJ |
946 | impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> { |
947 | fn drop(&mut self) { | |
85aaf69f | 948 | for _ in self.by_ref() {} |
1a4d82fc JJ |
949 | } |
950 | } | |
951 | ||
952 | impl<K: Clone, V: Clone> Clone for RawTable<K, V> { | |
953 | fn clone(&self) -> RawTable<K, V> { | |
954 | unsafe { | |
955 | let mut new_ht = RawTable::new_uninitialized(self.capacity()); | |
956 | ||
957 | { | |
958 | let cap = self.capacity(); | |
959 | let mut new_buckets = Bucket::first(&mut new_ht); | |
960 | let mut buckets = Bucket::first(self); | |
961 | while buckets.index() != cap { | |
962 | match buckets.peek() { | |
963 | Full(full) => { | |
964 | let (h, k, v) = { | |
965 | let (k, v) = full.read(); | |
966 | (full.hash(), k.clone(), v.clone()) | |
967 | }; | |
968 | *new_buckets.raw.hash = h.inspect(); | |
969 | ptr::write(new_buckets.raw.key, k); | |
970 | ptr::write(new_buckets.raw.val, v); | |
971 | } | |
972 | Empty(..) => { | |
973 | *new_buckets.raw.hash = EMPTY_BUCKET; | |
974 | } | |
975 | } | |
976 | new_buckets.next(); | |
977 | buckets.next(); | |
978 | } | |
979 | }; | |
980 | ||
981 | new_ht.size = self.size(); | |
982 | ||
983 | new_ht | |
984 | } | |
985 | } | |
986 | } | |
987 | ||
1a4d82fc JJ |
988 | impl<K, V> Drop for RawTable<K, V> { |
989 | fn drop(&mut self) { | |
c34b1796 | 990 | if self.capacity == 0 || self.capacity == mem::POST_DROP_USIZE { |
1a4d82fc JJ |
991 | return; |
992 | } | |
85aaf69f | 993 | |
1a4d82fc JJ |
994 | // This is done in reverse because we've likely partially taken |
995 | // some elements out with `.into_iter()` from the front. | |
996 | // Check if the size is 0, so we don't do a useless scan when | |
997 | // dropping empty tables such as on resize. | |
998 | // Also avoid double drop of elements that have been already moved out. | |
999 | unsafe { | |
1000 | for _ in self.rev_move_buckets() {} | |
1001 | } | |
1002 | ||
1003 | let hashes_size = self.capacity * size_of::<u64>(); | |
1004 | let keys_size = self.capacity * size_of::<K>(); | |
1005 | let vals_size = self.capacity * size_of::<V>(); | |
c34b1796 AL |
1006 | let (align, _, size, oflo) = |
1007 | calculate_allocation(hashes_size, min_align_of::<u64>(), | |
1008 | keys_size, min_align_of::<K>(), | |
1009 | vals_size, min_align_of::<V>()); | |
1010 | ||
1011 | debug_assert!(!oflo, "should be impossible"); | |
1a4d82fc JJ |
1012 | |
1013 | unsafe { | |
85aaf69f | 1014 | deallocate(*self.hashes as *mut u8, size, align); |
1a4d82fc JJ |
1015 | // Remember how everything was allocated out of one buffer |
1016 | // during initialization? We only need one call to free here. | |
1017 | } | |
1018 | } | |
1019 | } |