]>
Commit | Line | Data |
---|---|---|
1 | // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT | |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | use alloc::heap::{EMPTY, allocate, deallocate}; | |
12 | ||
13 | use cmp; | |
14 | use hash::{BuildHasher, Hash, Hasher}; | |
15 | use intrinsics::needs_drop; | |
16 | use marker; | |
17 | use mem::{align_of, size_of}; | |
18 | use mem; | |
19 | use ops::{Deref, DerefMut}; | |
20 | use ptr::{self, Unique, Shared}; | |
21 | ||
22 | use self::BucketState::*; | |
23 | ||
/// Unsigned integer type in which hash values are stored inside the table.
///
/// Selecting a bucket never needs more than bit_width(usize) bits, so a
/// `usize` is wide enough, and its most significant bit is ours to use for
/// tagging a `SafeHash`.
///
/// (Even if usize::MAX bytes could be allocated for buckets, every bucket
/// stores at least one `HashUint`, so there can never be more than
/// usize::MAX / size_of(usize) buckets.)
type HashUint = usize;

/// Hash value stored in a bucket that does not hold a key-value pair.
const EMPTY_BUCKET: HashUint = 0;
36 | ||
37 | /// The raw hashtable, providing safe-ish access to the unzipped and highly | |
38 | /// optimized arrays of hashes, and key-value pairs. | |
39 | /// | |
40 | /// This design is a lot faster than the naive | |
41 | /// `Vec<Option<(u64, K, V)>>`, because we don't pay for the overhead of an | |
42 | /// option on every element, and we get a generally more cache-aware design. | |
43 | /// | |
44 | /// Essential invariants of this structure: | |
45 | /// | |
46 | /// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw` | |
47 | /// points to 'undefined' contents. Don't read from it. This invariant is | |
48 | /// enforced outside this module with the `EmptyBucket`, `FullBucket`, | |
49 | /// and `SafeHash` types. | |
50 | /// | |
51 | /// - An `EmptyBucket` is only constructed at an index with | |
52 | /// a hash of EMPTY_BUCKET. | |
53 | /// | |
54 | /// - A `FullBucket` is only constructed at an index with a | |
55 | /// non-EMPTY_BUCKET hash. | |
56 | /// | |
57 | /// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get | |
58 | /// around hashes of zero by changing them to 0x8000_0000_0000_0000, | |
59 | /// which will likely map to the same bucket, while not being confused | |
60 | /// with "empty". | |
61 | /// | |
62 | /// - Both "arrays represented by pointers" are the same length: | |
63 | /// `capacity`. This is set at creation and never changes. The arrays | |
64 | /// are unzipped and are more cache aware (scanning through 8 hashes | |
65 | /// brings in at most 2 cache lines, since they're all right beside each | |
66 | /// other). This layout may waste space in padding such as in a map from | |
67 | /// u64 to u8, but is a more cache conscious layout as the key-value pairs | |
68 | /// are only very shortly probed and the desired value will be in the same | |
69 | /// or next cache line. | |
70 | /// | |
71 | /// You can kind of think of this module/data structure as a safe wrapper | |
72 | /// around just the "table" part of the hashtable. It enforces some | |
73 | /// invariants at the type level and employs some performance trickery, | |
74 | /// but in general is just a tricked out `Vec<Option<(u64, K, V)>>`. | |
75 | pub struct RawTable<K, V> { | |
76 | capacity: usize, | |
77 | size: usize, | |
78 | hashes: Unique<HashUint>, | |
79 | ||
80 | // Because K/V do not appear directly in any of the types in the struct, | |
81 | // inform rustc that in fact instances of K and V are reachable from here. | |
82 | marker: marker::PhantomData<(K, V)>, | |
83 | } | |
84 | ||
85 | unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {} | |
86 | unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {} | |
87 | ||
88 | struct RawBucket<K, V> { | |
89 | hash: *mut HashUint, | |
90 | // We use *const to ensure covariance with respect to K and V | |
91 | pair: *const (K, V), | |
92 | _marker: marker::PhantomData<(K, V)>, | |
93 | } | |
94 | ||
95 | impl<K, V> Copy for RawBucket<K, V> {} | |
96 | impl<K, V> Clone for RawBucket<K, V> { | |
97 | fn clone(&self) -> RawBucket<K, V> { | |
98 | *self | |
99 | } | |
100 | } | |
101 | ||
102 | pub struct Bucket<K, V, M> { | |
103 | raw: RawBucket<K, V>, | |
104 | idx: usize, | |
105 | table: M, | |
106 | } | |
107 | ||
108 | impl<K, V, M: Copy> Copy for Bucket<K, V, M> {} | |
109 | impl<K, V, M: Copy> Clone for Bucket<K, V, M> { | |
110 | fn clone(&self) -> Bucket<K, V, M> { | |
111 | *self | |
112 | } | |
113 | } | |
114 | ||
115 | pub struct EmptyBucket<K, V, M> { | |
116 | raw: RawBucket<K, V>, | |
117 | idx: usize, | |
118 | table: M, | |
119 | } | |
120 | ||
121 | pub struct FullBucket<K, V, M> { | |
122 | raw: RawBucket<K, V>, | |
123 | idx: usize, | |
124 | table: M, | |
125 | } | |
126 | ||
127 | pub type FullBucketMut<'table, K, V> = FullBucket<K, V, &'table mut RawTable<K, V>>; | |
128 | ||
129 | pub enum BucketState<K, V, M> { | |
130 | Empty(EmptyBucket<K, V, M>), | |
131 | Full(FullBucket<K, V, M>), | |
132 | } | |
133 | ||
134 | // A GapThenFull encapsulates the state of two consecutive buckets at once. | |
135 | // The first bucket, called the gap, is known to be empty. | |
136 | // The second bucket is full. | |
137 | pub struct GapThenFull<K, V, M> { | |
138 | gap: EmptyBucket<K, V, ()>, | |
139 | full: FullBucket<K, V, M>, | |
140 | } | |
141 | ||
142 | /// A hash that is not zero, since we use a hash of zero to represent empty | |
143 | /// buckets. | |
144 | #[derive(PartialEq, Copy, Clone)] | |
145 | pub struct SafeHash { | |
146 | hash: HashUint, | |
147 | } | |
148 | ||
149 | impl SafeHash { | |
150 | /// Peek at the hash value, which is guaranteed to be non-zero. | |
151 | #[inline(always)] | |
152 | pub fn inspect(&self) -> HashUint { | |
153 | self.hash | |
154 | } | |
155 | ||
156 | #[inline(always)] | |
157 | pub fn new(hash: u64) -> Self { | |
158 | // We need to avoid 0 in order to prevent collisions with | |
159 | // EMPTY_HASH. We can maintain our precious uniform distribution | |
160 | // of initial indexes by unconditionally setting the MSB, | |
161 | // effectively reducing the hashes by one bit. | |
162 | // | |
163 | // Truncate hash to fit in `HashUint`. | |
164 | let hash_bits = size_of::<HashUint>() * 8; | |
165 | SafeHash { hash: (1 << (hash_bits - 1)) | (hash as HashUint) } | |
166 | } | |
167 | } | |
168 | ||
169 | /// We need to remove hashes of 0. That's reserved for empty buckets. | |
170 | /// This function wraps up `hash_keyed` to be the only way outside this | |
171 | /// module to generate a SafeHash. | |
172 | pub fn make_hash<T: ?Sized, S>(hash_state: &S, t: &T) -> SafeHash | |
173 | where T: Hash, | |
174 | S: BuildHasher | |
175 | { | |
176 | let mut state = hash_state.build_hasher(); | |
177 | t.hash(&mut state); | |
178 | SafeHash::new(state.finish()) | |
179 | } | |
180 | ||
// `replace` casts a `*HashUint` to a `*SafeHash`. This is safe because a
// `FullBucket` is statically known to point at an index with a non-zero
// hash, and a `SafeHash` is just a `HashUint` under a different name.
//
// This test makes sure that a `SafeHash` really IS the same size as a
// `HashUint`. If you need to change the size of `SafeHash` (and
// consequently make this test fail), `replace` has to be modified so that
// it no longer assumes this.
#[test]
fn can_alias_safehash_as_hash() {
    let safe_hash_size = size_of::<SafeHash>();
    let hash_uint_size = size_of::<HashUint>();
    assert_eq!(safe_hash_size, hash_uint_size)
}
194 | ||
195 | impl<K, V> RawBucket<K, V> { | |
196 | unsafe fn offset(self, count: isize) -> RawBucket<K, V> { | |
197 | RawBucket { | |
198 | hash: self.hash.offset(count), | |
199 | pair: self.pair.offset(count), | |
200 | _marker: marker::PhantomData, | |
201 | } | |
202 | } | |
203 | } | |
204 | ||
205 | // Buckets hold references to the table. | |
206 | impl<K, V, M> FullBucket<K, V, M> { | |
207 | /// Borrow a reference to the table. | |
208 | pub fn table(&self) -> &M { | |
209 | &self.table | |
210 | } | |
211 | /// Move out the reference to the table. | |
212 | pub fn into_table(self) -> M { | |
213 | self.table | |
214 | } | |
215 | /// Get the raw index. | |
216 | pub fn index(&self) -> usize { | |
217 | self.idx | |
218 | } | |
219 | } | |
220 | ||
221 | impl<K, V, M> EmptyBucket<K, V, M> { | |
222 | /// Borrow a reference to the table. | |
223 | pub fn table(&self) -> &M { | |
224 | &self.table | |
225 | } | |
226 | } | |
227 | ||
228 | impl<K, V, M> Bucket<K, V, M> { | |
229 | /// Get the raw index. | |
230 | pub fn index(&self) -> usize { | |
231 | self.idx | |
232 | } | |
233 | } | |
234 | ||
235 | impl<K, V, M> Deref for FullBucket<K, V, M> | |
236 | where M: Deref<Target = RawTable<K, V>> | |
237 | { | |
238 | type Target = RawTable<K, V>; | |
239 | fn deref(&self) -> &RawTable<K, V> { | |
240 | &self.table | |
241 | } | |
242 | } | |
243 | ||
244 | /// `Put` is implemented for types which provide access to a table and cannot be invalidated | |
245 | /// by filling a bucket. A similar implementation for `Take` is possible. | |
246 | pub trait Put<K, V> { | |
247 | unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V>; | |
248 | } | |
249 | ||
250 | ||
251 | impl<'t, K, V> Put<K, V> for &'t mut RawTable<K, V> { | |
252 | unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V> { | |
253 | *self | |
254 | } | |
255 | } | |
256 | ||
257 | impl<K, V, M> Put<K, V> for Bucket<K, V, M> | |
258 | where M: Put<K, V> | |
259 | { | |
260 | unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V> { | |
261 | self.table.borrow_table_mut() | |
262 | } | |
263 | } | |
264 | ||
265 | impl<K, V, M> Put<K, V> for FullBucket<K, V, M> | |
266 | where M: Put<K, V> | |
267 | { | |
268 | unsafe fn borrow_table_mut(&mut self) -> &mut RawTable<K, V> { | |
269 | self.table.borrow_table_mut() | |
270 | } | |
271 | } | |
272 | ||
273 | impl<K, V, M: Deref<Target = RawTable<K, V>>> Bucket<K, V, M> { | |
274 | pub fn new(table: M, hash: SafeHash) -> Bucket<K, V, M> { | |
275 | Bucket::at_index(table, hash.inspect() as usize) | |
276 | } | |
277 | ||
278 | pub fn at_index(table: M, ib_index: usize) -> Bucket<K, V, M> { | |
279 | // if capacity is 0, then the RawBucket will be populated with bogus pointers. | |
280 | // This is an uncommon case though, so avoid it in release builds. | |
281 | debug_assert!(table.capacity() > 0, | |
282 | "Table should have capacity at this point"); | |
283 | let ib_index = ib_index & (table.capacity() - 1); | |
284 | Bucket { | |
285 | raw: unsafe { table.first_bucket_raw().offset(ib_index as isize) }, | |
286 | idx: ib_index, | |
287 | table: table, | |
288 | } | |
289 | } | |
290 | ||
291 | pub fn first(table: M) -> Bucket<K, V, M> { | |
292 | Bucket { | |
293 | raw: table.first_bucket_raw(), | |
294 | idx: 0, | |
295 | table: table, | |
296 | } | |
297 | } | |
298 | ||
299 | /// Reads a bucket at a given index, returning an enum indicating whether | |
300 | /// it's initialized or not. You need to match on this enum to get | |
301 | /// the appropriate types to call most of the other functions in | |
302 | /// this module. | |
303 | pub fn peek(self) -> BucketState<K, V, M> { | |
304 | match unsafe { *self.raw.hash } { | |
305 | EMPTY_BUCKET => { | |
306 | Empty(EmptyBucket { | |
307 | raw: self.raw, | |
308 | idx: self.idx, | |
309 | table: self.table, | |
310 | }) | |
311 | } | |
312 | _ => { | |
313 | Full(FullBucket { | |
314 | raw: self.raw, | |
315 | idx: self.idx, | |
316 | table: self.table, | |
317 | }) | |
318 | } | |
319 | } | |
320 | } | |
321 | ||
322 | /// Modifies the bucket pointer in place to make it point to the next slot. | |
323 | pub fn next(&mut self) { | |
324 | self.idx += 1; | |
325 | let range = self.table.capacity(); | |
326 | // This code is branchless thanks to a conditional move. | |
327 | let dist = if self.idx & (range - 1) == 0 { | |
328 | 1 - range as isize | |
329 | } else { | |
330 | 1 | |
331 | }; | |
332 | unsafe { | |
333 | self.raw = self.raw.offset(dist); | |
334 | } | |
335 | } | |
336 | } | |
337 | ||
338 | impl<K, V, M: Deref<Target = RawTable<K, V>>> EmptyBucket<K, V, M> { | |
339 | #[inline] | |
340 | pub fn next(self) -> Bucket<K, V, M> { | |
341 | let mut bucket = self.into_bucket(); | |
342 | bucket.next(); | |
343 | bucket | |
344 | } | |
345 | ||
346 | #[inline] | |
347 | pub fn into_bucket(self) -> Bucket<K, V, M> { | |
348 | Bucket { | |
349 | raw: self.raw, | |
350 | idx: self.idx, | |
351 | table: self.table, | |
352 | } | |
353 | } | |
354 | ||
355 | pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> { | |
356 | let gap = EmptyBucket { | |
357 | raw: self.raw, | |
358 | idx: self.idx, | |
359 | table: (), | |
360 | }; | |
361 | ||
362 | match self.next().peek() { | |
363 | Full(bucket) => { | |
364 | Some(GapThenFull { | |
365 | gap: gap, | |
366 | full: bucket, | |
367 | }) | |
368 | } | |
369 | Empty(..) => None, | |
370 | } | |
371 | } | |
372 | } | |
373 | ||
374 | impl<K, V, M> EmptyBucket<K, V, M> | |
375 | where M: Put<K, V> | |
376 | { | |
377 | /// Puts given key and value pair, along with the key's hash, | |
378 | /// into this bucket in the hashtable. Note how `self` is 'moved' into | |
379 | /// this function, because this slot will no longer be empty when | |
380 | /// we return! A `FullBucket` is returned for later use, pointing to | |
381 | /// the newly-filled slot in the hashtable. | |
382 | /// | |
383 | /// Use `make_hash` to construct a `SafeHash` to pass to this function. | |
384 | pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket<K, V, M> { | |
385 | unsafe { | |
386 | *self.raw.hash = hash.inspect(); | |
387 | ptr::write(self.raw.pair as *mut (K, V), (key, value)); | |
388 | ||
389 | self.table.borrow_table_mut().size += 1; | |
390 | } | |
391 | ||
392 | FullBucket { | |
393 | raw: self.raw, | |
394 | idx: self.idx, | |
395 | table: self.table, | |
396 | } | |
397 | } | |
398 | } | |
399 | ||
400 | impl<K, V, M: Deref<Target = RawTable<K, V>>> FullBucket<K, V, M> { | |
401 | #[inline] | |
402 | pub fn next(self) -> Bucket<K, V, M> { | |
403 | let mut bucket = self.into_bucket(); | |
404 | bucket.next(); | |
405 | bucket | |
406 | } | |
407 | ||
408 | #[inline] | |
409 | pub fn into_bucket(self) -> Bucket<K, V, M> { | |
410 | Bucket { | |
411 | raw: self.raw, | |
412 | idx: self.idx, | |
413 | table: self.table, | |
414 | } | |
415 | } | |
416 | ||
417 | /// Duplicates the current position. This can be useful for operations | |
418 | /// on two or more buckets. | |
419 | pub fn stash(self) -> FullBucket<K, V, Self> { | |
420 | FullBucket { | |
421 | raw: self.raw, | |
422 | idx: self.idx, | |
423 | table: self, | |
424 | } | |
425 | } | |
426 | ||
427 | /// Get the distance between this bucket and the 'ideal' location | |
428 | /// as determined by the key's hash stored in it. | |
429 | /// | |
430 | /// In the cited blog posts above, this is called the "distance to | |
431 | /// initial bucket", or DIB. Also known as "probe count". | |
432 | pub fn displacement(&self) -> usize { | |
433 | // Calculates the distance one has to travel when going from | |
434 | // `hash mod capacity` onwards to `idx mod capacity`, wrapping around | |
435 | // if the destination is not reached before the end of the table. | |
436 | (self.idx.wrapping_sub(self.hash().inspect() as usize)) & (self.table.capacity() - 1) | |
437 | } | |
438 | ||
439 | #[inline] | |
440 | pub fn hash(&self) -> SafeHash { | |
441 | unsafe { SafeHash { hash: *self.raw.hash } } | |
442 | } | |
443 | ||
444 | /// Gets references to the key and value at a given index. | |
445 | pub fn read(&self) -> (&K, &V) { | |
446 | unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) } | |
447 | } | |
448 | } | |
449 | ||
450 | // We take a mutable reference to the table instead of accepting anything that | |
451 | // implements `DerefMut` to prevent fn `take` from being called on `stash`ed | |
452 | // buckets. | |
453 | impl<'t, K, V> FullBucket<K, V, &'t mut RawTable<K, V>> { | |
454 | /// Removes this bucket's key and value from the hashtable. | |
455 | /// | |
456 | /// This works similarly to `put`, building an `EmptyBucket` out of the | |
457 | /// taken bucket. | |
458 | pub fn take(mut self) -> (EmptyBucket<K, V, &'t mut RawTable<K, V>>, K, V) { | |
459 | self.table.size -= 1; | |
460 | ||
461 | unsafe { | |
462 | *self.raw.hash = EMPTY_BUCKET; | |
463 | let (k, v) = ptr::read(self.raw.pair); | |
464 | (EmptyBucket { | |
465 | raw: self.raw, | |
466 | idx: self.idx, | |
467 | table: self.table, | |
468 | }, | |
469 | k, | |
470 | v) | |
471 | } | |
472 | } | |
473 | } | |
474 | ||
475 | // This use of `Put` is misleading and restrictive, but safe and sufficient for our use cases | |
476 | // where `M` is a full bucket or table reference type with mutable access to the table. | |
477 | impl<K, V, M> FullBucket<K, V, M> | |
478 | where M: Put<K, V> | |
479 | { | |
480 | pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { | |
481 | unsafe { | |
482 | let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); | |
483 | let (old_key, old_val) = ptr::replace(self.raw.pair as *mut (K, V), (k, v)); | |
484 | ||
485 | (old_hash, old_key, old_val) | |
486 | } | |
487 | } | |
488 | } | |
489 | ||
490 | impl<K, V, M> FullBucket<K, V, M> | |
491 | where M: Deref<Target = RawTable<K, V>> + DerefMut | |
492 | { | |
493 | /// Gets mutable references to the key and value at a given index. | |
494 | pub fn read_mut(&mut self) -> (&mut K, &mut V) { | |
495 | let pair_mut = self.raw.pair as *mut (K, V); | |
496 | unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) } | |
497 | } | |
498 | } | |
499 | ||
500 | impl<'t, K, V, M> FullBucket<K, V, M> | |
501 | where M: Deref<Target = RawTable<K, V>> + 't | |
502 | { | |
503 | /// Exchange a bucket state for immutable references into the table. | |
504 | /// Because the underlying reference to the table is also consumed, | |
505 | /// no further changes to the structure of the table are possible; | |
506 | /// in exchange for this, the returned references have a longer lifetime | |
507 | /// than the references returned by `read()`. | |
508 | pub fn into_refs(self) -> (&'t K, &'t V) { | |
509 | unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) } | |
510 | } | |
511 | } | |
512 | ||
513 | impl<'t, K, V, M> FullBucket<K, V, M> | |
514 | where M: Deref<Target = RawTable<K, V>> + DerefMut + 't | |
515 | { | |
516 | /// This works similarly to `into_refs`, exchanging a bucket state | |
517 | /// for mutable references into the table. | |
518 | pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { | |
519 | let pair_mut = self.raw.pair as *mut (K, V); | |
520 | unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) } | |
521 | } | |
522 | } | |
523 | ||
524 | impl<K, V, M> GapThenFull<K, V, M> | |
525 | where M: Deref<Target = RawTable<K, V>> | |
526 | { | |
527 | #[inline] | |
528 | pub fn full(&self) -> &FullBucket<K, V, M> { | |
529 | &self.full | |
530 | } | |
531 | ||
532 | pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> { | |
533 | unsafe { | |
534 | *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); | |
535 | ptr::copy_nonoverlapping(self.full.raw.pair, self.gap.raw.pair as *mut (K, V), 1); | |
536 | } | |
537 | ||
538 | let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full; | |
539 | ||
540 | match self.full.next().peek() { | |
541 | Full(bucket) => { | |
542 | self.gap.raw = prev_raw; | |
543 | self.gap.idx = prev_idx; | |
544 | ||
545 | self.full = bucket; | |
546 | ||
547 | Some(self) | |
548 | } | |
549 | Empty(..) => None, | |
550 | } | |
551 | } | |
552 | } | |
553 | ||
554 | ||
/// Rounds up to a multiple of a power of two. Returns the smallest multiple
/// of `target_alignment` that is greater than or equal to `unrounded`.
///
/// The caller must make sure that `unrounded + target_alignment - 1` does
/// not overflow `usize`.
///
/// # Panics
///
/// Panics if `target_alignment` is not a power of two.
#[inline]
fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize {
    assert!(target_alignment.is_power_of_two());
    (unrounded + target_alignment - 1) & !(target_alignment - 1)
}

#[test]
fn test_rounding() {
    assert_eq!(round_up_to_next(0, 4), 0);
    assert_eq!(round_up_to_next(1, 4), 4);
    assert_eq!(round_up_to_next(2, 4), 4);
    assert_eq!(round_up_to_next(3, 4), 4);
    assert_eq!(round_up_to_next(4, 4), 4);
    assert_eq!(round_up_to_next(5, 4), 8);
}

// Returns a tuple of (pairs_offset, end_of_pairs_offset, overflowed),
// with both offsets measured from the start of a mallocated array.
//
// `pairs_offset` is `hashes_size` rounded up to `pairs_align`, and
// `end_of_pairs_offset` is `pairs_offset + pairs_size`. `overflowed` is true
// when either the rounding or the addition does not fit in a `usize`. If the
// rounding itself overflows, both offsets are reported as 0 and must not be
// used.
//
// Panics if `pairs_align` is not a power of two.
#[inline]
fn calculate_offsets(hashes_size: usize,
                     pairs_size: usize,
                     pairs_align: usize)
                     -> (usize, usize, bool) {
    assert!(pairs_align.is_power_of_two());

    // Rounding up adds `pairs_align - 1` before masking. Report an overflow
    // of that addition through the flag instead of letting it panic (debug
    // builds) or silently wrap (release builds).
    if hashes_size.checked_add(pairs_align - 1).is_none() {
        return (0, 0, true);
    }

    let pairs_offset = round_up_to_next(hashes_size, pairs_align);
    let (end_of_pairs, oflo) = pairs_offset.overflowing_add(pairs_size);

    (pairs_offset, end_of_pairs, oflo)
}
589 | ||
590 | // Returns a tuple of (minimum required malloc alignment, hash_offset, | |
591 | // array_size), from the start of a mallocated array. | |
592 | fn calculate_allocation(hash_size: usize, | |
593 | hash_align: usize, | |
594 | pairs_size: usize, | |
595 | pairs_align: usize) | |
596 | -> (usize, usize, usize, bool) { | |
597 | let hash_offset = 0; | |
598 | let (_, end_of_pairs, oflo) = calculate_offsets(hash_size, pairs_size, pairs_align); | |
599 | ||
600 | let align = cmp::max(hash_align, pairs_align); | |
601 | ||
602 | (align, hash_offset, end_of_pairs, oflo) | |
603 | } | |
604 | ||
#[test]
fn test_offset_calculation() {
    // Whole-allocation layout: (alignment, hash_offset, size, overflowed).
    let layout = calculate_allocation(128, 8, 16, 8);
    assert_eq!(layout, (8, 0, 144, false));
    let layout = calculate_allocation(3, 1, 2, 1);
    assert_eq!(layout, (1, 0, 5, false));
    let layout = calculate_allocation(6, 2, 12, 4);
    assert_eq!(layout, (4, 0, 20, false));

    // Pair placement: (pairs_offset, end_of_pairs_offset, overflowed).
    let offsets = calculate_offsets(128, 15, 4);
    assert_eq!(offsets, (128, 143, false));
    let offsets = calculate_offsets(3, 2, 4);
    assert_eq!(offsets, (4, 6, false));
    let offsets = calculate_offsets(6, 12, 4);
    assert_eq!(offsets, (8, 20, false));
}
614 | ||
615 | impl<K, V> RawTable<K, V> { | |
616 | /// Does not initialize the buckets. The caller should ensure they, | |
617 | /// at the very least, set every hash to EMPTY_BUCKET. | |
618 | unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> { | |
619 | if capacity == 0 { | |
620 | return RawTable { | |
621 | size: 0, | |
622 | capacity: 0, | |
623 | hashes: Unique::new(EMPTY as *mut HashUint), | |
624 | marker: marker::PhantomData, | |
625 | }; | |
626 | } | |
627 | ||
628 | // No need for `checked_mul` before a more restrictive check performed | |
629 | // later in this method. | |
630 | let hashes_size = capacity.wrapping_mul(size_of::<HashUint>()); | |
631 | let pairs_size = capacity.wrapping_mul(size_of::<(K, V)>()); | |
632 | ||
633 | // Allocating hashmaps is a little tricky. We need to allocate two | |
634 | // arrays, but since we know their sizes and alignments up front, | |
635 | // we just allocate a single array, and then have the subarrays | |
636 | // point into it. | |
637 | // | |
638 | // This is great in theory, but in practice getting the alignment | |
639 | // right is a little subtle. Therefore, calculating offsets has been | |
640 | // factored out into a different function. | |
641 | let (alignment, hash_offset, size, oflo) = calculate_allocation(hashes_size, | |
642 | align_of::<HashUint>(), | |
643 | pairs_size, | |
644 | align_of::<(K, V)>()); | |
645 | assert!(!oflo, "capacity overflow"); | |
646 | ||
647 | // One check for overflow that covers calculation and rounding of size. | |
648 | let size_of_bucket = size_of::<HashUint>().checked_add(size_of::<(K, V)>()).unwrap(); | |
649 | assert!(size >= | |
650 | capacity.checked_mul(size_of_bucket) | |
651 | .expect("capacity overflow"), | |
652 | "capacity overflow"); | |
653 | ||
654 | let buffer = allocate(size, alignment); | |
655 | if buffer.is_null() { | |
656 | ::alloc::oom() | |
657 | } | |
658 | ||
659 | let hashes = buffer.offset(hash_offset as isize) as *mut HashUint; | |
660 | ||
661 | RawTable { | |
662 | capacity: capacity, | |
663 | size: 0, | |
664 | hashes: Unique::new(hashes), | |
665 | marker: marker::PhantomData, | |
666 | } | |
667 | } | |
668 | ||
669 | fn first_bucket_raw(&self) -> RawBucket<K, V> { | |
670 | let hashes_size = self.capacity * size_of::<HashUint>(); | |
671 | let pairs_size = self.capacity * size_of::<(K, V)>(); | |
672 | ||
673 | let buffer = *self.hashes as *mut u8; | |
674 | let (pairs_offset, _, oflo) = | |
675 | calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>()); | |
676 | debug_assert!(!oflo, "capacity overflow"); | |
677 | unsafe { | |
678 | RawBucket { | |
679 | hash: *self.hashes, | |
680 | pair: buffer.offset(pairs_offset as isize) as *const _, | |
681 | _marker: marker::PhantomData, | |
682 | } | |
683 | } | |
684 | } | |
685 | ||
686 | /// Creates a new raw table from a given capacity. All buckets are | |
687 | /// initially empty. | |
688 | pub fn new(capacity: usize) -> RawTable<K, V> { | |
689 | unsafe { | |
690 | let ret = RawTable::new_uninitialized(capacity); | |
691 | ptr::write_bytes(*ret.hashes, 0, capacity); | |
692 | ret | |
693 | } | |
694 | } | |
695 | ||
696 | /// The hashtable's capacity, similar to a vector's. | |
697 | pub fn capacity(&self) -> usize { | |
698 | self.capacity | |
699 | } | |
700 | ||
701 | /// The number of elements ever `put` in the hashtable, minus the number | |
702 | /// of elements ever `take`n. | |
703 | pub fn size(&self) -> usize { | |
704 | self.size | |
705 | } | |
706 | ||
707 | fn raw_buckets(&self) -> RawBuckets<K, V> { | |
708 | RawBuckets { | |
709 | raw: self.first_bucket_raw(), | |
710 | hashes_end: unsafe { self.hashes.offset(self.capacity as isize) }, | |
711 | marker: marker::PhantomData, | |
712 | } | |
713 | } | |
714 | ||
715 | pub fn iter(&self) -> Iter<K, V> { | |
716 | Iter { | |
717 | iter: self.raw_buckets(), | |
718 | elems_left: self.size(), | |
719 | } | |
720 | } | |
721 | ||
722 | pub fn iter_mut(&mut self) -> IterMut<K, V> { | |
723 | IterMut { | |
724 | iter: self.raw_buckets(), | |
725 | elems_left: self.size(), | |
726 | _marker: marker::PhantomData, | |
727 | } | |
728 | } | |
729 | ||
730 | pub fn into_iter(self) -> IntoIter<K, V> { | |
731 | let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); | |
732 | // Replace the marker regardless of lifetime bounds on parameters. | |
733 | IntoIter { | |
734 | iter: RawBuckets { | |
735 | raw: raw, | |
736 | hashes_end: hashes_end, | |
737 | marker: marker::PhantomData, | |
738 | }, | |
739 | table: self, | |
740 | } | |
741 | } | |
742 | ||
743 | pub fn drain(&mut self) -> Drain<K, V> { | |
744 | let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); | |
745 | // Replace the marker regardless of lifetime bounds on parameters. | |
746 | Drain { | |
747 | iter: RawBuckets { | |
748 | raw: raw, | |
749 | hashes_end: hashes_end, | |
750 | marker: marker::PhantomData, | |
751 | }, | |
752 | table: unsafe { Shared::new(self) }, | |
753 | marker: marker::PhantomData, | |
754 | } | |
755 | } | |
756 | ||
757 | /// Returns an iterator that copies out each entry. Used while the table | |
758 | /// is being dropped. | |
759 | unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> { | |
760 | let raw_bucket = self.first_bucket_raw(); | |
761 | RevMoveBuckets { | |
762 | raw: raw_bucket.offset(self.capacity as isize), | |
763 | hashes_end: raw_bucket.hash, | |
764 | elems_left: self.size, | |
765 | marker: marker::PhantomData, | |
766 | } | |
767 | } | |
768 | } | |
769 | ||
770 | /// A raw iterator. The basis for some other iterators in this module. Although | |
771 | /// this interface is safe, it's not used outside this module. | |
772 | struct RawBuckets<'a, K, V> { | |
773 | raw: RawBucket<K, V>, | |
774 | hashes_end: *mut HashUint, | |
775 | ||
776 | // Strictly speaking, this should be &'a (K,V), but that would | |
777 | // require that K:'a, and we often use RawBuckets<'static...> for | |
778 | // move iterations, so that messes up a lot of other things. So | |
779 | // just use `&'a (K,V)` as this is not a publicly exposed type | |
780 | // anyway. | |
781 | marker: marker::PhantomData<&'a ()>, | |
782 | } | |
783 | ||
784 | // FIXME(#19839) Remove in favor of `#[derive(Clone)]` | |
785 | impl<'a, K, V> Clone for RawBuckets<'a, K, V> { | |
786 | fn clone(&self) -> RawBuckets<'a, K, V> { | |
787 | RawBuckets { | |
788 | raw: self.raw, | |
789 | hashes_end: self.hashes_end, | |
790 | marker: marker::PhantomData, | |
791 | } | |
792 | } | |
793 | } | |
794 | ||
795 | ||
796 | impl<'a, K, V> Iterator for RawBuckets<'a, K, V> { | |
797 | type Item = RawBucket<K, V>; | |
798 | ||
799 | fn next(&mut self) -> Option<RawBucket<K, V>> { | |
800 | while self.raw.hash != self.hashes_end { | |
801 | unsafe { | |
802 | // We are swapping out the pointer to a bucket and replacing | |
803 | // it with the pointer to the next one. | |
804 | let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); | |
805 | if *prev.hash != EMPTY_BUCKET { | |
806 | return Some(prev); | |
807 | } | |
808 | } | |
809 | } | |
810 | ||
811 | None | |
812 | } | |
813 | } | |
814 | ||
815 | /// An iterator that moves out buckets in reverse order. It leaves the table | |
816 | /// in an inconsistent state and should only be used for dropping | |
817 | /// the table's remaining entries. It's used in the implementation of Drop. | |
818 | struct RevMoveBuckets<'a, K, V> { | |
819 | raw: RawBucket<K, V>, | |
820 | hashes_end: *mut HashUint, | |
821 | elems_left: usize, | |
822 | ||
823 | // As above, `&'a (K,V)` would seem better, but we often use | |
824 | // 'static for the lifetime, and this is not a publicly exposed | |
825 | // type. | |
826 | marker: marker::PhantomData<&'a ()>, | |
827 | } | |
828 | ||
829 | impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> { | |
830 | type Item = (K, V); | |
831 | ||
832 | fn next(&mut self) -> Option<(K, V)> { | |
833 | if self.elems_left == 0 { | |
834 | return None; | |
835 | } | |
836 | ||
837 | loop { | |
838 | debug_assert!(self.raw.hash != self.hashes_end); | |
839 | ||
840 | unsafe { | |
841 | self.raw = self.raw.offset(-1); | |
842 | ||
843 | if *self.raw.hash != EMPTY_BUCKET { | |
844 | self.elems_left -= 1; | |
845 | return Some(ptr::read(self.raw.pair)); | |
846 | } | |
847 | } | |
848 | } | |
849 | } | |
850 | } | |
851 | ||
852 | /// Iterator over shared references to entries in a table. | |
853 | pub struct Iter<'a, K: 'a, V: 'a> { | |
854 | iter: RawBuckets<'a, K, V>, | |
855 | elems_left: usize, | |
856 | } | |
857 | ||
858 | unsafe impl<'a, K: Sync, V: Sync> Sync for Iter<'a, K, V> {} | |
859 | unsafe impl<'a, K: Sync, V: Sync> Send for Iter<'a, K, V> {} | |
860 | ||
861 | // FIXME(#19839) Remove in favor of `#[derive(Clone)]` | |
862 | impl<'a, K, V> Clone for Iter<'a, K, V> { | |
863 | fn clone(&self) -> Iter<'a, K, V> { | |
864 | Iter { | |
865 | iter: self.iter.clone(), | |
866 | elems_left: self.elems_left, | |
867 | } | |
868 | } | |
869 | } | |
870 | ||
871 | ||
872 | /// Iterator over mutable references to entries in a table. | |
873 | pub struct IterMut<'a, K: 'a, V: 'a> { | |
874 | iter: RawBuckets<'a, K, V>, | |
875 | elems_left: usize, | |
876 | // To ensure invariance with respect to V | |
877 | _marker: marker::PhantomData<&'a mut V>, | |
878 | } | |
879 | ||
880 | unsafe impl<'a, K: Sync, V: Sync> Sync for IterMut<'a, K, V> {} | |
881 | // Both K: Sync and K: Send are correct for IterMut's Send impl, | |
882 | // but Send is the more useful bound | |
883 | unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {} | |
884 | ||
885 | /// Iterator over the entries in a table, consuming the table. | |
886 | pub struct IntoIter<K, V> { | |
887 | table: RawTable<K, V>, | |
888 | iter: RawBuckets<'static, K, V>, | |
889 | } | |
890 | ||
891 | unsafe impl<K: Sync, V: Sync> Sync for IntoIter<K, V> {} | |
892 | unsafe impl<K: Send, V: Send> Send for IntoIter<K, V> {} | |
893 | ||
894 | /// Iterator over the entries in a table, clearing the table. | |
895 | pub struct Drain<'a, K: 'a, V: 'a> { | |
896 | table: Shared<RawTable<K, V>>, | |
897 | iter: RawBuckets<'static, K, V>, | |
898 | marker: marker::PhantomData<&'a RawTable<K, V>>, | |
899 | } | |
900 | ||
901 | unsafe impl<'a, K: Sync, V: Sync> Sync for Drain<'a, K, V> {} | |
902 | unsafe impl<'a, K: Send, V: Send> Send for Drain<'a, K, V> {} | |
903 | ||
904 | impl<'a, K, V> Iterator for Iter<'a, K, V> { | |
905 | type Item = (&'a K, &'a V); | |
906 | ||
907 | fn next(&mut self) -> Option<(&'a K, &'a V)> { | |
908 | self.iter.next().map(|bucket| { | |
909 | self.elems_left -= 1; | |
910 | unsafe { (&(*bucket.pair).0, &(*bucket.pair).1) } | |
911 | }) | |
912 | } | |
913 | ||
914 | fn size_hint(&self) -> (usize, Option<usize>) { | |
915 | (self.elems_left, Some(self.elems_left)) | |
916 | } | |
917 | } | |
918 | impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { | |
919 | fn len(&self) -> usize { | |
920 | self.elems_left | |
921 | } | |
922 | } | |
923 | ||
924 | impl<'a, K, V> Iterator for IterMut<'a, K, V> { | |
925 | type Item = (&'a K, &'a mut V); | |
926 | ||
927 | fn next(&mut self) -> Option<(&'a K, &'a mut V)> { | |
928 | self.iter.next().map(|bucket| { | |
929 | self.elems_left -= 1; | |
930 | let pair_mut = bucket.pair as *mut (K, V); | |
931 | unsafe { (&(*pair_mut).0, &mut (*pair_mut).1) } | |
932 | }) | |
933 | } | |
934 | ||
935 | fn size_hint(&self) -> (usize, Option<usize>) { | |
936 | (self.elems_left, Some(self.elems_left)) | |
937 | } | |
938 | } | |
939 | impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { | |
940 | fn len(&self) -> usize { | |
941 | self.elems_left | |
942 | } | |
943 | } | |
944 | ||
945 | impl<K, V> Iterator for IntoIter<K, V> { | |
946 | type Item = (SafeHash, K, V); | |
947 | ||
948 | fn next(&mut self) -> Option<(SafeHash, K, V)> { | |
949 | self.iter.next().map(|bucket| { | |
950 | self.table.size -= 1; | |
951 | unsafe { | |
952 | let (k, v) = ptr::read(bucket.pair); | |
953 | (SafeHash { hash: *bucket.hash }, k, v) | |
954 | } | |
955 | }) | |
956 | } | |
957 | ||
958 | fn size_hint(&self) -> (usize, Option<usize>) { | |
959 | let size = self.table.size(); | |
960 | (size, Some(size)) | |
961 | } | |
962 | } | |
963 | impl<K, V> ExactSizeIterator for IntoIter<K, V> { | |
964 | fn len(&self) -> usize { | |
965 | self.table.size() | |
966 | } | |
967 | } | |
968 | ||
969 | impl<'a, K, V> Iterator for Drain<'a, K, V> { | |
970 | type Item = (SafeHash, K, V); | |
971 | ||
972 | #[inline] | |
973 | fn next(&mut self) -> Option<(SafeHash, K, V)> { | |
974 | self.iter.next().map(|bucket| { | |
975 | unsafe { | |
976 | (**self.table).size -= 1; | |
977 | let (k, v) = ptr::read(bucket.pair); | |
978 | (SafeHash { hash: ptr::replace(bucket.hash, EMPTY_BUCKET) }, k, v) | |
979 | } | |
980 | }) | |
981 | } | |
982 | ||
983 | fn size_hint(&self) -> (usize, Option<usize>) { | |
984 | let size = unsafe { (**self.table).size() }; | |
985 | (size, Some(size)) | |
986 | } | |
987 | } | |
988 | impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { | |
989 | fn len(&self) -> usize { | |
990 | unsafe { (**self.table).size() } | |
991 | } | |
992 | } | |
993 | ||
994 | impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> { | |
995 | fn drop(&mut self) { | |
996 | for _ in self {} | |
997 | } | |
998 | } | |
999 | ||
1000 | impl<K: Clone, V: Clone> Clone for RawTable<K, V> { | |
1001 | fn clone(&self) -> RawTable<K, V> { | |
1002 | unsafe { | |
1003 | let mut new_ht = RawTable::new_uninitialized(self.capacity()); | |
1004 | ||
1005 | { | |
1006 | let cap = self.capacity(); | |
1007 | let mut new_buckets = Bucket::first(&mut new_ht); | |
1008 | let mut buckets = Bucket::first(self); | |
1009 | while buckets.index() != cap { | |
1010 | match buckets.peek() { | |
1011 | Full(full) => { | |
1012 | let (h, k, v) = { | |
1013 | let (k, v) = full.read(); | |
1014 | (full.hash(), k.clone(), v.clone()) | |
1015 | }; | |
1016 | *new_buckets.raw.hash = h.inspect(); | |
1017 | ptr::write(new_buckets.raw.pair as *mut (K, V), (k, v)); | |
1018 | } | |
1019 | Empty(..) => { | |
1020 | *new_buckets.raw.hash = EMPTY_BUCKET; | |
1021 | } | |
1022 | } | |
1023 | new_buckets.next(); | |
1024 | buckets.next(); | |
1025 | } | |
1026 | }; | |
1027 | ||
1028 | new_ht.size = self.size(); | |
1029 | ||
1030 | new_ht | |
1031 | } | |
1032 | } | |
1033 | } | |
1034 | ||
1035 | impl<K, V> Drop for RawTable<K, V> { | |
1036 | #[unsafe_destructor_blind_to_params] | |
1037 | fn drop(&mut self) { | |
1038 | if self.capacity == 0 { | |
1039 | return; | |
1040 | } | |
1041 | ||
1042 | // This is done in reverse because we've likely partially taken | |
1043 | // some elements out with `.into_iter()` from the front. | |
1044 | // Check if the size is 0, so we don't do a useless scan when | |
1045 | // dropping empty tables such as on resize. | |
1046 | // Also avoid double drop of elements that have been already moved out. | |
1047 | unsafe { | |
1048 | if needs_drop::<(K, V)>() { | |
1049 | // avoid linear runtime for types that don't need drop | |
1050 | for _ in self.rev_move_buckets() {} | |
1051 | } | |
1052 | } | |
1053 | ||
1054 | let hashes_size = self.capacity * size_of::<HashUint>(); | |
1055 | let pairs_size = self.capacity * size_of::<(K, V)>(); | |
1056 | let (align, _, size, oflo) = calculate_allocation(hashes_size, | |
1057 | align_of::<HashUint>(), | |
1058 | pairs_size, | |
1059 | align_of::<(K, V)>()); | |
1060 | ||
1061 | debug_assert!(!oflo, "should be impossible"); | |
1062 | ||
1063 | unsafe { | |
1064 | deallocate(*self.hashes as *mut u8, size, align); | |
1065 | // Remember how everything was allocated out of one buffer | |
1066 | // during initialization? We only need one call to free here. | |
1067 | } | |
1068 | } | |
1069 | } |