]>
Commit | Line | Data |
---|---|---|
48663c56 XL |
1 | use super::bitmask::BitMask; |
2 | use super::EMPTY; | |
3 | use core::{mem, ptr}; | |
4 | ||
5 | // Use the native word size as the group size. Using a 64-bit group size on | |
6 | // a 32-bit architecture will just end up being more expensive because | |
7 | // shifts and multiplies will need to be emulated. | |
8 | #[cfg(any( | |
9 | target_pointer_width = "64", | |
10 | target_arch = "aarch64", | |
11 | target_arch = "x86_64", | |
12 | ))] | |
13 | type GroupWord = u64; | |
14 | #[cfg(all( | |
15 | target_pointer_width = "32", | |
16 | not(target_arch = "aarch64"), | |
17 | not(target_arch = "x86_64"), | |
18 | ))] | |
19 | type GroupWord = u32; | |
20 | ||
21 | pub type BitMaskWord = GroupWord; | |
22 | pub const BITMASK_STRIDE: usize = 8; | |
23 | // We only care about the highest bit of each byte for the mask. | |
e74abb32 | 24 | #[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)] |
48663c56 XL |
25 | pub const BITMASK_MASK: BitMaskWord = 0x8080_8080_8080_8080_u64 as GroupWord; |
26 | ||
27 | /// Helper function to replicate a byte across a `GroupWord`. | |
28 | #[inline] | |
29 | fn repeat(byte: u8) -> GroupWord { | |
3dfed10e | 30 | GroupWord::from_ne_bytes([byte; Group::WIDTH]) |
48663c56 XL |
31 | } |
32 | ||
33 | /// Abstraction over a group of control bytes which can be scanned in | |
34 | /// parallel. | |
35 | /// | |
36 | /// This implementation uses a word-sized integer. | |
37 | #[derive(Copy, Clone)] | |
38 | pub struct Group(GroupWord); | |
39 | ||
40 | // We perform all operations in the native endianess, and convert to | |
41 | // little-endian just before creating a BitMask. The can potentially | |
42 | // enable the compiler to eliminate unnecessary byte swaps if we are | |
43 | // only checking whether a BitMask is empty. | |
44 | #[allow(clippy::use_self)] | |
45 | impl Group { | |
46 | /// Number of bytes in the group. | |
47 | pub const WIDTH: usize = mem::size_of::<Self>(); | |
48 | ||
49 | /// Returns a full group of empty bytes, suitable for use as the initial | |
1b1a35ee | 50 | /// value for an empty hash table. |
48663c56 XL |
51 | /// |
52 | /// This is guaranteed to be aligned to the group size. | |
1b1a35ee XL |
53 | pub const fn static_empty() -> &'static [u8; Group::WIDTH] { |
54 | #[repr(C)] | |
55 | struct AlignedBytes { | |
56 | _align: [Group; 0], | |
48663c56 | 57 | bytes: [u8; Group::WIDTH], |
6a06907d | 58 | } |
1b1a35ee XL |
59 | const ALIGNED_BYTES: AlignedBytes = AlignedBytes { |
60 | _align: [], | |
48663c56 XL |
61 | bytes: [EMPTY; Group::WIDTH], |
62 | }; | |
1b1a35ee | 63 | &ALIGNED_BYTES.bytes |
48663c56 XL |
64 | } |
65 | ||
66 | /// Loads a group of bytes starting at the given address. | |
67 | #[inline] | |
68 | #[allow(clippy::cast_ptr_alignment)] // unaligned load | |
69 | pub unsafe fn load(ptr: *const u8) -> Self { | |
6a06907d | 70 | Group(ptr::read_unaligned(ptr.cast())) |
48663c56 XL |
71 | } |
72 | ||
73 | /// Loads a group of bytes starting at the given address, which must be | |
74 | /// aligned to `mem::align_of::<Group>()`. | |
75 | #[inline] | |
76 | #[allow(clippy::cast_ptr_alignment)] | |
77 | pub unsafe fn load_aligned(ptr: *const u8) -> Self { | |
78 | // FIXME: use align_offset once it stabilizes | |
79 | debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0); | |
6a06907d | 80 | Group(ptr::read(ptr.cast())) |
48663c56 XL |
81 | } |
82 | ||
83 | /// Stores the group of bytes to the given address, which must be | |
84 | /// aligned to `mem::align_of::<Group>()`. | |
85 | #[inline] | |
86 | #[allow(clippy::cast_ptr_alignment)] | |
87 | pub unsafe fn store_aligned(self, ptr: *mut u8) { | |
88 | // FIXME: use align_offset once it stabilizes | |
89 | debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0); | |
6a06907d | 90 | ptr::write(ptr.cast(), self.0); |
48663c56 XL |
91 | } |
92 | ||
93 | /// Returns a `BitMask` indicating all bytes in the group which *may* | |
94 | /// have the given value. | |
95 | /// | |
96 | /// This function may return a false positive in certain cases where | |
97 | /// the byte in the group differs from the searched value only in its | |
98 | /// lowest bit. This is fine because: | |
99 | /// - This never happens for `EMPTY` and `DELETED`, only full entries. | |
100 | /// - The check for key equality will catch these. | |
101 | /// - This only happens if there is at least 1 true match. | |
102 | /// - The chance of this happening is very low (< 1% chance per byte). | |
103 | #[inline] | |
104 | pub fn match_byte(self, byte: u8) -> BitMask { | |
105 | // This algorithm is derived from | |
106 | // http://graphics.stanford.edu/~seander/bithacks.html##ValueInWord | |
107 | let cmp = self.0 ^ repeat(byte); | |
108 | BitMask((cmp.wrapping_sub(repeat(0x01)) & !cmp & repeat(0x80)).to_le()) | |
109 | } | |
110 | ||
111 | /// Returns a `BitMask` indicating all bytes in the group which are | |
112 | /// `EMPTY`. | |
113 | #[inline] | |
114 | pub fn match_empty(self) -> BitMask { | |
115 | // If the high bit is set, then the byte must be either: | |
116 | // 1111_1111 (EMPTY) or 1000_0000 (DELETED). | |
117 | // So we can just check if the top two bits are 1 by ANDing them. | |
118 | BitMask((self.0 & (self.0 << 1) & repeat(0x80)).to_le()) | |
119 | } | |
120 | ||
121 | /// Returns a `BitMask` indicating all bytes in the group which are | |
122 | /// `EMPTY` or `DELETED`. | |
123 | #[inline] | |
124 | pub fn match_empty_or_deleted(self) -> BitMask { | |
125 | // A byte is EMPTY or DELETED iff the high bit is set | |
126 | BitMask((self.0 & repeat(0x80)).to_le()) | |
127 | } | |
128 | ||
129 | /// Returns a `BitMask` indicating all bytes in the group which are full. | |
130 | #[inline] | |
3dfed10e | 131 | pub fn match_full(self) -> BitMask { |
48663c56 XL |
132 | self.match_empty_or_deleted().invert() |
133 | } | |
134 | ||
135 | /// Performs the following transformation on all bytes in the group: | |
136 | /// - `EMPTY => EMPTY` | |
137 | /// - `DELETED => EMPTY` | |
138 | /// - `FULL => DELETED` | |
139 | #[inline] | |
140 | pub fn convert_special_to_empty_and_full_to_deleted(self) -> Self { | |
141 | // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 | |
142 | // and high_bit = 0 (FULL) to 1000_0000 | |
143 | // | |
144 | // Here's this logic expanded to concrete values: | |
145 | // let full = 1000_0000 (true) or 0000_0000 (false) | |
146 | // !1000_0000 + 1 = 0111_1111 + 1 = 1000_0000 (no carry) | |
147 | // !0000_0000 + 0 = 1111_1111 + 0 = 1111_1111 (no carry) | |
148 | let full = !self.0 & repeat(0x80); | |
149 | Group(!full + (full >> 7)) | |
150 | } | |
151 | } |