]>
Commit | Line | Data |
---|---|---|
48663c56 XL |
1 | use super::bitmask::BitMask; |
2 | use super::EMPTY; | |
3 | use core::mem; | |
4 | ||
5 | #[cfg(target_arch = "x86")] | |
6 | use core::arch::x86; | |
7 | #[cfg(target_arch = "x86_64")] | |
8 | use core::arch::x86_64 as x86; | |
9 | ||
10 | pub type BitMaskWord = u16; | |
11 | pub const BITMASK_STRIDE: usize = 1; | |
12 | pub const BITMASK_MASK: BitMaskWord = 0xffff; | |
13 | ||
14 | /// Abstraction over a group of control bytes which can be scanned in | |
15 | /// parallel. | |
16 | /// | |
17 | /// This implementation uses a 128-bit SSE value. | |
18 | #[derive(Copy, Clone)] | |
19 | pub struct Group(x86::__m128i); | |
20 | ||
21 | // FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 | |
e74abb32 | 22 | #[allow(clippy::use_self)] |
48663c56 XL |
23 | impl Group { |
24 | /// Number of bytes in the group. | |
25 | pub const WIDTH: usize = mem::size_of::<Self>(); | |
26 | ||
27 | /// Returns a full group of empty bytes, suitable for use as the initial | |
1b1a35ee | 28 | /// value for an empty hash table. |
48663c56 XL |
29 | /// |
30 | /// This is guaranteed to be aligned to the group size. | |
5099ac24 | 31 | #[inline] |
6a06907d | 32 | #[allow(clippy::items_after_statements)] |
1b1a35ee XL |
33 | pub const fn static_empty() -> &'static [u8; Group::WIDTH] { |
34 | #[repr(C)] | |
35 | struct AlignedBytes { | |
36 | _align: [Group; 0], | |
48663c56 | 37 | bytes: [u8; Group::WIDTH], |
6a06907d | 38 | } |
1b1a35ee XL |
39 | const ALIGNED_BYTES: AlignedBytes = AlignedBytes { |
40 | _align: [], | |
48663c56 XL |
41 | bytes: [EMPTY; Group::WIDTH], |
42 | }; | |
1b1a35ee | 43 | &ALIGNED_BYTES.bytes |
48663c56 XL |
44 | } |
45 | ||
46 | /// Loads a group of bytes starting at the given address. | |
47 | #[inline] | |
48 | #[allow(clippy::cast_ptr_alignment)] // unaligned load | |
49 | pub unsafe fn load(ptr: *const u8) -> Self { | |
6a06907d | 50 | Group(x86::_mm_loadu_si128(ptr.cast())) |
48663c56 XL |
51 | } |
52 | ||
53 | /// Loads a group of bytes starting at the given address, which must be | |
54 | /// aligned to `mem::align_of::<Group>()`. | |
55 | #[inline] | |
56 | #[allow(clippy::cast_ptr_alignment)] | |
57 | pub unsafe fn load_aligned(ptr: *const u8) -> Self { | |
58 | // FIXME: use align_offset once it stabilizes | |
59 | debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0); | |
6a06907d | 60 | Group(x86::_mm_load_si128(ptr.cast())) |
48663c56 XL |
61 | } |
62 | ||
63 | /// Stores the group of bytes to the given address, which must be | |
64 | /// aligned to `mem::align_of::<Group>()`. | |
65 | #[inline] | |
66 | #[allow(clippy::cast_ptr_alignment)] | |
67 | pub unsafe fn store_aligned(self, ptr: *mut u8) { | |
68 | // FIXME: use align_offset once it stabilizes | |
69 | debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0); | |
6a06907d | 70 | x86::_mm_store_si128(ptr.cast(), self.0); |
48663c56 XL |
71 | } |
72 | ||
73 | /// Returns a `BitMask` indicating all bytes in the group which have | |
74 | /// the given value. | |
75 | #[inline] | |
76 | pub fn match_byte(self, byte: u8) -> BitMask { | |
77 | #[allow( | |
78 | clippy::cast_possible_wrap, // byte: u8 as i8 | |
79 | // byte: i32 as u16 | |
80 | // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the | |
81 | // upper 16-bits of the i32 are zeroed: | |
82 | clippy::cast_sign_loss, | |
83 | clippy::cast_possible_truncation | |
84 | )] | |
85 | unsafe { | |
86 | let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(byte as i8)); | |
87 | BitMask(x86::_mm_movemask_epi8(cmp) as u16) | |
88 | } | |
89 | } | |
90 | ||
91 | /// Returns a `BitMask` indicating all bytes in the group which are | |
92 | /// `EMPTY`. | |
93 | #[inline] | |
94 | pub fn match_empty(self) -> BitMask { | |
95 | self.match_byte(EMPTY) | |
96 | } | |
97 | ||
98 | /// Returns a `BitMask` indicating all bytes in the group which are | |
99 | /// `EMPTY` or `DELETED`. | |
100 | #[inline] | |
101 | pub fn match_empty_or_deleted(self) -> BitMask { | |
102 | #[allow( | |
103 | // byte: i32 as u16 | |
104 | // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the | |
105 | // upper 16-bits of the i32 are zeroed: | |
106 | clippy::cast_sign_loss, | |
107 | clippy::cast_possible_truncation | |
108 | )] | |
109 | unsafe { | |
110 | // A byte is EMPTY or DELETED iff the high bit is set | |
111 | BitMask(x86::_mm_movemask_epi8(self.0) as u16) | |
112 | } | |
113 | } | |
114 | ||
115 | /// Returns a `BitMask` indicating all bytes in the group which are full. | |
116 | #[inline] | |
117 | pub fn match_full(&self) -> BitMask { | |
118 | self.match_empty_or_deleted().invert() | |
119 | } | |
120 | ||
121 | /// Performs the following transformation on all bytes in the group: | |
122 | /// - `EMPTY => EMPTY` | |
123 | /// - `DELETED => EMPTY` | |
124 | /// - `FULL => DELETED` | |
125 | #[inline] | |
126 | pub fn convert_special_to_empty_and_full_to_deleted(self) -> Self { | |
127 | // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 | |
128 | // and high_bit = 0 (FULL) to 1000_0000 | |
129 | // | |
130 | // Here's this logic expanded to concrete values: | |
131 | // let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false) | |
132 | // 1111_1111 | 1000_0000 = 1111_1111 | |
133 | // 0000_0000 | 1000_0000 = 1000_0000 | |
134 | #[allow( | |
135 | clippy::cast_possible_wrap, // byte: 0x80_u8 as i8 | |
136 | )] | |
137 | unsafe { | |
138 | let zero = x86::_mm_setzero_si128(); | |
139 | let special = x86::_mm_cmpgt_epi8(zero, self.0); | |
e74abb32 XL |
140 | Group(x86::_mm_or_si128( |
141 | special, | |
142 | x86::_mm_set1_epi8(0x80_u8 as i8), | |
143 | )) | |
48663c56 XL |
144 | } |
145 | } | |
146 | } |