]>
Commit | Line | Data |
---|---|---|
48663c56 XL |
1 | use super::bitmask::BitMask; |
2 | use super::EMPTY; | |
3 | use core::mem; | |
4 | ||
5 | #[cfg(target_arch = "x86")] | |
6 | use core::arch::x86; | |
7 | #[cfg(target_arch = "x86_64")] | |
8 | use core::arch::x86_64 as x86; | |
9 | ||
10 | pub type BitMaskWord = u16; | |
11 | pub const BITMASK_STRIDE: usize = 1; | |
12 | pub const BITMASK_MASK: BitMaskWord = 0xffff; | |
13 | ||
14 | /// Abstraction over a group of control bytes which can be scanned in | |
15 | /// parallel. | |
16 | /// | |
17 | /// This implementation uses a 128-bit SSE value. | |
18 | #[derive(Copy, Clone)] | |
19 | pub struct Group(x86::__m128i); | |
20 | ||
21 | // FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 | |
e74abb32 | 22 | #[allow(clippy::use_self)] |
48663c56 XL |
23 | impl Group { |
24 | /// Number of bytes in the group. | |
25 | pub const WIDTH: usize = mem::size_of::<Self>(); | |
26 | ||
27 | /// Returns a full group of empty bytes, suitable for use as the initial | |
1b1a35ee | 28 | /// value for an empty hash table. |
48663c56 XL |
29 | /// |
30 | /// This is guaranteed to be aligned to the group size. | |
1b1a35ee XL |
31 | pub const fn static_empty() -> &'static [u8; Group::WIDTH] { |
32 | #[repr(C)] | |
33 | struct AlignedBytes { | |
34 | _align: [Group; 0], | |
48663c56 XL |
35 | bytes: [u8; Group::WIDTH], |
36 | }; | |
1b1a35ee XL |
37 | const ALIGNED_BYTES: AlignedBytes = AlignedBytes { |
38 | _align: [], | |
48663c56 XL |
39 | bytes: [EMPTY; Group::WIDTH], |
40 | }; | |
1b1a35ee | 41 | &ALIGNED_BYTES.bytes |
48663c56 XL |
42 | } |
43 | ||
44 | /// Loads a group of bytes starting at the given address. | |
45 | #[inline] | |
46 | #[allow(clippy::cast_ptr_alignment)] // unaligned load | |
47 | pub unsafe fn load(ptr: *const u8) -> Self { | |
48 | Group(x86::_mm_loadu_si128(ptr as *const _)) | |
49 | } | |
50 | ||
51 | /// Loads a group of bytes starting at the given address, which must be | |
52 | /// aligned to `mem::align_of::<Group>()`. | |
53 | #[inline] | |
54 | #[allow(clippy::cast_ptr_alignment)] | |
55 | pub unsafe fn load_aligned(ptr: *const u8) -> Self { | |
56 | // FIXME: use align_offset once it stabilizes | |
57 | debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0); | |
58 | Group(x86::_mm_load_si128(ptr as *const _)) | |
59 | } | |
60 | ||
61 | /// Stores the group of bytes to the given address, which must be | |
62 | /// aligned to `mem::align_of::<Group>()`. | |
63 | #[inline] | |
64 | #[allow(clippy::cast_ptr_alignment)] | |
65 | pub unsafe fn store_aligned(self, ptr: *mut u8) { | |
66 | // FIXME: use align_offset once it stabilizes | |
67 | debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0); | |
68 | x86::_mm_store_si128(ptr as *mut _, self.0); | |
69 | } | |
70 | ||
71 | /// Returns a `BitMask` indicating all bytes in the group which have | |
72 | /// the given value. | |
73 | #[inline] | |
74 | pub fn match_byte(self, byte: u8) -> BitMask { | |
75 | #[allow( | |
76 | clippy::cast_possible_wrap, // byte: u8 as i8 | |
77 | // byte: i32 as u16 | |
78 | // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the | |
79 | // upper 16-bits of the i32 are zeroed: | |
80 | clippy::cast_sign_loss, | |
81 | clippy::cast_possible_truncation | |
82 | )] | |
83 | unsafe { | |
84 | let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(byte as i8)); | |
85 | BitMask(x86::_mm_movemask_epi8(cmp) as u16) | |
86 | } | |
87 | } | |
88 | ||
89 | /// Returns a `BitMask` indicating all bytes in the group which are | |
90 | /// `EMPTY`. | |
91 | #[inline] | |
92 | pub fn match_empty(self) -> BitMask { | |
93 | self.match_byte(EMPTY) | |
94 | } | |
95 | ||
96 | /// Returns a `BitMask` indicating all bytes in the group which are | |
97 | /// `EMPTY` or `DELETED`. | |
98 | #[inline] | |
99 | pub fn match_empty_or_deleted(self) -> BitMask { | |
100 | #[allow( | |
101 | // byte: i32 as u16 | |
102 | // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the | |
103 | // upper 16-bits of the i32 are zeroed: | |
104 | clippy::cast_sign_loss, | |
105 | clippy::cast_possible_truncation | |
106 | )] | |
107 | unsafe { | |
108 | // A byte is EMPTY or DELETED iff the high bit is set | |
109 | BitMask(x86::_mm_movemask_epi8(self.0) as u16) | |
110 | } | |
111 | } | |
112 | ||
113 | /// Returns a `BitMask` indicating all bytes in the group which are full. | |
114 | #[inline] | |
115 | pub fn match_full(&self) -> BitMask { | |
116 | self.match_empty_or_deleted().invert() | |
117 | } | |
118 | ||
119 | /// Performs the following transformation on all bytes in the group: | |
120 | /// - `EMPTY => EMPTY` | |
121 | /// - `DELETED => EMPTY` | |
122 | /// - `FULL => DELETED` | |
123 | #[inline] | |
124 | pub fn convert_special_to_empty_and_full_to_deleted(self) -> Self { | |
125 | // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 | |
126 | // and high_bit = 0 (FULL) to 1000_0000 | |
127 | // | |
128 | // Here's this logic expanded to concrete values: | |
129 | // let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false) | |
130 | // 1111_1111 | 1000_0000 = 1111_1111 | |
131 | // 0000_0000 | 1000_0000 = 1000_0000 | |
132 | #[allow( | |
133 | clippy::cast_possible_wrap, // byte: 0x80_u8 as i8 | |
134 | )] | |
135 | unsafe { | |
136 | let zero = x86::_mm_setzero_si128(); | |
137 | let special = x86::_mm_cmpgt_epi8(zero, self.0); | |
e74abb32 XL |
138 | Group(x86::_mm_or_si128( |
139 | special, | |
140 | x86::_mm_set1_epi8(0x80_u8 as i8), | |
141 | )) | |
48663c56 XL |
142 | } |
143 | } | |
144 | } |