]> git.proxmox.com Git - rustc.git/blame - library/stdarch/crates/core_arch/src/arm/neon/mod.rs
New upstream version 1.48.0~beta.8+dfsg1
[rustc.git] / library / stdarch / crates / core_arch / src / arm / neon / mod.rs
CommitLineData
ba9703b0
XL
1//! ARMv7 NEON intrinsics
2
3#[rustfmt::skip]
4mod generated;
5#[rustfmt::skip]
6pub use self::generated::*;
7
8use crate::{core_arch::simd_llvm::*, hint::unreachable_unchecked, mem::transmute, ptr};
9#[cfg(test)]
10use stdarch_test::assert_instr;
11
types! {
    /// ARM-specific 64-bit wide vector of eight packed `i8`.
    pub struct int8x8_t(i8, i8, i8, i8, i8, i8, i8, i8);
    /// ARM-specific 64-bit wide vector of eight packed `u8`.
    pub struct uint8x8_t(u8, u8, u8, u8, u8, u8, u8, u8);
    /// ARM-specific 64-bit wide polynomial vector of eight packed `u8`.
    pub struct poly8x8_t(u8, u8, u8, u8, u8, u8, u8, u8);
    /// ARM-specific 64-bit wide vector of four packed `i16`.
    pub struct int16x4_t(i16, i16, i16, i16);
    /// ARM-specific 64-bit wide vector of four packed `u16`.
    pub struct uint16x4_t(u16, u16, u16, u16);
    // FIXME: ARM-specific 64-bit wide vector of four packed `f16`.
    // pub struct float16x4_t(f16, f16, f16, f16);
    /// ARM-specific 64-bit wide polynomial vector of four packed `u16`.
    pub struct poly16x4_t(u16, u16, u16, u16);
    /// ARM-specific 64-bit wide vector of two packed `i32`.
    pub struct int32x2_t(i32, i32);
    /// ARM-specific 64-bit wide vector of two packed `u32`.
    pub struct uint32x2_t(u32, u32);
    /// ARM-specific 64-bit wide vector of two packed `f32`.
    pub struct float32x2_t(f32, f32);
    /// ARM-specific 64-bit wide vector of one packed `i64`.
    pub struct int64x1_t(i64);
    /// ARM-specific 64-bit wide vector of one packed `u64`.
    pub struct uint64x1_t(u64);

    /// ARM-specific 128-bit wide vector of sixteen packed `i8`.
    pub struct int8x16_t(
        i8, i8, i8, i8, i8, i8, i8, i8,
        i8, i8, i8, i8, i8, i8, i8, i8,
    );
    /// ARM-specific 128-bit wide vector of sixteen packed `u8`.
    pub struct uint8x16_t(
        u8, u8, u8, u8, u8, u8, u8, u8,
        u8, u8, u8, u8, u8, u8, u8, u8,
    );
    /// ARM-specific 128-bit wide polynomial vector of sixteen packed `u8`.
    pub struct poly8x16_t(
        u8, u8, u8, u8, u8, u8, u8, u8,
        u8, u8, u8, u8, u8, u8, u8, u8
    );
    /// ARM-specific 128-bit wide vector of eight packed `i16`.
    pub struct int16x8_t(i16, i16, i16, i16, i16, i16, i16, i16);
    /// ARM-specific 128-bit wide vector of eight packed `u16`.
    pub struct uint16x8_t(u16, u16, u16, u16, u16, u16, u16, u16);
    // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`.
    // pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16, f16);
    /// ARM-specific 128-bit wide polynomial vector of eight packed `u16`.
    pub struct poly16x8_t(u16, u16, u16, u16, u16, u16, u16, u16);
    /// ARM-specific 128-bit wide vector of four packed `i32`.
    pub struct int32x4_t(i32, i32, i32, i32);
    /// ARM-specific 128-bit wide vector of four packed `u32`.
    pub struct uint32x4_t(u32, u32, u32, u32);
    /// ARM-specific 128-bit wide vector of four packed `f32`.
    pub struct float32x4_t(f32, f32, f32, f32);
    /// ARM-specific 128-bit wide vector of two packed `i64`.
    pub struct int64x2_t(i64, i64);
    /// ARM-specific 128-bit wide vector of two packed `u64`.
    pub struct uint64x2_t(u64, u64);
}
72
// Aggregate (multi-register) vector types used by the de-interleaving
// load/store and table-lookup intrinsics. Plain tuples of the base vectors.

/// ARM-specific type containing two `int8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct int8x8x2_t(pub int8x8_t, pub int8x8_t);
/// ARM-specific type containing three `int8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t);
/// ARM-specific type containing four `int8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t);

/// ARM-specific type containing two `uint8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t);
/// ARM-specific type containing three `uint8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t);
/// ARM-specific type containing four `uint8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_t);

/// ARM-specific type containing two `poly8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t);
/// ARM-specific type containing three `poly8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t);
/// ARM-specific type containing four `poly8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_t);
102
// Declarations of the LLVM intrinsics backing the public wrappers below.
// Each declaration carries two `link_name`s: the ARM (32-bit) and the
// AArch64 spelling of the same operation; `cfg_attr` picks the right one.
#[allow(improper_ctypes)]
extern "C" {
    // absolute value (64-bit)
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i8")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v8i8")]
    fn vabs_s8_(a: int8x8_t) -> int8x8_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i16")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v4i16")]
    fn vabs_s16_(a: int16x4_t) -> int16x4_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v2i32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v2i32")]
    fn vabs_s32_(a: int32x2_t) -> int32x2_t;
    // absolute value (128-bit)
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v16i8")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v16i8")]
    fn vabsq_s8_(a: int8x16_t) -> int8x16_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i16")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v8i16")]
    fn vabsq_s16_(a: int16x8_t) -> int16x8_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v4i32")]
    fn vabsq_s32_(a: int32x4_t) -> int32x4_t;

    // reciprocal square-root estimate
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
    fn frsqrte_v2f32(a: float32x2_t) -> float32x2_t;

    // saturating narrow: uint32x2_t vqmovn_u64 (uint64x2_t a)
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v2i32")]
    fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t;

    // pairwise minimum (signed, unsigned, float)
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v8i8")]
    fn vpmins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v4i16")]
    fn vpmins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v2i32")]
    fn vpmins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v8i8")]
    fn vpminu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v4i16")]
    fn vpminu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v2i32")]
    fn vpminu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminp.v2f32")]
    fn vpminf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;

    // pairwise maximum (signed, unsigned, float)
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v8i8")]
    fn vpmaxs_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v4i16")]
    fn vpmaxs_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v2i32")]
    fn vpmaxs_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v8i8")]
    fn vpmaxu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v4i16")]
    fn vpmaxu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v2i32")]
    fn vpmaxu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")]
    fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;

    // pairwise add (signed variants; unsigned wrappers reuse these)
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v4i16")]
    fn vpadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v2i32")]
    fn vpadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v8i8")]
    fn vpadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v16i8")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v16i8")]
    fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;

    // element-wise float max/min (128-bit)
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v4f32")]
    fn vmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v4f32")]
    fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
}
199
// ARM (32-bit) only LLVM intrinsics: table lookup (VTBL/VTBX) and
// unaligned 128-bit loads (VLD1).
#[cfg(target_arch = "arm")]
#[allow(improper_ctypes)]
extern "C" {
    // Table lookup. The leading operands are the table registers; the
    // trailing operand is presumably the per-lane index vector, matching
    // VTBL semantics — confirm against the LLVM intrinsic definition.
    // Parameter names were previously duplicated (`b, b`), which is
    // accepted in `extern` blocks but confusing; they are now distinct.
    #[link_name = "llvm.arm.neon.vtbl1"]
    fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbl2"]
    fn vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbl3"]
    fn vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbl4"]
    fn vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t;

    // Extended table lookup: the first operand supplies the fallback
    // lanes for out-of-range indices (VTBX semantics).
    #[link_name = "llvm.arm.neon.vtbx1"]
    fn vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbx2"]
    fn vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbx3"]
    fn vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t;
    #[link_name = "llvm.arm.neon.vtbx4"]
    fn vtbx4(
        a: int8x8_t,
        b: int8x8_t,
        c: int8x8_t,
        d: int8x8_t,
        e: int8x8_t,
        f: int8x8_t,
    ) -> int8x8_t;

    // 128-bit loads with an explicit alignment argument. The whole block
    // is already gated `#[cfg(target_arch = "arm")]`, so the previous
    // per-item `cfg_attr(target_arch = "arm", ...)` wrapper was redundant.
    #[link_name = "llvm.arm.neon.vld1.v4f32.p0i8"]
    fn vld1q_v4f32(addr: *const u8, align: u32) -> float32x4_t;
    #[link_name = "llvm.arm.neon.vld1.v4i32.p0i8"]
    fn vld1q_v4i32(addr: *const u8, align: u32) -> int32x4_t;
}
232
3dfed10e
XL
/// Absolute value (wrapping).
///
/// "Wrapping": the absolute value of the most negative lane value
/// (e.g. `i8::MIN`) wraps back to itself rather than saturating.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))]
pub unsafe fn vabs_s8(a: int8x8_t) -> int8x8_t {
    vabs_s8_(a)
}
/// Absolute value (wrapping).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))]
pub unsafe fn vabs_s16(a: int16x4_t) -> int16x4_t {
    vabs_s16_(a)
}
/// Absolute value (wrapping).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))]
pub unsafe fn vabs_s32(a: int32x2_t) -> int32x2_t {
    vabs_s32_(a)
}
/// Absolute value (wrapping).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))]
pub unsafe fn vabsq_s8(a: int8x16_t) -> int8x16_t {
    vabsq_s8_(a)
}
/// Absolute value (wrapping).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))]
pub unsafe fn vabsq_s16(a: int16x8_t) -> int16x8_t {
    vabsq_s16_(a)
}
/// Absolute value (wrapping).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))]
pub unsafe fn vabsq_s32(a: int32x4_t) -> int32x4_t {
    vabsq_s32_(a)
}
287
1b1a35ee
XL
/// Add pairwise.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
    vpadd_s16_(a, b)
}
/// Add pairwise.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
    vpadd_s32_(a, b)
}
/// Add pairwise.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    vpadd_s8_(a, b)
}
/// Add pairwise.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
    // Wrapping addition is bit-identical for signed and unsigned lanes,
    // so reuse the signed intrinsic via transmute.
    transmute(vpadd_s16_(transmute(a), transmute(b)))
}
/// Add pairwise.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
    // Same bit-level reuse of the signed intrinsic as vpadd_u16.
    transmute(vpadd_s32_(transmute(a), transmute(b)))
}
/// Add pairwise.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
    // Same bit-level reuse of the signed intrinsic as vpadd_u16.
    transmute(vpadd_s8_(transmute(a), transmute(b)))
}
342
ba9703b0
XL
/// Unsigned saturating extract narrow.
///
/// Narrows each 64-bit lane to 32 bits, saturating to `u32::MAX` on
/// overflow (UQXTN semantics).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn.u64))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))]
pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
    vqmovn_u64_(a)
}
352
// Element-wise (wrapping) vector addition, all lane types and both
// vector widths. Integer variants lower to ADD, float variants to FADD.

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))]
pub unsafe fn vaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))]
pub unsafe fn vadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    simd_add(a, b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))]
pub unsafe fn vaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
    simd_add(a, b)
}
512
// Long addition: each operand lane is widened to twice its width before
// adding, so the sum can never overflow.

/// Vector long add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))]
pub unsafe fn vaddl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
    // Widen (sign-extend) both inputs, then add in the wider type.
    let a: int16x8_t = simd_cast(a);
    let b: int16x8_t = simd_cast(b);
    simd_add(a, b)
}

/// Vector long add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))]
pub unsafe fn vaddl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
    let a: int32x4_t = simd_cast(a);
    let b: int32x4_t = simd_cast(b);
    simd_add(a, b)
}

/// Vector long add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))]
pub unsafe fn vaddl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
    let a: int64x2_t = simd_cast(a);
    let b: int64x2_t = simd_cast(b);
    simd_add(a, b)
}

/// Vector long add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))]
pub unsafe fn vaddl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
    // Widen (zero-extend) both inputs, then add in the wider type.
    let a: uint16x8_t = simd_cast(a);
    let b: uint16x8_t = simd_cast(b);
    simd_add(a, b)
}

/// Vector long add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))]
pub unsafe fn vaddl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
    let a: uint32x4_t = simd_cast(a);
    let b: uint32x4_t = simd_cast(b);
    simd_add(a, b)
}

/// Vector long add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))]
pub unsafe fn vaddl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
    let a: uint64x2_t = simd_cast(a);
    let b: uint64x2_t = simd_cast(b);
    simd_add(a, b)
}
584
// Narrowing moves: each lane is truncated to half its width (high bits
// discarded, no saturation — the saturating variants are vqmovn_*).

/// Vector narrow integer.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))]
pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t {
    simd_cast(a)
}

/// Vector narrow integer.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))]
pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t {
    simd_cast(a)
}

/// Vector narrow integer.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))]
pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t {
    simd_cast(a)
}

/// Vector narrow integer.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))]
pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
    simd_cast(a)
}

/// Vector narrow integer.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))]
pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
    simd_cast(a)
}

/// Vector narrow integer.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))]
pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t {
    simd_cast(a)
}
644
// Widening moves: each lane is extended to twice its width
// (sign-extended for the signed variants, zero-extended for unsigned).

/// Vector long move.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))]
pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t {
    simd_cast(a)
}

/// Vector long move.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))]
pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t {
    simd_cast(a)
}

/// Vector long move.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))]
pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t {
    simd_cast(a)
}

/// Vector long move.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))]
pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
    simd_cast(a)
}

/// Vector long move.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))]
pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
    simd_cast(a)
}

/// Vector long move.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))]
pub unsafe fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
    simd_cast(a)
}
704
/// Reciprocal square-root estimate.
// NOTE(review): unlike the other intrinsics in this file, this one does not
// carry `#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]` —
// confirm whether omitting the `v7` feature gate here is intentional.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
    frsqrte_v2f32(a)
}
713
// Bitwise NOT, implemented as XOR with an all-ones vector (x ^ !0 == !x).

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_s8(a: int8x8_t) -> int8x8_t {
    // All-ones mask: -1 in every lane.
    let b = int8x8_t(-1, -1, -1, -1, -1, -1, -1, -1);
    simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
    let b = int8x16_t(
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    );
    simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_s16(a: int16x4_t) -> int16x4_t {
    let b = int16x4_t(-1, -1, -1, -1);
    simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
    let b = int16x8_t(-1, -1, -1, -1, -1, -1, -1, -1);
    simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_s32(a: int32x2_t) -> int32x2_t {
    let b = int32x2_t(-1, -1);
    simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
    let b = int32x4_t(-1, -1, -1, -1);
    simd_xor(a, b)
}

/// Vector bitwise not.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
pub unsafe fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
    // All-ones mask: 255 (u8::MAX) in every lane.
    let b = uint8x8_t(255, 255, 255, 255, 255, 255, 255, 255);
    simd_xor(a, b)
}
792
793/// Vector bitwise not.
794#[inline]
795#[target_feature(enable = "neon")]
796#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
797#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
798#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
799pub unsafe fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
800 let b = uint8x16_t(
801 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
802 );
803 simd_xor(a, b)
804}
805
806/// Vector bitwise not.
807#[inline]
808#[target_feature(enable = "neon")]
809#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
810#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
811#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
812pub unsafe fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
813 let b = uint16x4_t(65_535, 65_535, 65_535, 65_535);
814 simd_xor(a, b)
815}
816
817/// Vector bitwise not.
818#[inline]
819#[target_feature(enable = "neon")]
820#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
821#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
822#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
823pub unsafe fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
824 let b = uint16x8_t(
825 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535,
826 );
827 simd_xor(a, b)
828}
829
830/// Vector bitwise not.
831#[inline]
832#[target_feature(enable = "neon")]
833#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
834#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
835#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
836pub unsafe fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
837 let b = uint32x2_t(4_294_967_295, 4_294_967_295);
838 simd_xor(a, b)
839}
840
841/// Vector bitwise not.
842#[inline]
843#[target_feature(enable = "neon")]
844#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
845#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
846#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
847pub unsafe fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
848 let b = uint32x4_t(4_294_967_295, 4_294_967_295, 4_294_967_295, 4_294_967_295);
849 simd_xor(a, b)
850}
851
852/// Vector bitwise not.
853#[inline]
854#[target_feature(enable = "neon")]
855#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
856#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
857#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
858pub unsafe fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
859 let b = poly8x8_t(255, 255, 255, 255, 255, 255, 255, 255);
860 simd_xor(a, b)
861}
862
863/// Vector bitwise not.
864#[inline]
865#[target_feature(enable = "neon")]
866#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
867#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
868#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))]
869pub unsafe fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
870 let b = poly8x16_t(
871 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
872 );
873 simd_xor(a, b)
874}
875
// Pairwise ("folding") min/max: each function lowers to the
// `vpmin`/`vpmax` (ARM) or `sminp`/`uminp`/`fminp`/`smaxp`/`umaxp`/`fmaxp`
// (AArch64) instruction asserted below, which reduces adjacent lane pairs
// of the two inputs with min/max. The actual reduction is performed by the
// LLVM intrinsics (`vpmins_*`, `vpminu_*`, ... declared elsewhere in this
// module).

/// Folding minimum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))]
pub unsafe fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    vpmins_v8i8(a, b)
}

/// Folding minimum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))]
pub unsafe fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
    vpmins_v4i16(a, b)
}

/// Folding minimum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))]
pub unsafe fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
    vpmins_v2i32(a, b)
}

/// Folding minimum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))]
pub unsafe fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
    vpminu_v8i8(a, b)
}

/// Folding minimum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))]
pub unsafe fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
    vpminu_v4i16(a, b)
}

/// Folding minimum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))]
pub unsafe fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
    vpminu_v2i32(a, b)
}

/// Folding minimum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminp))]
pub unsafe fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    vpminf_v2f32(a, b)
}

/// Folding maximum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))]
pub unsafe fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    vpmaxs_v8i8(a, b)
}

/// Folding maximum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))]
pub unsafe fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
    vpmaxs_v4i16(a, b)
}

/// Folding maximum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))]
pub unsafe fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
    vpmaxs_v2i32(a, b)
}

/// Folding maximum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))]
pub unsafe fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
    vpmaxu_v8i8(a, b)
}

/// Folding maximum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))]
pub unsafe fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
    vpmaxu_v4i16(a, b)
}

/// Folding maximum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))]
pub unsafe fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
    vpmaxu_v2i32(a, b)
}

/// Folding maximum of adjacent pairs
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxp))]
pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    vpmaxf_v2f32(a, b)
}
1015
// VTBL table look-ups (ARM/little-endian only): each byte of the index
// vector selects a byte from the 8/16/24/32-byte table formed by the
// table operand(s). Per the VTBL instruction's documented behavior,
// out-of-range indices produce 0. The `u8`/`p8` variants reuse the signed
// LLVM intrinsics via bit-preserving `transmute`s.

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    // `a` is the 8-byte table, `b` holds the per-lane byte indices.
    vtbl1(a, b)
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
    transmute(vtbl1(transmute(a), transmute(b)))
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
    transmute(vtbl1(transmute(a), transmute(b)))
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
    // Two-register (16-byte) table.
    vtbl2(a.0, a.1, b)
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
    transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)))
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
    transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)))
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
    // Three-register (24-byte) table.
    vtbl3(a.0, a.1, a.2, b)
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
    transmute(vtbl3(
        transmute(a.0),
        transmute(a.1),
        transmute(a.2),
        transmute(b),
    ))
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
    transmute(vtbl3(
        transmute(a.0),
        transmute(a.1),
        transmute(a.2),
        transmute(b),
    ))
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
    // Four-register (32-byte) table.
    vtbl4(a.0, a.1, a.2, a.3, b)
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
    transmute(vtbl4(
        transmute(a.0),
        transmute(a.1),
        transmute(a.2),
        transmute(a.3),
        transmute(b),
    ))
}

/// Table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbl))]
pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
    transmute(vtbl4(
        transmute(a.0),
        transmute(a.1),
        transmute(a.2),
        transmute(a.3),
        transmute(b),
    ))
}
1157
// VTBX extended table look-ups (ARM/little-endian only): like VTBL, each
// byte of the index vector `c` selects a byte from the table `b`, but per
// the VTBX instruction's documented behavior an out-of-range index leaves
// the corresponding byte of the fallback vector `a` unchanged instead of
// writing 0.

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
    // `a`: fallback bytes, `b`: 8-byte table, `c`: per-lane indices.
    vtbx1(a, b, c)
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
    transmute(vtbx1(transmute(a), transmute(b), transmute(c)))
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
    transmute(vtbx1(transmute(a), transmute(b), transmute(c)))
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
    // Two-register (16-byte) table.
    vtbx2(a, b.0, b.1, c)
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
    transmute(vtbx2(
        transmute(a),
        transmute(b.0),
        transmute(b.1),
        transmute(c),
    ))
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
    transmute(vtbx2(
        transmute(a),
        transmute(b.0),
        transmute(b.1),
        transmute(c),
    ))
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
    // Three-register (24-byte) table.
    vtbx3(a, b.0, b.1, b.2, c)
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
    transmute(vtbx3(
        transmute(a),
        transmute(b.0),
        transmute(b.1),
        transmute(b.2),
        transmute(c),
    ))
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
    transmute(vtbx3(
        transmute(a),
        transmute(b.0),
        transmute(b.1),
        transmute(b.2),
        transmute(c),
    ))
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
    // Four-register (32-byte) table.
    vtbx4(a, b.0, b.1, b.2, b.3, c)
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
    transmute(vtbx4(
        transmute(a),
        transmute(b.0),
        transmute(b.1),
        transmute(b.2),
        transmute(b.3),
        transmute(c),
    ))
}

/// Extended table look-up
#[inline]
#[cfg(target_arch = "arm")]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(vtbx))]
pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
    transmute(vtbx4(
        transmute(a),
        transmute(b.0),
        transmute(b.1),
        transmute(b.2),
        transmute(b.3),
        transmute(c),
    ))
}
1313
// Lane extraction: move one vector lane to a general-purpose register.
// `imm5` must be a compile-time constant (`rustc_args_required_const`);
// the runtime `assert!` additionally rejects out-of-range lane indices.

/// Move vector element to general-purpose register
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_args_required_const(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", imm5 = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, imm5 = 1))]
// Based on the discussion in https://github.com/rust-lang/stdarch/pull/792
// `mov` seems to be an acceptable intrinsic to compile to
// #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(vmov, imm5 = 1))]
pub unsafe fn vgetq_lane_u64(v: uint64x2_t, imm5: i32) -> u64 {
    // Two 64-bit lanes: only indices 0 and 1 are valid.
    assert!(imm5 >= 0 && imm5 <= 1);
    simd_extract(v, imm5 as u32)
}

/// Move vector element to general-purpose register
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_args_required_const(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", imm5 = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, imm5 = 0))]
// FIXME: on 32-bit ARM this seems to be turned into two vmov.32 instructions;
// validate correctness
pub unsafe fn vget_lane_u64(v: uint64x1_t, imm5: i32) -> u64 {
    // Single-lane vector: index 0 is the only valid lane.
    assert!(imm5 == 0);
    simd_extract(v, 0)
}

/// Move vector element to general-purpose register
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_args_required_const(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", imm5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, imm5 = 2))]
pub unsafe fn vgetq_lane_u16(v: uint16x8_t, imm5: i32) -> u16 {
    // Eight 16-bit lanes: valid indices are 0..=7.
    assert!(imm5 >= 0 && imm5 <= 7);
    simd_extract(v, imm5 as u32)
}

/// Move vector element to general-purpose register
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_args_required_const(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", imm5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, imm5 = 2))]
pub unsafe fn vgetq_lane_u32(v: uint32x4_t, imm5: i32) -> u32 {
    // Four 32-bit lanes: valid indices are 0..=3.
    assert!(imm5 >= 0 && imm5 <= 3);
    simd_extract(v, imm5 as u32)
}

/// Move vector element to general-purpose register
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_args_required_const(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", imm5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, imm5 = 2))]
pub unsafe fn vgetq_lane_s32(v: int32x4_t, imm5: i32) -> i32 {
    // Four 32-bit lanes: valid indices are 0..=3.
    assert!(imm5 >= 0 && imm5 <= 3);
    simd_extract(v, imm5 as u32)
}

/// Move vector element to general-purpose register
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_args_required_const(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", imm5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, imm5 = 2))]
pub unsafe fn vget_lane_u8(v: uint8x8_t, imm5: i32) -> u8 {
    // Eight 8-bit lanes: valid indices are 0..=7.
    assert!(imm5 >= 0 && imm5 <= 7);
    simd_extract(v, imm5 as u32)
}
1390
/// Duplicate vector element to vector or scalar
///
/// Broadcasts the scalar `value` into all sixteen `i8` lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t {
    int8x16_t(
        value, value, value, value, value, value, value, value, value, value, value, value, value,
        value, value, value,
    )
}

/// Duplicate vector element to vector or scalar
///
/// Broadcasts the scalar `value` into all sixteen `u8` lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t {
    uint8x16_t(
        value, value, value, value, value, value, value, value, value, value, value, value, value,
        value, value, value,
    )
}

/// Duplicate vector element to vector or scalar
///
/// `vmov`-style alias for [`vdupq_n_u8`].
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
pub unsafe fn vmovq_n_u8(value: u8) -> uint8x16_t {
    vdupq_n_u8(value)
}
1426
// Reinterpret casts: re-type the same 64/128 bits without changing them.
// `transmute` between equally-sized vector types emits no instruction,
// hence the `nop` assertions.

/// Vector reinterpret cast operation
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
    transmute(a)
}

/// Vector reinterpret cast operation
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
    transmute(a)
}

/// Vector reinterpret cast operation
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
    transmute(a)
}

/// Vector reinterpret cast operation
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
    transmute(a)
}

/// Vector reinterpret cast operation
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
    transmute(a)
}

/// Vector reinterpret cast operation
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
    transmute(a)
}
1480
/// Unsigned shift right
///
/// Shifts every `u8` lane of `a` right by the compile-time constant `imm3`.
/// `imm3` must be in `0..=7`; any other value is undefined behavior
/// (signalled to the optimizer via `unreachable_unchecked`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", imm3 = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("ushr", imm3 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn vshrq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
    if imm3 < 0 || imm3 > 7 {
        // SAFETY contract: callers must pass a constant in 0..=7.
        unreachable_unchecked();
    } else {
        // Per-lane scalar shifts; the backend fuses these into one vector shift.
        uint8x16_t(
            a.0 >> imm3,
            a.1 >> imm3,
            a.2 >> imm3,
            a.3 >> imm3,
            a.4 >> imm3,
            a.5 >> imm3,
            a.6 >> imm3,
            a.7 >> imm3,
            a.8 >> imm3,
            a.9 >> imm3,
            a.10 >> imm3,
            a.11 >> imm3,
            a.12 >> imm3,
            a.13 >> imm3,
            a.14 >> imm3,
            a.15 >> imm3,
        )
    }
}
1512
/// Shift left
///
/// NOTE(review): the original doc comment said "Shift right", but the body
/// (`<<`) and the `vshl`/`shl` instruction assertions show this is a left
/// shift. Shifts every `u8` lane of `a` left by the compile-time constant
/// `imm3`, which must be in `0..=7`; any other value is undefined behavior.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshl.s8", imm3 = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, imm3 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn vshlq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
    if imm3 < 0 || imm3 > 7 {
        // SAFETY contract: callers must pass a constant in 0..=7.
        unreachable_unchecked();
    } else {
        uint8x16_t(
            a.0 << imm3,
            a.1 << imm3,
            a.2 << imm3,
            a.3 << imm3,
            a.4 << imm3,
            a.5 << imm3,
            a.6 << imm3,
            a.7 << imm3,
            a.8 << imm3,
            a.9 << imm3,
            a.10 << imm3,
            a.11 << imm3,
            a.12 << imm3,
            a.13 << imm3,
            a.14 << imm3,
            a.15 << imm3,
        )
    }
}
1544
1545/// Extract vector from pair of vectors
1546#[inline]
1547#[target_feature(enable = "neon")]
1548#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1549#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", n = 3))]
1550#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, n = 3))]
1551#[rustc_args_required_const(2)]
1552pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t {
1553 if n < 0 || n > 15 {
1554 unreachable_unchecked();
1555 };
1556 match n & 0b1111 {
1557 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
1558 1 => simd_shuffle16(
1559 a,
1560 b,
1561 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
1562 ),
1563 2 => simd_shuffle16(
1564 a,
1565 b,
1566 [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
1567 ),
1568 3 => simd_shuffle16(
1569 a,
1570 b,
1571 [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
1572 ),
1573 4 => simd_shuffle16(
1574 a,
1575 b,
1576 [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
1577 ),
1578 5 => simd_shuffle16(
1579 a,
1580 b,
1581 [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
1582 ),
1583 6 => simd_shuffle16(
1584 a,
1585 b,
1586 [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21],
1587 ),
1588 7 => simd_shuffle16(
1589 a,
1590 b,
1591 [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22],
1592 ),
1593 8 => simd_shuffle16(
1594 a,
1595 b,
1596 [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
1597 ),
1598 9 => simd_shuffle16(
1599 a,
1600 b,
1601 [
1602 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
1603 ],
1604 ),
1605 10 => simd_shuffle16(
1606 a,
1607 b,
1608 [
1609 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
1610 ],
1611 ),
1612 11 => simd_shuffle16(
1613 a,
1614 b,
1615 [
1616 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
1617 ],
1618 ),
1619 12 => simd_shuffle16(
1620 a,
1621 b,
1622 [
1623 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
1624 ],
1625 ),
1626 13 => simd_shuffle16(
1627 a,
1628 b,
1629 [
1630 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
1631 ],
1632 ),
1633 14 => simd_shuffle16(
1634 a,
1635 b,
1636 [
1637 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
1638 ],
1639 ),
1640 15 => simd_shuffle16(
1641 a,
1642 b,
1643 [
1644 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
1645 ],
1646 ),
1647 _ => unreachable_unchecked(),
1648 }
1649}
1650
1651/// Extract vector from pair of vectors
1652#[inline]
1653#[target_feature(enable = "neon")]
1654#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1655#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", n = 3))]
1656#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, n = 3))]
1657#[rustc_args_required_const(2)]
1658pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t, n: i32) -> uint8x16_t {
1659 if n < 0 || n > 15 {
1660 unreachable_unchecked();
1661 };
1662 match n & 0b1111 {
1663 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
1664 1 => simd_shuffle16(
1665 a,
1666 b,
1667 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
1668 ),
1669 2 => simd_shuffle16(
1670 a,
1671 b,
1672 [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
1673 ),
1674 3 => simd_shuffle16(
1675 a,
1676 b,
1677 [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
1678 ),
1679 4 => simd_shuffle16(
1680 a,
1681 b,
1682 [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
1683 ),
1684 5 => simd_shuffle16(
1685 a,
1686 b,
1687 [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
1688 ),
1689 6 => simd_shuffle16(
1690 a,
1691 b,
1692 [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21],
1693 ),
1694 7 => simd_shuffle16(
1695 a,
1696 b,
1697 [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22],
1698 ),
1699 8 => simd_shuffle16(
1700 a,
1701 b,
1702 [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
1703 ),
1704 9 => simd_shuffle16(
1705 a,
1706 b,
1707 [
1708 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
1709 ],
1710 ),
1711 10 => simd_shuffle16(
1712 a,
1713 b,
1714 [
1715 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
1716 ],
1717 ),
1718 11 => simd_shuffle16(
1719 a,
1720 b,
1721 [
1722 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
1723 ],
1724 ),
1725 12 => simd_shuffle16(
1726 a,
1727 b,
1728 [
1729 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
1730 ],
1731 ),
1732 13 => simd_shuffle16(
1733 a,
1734 b,
1735 [
1736 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
1737 ],
1738 ),
1739 14 => simd_shuffle16(
1740 a,
1741 b,
1742 [
1743 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
1744 ],
1745 ),
1746 15 => simd_shuffle16(
1747 a,
1748 b,
1749 [
1750 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
1751 ],
1752 ),
1753 _ => unreachable_unchecked(),
1754 }
1755}
1756
1757/// Load multiple single-element structures to one, two, three, or four registers
1758#[inline]
1759#[target_feature(enable = "neon")]
1760#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1761#[cfg_attr(test, assert_instr(ldr))]
1762// even gcc compiles this to ldr: https://clang.godbolt.org/z/1bvH2x
1763// #[cfg_attr(test, assert_instr(ld1))]
1764pub unsafe fn vld1q_s8(addr: *const i8) -> int8x16_t {
1765 ptr::read(addr as *const int8x16_t)
1766}
1767
1768/// Load multiple single-element structures to one, two, three, or four registers
1769#[inline]
1770#[target_feature(enable = "neon")]
1771#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1772#[cfg_attr(test, assert_instr(ldr))]
1773// even gcc compiles this to ldr: https://clang.godbolt.org/z/1bvH2x
1774// #[cfg_attr(test, assert_instr(ld1))]
1775pub unsafe fn vld1q_u8(addr: *const u8) -> uint8x16_t {
1776 ptr::read(addr as *const uint8x16_t)
1777}
1778
1b1a35ee
XL
/// Load multiple single-element structures to one, two, three, or four registers
///
/// Loads four consecutive `i32` values from `addr` into a 128-bit vector.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
#[target_feature(enable = "v7")]
#[cfg_attr(test, assert_instr("vld1.32"))]
pub unsafe fn vld1q_s32(addr: *const i32) -> int32x4_t {
    // Forward to the raw vld1 binding; the trailing `4` is the alignment
    // hint in bytes — element alignment only, not full vector alignment.
    let p = addr as *const u8;
    vld1q_v4i32(p, 4)
}
1788
/// Load multiple single-element structures to one, two, three, or four registers
///
/// Loads four consecutive `u32` values from `addr` into a 128-bit vector.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
#[target_feature(enable = "v7")]
#[cfg_attr(test, assert_instr("vld1.32"))]
pub unsafe fn vld1q_u32(addr: *const u32) -> uint32x4_t {
    // The raw binding produces a signed vector; the load is bit-identical
    // for unsigned lanes, so reinterpret the result. `4` = byte alignment.
    let p = addr as *const u8;
    let raw = vld1q_v4i32(p, 4);
    transmute(raw)
}
1798
/// Load multiple single-element structures to one, two, three, or four registers
///
/// Loads four consecutive `f32` values from `addr` into a 128-bit vector.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
#[target_feature(enable = "v7")]
#[cfg_attr(test, assert_instr("vld1.32"))]
pub unsafe fn vld1q_f32(addr: *const f32) -> float32x4_t {
    // Forward to the raw vld1 binding; `4` is the alignment hint in bytes
    // (element alignment only — see the 4-byte-aligned load in the tests).
    let p = addr as *const u8;
    vld1q_v4f32(p, 4)
}
1808
1809/// Load one single-element structure and Replicate to all lanes (of one register).
1810#[inline]
1811#[target_feature(enable = "neon")]
1812#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1813#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
1814#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))]
1815pub unsafe fn vld1q_dup_f32(addr: *const f32) -> float32x4_t {
1816 use crate::core_arch::simd::f32x4;
1817 let v = *addr;
1818 transmute(f32x4::new(v, v, v, v))
1819}
1820
1821// These float-to-int implementations have undefined behaviour when `a` overflows
1822// the destination type. Clang has the same problem: https://llvm.org/PR47510
1823
/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector)
///
/// Lane-wise truncating conversion of four `f32` values to `i32`.
/// See the note above: behaviour is undefined when a lane overflows `i32`.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
#[target_feature(enable = "v7")]
#[cfg_attr(test, assert_instr("vcvt.s32.f32"))]
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
    use crate::core_arch::simd::{f32x4, i32x4};
    // View the opaque vector as typed lanes, cast, then view back.
    let lanes: f32x4 = transmute(a);
    let truncated: i32x4 = simd_cast(lanes);
    transmute(truncated)
}
1834
/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector)
///
/// Lane-wise truncating conversion of four `f32` values to `u32`.
/// See the note above: behaviour is undefined when a lane overflows `u32`.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
#[target_feature(enable = "v7")]
#[cfg_attr(test, assert_instr("vcvt.u32.f32"))]
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
    use crate::core_arch::simd::{f32x4, u32x4};
    // View the opaque vector as typed lanes, cast, then view back.
    let lanes: f32x4 = transmute(a);
    let truncated: u32x4 = simd_cast(lanes);
    transmute(truncated)
}
1845
/// Floating-point minimum (vector).
///
/// Lane-wise minimum of `a` and `b`.
/// NOTE(review): NaN handling follows the underlying `vmin.f32` / `fmin`
/// instruction — confirm the exact NaN semantics against the Arm reference
/// before relying on a particular result for NaN inputs.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmin.f32"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))]
pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
    // Thin wrapper over the architecture intrinsic binding.
    vminq_f32_(a, b)
}
1855
/// Floating-point maximum (vector).
///
/// Lane-wise maximum of `a` and `b`.
/// NOTE(review): NaN handling follows the underlying `vmax.f32` / `fmax`
/// instruction — confirm the exact NaN semantics against the Arm reference
/// before relying on a particular result for NaN inputs.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmax.f32"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))]
pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
    // Thin wrapper over the architecture intrinsic binding.
    vmaxq_f32_(a, b)
}
1865
ba9703b0
XL
1866#[cfg(test)]
1867mod tests {
1868 use super::*;
1869 use crate::core_arch::arm::test_support::*;
1870 use crate::core_arch::{arm::*, simd::*};
1871 use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec};
1872 use stdarch_test::simd_test;
1873
1874 #[simd_test(enable = "neon")]
1875 unsafe fn test_vld1q_s8() {
1876 let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1877 let e = a;
1878 let r: i8x16 = transmute(vld1q_s8(transmute(&a)));
1879 assert_eq!(r, e);
1880 }
1881
1882 #[simd_test(enable = "neon")]
1883 unsafe fn test_vld1q_u8() {
1884 let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1885 let e = a;
1886 let r: u8x16 = transmute(vld1q_u8(transmute(&a)));
1887 assert_eq!(r, e);
1888 }
1889
1b1a35ee
XL
1890 #[cfg(target_arch = "arm")]
1891 #[simd_test(enable = "neon")]
1892 unsafe fn test_vld1q_f32() {
1893 let e = f32x4::new(1., 2., 3., 4.);
1894 let f = [0., 1., 2., 3., 4.];
1895 // do a load that has 4 byte alignment to make sure we're not
1896 // over aligning it
1897 let r: f32x4 = transmute(vld1q_f32(f[1..].as_ptr()));
1898 assert_eq!(r, e);
1899 }
1900
1901 #[cfg(target_arch = "arm")]
1902 #[simd_test(enable = "neon")]
1903 unsafe fn test_vld1q_s32() {
1904 let e = i32x4::new(1, 2, 3, 4);
1905 let f = [0, 1, 2, 3, 4];
1906 // do a load that has 4 byte alignment to make sure we're not
1907 // over aligning it
1908 let r: i32x4 = transmute(vld1q_s32(f[1..].as_ptr()));
1909 assert_eq!(r, e);
1910 }
1911
1912 #[cfg(target_arch = "arm")]
1913 #[simd_test(enable = "neon")]
1914 unsafe fn test_vld1q_u32() {
1915 let e = u32x4::new(1, 2, 3, 4);
1916 let f = [0, 1, 2, 3, 4];
1917 // do a load that has 4 byte alignment to make sure we're not
1918 // over aligning it
1919 let r: u32x4 = transmute(vld1q_u32(f[1..].as_ptr()));
1920 assert_eq!(r, e);
1921 }
1922
1923 #[simd_test(enable = "neon")]
1924 unsafe fn test_vld1q_dup_f32() {
1925 let e = f32x4::new(1., 1., 1., 1.);
1926 let f = [1., 2., 3., 4.];
1927 let r: f32x4 = transmute(vld1q_dup_f32(f.as_ptr()));
1928 assert_eq!(r, e);
1929 }
1930
1931 #[cfg(target_arch = "arm")]
1932 #[simd_test(enable = "neon")]
1933 unsafe fn test_vcvtq_s32_f32() {
1934 let f = f32x4::new(-1., 2., 3., 4.);
1935 let e = i32x4::new(-1, 2, 3, 4);
1936 let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
1937 assert_eq!(r, e);
1938 }
1939
1940 #[cfg(target_arch = "arm")]
1941 #[simd_test(enable = "neon")]
1942 unsafe fn test_vcvtq_u32_f32() {
1943 let f = f32x4::new(1., 2., 3., 4.);
1944 let e = u32x4::new(1, 2, 3, 4);
1945 let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1946 assert_eq!(r, e);
1947 }
1948
ba9703b0
XL
1949 #[simd_test(enable = "neon")]
1950 unsafe fn test_vget_lane_u8() {
1951 let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
1952 let r = vget_lane_u8(transmute(v), 1);
1953 assert_eq!(r, 2);
1954 }
1955
1956 #[simd_test(enable = "neon")]
1957 unsafe fn test_vgetq_lane_u32() {
1958 let v = i32x4::new(1, 2, 3, 4);
1959 let r = vgetq_lane_u32(transmute(v), 1);
1960 assert_eq!(r, 2);
1961 }
1962
1b1a35ee
XL
1963 #[simd_test(enable = "neon")]
1964 unsafe fn test_vgetq_lane_s32() {
1965 let v = i32x4::new(1, 2, 3, 4);
1966 let r = vgetq_lane_s32(transmute(v), 1);
1967 assert_eq!(r, 2);
1968 }
1969
ba9703b0
XL
1970 #[simd_test(enable = "neon")]
1971 unsafe fn test_vget_lane_u64() {
1972 let v: u64 = 1;
1973 let r = vget_lane_u64(transmute(v), 0);
1974 assert_eq!(r, 1);
1975 }
1976
1977 #[simd_test(enable = "neon")]
1978 unsafe fn test_vgetq_lane_u16() {
1979 let v = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
1980 let r = vgetq_lane_u16(transmute(v), 1);
1981 assert_eq!(r, 2);
1982 }
1983
1984 #[simd_test(enable = "neon")]
1985 unsafe fn test_vextq_s8() {
1986 let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1987 let b = i8x16::new(
1988 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 31, 32,
1989 );
1990 let e = i8x16::new(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19);
1991 let r: i8x16 = transmute(vextq_s8(transmute(a), transmute(b), 3));
1992 assert_eq!(r, e);
1993 }
1994
1995 #[simd_test(enable = "neon")]
1996 unsafe fn test_vextq_u8() {
1997 let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
1998 let b = u8x16::new(
1999 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 31, 32,
2000 );
2001 let e = u8x16::new(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19);
2002 let r: u8x16 = transmute(vextq_u8(transmute(a), transmute(b), 3));
2003 assert_eq!(r, e);
2004 }
2005
2006 #[simd_test(enable = "neon")]
2007 unsafe fn test_vshrq_n_u8() {
2008 let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
2009 let e = u8x16::new(0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4);
2010 let r: u8x16 = transmute(vshrq_n_u8(transmute(a), 2));
2011 assert_eq!(r, e);
2012 }
2013
2014 #[simd_test(enable = "neon")]
2015 unsafe fn test_vshlq_n_u8() {
2016 let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
2017 let e = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
2018 let r: u8x16 = transmute(vshlq_n_u8(transmute(a), 2));
2019 assert_eq!(r, e);
2020 }
2021
2022 #[simd_test(enable = "neon")]
2023 unsafe fn test_vqmovn_u64() {
2024 let a = u64x2::new(1, 2);
2025 let e = u32x2::new(1, 2);
2026 let r: u32x2 = transmute(vqmovn_u64(transmute(a)));
2027 assert_eq!(r, e);
2028 }
2029
2030 #[simd_test(enable = "neon")]
2031 unsafe fn test_vreinterpret_u64_u32() {
2032 let v: i8 = 42;
2033 let e = i8x16::new(
2034 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
2035 );
2036 let r: i8x16 = transmute(vdupq_n_s8(v));
2037 assert_eq!(r, e);
2038 }
2039
2040 #[simd_test(enable = "neon")]
2041 unsafe fn test_vdupq_n_s8() {
2042 let v: i8 = 42;
2043 let e = i8x16::new(
2044 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
2045 );
2046 let r: i8x16 = transmute(vdupq_n_s8(v));
2047 assert_eq!(r, e);
2048 }
2049
2050 #[simd_test(enable = "neon")]
2051 unsafe fn test_vdupq_n_u8() {
2052 let v: u8 = 42;
2053 let e = u8x16::new(
2054 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
2055 );
2056 let r: u8x16 = transmute(vdupq_n_u8(v));
2057 assert_eq!(r, e);
2058 }
2059
2060 #[simd_test(enable = "neon")]
2061 unsafe fn test_vmovq_n_u8() {
2062 let v: u8 = 42;
2063 let e = u8x16::new(
2064 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
2065 );
2066 let r: u8x16 = transmute(vmovq_n_u8(v));
2067 assert_eq!(r, e);
2068 }
2069
2070 #[simd_test(enable = "neon")]
2071 unsafe fn test_vgetq_lane_u64() {
2072 let v = i64x2::new(1, 2);
2073 let r = vgetq_lane_u64(transmute(v), 1);
2074 assert_eq!(r, 2);
2075 }
2076
2077 #[simd_test(enable = "neon")]
2078 unsafe fn test_vadd_s8() {
2079 test_ari_s8(
2080 |i, j| vadd_s8(i, j),
2081 |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 },
2082 );
2083 }
2084 #[simd_test(enable = "neon")]
2085 unsafe fn test_vaddq_s8() {
2086 testq_ari_s8(
2087 |i, j| vaddq_s8(i, j),
2088 |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 },
2089 );
2090 }
2091 #[simd_test(enable = "neon")]
2092 unsafe fn test_vadd_s16() {
2093 test_ari_s16(
2094 |i, j| vadd_s16(i, j),
2095 |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 },
2096 );
2097 }
2098 #[simd_test(enable = "neon")]
2099 unsafe fn test_vaddq_s16() {
2100 testq_ari_s16(
2101 |i, j| vaddq_s16(i, j),
2102 |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 },
2103 );
2104 }
2105 #[simd_test(enable = "neon")]
2106 unsafe fn test_vadd_s32() {
2107 test_ari_s32(
2108 |i, j| vadd_s32(i, j),
2109 |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 },
2110 );
2111 }
2112 #[simd_test(enable = "neon")]
2113 unsafe fn test_vaddq_s32() {
2114 testq_ari_s32(
2115 |i, j| vaddq_s32(i, j),
2116 |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 },
2117 );
2118 }
2119
2120 #[simd_test(enable = "neon")]
2121 unsafe fn test_vadd_u8() {
2122 test_ari_u8(
2123 |i, j| vadd_u8(i, j),
2124 |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 },
2125 );
2126 }
2127 #[simd_test(enable = "neon")]
2128 unsafe fn test_vaddq_u8() {
2129 testq_ari_u8(
2130 |i, j| vaddq_u8(i, j),
2131 |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 },
2132 );
2133 }
2134 #[simd_test(enable = "neon")]
2135 unsafe fn test_vadd_u16() {
2136 test_ari_u16(
2137 |i, j| vadd_u16(i, j),
2138 |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 },
2139 );
2140 }
2141 #[simd_test(enable = "neon")]
2142 unsafe fn test_vaddq_u16() {
2143 testq_ari_u16(
2144 |i, j| vaddq_u16(i, j),
2145 |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 },
2146 );
2147 }
2148 #[simd_test(enable = "neon")]
2149 unsafe fn test_vadd_u32() {
2150 test_ari_u32(
2151 |i, j| vadd_u32(i, j),
2152 |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 },
2153 );
2154 }
2155 #[simd_test(enable = "neon")]
2156 unsafe fn test_vaddq_u32() {
2157 testq_ari_u32(
2158 |i, j| vaddq_u32(i, j),
2159 |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 },
2160 );
2161 }
2162
2163 #[simd_test(enable = "neon")]
2164 unsafe fn test_vadd_f32() {
2165 test_ari_f32(|i, j| vadd_f32(i, j), |a: f32, b: f32| -> f32 { a + b });
2166 }
2167 #[simd_test(enable = "neon")]
2168 unsafe fn test_vaddq_f32() {
2169 testq_ari_f32(|i, j| vaddq_f32(i, j), |a: f32, b: f32| -> f32 { a + b });
2170 }
2171
2172 #[simd_test(enable = "neon")]
2173 unsafe fn test_vaddl_s8() {
2174 let v = i8::MAX;
2175 let a = i8x8::new(v, v, v, v, v, v, v, v);
2176 let v = 2 * (v as i16);
2177 let e = i16x8::new(v, v, v, v, v, v, v, v);
2178 let r: i16x8 = transmute(vaddl_s8(transmute(a), transmute(a)));
2179 assert_eq!(r, e);
2180 }
2181
2182 #[simd_test(enable = "neon")]
2183 unsafe fn test_vaddl_s16() {
2184 let v = i16::MAX;
2185 let a = i16x4::new(v, v, v, v);
2186 let v = 2 * (v as i32);
2187 let e = i32x4::new(v, v, v, v);
2188 let r: i32x4 = transmute(vaddl_s16(transmute(a), transmute(a)));
2189 assert_eq!(r, e);
2190 }
2191
2192 #[simd_test(enable = "neon")]
2193 unsafe fn test_vaddl_s32() {
2194 let v = i32::MAX;
2195 let a = i32x2::new(v, v);
2196 let v = 2 * (v as i64);
2197 let e = i64x2::new(v, v);
2198 let r: i64x2 = transmute(vaddl_s32(transmute(a), transmute(a)));
2199 assert_eq!(r, e);
2200 }
2201
2202 #[simd_test(enable = "neon")]
2203 unsafe fn test_vaddl_u8() {
2204 let v = u8::MAX;
2205 let a = u8x8::new(v, v, v, v, v, v, v, v);
2206 let v = 2 * (v as u16);
2207 let e = u16x8::new(v, v, v, v, v, v, v, v);
2208 let r: u16x8 = transmute(vaddl_u8(transmute(a), transmute(a)));
2209 assert_eq!(r, e);
2210 }
2211
2212 #[simd_test(enable = "neon")]
2213 unsafe fn test_vaddl_u16() {
2214 let v = u16::MAX;
2215 let a = u16x4::new(v, v, v, v);
2216 let v = 2 * (v as u32);
2217 let e = u32x4::new(v, v, v, v);
2218 let r: u32x4 = transmute(vaddl_u16(transmute(a), transmute(a)));
2219 assert_eq!(r, e);
2220 }
2221
2222 #[simd_test(enable = "neon")]
2223 unsafe fn test_vaddl_u32() {
2224 let v = u32::MAX;
2225 let a = u32x2::new(v, v);
2226 let v = 2 * (v as u64);
2227 let e = u64x2::new(v, v);
2228 let r: u64x2 = transmute(vaddl_u32(transmute(a), transmute(a)));
2229 assert_eq!(r, e);
2230 }
2231
2232 #[simd_test(enable = "neon")]
2233 unsafe fn test_vmvn_s8() {
2234 let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
2235 let e = i8x8::new(-1, -2, -3, -4, -5, -6, -7, -8);
2236 let r: i8x8 = transmute(vmvn_s8(transmute(a)));
2237 assert_eq!(r, e);
2238 }
2239
2240 #[simd_test(enable = "neon")]
2241 unsafe fn test_vmvnq_s8() {
2242 let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2243 let e = i8x16::new(
2244 -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
2245 );
2246 let r: i8x16 = transmute(vmvnq_s8(transmute(a)));
2247 assert_eq!(r, e);
2248 }
2249
2250 #[simd_test(enable = "neon")]
2251 unsafe fn test_vmvn_s16() {
2252 let a = i16x4::new(0, 1, 2, 3);
2253 let e = i16x4::new(-1, -2, -3, -4);
2254 let r: i16x4 = transmute(vmvn_s16(transmute(a)));
2255 assert_eq!(r, e);
2256 }
2257
2258 #[simd_test(enable = "neon")]
2259 unsafe fn test_vmvnq_s16() {
2260 let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
2261 let e = i16x8::new(-1, -2, -3, -4, -5, -6, -7, -8);
2262 let r: i16x8 = transmute(vmvnq_s16(transmute(a)));
2263 assert_eq!(r, e);
2264 }
2265
2266 #[simd_test(enable = "neon")]
2267 unsafe fn test_vmvn_s32() {
2268 let a = i32x2::new(0, 1);
2269 let e = i32x2::new(-1, -2);
2270 let r: i32x2 = transmute(vmvn_s32(transmute(a)));
2271 assert_eq!(r, e);
2272 }
2273
2274 #[simd_test(enable = "neon")]
2275 unsafe fn test_vmvnq_s32() {
2276 let a = i32x4::new(0, 1, 2, 3);
2277 let e = i32x4::new(-1, -2, -3, -4);
2278 let r: i32x4 = transmute(vmvnq_s32(transmute(a)));
2279 assert_eq!(r, e);
2280 }
2281
2282 #[simd_test(enable = "neon")]
2283 unsafe fn test_vmvn_u8() {
2284 let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
2285 let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248);
2286 let r: u8x8 = transmute(vmvn_u8(transmute(a)));
2287 assert_eq!(r, e);
2288 }
2289
2290 #[simd_test(enable = "neon")]
2291 unsafe fn test_vmvnq_u8() {
2292 let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2293 let e = u8x16::new(
2294 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240,
2295 );
2296 let r: u8x16 = transmute(vmvnq_u8(transmute(a)));
2297 assert_eq!(r, e);
2298 }
2299
2300 #[simd_test(enable = "neon")]
2301 unsafe fn test_vmvn_u16() {
2302 let a = u16x4::new(0, 1, 2, 3);
2303 let e = u16x4::new(65_535, 65_534, 65_533, 65_532);
2304 let r: u16x4 = transmute(vmvn_u16(transmute(a)));
2305 assert_eq!(r, e);
2306 }
2307
2308 #[simd_test(enable = "neon")]
2309 unsafe fn test_vmvnq_u16() {
2310 let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
2311 let e = u16x8::new(
2312 65_535, 65_534, 65_533, 65_532, 65_531, 65_530, 65_529, 65_528,
2313 );
2314 let r: u16x8 = transmute(vmvnq_u16(transmute(a)));
2315 assert_eq!(r, e);
2316 }
2317
2318 #[simd_test(enable = "neon")]
2319 unsafe fn test_vmvn_u32() {
2320 let a = u32x2::new(0, 1);
2321 let e = u32x2::new(4_294_967_295, 4_294_967_294);
2322 let r: u32x2 = transmute(vmvn_u32(transmute(a)));
2323 assert_eq!(r, e);
2324 }
2325
2326 #[simd_test(enable = "neon")]
2327 unsafe fn test_vmvnq_u32() {
2328 let a = u32x4::new(0, 1, 2, 3);
2329 let e = u32x4::new(4_294_967_295, 4_294_967_294, 4_294_967_293, 4_294_967_292);
2330 let r: u32x4 = transmute(vmvnq_u32(transmute(a)));
2331 assert_eq!(r, e);
2332 }
2333
2334 #[simd_test(enable = "neon")]
2335 unsafe fn test_vmvn_p8() {
2336 let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
2337 let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248);
2338 let r: u8x8 = transmute(vmvn_p8(transmute(a)));
2339 assert_eq!(r, e);
2340 }
2341
2342 #[simd_test(enable = "neon")]
2343 unsafe fn test_vmvnq_p8() {
2344 let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2345 let e = u8x16::new(
2346 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240,
2347 );
2348 let r: u8x16 = transmute(vmvnq_p8(transmute(a)));
2349 assert_eq!(r, e);
2350 }
2351
2352 #[simd_test(enable = "neon")]
2353 unsafe fn test_vmovn_s16() {
2354 let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2355 let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2356 let r: i8x8 = transmute(vmovn_s16(transmute(a)));
2357 assert_eq!(r, e);
2358 }
2359
2360 #[simd_test(enable = "neon")]
2361 unsafe fn test_vmovn_s32() {
2362 let a = i32x4::new(1, 2, 3, 4);
2363 let e = i16x4::new(1, 2, 3, 4);
2364 let r: i16x4 = transmute(vmovn_s32(transmute(a)));
2365 assert_eq!(r, e);
2366 }
2367
2368 #[simd_test(enable = "neon")]
2369 unsafe fn test_vmovn_s64() {
2370 let a = i64x2::new(1, 2);
2371 let e = i32x2::new(1, 2);
2372 let r: i32x2 = transmute(vmovn_s64(transmute(a)));
2373 assert_eq!(r, e);
2374 }
2375
2376 #[simd_test(enable = "neon")]
2377 unsafe fn test_vmovn_u16() {
2378 let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2379 let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2380 let r: u8x8 = transmute(vmovn_u16(transmute(a)));
2381 assert_eq!(r, e);
2382 }
2383
2384 #[simd_test(enable = "neon")]
2385 unsafe fn test_vmovn_u32() {
2386 let a = u32x4::new(1, 2, 3, 4);
2387 let e = u16x4::new(1, 2, 3, 4);
2388 let r: u16x4 = transmute(vmovn_u32(transmute(a)));
2389 assert_eq!(r, e);
2390 }
2391
2392 #[simd_test(enable = "neon")]
2393 unsafe fn test_vmovn_u64() {
2394 let a = u64x2::new(1, 2);
2395 let e = u32x2::new(1, 2);
2396 let r: u32x2 = transmute(vmovn_u64(transmute(a)));
2397 assert_eq!(r, e);
2398 }
2399
2400 #[simd_test(enable = "neon")]
2401 unsafe fn test_vmovl_s8() {
2402 let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2403 let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2404 let r: i16x8 = transmute(vmovl_s8(transmute(a)));
2405 assert_eq!(r, e);
2406 }
2407
2408 #[simd_test(enable = "neon")]
2409 unsafe fn test_vmovl_s16() {
2410 let e = i32x4::new(1, 2, 3, 4);
2411 let a = i16x4::new(1, 2, 3, 4);
2412 let r: i32x4 = transmute(vmovl_s16(transmute(a)));
2413 assert_eq!(r, e);
2414 }
2415
2416 #[simd_test(enable = "neon")]
2417 unsafe fn test_vmovl_s32() {
2418 let e = i64x2::new(1, 2);
2419 let a = i32x2::new(1, 2);
2420 let r: i64x2 = transmute(vmovl_s32(transmute(a)));
2421 assert_eq!(r, e);
2422 }
2423
2424 #[simd_test(enable = "neon")]
2425 unsafe fn test_vmovl_u8() {
2426 let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2427 let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2428 let r: u16x8 = transmute(vmovl_u8(transmute(a)));
2429 assert_eq!(r, e);
2430 }
2431
2432 #[simd_test(enable = "neon")]
2433 unsafe fn test_vmovl_u16() {
2434 let e = u32x4::new(1, 2, 3, 4);
2435 let a = u16x4::new(1, 2, 3, 4);
2436 let r: u32x4 = transmute(vmovl_u16(transmute(a)));
2437 assert_eq!(r, e);
2438 }
2439
2440 #[simd_test(enable = "neon")]
2441 unsafe fn test_vmovl_u32() {
2442 let e = u64x2::new(1, 2);
2443 let a = u32x2::new(1, 2);
2444 let r: u64x2 = transmute(vmovl_u32(transmute(a)));
2445 assert_eq!(r, e);
2446 }
2447
    #[simd_test(enable = "neon")]
    // NOTE(review): despite the name, this exercises vrsqrte_f32 (reciprocal
    // square-root *estimate*); consider renaming to test_vrsqrte_f32.
    unsafe fn test_vrsqrt_f32() {
        let a = f32x2::new(1.0, 2.0);
        // Expected values are approximate (0.998..., 0.705... rather than the
        // exact 1.0, 0.7071...) — presumably the instruction's fixed estimate
        // results; confirm against the Arm VRSQRTE specification.
        let e = f32x2::new(0.9980469, 0.7050781);
        let r: f32x2 = transmute(vrsqrte_f32(transmute(a)));
        assert_eq!(r, e);
    }
2455
2456 #[simd_test(enable = "neon")]
2457 unsafe fn test_vpmin_s8() {
2458 let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8);
2459 let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
2460 let e = i8x8::new(-2, -4, 5, 7, 0, 2, 4, 6);
2461 let r: i8x8 = transmute(vpmin_s8(transmute(a), transmute(b)));
2462 assert_eq!(r, e);
2463 }
2464
2465 #[simd_test(enable = "neon")]
2466 unsafe fn test_vpmin_s16() {
2467 let a = i16x4::new(1, 2, 3, -4);
2468 let b = i16x4::new(0, 3, 2, 5);
2469 let e = i16x4::new(1, -4, 0, 2);
2470 let r: i16x4 = transmute(vpmin_s16(transmute(a), transmute(b)));
2471 assert_eq!(r, e);
2472 }
2473
2474 #[simd_test(enable = "neon")]
2475 unsafe fn test_vpmin_s32() {
2476 let a = i32x2::new(1, -2);
2477 let b = i32x2::new(0, 3);
2478 let e = i32x2::new(-2, 0);
2479 let r: i32x2 = transmute(vpmin_s32(transmute(a), transmute(b)));
2480 assert_eq!(r, e);
2481 }
2482
2483 #[simd_test(enable = "neon")]
2484 unsafe fn test_vpmin_u8() {
2485 let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2486 let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
2487 let e = u8x8::new(1, 3, 5, 7, 0, 2, 4, 6);
2488 let r: u8x8 = transmute(vpmin_u8(transmute(a), transmute(b)));
2489 assert_eq!(r, e);
2490 }
2491
2492 #[simd_test(enable = "neon")]
2493 unsafe fn test_vpmin_u16() {
2494 let a = u16x4::new(1, 2, 3, 4);
2495 let b = u16x4::new(0, 3, 2, 5);
2496 let e = u16x4::new(1, 3, 0, 2);
2497 let r: u16x4 = transmute(vpmin_u16(transmute(a), transmute(b)));
2498 assert_eq!(r, e);
2499 }
2500
2501 #[simd_test(enable = "neon")]
2502 unsafe fn test_vpmin_u32() {
2503 let a = u32x2::new(1, 2);
2504 let b = u32x2::new(0, 3);
2505 let e = u32x2::new(1, 0);
2506 let r: u32x2 = transmute(vpmin_u32(transmute(a), transmute(b)));
2507 assert_eq!(r, e);
2508 }
2509
2510 #[simd_test(enable = "neon")]
2511 unsafe fn test_vpmin_f32() {
2512 let a = f32x2::new(1., -2.);
2513 let b = f32x2::new(0., 3.);
2514 let e = f32x2::new(-2., 0.);
2515 let r: f32x2 = transmute(vpmin_f32(transmute(a), transmute(b)));
2516 assert_eq!(r, e);
2517 }
2518
2519 #[simd_test(enable = "neon")]
2520 unsafe fn test_vpmax_s8() {
2521 let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8);
2522 let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
2523 let e = i8x8::new(1, 3, 6, 8, 3, 5, 7, 9);
2524 let r: i8x8 = transmute(vpmax_s8(transmute(a), transmute(b)));
2525 assert_eq!(r, e);
2526 }
2527
2528 #[simd_test(enable = "neon")]
2529 unsafe fn test_vpmax_s16() {
2530 let a = i16x4::new(1, 2, 3, -4);
2531 let b = i16x4::new(0, 3, 2, 5);
2532 let e = i16x4::new(2, 3, 3, 5);
2533 let r: i16x4 = transmute(vpmax_s16(transmute(a), transmute(b)));
2534 assert_eq!(r, e);
2535 }
2536
2537 #[simd_test(enable = "neon")]
2538 unsafe fn test_vpmax_s32() {
2539 let a = i32x2::new(1, -2);
2540 let b = i32x2::new(0, 3);
2541 let e = i32x2::new(1, 3);
2542 let r: i32x2 = transmute(vpmax_s32(transmute(a), transmute(b)));
2543 assert_eq!(r, e);
2544 }
2545
2546 #[simd_test(enable = "neon")]
2547 unsafe fn test_vpmax_u8() {
2548 let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
2549 let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
2550 let e = u8x8::new(2, 4, 6, 8, 3, 5, 7, 9);
2551 let r: u8x8 = transmute(vpmax_u8(transmute(a), transmute(b)));
2552 assert_eq!(r, e);
2553 }
2554
2555 #[simd_test(enable = "neon")]
2556 unsafe fn test_vpmax_u16() {
2557 let a = u16x4::new(1, 2, 3, 4);
2558 let b = u16x4::new(0, 3, 2, 5);
2559 let e = u16x4::new(2, 4, 3, 5);
2560 let r: u16x4 = transmute(vpmax_u16(transmute(a), transmute(b)));
2561 assert_eq!(r, e);
2562 }
2563
2564 #[simd_test(enable = "neon")]
2565 unsafe fn test_vpmax_u32() {
2566 let a = u32x2::new(1, 2);
2567 let b = u32x2::new(0, 3);
2568 let e = u32x2::new(2, 3);
2569 let r: u32x2 = transmute(vpmax_u32(transmute(a), transmute(b)));
2570 assert_eq!(r, e);
2571 }
2572
2573 #[simd_test(enable = "neon")]
2574 unsafe fn test_vpmax_f32() {
2575 let a = f32x2::new(1., -2.);
2576 let b = f32x2::new(0., 3.);
2577 let e = f32x2::new(1., 3.);
2578 let r: f32x2 = transmute(vpmax_f32(transmute(a), transmute(b)));
2579 assert_eq!(r, e);
2580 }
2581
2582 #[simd_test(enable = "neon")]
2583 unsafe fn test_vand_s8() {
2584 test_bit_s8(|i, j| vand_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
2585 }
2586 #[simd_test(enable = "neon")]
2587 unsafe fn test_vandq_s8() {
2588 testq_bit_s8(|i, j| vandq_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
2589 }
2590 #[simd_test(enable = "neon")]
2591 unsafe fn test_vand_s16() {
2592 test_bit_s16(|i, j| vand_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
2593 }
2594 #[simd_test(enable = "neon")]
2595 unsafe fn test_vandq_s16() {
2596 testq_bit_s16(|i, j| vandq_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
2597 }
2598 #[simd_test(enable = "neon")]
2599 unsafe fn test_vand_s32() {
2600 test_bit_s32(|i, j| vand_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
2601 }
2602 #[simd_test(enable = "neon")]
2603 unsafe fn test_vandq_s32() {
2604 testq_bit_s32(|i, j| vandq_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
2605 }
2606 #[simd_test(enable = "neon")]
2607 unsafe fn test_vand_s64() {
2608 test_bit_s64(|i, j| vand_s64(i, j), |a: i64, b: i64| -> i64 { a & b });
2609 }
2610 #[simd_test(enable = "neon")]
2611 unsafe fn test_vandq_s64() {
2612 testq_bit_s64(|i, j| vandq_s64(i, j), |a: i64, b: i64| -> i64 { a & b });
2613 }
2614
2615 #[simd_test(enable = "neon")]
2616 unsafe fn test_vand_u8() {
2617 test_bit_u8(|i, j| vand_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
2618 }
2619 #[simd_test(enable = "neon")]
2620 unsafe fn test_vandq_u8() {
2621 testq_bit_u8(|i, j| vandq_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
2622 }
2623 #[simd_test(enable = "neon")]
2624 unsafe fn test_vand_u16() {
2625 test_bit_u16(|i, j| vand_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
2626 }
2627 #[simd_test(enable = "neon")]
2628 unsafe fn test_vandq_u16() {
2629 testq_bit_u16(|i, j| vandq_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
2630 }
2631 #[simd_test(enable = "neon")]
2632 unsafe fn test_vand_u32() {
2633 test_bit_u32(|i, j| vand_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
2634 }
2635 #[simd_test(enable = "neon")]
2636 unsafe fn test_vandq_u32() {
2637 testq_bit_u32(|i, j| vandq_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
2638 }
2639 #[simd_test(enable = "neon")]
2640 unsafe fn test_vand_u64() {
2641 test_bit_u64(|i, j| vand_u64(i, j), |a: u64, b: u64| -> u64 { a & b });
2642 }
2643 #[simd_test(enable = "neon")]
2644 unsafe fn test_vandq_u64() {
2645 testq_bit_u64(|i, j| vandq_u64(i, j), |a: u64, b: u64| -> u64 { a & b });
2646 }
2647
2648 #[simd_test(enable = "neon")]
2649 unsafe fn test_vorr_s8() {
2650 test_bit_s8(|i, j| vorr_s8(i, j), |a: i8, b: i8| -> i8 { a | b });
2651 }
2652 #[simd_test(enable = "neon")]
2653 unsafe fn test_vorrq_s8() {
2654 testq_bit_s8(|i, j| vorrq_s8(i, j), |a: i8, b: i8| -> i8 { a | b });
2655 }
2656 #[simd_test(enable = "neon")]
2657 unsafe fn test_vorr_s16() {
2658 test_bit_s16(|i, j| vorr_s16(i, j), |a: i16, b: i16| -> i16 { a | b });
2659 }
2660 #[simd_test(enable = "neon")]
2661 unsafe fn test_vorrq_s16() {
2662 testq_bit_s16(|i, j| vorrq_s16(i, j), |a: i16, b: i16| -> i16 { a | b });
2663 }
2664 #[simd_test(enable = "neon")]
2665 unsafe fn test_vorr_s32() {
2666 test_bit_s32(|i, j| vorr_s32(i, j), |a: i32, b: i32| -> i32 { a | b });
2667 }
2668 #[simd_test(enable = "neon")]
2669 unsafe fn test_vorrq_s32() {
2670 testq_bit_s32(|i, j| vorrq_s32(i, j), |a: i32, b: i32| -> i32 { a | b });
2671 }
2672 #[simd_test(enable = "neon")]
2673 unsafe fn test_vorr_s64() {
2674 test_bit_s64(|i, j| vorr_s64(i, j), |a: i64, b: i64| -> i64 { a | b });
2675 }
2676 #[simd_test(enable = "neon")]
2677 unsafe fn test_vorrq_s64() {
2678 testq_bit_s64(|i, j| vorrq_s64(i, j), |a: i64, b: i64| -> i64 { a | b });
2679 }
2680
2681 #[simd_test(enable = "neon")]
2682 unsafe fn test_vorr_u8() {
2683 test_bit_u8(|i, j| vorr_u8(i, j), |a: u8, b: u8| -> u8 { a | b });
2684 }
2685 #[simd_test(enable = "neon")]
2686 unsafe fn test_vorrq_u8() {
2687 testq_bit_u8(|i, j| vorrq_u8(i, j), |a: u8, b: u8| -> u8 { a | b });
2688 }
2689 #[simd_test(enable = "neon")]
2690 unsafe fn test_vorr_u16() {
2691 test_bit_u16(|i, j| vorr_u16(i, j), |a: u16, b: u16| -> u16 { a | b });
2692 }
2693 #[simd_test(enable = "neon")]
2694 unsafe fn test_vorrq_u16() {
2695 testq_bit_u16(|i, j| vorrq_u16(i, j), |a: u16, b: u16| -> u16 { a | b });
2696 }
2697 #[simd_test(enable = "neon")]
2698 unsafe fn test_vorr_u32() {
2699 test_bit_u32(|i, j| vorr_u32(i, j), |a: u32, b: u32| -> u32 { a | b });
2700 }
2701 #[simd_test(enable = "neon")]
2702 unsafe fn test_vorrq_u32() {
2703 testq_bit_u32(|i, j| vorrq_u32(i, j), |a: u32, b: u32| -> u32 { a | b });
2704 }
2705 #[simd_test(enable = "neon")]
2706 unsafe fn test_vorr_u64() {
2707 test_bit_u64(|i, j| vorr_u64(i, j), |a: u64, b: u64| -> u64 { a | b });
2708 }
2709 #[simd_test(enable = "neon")]
2710 unsafe fn test_vorrq_u64() {
2711 testq_bit_u64(|i, j| vorrq_u64(i, j), |a: u64, b: u64| -> u64 { a | b });
2712 }
2713
2714 #[simd_test(enable = "neon")]
2715 unsafe fn test_veor_s8() {
2716 test_bit_s8(|i, j| veor_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b });
2717 }
2718 #[simd_test(enable = "neon")]
2719 unsafe fn test_veorq_s8() {
2720 testq_bit_s8(|i, j| veorq_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b });
2721 }
2722 #[simd_test(enable = "neon")]
2723 unsafe fn test_veor_s16() {
2724 test_bit_s16(|i, j| veor_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b });
2725 }
2726 #[simd_test(enable = "neon")]
2727 unsafe fn test_veorq_s16() {
2728 testq_bit_s16(|i, j| veorq_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b });
2729 }
2730 #[simd_test(enable = "neon")]
2731 unsafe fn test_veor_s32() {
2732 test_bit_s32(|i, j| veor_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b });
2733 }
2734 #[simd_test(enable = "neon")]
2735 unsafe fn test_veorq_s32() {
2736 testq_bit_s32(|i, j| veorq_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b });
2737 }
2738 #[simd_test(enable = "neon")]
2739 unsafe fn test_veor_s64() {
2740 test_bit_s64(|i, j| veor_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b });
2741 }
2742 #[simd_test(enable = "neon")]
2743 unsafe fn test_veorq_s64() {
2744 testq_bit_s64(|i, j| veorq_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b });
2745 }
2746
2747 #[simd_test(enable = "neon")]
2748 unsafe fn test_veor_u8() {
2749 test_bit_u8(|i, j| veor_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b });
2750 }
2751 #[simd_test(enable = "neon")]
2752 unsafe fn test_veorq_u8() {
2753 testq_bit_u8(|i, j| veorq_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b });
2754 }
2755 #[simd_test(enable = "neon")]
2756 unsafe fn test_veor_u16() {
2757 test_bit_u16(|i, j| veor_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b });
2758 }
2759 #[simd_test(enable = "neon")]
2760 unsafe fn test_veorq_u16() {
2761 testq_bit_u16(|i, j| veorq_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b });
2762 }
2763 #[simd_test(enable = "neon")]
2764 unsafe fn test_veor_u32() {
2765 test_bit_u32(|i, j| veor_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b });
2766 }
2767 #[simd_test(enable = "neon")]
2768 unsafe fn test_veorq_u32() {
2769 testq_bit_u32(|i, j| veorq_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b });
2770 }
2771 #[simd_test(enable = "neon")]
2772 unsafe fn test_veor_u64() {
2773 test_bit_u64(|i, j| veor_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b });
2774 }
2775 #[simd_test(enable = "neon")]
2776 unsafe fn test_veorq_u64() {
2777 testq_bit_u64(|i, j| veorq_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b });
2778 }
2779
2780 #[simd_test(enable = "neon")]
2781 unsafe fn test_vceq_s8() {
2782 test_cmp_s8(
2783 |i, j| vceq_s8(i, j),
2784 |a: i8, b: i8| -> u8 {
2785 if a == b {
2786 0xFF
2787 } else {
2788 0
2789 }
2790 },
2791 );
2792 }
2793 #[simd_test(enable = "neon")]
2794 unsafe fn test_vceqq_s8() {
2795 testq_cmp_s8(
2796 |i, j| vceqq_s8(i, j),
2797 |a: i8, b: i8| -> u8 {
2798 if a == b {
2799 0xFF
2800 } else {
2801 0
2802 }
2803 },
2804 );
2805 }
2806 #[simd_test(enable = "neon")]
2807 unsafe fn test_vceq_s16() {
2808 test_cmp_s16(
2809 |i, j| vceq_s16(i, j),
2810 |a: i16, b: i16| -> u16 {
2811 if a == b {
2812 0xFFFF
2813 } else {
2814 0
2815 }
2816 },
2817 );
2818 }
2819 #[simd_test(enable = "neon")]
2820 unsafe fn test_vceqq_s16() {
2821 testq_cmp_s16(
2822 |i, j| vceqq_s16(i, j),
2823 |a: i16, b: i16| -> u16 {
2824 if a == b {
2825 0xFFFF
2826 } else {
2827 0
2828 }
2829 },
2830 );
2831 }
2832 #[simd_test(enable = "neon")]
2833 unsafe fn test_vceq_s32() {
2834 test_cmp_s32(
2835 |i, j| vceq_s32(i, j),
2836 |a: i32, b: i32| -> u32 {
2837 if a == b {
2838 0xFFFFFFFF
2839 } else {
2840 0
2841 }
2842 },
2843 );
2844 }
2845 #[simd_test(enable = "neon")]
2846 unsafe fn test_vceqq_s32() {
2847 testq_cmp_s32(
2848 |i, j| vceqq_s32(i, j),
2849 |a: i32, b: i32| -> u32 {
2850 if a == b {
2851 0xFFFFFFFF
2852 } else {
2853 0
2854 }
2855 },
2856 );
2857 }
2858
2859 #[simd_test(enable = "neon")]
2860 unsafe fn test_vceq_u8() {
2861 test_cmp_u8(
2862 |i, j| vceq_u8(i, j),
2863 |a: u8, b: u8| -> u8 {
2864 if a == b {
2865 0xFF
2866 } else {
2867 0
2868 }
2869 },
2870 );
2871 }
2872 #[simd_test(enable = "neon")]
2873 unsafe fn test_vceqq_u8() {
2874 testq_cmp_u8(
2875 |i, j| vceqq_u8(i, j),
2876 |a: u8, b: u8| -> u8 {
2877 if a == b {
2878 0xFF
2879 } else {
2880 0
2881 }
2882 },
2883 );
2884 }
2885 #[simd_test(enable = "neon")]
2886 unsafe fn test_vceq_u16() {
2887 test_cmp_u16(
2888 |i, j| vceq_u16(i, j),
2889 |a: u16, b: u16| -> u16 {
2890 if a == b {
2891 0xFFFF
2892 } else {
2893 0
2894 }
2895 },
2896 );
2897 }
2898 #[simd_test(enable = "neon")]
2899 unsafe fn test_vceqq_u16() {
2900 testq_cmp_u16(
2901 |i, j| vceqq_u16(i, j),
2902 |a: u16, b: u16| -> u16 {
2903 if a == b {
2904 0xFFFF
2905 } else {
2906 0
2907 }
2908 },
2909 );
2910 }
2911 #[simd_test(enable = "neon")]
2912 unsafe fn test_vceq_u32() {
2913 test_cmp_u32(
2914 |i, j| vceq_u32(i, j),
2915 |a: u32, b: u32| -> u32 {
2916 if a == b {
2917 0xFFFFFFFF
2918 } else {
2919 0
2920 }
2921 },
2922 );
2923 }
2924 #[simd_test(enable = "neon")]
2925 unsafe fn test_vceqq_u32() {
2926 testq_cmp_u32(
2927 |i, j| vceqq_u32(i, j),
2928 |a: u32, b: u32| -> u32 {
2929 if a == b {
2930 0xFFFFFFFF
2931 } else {
2932 0
2933 }
2934 },
2935 );
2936 }
2937
2938 #[simd_test(enable = "neon")]
2939 unsafe fn test_vceq_f32() {
2940 test_cmp_f32(
2941 |i, j| vcge_f32(i, j),
2942 |a: f32, b: f32| -> u32 {
2943 if a == b {
2944 0xFFFFFFFF
2945 } else {
2946 0
2947 }
2948 },
2949 );
2950 }
2951 #[simd_test(enable = "neon")]
2952 unsafe fn test_vceqq_f32() {
2953 testq_cmp_f32(
2954 |i, j| vcgeq_f32(i, j),
2955 |a: f32, b: f32| -> u32 {
2956 if a == b {
2957 0xFFFFFFFF
2958 } else {
2959 0
2960 }
2961 },
2962 );
2963 }
2964
2965 #[simd_test(enable = "neon")]
2966 unsafe fn test_vcgt_s8() {
2967 test_cmp_s8(
2968 |i, j| vcgt_s8(i, j),
2969 |a: i8, b: i8| -> u8 {
2970 if a > b {
2971 0xFF
2972 } else {
2973 0
2974 }
2975 },
2976 );
2977 }
2978 #[simd_test(enable = "neon")]
2979 unsafe fn test_vcgtq_s8() {
2980 testq_cmp_s8(
2981 |i, j| vcgtq_s8(i, j),
2982 |a: i8, b: i8| -> u8 {
2983 if a > b {
2984 0xFF
2985 } else {
2986 0
2987 }
2988 },
2989 );
2990 }
2991 #[simd_test(enable = "neon")]
2992 unsafe fn test_vcgt_s16() {
2993 test_cmp_s16(
2994 |i, j| vcgt_s16(i, j),
2995 |a: i16, b: i16| -> u16 {
2996 if a > b {
2997 0xFFFF
2998 } else {
2999 0
3000 }
3001 },
3002 );
3003 }
3004 #[simd_test(enable = "neon")]
3005 unsafe fn test_vcgtq_s16() {
3006 testq_cmp_s16(
3007 |i, j| vcgtq_s16(i, j),
3008 |a: i16, b: i16| -> u16 {
3009 if a > b {
3010 0xFFFF
3011 } else {
3012 0
3013 }
3014 },
3015 );
3016 }
3017 #[simd_test(enable = "neon")]
3018 unsafe fn test_vcgt_s32() {
3019 test_cmp_s32(
3020 |i, j| vcgt_s32(i, j),
3021 |a: i32, b: i32| -> u32 {
3022 if a > b {
3023 0xFFFFFFFF
3024 } else {
3025 0
3026 }
3027 },
3028 );
3029 }
3030 #[simd_test(enable = "neon")]
3031 unsafe fn test_vcgtq_s32() {
3032 testq_cmp_s32(
3033 |i, j| vcgtq_s32(i, j),
3034 |a: i32, b: i32| -> u32 {
3035 if a > b {
3036 0xFFFFFFFF
3037 } else {
3038 0
3039 }
3040 },
3041 );
3042 }
3043
3044 #[simd_test(enable = "neon")]
3045 unsafe fn test_vcgt_u8() {
3046 test_cmp_u8(
3047 |i, j| vcgt_u8(i, j),
3048 |a: u8, b: u8| -> u8 {
3049 if a > b {
3050 0xFF
3051 } else {
3052 0
3053 }
3054 },
3055 );
3056 }
3057 #[simd_test(enable = "neon")]
3058 unsafe fn test_vcgtq_u8() {
3059 testq_cmp_u8(
3060 |i, j| vcgtq_u8(i, j),
3061 |a: u8, b: u8| -> u8 {
3062 if a > b {
3063 0xFF
3064 } else {
3065 0
3066 }
3067 },
3068 );
3069 }
3070 #[simd_test(enable = "neon")]
3071 unsafe fn test_vcgt_u16() {
3072 test_cmp_u16(
3073 |i, j| vcgt_u16(i, j),
3074 |a: u16, b: u16| -> u16 {
3075 if a > b {
3076 0xFFFF
3077 } else {
3078 0
3079 }
3080 },
3081 );
3082 }
3083 #[simd_test(enable = "neon")]
3084 unsafe fn test_vcgtq_u16() {
3085 testq_cmp_u16(
3086 |i, j| vcgtq_u16(i, j),
3087 |a: u16, b: u16| -> u16 {
3088 if a > b {
3089 0xFFFF
3090 } else {
3091 0
3092 }
3093 },
3094 );
3095 }
3096 #[simd_test(enable = "neon")]
3097 unsafe fn test_vcgt_u32() {
3098 test_cmp_u32(
3099 |i, j| vcgt_u32(i, j),
3100 |a: u32, b: u32| -> u32 {
3101 if a > b {
3102 0xFFFFFF
3103 } else {
3104 0
3105 }
3106 },
3107 );
3108 }
3109 #[simd_test(enable = "neon")]
3110 unsafe fn test_vcgtq_u32() {
3111 testq_cmp_u32(
3112 |i, j| vcgtq_u32(i, j),
3113 |a: u32, b: u32| -> u32 {
3114 if a > b {
3115 0xFFFFFFFF
3116 } else {
3117 0
3118 }
3119 },
3120 );
3121 }
3122
3123 #[simd_test(enable = "neon")]
3124 unsafe fn test_vcgt_f32() {
3125 test_cmp_f32(
3126 |i, j| vcgt_f32(i, j),
3127 |a: f32, b: f32| -> u32 {
3128 if a > b {
3129 0xFFFFFFFF
3130 } else {
3131 0
3132 }
3133 },
3134 );
3135 }
3136 #[simd_test(enable = "neon")]
3137 unsafe fn test_vcgtq_f32() {
3138 testq_cmp_f32(
3139 |i, j| vcgtq_f32(i, j),
3140 |a: f32, b: f32| -> u32 {
3141 if a > b {
3142 0xFFFFFFFF
3143 } else {
3144 0
3145 }
3146 },
3147 );
3148 }
3149
3150 #[simd_test(enable = "neon")]
3151 unsafe fn test_vclt_s8() {
3152 test_cmp_s8(
3153 |i, j| vclt_s8(i, j),
3154 |a: i8, b: i8| -> u8 {
3155 if a < b {
3156 0xFF
3157 } else {
3158 0
3159 }
3160 },
3161 );
3162 }
3163 #[simd_test(enable = "neon")]
3164 unsafe fn test_vcltq_s8() {
3165 testq_cmp_s8(
3166 |i, j| vcltq_s8(i, j),
3167 |a: i8, b: i8| -> u8 {
3168 if a < b {
3169 0xFF
3170 } else {
3171 0
3172 }
3173 },
3174 );
3175 }
3176 #[simd_test(enable = "neon")]
3177 unsafe fn test_vclt_s16() {
3178 test_cmp_s16(
3179 |i, j| vclt_s16(i, j),
3180 |a: i16, b: i16| -> u16 {
3181 if a < b {
3182 0xFFFF
3183 } else {
3184 0
3185 }
3186 },
3187 );
3188 }
3189 #[simd_test(enable = "neon")]
3190 unsafe fn test_vcltq_s16() {
3191 testq_cmp_s16(
3192 |i, j| vcltq_s16(i, j),
3193 |a: i16, b: i16| -> u16 {
3194 if a < b {
3195 0xFFFF
3196 } else {
3197 0
3198 }
3199 },
3200 );
3201 }
3202 #[simd_test(enable = "neon")]
3203 unsafe fn test_vclt_s32() {
3204 test_cmp_s32(
3205 |i, j| vclt_s32(i, j),
3206 |a: i32, b: i32| -> u32 {
3207 if a < b {
3208 0xFFFFFFFF
3209 } else {
3210 0
3211 }
3212 },
3213 );
3214 }
3215 #[simd_test(enable = "neon")]
3216 unsafe fn test_vcltq_s32() {
3217 testq_cmp_s32(
3218 |i, j| vcltq_s32(i, j),
3219 |a: i32, b: i32| -> u32 {
3220 if a < b {
3221 0xFFFFFFFF
3222 } else {
3223 0
3224 }
3225 },
3226 );
3227 }
3228
3229 #[simd_test(enable = "neon")]
3230 unsafe fn test_vclt_u8() {
3231 test_cmp_u8(
3232 |i, j| vclt_u8(i, j),
3233 |a: u8, b: u8| -> u8 {
3234 if a < b {
3235 0xFF
3236 } else {
3237 0
3238 }
3239 },
3240 );
3241 }
3242 #[simd_test(enable = "neon")]
3243 unsafe fn test_vcltq_u8() {
3244 testq_cmp_u8(
3245 |i, j| vcltq_u8(i, j),
3246 |a: u8, b: u8| -> u8 {
3247 if a < b {
3248 0xFF
3249 } else {
3250 0
3251 }
3252 },
3253 );
3254 }
3255 #[simd_test(enable = "neon")]
3256 unsafe fn test_vclt_u16() {
3257 test_cmp_u16(
3258 |i, j| vclt_u16(i, j),
3259 |a: u16, b: u16| -> u16 {
3260 if a < b {
3261 0xFFFF
3262 } else {
3263 0
3264 }
3265 },
3266 );
3267 }
3268 #[simd_test(enable = "neon")]
3269 unsafe fn test_vcltq_u16() {
3270 testq_cmp_u16(
3271 |i, j| vcltq_u16(i, j),
3272 |a: u16, b: u16| -> u16 {
3273 if a < b {
3274 0xFFFF
3275 } else {
3276 0
3277 }
3278 },
3279 );
3280 }
3281 #[simd_test(enable = "neon")]
3282 unsafe fn test_vclt_u32() {
3283 test_cmp_u32(
3284 |i, j| vclt_u32(i, j),
3285 |a: u32, b: u32| -> u32 {
3286 if a < b {
3287 0xFFFFFF
3288 } else {
3289 0
3290 }
3291 },
3292 );
3293 }
3294 #[simd_test(enable = "neon")]
3295 unsafe fn test_vcltq_u32() {
3296 testq_cmp_u32(
3297 |i, j| vcltq_u32(i, j),
3298 |a: u32, b: u32| -> u32 {
3299 if a < b {
3300 0xFFFFFFFF
3301 } else {
3302 0
3303 }
3304 },
3305 );
3306 }
3307
3308 #[simd_test(enable = "neon")]
3309 unsafe fn test_vclt_f32() {
3310 test_cmp_f32(
3311 |i, j| vclt_f32(i, j),
3312 |a: f32, b: f32| -> u32 {
3313 if a < b {
3314 0xFFFFFFFF
3315 } else {
3316 0
3317 }
3318 },
3319 );
3320 }
3321 #[simd_test(enable = "neon")]
3322 unsafe fn test_vcltq_f32() {
3323 testq_cmp_f32(
3324 |i, j| vcltq_f32(i, j),
3325 |a: f32, b: f32| -> u32 {
3326 if a < b {
3327 0xFFFFFFFF
3328 } else {
3329 0
3330 }
3331 },
3332 );
3333 }
3334
3335 #[simd_test(enable = "neon")]
3336 unsafe fn test_vcle_s8() {
3337 test_cmp_s8(
3338 |i, j| vcle_s8(i, j),
3339 |a: i8, b: i8| -> u8 {
3340 if a <= b {
3341 0xFF
3342 } else {
3343 0
3344 }
3345 },
3346 );
3347 }
3348 #[simd_test(enable = "neon")]
3349 unsafe fn test_vcleq_s8() {
3350 testq_cmp_s8(
3351 |i, j| vcleq_s8(i, j),
3352 |a: i8, b: i8| -> u8 {
3353 if a <= b {
3354 0xFF
3355 } else {
3356 0
3357 }
3358 },
3359 );
3360 }
3361 #[simd_test(enable = "neon")]
3362 unsafe fn test_vcle_s16() {
3363 test_cmp_s16(
3364 |i, j| vcle_s16(i, j),
3365 |a: i16, b: i16| -> u16 {
3366 if a <= b {
3367 0xFFFF
3368 } else {
3369 0
3370 }
3371 },
3372 );
3373 }
3374 #[simd_test(enable = "neon")]
3375 unsafe fn test_vcleq_s16() {
3376 testq_cmp_s16(
3377 |i, j| vcleq_s16(i, j),
3378 |a: i16, b: i16| -> u16 {
3379 if a <= b {
3380 0xFFFF
3381 } else {
3382 0
3383 }
3384 },
3385 );
3386 }
3387 #[simd_test(enable = "neon")]
3388 unsafe fn test_vcle_s32() {
3389 test_cmp_s32(
3390 |i, j| vcle_s32(i, j),
3391 |a: i32, b: i32| -> u32 {
3392 if a <= b {
3393 0xFFFFFFFF
3394 } else {
3395 0
3396 }
3397 },
3398 );
3399 }
3400 #[simd_test(enable = "neon")]
3401 unsafe fn test_vcleq_s32() {
3402 testq_cmp_s32(
3403 |i, j| vcleq_s32(i, j),
3404 |a: i32, b: i32| -> u32 {
3405 if a <= b {
3406 0xFFFFFFFF
3407 } else {
3408 0
3409 }
3410 },
3411 );
3412 }
3413
3414 #[simd_test(enable = "neon")]
3415 unsafe fn test_vcle_u8() {
3416 test_cmp_u8(
3417 |i, j| vcle_u8(i, j),
3418 |a: u8, b: u8| -> u8 {
3419 if a <= b {
3420 0xFF
3421 } else {
3422 0
3423 }
3424 },
3425 );
3426 }
3427 #[simd_test(enable = "neon")]
3428 unsafe fn test_vcleq_u8() {
3429 testq_cmp_u8(
3430 |i, j| vcleq_u8(i, j),
3431 |a: u8, b: u8| -> u8 {
3432 if a <= b {
3433 0xFF
3434 } else {
3435 0
3436 }
3437 },
3438 );
3439 }
3440 #[simd_test(enable = "neon")]
3441 unsafe fn test_vcle_u16() {
3442 test_cmp_u16(
3443 |i, j| vcle_u16(i, j),
3444 |a: u16, b: u16| -> u16 {
3445 if a <= b {
3446 0xFFFF
3447 } else {
3448 0
3449 }
3450 },
3451 );
3452 }
3453 #[simd_test(enable = "neon")]
3454 unsafe fn test_vcleq_u16() {
3455 testq_cmp_u16(
3456 |i, j| vcleq_u16(i, j),
3457 |a: u16, b: u16| -> u16 {
3458 if a <= b {
3459 0xFFFF
3460 } else {
3461 0
3462 }
3463 },
3464 );
3465 }
3466 #[simd_test(enable = "neon")]
3467 unsafe fn test_vcle_u32() {
3468 test_cmp_u32(
3469 |i, j| vcle_u32(i, j),
3470 |a: u32, b: u32| -> u32 {
3471 if a <= b {
3472 0xFFFFFFFF
3473 } else {
3474 0
3475 }
3476 },
3477 );
3478 }
3479 #[simd_test(enable = "neon")]
3480 unsafe fn test_vcleq_u32() {
3481 testq_cmp_u32(
3482 |i, j| vcleq_u32(i, j),
3483 |a: u32, b: u32| -> u32 {
3484 if a <= b {
3485 0xFFFFFFFF
3486 } else {
3487 0
3488 }
3489 },
3490 );
3491 }
3492
3493 #[simd_test(enable = "neon")]
3494 unsafe fn test_vcle_f32() {
3495 test_cmp_f32(
3496 |i, j| vcle_f32(i, j),
3497 |a: f32, b: f32| -> u32 {
3498 if a <= b {
3499 0xFFFFFFFF
3500 } else {
3501 0
3502 }
3503 },
3504 );
3505 }
3506 #[simd_test(enable = "neon")]
3507 unsafe fn test_vcleq_f32() {
3508 testq_cmp_f32(
3509 |i, j| vcleq_f32(i, j),
3510 |a: f32, b: f32| -> u32 {
3511 if a <= b {
3512 0xFFFFFFFF
3513 } else {
3514 0
3515 }
3516 },
3517 );
3518 }
3519
3520 #[simd_test(enable = "neon")]
3521 unsafe fn test_vcge_s8() {
3522 test_cmp_s8(
3523 |i, j| vcge_s8(i, j),
3524 |a: i8, b: i8| -> u8 {
3525 if a >= b {
3526 0xFF
3527 } else {
3528 0
3529 }
3530 },
3531 );
3532 }
3533 #[simd_test(enable = "neon")]
3534 unsafe fn test_vcgeq_s8() {
3535 testq_cmp_s8(
3536 |i, j| vcgeq_s8(i, j),
3537 |a: i8, b: i8| -> u8 {
3538 if a >= b {
3539 0xFF
3540 } else {
3541 0
3542 }
3543 },
3544 );
3545 }
3546 #[simd_test(enable = "neon")]
3547 unsafe fn test_vcge_s16() {
3548 test_cmp_s16(
3549 |i, j| vcge_s16(i, j),
3550 |a: i16, b: i16| -> u16 {
3551 if a >= b {
3552 0xFFFF
3553 } else {
3554 0
3555 }
3556 },
3557 );
3558 }
3559 #[simd_test(enable = "neon")]
3560 unsafe fn test_vcgeq_s16() {
3561 testq_cmp_s16(
3562 |i, j| vcgeq_s16(i, j),
3563 |a: i16, b: i16| -> u16 {
3564 if a >= b {
3565 0xFFFF
3566 } else {
3567 0
3568 }
3569 },
3570 );
3571 }
3572 #[simd_test(enable = "neon")]
3573 unsafe fn test_vcge_s32() {
3574 test_cmp_s32(
3575 |i, j| vcge_s32(i, j),
3576 |a: i32, b: i32| -> u32 {
3577 if a >= b {
3578 0xFFFFFFFF
3579 } else {
3580 0
3581 }
3582 },
3583 );
3584 }
3585 #[simd_test(enable = "neon")]
3586 unsafe fn test_vcgeq_s32() {
3587 testq_cmp_s32(
3588 |i, j| vcgeq_s32(i, j),
3589 |a: i32, b: i32| -> u32 {
3590 if a >= b {
3591 0xFFFFFFFF
3592 } else {
3593 0
3594 }
3595 },
3596 );
3597 }
3598
3599 #[simd_test(enable = "neon")]
3600 unsafe fn test_vcge_u8() {
3601 test_cmp_u8(
3602 |i, j| vcge_u8(i, j),
3603 |a: u8, b: u8| -> u8 {
3604 if a >= b {
3605 0xFF
3606 } else {
3607 0
3608 }
3609 },
3610 );
3611 }
3612 #[simd_test(enable = "neon")]
3613 unsafe fn test_vcgeq_u8() {
3614 testq_cmp_u8(
3615 |i, j| vcgeq_u8(i, j),
3616 |a: u8, b: u8| -> u8 {
3617 if a >= b {
3618 0xFF
3619 } else {
3620 0
3621 }
3622 },
3623 );
3624 }
3625 #[simd_test(enable = "neon")]
3626 unsafe fn test_vcge_u16() {
3627 test_cmp_u16(
3628 |i, j| vcge_u16(i, j),
3629 |a: u16, b: u16| -> u16 {
3630 if a >= b {
3631 0xFFFF
3632 } else {
3633 0
3634 }
3635 },
3636 );
3637 }
3638 #[simd_test(enable = "neon")]
3639 unsafe fn test_vcgeq_u16() {
3640 testq_cmp_u16(
3641 |i, j| vcgeq_u16(i, j),
3642 |a: u16, b: u16| -> u16 {
3643 if a >= b {
3644 0xFFFF
3645 } else {
3646 0
3647 }
3648 },
3649 );
3650 }
3651 #[simd_test(enable = "neon")]
3652 unsafe fn test_vcge_u32() {
3653 test_cmp_u32(
3654 |i, j| vcge_u32(i, j),
3655 |a: u32, b: u32| -> u32 {
3656 if a >= b {
3657 0xFFFFFFFF
3658 } else {
3659 0
3660 }
3661 },
3662 );
3663 }
3664 #[simd_test(enable = "neon")]
3665 unsafe fn test_vcgeq_u32() {
3666 testq_cmp_u32(
3667 |i, j| vcgeq_u32(i, j),
3668 |a: u32, b: u32| -> u32 {
3669 if a >= b {
3670 0xFFFFFFFF
3671 } else {
3672 0
3673 }
3674 },
3675 );
3676 }
3677
3678 #[simd_test(enable = "neon")]
3679 unsafe fn test_vcge_f32() {
3680 test_cmp_f32(
3681 |i, j| vcge_f32(i, j),
3682 |a: f32, b: f32| -> u32 {
3683 if a >= b {
3684 0xFFFFFFFF
3685 } else {
3686 0
3687 }
3688 },
3689 );
3690 }
3691 #[simd_test(enable = "neon")]
3692 unsafe fn test_vcgeq_f32() {
3693 testq_cmp_f32(
3694 |i, j| vcgeq_f32(i, j),
3695 |a: f32, b: f32| -> u32 {
3696 if a >= b {
3697 0xFFFFFFFF
3698 } else {
3699 0
3700 }
3701 },
3702 );
3703 }
3704
3705 #[simd_test(enable = "neon")]
3706 unsafe fn test_vqsub_s8() {
3707 test_ari_s8(
3708 |i, j| vqsub_s8(i, j),
3709 |a: i8, b: i8| -> i8 { a.saturating_sub(b) },
3710 );
3711 }
3712 #[simd_test(enable = "neon")]
3713 unsafe fn test_vqsubq_s8() {
3714 testq_ari_s8(
3715 |i, j| vqsubq_s8(i, j),
3716 |a: i8, b: i8| -> i8 { a.saturating_sub(b) },
3717 );
3718 }
3719 #[simd_test(enable = "neon")]
3720 unsafe fn test_vqsub_s16() {
3721 test_ari_s16(
3722 |i, j| vqsub_s16(i, j),
3723 |a: i16, b: i16| -> i16 { a.saturating_sub(b) },
3724 );
3725 }
3726 #[simd_test(enable = "neon")]
3727 unsafe fn test_vqsubq_s16() {
3728 testq_ari_s16(
3729 |i, j| vqsubq_s16(i, j),
3730 |a: i16, b: i16| -> i16 { a.saturating_sub(b) },
3731 );
3732 }
3733 #[simd_test(enable = "neon")]
3734 unsafe fn test_vqsub_s32() {
3735 test_ari_s32(
3736 |i, j| vqsub_s32(i, j),
3737 |a: i32, b: i32| -> i32 { a.saturating_sub(b) },
3738 );
3739 }
3740 #[simd_test(enable = "neon")]
3741 unsafe fn test_vqsubq_s32() {
3742 testq_ari_s32(
3743 |i, j| vqsubq_s32(i, j),
3744 |a: i32, b: i32| -> i32 { a.saturating_sub(b) },
3745 );
3746 }
3747
3748 #[simd_test(enable = "neon")]
3749 unsafe fn test_vqsub_u8() {
3750 test_ari_u8(
3751 |i, j| vqsub_u8(i, j),
3752 |a: u8, b: u8| -> u8 { a.saturating_sub(b) },
3753 );
3754 }
3755 #[simd_test(enable = "neon")]
3756 unsafe fn test_vqsubq_u8() {
3757 testq_ari_u8(
3758 |i, j| vqsubq_u8(i, j),
3759 |a: u8, b: u8| -> u8 { a.saturating_sub(b) },
3760 );
3761 }
3762 #[simd_test(enable = "neon")]
3763 unsafe fn test_vqsub_u16() {
3764 test_ari_u16(
3765 |i, j| vqsub_u16(i, j),
3766 |a: u16, b: u16| -> u16 { a.saturating_sub(b) },
3767 );
3768 }
3769 #[simd_test(enable = "neon")]
3770 unsafe fn test_vqsubq_u16() {
3771 testq_ari_u16(
3772 |i, j| vqsubq_u16(i, j),
3773 |a: u16, b: u16| -> u16 { a.saturating_sub(b) },
3774 );
3775 }
3776 #[simd_test(enable = "neon")]
3777 unsafe fn test_vqsub_u32() {
3778 test_ari_u32(
3779 |i, j| vqsub_u32(i, j),
3780 |a: u32, b: u32| -> u32 { a.saturating_sub(b) },
3781 );
3782 }
3783 #[simd_test(enable = "neon")]
3784 unsafe fn test_vqsubq_u32() {
3785 testq_ari_u32(
3786 |i, j| vqsubq_u32(i, j),
3787 |a: u32, b: u32| -> u32 { a.saturating_sub(b) },
3788 );
3789 }
3790
    // vhadd: halving add — each lane computes (a + b) >> 1 without
    // intermediate overflow.
    // NOTE(review): the scalar reference below is `a & b`, which equals
    // (a + b) >> 1 only when a == b (since (a+b)>>1 == (a&b) + ((a^b)>>1)).
    // This presumably relies on the test_ari_*/testq_ari_* helpers feeding
    // equal operands to both closures — TODO confirm against the helper
    // definitions (not visible in this chunk).
    #[simd_test(enable = "neon")]
    unsafe fn test_vhadd_s8() {
        test_ari_s8(|i, j| vhadd_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhaddq_s8() {
        testq_ari_s8(|i, j| vhaddq_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhadd_s16() {
        test_ari_s16(|i, j| vhadd_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhaddq_s16() {
        testq_ari_s16(|i, j| vhaddq_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhadd_s32() {
        test_ari_s32(|i, j| vhadd_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhaddq_s32() {
        testq_ari_s32(|i, j| vhaddq_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
    }
3815
    // vhadd (unsigned): halving add, (a + b) >> 1 without overflow.
    // NOTE(review): `a & b` matches the halving add only when a == b; see the
    // signed variants above — presumably the helpers use equal operands.
    // TODO confirm against the helper definitions.
    #[simd_test(enable = "neon")]
    unsafe fn test_vhadd_u8() {
        test_ari_u8(|i, j| vhadd_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhaddq_u8() {
        testq_ari_u8(|i, j| vhaddq_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhadd_u16() {
        test_ari_u16(|i, j| vhadd_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhaddq_u16() {
        testq_ari_u16(|i, j| vhaddq_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhadd_u32() {
        test_ari_u32(|i, j| vhadd_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhaddq_u32() {
        testq_ari_u32(|i, j| vhaddq_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
    }
3840
    // vrhadd: rounding halving add — each lane computes (a + b + 1) >> 1.
    // NOTE(review): the reference `a & b` equals the rounded halving add only
    // when a == b; presumably the test_ari_* helpers feed equal operands —
    // TODO confirm against the helper definitions.
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhadd_s8() {
        test_ari_s8(|i, j| vrhadd_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhaddq_s8() {
        testq_ari_s8(|i, j| vrhaddq_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhadd_s16() {
        test_ari_s16(|i, j| vrhadd_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhaddq_s16() {
        testq_ari_s16(|i, j| vrhaddq_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhadd_s32() {
        test_ari_s32(|i, j| vrhadd_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhaddq_s32() {
        testq_ari_s32(|i, j| vrhaddq_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
    }
3865
    // vrhadd (unsigned): rounding halving add, (a + b + 1) >> 1.
    // NOTE(review): `a & b` matches only when a == b; see the signed variants
    // above — TODO confirm the helpers feed equal operands.
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhadd_u8() {
        test_ari_u8(|i, j| vrhadd_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhaddq_u8() {
        testq_ari_u8(|i, j| vrhaddq_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhadd_u16() {
        test_ari_u16(|i, j| vrhadd_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhaddq_u16() {
        testq_ari_u16(|i, j| vrhaddq_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhadd_u32() {
        test_ari_u32(|i, j| vrhadd_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vrhaddq_u32() {
        testq_ari_u32(|i, j| vrhaddq_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
    }
3890
3891 #[simd_test(enable = "neon")]
3892 unsafe fn test_vqadd_s8() {
3893 test_ari_s8(
3894 |i, j| vqadd_s8(i, j),
3895 |a: i8, b: i8| -> i8 { a.saturating_add(b) },
3896 );
3897 }
3898 #[simd_test(enable = "neon")]
3899 unsafe fn test_vqaddq_s8() {
3900 testq_ari_s8(
3901 |i, j| vqaddq_s8(i, j),
3902 |a: i8, b: i8| -> i8 { a.saturating_add(b) },
3903 );
3904 }
3905 #[simd_test(enable = "neon")]
3906 unsafe fn test_vqadd_s16() {
3907 test_ari_s16(
3908 |i, j| vqadd_s16(i, j),
3909 |a: i16, b: i16| -> i16 { a.saturating_add(b) },
3910 );
3911 }
3912 #[simd_test(enable = "neon")]
3913 unsafe fn test_vqaddq_s16() {
3914 testq_ari_s16(
3915 |i, j| vqaddq_s16(i, j),
3916 |a: i16, b: i16| -> i16 { a.saturating_add(b) },
3917 );
3918 }
3919 #[simd_test(enable = "neon")]
3920 unsafe fn test_vqadd_s32() {
3921 test_ari_s32(
3922 |i, j| vqadd_s32(i, j),
3923 |a: i32, b: i32| -> i32 { a.saturating_add(b) },
3924 );
3925 }
3926 #[simd_test(enable = "neon")]
3927 unsafe fn test_vqaddq_s32() {
3928 testq_ari_s32(
3929 |i, j| vqaddq_s32(i, j),
3930 |a: i32, b: i32| -> i32 { a.saturating_add(b) },
3931 );
3932 }
3933
3934 #[simd_test(enable = "neon")]
3935 unsafe fn test_vqadd_u8() {
3936 test_ari_u8(
3937 |i, j| vqadd_u8(i, j),
3938 |a: u8, b: u8| -> u8 { a.saturating_add(b) },
3939 );
3940 }
3941 #[simd_test(enable = "neon")]
3942 unsafe fn test_vqaddq_u8() {
3943 testq_ari_u8(
3944 |i, j| vqaddq_u8(i, j),
3945 |a: u8, b: u8| -> u8 { a.saturating_add(b) },
3946 );
3947 }
3948 #[simd_test(enable = "neon")]
3949 unsafe fn test_vqadd_u16() {
3950 test_ari_u16(
3951 |i, j| vqadd_u16(i, j),
3952 |a: u16, b: u16| -> u16 { a.saturating_add(b) },
3953 );
3954 }
3955 #[simd_test(enable = "neon")]
3956 unsafe fn test_vqaddq_u16() {
3957 testq_ari_u16(
3958 |i, j| vqaddq_u16(i, j),
3959 |a: u16, b: u16| -> u16 { a.saturating_add(b) },
3960 );
3961 }
3962 #[simd_test(enable = "neon")]
3963 unsafe fn test_vqadd_u32() {
3964 test_ari_u32(
3965 |i, j| vqadd_u32(i, j),
3966 |a: u32, b: u32| -> u32 { a.saturating_add(b) },
3967 );
3968 }
3969 #[simd_test(enable = "neon")]
3970 unsafe fn test_vqaddq_u32() {
3971 testq_ari_u32(
3972 |i, j| vqaddq_u32(i, j),
3973 |a: u32, b: u32| -> u32 { a.saturating_add(b) },
3974 );
3975 }
3976
3977 #[simd_test(enable = "neon")]
3978 unsafe fn test_vmul_s8() {
3979 test_ari_s8(
3980 |i, j| vmul_s8(i, j),
3981 |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 },
3982 );
3983 }
3984 #[simd_test(enable = "neon")]
3985 unsafe fn test_vmulq_s8() {
3986 testq_ari_s8(
3987 |i, j| vmulq_s8(i, j),
3988 |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 },
3989 );
3990 }
3991 #[simd_test(enable = "neon")]
3992 unsafe fn test_vmul_s16() {
3993 test_ari_s16(
3994 |i, j| vmul_s16(i, j),
3995 |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 },
3996 );
3997 }
3998 #[simd_test(enable = "neon")]
3999 unsafe fn test_vmulq_s16() {
4000 testq_ari_s16(
4001 |i, j| vmulq_s16(i, j),
4002 |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 },
4003 );
4004 }
4005 #[simd_test(enable = "neon")]
4006 unsafe fn test_vmul_s32() {
4007 test_ari_s32(
4008 |i, j| vmul_s32(i, j),
4009 |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 },
4010 );
4011 }
4012 #[simd_test(enable = "neon")]
4013 unsafe fn test_vmulq_s32() {
4014 testq_ari_s32(
4015 |i, j| vmulq_s32(i, j),
4016 |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 },
4017 );
4018 }
4019
4020 #[simd_test(enable = "neon")]
4021 unsafe fn test_vmul_u8() {
4022 test_ari_u8(
4023 |i, j| vmul_u8(i, j),
4024 |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 },
4025 );
4026 }
4027 #[simd_test(enable = "neon")]
4028 unsafe fn test_vmulq_u8() {
4029 testq_ari_u8(
4030 |i, j| vmulq_u8(i, j),
4031 |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 },
4032 );
4033 }
4034 #[simd_test(enable = "neon")]
4035 unsafe fn test_vmul_u16() {
4036 test_ari_u16(
4037 |i, j| vmul_u16(i, j),
4038 |a: u16, b: u16| -> u16 { a.overflowing_mul(b).0 },
4039 );
4040 }
4041 #[simd_test(enable = "neon")]
4042 unsafe fn test_vmulq_u16() {
4043 testq_ari_u16(
4044 |i, j| vmulq_u16(i, j),
4045 |a: u16, b: u16| -> u16 { a.overflowing_mul(b).0 },
4046 );
4047 }
4048 #[simd_test(enable = "neon")]
4049 unsafe fn test_vmul_u32() {
4050 test_ari_u32(
4051 |i, j| vmul_u32(i, j),
4052 |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 },
4053 );
4054 }
4055 #[simd_test(enable = "neon")]
4056 unsafe fn test_vmulq_u32() {
4057 testq_ari_u32(
4058 |i, j| vmulq_u32(i, j),
4059 |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 },
4060 );
4061 }
4062
4063 #[simd_test(enable = "neon")]
4064 unsafe fn test_vmul_f32() {
4065 test_ari_f32(|i, j| vmul_f32(i, j), |a: f32, b: f32| -> f32 { a * b });
4066 }
4067 #[simd_test(enable = "neon")]
4068 unsafe fn test_vmulq_f32() {
4069 testq_ari_f32(|i, j| vmulq_f32(i, j), |a: f32, b: f32| -> f32 { a * b });
4070 }
4071
4072 #[simd_test(enable = "neon")]
4073 unsafe fn test_vsub_s8() {
4074 test_ari_s8(|i, j| vsub_s8(i, j), |a: i8, b: i8| -> i8 { a - b });
4075 }
4076 #[simd_test(enable = "neon")]
4077 unsafe fn test_vsubq_s8() {
4078 testq_ari_s8(|i, j| vsubq_s8(i, j), |a: i8, b: i8| -> i8 { a - b });
4079 }
4080 #[simd_test(enable = "neon")]
4081 unsafe fn test_vsub_s16() {
4082 test_ari_s16(|i, j| vsub_s16(i, j), |a: i16, b: i16| -> i16 { a - b });
4083 }
4084 #[simd_test(enable = "neon")]
4085 unsafe fn test_vsubq_s16() {
4086 testq_ari_s16(|i, j| vsubq_s16(i, j), |a: i16, b: i16| -> i16 { a - b });
4087 }
4088 #[simd_test(enable = "neon")]
4089 unsafe fn test_vsub_s32() {
4090 test_ari_s32(|i, j| vsub_s32(i, j), |a: i32, b: i32| -> i32 { a - b });
4091 }
4092 #[simd_test(enable = "neon")]
4093 unsafe fn test_vsubq_s32() {
4094 testq_ari_s32(|i, j| vsubq_s32(i, j), |a: i32, b: i32| -> i32 { a - b });
4095 }
4096
4097 #[simd_test(enable = "neon")]
4098 unsafe fn test_vsub_u8() {
4099 test_ari_u8(|i, j| vsub_u8(i, j), |a: u8, b: u8| -> u8 { a - b });
4100 }
4101 #[simd_test(enable = "neon")]
4102 unsafe fn test_vsubq_u8() {
4103 testq_ari_u8(|i, j| vsubq_u8(i, j), |a: u8, b: u8| -> u8 { a - b });
4104 }
4105 #[simd_test(enable = "neon")]
4106 unsafe fn test_vsub_u16() {
4107 test_ari_u16(|i, j| vsub_u16(i, j), |a: u16, b: u16| -> u16 { a - b });
4108 }
4109 #[simd_test(enable = "neon")]
4110 unsafe fn test_vsubq_u16() {
4111 testq_ari_u16(|i, j| vsubq_u16(i, j), |a: u16, b: u16| -> u16 { a - b });
4112 }
4113 #[simd_test(enable = "neon")]
4114 unsafe fn test_vsub_u32() {
4115 test_ari_u32(|i, j| vsub_u32(i, j), |a: u32, b: u32| -> u32 { a - b });
4116 }
4117 #[simd_test(enable = "neon")]
4118 unsafe fn test_vsubq_u32() {
4119 testq_ari_u32(|i, j| vsubq_u32(i, j), |a: u32, b: u32| -> u32 { a - b });
4120 }
4121
4122 #[simd_test(enable = "neon")]
4123 unsafe fn test_vsub_f32() {
4124 test_ari_f32(|i, j| vsub_f32(i, j), |a: f32, b: f32| -> f32 { a - b });
4125 }
4126 #[simd_test(enable = "neon")]
4127 unsafe fn test_vsubq_f32() {
4128 testq_ari_f32(|i, j| vsubq_f32(i, j), |a: f32, b: f32| -> f32 { a - b });
4129 }
4130
    // vhsub: halving subtract — each lane computes (a - b) >> 1 in the wider
    // type, so the intermediate difference cannot overflow.
    // NOTE(review): the reference divides by 2, which rounds toward zero,
    // whereas an arithmetic right shift rounds toward -inf; the two differ
    // for odd negative differences. Presumably the test_ari_* helper inputs
    // avoid that case — TODO confirm against the helper definitions.
    #[simd_test(enable = "neon")]
    unsafe fn test_vhsub_s8() {
        test_ari_s8(
            |i, j| vhsub_s8(i, j),
            |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 },
        );
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhsubq_s8() {
        testq_ari_s8(
            |i, j| vhsubq_s8(i, j),
            |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 },
        );
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhsub_s16() {
        test_ari_s16(
            |i, j| vhsub_s16(i, j),
            |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 },
        );
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhsubq_s16() {
        testq_ari_s16(
            |i, j| vhsubq_s16(i, j),
            |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 },
        );
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhsub_s32() {
        test_ari_s32(
            |i, j| vhsub_s32(i, j),
            |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 },
        );
    }
    #[simd_test(enable = "neon")]
    unsafe fn test_vhsubq_s32() {
        testq_ari_s32(
            |i, j| vhsubq_s32(i, j),
            |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 },
        );
    }
4173
4174 #[simd_test(enable = "neon")]
4175 unsafe fn test_vhsub_u8() {
4176 test_ari_u8(
4177 |i, j| vhsub_u8(i, j),
4178 |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 },
4179 );
4180 }
4181 #[simd_test(enable = "neon")]
4182 unsafe fn test_vhsubq_u8() {
4183 testq_ari_u8(
4184 |i, j| vhsubq_u8(i, j),
4185 |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 },
4186 );
4187 }
4188 #[simd_test(enable = "neon")]
4189 unsafe fn test_vhsub_u16() {
4190 test_ari_u16(
4191 |i, j| vhsub_u16(i, j),
4192 |a: u16, b: u16| -> u16 { (((a as u16) - (b as u16)) / 2) as u16 },
4193 );
4194 }
4195 #[simd_test(enable = "neon")]
4196 unsafe fn test_vhsubq_u16() {
4197 testq_ari_u16(
4198 |i, j| vhsubq_u16(i, j),
4199 |a: u16, b: u16| -> u16 { (((a as u16) - (b as u16)) / 2) as u16 },
4200 );
4201 }
4202 #[simd_test(enable = "neon")]
4203 unsafe fn test_vhsub_u32() {
4204 test_ari_u32(
4205 |i, j| vhsub_u32(i, j),
4206 |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 },
4207 );
4208 }
4209 #[simd_test(enable = "neon")]
4210 unsafe fn test_vhsubq_u32() {
4211 testq_ari_u32(
4212 |i, j| vhsubq_u32(i, j),
4213 |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 },
4214 );
4215 }
4216
4217 #[simd_test(enable = "neon")]
4218 unsafe fn test_vreinterpretq_s8_u8() {
4219 let a = i8x16::new(-1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
4220 let r: u8x16 = transmute(vreinterpretq_s8_u8(transmute(a)));
4221 let e = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
4222 assert_eq!(r, e)
4223 }
4224 #[simd_test(enable = "neon")]
4225 unsafe fn test_vreinterpretq_u16_u8() {
4226 let a = u16x8::new(
4227 0x01_00, 0x03_02, 0x05_04, 0x07_06, 0x09_08, 0x0B_0A, 0x0D_0C, 0x0F_0E,
4228 );
4229 let r: u8x16 = transmute(vreinterpretq_u16_u8(transmute(a)));
4230 let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4231 assert_eq!(r, e)
4232 }
4233 #[simd_test(enable = "neon")]
4234 unsafe fn test_vreinterpretq_u32_u8() {
4235 let a = u32x4::new(0x03_02_01_00, 0x07_06_05_04, 0x0B_0A_09_08, 0x0F_0E_0D_0C);
4236 let r: u8x16 = transmute(vreinterpretq_u32_u8(transmute(a)));
4237 let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4238 assert_eq!(r, e)
4239 }
4240 #[simd_test(enable = "neon")]
4241 unsafe fn test_vreinterpretq_u64_u8() {
4242 let a: u64x2 = u64x2::new(0x07_06_05_04_03_02_01_00, 0x0F_0E_0D_0C_0B_0A_09_08);
4243 let r: u8x16 = transmute(vreinterpretq_u64_u8(transmute(a)));
4244 let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4245 assert_eq!(r, e)
4246 }
4247 #[simd_test(enable = "neon")]
4248 unsafe fn test_vreinterpretq_u8_s8() {
4249 let a = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
4250 let r: i8x16 = transmute(vreinterpretq_u8_s8(transmute(a)));
4251 let e = i8x16::new(-1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
4252 assert_eq!(r, e)
4253 }
3dfed10e
XL
4254 #[simd_test(enable = "neon")]
4255 unsafe fn test_vabs_s8() {
4256 let a = i8x8::new(-1, 0, 1, -2, 0, 2, -128, 127);
4257 let r: i8x8 = transmute(vabs_s8(transmute(a)));
4258 let e = i8x8::new(1, 0, 1, 2, 0, 2, -128, 127);
4259 assert_eq!(r, e);
4260 }
4261 #[simd_test(enable = "neon")]
4262 unsafe fn test_vabsq_s8() {
4263 let a = i8x16::new(-1, 0, 1, -2, 0, 2, -128, 127, -1, 0, 1, -2, 0, 2, -128, 127);
4264 let r: i8x16 = transmute(vabsq_s8(transmute(a)));
4265 let e = i8x16::new(1, 0, 1, 2, 0, 2, -128, 127, 1, 0, 1, 2, 0, 2, -128, 127);
4266 assert_eq!(r, e);
4267 }
4268 #[simd_test(enable = "neon")]
4269 unsafe fn test_vabs_s16() {
4270 let a = i16x4::new(-1, 0, i16::MIN, i16::MAX);
4271 let r: i16x4 = transmute(vabs_s16(transmute(a)));
4272 let e = i16x4::new(1, 0, i16::MIN, i16::MAX);
4273 assert_eq!(r, e);
4274 }
4275 #[simd_test(enable = "neon")]
4276 unsafe fn test_vabsq_s16() {
4277 let a = i16x8::new(-1, 0, i16::MIN, i16::MAX, -1, 0, i16::MIN, i16::MAX);
4278 let r: i16x8 = transmute(vabsq_s16(transmute(a)));
4279 let e = i16x8::new(1, 0, i16::MIN, i16::MAX, 1, 0, i16::MIN, i16::MAX);
4280 assert_eq!(r, e);
4281 }
4282 #[simd_test(enable = "neon")]
4283 unsafe fn test_vabs_s32() {
4284 let a = i32x2::new(i32::MIN, i32::MIN + 1);
4285 let r: i32x2 = transmute(vabs_s32(transmute(a)));
4286 let e = i32x2::new(i32::MIN, i32::MAX);
4287 assert_eq!(r, e);
4288 }
4289 #[simd_test(enable = "neon")]
4290 unsafe fn test_vabsq_s32() {
4291 let a = i32x4::new(i32::MIN, i32::MIN + 1, 0, -1);
4292 let r: i32x4 = transmute(vabsq_s32(transmute(a)));
4293 let e = i32x4::new(i32::MIN, i32::MAX, 0, 1);
4294 assert_eq!(r, e);
4295 }
1b1a35ee
XL
4296 #[simd_test(enable = "neon")]
4297 unsafe fn test_vpadd_s16() {
4298 let a = i16x4::new(1, 2, 3, 4);
4299 let b = i16x4::new(0, -1, -2, -3);
4300 let r: i16x4 = transmute(vpadd_s16(transmute(a), transmute(b)));
4301 let e = i16x4::new(3, 7, -1, -5);
4302 assert_eq!(r, e);
4303 }
4304 #[simd_test(enable = "neon")]
4305 unsafe fn test_vpadd_s32() {
4306 let a = i32x2::new(1, 2);
4307 let b = i32x2::new(0, -1);
4308 let r: i32x2 = transmute(vpadd_s32(transmute(a), transmute(b)));
4309 let e = i32x2::new(3, -1);
4310 assert_eq!(r, e);
4311 }
4312 #[simd_test(enable = "neon")]
4313 unsafe fn test_vpadd_s8() {
4314 let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
4315 let b = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7);
4316 let r: i8x8 = transmute(vpadd_s8(transmute(a), transmute(b)));
4317 let e = i8x8::new(3, 7, 11, 15, -1, -5, -9, -13);
4318 assert_eq!(r, e);
4319 }
4320 #[simd_test(enable = "neon")]
4321 unsafe fn test_vpadd_u16() {
4322 let a = u16x4::new(1, 2, 3, 4);
4323 let b = u16x4::new(30, 31, 32, 33);
4324 let r: u16x4 = transmute(vpadd_u16(transmute(a), transmute(b)));
4325 let e = u16x4::new(3, 7, 61, 65);
4326 assert_eq!(r, e);
4327 }
4328 #[simd_test(enable = "neon")]
4329 unsafe fn test_vpadd_u32() {
4330 let a = u32x2::new(1, 2);
4331 let b = u32x2::new(30, 31);
4332 let r: u32x2 = transmute(vpadd_u32(transmute(a), transmute(b)));
4333 let e = u32x2::new(3, 61);
4334 assert_eq!(r, e);
4335 }
4336 #[simd_test(enable = "neon")]
4337 unsafe fn test_vpadd_u8() {
4338 let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
4339 let b = u8x8::new(30, 31, 32, 33, 34, 35, 36, 37);
4340 let r: u8x8 = transmute(vpadd_u8(transmute(a), transmute(b)));
4341 let e = u8x8::new(3, 7, 11, 15, 61, 65, 69, 73);
4342 assert_eq!(r, e);
4343 }
4344 #[simd_test(enable = "neon")]
4345 unsafe fn test_vminq_f32() {
4346 let a = f32x4::new(1., -2., 3., -4.);
4347 let b = f32x4::new(0., 3., 2., 8.);
4348 let e = f32x4::new(0., -2., 2., -4.);
4349 let r: f32x4 = transmute(vminq_f32(transmute(a), transmute(b)));
4350 assert_eq!(r, e);
4351 }
4352 #[simd_test(enable = "neon")]
4353 unsafe fn test_vmaxq_f32() {
4354 let a = f32x4::new(1., -2., 3., -4.);
4355 let b = f32x4::new(0., 3., 2., 8.);
4356 let e = f32x4::new(1., 3., 3., 8.);
4357 let r: f32x4 = transmute(vmaxq_f32(transmute(a), transmute(b)));
4358 assert_eq!(r, e);
4359 }
ba9703b0
XL
4360}
4361
// Tests for the table-lookup intrinsics live in a separate module.
// NOTE(review): gated to little-endian targets — presumably the expected
// vectors encode a lane/byte order that does not hold on big-endian; confirm
// against the module's fixtures before enabling elsewhere.
#[cfg(test)]
#[cfg(target_endian = "little")]
mod table_lookup_tests;