]> git.proxmox.com Git - rustc.git/blob - src/stdsimd/coresimd/arm/neon.rs
New upstream version 1.26.0+dfsg1
[rustc.git] / src / stdsimd / coresimd / arm / neon.rs
1 //! ARMv7 NEON intrinsics
2
3 #[cfg(test)]
4 use stdsimd_test::assert_instr;
5
6 use coresimd::simd_llvm::simd_add;
7 use coresimd::simd::*;
8 use convert::{From, Into};
9
10 /// Vector add.
11 #[inline]
12 #[target_feature(enable = "neon")]
13 #[cfg_attr(test, assert_instr(add))]
14 pub unsafe fn vadd_s8(a: i8x8, b: i8x8) -> i8x8 {
15 simd_add(a, b)
16 }
17
18 /// Vector add.
19 #[inline]
20 #[target_feature(enable = "neon")]
21 #[cfg_attr(test, assert_instr(add))]
22 pub unsafe fn vaddq_s8(a: i8x16, b: i8x16) -> i8x16 {
23 simd_add(a, b)
24 }
25
26 /// Vector add.
27 #[inline]
28 #[target_feature(enable = "neon")]
29 #[cfg_attr(test, assert_instr(add))]
30 pub unsafe fn vadd_s16(a: i16x4, b: i16x4) -> i16x4 {
31 simd_add(a, b)
32 }
33
34 /// Vector add.
35 #[inline]
36 #[target_feature(enable = "neon")]
37 #[cfg_attr(test, assert_instr(add))]
38 pub unsafe fn vaddq_s16(a: i16x8, b: i16x8) -> i16x8 {
39 simd_add(a, b)
40 }
41
42 /// Vector add.
43 #[inline]
44 #[target_feature(enable = "neon")]
45 #[cfg_attr(test, assert_instr(add))]
46 pub unsafe fn vadd_s32(a: i32x2, b: i32x2) -> i32x2 {
47 simd_add(a, b)
48 }
49
50 /// Vector add.
51 #[inline]
52 #[target_feature(enable = "neon")]
53 #[cfg_attr(test, assert_instr(add))]
54 pub unsafe fn vaddq_s32(a: i32x4, b: i32x4) -> i32x4 {
55 simd_add(a, b)
56 }
57
58 /// Vector add.
59 #[inline]
60 #[target_feature(enable = "neon")]
61 #[cfg_attr(test, assert_instr(add))]
62 pub unsafe fn vaddq_s64(a: i64x2, b: i64x2) -> i64x2 {
63 simd_add(a, b)
64 }
65
66 /// Vector add.
67 #[inline]
68 #[target_feature(enable = "neon")]
69 #[cfg_attr(test, assert_instr(add))]
70 pub unsafe fn vadd_u8(a: u8x8, b: u8x8) -> u8x8 {
71 simd_add(a, b)
72 }
73
74 /// Vector add.
75 #[inline]
76 #[target_feature(enable = "neon")]
77 #[cfg_attr(test, assert_instr(add))]
78 pub unsafe fn vaddq_u8(a: u8x16, b: u8x16) -> u8x16 {
79 simd_add(a, b)
80 }
81
82 /// Vector add.
83 #[inline]
84 #[target_feature(enable = "neon")]
85 #[cfg_attr(test, assert_instr(add))]
86 pub unsafe fn vadd_u16(a: u16x4, b: u16x4) -> u16x4 {
87 simd_add(a, b)
88 }
89
90 /// Vector add.
91 #[inline]
92 #[target_feature(enable = "neon")]
93 #[cfg_attr(test, assert_instr(add))]
94 pub unsafe fn vaddq_u16(a: u16x8, b: u16x8) -> u16x8 {
95 simd_add(a, b)
96 }
97
98 /// Vector add.
99 #[inline]
100 #[target_feature(enable = "neon")]
101 #[cfg_attr(test, assert_instr(add))]
102 pub unsafe fn vadd_u32(a: u32x2, b: u32x2) -> u32x2 {
103 simd_add(a, b)
104 }
105
106 /// Vector add.
107 #[inline]
108 #[target_feature(enable = "neon")]
109 #[cfg_attr(test, assert_instr(add))]
110 pub unsafe fn vaddq_u32(a: u32x4, b: u32x4) -> u32x4 {
111 simd_add(a, b)
112 }
113
114 /// Vector add.
115 #[inline]
116 #[target_feature(enable = "neon")]
117 #[cfg_attr(test, assert_instr(add))]
118 pub unsafe fn vaddq_u64(a: u64x2, b: u64x2) -> u64x2 {
119 simd_add(a, b)
120 }
121
122 /// Vector add.
123 #[inline]
124 #[target_feature(enable = "neon")]
125 #[cfg_attr(test, assert_instr(fadd))]
126 pub unsafe fn vadd_f32(a: f32x2, b: f32x2) -> f32x2 {
127 simd_add(a, b)
128 }
129
130 /// Vector add.
131 #[inline]
132 #[target_feature(enable = "neon")]
133 #[cfg_attr(test, assert_instr(fadd))]
134 pub unsafe fn vaddq_f32(a: f32x4, b: f32x4) -> f32x4 {
135 simd_add(a, b)
136 }
137
138 /// Vector long add.
139 #[inline]
140 #[target_feature(enable = "neon")]
141 #[cfg_attr(test, assert_instr(saddl))]
142 pub unsafe fn vaddl_s8(a: i8x8, b: i8x8) -> i16x8 {
143 let a = i16x8::from(a);
144 let b = i16x8::from(b);
145 simd_add(a, b)
146 }
147
148 /// Vector long add.
149 #[inline]
150 #[target_feature(enable = "neon")]
151 #[cfg_attr(test, assert_instr(saddl))]
152 pub unsafe fn vaddl_s16(a: i16x4, b: i16x4) -> i32x4 {
153 let a = i32x4::from(a);
154 let b = i32x4::from(b);
155 simd_add(a, b)
156 }
157
158 /// Vector long add.
159 #[inline]
160 #[target_feature(enable = "neon")]
161 #[cfg_attr(test, assert_instr(saddl))]
162 pub unsafe fn vaddl_s32(a: i32x2, b: i32x2) -> i64x2 {
163 let a = i64x2::from(a);
164 let b = i64x2::from(b);
165 simd_add(a, b)
166 }
167
168 /// Vector long add.
169 #[inline]
170 #[target_feature(enable = "neon")]
171 #[cfg_attr(test, assert_instr(uaddl))]
172 pub unsafe fn vaddl_u8(a: u8x8, b: u8x8) -> u16x8 {
173 let a = u16x8::from(a);
174 let b = u16x8::from(b);
175 simd_add(a, b)
176 }
177
178 /// Vector long add.
179 #[inline]
180 #[target_feature(enable = "neon")]
181 #[cfg_attr(test, assert_instr(uaddl))]
182 pub unsafe fn vaddl_u16(a: u16x4, b: u16x4) -> u32x4 {
183 let a = u32x4::from(a);
184 let b = u32x4::from(b);
185 simd_add(a, b)
186 }
187
188 /// Vector long add.
189 #[inline]
190 #[target_feature(enable = "neon")]
191 #[cfg_attr(test, assert_instr(uaddl))]
192 pub unsafe fn vaddl_u32(a: u32x2, b: u32x2) -> u64x2 {
193 let a = u64x2::from(a);
194 let b = u64x2::from(b);
195 simd_add(a, b)
196 }
197
198 #[allow(improper_ctypes)]
199 extern "C" {
200 // The Reference says this instruction is
201 // supported in v7/A32/A64:
202 #[link_name = "llvm.aarch64.neon.frsqrte.v2f32"]
203 fn frsqrte_v2f32(a: f32x2) -> f32x2;
204 }
205
206 /// Reciprocal square-root estimate.
207 #[inline]
208 #[target_feature(enable = "neon")]
209 #[cfg_attr(test, assert_instr(frsqrte))]
210 pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
211 frsqrte_v2f32(a)
212 }
213
214 /// Vector narrow integer.
215 #[inline]
216 #[target_feature(enable = "neon")]
217 #[cfg_attr(test, assert_instr(xtn))]
218 pub unsafe fn vmovn_s16(a: i16x8) -> i8x8 {
219 a.into()
220 }
221
222 /// Vector narrow integer.
223 #[inline]
224 #[target_feature(enable = "neon")]
225 #[cfg_attr(test, assert_instr(xtn))]
226 pub unsafe fn vmovn_s32(a: i32x4) -> i16x4 {
227 a.into()
228 }
229
230 /// Vector narrow integer.
231 #[inline]
232 #[target_feature(enable = "neon")]
233 #[cfg_attr(test, assert_instr(xtn))]
234 pub unsafe fn vmovn_s64(a: i64x2) -> i32x2 {
235 a.into()
236 }
237
238 /// Vector narrow integer.
239 #[inline]
240 #[target_feature(enable = "neon")]
241 #[cfg_attr(test, assert_instr(xtn))]
242 pub unsafe fn vmovn_u16(a: u16x8) -> u8x8 {
243 a.into()
244 }
245
246 /// Vector narrow integer.
247 #[inline]
248 #[target_feature(enable = "neon")]
249 #[cfg_attr(test, assert_instr(xtn))]
250 pub unsafe fn vmovn_u32(a: u32x4) -> u16x4 {
251 a.into()
252 }
253
254 /// Vector narrow integer.
255 #[inline]
256 #[target_feature(enable = "neon")]
257 #[cfg_attr(test, assert_instr(xtn))]
258 pub unsafe fn vmovn_u64(a: u64x2) -> u32x2 {
259 a.into()
260 }
261
262 /// Vector long move.
263 #[inline]
264 #[target_feature(enable = "neon")]
265 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(sshll))]
266 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))]
267 pub unsafe fn vmovl_s8(a: i8x8) -> i16x8 {
268 a.into()
269 }
270
271 /// Vector long move.
272 #[inline]
273 #[target_feature(enable = "neon")]
274 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(sshll))]
275 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))]
276 pub unsafe fn vmovl_s16(a: i16x4) -> i32x4 {
277 a.into()
278 }
279
280 /// Vector long move.
281 #[inline]
282 #[target_feature(enable = "neon")]
283 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(sshll))]
284 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))]
285 pub unsafe fn vmovl_s32(a: i32x2) -> i64x2 {
286 a.into()
287 }
288
289 /// Vector long move.
290 #[inline]
291 #[target_feature(enable = "neon")]
292 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(sshll))]
293 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))]
294 pub unsafe fn vmovl_u8(a: u8x8) -> u16x8 {
295 a.into()
296 }
297
298 /// Vector long move.
299 #[inline]
300 #[target_feature(enable = "neon")]
301 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(sshll))]
302 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))]
303 pub unsafe fn vmovl_u16(a: u16x4) -> u32x4 {
304 a.into()
305 }
306
307 /// Vector long move.
308 #[inline]
309 #[target_feature(enable = "neon")]
310 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(sshll))]
311 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))]
312 pub unsafe fn vmovl_u32(a: u32x2) -> u64x2 {
313 a.into()
314 }
315
// Runtime tests for the NEON intrinsics of this module. Each test is named
// after the intrinsic it exercises and compares the returned vector against
// a hand-written expected vector.
#[cfg(test)]
mod tests {
    use stdsimd_test::simd_test;
    use simd::*;
    use coresimd::arm::neon;

    #[simd_test = "neon"]
    unsafe fn vadd_s8() {
        let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
        let e = i8x8::new(9, 9, 9, 9, 9, 9, 9, 9);
        let r = neon::vadd_s8(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddq_s8() {
        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
        let b = i8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
        let e = i8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
        let r = neon::vaddq_s8(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vadd_s16() {
        let a = i16x4::new(1, 2, 3, 4);
        let b = i16x4::new(8, 7, 6, 5);
        let e = i16x4::new(9, 9, 9, 9);
        let r = neon::vadd_s16(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddq_s16() {
        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let b = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
        let e = i16x8::new(9, 9, 9, 9, 9, 9, 9, 9);
        let r = neon::vaddq_s16(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vadd_s32() {
        let a = i32x2::new(1, 2);
        let b = i32x2::new(8, 7);
        let e = i32x2::new(9, 9);
        let r = neon::vadd_s32(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddq_s32() {
        let a = i32x4::new(1, 2, 3, 4);
        let b = i32x4::new(8, 7, 6, 5);
        let e = i32x4::new(9, 9, 9, 9);
        let r = neon::vaddq_s32(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddq_s64() {
        let a = i64x2::new(1, 2);
        let b = i64x2::new(8, 7);
        let e = i64x2::new(9, 9);
        let r = neon::vaddq_s64(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vadd_u8() {
        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let b = u8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
        let e = u8x8::new(9, 9, 9, 9, 9, 9, 9, 9);
        let r = neon::vadd_u8(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddq_u8() {
        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
        let b = u8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
        let e = u8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
        let r = neon::vaddq_u8(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vadd_u16() {
        let a = u16x4::new(1, 2, 3, 4);
        let b = u16x4::new(8, 7, 6, 5);
        let e = u16x4::new(9, 9, 9, 9);
        let r = neon::vadd_u16(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddq_u16() {
        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let b = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
        let e = u16x8::new(9, 9, 9, 9, 9, 9, 9, 9);
        let r = neon::vaddq_u16(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vadd_u32() {
        let a = u32x2::new(1, 2);
        let b = u32x2::new(8, 7);
        let e = u32x2::new(9, 9);
        let r = neon::vadd_u32(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddq_u32() {
        let a = u32x4::new(1, 2, 3, 4);
        let b = u32x4::new(8, 7, 6, 5);
        let e = u32x4::new(9, 9, 9, 9);
        let r = neon::vaddq_u32(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddq_u64() {
        let a = u64x2::new(1, 2);
        let b = u64x2::new(8, 7);
        let e = u64x2::new(9, 9);
        let r = neon::vaddq_u64(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vadd_f32() {
        let a = f32x2::new(1., 2.);
        let b = f32x2::new(8., 7.);
        let e = f32x2::new(9., 9.);
        let r = neon::vadd_f32(a, b);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddq_f32() {
        let a = f32x4::new(1., 2., 3., 4.);
        let b = f32x4::new(8., 7., 6., 5.);
        let e = f32x4::new(9., 9., 9., 9.);
        let r = neon::vaddq_f32(a, b);
        assert_eq!(r, e);
    }

    // The `vaddl_*` tests add the maximum lane value to itself: the sum does
    // not fit the input lane type, so it only matches `2 * MAX` when the
    // addition is carried out in the wider lane type.
    #[simd_test = "neon"]
    unsafe fn vaddl_s8() {
        let v = ::std::i8::MAX;
        let a = i8x8::new(v, v, v, v, v, v, v, v);
        let v = 2 * (v as i16);
        let e = i16x8::new(v, v, v, v, v, v, v, v);
        let r = neon::vaddl_s8(a, a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddl_s16() {
        let v = ::std::i16::MAX;
        let a = i16x4::new(v, v, v, v);
        let v = 2 * (v as i32);
        let e = i32x4::new(v, v, v, v);
        let r = neon::vaddl_s16(a, a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddl_s32() {
        let v = ::std::i32::MAX;
        let a = i32x2::new(v, v);
        let v = 2 * (v as i64);
        let e = i64x2::new(v, v);
        let r = neon::vaddl_s32(a, a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddl_u8() {
        let v = ::std::u8::MAX;
        let a = u8x8::new(v, v, v, v, v, v, v, v);
        let v = 2 * (v as u16);
        let e = u16x8::new(v, v, v, v, v, v, v, v);
        let r = neon::vaddl_u8(a, a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddl_u16() {
        let v = ::std::u16::MAX;
        let a = u16x4::new(v, v, v, v);
        let v = 2 * (v as u32);
        let e = u32x4::new(v, v, v, v);
        let r = neon::vaddl_u16(a, a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vaddl_u32() {
        let v = ::std::u32::MAX;
        let a = u32x2::new(v, v);
        let v = 2 * (v as u64);
        let e = u64x2::new(v, v);
        let r = neon::vaddl_u32(a, a);
        assert_eq!(r, e);
    }

    // Named after the intrinsic under test (`vrsqrte_f32`). The estimate is
    // compared with `assert_eq!`, i.e. for exact equality with the expected
    // lanes.
    #[simd_test = "neon"]
    unsafe fn vrsqrte_f32() {
        let a = f32x2::new(1.0, 2.0);
        let e = f32x2::new(0.9980469, 0.7050781);
        let r = neon::vrsqrte_f32(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovn_s16() {
        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let r = neon::vmovn_s16(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovn_s32() {
        let a = i32x4::new(1, 2, 3, 4);
        let e = i16x4::new(1, 2, 3, 4);
        let r = neon::vmovn_s32(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovn_s64() {
        let a = i64x2::new(1, 2);
        let e = i32x2::new(1, 2);
        let r = neon::vmovn_s64(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovn_u16() {
        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let r = neon::vmovn_u16(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovn_u32() {
        let a = u32x4::new(1, 2, 3, 4);
        let e = u16x4::new(1, 2, 3, 4);
        let r = neon::vmovn_u32(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovn_u64() {
        let a = u64x2::new(1, 2);
        let e = u32x2::new(1, 2);
        let r = neon::vmovn_u64(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovl_s8() {
        let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let r = neon::vmovl_s8(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovl_s16() {
        let e = i32x4::new(1, 2, 3, 4);
        let a = i16x4::new(1, 2, 3, 4);
        let r = neon::vmovl_s16(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovl_s32() {
        let e = i64x2::new(1, 2);
        let a = i32x2::new(1, 2);
        let r = neon::vmovl_s32(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovl_u8() {
        let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
        let r = neon::vmovl_u8(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovl_u16() {
        let e = u32x4::new(1, 2, 3, 4);
        let a = u16x4::new(1, 2, 3, 4);
        let r = neon::vmovl_u16(a);
        assert_eq!(r, e);
    }

    #[simd_test = "neon"]
    unsafe fn vmovl_u32() {
        let e = u64x2::new(1, 2);
        let a = u32x2::new(1, 2);
        let r = neon::vmovl_u32(a);
        assert_eq!(r, e);
    }
}