// ARM Neon intrinsic specification.
//
// This file contains the specification for a number of
// intrinsics that allow us to generate them along with
// their test cases.
//
// A note on the syntax of the file - it's not very intelligently parsed!
//
// Comments start with AT LEAST two, or four or more slashes, so // is a
// comment and /////// is too.
//
// Sections start with EXACTLY three slashes followed
// by AT LEAST one space. Sections are used for two things:
// 1) they serve as the doc comment for the given intrinsics.
// 2) they reset all variables (name, fn, etc.)
//
// name    - The prefix of the function; suffixes are auto-
//           generated by the type they get passed.
//
// fn      - The function to call in rust-land.
//
// aarch64 - The intrinsic to check on the aarch64 architecture.
//           If this is given but no arm intrinsic is provided,
//           the function will exclusively be generated for
//           aarch64.
//           This is used to generate both aarch64-specific and
//           shared intrinsics, by first specifying only the aarch64
//           variant and then the arm variant.
//
// arm     - The arm v7 intrinsic used to check for arm code
//           generation. All neon functions available in arm are
//           also available in aarch64. If no aarch64 intrinsic was
//           set, they are assumed to be the same.
//           Intrinsics ending with a `.` will have a size suffix
//           added (such as `i8` or `i64`) that is not sign specific.
//           Intrinsics ending with a `.s` will have a size suffix
//           added (such as `s8` or `u64`) that is sign specific.
//
// a       - First input for tests; it gets scaled to the size of
//           the type.
//
// b       - Second input for tests; it gets scaled to the size of
//           the type.
//
// TRUE  - 'true': all bits are set to 1
// FALSE - 'false': all bits are set to 0
// FF    - same as 'true'
// MIN   - minimal value (either 0 or the lowest negative number)
// MAX   - maximal value, prone to overflow
//
// # validate <values>
// Validates a and b against the expected result of the test.
// The special values 'TRUE' and 'FALSE' can be used to
// represent the correct NEON representation of true or
// false values. They too get scaled to the type.
//
// Validate needs to be called before generate, as it sets
// up the rules for validation that get generated for each
// type.
//
// The generate command generates the intrinsics; it uses the
// variables set above and can be called multiple times while
// overwriting some of the variables.
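//
// As an illustrative sketch only (this comment is not parsed as part of the
// spec): an entry such as the `Vector bitwise and` section below - assuming
// its `fn` maps to the portable `simd_and` helper - is expected to expand
// into Rust functions roughly of this shape for each listed type, plus a
// test that feeds the `a`/`b` inputs through and checks them against the
// `validate` values. The exact attributes and suffix rules are decided by
// the generator, not by this sketch.
//
// #[inline]
// #[target_feature(enable = "neon")]
// pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
//     simd_and(a, b)
// }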
/// Vector bitwise and
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Vector bitwise or (immediate, inclusive)
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Vector bitwise exclusive or (vector)
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Three-way exclusive OR
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
c = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
link-aarch64 = llvm.aarch64.crypto.eor3s._EXT_
generate int8x16_t, int16x8_t, int32x4_t, int64x2_t
link-aarch64 = llvm.aarch64.crypto.eor3u._EXT_
generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t
// Absolute difference between the arguments

/// Absolute difference between the arguments
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
validate 15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15
link-arm = vabds._EXT_
link-aarch64 = sabd._EXT_
link-arm = vabdu._EXT_
link-aarch64 = uabd._EXT_

/// Absolute difference between the arguments of Floating
a = 1.0, 2.0, 5.0, -4.0
b = 9.0, 3.0, 2.0, 8.0
validate 8.0, 1.0, 3.0, 12.0
link-aarch64 = fabd._EXT_
link-arm = vabds._EXT_
link-aarch64 = fabd._EXT_

/// Floating-point absolute difference
multi_fn = simd_extract, {vabd-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
// Absolute difference Long

/// Unsigned Absolute difference Long
multi_fn = simd_cast, {vabd-unsigned-noext, a, b}
a = 1, 2, 3, 4, 4, 3, 2, 1
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 9, 8, 7, 6, 6, 7, 8, 9
generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t

/// Signed Absolute difference Long
multi_fn = simd_cast, c:uint8x8_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 2, 3, 4, 4, 3, 2, 1
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 9, 8, 7, 6, 6, 7, 8, 9
generate int8x8_t:int8x8_t:int16x8_t

/// Signed Absolute difference Long
multi_fn = simd_cast, c:uint16x4_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
generate int16x4_t:int16x4_t:int32x4_t

/// Signed Absolute difference Long
multi_fn = simd_cast, c:uint32x2_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
generate int32x2_t:int32x2_t:int64x2_t
/// Unsigned Absolute difference Long
multi_fn = simd_shuffle!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, {vabd_u8, c, d}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2, 3, 4, 5, 6
generate uint8x16_t:uint8x16_t:uint16x8_t

/// Unsigned Absolute difference Long
multi_fn = simd_shuffle!, c:uint16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_shuffle!, d:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, {vabd_u16, c, d}
a = 1, 2, 3, 4, 8, 9, 11, 12
b = 10, 10, 10, 10, 10, 10, 10, 10
generate uint16x8_t:uint16x8_t:uint32x4_t

/// Unsigned Absolute difference Long
multi_fn = simd_shuffle!, c:uint32x2_t, a, a, [2, 3]
multi_fn = simd_shuffle!, d:uint32x2_t, b, b, [2, 3]
multi_fn = simd_cast, {vabd_u32, c, d}
generate uint32x4_t:uint32x4_t:uint64x2_t

/// Signed Absolute difference Long
multi_fn = simd_shuffle!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, e:uint8x8_t, {vabd_s8, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2, 3, 4, 5, 6
generate int8x16_t:int8x16_t:int16x8_t

/// Signed Absolute difference Long
multi_fn = simd_shuffle!, c:int16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_shuffle!, d:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, e:uint16x4_t, {vabd_s16, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4, 9, 10, 11, 12
b = 10, 10, 10, 10, 10, 10, 10, 10
generate int16x8_t:int16x8_t:int32x4_t

/// Signed Absolute difference Long
multi_fn = simd_shuffle!, c:int32x2_t, a, a, [2, 3]
multi_fn = simd_shuffle!, d:int32x2_t, b, b, [2, 3]
multi_fn = simd_cast, e:uint32x2_t, {vabd_s32, c, d}
multi_fn = simd_cast, e
generate int32x4_t:int32x4_t:int64x2_t
/// Compare bitwise Equal (vector)
a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX
b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN
validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t

/// Floating-point compare equal
a = 1.2, 3.4, 5.6, 7.8
b = 1.2, 3.4, 5.6, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare bitwise equal
multi_fn = transmute, {vceq-in_ntt-noext, {transmute, a}, {transmute, b}}
generate i64:u64, u64

/// Floating-point compare equal
multi_fn = simd_extract, {vceq-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
generate f32:u32, f64:u64
/// Signed compare bitwise equal to zero
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t

/// Unsigned compare bitwise equal to zero
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
generate uint*_t, uint64x*_t

/// Floating-point compare bitwise equal to zero
a = 0.0, 1.2, 3.4, 5.6
fixed = 0.0, 0.0, 0.0, 0.0
validate TRUE, FALSE, FALSE, FALSE
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare bitwise equal to zero
multi_fn = transmute, {vceqz-in_ntt-noext, {transmute, a}}
generate i64:u64, u64

/// Floating-point compare bitwise equal to zero
multi_fn = simd_extract, {vceqz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
generate f32:u32, f64:u64
/// Signed compare bitwise Test bits nonzero
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly16x4_t:uint16x4_t, poly16x8_t:uint16x8_t

/// Unsigned compare bitwise Test bits nonzero
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

/// Compare bitwise test bits nonzero
multi_fn = transmute, {vtst-in_ntt-noext, {transmute, a}, {transmute, b}}
generate i64:i64:u64, u64
/// Signed saturating accumulate of unsigned value
a = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
b = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
validate 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8
link-aarch64 = suqadd._EXT_
generate i32:u32:i32, i64:u64:i64

/// Signed saturating accumulate of unsigned value
multi_fn = simd_extract, {vuqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
generate i8:u8:i8, i16:u16:i16

// Floating-point absolute value

/// Floating-point absolute value
a = -0.1, -2.2, -3.3, -6.6
validate 0.1, 2.2, 3.3, 6.6
generate float64x1_t:float64x1_t, float64x2_t:float64x2_t
generate float32x2_t:float32x2_t, float32x4_t:float32x4_t
483 /// Compare signed greater than
486 a
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
487 b
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
488 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
490 generate int64x1_t
:uint64x1_t
, int64x2_t
:uint64x2_t
493 generate int8x8_t
:uint8x8_t
, int8x16_t
:uint8x16_t
, int16x4_t
:uint16x4_t
, int16x8_t
:uint16x8_t
, int32x2_t
:uint32x2_t
, int32x4_t
:uint32x4_t
495 /// Compare unsigned greater than
498 a
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
499 b
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
500 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
508 /// Floating
-point compare greater than
511 a
= 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
512 b
= 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
513 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
516 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
519 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
521 /// Compare greater than
523 multi_fn
= transmute
, {vcgt
-in_ntt
-noext
, {transmute
, a
}, {transmute
, b
}}
529 generate i64
:u64
, u64
531 /// Floating
-point compare greater than
533 multi_fn
= simd_extract
, {vcgt
-in_ntt
-noext
, {vdup_n
-in_ntt
-noext
, a
}, {vdup_n
-in_ntt
-noext
, b
}}, 0
539 generate f32
:u32
, f64
:u64
545 /// Compare signed less than
548 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
549 b
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
550 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
552 generate int64x1_t
:uint64x1_t
, int64x2_t
:uint64x2_t
555 generate int8x8_t
:uint8x8_t
, int8x16_t
:uint8x16_t
, int16x4_t
:uint16x4_t
, int16x8_t
:uint16x8_t
, int32x2_t
:uint32x2_t
, int32x4_t
:uint32x4_t
557 /// Compare unsigned less than
560 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
561 b
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
562 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
570 /// Floating
-point compare less than
573 a
= 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
574 b
= 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
575 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
578 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
581 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
583 /// Compare less than
585 multi_fn
= transmute
, {vclt
-in_ntt
-noext
, {transmute
, a
}, {transmute
, b
}}
591 generate i64
:u64
, u64
593 /// Floating
-point compare less than
595 multi_fn
= simd_extract
, {vclt
-in_ntt
-noext
, {vdup_n
-in_ntt
-noext
, a
}, {vdup_n
-in_ntt
-noext
, b
}}, 0
601 generate f32
:u32
, f64
:u64
// less than or equal
607 /// Compare signed less than or equal
610 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
611 b
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
612 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
615 generate int64x1_t
:uint64x1_t
, int64x2_t
:uint64x2_t
618 generate int8x8_t
:uint8x8_t
, int8x16_t
:uint8x16_t
, int16x4_t
:uint16x4_t
, int16x8_t
:uint16x8_t
, int32x2_t
:uint32x2_t
, int32x4_t
:uint32x4_t
620 /// Compare greater than or equal
622 multi_fn
= transmute
, {vcge
-in_ntt
-noext
, {transmute
, a
}, {transmute
, b
}}
628 generate i64
:u64
, u64
630 /// Floating
-point compare greater than or equal
632 multi_fn
= simd_extract
, {vcge
-in_ntt
-noext
, {vdup_n
-in_ntt
-noext
, a
}, {vdup_n
-in_ntt
-noext
, b
}}, 0
638 generate f32
:u32
, f64
:u64
640 /// Compare unsigned less than or equal
643 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
644 b
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
645 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
653 /// Floating
-point compare less than or equal
656 a
= 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
657 b
= 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
658 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
660 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
663 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
665 /// Compare less than or equal
667 multi_fn
= transmute
, {vcle
-in_ntt
-noext
, {transmute
, a
}, {transmute
, b
}}
673 generate i64
:u64
, u64
675 /// Floating
-point compare less than or equal
677 multi_fn
= simd_extract
, {vcle
-in_ntt
-noext
, {vdup_n
-in_ntt
-noext
, a
}, {vdup_n
-in_ntt
-noext
, b
}}, 0
683 generate f32
:u32
, f64
:u64
// greater than or equal
689 /// Compare signed greater than or equal
692 a
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
693 b
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
694 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
697 generate int64x1_t
:uint64x1_t
, int64x2_t
:uint64x2_t
700 generate int8x8_t
:uint8x8_t
, int8x16_t
:uint8x16_t
, int16x4_t
:uint16x4_t
, int16x8_t
:uint16x8_t
, int32x2_t
:uint32x2_t
, int32x4_t
:uint32x4_t
702 /// Compare unsigned greater than or equal
705 a
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
706 b
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
707 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
715 /// Floating
-point compare greater than or equal
718 a
= 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
719 b
= 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
720 validate TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
723 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
726 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
728 /// Compare signed greater than or equal to zero
731 a
= MIN
, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
732 fixed
= 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
733 validate FALSE
, FALSE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
736 generate int8x8_t
:uint8x8_t
, int8x16_t
:uint8x16_t
, int16x4_t
:uint16x4_t
, int16x8_t
:uint16x8_t
, int32x2_t
:uint32x2_t
, int32x4_t
:uint32x4_t
, int64x1_t
:uint64x1_t
, int64x2_t
:uint64x2_t
738 /// Floating
-point compare greater than or equal to zero
741 a
= -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
742 fixed
= 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
743 validate FALSE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
746 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
, float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
748 /// Compare signed greater than or equal to zero
750 multi_fn
= transmute
, {vcgez
-in_ntt
-noext
, {transmute
, a
}}
757 /// Floating
-point compare greater than or equal to zero
759 multi_fn
= simd_extract
, {vcgez
-in_ntt
-noext
, {vdup_n
-in_ntt
-noext
, a
}}, 0
764 generate f32
:u32
, f64
:u64
766 /// Compare signed greater than zero
769 a
= MIN
, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
770 fixed
= 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
771 validate FALSE
, FALSE
, FALSE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
774 generate int8x8_t
:uint8x8_t
, int8x16_t
:uint8x16_t
, int16x4_t
:uint16x4_t
, int16x8_t
:uint16x8_t
, int32x2_t
:uint32x2_t
, int32x4_t
:uint32x4_t
, int64x1_t
:uint64x1_t
, int64x2_t
:uint64x2_t
776 /// Floating
-point compare greater than zero
779 a
= -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
780 fixed
= 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
781 validate FALSE
, FALSE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
, TRUE
784 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
, float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
786 /// Compare signed greater than zero
788 multi_fn
= transmute
, {vcgtz
-in_ntt
-noext
, {transmute
, a
}}
795 /// Floating
-point compare greater than zero
797 multi_fn
= simd_extract
, {vcgtz
-in_ntt
-noext
, {vdup_n
-in_ntt
-noext
, a
}}, 0
802 generate f32
:u32
, f64
:u64
804 /// Compare signed less than or equal to zero
807 a
= MIN
, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
808 fixed
= 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
809 validate TRUE
, TRUE
, TRUE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
812 generate int8x8_t
:uint8x8_t
, int8x16_t
:uint8x16_t
, int16x4_t
:uint16x4_t
, int16x8_t
:uint16x8_t
, int32x2_t
:uint32x2_t
, int32x4_t
:uint32x4_t
, int64x1_t
:uint64x1_t
, int64x2_t
:uint64x2_t
814 /// Floating
-point compare less than or equal to zero
817 a
= -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
818 fixed
= 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
819 validate TRUE
, TRUE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
822 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
, float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
824 /// Compare less than or equal to zero
826 multi_fn
= transmute
, {vclez
-in_ntt
-noext
, {transmute
, a
}}
833 /// Floating
-point compare less than or equal to zero
835 multi_fn
= simd_extract
, {vclez
-in_ntt
-noext
, {vdup_n
-in_ntt
-noext
, a
}}, 0
840 generate f32
:u32
, f64
:u64
842 /// Compare signed less than zero
845 a
= MIN
, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
846 fixed
= 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
847 validate TRUE
, TRUE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
850 generate int8x8_t
:uint8x8_t
, int8x16_t
:uint8x16_t
, int16x4_t
:uint16x4_t
, int16x8_t
:uint16x8_t
, int32x2_t
:uint32x2_t
, int32x4_t
:uint32x4_t
, int64x1_t
:uint64x1_t
, int64x2_t
:uint64x2_t
852 /// Floating
-point compare less than zero
855 a
= -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
856 fixed
= 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
857 validate TRUE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
860 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
, float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
862 /// Compare less than zero
864 multi_fn
= transmute
, {vcltz
-in_ntt
-noext
, {transmute
, a
}}
871 /// Floating
-point compare less than zero
873 multi_fn
= simd_extract
, {vcltz
-in_ntt
-noext
, {vdup_n
-in_ntt
-noext
, a
}}, 0
878 generate f32
:u32
, f64
:u64
880 /// Count leading sign bits
882 a
= MIN
, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
883 validate
0, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, 0
887 link
-arm
= vcls._EXT_
888 link
-aarch64
= cls._EXT_
891 /// Count leading sign bits
893 multi_fn
= transmute
, {vcls
-signed
-noext
, {transmute
, a
}}
894 a
= MIN
, MAX
, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
895 validate BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
899 generate uint8x8_t
:int8x8_t
, uint8x16_t
:int8x16_t
, uint16x4_t
:int16x4_t
, uint16x8_t
:int16x8_t
, uint32x2_t
:int32x2_t
, uint32x4_t
:int32x4_t
901 /// Count leading zero bits
903 multi_fn
= self
-signed
-ext
, a
904 a
= MIN
, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
905 validate
0, 0, BITS
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, 1
911 /// Count leading zero bits
913 multi_fn
= transmute
, {self
-signed
-ext
, transmute
(a
)}
914 a
= MIN
, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
915 validate BITS
, BITS
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, BITS_M1
, 0
921 /// Floating
-point absolute compare greater than
923 a
= -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
924 b
= -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
925 validate
!0, FALSE
, TRUE
, FALSE
, TRUE
, FALSE
, TRUE
, FALSE
928 link
-aarch64
= facgt._EXT2_._EXT_
929 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
, f32
:u32
, f64
:u64
932 link
-arm
= vacgt._EXT2_._EXT_
933 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
935 /// Floating
-point absolute compare greater than or equal
937 a
= -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
938 b
= -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
939 validate
!0, TRUE
, TRUE
, FALSE
, TRUE
, FALSE
, TRUE
, FALSE
942 link
-aarch64
= facge._EXT2_._EXT_
943 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
, f32
:u32
, f64
:u64
946 link
-arm
= vacge._EXT2_._EXT_
947 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
949 /// Floating
-point absolute compare less than
951 multi_fn
= vcagt
-self
-noext
, b
, a
952 a
= -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
953 b
= -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
954 validate
0, FALSE
, FALSE
, TRUE
, FALSE
, TRUE
, FALSE
, TRUE
957 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
, f32
:u32
, f64
:u64
960 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
962 /// Floating
-point absolute compare less than or equal
964 multi_fn
= vcage
-self
-noext
, b
, a
965 a
= -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
966 b
= -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
967 validate
0, TRUE
, FALSE
, TRUE
, FALSE
, TRUE
, FALSE
, TRUE
970 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
, f32
:u32
, f64
:u64
973 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
975 /// Insert vector element from another vector element
979 multi_fn
= static_assert_imm
-in0_exp_len
-LANE1
980 multi_fn
= static_assert_imm
-in_exp_len
-LANE2
981 multi_fn
= matchn
-in0_exp_len
-LANE1
, simd_shuffle
!, a
, b
, {ins
-in0_len
-in0_len
-LANE2
}
982 a
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
983 b
= 0, MAX
, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
985 validate MAX
, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
988 generate int8x8_t
, int8x16_t
, int16x4_t
, int16x8_t
, int32x2_t
, int32x4_t
, int64x2_t
989 generate uint8x8_t
, uint8x16_t
, uint16x4_t
, uint16x8_t
, uint32x2_t
, uint32x4_t
, uint64x2_t
990 generate poly8x8_t
, poly8x16_t
, poly16x4_t
, poly16x8_t
, poly64x2_t
992 /// Insert vector element from another vector element
996 multi_fn
= static_assert_imm
-in0_exp_len
-LANE1
997 multi_fn
= static_assert_imm
-in_exp_len
-LANE2
998 multi_fn
= matchn
-in0_exp_len
-LANE1
, simd_shuffle
!, a
, b
, {ins
-in0_len
-in0_len
-LANE2
}
1002 validate
0.5, 2.
, 3.
, 4.
1005 generate float32x2_t
, float32x4_t
, float64x2_t
1007 /// Insert vector element from another vector element
1010 constn
= LANE1
:LANE2
1011 multi_fn
= static_assert_imm
-in0_exp_len
-LANE1
1012 multi_fn
= static_assert_imm
-in_exp_len
-LANE2
1013 multi_fn
= simd_shuffle
!, a
:in_t
, a
, a
, {asc
-0-in_len
}
1014 multi_fn
= matchn
-in0_exp_len
-LANE1
, simd_shuffle
!, a
, b
, {ins
-in0_len
-in_len
-LANE2
}
1015 a
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
1016 b
= 0, MAX
, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1018 validate MAX
, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
1021 generate int8x8_t
:int8x16_t
:int8x8_t
, int16x4_t
:int16x8_t
:int16x4_t
, int32x2_t
:int32x4_t
:int32x2_t
1022 generate uint8x8_t
:uint8x16_t
:uint8x8_t
, uint16x4_t
:uint16x8_t
:uint16x4_t
, uint32x2_t
:uint32x4_t
:uint32x2_t
1023 generate poly8x8_t
:poly8x16_t
:poly8x8_t
, poly16x4_t
:poly16x8_t
:poly16x4_t
1025 /// Insert vector element from another vector element
1028 constn
= LANE1
:LANE2
1029 multi_fn
= static_assert_imm
-in0_exp_len
-LANE1
1030 multi_fn
= static_assert_imm
-in_exp_len
-LANE2
1031 multi_fn
= simd_shuffle
!, a
:in_t
, a
, a
, {asc
-0-in_len
}
1032 multi_fn
= matchn
-in0_exp_len
-LANE1
, simd_shuffle
!, a
, b
, {ins
-in0_len
-in_len
-LANE2
}
1036 validate
0.5, 2.
, 3.
, 4.
1039 generate float32x2_t
:float32x4_t
:float32x2_t
1041 /// Insert vector element from another vector element
1044 constn
= LANE1
:LANE2
1045 multi_fn
= static_assert_imm
-in0_exp_len
-LANE1
1046 multi_fn
= static_assert_imm
-in_exp_len
-LANE2
1047 multi_fn
= simd_shuffle
!, b
:in_t0
, b
, b
, {asc
-0-in0_len
}
1048 multi_fn
= matchn
-in0_exp_len
-LANE1
, simd_shuffle
!, a
, b
, {ins
-in0_len
-in0_len
-LANE2
}
1049 a
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
1050 b
= 0, MAX
, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1052 validate MAX
, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
1055 generate int8x16_t
:int8x8_t
:int8x16_t
, int16x8_t
:int16x4_t
:int16x8_t
, int32x4_t
:int32x2_t
:int32x4_t
1056 generate uint8x16_t
:uint8x8_t
:uint8x16_t
, uint16x8_t
:uint16x4_t
:uint16x8_t
, uint32x4_t
:uint32x2_t
:uint32x4_t
1057 generate poly8x16_t
:poly8x8_t
:poly8x16_t
, poly16x8_t
:poly16x4_t
:poly16x8_t
1059 /// Insert vector element from another vector element
1062 constn
= LANE1
:LANE2
1063 multi_fn
= static_assert_imm
-in0_exp_len
-LANE1
1064 multi_fn
= static_assert_imm
-in_exp_len
-LANE2
1065 multi_fn
= simd_shuffle
!, b
:in_t0
, b
, b
, {asc
-0-in0_len
}
1066 multi_fn
= matchn
-in0_exp_len
-LANE1
, simd_shuffle
!, a
, b
, {ins
-in0_len
-in0_len
-LANE2
}
1067 a
= 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
1068 b
= MAX
, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1073 generate int64x2_t
:int64x1_t
:int64x2_t
, uint64x2_t
:uint64x1_t
:uint64x2_t
, poly64x2_t
:poly64x1_t
:poly64x2_t
1075 /// Insert vector element from another vector element
1078 constn
= LANE1
:LANE2
1079 multi_fn
= static_assert_imm
-in0_exp_len
-LANE1
1080 multi_fn
= static_assert_imm
-in_exp_len
-LANE2
1081 multi_fn
= simd_shuffle
!, b
:in_t0
, b
, b
, {asc
-0-in0_len
}
1082 multi_fn
= matchn
-in0_exp_len
-LANE1
, simd_shuffle
!, a
, b
, {ins
-in0_len
-in0_len
-LANE2
}
1086 validate
1.
, 0.5, 3.
, 4.
1089 generate float32x4_t
:float32x2_t
:float32x4_t
1091 generate float64x2_t
:float64x1_t
:float64x2_t
1093 /// Insert vector element from another vector element
1096 multi_fn
= transmute
, a
1098 validate
1, 0, 0, 0, 0, 0, 0, 0
1102 generate u64
:int8x8_t
, u64
:int16x4_t
, u64
:int32x2_t
, u64
:int64x1_t
1103 generate u64
:uint8x8_t
, u64
:uint16x4_t
, u64
:uint32x2_t
, u64
:uint64x1_t
1104 generate u64
:poly8x8_t
, u64
:poly16x4_t
1106 generate u64
:poly64x1_t
1108 /// Insert vector element from another vector element
1111 multi_fn
= transmute
, a
1116 generate u64
:float64x1_t
1118 generate u64
:float32x2_t
1120 /// Fixed
-point convert to floating
-point
1125 validate
1.
, 2.
, 3.
, 4.
1128 generate int64x1_t
:float64x1_t
, int64x2_t
:float64x2_t
1130 generate uint64x1_t
:float64x1_t
, uint64x2_t
:float64x2_t
1134 generate int32x2_t
:float32x2_t
, int32x4_t
:float32x4_t
1136 generate uint32x2_t
:float32x2_t
, uint32x4_t
:float32x4_t
1138 /// Floating
-point convert to higher precision long
1143 validate
-1.2f32 as f64
, 1.2f32 as f64
1146 generate float32x2_t
:float64x2_t
1148 /// Floating
-point convert to higher precision long
1151 multi_fn
= simd_shuffle
!, b
:float32x2_t
, a
, a
, [2, 3]
1152 multi_fn
= simd_cast
, b
1153 a
= -1.2, 1.2, 2.3, 3.4
1154 validate
2.3f32 as f64
, 3.4f32 as f64
1157 generate float32x4_t
:float64x2_t
1159 /// Floating
-point convert to lower precision narrow
1164 validate
-1.2f64 as f32
, 1.2f64 as f32
1167 generate float64x2_t
:float32x2_t
1169 /// Floating
-point convert to lower precision narrow
1172 multi_fn
= simd_shuffle
!, a
, {simd_cast
, b
}, [0, 1, 2, 3]
1175 validate
-1.2, 1.2, -2.3f64 as f32
, 3.4f64 as f32
1178 generate float32x2_t
:float64x2_t
:float32x4_t
1180 /// Floating
-point convert to lower precision narrow
, rounding to odd
1187 link
-aarch64
= fcvtxn._EXT2_._EXT_
1188 generate float64x2_t
:float32x2_t
1190 /// Floating
-point convert to lower precision narrow
, rounding to odd
1193 multi_fn
= simd_extract
, {vcvtx
-_f32_f64
-noext
, {vdupq_n
-in_ntt
-noext
, a
}}, 0
1200 /// Floating
-point convert to lower precision narrow
, rounding to odd
1203 multi_fn
= simd_shuffle
!, a
, {vcvtx
-noq_doubleself
-noext
, b
}, [0, 1, 2, 3]
1206 validate
-1.0, 2.0, -3.0, 4.0
1209 generate float32x2_t
:float64x2_t
:float32x4_t
1211 /// Fixed
-point convert to floating
-point
1215 multi_fn
= static_assert
-N
-1-bits
1218 validate
0.25, 0.5, 0.75, 1.
1219 arm
-aarch64
-separate
1222 link
-aarch64
= vcvtfxs2fp._EXT2_._EXT_
1224 generate int64x1_t
:float64x1_t
, int64x2_t
:float64x2_t
, i32
:f32
, i64
:f64
1227 link
-aarch64
= vcvtfxu2fp._EXT2_._EXT_
1229 generate uint64x1_t
:float64x1_t
, uint64x2_t
:float64x2_t
, u32
:f32
, u64
:f64
1232 link
-aarch64
= vcvtfxs2fp._EXT2_._EXT_
1234 link
-arm
= vcvtfxs2fp._EXT2_._EXT_
1237 generate int32x2_t
:float32x2_t
, int32x4_t
:float32x4_t
1240 link
-aarch64
= vcvtfxu2fp._EXT2_._EXT_
1242 link
-arm
= vcvtfxu2fp._EXT2_._EXT_
1244 generate uint32x2_t
:float32x2_t
, uint32x4_t
:float32x4_t
1246 /// Floating
-point convert to fixed
-point
, rounding toward zero
1250 multi_fn
= static_assert
-N
-1-bits
1251 a
= 0.25, 0.5, 0.75, 1.
1254 arm
-aarch64
-separate
1257 link
-aarch64
= vcvtfp2fxs._EXT2_._EXT_
1259 generate float64x1_t
:int64x1_t
, float64x2_t
:int64x2_t
, f32
:i32
, f64
:i64
1262 link
-aarch64
= vcvtfp2fxu._EXT2_._EXT_
1264 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
, f32
:u32
, f64
:u64
1267 link
-aarch64
= vcvtfp2fxs._EXT2_._EXT_
1269 link
-arm
= vcvtfp2fxs._EXT2_._EXT_
1271 generate float32x2_t
:int32x2_t
, float32x4_t
:int32x4_t
1274 link
-aarch64
= vcvtfp2fxu._EXT2_._EXT_
1276 link
-arm
= vcvtfp2fxu._EXT2_._EXT_
1278 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
1280 /// Fixed
-point convert to floating
-point
1283 multi_fn
= a as out_t
1288 generate i32
:f32
, i64
:f64
1290 generate u32
:f32
, u64
:f64
1292 /// Fixed
-point convert to floating
-point
1295 multi_fn
= a as out_t
1300 generate f32
:i32
, f64
:i64
1302 generate f32
:u32
, f64
:u64
1304 /// Floating
-point convert to signed fixed
-point
, rounding toward zero
1307 link
-aarch64
= llvm.fptosi.sat._EXT2_._EXT_
1308 a
= -1.1, 2.1, -2.9, 3.9
1309 validate
-1, 2, -2, 3
1312 generate float64x1_t
:int64x1_t
, float64x2_t
:int64x2_t
1314 link
-arm
= llvm.fptosi.sat._EXT2_._EXT_
1316 generate float32x2_t
:int32x2_t
, float32x4_t
:int32x4_t
1318 /// Floating
-point convert to unsigned fixed
-point
, rounding toward zero
1321 link
-aarch64
= llvm.fptoui.sat._EXT2_._EXT_
1322 a
= 1.1, 2.1, 2.9, 3.9
1326 generate float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
1328 link
-arm
= llvm.fptoui.sat._EXT2_._EXT_
1330 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
1332 /// Floating
-point convert to signed integer
, rounding to nearest with ties to away
1335 a
= -1.1, 2.1, -2.9, 3.9
1336 validate
-1, 2, -3, 4
1339 link
-aarch64
= fcvtas._EXT2_._EXT_
1340 generate float32x2_t
:int32x2_t
, float32x4_t
:int32x4_t
, float64x1_t
:int64x1_t
, float64x2_t
:int64x2_t
1342 /// Floating
-point convert to integer
, rounding to nearest with ties to away
1349 link
-aarch64
= fcvtas._EXT2_._EXT_
1350 generate f32
:i32
, f64
:i64
1353 link
-aarch64
= fcvtau._EXT2_._EXT_
1354 generate f32
:u32
, f64
:u64
1356 /// Floating
-point convert to signed integer
, rounding to nearest with ties to even
1359 a
= -1.5, 2.1, -2.9, 3.9
1360 validate
-2, 2, -3, 4
1363 link
-aarch64
= fcvtns._EXT2_._EXT_
1364 generate float32x2_t
:int32x2_t
, float32x4_t
:int32x4_t
, float64x1_t
:int64x1_t
, float64x2_t
:int64x2_t
, f32
:i32
, f64
:i64
1366 /// Floating
-point convert to signed integer
, rounding toward minus infinity
1369 a
= -1.1, 2.1, -2.9, 3.9
1370 validate
-2, 2, -3, 3
1373 link
-aarch64
= fcvtms._EXT2_._EXT_
1374 generate float32x2_t
:int32x2_t
, float32x4_t
:int32x4_t
, float64x1_t
:int64x1_t
, float64x2_t
:int64x2_t
, f32
:i32
, f64
:i64
1376 /// Floating
-point convert to signed integer
, rounding toward plus infinity
1379 a
= -1.1, 2.1, -2.9, 3.9
1380 validate
-1, 3, -2, 4
1383 link
-aarch64
= fcvtps._EXT2_._EXT_
1384 generate float32x2_t
:int32x2_t
, float32x4_t
:int32x4_t
, float64x1_t
:int64x1_t
, float64x2_t
:int64x2_t
, f32
:i32
, f64
:i64
1386 /// Floating
-point convert to unsigned integer
, rounding to nearest with ties to away
1389 a
= 1.1, 2.1, 2.9, 3.9
1393 link
-aarch64
= fcvtau._EXT2_._EXT_
1394 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
, float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
1396 /// Floating
-point convert to unsigned integer
, rounding to nearest with ties to even
1399 a
= 1.5, 2.1, 2.9, 3.9
1403 link
-aarch64
= fcvtnu._EXT2_._EXT_
1404 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
, float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
, f32
:u32
, f64
:u64
1406 /// Floating
-point convert to unsigned integer
, rounding toward minus infinity
1409 a
= 1.1, 2.1, 2.9, 3.9
1413 link
-aarch64
= fcvtmu._EXT2_._EXT_
1414 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
, float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
, f32
:u32
, f64
:u64
1416 /// Floating
-point convert to unsigned integer
, rounding toward plus infinity
1419 a
= 1.1, 2.1, 2.9, 3.9
1423 link
-aarch64
= fcvtpu._EXT2_._EXT_
1424 generate float32x2_t
:uint32x2_t
, float32x4_t
:uint32x4_t
, float64x1_t
:uint64x1_t
, float64x2_t
:uint64x2_t
, f32
:u32
, f64
:u64
1426 /// Set all vector lanes to the same value
1430 multi_fn
= static_assert_imm
-in_exp_len
-N
1431 multi_fn
= simd_shuffle
!, a
, a
, {dup
-out_len
-N as u32
}
1432 a
= 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
1434 validate
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1437 generate poly64x2_t
, poly64x1_t
:poly64x2_t
1441 generate int8x16_t
:int8x8_t
, int16x8_t
:int16x4_t
, int32x4_t
:int32x2_t
1442 generate int8x8_t
:int8x16_t
, int16x4_t
:int16x8_t
, int32x2_t
:int32x4_t
1445 generate uint8x16_t
:uint8x8_t
, uint16x8_t
:uint16x4_t
, uint32x4_t
:uint32x2_t
1446 generate uint8x8_t
:uint8x16_t
, uint16x4_t
:uint16x8_t
, uint32x2_t
:uint32x4_t
1448 generate poly8x8_t
, poly8x16_t
, poly16x4_t
, poly16x8_t
1449 generate poly8x16_t
:poly8x8_t
, poly16x8_t
:poly16x4_t
1450 generate poly8x8_t
:poly8x16_t
, poly16x4_t
:poly16x8_t
1452 /// Set all vector lanes to the same value
1456 multi_fn
= static_assert_imm
-in_exp_len
-N
1457 multi_fn
= simd_shuffle
!, a
, a
, {dup
-out_len
-N as u32
}
1458 a
= 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
1460 validate
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1464 generate int64x2_t
, int64x1_t
:int64x2_t
, uint64x2_t
, uint64x1_t
:uint64x2_t
1466 /// Set all vector lanes to the same value
1470 multi_fn
= static_assert_imm
-in_exp_len
-N
1471 multi_fn
= simd_shuffle
!, a
, a
, {dup
-out_len
-N as u32
}
1474 validate
1.
, 1.
, 1.
, 1.
1477 generate float64x2_t
, float64x1_t
:float64x2_t
1480 generate float
*_t
, float32x4_t
:float32x2_t
, float32x2_t
:float32x4_t
1482 /// Set all vector lanes to the same value
1486 multi_fn
= static_assert_imm
-in_exp_len
-N
1496 generate int64x1_t
, uint64x1_t
1498 /// Set all vector lanes to the same value
1502 multi_fn
= static_assert_imm
-in_exp_len
-N
1509 generate float64x1_t
1511 /// Set all vector lanes to the same value
1515 multi_fn
= static_assert_imm
-in_exp_len
-N
1516 multi_fn
= transmute
--<element_t _
>, {simd_extract
, a
, N as u32
}
1522 generate poly64x2_t
:poly64x1_t
1525 generate int64x2_t
:int64x1_t
, uint64x2_t
:uint64x1_t
1527 /// Set all vector lanes to the same value
1531 multi_fn
= static_assert_imm
-in_exp_len
-N
1532 multi_fn
= transmute
--<element_t _
>, {simd_extract
, a
, N as u32
}
1538 generate float64x2_t
:float64x1_t
1540 /// Set all vector lanes to the same value
1544 multi_fn
= static_assert_imm
-in_exp_len
-N
1545 multi_fn
= simd_extract
, a
, N as u32
1546 a
= 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
1551 generate int8x8_t
:i8
, int8x16_t
:i8
, int16x4_t
:i16
, int16x8_t
:i16
, int32x2_t
:i32
, int32x4_t
:i32
, int64x1_t
:i64
, int64x2_t
:i64
1552 generate uint8x8_t
:u8
, uint8x16_t
:u8
, uint16x4_t
:u16
, uint16x8_t
:u16
, uint32x2_t
:u32
, uint32x4_t
:u32
, uint64x1_t
:u64
, uint64x2_t
:u64
1553 generate poly8x8_t
:p8
, poly8x16_t
:p8
, poly16x4_t
:p16
, poly16x8_t
:p16
1555 /// Set all vector lanes to the same value
1559 multi_fn
= static_assert_imm
-in_exp_len
-N
1560 multi_fn
= simd_extract
, a
, N as u32
1566 generate float32x2_t
:f32
, float32x4_t
:f32
, float64x1_t
:f64
, float64x2_t
:f64
1568 /// Extract vector from pair of vectors
1571 multi_fn
= static_assert_imm
-out_exp_len
-N
1572 multi_fn
= matchn
-out_exp_len
-N
, simd_shuffle
!, a
, b
, {asc
-n
-out_len
}
1573 a
= 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1574 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1576 validate
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1580 generate int
*_t
, uint
*_t
, poly8x8_t
, poly8x16_t
, poly16x4_t
, poly16x8_t
1582 /// Extract vector from pair of vectors
1585 multi_fn
= static_assert_imm
-out_exp_len
-N
1586 multi_fn
= matchn
-out_exp_len
-N
, simd_shuffle
!, a
, b
, {asc
-n
-out_len
}
1587 a
= 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1588 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1590 validate
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1596 generate int64x2_t
, uint64x2_t
1598 /// Extract vector from pair of vectors
1601 multi_fn
= static_assert_imm
-out_exp_len
-N
1602 multi_fn
= matchn
-out_exp_len
-N
, simd_shuffle
!, a
, b
, {asc
-n
-out_len
}
1606 validate
1.
, 2.
, 2.
, 2.
1609 generate float64x2_t
1614 /// Multiply
-add to accumulator
1616 multi_fn
= simd_add
, a
, {simd_mul
, b
, c
}
1617 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1618 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1619 c
= 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1620 validate
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1624 generate int
*_t
, uint
*_t
1626 /// Floating
-point multiply
-add to accumulator
1628 multi_fn
= simd_add
, a
, {simd_mul
, b
, c
}
1632 validate
6.
, 7.
, 8.
, 9.
1635 generate float64x
*_t
1640 /// Vector multiply accumulate with scalar
1643 multi_fn
= vmla
-self
-noext
, a
, b
, {vdup
-nself
-noext
, c
}
1644 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1645 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1647 validate
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1651 generate int16x4_t
:int16x4_t
:i16
:int16x4_t
, int16x8_t
:int16x8_t
:i16
:int16x8_t
, int32x2_t
:int32x2_t
:i32
:int32x2_t
, int32x4_t
:int32x4_t
:i32
:int32x4_t
1652 generate uint16x4_t
:uint16x4_t
:u16
:uint16x4_t
, uint16x8_t
:uint16x8_t
:u16
:uint16x8_t
, uint32x2_t
:uint32x2_t
:u32
:uint32x2_t
, uint32x4_t
:uint32x4_t
:u32
:uint32x4_t
1654 /// Vector multiply accumulate with scalar
1657 multi_fn
= vmla
-self
-noext
, a
, b
, {vdup
-nself
-noext
, c
}
1661 validate
6.
, 7.
, 8.
, 9.
1665 generate float32x2_t
:float32x2_t
:f32
:float32x2_t
, float32x4_t
:float32x4_t
:f32
:float32x4_t
1667 /// Vector multiply accumulate with scalar
1671 multi_fn
= static_assert_imm
-in2_exp_len
-LANE
1672 multi_fn
= vmla
-self
-noext
, a
, b
, {simd_shuffle
!, c
, c
, {dup
-in_len
-LANE as u32
}}
1673 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1674 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1675 c
= 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1677 validate
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1681 generate int16x4_t
, int16x4_t
:int16x4_t
:int16x8_t
:int16x4_t
, int16x8_t
:int16x8_t
:int16x4_t
:int16x8_t
, int16x8_t
1682 generate int32x2_t
, int32x2_t
:int32x2_t
:int32x4_t
:int32x2_t
, int32x4_t
:int32x4_t
:int32x2_t
:int32x4_t
, int32x4_t
1683 generate uint16x4_t
, uint16x4_t
:uint16x4_t
:uint16x8_t
:uint16x4_t
, uint16x8_t
:uint16x8_t
:uint16x4_t
:uint16x8_t
, uint16x8_t
1684 generate uint32x2_t
, uint32x2_t
:uint32x2_t
:uint32x4_t
:uint32x2_t
, uint32x4_t
:uint32x4_t
:uint32x2_t
:uint32x4_t
, uint32x4_t
1686 /// Vector multiply accumulate with scalar
1690 multi_fn
= static_assert_imm
-in2_exp_len
-LANE
1691 multi_fn
= vmla
-self
-noext
, a
, b
, {simd_shuffle
!, c
, c
, {dup
-in_len
-LANE as u32
}}
1696 validate
6.
, 7.
, 8.
, 9.
1700 generate float32x2_t
, float32x2_t
:float32x2_t
:float32x4_t
:float32x2_t
, float32x4_t
:float32x4_t
:float32x2_t
:float32x4_t
, float32x4_t
1702 /// Signed multiply
-add long
1704 multi_fn
= simd_add
, a
, {vmull
-self
-noext
, b
, c
}
1705 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1706 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1707 c
= 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1708 validate
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1712 generate int16x8_t
:int8x8_t
:int8x8_t
:int16x8_t
, int32x4_t
:int16x4_t
:int16x4_t
:int32x4_t
, int64x2_t
:int32x2_t
:int32x2_t
:int64x2_t
1714 /// Unsigned multiply
-add long
1716 multi_fn
= simd_add
, a
, {vmull
-self
-noext
, b
, c
}
1717 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1718 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1719 c
= 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1720 validate
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1724 generate uint16x8_t
:uint8x8_t
:uint8x8_t
:uint16x8_t
, uint32x4_t
:uint16x4_t
:uint16x4_t
:uint32x4_t
, uint64x2_t
:uint32x2_t
:uint32x2_t
:uint64x2_t
1726 /// Vector widening multiply accumulate with scalar
1729 multi_fn
= vmlal
-self
-noext
, a
, b
, {vdup
-nself
-noext
, c
}
1730 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1731 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1733 validate
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1737 generate int32x4_t
:int16x4_t
:i16
:int32x4_t
, int64x2_t
:int32x2_t
:i32
:int64x2_t
1739 generate uint32x4_t
:uint16x4_t
:u16
:uint32x4_t
, uint64x2_t
:uint32x2_t
:u32
:uint64x2_t
1741 /// Vector widening multiply accumulate with scalar
1745 multi_fn
= static_assert_imm
-in2_exp_len
-LANE
1746 multi_fn
= vmlal
-self
-noext
, a
, b
, {simd_shuffle
!, c
, c
, {dup
-in_len
-LANE as u32
}}
1747 a
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1748 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1749 c
= 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1751 validate
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1755 generate int32x4_t
:int16x4_t
:int16x4_t
:int32x4_t
, int32x4_t
:int16x4_t
:int16x8_t
:int32x4_t
1756 generate int64x2_t
:int32x2_t
:int32x2_t
:int64x2_t
, int64x2_t
:int32x2_t
:int32x4_t
:int64x2_t
1758 generate uint32x4_t
:uint16x4_t
:uint16x4_t
:uint32x4_t
, uint32x4_t
:uint16x4_t
:uint16x8_t
:uint32x4_t
1759 generate uint64x2_t
:uint32x2_t
:uint32x2_t
:uint64x2_t
, uint64x2_t
:uint32x2_t
:uint32x4_t
:uint64x2_t
1761 /// Signed multiply
-add long
1764 multi_fn
= simd_shuffle
!, b
:half
, b
, b
, {fixed
-half
-right
}
1765 multi_fn
= simd_shuffle
!, c
:half
, c
, c
, {fixed
-half
-right
}
1766 multi_fn
= vmlal
-noqself
-noext
, a
, b
, c
1767 a
= 8, 7, 6, 5, 4, 3, 2, 1
1768 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1769 c
= 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1770 fixed
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1771 validate
8, 9, 10, 11, 12, 13, 14, 15
1774 generate int16x8_t
:int8x16_t
:int8x16_t
:int16x8_t
, int32x4_t
:int16x8_t
:int16x8_t
:int32x4_t
, int64x2_t
:int32x4_t
:int32x4_t
:int64x2_t
1776 /// Unsigned multiply
-add long
1779 multi_fn
= simd_shuffle
!, b
:half
, b
, b
, {fixed
-half
-right
}
1780 multi_fn
= simd_shuffle
!, c
:half
, c
, c
, {fixed
-half
-right
}
1781 multi_fn
= vmlal
-noqself
-noext
, a
, b
, c
1782 a
= 8, 7, 6, 5, 4, 3, 2, 1
1783 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1784 c
= 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1785 fixed
= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1786 validate
8, 9, 10, 11, 12, 13, 14, 15
1789 generate uint16x8_t
:uint8x16_t
:uint8x16_t
:uint16x8_t
, uint32x4_t
:uint16x8_t
:uint16x8_t
:uint32x4_t
, uint64x2_t
:uint32x4_t
:uint32x4_t
:uint64x2_t
1791 /// Multiply
-add long
1794 multi_fn
= vmlal_high
-noqself
-noext
, a
, b
, {vdupq_n
-noqself
-noext
, c
}
1795 a
= 8, 7, 6, 5, 4, 3, 2, 1
1796 b
= 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1798 validate
8, 9, 10, 11, 12, 13, 14, 15
1801 generate int32x4_t
:int16x8_t
:i16
:int32x4_t
, int64x2_t
:int32x4_t
:i32
:int64x2_t
1803 generate uint32x4_t
:uint16x8_t
:u16
:uint32x4_t
, uint64x2_t
:uint32x4_t
:u32
:uint64x2_t
1805 /// Multiply
-add long
1806 name
= vmlal_high_lane
1809 multi_fn
= static_assert_imm
-in2_exp_len
-LANE
1810 multi_fn
= vmlal_high
-noqself
-noext
, a
, b
, {simd_shuffle
!, c
, c
, {dup
-in_len
-LANE as u32
}}
1811 a
= 8, 7, 6, 5, 4, 3, 2, 1
1812 b
= 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1813 c
= 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1815 validate
8, 9, 10, 11, 12, 13, 14, 15
1818 generate int32x4_t
:int16x8_t
:int16x4_t
:int32x4_t
, int32x4_t
:int16x8_t
:int16x8_t
:int32x4_t
1819 generate int64x2_t
:int32x4_t
:int32x2_t
:int64x2_t
, int64x2_t
:int32x4_t
:int32x4_t
:int64x2_t
1821 generate uint32x4_t
:uint16x8_t
:uint16x4_t
:uint32x4_t
, uint32x4_t
:uint16x8_t
:uint16x8_t
:uint32x4_t
1822 generate uint64x2_t
:uint32x4_t
:uint32x2_t
:uint64x2_t
, uint64x2_t
:uint32x4_t
:uint32x4_t
:uint64x2_t
1824 /// Multiply
-subtract from accumulator
1826 multi_fn
= simd_sub
, a
, {simd_mul
, b
, c
}
1827 a
= 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1828 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1829 c
= 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1830 validate
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1834 generate int
*_t
, uint
*_t
1836 /// Floating
-point multiply
-subtract from accumulator
1838 multi_fn
= simd_sub
, a
, {simd_mul
, b
, c
}
1842 validate
0.
, 1.
, 2.
, 3.
1845 generate float64x
*_t
1850 /// Vector multiply subtract with scalar
1853 multi_fn
= vmls
-self
-noext
, a
, b
, {vdup
-nself
-noext
, c
}
1854 a
= 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1855 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1857 validate
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1861 generate int16x4_t
:int16x4_t
:i16
:int16x4_t
, int16x8_t
:int16x8_t
:i16
:int16x8_t
, int32x2_t
:int32x2_t
:i32
:int32x2_t
, int32x4_t
:int32x4_t
:i32
:int32x4_t
1862 generate uint16x4_t
:uint16x4_t
:u16
:uint16x4_t
, uint16x8_t
:uint16x8_t
:u16
:uint16x8_t
, uint32x2_t
:uint32x2_t
:u32
:uint32x2_t
, uint32x4_t
:uint32x4_t
:u32
:uint32x4_t
1864 /// Vector multiply subtract with scalar
1867 multi_fn
= vmls
-self
-noext
, a
, b
, {vdup
-nself
-noext
, c
}
1871 validate
0.
, 1.
, 2.
, 3.
1875 generate float32x2_t
:float32x2_t
:f32
:float32x2_t
, float32x4_t
:float32x4_t
:f32
:float32x4_t
1877 /// Vector multiply subtract with scalar
1881 multi_fn
= static_assert_imm
-in2_exp_len
-LANE
1882 multi_fn
= vmls
-self
-noext
, a
, b
, {simd_shuffle
!, c
, c
, {dup
-in_len
-LANE as u32
}}
1883 a
= 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1884 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1885 c
= 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1887 validate
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1891 generate int16x4_t
, int16x4_t
:int16x4_t
:int16x8_t
:int16x4_t
, int16x8_t
:int16x8_t
:int16x4_t
:int16x8_t
, int16x8_t
1892 generate int32x2_t
, int32x2_t
:int32x2_t
:int32x4_t
:int32x2_t
, int32x4_t
:int32x4_t
:int32x2_t
:int32x4_t
, int32x4_t
1893 generate uint16x4_t
, uint16x4_t
:uint16x4_t
:uint16x8_t
:uint16x4_t
, uint16x8_t
:uint16x8_t
:uint16x4_t
:uint16x8_t
, uint16x8_t
1894 generate uint32x2_t
, uint32x2_t
:uint32x2_t
:uint32x4_t
:uint32x2_t
, uint32x4_t
:uint32x4_t
:uint32x2_t
:uint32x4_t
, uint32x4_t
1896 /// Vector multiply subtract with scalar
1900 multi_fn
= static_assert_imm
-in2_exp_len
-LANE
1901 multi_fn
= vmls
-self
-noext
, a
, b
, {simd_shuffle
!, c
, c
, {dup
-in_len
-LANE as u32
}}
1906 validate
0.
, 1.
, 2.
, 3.
1910 generate float32x2_t
, float32x2_t
:float32x2_t
:float32x4_t
:float32x2_t
, float32x4_t
:float32x4_t
:float32x2_t
:float32x4_t
, float32x4_t
1912 /// Signed multiply
-subtract long
1914 multi_fn
= simd_sub
, a
, {vmull
-self
-noext
, b
, c
}
1915 a
= 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1916 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1917 c
= 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1918 validate
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1922 generate int16x8_t
:int8x8_t
:int8x8_t
:int16x8_t
, int32x4_t
:int16x4_t
:int16x4_t
:int32x4_t
, int64x2_t
:int32x2_t
:int32x2_t
:int64x2_t
1924 /// Unsigned multiply
-subtract long
1926 multi_fn
= simd_sub
, a
, {vmull
-self
-noext
, b
, c
}
1927 a
= 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1928 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1929 c
= 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1930 validate
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1934 generate uint16x8_t
:uint8x8_t
:uint8x8_t
:uint16x8_t
, uint32x4_t
:uint16x4_t
:uint16x4_t
:uint32x4_t
, uint64x2_t
:uint32x2_t
:uint32x2_t
:uint64x2_t
1936 /// Vector widening multiply subtract with scalar
1939 multi_fn
= vmlsl
-self
-noext
, a
, b
, {vdup
-nself
-noext
, c
}
1940 a
= 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1941 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1943 validate
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1947 generate int32x4_t
:int16x4_t
:i16
:int32x4_t
, int64x2_t
:int32x2_t
:i32
:int64x2_t
1949 generate uint32x4_t
:uint16x4_t
:u16
:uint32x4_t
, uint64x2_t
:uint32x2_t
:u32
:uint64x2_t
1951 /// Vector widening multiply subtract with scalar
1955 multi_fn
= static_assert_imm
-in2_exp_len
-LANE
1956 multi_fn
= vmlsl
-self
-noext
, a
, b
, {simd_shuffle
!, c
, c
, {dup
-in_len
-LANE as u32
}}
1957 a
= 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1958 b
= 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1959 c
= 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1961 validate
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1965 generate int32x4_t
:int16x4_t
:int16x4_t
:int32x4_t
, int32x4_t
:int16x4_t
:int16x8_t
:int32x4_t
1966 generate int64x2_t
:int32x2_t
:int32x2_t
:int64x2_t
, int64x2_t
:int32x2_t
:int32x4_t
:int64x2_t
1968 generate uint32x4_t
:uint16x4_t
:uint16x4_t
:uint32x4_t
, uint32x4_t
:uint16x4_t
:uint16x8_t
:uint32x4_t
1969 generate uint64x2_t
:uint32x2_t
:uint32x2_t
:uint64x2_t
, uint64x2_t
:uint32x2_t
:uint32x4_t
:uint64x2_t
/// Signed multiply-subtract long
multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
multi_fn = vmlsl-noqself-noext, a, b, c
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 14, 13, 12, 11, 10, 9, 8, 7
generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Unsigned multiply-subtract long
multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
multi_fn = vmlsl-noqself-noext, a, b, c
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 14, 13, 12, 11, 10, 9, 8, 7
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
/// Multiply-subtract long
multi_fn = vmlsl_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
validate 14, 13, 12, 11, 10, 9, 8, 7
generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t

/// Multiply-subtract long
name = vmlsl_high_lane
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate 14, 13, 12, 11, 10, 9, 8, 7
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
multi_fn = simd_cast, c:in_t0, b
multi_fn = simd_shuffle!, a, c, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 4, 5
b = 2, 3, 4, 5, 12, 13, 14, 15
validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

a = 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8
validate 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8

multi_fn = a.wrapping_neg()

a = 0., 1., -1., 2., -2., 3., -3., 4.
validate 0., -1., 1., -2., 2., -3., 3., -4.
generate float64x*_t
/// Signed saturating negate
a = MIN, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7
validate MAX, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7
link-arm = vqneg._EXT_
link-aarch64 = sqneg._EXT_

/// Signed saturating negate
multi_fn = simd_extract, {vqneg-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
generate i8, i16, i32, i64
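
// Illustrative note: saturating negate differs from wrapping negate only at
// the most negative value, which is why the test pins a = MIN to MAX. A
// scalar sketch in plain Rust (illustrative name):
//
//     fn qneg_i8(a: i8) -> i8 {
//         // i8::MIN has no positive counterpart, so it clamps to i8::MAX;
//         // wrapping_neg() would return i8::MIN again.
//         a.checked_neg().unwrap_or(i8::MAX)
//     }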
/// Saturating subtract
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26
link-arm = llvm.usub.sat._EXT_
link-aarch64 = uqsub._EXT_
generate uint*_t, uint64x*_t
link-arm = llvm.ssub.sat._EXT_
link-aarch64 = sqsub._EXT_
generate int*_t, int64x*_t
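
// Illustrative note: the llvm.usub.sat/llvm.ssub.sat links carry the same
// semantics as Rust's saturating_sub, which the 42 - {1..16} test data
// exercises without ever hitting the clamp. A scalar sketch:
//
//     fn qsub_u8(a: u8, b: u8) -> u8 {
//         a.saturating_sub(b) // 1u8.saturating_sub(3) == 0, not 254
//     }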
/// Saturating subtract
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqsub-in_ntt-noext, a, b}, 0

/// Saturating subtract
link-aarch64 = uqsub._EXT_
link-aarch64 = sqsub._EXT_

a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
link-aarch64 = uhadd._EXT_
link-arm = vhaddu._EXT_
link-aarch64 = shadd._EXT_
link-arm = vhadds._EXT_

/// Reverse bit order
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
link-aarch64 = rbit._EXT_
generate int8x8_t, int8x16_t

/// Reverse bit order
multi_fn = transmute, {vrbit-signed-noext, transmute(a)}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
generate uint8x8_t, uint8x16_t, poly8x8_t, poly8x16_t

/// Rounding halving add
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29
link-arm = vrhaddu._EXT_
link-aarch64 = urhadd._EXT_
link-arm = vrhadds._EXT_
link-aarch64 = srhadd._EXT_
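
// Illustrative note: halving add truncates (a + b) >> 1, while the rounding
// form adds 1 before shifting, which is why 42 and 1 validate to 21 vs 22.
// A scalar sketch that avoids the intermediate overflow (illustrative names):
//
//     fn hadd_u8(a: u8, b: u8) -> u8 {
//         ((a as u16 + b as u16) >> 1) as u8
//     }
//
//     fn rhadd_u8(a: u8, b: u8) -> u8 {
//         ((a as u16 + b as u16 + 1) >> 1) as u8
//     }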
/// Floating-point round to integral exact, using current rounding mode
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 0.0, 2.0, 2.0
link-aarch64 = llvm.rint._EXT_
generate float*_t, float64x*_t

/// Floating-point round to integral, to nearest with ties to away
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 1.0, 2.0, 3.0
link-aarch64 = llvm.round._EXT_
generate float*_t, float64x*_t

/// Floating-point round to integral, to nearest with ties to even
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 0.0, 2.0, 2.0
link-aarch64 = frintn._EXT_
generate float64x*_t
link-arm = vrintn._EXT_

/// Floating-point round to integral, to nearest with ties to even
link-aarch64 = llvm.roundeven._EXT_

/// Floating-point round to integral, toward minus infinity
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 0.0, 1.0, 2.0
link-aarch64 = llvm.floor._EXT_
generate float*_t, float64x*_t

/// Floating-point round to integral, toward plus infinity
a = -1.5, 0.5, 1.5, 2.5
validate -1.0, 1.0, 2.0, 3.0
link-aarch64 = llvm.ceil._EXT_
generate float*_t, float64x*_t

/// Floating-point round to integral, toward zero
a = -1.5, 0.5, 1.5, 2.5
validate -1.0, 0.0, 1.0, 2.0
link-aarch64 = llvm.trunc._EXT_
generate float*_t, float64x*_t

/// Floating-point round to integral, using current rounding mode
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 0.0, 2.0, 2.0
link-aarch64 = llvm.nearbyint._EXT_
generate float*_t, float64x*_t
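
// Illustrative note: the -1.5, 0.5, 1.5, 2.5 inputs are chosen so every tie
// direction is visible. Rough std-float equivalents of the validate rows
// (assuming the default round-to-nearest-even mode for rint/nearbyint):
//
//     let x: [f32; 4] = [-1.5, 0.5, 1.5, 2.5];
//     let round = x.map(f32::round); // ties away from zero: [-2.0, 1.0, 2.0, 3.0]
//     let floor = x.map(f32::floor); // [-2.0, 0.0, 1.0, 2.0]
//     let ceil  = x.map(f32::ceil);  // [-1.0, 1.0, 2.0, 3.0]
//     let trunc = x.map(f32::trunc); // [-1.0, 0.0, 1.0, 2.0]
//     // ties-to-even (frintn/roundeven) would give [-2.0, 0.0, 2.0, 2.0].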
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
link-arm = llvm.uadd.sat._EXT_
link-aarch64 = uqadd._EXT_
generate uint*_t, uint64x*_t
link-arm = llvm.sadd.sat._EXT_
link-aarch64 = sqadd._EXT_
generate int*_t, int64x*_t

multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqadd-in_ntt-noext, a, b}, 0

a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
link-aarch64 = uqadd._EXT_
link-aarch64 = sqadd._EXT_
/// Load multiple single-element structures to one, two, three, or four registers
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
link-aarch64 = ld1x2._EXT2_
link-arm = vld1x2._EXT2_
generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
link-aarch64 = ld1x3._EXT2_
link-arm = vld1x3._EXT2_
generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t, *const i64:int64x2x3_t
link-aarch64 = ld1x4._EXT2_
link-arm = vld1x4._EXT2_
generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t, *const i64:int64x2x4_t

/// Load multiple single-element structures to one, two, three, or four registers
multi_fn = transmute, {vld1-outsigned-noext, transmute(a)}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t
generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t
generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t
generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t, *const u64:uint64x2x3_t
generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t
generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t, *const u64:uint64x2x4_t
generate *const p8:poly8x8x2_t, *const p8:poly8x8x3_t, *const p8:poly8x8x4_t
generate *const p8:poly8x16x2_t, *const p8:poly8x16x3_t, *const p8:poly8x16x4_t
generate *const p16:poly16x4x2_t, *const p16:poly16x4x3_t, *const p16:poly16x4x4_t
generate *const p16:poly16x8x2_t, *const p16:poly16x8x3_t, *const p16:poly16x8x4_t
generate *const p64:poly64x1x2_t
generate *const p64:poly64x1x3_t, *const p64:poly64x1x4_t
generate *const p64:poly64x2x2_t, *const p64:poly64x2x3_t, *const p64:poly64x2x4_t
/// Load multiple single-element structures to one, two, three, or four registers
a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
link-aarch64 = ld1x2._EXT2_
generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
link-aarch64 = ld1x3._EXT2_
generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
link-aarch64 = ld1x4._EXT2_
generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
link-aarch64 = ld1x2._EXT2_
link-arm = vld1x2._EXT2_
generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
link-aarch64 = ld1x3._EXT2_
link-arm = vld1x3._EXT2_
generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
link-aarch64 = ld1x4._EXT2_
link-arm = vld1x4._EXT2_
generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
/// Load multiple 2-element structures to two registers
a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
arm-aarch64-separate
link-aarch64 = ld2._EXTv2_
generate *const i64:int64x2x2_t
link-arm = vld2._EXTpi82_
generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
generate *const i64:int64x1x2_t

/// Load multiple 2-element structures to two registers
multi_fn = transmute, {vld2-outsignednox-noext, transmute(a)}
a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
generate *const u64:uint64x2x2_t
generate *const p64:poly64x2x2_t
generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
generate *const u64:uint64x1x2_t
generate *const p64:poly64x1x2_t

/// Load multiple 2-element structures to two registers
a = 0., 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
validate 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
arm-aarch64-separate
link-aarch64 = ld2._EXTv2_
generate *const f64:float64x1x2_t
generate *const f64:float64x2x2_t
link-arm = vld2._EXTpi82_
generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
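
// Illustrative note: the vld2 family de-interleaves memory, so element 2*i
// goes to the first register and element 2*i+1 to the second, which is how
// the interleaved `a` data above turns into the grouped validate pattern.
// A scalar sketch of that layout (illustrative names only):
//
//     fn ld2(src: &[f32; 8]) -> ([f32; 4], [f32; 4]) {
//         let mut r0 = [0.0; 4];
//         let mut r1 = [0.0; 4];
//         for i in 0..4 {
//             r0[i] = src[2 * i];     // lane stream 0
//             r1[i] = src[2 * i + 1]; // lane stream 1
//         }
//         (r0, r1)
//     }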
/// Load single 2-element structure and replicate to all lanes of two registers
a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
arm-aarch64-separate
link-aarch64 = ld2r._EXT2_
generate *const i64:int64x2x2_t
link-arm = vld2dup._EXTpi82_
generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
generate *const i64:int64x1x2_t

/// Load single 2-element structure and replicate to all lanes of two registers
multi_fn = transmute, {vld2-outsigneddupnox-noext, transmute(a)}
a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
generate *const u64:uint64x2x2_t
generate *const p64:poly64x2x2_t
generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
generate *const u64:uint64x1x2_t
generate *const p64:poly64x1x2_t

/// Load single 2-element structure and replicate to all lanes of two registers
a = 0., 1., 1., 2., 3., 1., 4., 3., 5.
validate 1., 1., 1., 1., 1., 1., 1., 1.
arm-aarch64-separate
link-aarch64 = ld2r._EXT2_
generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
link-arm = vld2dup._EXTpi82_
generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
/// Load multiple 2-element structures to two registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
arm-aarch64-separate
const-aarch64 = LANE
link-aarch64 = ld2lane._EXTpi82_
generate *const i8:int8x16x2_t:int8x16x2_t, *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t
link-arm = vld2lane._EXTpi82_
generate *const i8:int8x8x2_t:int8x8x2_t, *const i16:int16x4x2_t:int16x4x2_t, *const i32:int32x2x2_t:int32x2x2_t
generate *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t

/// Load multiple 2-element structures to two registers
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vld2-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
const-aarch64 = LANE
generate *const p64:poly64x1x2_t:poly64x1x2_t, *const p64:poly64x2x2_t:poly64x2x2_t
generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t
generate *const p8:poly8x16x2_t:poly8x16x2_t
generate *const u8:uint8x8x2_t:uint8x8x2_t, *const u16:uint16x4x2_t:uint16x4x2_t, *const u32:uint32x2x2_t:uint32x2x2_t
generate *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t
generate *const p8:poly8x8x2_t:poly8x8x2_t, *const p16:poly16x4x2_t:poly16x4x2_t
generate *const p16:poly16x8x2_t:poly16x8x2_t

/// Load multiple 2-element structures to two registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
b = 0., 2., 2., 14., 2., 16., 17., 18.
validate 1., 2., 2., 14., 2., 16., 17., 18.
arm-aarch64-separate
const-aarch64 = LANE
link-aarch64 = ld2lane._EXTpi82_
generate *const f64:float64x1x2_t:float64x1x2_t, *const f64:float64x2x2_t:float64x2x2_t
link-arm = vld2lane._EXTpi82_
generate *const f32:float32x2x2_t:float32x2x2_t, *const f32:float32x4x2_t:float32x4x2_t
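
// Illustrative note: the lane variants load exactly one 2-element structure
// from memory and overwrite lane LANE of both input registers, leaving the
// remaining lanes untouched. A scalar sketch (illustrative names):
//
//     fn ld2_lane<const LANE: usize>(src: &[f32; 2], mut regs: ([f32; 4], [f32; 4]))
//         -> ([f32; 4], [f32; 4])
//     {
//         regs.0[LANE] = src[0];
//         regs.1[LANE] = src[1];
//         regs
//     }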
/// Load multiple 3-element structures to three registers
a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
arm-aarch64-separate
link-aarch64 = ld3._EXTv2_
generate *const i64:int64x2x3_t
link-arm = vld3._EXTpi82_
generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
generate *const i64:int64x1x3_t

/// Load multiple 3-element structures to three registers
multi_fn = transmute, {vld3-outsignednox-noext, transmute(a)}
a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
generate *const u64:uint64x2x3_t
generate *const p64:poly64x2x3_t
generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
generate *const u64:uint64x1x3_t
generate *const p64:poly64x1x3_t

/// Load multiple 3-element structures to three registers
a = 0., 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.
validate 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8.
arm-aarch64-separate
link-aarch64 = ld3._EXTv2_
generate *const f64:float64x1x3_t
generate *const f64:float64x2x3_t
link-arm = vld3._EXTpi82_
generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
/// Load single 3-element structure and replicate to all lanes of three registers
a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
arm-aarch64-separate
link-aarch64 = ld3r._EXT2_
generate *const i64:int64x2x3_t
link-arm = vld3dup._EXTpi82_
generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
generate *const i64:int64x1x3_t

/// Load single 3-element structure and replicate to all lanes of three registers
multi_fn = transmute, {vld3-outsigneddupnox-noext, transmute(a)}
a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
generate *const u64:uint64x2x3_t
generate *const p64:poly64x2x3_t
generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
generate *const u64:uint64x1x3_t
generate *const p64:poly64x1x3_t

/// Load single 3-element structure and replicate to all lanes of three registers
a = 0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5.
validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
arm-aarch64-separate
link-aarch64 = ld3r._EXT2_
generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
link-arm = vld3dup._EXTpi82_
generate *const f32:float32x2x3_t, *const f32:float32x4x3_t

/// Load multiple 3-element structures to three registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
arm-aarch64-separate
const-aarch64 = LANE
link-aarch64 = ld3lane._EXTpi82_
generate *const i8:int8x16x3_t:int8x16x3_t, *const i64:int64x1x3_t:int64x1x3_t, *const i64:int64x2x3_t:int64x2x3_t
link-arm = vld3lane._EXTpi82_
generate *const i8:int8x8x3_t:int8x8x3_t, *const i16:int16x4x3_t:int16x4x3_t, *const i32:int32x2x3_t:int32x2x3_t
generate *const i16:int16x8x3_t:int16x8x3_t, *const i32:int32x4x3_t:int32x4x3_t
/// Load multiple 3-element structures to three registers
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vld3-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
const-aarch64 = LANE
generate *const p64:poly64x1x3_t:poly64x1x3_t, *const p64:poly64x2x3_t:poly64x2x3_t
generate *const p8:poly8x16x3_t:poly8x16x3_t, *const u8:uint8x16x3_t:uint8x16x3_t, *const u64:uint64x1x3_t:uint64x1x3_t, *const u64:uint64x2x3_t:uint64x2x3_t
generate *const u8:uint8x8x3_t:uint8x8x3_t, *const u16:uint16x4x3_t:uint16x4x3_t, *const u32:uint32x2x3_t:uint32x2x3_t
generate *const u16:uint16x8x3_t:uint16x8x3_t, *const u32:uint32x4x3_t:uint32x4x3_t
generate *const p8:poly8x8x3_t:poly8x8x3_t, *const p16:poly16x4x3_t:poly16x4x3_t
generate *const p16:poly16x8x3_t:poly16x8x3_t
/// Load multiple 3-element structures to three registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 4., 5., 6., 7., 8., 5., 6., 7., 8.
b = 0., 2., 2., 14., 9., 16., 17., 18., 5., 6., 7., 8.
validate 1., 2., 2., 14., 2., 16., 17., 18., 2., 6., 7., 8.
arm-aarch64-separate
const-aarch64 = LANE
link-aarch64 = ld3lane._EXTpi82_
generate *const f64:float64x1x3_t:float64x1x3_t, *const f64:float64x2x3_t:float64x2x3_t
link-arm = vld3lane._EXTpi82_
generate *const f32:float32x2x3_t:float32x2x3_t, *const f32:float32x4x3_t:float32x4x3_t
/// Load multiple 4-element structures to four registers
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
arm-aarch64-separate
link-aarch64 = ld4._EXTv2_
generate *const i64:int64x2x4_t
link-arm = vld4._EXTpi82_
generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
generate *const i64:int64x1x4_t

/// Load multiple 4-element structures to four registers
multi_fn = transmute, {vld4-outsignednox-noext, transmute(a)}
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
generate *const u64:uint64x2x4_t
generate *const p64:poly64x2x4_t
generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
generate *const u64:uint64x1x4_t
generate *const p64:poly64x1x4_t
/// Load multiple 4-element structures to four registers
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 15., 16.
validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 15., 6., 8., 8., 16.
arm-aarch64-separate
link-aarch64 = ld4._EXTv2_
generate *const f64:float64x1x4_t
generate *const f64:float64x2x4_t
link-arm = vld4._EXTpi82_
generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
/// Load single 4-element structure and replicate to all lanes of four registers
a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
arm-aarch64-separate
link-aarch64 = ld4r._EXT2_
generate *const i64:int64x2x4_t
link-arm = vld4dup._EXTpi82_
generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
generate *const i64:int64x1x4_t

/// Load single 4-element structure and replicate to all lanes of four registers
multi_fn = transmute, {vld4-outsigneddupnox-noext, transmute(a)}
a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
generate *const u64:uint64x2x4_t
generate *const p64:poly64x2x4_t
generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
generate *const u64:uint64x1x4_t
generate *const p64:poly64x1x4_t
/// Load single 4-element structure and replicate to all lanes of four registers
a = 0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5., 9., 4., 3., 5.
validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
arm-aarch64-separate
link-aarch64 = ld4r._EXT2_
generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
link-arm = vld4dup._EXTpi82_
generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
/// Load multiple 4-element structures to four registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
arm-aarch64-separate
const-aarch64 = LANE
link-aarch64 = ld4lane._EXTpi82_
generate *const i8:int8x16x4_t:int8x16x4_t, *const i64:int64x1x4_t:int64x1x4_t, *const i64:int64x2x4_t:int64x2x4_t
link-arm = vld4lane._EXTpi82_
generate *const i8:int8x8x4_t:int8x8x4_t, *const i16:int16x4x4_t:int16x4x4_t, *const i32:int32x2x4_t:int32x2x4_t
generate *const i16:int16x8x4_t:int16x8x4_t, *const i32:int32x4x4_t:int32x4x4_t
/// Load multiple 4-element structures to four registers
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vld4-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
const-aarch64 = LANE
generate *const p64:poly64x1x4_t:poly64x1x4_t, *const p64:poly64x2x4_t:poly64x2x4_t
generate *const p8:poly8x16x4_t:poly8x16x4_t, *const u8:uint8x16x4_t:uint8x16x4_t, *const u64:uint64x1x4_t:uint64x1x4_t, *const u64:uint64x2x4_t:uint64x2x4_t
generate *const u8:uint8x8x4_t:uint8x8x4_t, *const u16:uint16x4x4_t:uint16x4x4_t, *const u32:uint32x2x4_t:uint32x2x4_t
generate *const u16:uint16x8x4_t:uint16x8x4_t, *const u32:uint32x4x4_t:uint32x4x4_t
generate *const p8:poly8x8x4_t:poly8x8x4_t, *const p16:poly16x4x4_t:poly16x4x4_t
generate *const p16:poly16x8x4_t:poly16x8x4_t
/// Load multiple 4-element structures to four registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 2., 5., 6., 7., 8., 5., 6., 7., 8., 1., 4., 3., 5.
b = 0., 2., 2., 2., 2., 16., 2., 18., 5., 6., 7., 8., 1., 4., 3., 5.
validate 1., 2., 2., 2., 2., 16., 2., 18., 2., 6., 7., 8., 2., 4., 3., 5.
arm-aarch64-separate
const-aarch64 = LANE
link-aarch64 = ld4lane._EXTpi82_
generate *const f64:float64x1x4_t:float64x1x4_t, *const f64:float64x2x4_t:float64x2x4_t
link-arm = vld4lane._EXTpi82_
generate *const f32:float32x2x4_t:float32x2x4_t, *const f32:float32x4x4_t:float32x4x4_t
/// Store multiple single-element structures from one, two, three, or four registers
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = *a, {simd_extract, b, LANE as u32}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
generate *mut i8:int8x8_t:void, *mut i16:int16x4_t:void, *mut i32:int32x2_t:void, *mut i64:int64x1_t:void
generate *mut i8:int8x16_t:void, *mut i16:int16x8_t:void, *mut i32:int32x4_t:void, *mut i64:int64x2_t:void
generate *mut u8:uint8x8_t:void, *mut u16:uint16x4_t:void, *mut u32:uint32x2_t:void, *mut u64:uint64x1_t:void
generate *mut u8:uint8x16_t:void, *mut u16:uint16x8_t:void, *mut u32:uint32x4_t:void, *mut u64:uint64x2_t:void
generate *mut p8:poly8x8_t:void, *mut p16:poly16x4_t:void, *mut p8:poly8x16_t:void, *mut p16:poly16x8_t:void
generate *mut p64:poly64x1_t:void, *mut p64:poly64x2_t:void
/// Store multiple single-element structures from one, two, three, or four registers
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = *a, {simd_extract, b, LANE as u32}
a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
validate 1., 0., 0., 0., 0., 0., 0., 0.
generate *mut f64:float64x1_t:void, *mut f64:float64x2_t:void
generate *mut f32:float32x2_t:void, *mut f32:float32x4_t:void
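
// Illustrative note: vst1_lane writes a single element, which is why only the
// first slot of the validate buffer changes. The multi_fn body above is a
// pointer write of one extracted lane; in plain Rust terms (illustrative name):
//
//     unsafe fn st1_lane<const LANE: usize>(dst: *mut f32, v: [f32; 4]) {
//         // equivalent of `*a = simd_extract(b, LANE)`
//         *dst = v[LANE];
//     }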
/// Store multiple single-element structures from one, two, three, or four registers
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
arm-aarch64-separate
link-aarch64 = st1x2._EXT3_
link-arm = vst1x2._EXTr3_
generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void, *mut i64:int64x1x2_t:void
generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void, *mut i64:int64x2x2_t:void
link-aarch64 = st1x3._EXT3_
link-arm = vst1x3._EXTr3_
generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void, *mut i64:int64x1x3_t:void
generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void, *mut i64:int64x2x3_t:void
link-aarch64 = st1x4._EXT3_
link-arm = vst1x4._EXTr3_
generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void, *mut i64:int64x1x4_t:void
generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void, *mut i64:int64x2x4_t:void
/// Store multiple single-element structures to one, two, three, or four registers
multi_fn = vst1-signed-noext, transmute(a), transmute(b)
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void, *mut u64:uint64x1x2_t:void
generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void, *mut u64:uint64x2x2_t:void
generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void, *mut u64:uint64x1x3_t:void
generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void, *mut u64:uint64x2x3_t:void
generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void, *mut u64:uint64x1x4_t:void
generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void, *mut u64:uint64x2x4_t:void
generate *mut p8:poly8x8x2_t:void, *mut p8:poly8x8x3_t:void, *mut p8:poly8x8x4_t:void
generate *mut p8:poly8x16x2_t:void, *mut p8:poly8x16x3_t:void, *mut p8:poly8x16x4_t:void
generate *mut p16:poly16x4x2_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x4x4_t:void
generate *mut p16:poly16x8x2_t:void, *mut p16:poly16x8x3_t:void, *mut p16:poly16x8x4_t:void
generate *mut p64:poly64x1x2_t:void
generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x1x4_t:void
generate *mut p64:poly64x2x2_t:void, *mut p64:poly64x2x3_t:void, *mut p64:poly64x2x4_t:void
/// Store multiple single-element structures to one, two, three, or four registers
a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
arm-aarch64-separate
link-aarch64 = st1x2._EXT3_
generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
link-aarch64 = st1x3._EXT3_
generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
link-aarch64 = st1x4._EXT3_
generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
link-aarch64 = st1x2._EXT3_
link-arm = vst1x2._EXTr3_
generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
link-aarch64 = st1x3._EXT3_
link-arm = vst1x3._EXTr3_
generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
link-aarch64 = st1x4._EXT3_
link-arm = vst1x4._EXTr3_
generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
/// Store multiple 2-element structures from two registers
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
arm-aarch64-separate
link-aarch64 = st2._EXTpi8_
generate *mut i64:int64x2x2_t:void
link-arm = vst2._EXTpi8r_
generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
generate *mut i64:int64x1x2_t:void
/// Store multiple 2-element structures from two registers
multi_fn = transmute, {vst2-in1signednox-noext, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
generate *mut u64:uint64x2x2_t:void
generate *mut p64:poly64x2x2_t:void
generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p8:poly8x16x2_t:void, *mut p16:poly16x8x2_t:void
generate *mut u64:uint64x1x2_t:void
generate *mut p64:poly64x1x2_t:void
/// Store multiple 2-element structures from two registers
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
validate 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
arm-aarch64-separate
link-aarch64 = st2._EXTpi8_
generate *mut f64:float64x1x2_t:void
generate *mut f64:float64x2x2_t:void
link-arm = vst2._EXTpi8r_
generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
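
// Illustrative note: vst2 is the inverse of vld2, interleaving the two source
// registers back into memory so the stored sequence alternates lanes from each
// register. A scalar sketch of that store order (illustrative names):
//
//     fn st2(dst: &mut [f32; 8], r0: [f32; 4], r1: [f32; 4]) {
//         for i in 0..4 {
//             dst[2 * i] = r0[i];
//             dst[2 * i + 1] = r1[i];
//         }
//     }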
/// Store multiple 2-element structures from two registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
arm-aarch64-separate
link-aarch64 = st2lane._EXTpi8_
const-aarch64 = LANE
generate *mut i8:int8x16x2_t:void, *mut i64:int64x1x2_t:void, *mut i64:int64x2x2_t:void
link-arm = vst2lane._EXTpi8r_
generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
generate *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
/// Store multiple 2-element structures from two registers
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vst2-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
generate *mut u8:uint8x16x2_t:void, *mut u64:uint64x1x2_t:void, *mut u64:uint64x2x2_t:void, *mut p8:poly8x16x2_t:void
generate *mut p64:poly64x1x2_t:void, *mut p64:poly64x2x2_t:void
generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
generate *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p16:poly16x8x2_t:void
/// Store multiple 2-element structures from two registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
validate 1., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
arm-aarch64-separate
link-aarch64 = st2lane._EXTpi8_
const-aarch64 = LANE
generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
link-arm = vst2lane._EXTpi8r_
generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
/// Store multiple 3-element structures from three registers
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
arm-aarch64-separate
link-aarch64 = st3._EXTpi8_
generate *mut i64:int64x2x3_t:void
link-arm = vst3._EXTpi8r_
generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
generate *mut i64:int64x1x3_t:void
/// Store multiple 3-element structures from three registers
multi_fn = transmute, {vst3-in1signednox-noext, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
generate *mut u64:uint64x2x3_t:void
generate *mut p64:poly64x2x3_t:void
generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p8:poly8x16x3_t:void, *mut p16:poly16x8x3_t:void
generate *mut u64:uint64x1x3_t:void
generate *mut p64:poly64x1x3_t:void
/// Store multiple 3-element structures from three registers
a = 0., 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8., 13., 14., 15., 16.
validate 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8., 2., 13., 13., 4.
arm-aarch64-separate
link-aarch64 = st3._EXTpi8_
generate *mut f64:float64x1x3_t:void
generate *mut f64:float64x2x3_t:void
link-arm = vst3._EXTpi8r_
generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
/// Store multiple 3-element structures from three registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
arm-aarch64-separate
link-aarch64 = st3lane._EXTpi8_
const-aarch64 = LANE
generate *mut i8:int8x16x3_t:void, *mut i64:int64x1x3_t:void, *mut i64:int64x2x3_t:void
link-arm = vst3lane._EXTpi8r_
generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
generate *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
/// Store multiple 3-element structures from three registers
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vst3-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
generate *mut u8:uint8x16x3_t:void, *mut u64:uint64x1x3_t:void, *mut u64:uint64x2x3_t:void, *mut p8:poly8x16x3_t:void
generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x2x3_t:void
generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
generate *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x8x3_t:void
/// Store multiple 3-element structures from three registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
validate 1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
arm-aarch64-separate
link-aarch64 = st3lane._EXTpi8_
const-aarch64 = LANE
generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
link-arm = vst3lane._EXTpi8r_
generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
/// Store multiple 4-element structures from four registers
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
arm-aarch64-separate
link-aarch64 = st4._EXTpi8_
generate *mut i64:int64x2x4_t:void
link-arm = vst4._EXTpi8r_
generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
generate *mut i64:int64x1x4_t:void
/// Store multiple 4-element structures from four registers
multi_fn = transmute, {vst4-in1signednox-noext, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
generate *mut u64:uint64x2x4_t:void
generate *mut p64:poly64x2x4_t:void
generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p8:poly8x16x4_t:void, *mut p16:poly16x8x4_t:void
generate *mut u64:uint64x1x4_t:void
generate *mut p64:poly64x1x4_t:void
/// Store multiple 4-element structures from four registers
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
arm-aarch64-separate
link-aarch64 = st4._EXTpi8_
generate *mut f64:float64x1x4_t:void
generate *mut f64:float64x2x4_t:void
link-arm = vst4._EXTpi8r_
generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
/// Store multiple 4-element structures from four registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
arm-aarch64-separate
link-aarch64 = st4lane._EXTpi8_
const-aarch64 = LANE
generate *mut i8:int8x16x4_t:void, *mut i64:int64x1x4_t:void, *mut i64:int64x2x4_t:void
link-arm = vst4lane._EXTpi8r_
generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
generate *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
/// Store multiple 4-element structures from four registers
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vst4-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
generate *mut u8:uint8x16x4_t:void, *mut u64:uint64x1x4_t:void, *mut u64:uint64x2x4_t:void, *mut p8:poly8x16x4_t:void
generate *mut p64:poly64x1x4_t:void, *mut p64:poly64x2x4_t:void
generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
generate *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p16:poly16x8x4_t:void
/// Store multiple 4-element structures from four registers
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
validate 1., 2., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
arm-aarch64-separate
link-aarch64 = st4lane._EXTpi8_
const-aarch64 = LANE
generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
link-arm = vst4lane._EXTpi8r_
generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
/// Dot product vector form with unsigned and signed integers
a = 1000, -4200, -1000, 2000
b = 100, 205, 110, 195, 120, 185, 130, 175, 140, 165, 150, 155, 160, 145, 170, 135
c = 0, 1, 2, 3, -1, -2, -3, -4, 4, 5, 6, 7, -5, -6, -7, -8
// 1000 + (100, 205, 110, 195) . ( 0, 1, 2, 3)
// -4200 + (120, 185, 130, 175) . (-1, -2, -3, -4)
validate 2010, -5780, 2370, -1940
link-arm = usdot._EXT2_._EXT4_:int32x2_t:uint8x8_t:int8x8_t:int32x2_t
link-aarch64 = usdot._EXT2_._EXT4_:int32x2_t:uint8x8_t:int8x8_t:int32x2_t
generate int32x2_t:uint8x8_t:int8x8_t:int32x2_t
link-arm = usdot._EXT2_._EXT4_:int32x4_t:uint8x16_t:int8x16_t:int32x4_t
link-aarch64 = usdot._EXT2_._EXT4_:int32x4_t:uint8x16_t:int8x16_t:int32x4_t
generate int32x4_t:uint8x16_t:int8x16_t:int32x4_t
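
// Illustrative note: usdot treats `b` as unsigned bytes and `c` as signed
// bytes, multiplies them in 4-byte groups, and accumulates each group's sum
// into one 32-bit lane of `a`, exactly as the worked comments above show.
// A scalar sketch of a single output lane (illustrative name):
//
//     fn usdot_lane0(acc: i32, b: [u8; 4], c: [i8; 4]) -> i32 {
//         let mut s = acc;
//         for i in 0..4 {
//             s += (b[i] as i32) * (c[i] as i32);
//         }
//         s
//     }
//
// e.g. 1000 + 100*0 + 205*1 + 110*2 + 195*3 = 2010, the first validate value.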
/// Dot product index form with unsigned and signed integers
multi_fn = static_assert_imm-in2_dot-LANE
multi_fn = transmute, c:merge4_t2, c
multi_fn = simd_shuffle!, c:out_signed, c, c, {dup-out_len-LANE as u32}
multi_fn = vusdot-out-noext, a, b, {transmute, c}
a = 1000, -4200, -1000, 2000
b = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
c = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
// 1000 + (100, 110, 120, 130) . (4, 3, 2, 1)
// -4200 + (140, 150, 160, 170) . (4, 3, 2, 1)
validate 2100, -2700, 900, 4300
// 1000 + (100, 110, 120, 130) . (0, -1, -2, -3)
// -4200 + (140, 150, 160, 170) . (0, -1, -2, -3)
validate 260, -5180, -2220, 540
generate int32x2_t:uint8x8_t:int8x8_t:int32x2_t
generate int32x4_t:uint8x16_t:int8x8_t:int32x4_t

/// Dot product index form with unsigned and signed integers
// Only AArch64 has the laneq forms.
multi_fn = static_assert_imm-in2_dot-LANE
multi_fn = transmute, c:merge4_t2, c
multi_fn = simd_shuffle!, c:out_signed, c, c, {dup-out_len-LANE as u32}
multi_fn = vusdot-out-noext, a, b, {transmute, c}
a = 1000, -4200, -1000, 2000
b = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
c = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11

// 1000 + (100, 110, 120, 130) . (-4, -5, -6, -7)
// -4200 + (140, 150, 160, 170) . (-4, -5, -6, -7)
validate -3420, -10140, -8460, -6980

generate int32x2_t:uint8x8_t:int8x16_t:int32x2_t
generate int32x4_t:uint8x16_t:int8x16_t:int32x4_t

/// Dot product index form with signed and unsigned integers
multi_fn = static_assert_imm-in2_dot-LANE
multi_fn = transmute, c:merge4_t2, c
multi_fn = simd_shuffle!, c:out_unsigned, c, c, {dup-out_len-LANE as u32}
multi_fn = vusdot-out-noext, a, {transmute, c}, b
a = -2000, 4200, -1000, 2000
b = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
c = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250

// -2000 + (4, 3, 2, 1) . (100, 110, 120, 130)
// 4200 + (0, -1, -2, -3) . (100, 110, 120, 130)
validate -900, 3460, -3580, -2420

// -2000 + (4, 3, 2, 1) . (140, 150, 160, 170)
// 4200 + (0, -1, -2, -3) . (140, 150, 160, 170)
validate -500, 3220, -4460, -3940

generate int32x2_t:int8x8_t:uint8x8_t:int32x2_t
generate int32x4_t:int8x16_t:uint8x8_t:int32x4_t

/// Dot product index form with signed and unsigned integers
// Only AArch64 has the laneq forms.
multi_fn = static_assert_imm-in2_dot-LANE
multi_fn = transmute, c:merge4_t2, c
multi_fn = simd_shuffle!, c:out_unsigned, c, c, {dup-out_len-LANE as u32}
multi_fn = vusdot-out-noext, a, {transmute, c}, b
a = -2000, 4200, -1000, 2000
b = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
c = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250

// -2000 + (4, 3, 2, 1) . (220, 230, 240, 250)
// 4200 + (0, -1, -2, -3) . (220, 230, 240, 250)
validate 300, 2740, -6220, -6980

generate int32x2_t:int8x8_t:uint8x16_t:int32x2_t
generate int32x4_t:int8x16_t:uint8x16_t:int32x4_t
a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32

generate int*_t, uint*_t

/// Polynomial multiply
a = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48

link-aarch64 = pmul._EXT_
link-arm = vmulp._EXT_
generate poly8x8_t, poly8x16_t

a = 1.0, 2.0, 1.0, 2.0
b = 2.0, 3.0, 4.0, 5.0
validate 2.0, 6.0, 4.0, 10.0

generate float64x*_t

/// Vector multiply by scalar
multi_fn = simd_mul, a, {vdup-nout-noext, b}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32

generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t
generate uint16x4_t:u16:uint16x4_t, uint16x8_t:u16:uint16x8_t, uint32x2_t:u32:uint32x2_t, uint32x4_t:u32:uint32x4_t

/// Vector multiply by scalar
multi_fn = simd_mul, a, {vdup-nout-noext, b}
validate 2., 4., 6., 8.

generate float64x1_t:f64:float64x1_t, float64x2_t:f64:float64x2_t
generate float32x2_t:f32:float32x2_t, float32x4_t:f32:float32x4_t
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_mul, a, {simd_shuffle!, b, b, {dup-out_len-LANE as u32}}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32

generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t
generate uint16x4_t, uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
generate uint32x2_t, uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t

/// Floating-point multiply
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_mul, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
validate 2., 4., 6., 8.

generate float64x1_t, float64x1_t:float64x2_t:float64x1_t

/// Floating-point multiply
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_mul, a, {simd_shuffle!, b, b, {dup-out_len-LANE as u32}}
validate 2., 4., 6., 8.

generate float64x2_t:float64x1_t:float64x2_t, float64x2_t
generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Floating-point multiply
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_extract, b:f32, b, LANE as u32
generate f32:float32x2_t:f32, f32:float32x4_t:f32

/// Floating-point multiply
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_extract, b:f64, b, LANE as u32
generate f64:float64x1_t:f64, f64:float64x2_t:f64

/// Signed multiply long
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32

link-arm = vmulls._EXT_
link-aarch64 = smull._EXT_
generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t

/// Signed multiply long
multi_fn = simd_shuffle!, a:half, a, a, {fixed-half-right}
multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
multi_fn = vmull-noqself-noext, a, b
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 9, 20, 11, 24, 13, 28, 15, 32

generate int8x16_t:int8x16_t:int16x8_t, int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
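// The _high variants widen only the upper half of each input: the fixed shuffle above picks
// lanes 8..15, so the per-lane result is assumed to be a[i + len/2] * b[i + len/2] in the
// wider type, e.g. 9 * 1 = 9 and 10 * 2 = 20 for the first two validated lanes.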

/// Unsigned multiply long
a = 1, 2, 3, 4, 5, 6, 7, 8
b = 1, 2, 1, 2, 1, 2, 1, 2
validate 1, 4, 3, 8, 5, 12, 7, 16

link-arm = vmullu._EXT_
link-aarch64 = umull._EXT_
generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t

/// Unsigned multiply long
multi_fn = simd_shuffle!, a:half, a, a, {fixed-half-right}
multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
multi_fn = vmull-noqself-noext, a, b
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 9, 20, 11, 24, 13, 28, 15, 32

generate uint8x16_t:uint8x16_t:uint16x8_t, uint16x8_t:uint16x8_t:uint32x4_t, uint32x4_t:uint32x4_t:uint64x2_t

/// Polynomial multiply long
a = 1, 2, 3, 4, 5, 6, 7, 8
b = 1, 3, 1, 3, 1, 3, 1, 3
validate 1, 6, 3, 12, 5, 10, 7, 24

link-arm = vmullp._EXT_
link-aarch64 = pmull._EXT_
generate poly8x8_t:poly8x8_t:poly16x8_t

/// Polynomial multiply long
link-aarch64 = pmull64:p64:p64:p64:int8x16_t
// Because of the support status of llvm, vmull_p64 is currently only available on arm
// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
generate p64:p64:p128
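// p64/p128 multiplication is carry-less (polynomial multiplication over GF(2)): partial
// products are combined with XOR rather than addition. A tiny worked case: 0b11 * 0b11 =
// 0b101, i.e. (x + 1)(x + 1) = x^2 + 1 once coefficients are reduced mod 2.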

/// Polynomial multiply long
multi_fn = simd_shuffle!, a:half, a, a, {fixed-half-right}
multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
multi_fn = vmull-noqself-noext, a, b
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 9, 30, 11, 20, 13, 18, 15, 48

generate poly8x16_t:poly8x16_t:poly16x8_t

/// Polynomial multiply long
multi_fn = vmull-noqself-noext, {simd_extract, a, 1}, {simd_extract, b, 1}
generate poly64x2_t:poly64x2_t:p128

/// Vector long multiply with scalar
multi_fn = vmull-in0-noext, a, {vdup-nin0-noext, b}
a = 1, 2, 3, 4, 5, 6, 7, 8
validate 2, 4, 6, 8, 10, 12, 14, 16

generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
generate uint16x4_t:u16:uint32x4_t, uint32x2_t:u32:uint64x2_t

/// Vector long multiply by scalar
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmull-in0-noext, a, {simd_shuffle!, b, b, {dup-in0_len-LANE as u32}}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32

generate int16x4_t:int16x4_t:int32x4_t, int16x4_t:int16x8_t:int32x4_t
generate int32x2_t:int32x2_t:int64x2_t, int32x2_t:int32x4_t:int64x2_t
generate uint16x4_t:uint16x4_t:uint32x4_t, uint16x4_t:uint16x8_t:uint32x4_t
generate uint32x2_t:uint32x2_t:uint64x2_t, uint32x2_t:uint32x4_t:uint64x2_t
multi_fn = vmull_high-noqself-noext, a, {vdup-nin0-noext, b}
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
validate 18, 20, 22, 24, 26, 28, 30, 32

generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
generate uint16x8_t:u16:uint32x4_t, uint32x4_t:u32:uint64x2_t

name = vmull_high_lane
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmull_high-noqself-noext, a, {simd_shuffle!, b, b, {dup-in0_len-LANE as u32}}
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate 18, 20, 22, 24, 26, 28, 30, 32

generate int16x8_t:int16x4_t:int32x4_t, int16x8_t:int16x8_t:int32x4_t
generate int32x4_t:int32x2_t:int64x2_t, int32x4_t:int32x4_t:int64x2_t
generate uint16x8_t:uint16x4_t:uint32x4_t, uint16x8_t:uint16x8_t:uint32x4_t
generate uint32x4_t:uint32x2_t:uint64x2_t, uint32x4_t:uint32x4_t:uint64x2_t

/// Floating-point multiply extended
validate 2., 4., 6., 8.

link-aarch64 = fmulx._EXT_
generate float*_t, float64x*_t

/// Floating-point multiply extended
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmulx-in0-noext, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
generate float64x1_t, float64x1_t:float64x2_t:float64x1_t

/// Floating-point multiply extended
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmulx-in0-noext, a, {simd_shuffle!, b, b, {dup-in0_len-LANE as u32}}
validate 2., 4., 6., 8.

generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
generate float64x2_t:float64x1_t:float64x2_t, float64x2_t

/// Floating-point multiply extended
link-aarch64 = fmulx._EXT_

/// Floating-point multiply extended
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmulx-out-noext, a, {simd_extract, b, LANE as u32}
generate f32:float32x2_t:f32, f32:float32x4_t:f32, f64:float64x1_t:f64, f64:float64x2_t:f64

/// Floating-point fused Multiply-Add to accumulator (vector)
multi_fn = vfma-self-_, b, c, a
a = 8.0, 18.0, 12.0, 10.0
b = 6.0, 4.0, 7.0, 8.0
c = 2.0, 3.0, 4.0, 5.0
validate 20.0, 30.0, 40.0, 50.0

link-aarch64 = llvm.fma._EXT_
generate float64x1_t
generate float64x2_t

link-arm = llvm.fma._EXT_
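// The fused multiply-add sections compute a + b * c with a single rounding. With the test
// vectors above: 8.0 + 6.0*2.0 = 20.0 and 18.0 + 4.0*3.0 = 30.0. A minimal scalar sketch in
// Rust (hypothetical helper, not generator input):
//     fn fma_f64(a: f64, b: f64, c: f64) -> f64 { b.mul_add(c, a) }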

/// Floating-point fused Multiply-Add to accumulator (vector)
multi_fn = vfma-self-noext, a, b, {vdup-nselfvfp4-noext, c}
a = 2.0, 3.0, 4.0, 5.0
b = 6.0, 4.0, 7.0, 8.0
validate 50.0, 35.0, 60.0, 69.0

generate float64x1_t:float64x1_t:f64:float64x1_t
generate float64x2_t:float64x2_t:f64:float64x2_t
generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t

/// Floating-point fused multiply-add to accumulator
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vfma-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}}
validate 14., 11., 18., 21.

generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
generate float64x1_t
generate float64x1_t:float64x1_t:float64x2_t:float64x1_t, float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t

/// Floating-point fused multiply-add to accumulator
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = simd_extract, c:out_t, c, LANE as u32
multi_fn = vfma-in2lane-_, b, c, a

link-aarch64 = llvm.fma._EXT_:f32:f32:f32:f32
generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32
link-aarch64 = llvm.fma._EXT_:f64:f64:f64:f64
generate f64:f64:float64x1_t:f64
generate f64:f64:float64x2_t:f64

/// Floating-point fused multiply-subtract from accumulator
multi_fn = simd_neg, b:in_t, b
multi_fn = vfma-self-noext, a, b, c
a = 20.0, 30.0, 40.0, 50.0
b = 6.0, 4.0, 7.0, 8.0
c = 2.0, 3.0, 4.0, 5.0
validate 8.0, 18.0, 12.0, 10.0

generate float64x1_t
generate float64x2_t

/// Floating-point fused Multiply-subtract to accumulator (vector)
multi_fn = vfms-self-noext, a, b, {vdup-nselfvfp4-noext, c}
a = 50.0, 35.0, 60.0, 69.0
b = 6.0, 4.0, 7.0, 8.0
validate 2.0, 3.0, 4.0, 5.0

generate float64x1_t:float64x1_t:f64:float64x1_t
generate float64x2_t:float64x2_t:f64:float64x2_t
generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t

/// Floating-point fused multiply-subtract to accumulator
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vfms-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}}
a = 14., 11., 18., 21.
validate 2., 3., 4., 5.

generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
generate float64x1_t
generate float64x1_t:float64x1_t:float64x2_t:float64x1_t, float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t

/// Floating-point fused multiply-subtract to accumulator
multi_fn = vfma-in2lane-::<LANE>, a, -b, c
generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32
generate f64:f64:float64x1_t:f64
generate f64:f64:float64x2_t:f64
a = 2.0, 6.0, 4.0, 10.0
b = 1.0, 2.0, 1.0, 2.0
validate 2.0, 3.0, 4.0, 5.0

generate float*_t, float64x*_t

a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14

generate int*_t, uint*_t, int64x*_t, uint64x*_t

a = 1.0, 4.0, 3.0, 8.0
b = 1.0, 2.0, 3.0, 4.0
validate 0.0, 2.0, 0.0, 4.0

generate float64x*_t

multi_fn = a.wrapping_sub(b)

multi_fn = a.wrapping_add(b)

/// Bitwise exclusive OR
multi_fn = simd_xor, a, b
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
validate 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17

generate poly8x8_t, poly16x4_t, poly8x16_t, poly16x8_t, poly64x1_t, poly64x2_t

/// Bitwise exclusive OR

/// Floating-point add across vector
link-aarch64 = faddv._EXT2_._EXT_
generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64

/// Signed Add Long across Vector
link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
generate int16x4_t:i32

/// Signed Add Long across Vector
a = 1, 2, 3, 4, 5, 6, 7, 8
link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
generate int16x8_t:i32

/// Signed Add Long across Vector
link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
generate int32x2_t:i64

/// Signed Add Long across Vector
link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
generate int32x4_t:i64

/// Unsigned Add Long across Vector
link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
generate uint16x4_t:u32

/// Unsigned Add Long across Vector
a = 1, 2, 3, 4, 5, 6, 7, 8
link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
generate uint16x8_t:u32

/// Unsigned Add Long across Vector
link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
generate uint32x2_t:u64

/// Unsigned Add Long across Vector
link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
generate uint32x4_t:u64

/// Subtract returning high narrow
multi_fn = fixed, c:in_t
multi_fn = simd_cast, {simd_shr, {simd_sub, a, b}, transmute(c)}
a = MAX, MIN, 1, 1, MAX, MIN, 1, 1
b = 1, 0, 0, 0, 1, 0, 0, 0
fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS
validate MAX, MIN, 0, 0, MAX, MIN, 0, 0

generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
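// Subtract-high-narrow keeps only the upper half of each difference. An assumed scalar model
// for the 16-bit to 8-bit case (HFBITS = 8 here):
//     fn subhn_s16(a: i16, b: i16) -> i8 { (a.wrapping_sub(b) >> 8) as i8 }
// so MAX - 1 narrows to i8::MAX and 1 - 0 narrows to 0, matching the validate line above.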

/// Subtract returning high narrow
multi_fn = vsubhn-noqself-noext, d:in_t0, b, c
multi_fn = simd_shuffle!, a, d, {asc-0-out_len}
a = MAX, 0, MAX, 0, MAX, 0, MAX, 0
b = MAX, 1, MAX, 1, MAX, 1, MAX, 1
c = 1, 0, 1, 0, 1, 0, 1, 0
validate MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0

generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t

/// Signed halving subtract
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7

link-arm = vhsubu._EXT_
link-aarch64 = uhsub._EXT_

link-arm = vhsubs._EXT_
link-aarch64 = shsub._EXT_

/// Signed Subtract Wide
multi_fn = simd_sub, a, {simd_cast, b}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

generate int16x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int64x2_t

/// Unsigned Subtract Wide
multi_fn = simd_sub, a, {simd_cast, b}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

generate uint16x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint64x2_t

/// Signed Subtract Wide
multi_fn = simd_shuffle!, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_sub, a, {simd_cast, c}
a = 8, 9, 10, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
validate 0, 0, 0, 0, 0, 0, 0, 0

generate int16x8_t:int8x16_t:int16x8_t

/// Signed Subtract Wide
multi_fn = simd_shuffle!, c:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_sub, a, {simd_cast, c}
b = 0, 1, 2, 3, 8, 9, 10, 11

generate int32x4_t:int16x8_t:int32x4_t

/// Signed Subtract Wide
multi_fn = simd_shuffle!, c:int32x2_t, b, b, [2, 3]
multi_fn = simd_sub, a, {simd_cast, c}

generate int64x2_t:int32x4_t:int64x2_t

/// Unsigned Subtract Wide
multi_fn = simd_shuffle!, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_sub, a, {simd_cast, c}
a = 8, 9, 10, 11, 12, 13, 14, 15
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 0, 0, 0, 0, 0, 0, 0

generate uint16x8_t:uint8x16_t:uint16x8_t

/// Unsigned Subtract Wide
multi_fn = simd_shuffle!, c:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_sub, a, {simd_cast, c}
b = 0, 1, 2, 3, 8, 9, 10, 11

generate uint32x4_t:uint16x8_t:uint32x4_t

/// Unsigned Subtract Wide
multi_fn = simd_shuffle!, c:uint32x2_t, b, b, [2, 3]
multi_fn = simd_sub, a, {simd_cast, c}

generate uint64x2_t:uint32x4_t:uint64x2_t

/// Signed Subtract Long
multi_fn = simd_cast, c:out_t, a
multi_fn = simd_cast, d:out_t, b
multi_fn = simd_sub, c, d
a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t

/// Unsigned Subtract Long
multi_fn = simd_cast, c:out_t, a
multi_fn = simd_cast, d:out_t, b
multi_fn = simd_sub, c, d
a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t

/// Signed Subtract Long
multi_fn = simd_shuffle!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle!, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
validate 6, 7, 8, 9, 10, 11, 12, 13

generate int8x16_t:int8x16_t:int16x8_t

/// Signed Subtract Long
multi_fn = simd_shuffle!, c:int16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle!, e:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f
a = 8, 9, 10, 11, 12, 13, 14, 15
b = 6, 6, 6, 6, 8, 8, 8, 8

generate int16x8_t:int16x8_t:int32x4_t

/// Signed Subtract Long
multi_fn = simd_shuffle!, c:int32x2_t, a, a, [2, 3]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle!, e:int32x2_t, b, b, [2, 3]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f

generate int32x4_t:int32x4_t:int64x2_t

/// Unsigned Subtract Long
multi_fn = simd_shuffle!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle!, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
validate 6, 7, 8, 9, 10, 11, 12, 13

generate uint8x16_t:uint8x16_t:uint16x8_t

/// Unsigned Subtract Long
multi_fn = simd_shuffle!, c:uint16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle!, e:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f
a = 8, 9, 10, 11, 12, 13, 14, 15
b = 6, 6, 6, 6, 8, 8, 8, 8

generate uint16x8_t:uint16x8_t:uint32x4_t

/// Unsigned Subtract Long
multi_fn = simd_shuffle!, c:uint32x2_t, a, a, [2, 3]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle!, e:uint32x2_t, b, b, [2, 3]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f

generate uint32x4_t:uint32x4_t:uint64x2_t

/// Bit clear and exclusive OR
a = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
c = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
validate 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14

link-aarch64 = llvm.aarch64.crypto.bcaxs._EXT_
generate int8x16_t, int16x8_t, int32x4_t, int64x2_t
link-aarch64 = llvm.aarch64.crypto.bcaxu._EXT_
generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t
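// BCAX computes a ^ (b & !c) ("bit clear and exclusive OR"). With c = 1 in every lane the low
// bit of b is cleared before the XOR, e.g. lane 2 above: 1 ^ (2 & !1) = 1 ^ 2 = 3.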

/// Floating-point complex add
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
validate 2., 0., 2., 0.

link-aarch64 = vcadd.rot270._EXT_
generate float32x2_t
name = vcaddq_rot270
generate float32x4_t, float64x2_t

/// Floating-point complex add
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
validate 0., -2., 0., -2.

link-aarch64 = vcadd.rot90._EXT_
generate float32x2_t
generate float32x4_t, float64x2_t

/// Floating-point complex multiply accumulate
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
validate 0., -2., 2., 0.

link-aarch64 = vcmla.rot0._EXT_
generate float32x2_t, float32x4_t, float64x2_t
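// For the rot0 form each even/odd pair behaves like a complex multiply-accumulate of the
// real part of b (assumed model; the actual semantics come from the FCMLA instruction):
//     out[2k]   = a[2k]   + b[2k] * c[2k]
//     out[2k+1] = a[2k+1] + b[2k] * c[2k+1]
// e.g. 1 + (-1)(1) = 0 and -1 + (-1)(1) = -2 for the first pair of the test above; the
// rot90/rot180/rot270 variants below rotate b by that angle in the complex plane first.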

/// Floating-point complex multiply accumulate
b = 1., -1., 1., -1.
validate 2., 0., 2., 0.

link-aarch64 = vcmla.rot90._EXT_
generate float32x2_t, float32x4_t, float64x2_t

/// Floating-point complex multiply accumulate
b = 1., -1., 1., -1.
validate 0., 0., 0., 0.

link-aarch64 = vcmla.rot180._EXT_
generate float32x2_t, float32x4_t, float64x2_t

/// Floating-point complex multiply accumulate
b = 1., -1., 1., -1.
validate 0., 2., 0., 2.

link-aarch64 = vcmla.rot270._EXT_
generate float32x2_t, float32x4_t, float64x2_t

/// Floating-point complex multiply accumulate
multi_fn = static_assert_imm-in2_rot-LANE
multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
multi_fn = vcmla-self-noext, a, b, c
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
validate 0., -2., 0., -2.

generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Floating-point complex multiply accumulate
multi_fn = static_assert_imm-in2_rot-LANE
multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
multi_fn = vcmla_rot90-rot-noext, a, b, c
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
validate 0., 0., 0., 0.

generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Floating-point complex multiply accumulate
multi_fn = static_assert_imm-in2_rot-LANE
multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
multi_fn = vcmla_rot180-rot-noext, a, b, c
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
validate 2., 0., 2., 0.

generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Floating-point complex multiply accumulate
multi_fn = static_assert_imm-in2_rot-LANE
multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
multi_fn = vcmla_rot270-rot-noext, a, b, c
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
validate 2., -2., 2., -2.

generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Dot product arithmetic (vector)
b = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
validate 31, 176, 31, 176

link-arm = sdot._EXT_._EXT3_
link-aarch64 = sdot._EXT_._EXT3_
generate int32x2_t:int8x8_t:int8x8_t:int32x2_t, int32x4_t:int8x16_t:int8x16_t:int32x4_t

link-arm = udot._EXT_._EXT3_
link-aarch64 = udot._EXT_._EXT3_
generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t, uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t
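// Each 32-bit lane of the dot product accumulates four byte products:
//     out[i] = a[i] + b[4*i]*c[4*i] + b[4*i+1]*c[4*i+1] + b[4*i+2]*c[4*i+2] + b[4*i+3]*c[4*i+3]
// e.g. the first group above contributes 1*1 + 2*2 + 3*3 + 4*4 = 30 before the accumulator
// lane is added.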

/// Dot product arithmetic (indexed)
multi_fn = static_assert_imm-in2_dot-LANE
multi_fn = transmute, c:merge4_t2, c
multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
multi_fn = vdot-out-noext, a, b, {transmute, c}
b = -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
validate 29, 72, 31, 72

// Only AArch64 has the laneq forms.
generate int32x2_t:int8x8_t:int8x16_t:int32x2_t
generate int32x4_t:int8x16_t:int8x16_t:int32x4_t
generate int32x2_t:int8x8_t:int8x8_t:int32x2_t
generate int32x4_t:int8x16_t:int8x8_t:int32x4_t

/// Dot product arithmetic (indexed)
multi_fn = static_assert_imm-in2_dot-LANE
multi_fn = transmute, c:merge4_t2, c
multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
multi_fn = vdot-out-noext, a, b, {transmute, c}
b = 255, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
validate 285, 72, 31, 72

// Only AArch64 has the laneq forms.
generate uint32x2_t:uint8x8_t:uint8x16_t:uint32x2_t
generate uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t
generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t
generate uint32x4_t:uint8x16_t:uint8x8_t:uint32x4_t

/// Maximum (vector)
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
validate 16, 15, 14, 13, 12, 11, 10, 9, 9, 10, 11, 12, 13, 14, 15, 16

link-arm = vmaxs._EXT_
link-aarch64 = smax._EXT_
link-arm = vmaxu._EXT_
link-aarch64 = umax._EXT_

/// Maximum (vector)
a = 1.0, -2.0, 3.0, -4.0
b = 0.0, 3.0, 2.0, 8.0
validate 1.0, 3.0, 3.0, 8.0

link-aarch64 = fmax._EXT_
generate float64x*_t

link-arm = vmaxs._EXT_
link-aarch64 = fmax._EXT_

/// Floating-point Maximum Number (vector)
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 8.0, 16.0, 3.0, 6.0

link-aarch64 = fmaxnm._EXT_
generate float64x*_t

link-arm = vmaxnm._EXT_
link-aarch64 = fmaxnm._EXT_

/// Floating-point maximum number across vector
link-aarch64 = fmaxnmv._EXT2_._EXT_
generate float32x2_t:f32, float64x2_t:f64
generate float32x4_t:f32

/// Floating-point Maximum Number Pairwise (vector).
link-aarch64 = fmaxnmp._EXT_
generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t

/// Floating-point Maximum Number Pairwise (vector).
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 2.0, 3.0, 16.0, 6.0
link-aarch64 = fmaxnmp._EXT_
generate float32x4_t:float32x4_t:float32x4_t

/// Floating-point maximum number pairwise
link-aarch64 = fmaxnmv._EXT2_._EXT_
generate float32x2_t:f32
generate float64x2_t:f64

/// Floating-point maximum pairwise
link-aarch64 = fmaxv._EXT2_._EXT_
generate float32x2_t:f32
generate float64x2_t:f64

/// Minimum (vector)
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
validate 1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1

link-arm = vmins._EXT_
link-aarch64 = smin._EXT_
link-arm = vminu._EXT_
link-aarch64 = umin._EXT_

/// Minimum (vector)
a = 1.0, -2.0, 3.0, -4.0
b = 0.0, 3.0, 2.0, 8.0
validate 0.0, -2.0, 2.0, -4.0

link-aarch64 = fmin._EXT_
generate float64x*_t

link-arm = vmins._EXT_
link-aarch64 = fmin._EXT_

/// Floating-point Minimum Number (vector)
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 1.0, 2.0, -1.0, -4.0

link-aarch64 = fminnm._EXT_
generate float64x*_t

link-arm = vminnm._EXT_
link-aarch64 = fminnm._EXT_

/// Floating-point minimum number across vector
link-aarch64 = fminnmv._EXT2_._EXT_
generate float32x2_t:f32, float64x2_t:f64
generate float32x4_t:f32
multi_fn = simd_shuffle!, a:half, a, a, {asc-halflen-halflen}
multi_fn = vmovl-noqself-noext, a
a = 1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10
validate 3, 4, 5, 6, 7, 8, 9, 10

generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t
generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t

/// Floating-point add pairwise
validate 3., 7., 7., 11.

link-aarch64 = faddp._EXT_
generate float32x4_t, float64x2_t

link-arm = vpadd._EXT_
generate float32x2_t

/// Floating-point add pairwise
multi_fn = simd_extract, a1:out_t, a, 0
multi_fn = simd_extract, a2:out_t, a, 1
generate float32x2_t:f32, float64x2_t:f64

/// Floating-point Minimum Number Pairwise (vector).
link-aarch64 = fminnmp._EXT_
generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t

/// Floating-point Minimum Number Pairwise (vector).
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 1.0, -4.0, 8.0, -1.0
link-aarch64 = fminnmp._EXT_
generate float32x4_t:float32x4_t:float32x4_t

/// Floating-point minimum number pairwise
link-aarch64 = fminnmv._EXT2_._EXT_
generate float32x2_t:f32
generate float64x2_t:f64

/// Floating-point minimum pairwise
link-aarch64 = fminv._EXT2_._EXT_
generate float32x2_t:f32
generate float64x2_t:f64

/// Signed saturating doubling multiply long
a = 0, 1, 2, 3, 4, 5, 6, 7
b = 1, 2, 3, 4, 5, 6, 7, 8
validate 0, 4, 12, 24, 40, 60, 84, 108

link-aarch64 = sqdmull._EXT2_
link-arm = vqdmull._EXT2_
generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
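// Saturating doubling multiply long widens, multiplies and doubles each lane:
// out[i] = sat(2 * a[i] * b[i]) in the wider type, e.g. 2 * 3 * 4 = 24 for the fourth lane
// of the test above.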

/// Signed saturating doubling multiply long
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqdmull-in_ntt-noext, a, b}, 0
generate i16:i16:i32

/// Signed saturating doubling multiply long
link-aarch64 = sqdmulls.scalar
generate i32:i32:i64

/// Vector saturating doubling long multiply with scalar
multi_fn = vqdmull-in_ntt-noext, a, {vdup_n-in_ntt-noext, b}
validate 8, 16, 24, 32

generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t

/// Signed saturating doubling multiply long
multi_fn = simd_shuffle!, a:half, a, a, {asc-halflen-halflen}
multi_fn = simd_shuffle!, b:half, b, b, {asc-halflen-halflen}
multi_fn = vqdmull-noqself-noext, a, b
a = 0, 1, 4, 5, 4, 5, 6, 7
b = 1, 2, 5, 6, 5, 6, 7, 8
validate 40, 60, 84, 112

generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply long
name = vqdmull_high_n
multi_fn = simd_shuffle!, a:in_ntt, a, a, {asc-out_len-out_len}
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = vqdmull-in_ntt-noext, a, b
a = 0, 2, 8, 10, 8, 10, 12, 14
validate 32, 40, 48, 56

generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t

/// Vector saturating doubling long multiply by scalar
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle!, b:in_t0, b, b, {dup-out_len-N as u32}
multi_fn = vqdmull-noqself-noext, a, b
b = 0, 2, 2, 0, 2, 0, 0, 0
validate 4, 8, 12, 16

generate int16x4_t:int16x8_t:int32x4_t, int32x2_t:int32x4_t:int64x2_t
generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t

/// Signed saturating doubling multiply long
name = vqdmullh_lane
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, b:in_t0, b, N as u32
multi_fn = vqdmullh-noqself-noext, a, b
b = 0, 2, 2, 0, 2, 0, 0, 0

generate i16:int16x4_t:i32, i16:int16x8_t:i32

/// Signed saturating doubling multiply long
name = vqdmulls_lane
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, b:in_t0, b, N as u32
multi_fn = vqdmulls-noqself-noext, a, b
b = 0, 2, 2, 0, 2, 0, 0, 0

generate i32:int32x2_t:i64, i32:int32x4_t:i64

/// Signed saturating doubling multiply long
name = vqdmull_high_lane
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle!, a:in_t, a, a, {asc-out_len-out_len}
multi_fn = simd_shuffle!, b:in_t, b, b, {dup-out_len-N as u32}
multi_fn = vqdmull-self-noext, a, b
a = 0, 1, 4, 5, 4, 5, 6, 7
b = 0, 2, 2, 0, 2, 0, 0, 0
validate 16, 20, 24, 28

generate int16x8_t:int16x4_t:int32x4_t, int32x4_t:int32x2_t:int64x2_t

/// Signed saturating doubling multiply long
name = vqdmull_high_lane
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle!, a:half, a, a, {asc-out_len-out_len}
multi_fn = simd_shuffle!, b:half, b, b, {dup-out_len-N as u32}
multi_fn = vqdmull-noqself-noext, a, b
a = 0, 1, 4, 5, 4, 5, 6, 7
b = 0, 2, 2, 0, 2, 0, 0, 0
validate 16, 20, 24, 28

generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-add long
multi_fn = vqadd-out-noext, a, {vqdmull-self-noext, b, c}
validate 5, 9, 13, 17

generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Vector widening saturating doubling multiply accumulate with scalar
multi_fn = vqadd-out-noext, a, {vqdmull_n-self-noext, b, c}
validate 5, 9, 13, 17

generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t

/// Signed saturating doubling multiply-add long
multi_fn = vqadd-out-noext, a, {vqdmull_high-noqself-noext, b, c}
b = 0, 1, 4, 5, 4, 5, 6, 7
c = 1, 2, 5, 6, 5, 6, 7, 8
validate 41, 62, 87, 116

generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-add long
name = vqdmlal_high_n
multi_fn = vqadd-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
b = 0, 2, 8, 10, 8, 10, 12, 14
validate 33, 42, 51, 60

generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t

/// Vector widening saturating doubling multiply accumulate with scalar
multi_fn = static_assert_imm-in2_exp_len-N
multi_fn = vqadd-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
c = 0, 2, 2, 0, 2, 0, 0, 0
validate 5, 10, 15, 20

generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Signed saturating doubling multiply-add long
name = vqdmlal_high_lane
multi_fn = static_assert_imm-in2_exp_len-N
multi_fn = vqadd-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
b = 0, 1, 4, 5, 4, 5, 6, 7
c = 0, 2, 0, 0, 0, 0, 0, 0
validate 17, 22, 27, 32

generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-add long
multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
multi_fn = vqadd-out-noext, a, {simd_extract, x, 0}
generate i32:i16:i16:i32

/// Signed saturating doubling multiply-add long
multi_fn = vqadd-out-noext, x:out_t, a, {vqdmulls-in_ntt-noext, b, c}
multi_fn = x as out_t
generate i64:i32:i32:i64

/// Signed saturating doubling multiply-add long
name = vqdmlalh_lane
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqdmlal-self-noext, a, b, {simd_extract, c, LANE as u32}
c = 2, 1, 1, 1, 1, 1, 1, 1

generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32
name = vqdmlals_lane
generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64

/// Signed saturating doubling multiply-subtract long
multi_fn = vqsub-out-noext, a, {vqdmull-self-noext, b, c}
validate -1, -1, -1, -1

generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Vector widening saturating doubling multiply subtract with scalar
multi_fn = vqsub-out-noext, a, {vqdmull_n-self-noext, b, c}
validate -1, -1, -1, -1

generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t

/// Signed saturating doubling multiply-subtract long
multi_fn = vqsub-out-noext, a, {vqdmull_high-noqself-noext, b, c}
b = 0, 1, 4, 5, 4, 5, 6, 7
c = 1, 2, 5, 6, 5, 6, 7, 8
validate -1, -2, -3, -4

generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-subtract long
name = vqdmlsl_high_n
multi_fn = vqsub-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
b = 0, 2, 8, 10, 8, 10, 12, 14
validate -1, -2, -3, -4

generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t

/// Vector widening saturating doubling multiply subtract with scalar
multi_fn = static_assert_imm-in2_exp_len-N
multi_fn = vqsub-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
c = 0, 2, 2, 0, 2, 0, 0, 0
validate -1, -2, -3, -4

generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Signed saturating doubling multiply-subtract long
name = vqdmlsl_high_lane
multi_fn = static_assert_imm-in2_exp_len-N
multi_fn = vqsub-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
b = 0, 1, 4, 5, 4, 5, 6, 7
c = 0, 2, 0, 0, 0, 0, 0, 0
validate -1, -2, -3, -4

generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-subtract long
multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
multi_fn = vqsub-out-noext, a, {simd_extract, x, 0}
generate i32:i16:i16:i32

/// Signed saturating doubling multiply-subtract long
multi_fn = vqsub-out-noext, x:out_t, a, {vqdmulls-in_ntt-noext, b, c}
multi_fn = x as out_t
generate i64:i32:i32:i64

/// Signed saturating doubling multiply-subtract long
name = vqdmlslh_lane
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqdmlsl-self-noext, a, b, {simd_extract, c, LANE as u32}
c = 2, 1, 1, 1, 1, 1, 1, 1

generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32
name = vqdmlsls_lane
generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64

/// Signed saturating doubling multiply returning high half
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 2, 2, 2, 2, 2, 2, 2, 2
validate 1, 1, 1, 1, 1, 1, 1, 1

link-aarch64 = sqdmulh._EXT_
link-arm = vqdmulh._EXT_
generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
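// Doubling multiply returning high half: out[i] = sat(2 * a[i] * b[i]) >> bits. A rough i16
// model (ignoring the single saturating case a == b == i16::MIN):
//     fn qdmulh_s16(a: i16, b: i16) -> i16 { ((2 * a as i32 * b as i32) >> 16) as i16 }
// which gives (2 * 32767 * 2) >> 16 = 1, matching the validate line above.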

/// Signed saturating doubling multiply returning high half
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqdmulh-in_ntt-noext, a, b}, 0

/// Vector saturating doubling multiply high with scalar
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = vqdmulh-out-noext, a, b
a = MAX, MAX, MAX, MAX

generate int16x4_t:i16:int16x4_t, int32x2_t:i32:int32x2_t

/// Vector saturating doubling multiply high with scalar
multi_fn = vdupq_n-in_ntt-noext, b:out_t, b
multi_fn = vqdmulh-out-noext, a, b
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
validate 1, 1, 1, 1, 1, 1, 1, 1

generate int16x8_t:i16:int16x8_t, int32x4_t:i32:int32x4_t

/// Signed saturating doubling multiply returning high half
name = vqdmulhh_lane
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, b:in_t0, b, N as u32
multi_fn = vqdmulhh-out_ntt-noext, a, b
b = 0, 0, MAX, 0, 0, 0, 0, 0

generate i16:int16x4_t:i16, i16:int16x8_t:i16

/// Signed saturating doubling multiply returning high half
name = vqdmulhs_lane
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, b:in_t0, b, N as u32
multi_fn = vqdmulhs-out_ntt-noext, a, b

generate i32:int32x2_t:i32, i32:int32x4_t:i32

/// Vector saturating doubling multiply high by scalar
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqdmulh-out-noext, a, {vdup-nout-noext, {simd_extract, b, LANE as u32}}
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 2, 1, 1, 1, 1, 1, 1, 1
validate 1, 1, 1, 1, 1, 1, 1, 1

generate int16x4_t, int16x8_t:int16x4_t:int16x8_t
generate int32x2_t, int32x4_t:int32x2_t:int32x4_t
generate int16x8_t, int16x4_t:int16x8_t:int16x4_t
generate int32x4_t, int32x2_t:int32x4_t:int32x2_t

/// Signed saturating extract narrow
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX

link-aarch64 = sqxtn._EXT2_
link-arm = vqmovns._EXT2_
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
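// Saturating extract narrow clamps each element to the narrower type's range before
// truncating, e.g. i16::MAX (32767) narrows to i8::MAX (127) instead of wrapping.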

/// Unsigned saturating extract narrow
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX

link-aarch64 = uqxtn._EXT2_
link-arm = vqmovnu._EXT2_
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Saturating extract narrow
multi_fn = simd_extract, {vqmovn-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
generate i16:i8, i32:i16
generate u16:u8, u32:u16

/// Saturating extract narrow
link-aarch64 = scalar.sqxtn._EXT2_._EXT_
link-aarch64 = scalar.uqxtn._EXT2_._EXT_

/// Signed saturating extract narrow
multi_fn = simd_shuffle!, a, {vqmovn-noqself-noext, b}, {asc-0-out_len}
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX

generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed saturating extract unsigned narrow
a = -1, -1, -1, -1, -1, -1, -1, -1
validate 0, 0, 0, 0, 0, 0, 0, 0

link-aarch64 = sqxtun._EXT2_
link-arm = vqmovnsu._EXT2_
generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t

/// Signed saturating extract unsigned narrow
multi_fn = simd_extract, {vqmovun-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
generate i16:u8, i32:u16, i64:u32

/// Signed saturating extract unsigned narrow
multi_fn = simd_shuffle!, a, {vqmovun-noqself-noext, b}, {asc-0-out_len}
a = 0, 0, 0, 0, 0, 0, 0, 0
b = -1, -1, -1, -1, -1, -1, -1, -1
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t

/// Signed saturating rounding doubling multiply returning high half
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 2, 2, 2, 2, 2, 2, 2, 2
validate 2, 2, 2, 2, 2, 2, 2, 2

link-aarch64 = sqrdmulh._EXT_
link-arm = vqrdmulh._EXT_
generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
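// The rounding variant adds half an output LSB before taking the high half:
//     out[i] = sat((2 * a[i] * b[i] + (1 << (bits - 1))) >> bits)
// For i16: (2 * 32767 * 2 + 32768) >> 16 = 2, which is why this validate line is 2 where the
// non-rounding vqdmulh above gives 1.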

/// Signed saturating rounding doubling multiply returning high half
multi_fn = simd_extract, {vqrdmulh-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0

/// Vector saturating rounding doubling multiply high with scalar
multi_fn = vqrdmulh-out-noext, a, {vdup-nout-noext, b}
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
validate 2, 2, 2, 2, 2, 2, 2, 2

generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t

/// Vector rounding saturating doubling multiply high by scalar
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_shuffle!, b:out_t, b, b, {dup-out_len-LANE as u32}
multi_fn = vqrdmulh-out-noext, a, b
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 0, 2, 0, 0, 0, 0, 0, 0
validate 2, 2, 2, 2, 2, 2, 2, 2

generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t

/// Signed saturating rounding doubling multiply returning high half
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vqrdmulh-out-noext, a, {simd_extract, b, LANE as u32}
b = 0, 2, 0, 0, 0, 0, 0, 0

generate i16:int16x4_t:i16, i16:int16x8_t:i16, i32:int32x2_t:i32, i32:int32x4_t:i32

/// Signed saturating rounding doubling multiply accumulate returning high half
a = 1, 1, 1, 1, 1, 1, 1, 1
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
c = 2, 2, 2, 2, 2, 2, 2, 2
validate 3, 3, 3, 3, 3, 3, 3, 3

link-aarch64 = sqrdmlah._EXT_
generate int16x4_t, int16x8_t, int32x2_t, int32x4_t

/// Signed saturating rounding doubling multiply accumulate returning high half
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
multi_fn = simd_extract, {vqrdmlah-in_ntt-noext, a, b, c}, 0

/// Signed saturating rounding doubling multiply accumulate returning high half
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
multi_fn = vqrdmlah-out-noext, a, b, c
a = 1, 1, 1, 1, 1, 1, 1, 1
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
c = 0, 2, 0, 0, 0, 0, 0, 0
validate 3, 3, 3, 3, 3, 3, 3, 3

generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t

/// Signed saturating rounding doubling multiply accumulate returning high half
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqrdmlah-self-noext, a, b, {simd_extract, c, LANE as u32}
c = 0, 2, 0, 0, 0, 0, 0, 0

generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32

/// Signed saturating rounding doubling multiply subtract returning high half
link-aarch64 = sqrdmlsh._EXT_
a = 1, 1, 1, 1, 1, 1, 1, 1
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
c = 2, 2, 2, 2, 2, 2, 2, 2
validate -1, -1, -1, -1, -1, -1, -1, -1

generate int16x4_t, int16x8_t, int32x2_t, int32x4_t

/// Signed saturating rounding doubling multiply subtract returning high half
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
multi_fn = simd_extract, {vqrdmlsh-in_ntt-noext, a, b, c}, 0

/// Signed saturating rounding doubling multiply subtract returning high half
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
multi_fn = vqrdmlsh-out-noext, a, b, c
a = 1, 1, 1, 1, 1, 1, 1, 1
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
c = 0, 2, 0, 0, 0, 0, 0, 0
validate -1, -1, -1, -1, -1, -1, -1, -1

generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t

/// Signed saturating rounding doubling multiply subtract returning high half
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqrdmlsh-self-noext, a, b, {simd_extract, c, LANE as u32}
c = 0, 2, 0, 0, 0, 0, 0, 0

generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32

/// Signed saturating rounding shift left
a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 8, MIN, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

link-aarch64 = sqrshl._EXT_
link-arm = vqrshifts._EXT_
generate int*_t, int64x*_t
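// Rounding saturating shift left by a signed, per-lane shift amount; positive shifts saturate
// on overflow, e.g. 2 << 2 = 8 while the MIN and MAX lanes stay pinned in the validate line
// above. Negative shift amounts shift right with rounding.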

/// Signed saturating rounding shift left
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqrshl-in_ntt-noext, a, b}, 0

/// Unsigned signed saturating rounding shift left
a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 8, 0, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

link-aarch64 = uqrshl._EXT_
generate u32:i32:u32, u64:i64:u64

link-arm = vqrshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

/// Unsigned signed saturating rounding shift left
multi_fn = vdup_n-out_ntt-noext, a:out_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqrshl-out_ntt-noext, a, b}, 0
generate u8:i8:u8, u16:i16:u16

/// Signed saturating rounded shift right narrow
multi_fn = static_assert-N-1-halfbits
a = MIN, 4, 8, 12, 16, 20, 24, 28
validate MIN, 1, 2, 3, 4, 5, 6, 7

link-aarch64 = sqrshrn._EXT2_
link-arm = vqrshiftns._EXT2_
const-arm = -N as ttn
arm-aarch64-separate
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t

/// Signed saturating rounded shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
generate i16:i8, i32:i16, i64:i32

/// Signed saturating rounded shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 6, 7
b = 8, 12, 24, 28, 48, 52, 56, 60
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t

/// Unsigned saturating rounded shift right narrow
multi_fn = static_assert-N-1-halfbits
a = MIN, 4, 8, 12, 16, 20, 24, 28
validate 0, 1, 2, 3, 4, 5, 6, 7
link-aarch64 = uqrshrn._EXT2_
link-arm = vqrshiftnu._EXT2_
const-arm = -N as ttn
arm-aarch64-separate
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Unsigned saturating rounded shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
generate u16:u8, u32:u16, u64:u32

/// Unsigned saturating rounded shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 6, 7
b = 8, 12, 24, 28, 48, 52, 56, 60
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed saturating rounded shift right unsigned narrow
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
validate 0, 1, 2, 3, 4, 5, 6, 7
link-aarch64 = sqrshrun._EXT2_
link-arm = vqrshiftnsu._EXT2_
const-arm = -N as ttn
arm-aarch64-separate
generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t

/// Signed saturating rounded shift right unsigned narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
multi_fn = simd_extract, {vqrshrun_n-in_ntt-::<N>, a}, 0
generate i16:u8, i32:u16, i64:u32

/// Signed saturating rounded shift right unsigned narrow
name = vqrshrun_high
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vqrshrun_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 6, 7
b = 8, 12, 24, 28, 48, 52, 56, 60
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t

/// Signed saturating shift left
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
link-aarch64 = sqshl._EXT_
link-arm = vqshifts._EXT_
generate int*_t, int64x*_t

/// Signed saturating shift left
multi_fn = vqshl-in_ntt-noext, c:in_ntt, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
multi_fn = simd_extract, c, 0
generate i8, i16, i32

/// Unsigned saturating shift left
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
link-aarch64 = uqshl._EXT_
generate u64:i64:u64
link-arm = vqshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

/// Unsigned saturating shift left
multi_fn = vqshl-out_ntt-noext, c:out_ntt, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
multi_fn = simd_extract, c, 0
generate u8:i8:u8, u16:i16:u16, u32:i32:u32

/// Signed saturating shift left
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N as _}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
generate int*_t, int64x*_t

/// Signed saturating shift left
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
generate i8, i16, i32, i64

/// Unsigned saturating shift left
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N as _}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
generate uint*_t, uint64x*_t

/// Unsigned saturating shift left
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
generate u8, u16, u32, u64

/// Signed saturating shift left unsigned
multi_fn = static_assert_imm-out_bits_exp_len-N
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
arm-aarch64-separate
link-aarch64 = sqshlu._EXT_
const-aarch64 = {dup-in_len-N as ttn}
link-arm = vqshiftsu._EXT_
const-arm = N as ttn
generate int8x8_t:uint8x8_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
generate int8x16_t:uint8x16_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t

/// Signed saturating shift left unsigned
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_extract, {vqshlu_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
generate i8:u8, i16:u16, i32:u32, i64:u64

/// Signed saturating shift right narrow
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
validate 0, 1, 2, 3, 4, 5, 6, 7
arm-aarch64-separate
link-aarch64 = sqshrn._EXT2_
link-arm = vqshiftns._EXT2_
const-arm = -N as ttn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t

/// Signed saturating shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
generate i16:i8, i32:i16

/// Signed saturating shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t

/// Unsigned saturating shift right narrow
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
validate 0, 1, 2, 3, 4, 5, 6, 7
arm-aarch64-separate
link-aarch64 = uqshrn._EXT2_
link-arm = vqshiftnu._EXT2_
const-arm = -N as ttn
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Unsigned saturating shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
generate u16:u8, u32:u16

/// Unsigned saturating shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed saturating shift right unsigned narrow
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
validate 0, 1, 2, 3, 4, 5, 6, 7
arm-aarch64-separate
link-aarch64 = sqshrun._EXT2_
link-arm = vqshiftnsu._EXT2_
const-arm = -N as ttn
generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t

/// Signed saturating shift right unsigned narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_extract, {vqshrun_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
generate i16:u8, i32:u16, i64:u32

/// Signed saturating shift right unsigned narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vqshrun_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t

/// Unsigned saturating accumulate of signed value
multi_fn = simd_extract, {vsqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
generate u8:i8:u8, u16:i16:u16

/// Unsigned saturating accumulate of signed value
link-aarch64 = usqadd._EXT_
generate u32:i32:u32, u64:i64:u64

/// Calculates the square root of each lane.
a = 4.0, 9.0, 16.0, 25.0
validate 2.0, 3.0, 4.0, 5.0
generate float*_t, float64x*_t

/// Reciprocal square-root estimate.
a = 1.0, 2.0, 3.0, 4.0
validate 0.998046875, 0.705078125, 0.576171875, 0.4990234375
link-aarch64 = frsqrte._EXT_
generate float64x*_t, f32, f64
link-arm = vrsqrte._EXT_

/// Unsigned reciprocal square root estimate
validate 4294967295, 4294967295, 4294967295, 4294967295
link-aarch64 = ursqrte._EXT_
link-arm = vrsqrte._EXT_
generate uint32x2_t, uint32x4_t

/// Floating-point reciprocal square root step
a = 1.0, 2.0, 3.0, 4.0
b = 1.0, 2.0, 3.0, 4.0
validate 1., -0.5, -3.0, -6.5
link-aarch64 = frsqrts._EXT_
generate float64x*_t, f32, f64
link-arm = vrsqrts._EXT_

/// Reciprocal estimate.
a = 4.0, 3.0, 2.0, 1.0
validate 0.24951171875, 0.3330078125, 0.4990234375, 0.998046875
link-aarch64 = frecpe._EXT_
generate float64x*_t, f32, f64
link-arm = vrecpe._EXT_

/// Unsigned reciprocal estimate
validate 4294967295, 4294967295, 4294967295, 4294967295
link-aarch64 = urecpe._EXT_
link-arm = vrecpe._EXT_
generate uint32x2_t, uint32x4_t

/// Floating-point reciprocal step
a = 4.0, 3.0, 2.0, 1.0
b = 4.0, 3.0, 2.0, 1.0
validate -14., -7., -2., 1.
link-aarch64 = frecps._EXT_
generate float64x*_t, f32, f64
link-arm = vrecps._EXT_

/// Floating-point reciprocal exponent
link-aarch64 = frecpx._EXT_

/// Vector reinterpret cast operation
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
generate poly64x1_t:int64x1_t, poly64x1_t:uint64x1_t, int64x1_t:poly64x1_t, uint64x1_t:poly64x1_t
generate poly64x2_t:int64x2_t, poly64x2_t:uint64x2_t, int64x2_t:poly64x2_t, uint64x2_t:poly64x2_t
generate uint8x8_t:int8x8_t, poly8x8_t:int8x8_t, poly16x4_t:int16x4_t, uint16x4_t:int16x4_t, uint32x2_t:int32x2_t, uint64x1_t:int64x1_t
generate uint8x16_t:int8x16_t, poly8x16_t:int8x16_t, poly16x8_t:int16x8_t, uint16x8_t:int16x8_t, uint32x4_t:int32x4_t, uint64x2_t:int64x2_t
generate poly8x8_t:uint8x8_t, int8x8_t:uint8x8_t, poly16x4_t:uint16x4_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
generate poly8x16_t:uint8x16_t, int8x16_t:uint8x16_t, poly16x8_t:uint16x8_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t
generate int8x8_t:poly8x8_t, uint8x8_t:poly8x8_t, int16x4_t:poly16x4_t, uint16x4_t:poly16x4_t
generate int8x16_t:poly8x16_t, uint8x16_t:poly8x16_t, int16x8_t:poly16x8_t, uint16x8_t:poly16x8_t

/// Vector reinterpret cast operation
a = 0, 1, 2, 3, 4, 5, 6, 7
validate 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
generate int16x4_t:int8x8_t, uint16x4_t:int8x8_t, poly16x4_t:int8x8_t, int32x2_t:int16x4_t, uint32x2_t:int16x4_t, int64x1_t:int32x2_t, uint64x1_t:int32x2_t
generate int16x8_t:int8x16_t, uint16x8_t:int8x16_t, poly16x8_t:int8x16_t, int32x4_t:int16x8_t, uint32x4_t:int16x8_t, int64x2_t:int32x4_t, uint64x2_t:int32x4_t
generate poly16x4_t:uint8x8_t, int16x4_t:uint8x8_t, uint16x4_t:uint8x8_t, int32x2_t:uint16x4_t, uint32x2_t:uint16x4_t, int64x1_t:uint32x2_t, uint64x1_t:uint32x2_t
generate poly16x8_t:uint8x16_t, int16x8_t:uint8x16_t, uint16x8_t:uint8x16_t, int32x4_t:uint16x8_t, uint32x4_t:uint16x8_t, int64x2_t:uint32x4_t, uint64x2_t:uint32x4_t
generate poly16x4_t:poly8x8_t, int16x4_t:poly8x8_t, uint16x4_t:poly8x8_t, int32x2_t:poly16x4_t, uint32x2_t:poly16x4_t
generate poly16x8_t:poly8x16_t, int16x8_t:poly8x16_t, uint16x8_t:poly8x16_t, int32x4_t:poly16x8_t, uint32x4_t:poly16x8_t
generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t
generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t
generate p128:int64x2_t, p128:uint64x2_t, p128:poly64x2_t

/// Vector reinterpret cast operation
a = 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
validate 0, 1, 2, 3, 4, 5, 6, 7
generate poly8x8_t:int16x4_t, int8x8_t:int16x4_t, uint8x8_t:int16x4_t, poly16x4_t:int32x2_t, int16x4_t:int32x2_t, uint16x4_t:int32x2_t, int32x2_t:int64x1_t, uint32x2_t:int64x1_t
generate poly8x16_t:int16x8_t, int8x16_t:int16x8_t, uint8x16_t:int16x8_t, poly16x8_t:int32x4_t, int16x8_t:int32x4_t, uint16x8_t:int32x4_t, int32x4_t:int64x2_t, uint32x4_t:int64x2_t
generate poly8x8_t:uint16x4_t, int8x8_t:uint16x4_t, uint8x8_t:uint16x4_t, poly16x4_t:uint32x2_t, int16x4_t:uint32x2_t, uint16x4_t:uint32x2_t, int32x2_t:uint64x1_t, uint32x2_t:uint64x1_t
generate poly8x16_t:uint16x8_t, int8x16_t:uint16x8_t, uint8x16_t:uint16x8_t, poly16x8_t:uint32x4_t, int16x8_t:uint32x4_t, uint16x8_t:uint32x4_t, int32x4_t:uint64x2_t, uint32x4_t:uint64x2_t
generate poly8x8_t:poly16x4_t, int8x8_t:poly16x4_t, uint8x8_t:poly16x4_t
generate poly8x16_t:poly16x8_t, int8x16_t:poly16x8_t, uint8x16_t:poly16x8_t
generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t
generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t
generate int64x2_t:p128, uint64x2_t:p128, poly64x2_t:p128

/// Vector reinterpret cast operation
validate 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
generate int32x2_t:int8x8_t, uint32x2_t:int8x8_t, int64x1_t:int16x4_t, uint64x1_t:int16x4_t
generate int32x4_t:int8x16_t, uint32x4_t:int8x16_t, int64x2_t:int16x8_t, uint64x2_t:int16x8_t
generate int32x2_t:uint8x8_t, uint32x2_t:uint8x8_t, int64x1_t:uint16x4_t, uint64x1_t:uint16x4_t
generate int32x4_t:uint8x16_t, uint32x4_t:uint8x16_t, int64x2_t:uint16x8_t, uint64x2_t:uint16x8_t
generate int32x2_t:poly8x8_t, uint32x2_t:poly8x8_t, int64x1_t:poly16x4_t, uint64x1_t:poly16x4_t
generate int32x4_t:poly8x16_t, uint32x4_t:poly8x16_t, int64x2_t:poly16x8_t, uint64x2_t:poly16x8_t
generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t
generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t
generate p128:int32x4_t, p128:uint32x4_t

/// Vector reinterpret cast operation
a = 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
generate poly8x8_t:int32x2_t, int8x8_t:int32x2_t, uint8x8_t:int32x2_t, poly16x4_t:int64x1_t, int16x4_t:int64x1_t, uint16x4_t:int64x1_t
generate poly8x16_t:int32x4_t, int8x16_t:int32x4_t, uint8x16_t:int32x4_t, poly16x8_t:int64x2_t, int16x8_t:int64x2_t, uint16x8_t:int64x2_t
generate poly8x8_t:uint32x2_t, int8x8_t:uint32x2_t, uint8x8_t:uint32x2_t, poly16x4_t:uint64x1_t, int16x4_t:uint64x1_t, uint16x4_t:uint64x1_t
generate poly8x16_t:uint32x4_t, int8x16_t:uint32x4_t, uint8x16_t:uint32x4_t, poly16x8_t:uint64x2_t, int16x8_t:uint64x2_t, uint16x8_t:uint64x2_t
generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t
generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t
generate int32x4_t:p128, uint32x4_t:p128

/// Vector reinterpret cast operation
validate 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
generate int64x1_t:int8x8_t, uint64x1_t:int8x8_t, int64x1_t:uint8x8_t, uint64x1_t:uint8x8_t, int64x1_t:poly8x8_t, uint64x1_t:poly8x8_t
generate int64x2_t:int8x16_t, uint64x2_t:int8x16_t, int64x2_t:uint8x16_t, uint64x2_t:uint8x16_t, int64x2_t:poly8x16_t, uint64x2_t:poly8x16_t
generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t
generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t
generate p128:int16x8_t, p128:uint16x8_t, p128:poly16x8_t

/// Vector reinterpret cast operation
a = 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t
generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t
generate poly8x8_t:poly64x1_t, int8x8_t:poly64x1_t, uint8x8_t:poly64x1_t
generate poly8x16_t:poly64x2_t, int8x16_t:poly64x2_t, uint8x16_t:poly64x2_t
generate int16x8_t:p128, uint16x8_t:p128, poly16x8_t:p128

/// Vector reinterpret cast operation
a = 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
generate int8x16_t:p128, uint8x16_t:p128, poly8x16_t:p128

/// Vector reinterpret cast operation
validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
generate p128:int8x16_t, p128:uint8x16_t, p128:poly8x16_t

/// Vector reinterpret cast operation
a = 0., 0., 0., 0., 0., 0., 0., 0.
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
generate float64x1_t:int8x8_t, float64x1_t:int16x4_t, float64x1_t:int32x2_t, float64x1_t:int64x1_t
generate float64x2_t:int8x16_t, float64x2_t:int16x8_t, float64x2_t:int32x4_t, float64x2_t:int64x2_t
generate float64x1_t:uint8x8_t, float64x1_t:uint16x4_t, float64x1_t:uint32x2_t, float64x1_t:uint64x1_t
generate float64x2_t:uint8x16_t, float64x2_t:uint16x8_t, float64x2_t:uint32x4_t, float64x2_t:uint64x2_t
generate float64x1_t:poly8x8_t, float64x1_t:poly16x4_t, float32x2_t:poly64x1_t, float64x1_t:poly64x1_t
generate float64x2_t:poly8x16_t, float64x2_t:poly16x8_t, float32x4_t:poly64x2_t, float64x2_t:poly64x2_t
generate float64x2_t:p128
generate float32x2_t:int8x8_t, float32x2_t:int16x4_t, float32x2_t:int32x2_t, float32x2_t:int64x1_t
generate float32x4_t:int8x16_t, float32x4_t:int16x8_t, float32x4_t:int32x4_t, float32x4_t:int64x2_t
generate float32x2_t:uint8x8_t, float32x2_t:uint16x4_t, float32x2_t:uint32x2_t, float32x2_t:uint64x1_t
generate float32x4_t:uint8x16_t, float32x4_t:uint16x8_t, float32x4_t:uint32x4_t, float32x4_t:uint64x2_t
generate float32x2_t:poly8x8_t, float32x2_t:poly16x4_t
generate float32x4_t:poly8x16_t, float32x4_t:poly16x8_t
generate float32x4_t:p128

/// Vector reinterpret cast operation
a = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate 0., 0., 0., 0., 0., 0., 0., 0.
generate int8x8_t:float64x1_t, int16x4_t:float64x1_t, int32x2_t:float64x1_t, int64x1_t:float64x1_t
generate int8x16_t:float64x2_t, int16x8_t:float64x2_t, int32x4_t:float64x2_t, int64x2_t:float64x2_t
generate poly8x8_t:float64x1_t, uint16x4_t:float64x1_t, uint32x2_t:float64x1_t, uint64x1_t:float64x1_t
generate poly8x16_t:float64x2_t, uint16x8_t:float64x2_t, uint32x4_t:float64x2_t, uint64x2_t:float64x2_t
generate uint8x8_t:float64x1_t, poly16x4_t:float64x1_t, poly64x1_t:float64x1_t, poly64x1_t:float32x2_t
generate uint8x16_t:float64x2_t, poly16x8_t:float64x2_t, poly64x2_t:float64x2_t, poly64x2_t:float32x4_t
generate p128:float64x2_t
generate int8x8_t:float32x2_t, int16x4_t:float32x2_t, int32x2_t:float32x2_t, int64x1_t:float32x2_t
generate int8x16_t:float32x4_t, int16x8_t:float32x4_t, int32x4_t:float32x4_t, int64x2_t:float32x4_t
generate uint8x8_t:float32x2_t, uint16x4_t:float32x2_t, uint32x2_t:float32x2_t, uint64x1_t:float32x2_t
generate uint8x16_t:float32x4_t, uint16x8_t:float32x4_t, uint32x4_t:float32x4_t, uint64x2_t:float32x4_t
generate poly8x8_t:float32x2_t, poly16x4_t:float32x2_t
generate poly8x16_t:float32x4_t, poly16x8_t:float32x4_t
generate p128:float32x4_t

/// Vector reinterpret cast operation
a = 0., 0., 0., 0., 0., 0., 0., 0.
validate 0., 0., 0., 0., 0., 0., 0., 0.
generate float32x2_t:float64x1_t, float64x1_t:float32x2_t
generate float32x4_t:float64x2_t, float64x2_t:float32x4_t

/// Signed rounding shift left
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
link-aarch64 = srshl._EXT_
link-arm = vrshifts._EXT_
generate int*_t, int64x*_t

/// Unsigned rounding shift left
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
link-aarch64 = urshl._EXT_
generate u64:i64:u64
link-arm = vrshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

/// Signed rounding shift right
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, {vdup-nself-noext, -N as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
generate int*_t, int64x*_t
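
// Informal note: a rounding shift right adds the rounding constant 1 << (N-1) before shifting,
// i.e. each lane is roughly (a + (1 << (N-1))) >> N. The expected values above are consistent
// with N = 2, so 4 becomes (4 + 2) >> 2 = 1, 8 becomes 2, and so on.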

/// Signed rounding shift right
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, -N as i64
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

/// Unsigned rounding shift right
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, -N as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
generate uint*_t, uint64x*_t

/// Unsigned rounding shift right
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, -N as i64
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

/// Rounding shift right narrow
multi_fn = static_assert-N-1-halfbits
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
arm-aarch64-separate
link-aarch64 = rshrn._EXT2_
link-arm = vrshiftn._EXT2_
const-arm = -N as ttn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t

/// Rounding shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = transmute, {vrshrn_n-noqsigned-::<N>, transmute(a)}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Rounding shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed rounding shift right and accumulate
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
generate int*_t, int64x*_t

/// Unsigned rounding shift right and accumulate
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
generate uint*_t, uint64x*_t

/// Signed rounding shift right and accumulate.
multi_fn = static_assert-N-1-bits
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a.wrapping_add(b)

/// Unsigned rounding shift right and accumulate.
multi_fn = static_assert-N-1-bits
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a.wrapping_add(b)

/// Rounding subtract returning high narrow
a = MAX, MIN, 0, 4, 5, 6, 7, 8
b = 1, 2, 3, 4, 5, 6, 7, 8
validate MIN, MIN, 0, 0, 0, 0, 0, 0
link-aarch64 = rsubhn._EXT2_
link-arm = vrsubhn._EXT2_
generate int16x8_t:int16x8_t:int8x8_t, int32x4_t:int32x4_t:int16x4_t, int64x2_t:int64x2_t:int32x2_t
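
// Informal note: the high half is taken with rounding, i.e. roughly
// (a - b + (1 << (halfbits - 1))) >> halfbits per lane. For the 16-bit case above,
// MAX - 1 = 32766, plus the rounding constant 128, shifted right by 8 gives 128,
// which is MIN (-128) when reinterpreted as the narrowed i8 lane, matching the validate line.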

/// Rounding subtract returning high narrow
multi_fn = transmute, {vrsubhn-noqsigned-noext, {transmute, a}, {transmute, b}}
a = MAX, MIN, 3, 4, 5, 6, 7, 8
b = 1, 2, 3, 4, 5, 6, 7, 8
validate 0, 0, 0, 0, 0, 0, 0, 0
generate uint16x8_t:uint16x8_t:uint8x8_t, uint32x4_t:uint32x4_t:uint16x4_t, uint64x2_t:uint64x2_t:uint32x2_t

/// Rounding subtract returning high narrow
multi_fn = vrsubhn-noqself-noext, x:in_t0, b, c
multi_fn = simd_shuffle!, a, x, {asc-0-out_len}
a = 1, 2, 0, 0, 0, 0, 0, 0
b = 1, 2, 3, 4, 5, 6, 7, 8
c = 1, 2, 3, 4, 5, 6, 7, 8
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t

/// Insert vector element from another vector element
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
generate i8:int8x8_t:int8x8_t, i16:int16x4_t:int16x4_t
generate i32:int32x2_t:int32x2_t, i64:int64x1_t:int64x1_t
generate u8:uint8x8_t:uint8x8_t, u16:uint16x4_t:uint16x4_t
generate u32:uint32x2_t:uint32x2_t, u64:uint64x1_t:uint64x1_t
generate p8:poly8x8_t:poly8x8_t, p16:poly16x4_t:poly16x4_t
generate p64:poly64x1_t:poly64x1_t
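
// Illustrative sketch (assumed): the LANE index is a const generic on the generated function,
// so the `i8:int8x8_t:int8x8_t` entry corresponds roughly to
//
//     pub unsafe fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t { ... }
//
// and static_assert_imm checks at compile time that LANE is within range.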

/// Insert vector element from another vector element
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
generate i8:int8x16_t:int8x16_t, i16:int16x8_t:int16x8_t
generate i32:int32x4_t:int32x4_t, i64:int64x2_t:int64x2_t
generate u8:uint8x16_t:uint8x16_t, u16:uint16x8_t:uint16x8_t
generate u32:uint32x4_t:uint32x4_t, u64:uint64x2_t:uint64x2_t
generate p8:poly8x16_t:poly8x16_t, p16:poly16x8_t:poly16x8_t
generate p64:poly64x2_t:poly64x2_t

/// Insert vector element from another vector element
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
validate 1., 2., 3., 4.
generate f64:float64x1_t:float64x1_t
generate f32:float32x2_t:float32x2_t

/// Insert vector element from another vector element
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
validate 1., 2., 3., 4.
generate f64:float64x2_t:float64x2_t
generate f32:float32x4_t:float32x4_t

/// Signed Shift left
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
link-aarch64 = sshl._EXT_
link-arm = vshifts._EXT_
generate int*_t, int64x*_t

/// Signed Shift left
multi_fn = transmute, {vshl-in_ntt-noext, transmute(a), transmute(b)}

/// Unsigned Shift left
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
link-aarch64 = ushl._EXT_
link-arm = vshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

/// Unsigned Shift left
multi_fn = transmute, {vshl-out_ntt-noext, transmute(a), transmute(b)}
generate u64:i64:u64

multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_shl, a, {vdup-nself-noext, N as _}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Signed shift left long
multi_fn = static_assert-N-0-bits
multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N as _}
a = 1, 2, 3, 4, 5, 6, 7, 8
validate 4, 8, 12, 16, 20, 24, 28, 32
generate int8x8_t:int16x8_t, int16x4_t:int32x4_t, int32x2_t:int64x2_t
generate uint8x8_t:uint16x8_t, uint16x4_t:uint32x4_t, uint32x2_t:uint64x2_t

/// Signed shift left long
multi_fn = static_assert-N-0-bits
multi_fn = simd_shuffle!, b:half, a, a, {asc-halflen-halflen}
multi_fn = vshll_n-noqself-::<N>, b
a = 0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8
validate 4, 8, 12, 16, 20, 24, 28, 32
generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t
generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t

multi_fn = static_assert-N-1-bits
multi_fn = fix_right_shift_imm-N-bits
multi_fn = simd_shr, a, {vdup-nself-noext, n as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
generate int*_t, int64x*_t
generate uint*_t, uint64x*_t

/// Shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N as _}}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Shift right narrow
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 1, 2, 5, 6, 5, 6, 7, 8
b = 20, 24, 28, 32, 52, 56, 60, 64
validate 1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed shift right and accumulate
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
generate int*_t, int64x*_t

/// Unsigned shift right and accumulate
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
generate uint*_t, uint64x*_t

validate 2147549312, 3221323968, 131329, 2684362752
link-aarch64 = llvm.aarch64.crypto.sm3partw1

validate 128, 256, 384, 1077977696
link-aarch64 = llvm.aarch64.crypto.sm3partw2

validate 0, 0, 0, 2098176
link-aarch64 = llvm.aarch64.crypto.sm3ss1

validate 1784948604, 136020997, 2940231695, 3789947679
link-aarch64 = llvm.aarch64.crypto.sm4ekey

validate 1093874472, 3616769504, 3878330411, 2765298765
link-aarch64 = llvm.aarch64.crypto.sm4e

/// Rotate and exclusive OR
link-aarch64 = llvm.aarch64.crypto.rax1

/// SHA512 hash update part 1
validate 11189044327219203, 7177611956453380
link-aarch64 = llvm.aarch64.crypto.sha512h

/// SHA512 hash update part 2
validate 5770237651009406214, 349133864969
link-aarch64 = llvm.aarch64.crypto.sha512h2

/// SHA512 schedule update 0
validate 144115188075855874, 9439544818968559619
link-aarch64 = llvm.aarch64.crypto.sha512su0

/// SHA512 schedule update 1
validate 105553116266526, 140737488355368
link-aarch64 = llvm.aarch64.crypto.sha512su1

/// Floating-point round to 32-bit integer, using current rounding mode
// For validation, the rounding mode should be the default: round-to-nearest (ties-to-even).
a = -1.5, 2.9, 1.5, -2.5
validate -2.0, 3.0, 2.0, -2.0
link-aarch64 = frint32x._EXT_
generate float32x2_t, float32x4_t

// The float64x1_t form uses a different LLVM link and isn't supported by Clang
// (and so has no intrinsic-test), so perform extra validation to make sure
// that it matches the float64x2_t form.
// - The biggest f64 that rounds to i32::MAX.
// - The smallest positive f64 that rounds out of range.
a = 2147483647.499999762, 2147483647.5
validate 2147483647.0, -2147483648.0
// - The smallest f64 that rounds to i32::MIN + 1.
// - The largest negative f64 that rounds out of range.
a = -2147483647.499999762, -2147483648.500000477
validate -2147483647.0, -2147483648.0
generate float64x2_t

// Odd-numbered tests for float64x1_t coverage.
validate -2147483648.0
a = -2147483648.500000477
validate -2147483648.0
multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
link-aarch64 = llvm.aarch64.frint32x.f64:f64:::f64
generate float64x1_t
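
// Informal note on the expected values above: FRINT32X rounds with the current mode (assumed
// round-to-nearest, ties-to-even, so -2.5 rounds to -2.0), and any input that is NaN, infinite,
// or outside the signed 32-bit range produces the most negative representable value, which is
// why 2147483647.5 validates to -2147483648.0.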

/// Floating-point round to 32-bit integer toward zero
a = -1.5, 2.9, 1.5, -2.5
validate -1.0, 2.0, 1.0, -2.0
link-aarch64 = frint32z._EXT_
generate float32x2_t, float32x4_t

// The float64x1_t form uses a different LLVM link and isn't supported by Clang
// (and so has no intrinsic-test), so perform extra validation to make sure
// that it matches the float64x2_t form.
// - The biggest f64 that rounds to i32::MAX.
// - The smallest positive f64 that rounds out of range.
a = 2147483647.999999762, 2147483648.0
validate 2147483647.0, -2147483648.0
// - The smallest f64 that rounds to i32::MIN + 1.
// - The largest negative f64 that rounds out of range.
a = -2147483647.999999762, -2147483649.0
validate -2147483647.0, -2147483648.0
generate float64x2_t

// Odd-numbered tests for float64x1_t coverage.
validate -2147483648.0
validate -2147483648.0
multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
link-aarch64 = llvm.aarch64.frint32z.f64:f64:::f64
generate float64x1_t

/// Floating-point round to 64-bit integer, using current rounding mode
// For validation, the rounding mode should be the default: round-to-nearest (ties-to-even).
a = -1.5, 2.9, 1.5, -2.5
validate -2.0, 3.0, 2.0, -2.0
link-aarch64 = frint64x._EXT_
generate float32x2_t, float32x4_t

// The float64x1_t form uses a different LLVM link and isn't supported by Clang
// (and so has no intrinsic-test), so perform extra validation to make sure
// that it matches the float64x2_t form.
// - The biggest f64 representable as an i64 (0x7ffffffffffffc00).
// - The smallest positive f64 that is out of range (2^63).
a = 9223372036854774784.0, 9223372036854775808.0
validate 9223372036854774784.0, -9223372036854775808.0
// - The smallest f64 representable as an i64 (i64::MIN).
// - The biggest negative f64 that is out of range.
a = -9223372036854775808.0, -9223372036854777856.0
validate -9223372036854775808.0, -9223372036854775808.0
generate float64x2_t

// Odd-numbered tests for float64x1_t coverage.
a = 9223372036854775808.0
validate -9223372036854775808.0
a = -9223372036854777856.0
validate -9223372036854775808.0
multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
link-aarch64 = llvm.aarch64.frint64x.f64:f64:::f64
generate float64x1_t

/// Floating-point round to 64-bit integer toward zero
a = -1.5, 2.9, 1.5, -2.5
validate -1.0, 2.0, 1.0, -2.0
link-aarch64 = frint64z._EXT_
generate float32x2_t, float32x4_t

// The float64x1_t form uses a different LLVM link and isn't supported by Clang
// (and so has no intrinsic-test), so perform extra validation to make sure
// that it matches the float64x2_t form.
// - The biggest f64 representable as an i64 (0x7ffffffffffffc00).
// - The smallest positive f64 that is out of range (2^63).
a = 9223372036854774784.0, 9223372036854775808.0
validate 9223372036854774784.0, -9223372036854775808.0
// - The smallest f64 representable as an i64 (i64::MIN).
// - The biggest negative f64 that is out of range.
a = -9223372036854775808.0, -9223372036854777856.0
validate -9223372036854775808.0, -9223372036854775808.0
generate float64x2_t

// Odd-numbered tests for float64x1_t coverage.
a = 9223372036854775808.0
validate -9223372036854775808.0
a = -9223372036854777856.0
validate -9223372036854775808.0
multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
link-aarch64 = llvm.aarch64.frint64z.f64:f64:::f64
generate float64x1_t

/// Transpose elements
multi_fn = simd_shuffle!, a1:in_t, a, b, {transpose-1-in_len}
multi_fn = simd_shuffle!, b1:in_t, a, b, {transpose-2-in_len}
multi_fn = transmute, (a1, b1)
a = 0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30
b = 1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t
generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t

/// Transpose elements
multi_fn = simd_shuffle!, a1:in_t, a, b, {transpose-1-in_len}
multi_fn = simd_shuffle!, b1:in_t, a, b, {transpose-2-in_len}
multi_fn = transmute, (a1, b1)
validate 0., 1., 2., 3., 2., 3., 6., 7.
generate float32x2_t:float32x2_t:float32x2x2_t
generate float32x4_t:float32x4_t:float32x4x2_t

/// Transpose vectors
multi_fn = simd_shuffle!, a, b, {transpose-1-in_len}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

/// Transpose vectors
multi_fn = simd_shuffle!, a, b, {transpose-1-in_len}
a = 0., 2., 4., 6., 8., 10., 12., 14.
b = 1., 3., 5., 7., 9., 11., 13., 15.
validate 0., 1., 4., 5., 8., 9., 12., 13.
generate float32x4_t
generate float32x2_t, float64x2_t

/// Transpose vectors
multi_fn = simd_shuffle!, a, b, {transpose-2-in_len}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

/// Transpose vectors
multi_fn = simd_shuffle!, a, b, {transpose-2-in_len}
a = 0., 2., 4., 6., 8., 10., 12., 14.
b = 1., 3., 5., 7., 9., 11., 13., 15.
validate 2., 3., 6., 7., 10., 11., 14., 15.
generate float32x4_t
generate float32x2_t, float64x2_t

multi_fn = simd_shuffle!, a0:in_t, a, b, {zip-1-in_len}
multi_fn = simd_shuffle!, b0:in_t, a, b, {zip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t
generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t
generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t
generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t
generate int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
generate uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
generate poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t

multi_fn = simd_shuffle!, a0:in_t, a, b, {zip-1-in_len}
multi_fn = simd_shuffle!, b0:in_t, a, b, {zip-2-in_len}
multi_fn = transmute, (a0, b0)
validate 1., 5., 2., 6., 3., 7., 4., 8.
generate float32x2_t:float32x2_t:float32x2x2_t
generate float32x4_t:float32x4_t:float32x4x2_t

multi_fn = simd_shuffle!, a, b, {zip-1-in_len}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t

multi_fn = simd_shuffle!, a, b, {zip-1-in_len}
a = 0., 2., 4., 6., 8., 10., 12., 14.
b = 1., 3., 5., 7., 9., 11., 13., 15.
validate 0., 1., 2., 3., 4., 5., 6., 7.
generate float32x2_t, float32x4_t, float64x2_t

multi_fn = simd_shuffle!, a, b, {zip-2-in_len}
a = 0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31
validate 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t

multi_fn = simd_shuffle!, a, b, {zip-2-in_len}
a = 0., 8., 8., 10., 8., 10., 12., 14.
b = 1., 9., 9., 11., 9., 11., 13., 15.
validate 8., 9., 10., 11., 12., 13., 14., 15.
generate float32x2_t, float32x4_t, float64x2_t

multi_fn = simd_shuffle!, a0:in_t, a, b, {unzip-1-in_len}
multi_fn = simd_shuffle!, b0:in_t, a, b, {unzip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16
b = 2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32
validate 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32
generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t
generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t

multi_fn = simd_shuffle!, a0:in_t, a, b, {unzip-1-in_len}
multi_fn = simd_shuffle!, b0:in_t, a, b, {unzip-2-in_len}
multi_fn = transmute, (a0, b0)
validate 1., 2., 2., 6., 2., 4., 6., 8.
generate float32x2_t:float32x2_t:float32x2x2_t
generate float32x4_t:float32x4_t:float32x4x2_t

multi_fn = simd_shuffle!, a, b, {unzip-1-in_len}
a = 1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0
b = 2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0
validate 1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

multi_fn = simd_shuffle!, a, b, {unzip-1-in_len}
a = 0., 8., 1., 9., 4., 12., 5., 13.
b = 1., 10., 3., 11., 6., 14., 7., 15.
validate 0., 1., 1., 3., 4., 5., 6., 7.
generate float32x4_t
generate float32x2_t, float64x2_t

multi_fn = simd_shuffle!, a, b, {unzip-2-in_len}
a = 0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24
b = 0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32
validate 17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

multi_fn = simd_shuffle!, a, b, {unzip-2-in_len}
a = 0., 8., 1., 9., 4., 12., 5., 13.
b = 2., 9., 3., 11., 6., 14., 7., 15.
validate 8., 9., 9., 11., 12., 13., 14., 15.
generate float32x4_t
generate float32x2_t, float64x2_t

////////////////////
// Unsigned Absolute difference and Accumulate Long
////////////////////

/// Unsigned Absolute difference and Accumulate Long
multi_fn = vabd-unsigned-noext, b, c, d:in_t
multi_fn = simd_add, a, {simd_cast, d}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20
generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
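
// Informal note: each output lane is a + |b - c| widened to the output element size.
// In the test above, lane 8 is 9 + |9 - 20| = 20, which matches the validate line.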

/// Unsigned Absolute difference and Accumulate Long
multi_fn = simd_shuffle!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle!, e:uint8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = vabd_u8, d, e, f:uint8x8_t
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 20, 20, 20, 20, 20, 20, 20, 20
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t

/// Unsigned Absolute difference and Accumulate Long
multi_fn = simd_shuffle!, d:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_shuffle!, e:uint16x4_t, c, c, [4, 5, 6, 7]
multi_fn = vabd_u16, d, e, f:uint16x4_t
multi_fn = simd_add, a, {simd_cast, f}
b = 1, 2, 3, 4, 9, 10, 11, 12
c = 10, 10, 10, 10, 20, 0, 2, 4
validate 20, 20, 20, 20
generate uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t

/// Unsigned Absolute difference and Accumulate Long
multi_fn = simd_shuffle!, d:uint32x2_t, b, b, [2, 3]
multi_fn = simd_shuffle!, e:uint32x2_t, c, c, [2, 3]
multi_fn = vabd_u32, d, e, f:uint32x2_t
multi_fn = simd_add, a, {simd_cast, f}
generate uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

////////////////////
// Signed Absolute difference and Accumulate Long
////////////////////

/// Signed Absolute difference and Accumulate Long
multi_fn = vabd-signed-noext, b, c, d:int8x8_t
multi_fn = simd_cast, e:uint8x8_t, d
multi_fn = simd_add, a, {simd_cast, e}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20
generate int16x8_t:int8x8_t:int8x8_t:int16x8_t

/// Signed Absolute difference and Accumulate Long
multi_fn = vabd-signed-noext, b, c, d:int16x4_t
multi_fn = simd_cast, e:uint16x4_t, d
multi_fn = simd_add, a, {simd_cast, e}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t

/// Signed Absolute difference and Accumulate Long
multi_fn = vabd-signed-noext, b, c, d:int32x2_t
multi_fn = simd_cast, e:uint32x2_t, d
multi_fn = simd_add, a, {simd_cast, e}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20
generate int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Signed Absolute difference and Accumulate Long
multi_fn = simd_shuffle!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle!, e:int8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = vabd_s8, d, e, f:int8x8_t
multi_fn = simd_cast, f:uint8x8_t, f
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 20, 20, 20, 20, 20, 20, 20, 20
generate int16x8_t:int8x16_t:int8x16_t:int16x8_t

/// Signed Absolute difference and Accumulate Long
multi_fn = simd_shuffle!, d:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_shuffle!, e:int16x4_t, c, c, [4, 5, 6, 7]
multi_fn = vabd_s16, d, e, f:int16x4_t
multi_fn = simd_cast, f:uint16x4_t, f
multi_fn = simd_add, a, {simd_cast, f}
b = 1, 2, 3, 4, 9, 10, 11, 12
c = 10, 10, 10, 10, 20, 0, 2, 4
validate 20, 20, 20, 20
generate int32x4_t:int16x8_t:int16x8_t:int32x4_t

/// Signed Absolute difference and Accumulate Long
multi_fn = simd_shuffle!, d:int32x2_t, b, b, [2, 3]
multi_fn = simd_shuffle!, e:int32x2_t, c, c, [2, 3]
multi_fn = vabd_s32, d, e, f:int32x2_t
multi_fn = simd_cast, f:uint32x2_t, f
multi_fn = simd_add, a, {simd_cast, f}
generate int64x2_t:int32x4_t:int32x4_t:int64x2_t

////////////////////
// Signed saturating Absolute value
////////////////////

/// Signed saturating Absolute value
a = MIN, MAX, -6, -5, -4, -3, -2, -1, 0, -127, 127, 1, 2, 3, 4, 5
validate MAX, MAX, 6, 5, 4, 3, 2, 1, 0, 127, 127, 1, 2, 3, 4, 5
link-arm = vqabs._EXT_
link-aarch64 = sqabs._EXT_

/// Signed saturating Absolute value
link-aarch64 = sqabs._EXT_

/// Signed saturating absolute value
multi_fn = simd_extract, {vqabs-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
generate i8:i8, i16:i16

/// Signed saturating absolute value
link-aarch64 = sqabs._EXT_
generate i32:i32, i64:i64

/// Shift left and insert
multi_fn = static_assert-N-0-63
multi_fn = transmute, {vsli_n-in_ntt-::<N>, transmute(a), transmute(b)}

/// Shift right and insert
multi_fn = static_assert-N-1-bits
multi_fn = transmute, {vsri_n-in_ntt-::<N>, transmute(a), transmute(b)}