]> git.proxmox.com Git - rustc.git/blame - library/stdarch/crates/stdarch-gen/neon.spec
New upstream version 1.78.0+dfsg1
[rustc.git] / library / stdarch / crates / stdarch-gen / neon.spec
CommitLineData
ba9703b0 1// ARM Neon intrinsic specification.
fc512014
XL
2//
3// This file contains the specification for a number of
ba9703b0
XL
4// intrinsics that allows us to generate them along with
5// their test cases.
6//
7// To the syntax of the file - it's not very intelligently parsed!
8//
9// # Comments
10// start with AT LEAST two, or four or more slashes so // is a
11// comment /////// is too.
12//
13// # Sections
14// Sections start with EXACTLY three slashes followed
15// by AT LEAST one space. Sections are used for two things:
16//
353b0b11 17// 1) they serve as the doc comment for the given intrinsics.
ba9703b0
XL
18// 2) they reset all variables (name, fn, etc.)
19//
20// # Variables
21//
22// name - The prefix of the function, suffixes are auto
23// generated by the type they get passed.
24//
25// fn - The function to call in rust-land.
26//
27// aarch64 - The intrinsic to check on aarch64 architecture.
28// If this is given but no arm intrinsic is provided,
29// the function will exclusively be generated for
30// aarch64.
31// This is used to generate both aarch64 specific and
353b0b11 32// shared intrinsics by first only specifying the aarch64
ba9703b0 33// variant then the arm variant.
fc512014 34//
353b0b11 35// arm - The arm v7 intrinsics used to check for arm code
ba9703b0 36// generation. All neon functions available in arm are
353b0b11 37// also available in aarch64. If no aarch64 intrinsic was
ba9703b0 38// set they are assumed to be the same.
353b0b11 39// Intrinsics ending with a `.` will have a size suffix
ba9703b0 40// added (such as `i8` or `i64`) that is not sign specific
353b0b11 41// Intrinsics ending with a `.s` will have a size suffix
ba9703b0
XL
42// added (such as `s8` or `u64`) that is sign specific
43//
44// a - First input for tests, it gets scaled to the size of
45// the type.
46//
47// b - Second input for tests, it gets scaled to the size of
48// the type.
49//
50// # special values
51//
52// TRUE - 'true' all bits are set to 1
53// FALSE - 'false' all bits are set to 0
54// FF - same as 'true'
55// MIN - minimal value (either 0 or the lowest negative number)
17df50a5 56// MAX - maximal value proper to overflow
ba9703b0
XL
57//
58// # validate <values>
a2a8927a 59// Validates a and b against the expected result of the test.
ba9703b0 60// The special values 'TRUE' and 'FALSE' can be used to
17df50a5 61// represent the correct NEON representation of true or
ba9703b0 62// false values. It too gets scaled to the type.
fc512014 63//
ba9703b0
XL
64// Validate needs to be called before generate as it sets
65// up the rules for validation that get generated for each
66// type.
67// # generate <types>
68// The generate command generates the intrinsics, it uses the
69// Variables set and can be called multiple times while overwriting
70// some of the variables.
71
72/// Vector bitwise and
73name = vand
74fn = simd_and
75arm = vand
76aarch64 = and
77a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
78b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
79validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
80b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
81validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
82generate int*_t, uint*_t, int64x*_t, uint64x*_t
83
84/// Vector bitwise or (immediate, inclusive)
85name = vorr
86fn = simd_or
87arm = vorr
88aarch64 = orr
89a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
90b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
91validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
92generate int*_t, uint*_t, int64x*_t, uint64x*_t
93
94
95/// Vector bitwise exclusive or (vector)
96name = veor
97fn = simd_xor
98arm = veor
99aarch64 = eor
100a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
101b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
102validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
103generate int*_t, uint*_t, int64x*_t, uint64x*_t
104
3c0e092e
XL
105/// Three-way exclusive OR
106name = veor3
107a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
108b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
109c = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
110validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
111target = sha3
112
113aarch64 = eor3
114link-aarch64 = llvm.aarch64.crypto.eor3s._EXT_
115generate int8x16_t, int16x8_t, int32x4_t, int64x2_t
116link-aarch64 = llvm.aarch64.crypto.eor3u._EXT_
117generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t
118
17df50a5
XL
119////////////////////
120// Absolute difference between the arguments
121////////////////////
122
123/// Absolute difference between the arguments
124name = vabd
125a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
126b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
127validate 15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15
128
129arm = vabd.s
130aarch64 = sabd
131link-arm = vabds._EXT_
132link-aarch64 = sabd._EXT_
133generate int*_t
134
135arm = vabd.s
136aarch64 = uabd
137link-arm = vabdu._EXT_
138link-aarch64 = uabd._EXT_
139generate uint*_t
140
141/// Absolute difference between the arguments of Floating
142name = vabd
143a = 1.0, 2.0, 5.0, -4.0
144b = 9.0, 3.0, 2.0, 8.0
145validate 8.0, 1.0, 3.0, 12.0
146
147aarch64 = fabd
148link-aarch64 = fabd._EXT_
149generate float64x*_t
150
151arm = vabd.s
152aarch64 = fabd
153link-arm = vabds._EXT_
154link-aarch64 = fabd._EXT_
155generate float*_t
156
3c0e092e
XL
157/// Floating-point absolute difference
158name = vabd
c620b35d 159multi_fn = simd_extract!, {vabd-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
3c0e092e
XL
160a = 1.0
161b = 9.0
162validate 8.0
163
164aarch64 = fabd
165generate f32, f64
166
17df50a5
XL
167////////////////////
168// Absolute difference Long
169////////////////////
170
171/// Unsigned Absolute difference Long
172name = vabdl
173multi_fn = simd_cast, {vabd-unsigned-noext, a, b}
174a = 1, 2, 3, 4, 4, 3, 2, 1
175b = 10, 10, 10, 10, 10, 10, 10, 10
176validate 9, 8, 7, 6, 6, 7, 8, 9
177
178arm = vabdl.s
179aarch64 = uabdl
180generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t
181
182/// Signed Absolute difference Long
183name = vabdl
184multi_fn = simd_cast, c:uint8x8_t, {vabd-signed-noext, a, b}
185multi_fn = simd_cast, c
186a = 1, 2, 3, 4, 4, 3, 2, 1
187b = 10, 10, 10, 10, 10, 10, 10, 10
188validate 9, 8, 7, 6, 6, 7, 8, 9
189
190arm = vabdl.s
191aarch64 = sabdl
192generate int8x8_t:int8x8_t:int16x8_t
193
194/// Signed Absolute difference Long
195name = vabdl
196multi_fn = simd_cast, c:uint16x4_t, {vabd-signed-noext, a, b}
197multi_fn = simd_cast, c
198a = 1, 2, 11, 12
199b = 10, 10, 10, 10
200validate 9, 8, 1, 2
201
202arm = vabdl.s
203aarch64 = sabdl
204generate int16x4_t:int16x4_t:int32x4_t
205
206/// Signed Absolute difference Long
207name = vabdl
208multi_fn = simd_cast, c:uint32x2_t, {vabd-signed-noext, a, b}
209multi_fn = simd_cast, c
210a = 1, 11
211b = 10, 10
212validate 9, 1
213
214arm = vabdl.s
215aarch64 = sabdl
216generate int32x2_t:int32x2_t:int64x2_t
217
218/// Unsigned Absolute difference Long
219name = vabdl_high
220no-q
353b0b11
FG
221multi_fn = simd_shuffle!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
222multi_fn = simd_shuffle!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5
XL
223multi_fn = simd_cast, {vabd_u8, c, d}
224a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
225b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
226validate 1, 0, 1, 2, 3, 4, 5, 6
227
228aarch64 = uabdl
229generate uint8x16_t:uint8x16_t:uint16x8_t
230
231/// Unsigned Absolute difference Long
232name = vabdl_high
233no-q
353b0b11
FG
234multi_fn = simd_shuffle!, c:uint16x4_t, a, a, [4, 5, 6, 7]
235multi_fn = simd_shuffle!, d:uint16x4_t, b, b, [4, 5, 6, 7]
17df50a5
XL
236multi_fn = simd_cast, {vabd_u16, c, d}
237a = 1, 2, 3, 4, 8, 9, 11, 12
238b = 10, 10, 10, 10, 10, 10, 10, 10
239validate 2, 1, 1, 2
240
241aarch64 = uabdl
242generate uint16x8_t:uint16x8_t:uint32x4_t
243
244/// Unsigned Absolute difference Long
245name = vabdl_high
246no-q
353b0b11
FG
247multi_fn = simd_shuffle!, c:uint32x2_t, a, a, [2, 3]
248multi_fn = simd_shuffle!, d:uint32x2_t, b, b, [2, 3]
17df50a5
XL
249multi_fn = simd_cast, {vabd_u32, c, d}
250a = 1, 2, 3, 4
251b = 10, 10, 10, 10
252validate 7, 6
253
254aarch64 = uabdl
255generate uint32x4_t:uint32x4_t:uint64x2_t
256
257/// Signed Absolute difference Long
258name = vabdl_high
259no-q
353b0b11
FG
260multi_fn = simd_shuffle!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
261multi_fn = simd_shuffle!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5
XL
262multi_fn = simd_cast, e:uint8x8_t, {vabd_s8, c, d}
263multi_fn = simd_cast, e
264a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
265b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
266validate 1, 0, 1, 2, 3, 4, 5, 6
267
268aarch64 = sabdl
269generate int8x16_t:int8x16_t:int16x8_t
270
271/// Signed Absolute difference Long
272name = vabdl_high
273no-q
353b0b11
FG
274multi_fn = simd_shuffle!, c:int16x4_t, a, a, [4, 5, 6, 7]
275multi_fn = simd_shuffle!, d:int16x4_t, b, b, [4, 5, 6, 7]
17df50a5
XL
276multi_fn = simd_cast, e:uint16x4_t, {vabd_s16, c, d}
277multi_fn = simd_cast, e
278a = 1, 2, 3, 4, 9, 10, 11, 12
279b = 10, 10, 10, 10, 10, 10, 10, 10
280validate 1, 0, 1, 2
281
282aarch64 = sabdl
283generate int16x8_t:int16x8_t:int32x4_t
284
285/// Signed Absolute difference Long
286name = vabdl_high
287no-q
353b0b11
FG
288multi_fn = simd_shuffle!, c:int32x2_t, a, a, [2, 3]
289multi_fn = simd_shuffle!, d:int32x2_t, b, b, [2, 3]
17df50a5
XL
290multi_fn = simd_cast, e:uint32x2_t, {vabd_s32, c, d}
291multi_fn = simd_cast, e
292a = 1, 2, 3, 4
293b = 10, 10, 10, 10
294validate 7, 6
295
296aarch64 = sabdl
297generate int32x4_t:int32x4_t:int64x2_t
298
ba9703b0
XL
299////////////////////
300// equality
301////////////////////
302
303/// Compare bitwise Equal (vector)
304name = vceq
305fn = simd_eq
306a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
307b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
308validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
309a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX
310b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN
311validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
312
313aarch64 = cmeq
314generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
315
316arm = vceq.
17df50a5 317generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t
ba9703b0
XL
318
319/// Floating-point compare equal
320name = vceq
321fn = simd_eq
322a = 1.2, 3.4, 5.6, 7.8
323b = 1.2, 3.4, 5.6, 7.8
324validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
325
326aarch64 = fcmeq
327generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
328
329arm = vceq.
ba9703b0
XL
330generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
331
3c0e092e
XL
332/// Compare bitwise equal
333name = vceq
334multi_fn = transmute, {vceq-in_ntt-noext, {transmute, a}, {transmute, b}}
335a = 1
336b = 2
337validate 0
338
339aarch64 = cmp
340generate i64:u64, u64
341
342/// Floating-point compare equal
343name = vceq
c620b35d 344multi_fn = simd_extract!, {vceq-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
3c0e092e
XL
345a = 1.
346b = 2.
347validate 0
348
349aarch64 = fcmp
350generate f32:u32, f64:u64
351
17df50a5
XL
352/// Signed compare bitwise equal to zero
353name = vceqz
354fn = simd_eq
355a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
356fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
357validate FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
358
359aarch64 = cmeq
360generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
361
362/// Unsigned compare bitwise equal to zero
363name = vceqz
364fn = simd_eq
365a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
366fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
367validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
368
369aarch64 = cmeq
370generate uint*_t, uint64x*_t
371
372/// Floating-point compare bitwise equal to zero
373name = vceqz
374fn = simd_eq
375a = 0.0, 1.2, 3.4, 5.6
376fixed = 0.0, 0.0, 0.0, 0.0
377validate TRUE, FALSE, FALSE, FALSE
378
379aarch64 = fcmeq
380generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
381
3c0e092e
XL
382/// Compare bitwise equal to zero
383name = vceqz
384multi_fn = transmute, {vceqz-in_ntt-noext, {transmute, a}}
385a = 1
386validate 0
387
388aarch64 = cmp
389generate i64:u64, u64
390
391/// Floating-point compare bitwise equal to zero
392name = vceqz
c620b35d 393multi_fn = simd_extract!, {vceqz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
3c0e092e
XL
394a = 1.
395validate 0
396
397aarch64 = fcmp
398generate f32:u32, f64:u64
399
17df50a5
XL
400/// Signed compare bitwise Test bits nonzero
401name = vtst
402multi_fn = simd_and, c:in_t, a, b
403multi_fn = fixed, d:in_t
404multi_fn = simd_ne, c, transmute(d)
405a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
406b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
407fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
408validate TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
409
410aarch64 = cmtst
411generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
412
413arm = vtst
a2a8927a 414generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly16x4_t:uint16x4_t, poly16x8_t:uint16x8_t
17df50a5
XL
415
416/// Unsigned compare bitwise Test bits nonzero
417name = vtst
418multi_fn = simd_and, c:in_t, a, b
419multi_fn = fixed, d:in_t
420multi_fn = simd_ne, c, transmute(d)
421a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
422b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
423fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
424validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
425
426aarch64 = cmtst
427generate uint64x*_t
428
429arm = vtst
430generate uint*_t
431
3c0e092e
XL
432/// Compare bitwise test bits nonzero
433name = vtst
434multi_fn = transmute, {vtst-in_ntt-noext, {transmute, a}, {transmute, b}}
435a = 0
436b = 0
437validate 0
438
439aarch64 = tst
440generate i64:i64:u64, u64
441
442/// Signed saturating accumulate of unsigned value
443name = vuqadd
444out-suffix
445a = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
446b = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
447validate 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8
448
449aarch64 = suqadd
450link-aarch64 = suqadd._EXT_
451generate i32:u32:i32, i64:u64:i64
452
453/// Signed saturating accumulate of unsigned value
454name = vuqadd
455out-suffix
c620b35d 456multi_fn = simd_extract!, {vuqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
3c0e092e
XL
457a = 1
458b = 2
459validate 3
460
461aarch64 = suqadd
462generate i8:u8:i8, i16:u16:i16
463
17df50a5
XL
464////////////////////
465// Floating-point absolute value
466////////////////////
467
468/// Floating-point absolute value
469name = vabs
470fn = simd_fabs
471a = -0.1, -2.2, -3.3, -6.6
472validate 0.1, 2.2, 3.3, 6.6
473aarch64 = fabs
474generate float64x1_t:float64x1_t, float64x2_t:float64x2_t
475
476arm = vabs
477generate float32x2_t:float32x2_t, float32x4_t:float32x4_t
478
ba9703b0
XL
479////////////////////
480// greater than
481////////////////////
482
483/// Compare signed greater than
484name = vcgt
485fn = simd_gt
486a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
487b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
488validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
489aarch64 = cmgt
490generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
491
492arm = vcgt.s
493generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
494
49aad941 495/// Compare unsigned greater than
ba9703b0
XL
496name = vcgt
497fn = simd_gt
498a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
499b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
500validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
501
502aarch64 = cmhi
503generate uint64x*_t
504
505arm = vcgt.s
506generate uint*_t
507
508/// Floating-point compare greater than
509name = vcgt
510fn = simd_gt
fc512014 511a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
ba9703b0
XL
512b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
513validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
514
515aarch64 = fcmgt
516generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
517
518arm = vcgt.s
ba9703b0
XL
519generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
520
3c0e092e
XL
521/// Compare greater than
522name = vcgt
523multi_fn = transmute, {vcgt-in_ntt-noext, {transmute, a}, {transmute, b}}
524a = 1
525b = 2
526validate 0
527
528aarch64 = cmp
529generate i64:u64, u64
530
531/// Floating-point compare greater than
532name = vcgt
c620b35d 533multi_fn = simd_extract!, {vcgt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
3c0e092e
XL
534a = 1.
535b = 2.
536validate 0
537
538aarch64 = fcmp
539generate f32:u32, f64:u64
540
ba9703b0
XL
541////////////////////
542// less than
543////////////////////
544
545/// Compare signed less than
546name = vclt
547fn = simd_lt
548a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
549b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
550validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
551aarch64 = cmgt
552generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
553
554arm = vcgt.s
555generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
556
557/// Compare unsigned less than
558name = vclt
559fn = simd_lt
560a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
561b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
562validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
563
564aarch64 = cmhi
565generate uint64x*_t
566
567arm = vcgt.s
568generate uint*_t
569
570/// Floating-point compare less than
571name = vclt
572fn = simd_lt
573a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
fc512014 574b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
ba9703b0
XL
575validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
576
577aarch64 = fcmgt
578generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
579
580arm = vcgt.s
ba9703b0
XL
581generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
582
3c0e092e
XL
583/// Compare less than
584name = vclt
585multi_fn = transmute, {vclt-in_ntt-noext, {transmute, a}, {transmute, b}}
586a = 2
587b = 1
588validate 0
589
590aarch64 = cmp
591generate i64:u64, u64
592
593/// Floating-point compare less than
594name = vclt
c620b35d 595multi_fn = simd_extract!, {vclt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
3c0e092e
XL
596a = 2.
597b = 1.
598validate 0
599
600aarch64 = fcmp
601generate f32:u32, f64:u64
602
ba9703b0
XL
603////////////////////
604// less than or equal
605////////////////////
606
607/// Compare signed less than or equal
608name = vcle
609fn = simd_le
610a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
611b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
612validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
613
614aarch64 = cmge
615generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
616
617arm = vcge.s
618generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
619
3c0e092e
XL
620/// Compare greater than or equal
621name = vcge
622multi_fn = transmute, {vcge-in_ntt-noext, {transmute, a}, {transmute, b}}
623a = 1
624b = 2
625validate 0
626
627aarch64 = cmp
628generate i64:u64, u64
629
630/// Floating-point compare greater than or equal
631name = vcge
c620b35d 632multi_fn = simd_extract!, {vcge-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
3c0e092e
XL
633a = 1.
634b = 2.
635validate 0
636
637aarch64 = fcmp
638generate f32:u32, f64:u64
639
ba9703b0
XL
640/// Compare unsigned less than or equal
641name = vcle
642fn = simd_le
643a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
644b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
645validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
646
647aarch64 = cmhs
648generate uint64x*_t
649
650arm = vcge.s
651generate uint*_t
652
653/// Floating-point compare less than or equal
654name = vcle
655fn = simd_le
656a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
fc512014 657b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
ba9703b0
XL
658validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
659aarch64 = fcmge
660generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
661
ba9703b0
XL
662arm = vcge.s
663generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
664
3c0e092e
XL
665/// Compare less than or equal
666name = vcle
667multi_fn = transmute, {vcle-in_ntt-noext, {transmute, a}, {transmute, b}}
668a = 2
669b = 1
670validate 0
671
672aarch64 = cmp
673generate i64:u64, u64
674
675/// Floating-point compare less than or equal
676name = vcle
c620b35d 677multi_fn = simd_extract!, {vcle-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
3c0e092e
XL
678a = 2.
679b = 1.
680validate 0
681
682aarch64 = fcmp
683generate f32:u32, f64:u64
684
ba9703b0
XL
685////////////////////
686// greater than or equal
687////////////////////
688
689/// Compare signed greater than or equal
690name = vcge
691fn = simd_ge
692a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
693b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
694validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
695
696aarch64 = cmge
697generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
698
699arm = vcge.s
700generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
701
702/// Compare unsigned greater than or equal
703name = vcge
704fn = simd_ge
705a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
706b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
707validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
708
709aarch64 = cmhs
710generate uint64x*_t
711
712arm = vcge.s
713generate uint*_t
714
715/// Floating-point compare greater than or equal
716name = vcge
717fn = simd_ge
fc512014 718a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
ba9703b0
XL
719b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
720validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
721
722aarch64 = fcmge
723generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
724
725arm = vcge.s
ba9703b0
XL
726generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
727
17df50a5
XL
728/// Compare signed greater than or equal to zero
729name = vcgez
730fn = simd_ge
731a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
732fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
733validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
ba9703b0 734
c620b35d 735aarch64 = cmge
17df50a5 736generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
ba9703b0 737
17df50a5
XL
738/// Floating-point compare greater than or equal to zero
739name = vcgez
740fn = simd_ge
741a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
742fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
743validate FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
ba9703b0 744
17df50a5
XL
745aarch64 = fcmge
746generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
ba9703b0 747
3c0e092e
XL
748/// Compare signed greater than or equal to zero
749name = vcgez
750multi_fn = transmute, {vcgez-in_ntt-noext, {transmute, a}}
751a = -1
752validate 0
753
49aad941 754aarch64 = nop
3c0e092e
XL
755generate i64:u64
756
757/// Floating-point compare greater than or equal to zero
758name = vcgez
c620b35d 759multi_fn = simd_extract!, {vcgez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
3c0e092e
XL
760a = -1.
761validate 0
762
763aarch64 = fcmp
764generate f32:u32, f64:u64
765
17df50a5
XL
766/// Compare signed greater than zero
767name = vcgtz
768fn = simd_gt
769a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
770fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
771validate FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
ba9703b0 772
17df50a5
XL
773aarch64 = cmgt
774generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
ba9703b0 775
17df50a5
XL
776/// Floating-point compare greater than zero
777name = vcgtz
778fn = simd_gt
779a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
780fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
781validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
ba9703b0 782
17df50a5
XL
783aarch64 = fcmgt
784generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
ba9703b0 785
3c0e092e
XL
786/// Compare signed greater than zero
787name = vcgtz
788multi_fn = transmute, {vcgtz-in_ntt-noext, {transmute, a}}
789a = -1
790validate 0
791
792aarch64 = cmp
793generate i64:u64
794
795/// Floating-point compare greater than zero
796name = vcgtz
c620b35d 797multi_fn = simd_extract!, {vcgtz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
3c0e092e
XL
798a = -1.
799validate 0
800
801aarch64 = fcmp
802generate f32:u32, f64:u64
803
17df50a5
XL
804/// Compare signed less than or equal to zero
805name = vclez
806fn = simd_le
807a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
808fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
809validate TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
ba9703b0 810
c620b35d 811aarch64 = cmle
17df50a5 812generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
ba9703b0 813
17df50a5
XL
814/// Floating-point compare less than or equal to zero
815name = vclez
816fn = simd_le
817a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
818fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
819validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
820
821aarch64 = fcmle
822generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
823
3c0e092e
XL
824/// Compare less than or equal to zero
825name = vclez
826multi_fn = transmute, {vclez-in_ntt-noext, {transmute, a}}
827a = 2
828validate 0
829
830aarch64 = cmp
831generate i64:u64
832
833/// Floating-point compare less than or equal to zero
834name = vclez
c620b35d 835multi_fn = simd_extract!, {vclez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
3c0e092e
XL
836a = 2.
837validate 0
838
839aarch64 = fcmp
840generate f32:u32, f64:u64
841
17df50a5
XL
842/// Compare signed less than zero
843name = vcltz
844fn = simd_lt
845a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
846fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
847validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
848
5e7ed085 849aarch64 = cmlt
17df50a5
XL
850generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
851
852/// Floating-point compare less than zero
853name = vcltz
854fn = simd_lt
855a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
856fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
857validate TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
858
859aarch64 = fcmlt
860generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
861
3c0e092e
XL
862/// Compare less than zero
863name = vcltz
864multi_fn = transmute, {vcltz-in_ntt-noext, {transmute, a}}
865a = 2
866validate 0
867
868aarch64 = asr
869generate i64:u64
870
871/// Floating-point compare less than zero
872name = vcltz
c620b35d 873multi_fn = simd_extract!, {vcltz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
3c0e092e
XL
874a = 2.
875validate 0
876
877aarch64 = fcmp
878generate f32:u32, f64:u64
879
17df50a5
XL
880/// Count leading sign bits
881name = vcls
882a = MIN, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
883validate 0, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0
884
885arm = vcls.s
886aarch64 = cls
887link-arm = vcls._EXT_
888link-aarch64 = cls._EXT_
ba9703b0
XL
889generate int*_t
890
3c0e092e
XL
891/// Count leading sign bits
892name = vcls
c620b35d 893multi_fn = vcls-signed-noext, {transmute, a}
3c0e092e
XL
894a = MIN, MAX, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
895validate BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1
896
897arm = vcls
898aarch64 = cls
a2a8927a 899generate uint8x8_t:int8x8_t, uint8x16_t:int8x16_t, uint16x4_t:int16x4_t, uint16x8_t:int16x8_t, uint32x2_t:int32x2_t, uint32x4_t:int32x4_t
3c0e092e
XL
900
901/// Count leading zero bits
17df50a5
XL
902name = vclz
903multi_fn = self-signed-ext, a
904a = MIN, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
905validate 0, 0, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 1
ba9703b0 906
17df50a5
XL
907arm = vclz.
908aarch64 = clz
909generate int*_t
910
3c0e092e 911/// Count leading zero bits
17df50a5
XL
912name = vclz
913multi_fn = transmute, {self-signed-ext, transmute(a)}
914a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
915validate BITS, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0
916
917arm = vclz.
918aarch64 = clz
ba9703b0
XL
919generate uint*_t
920
17df50a5
XL
921/// Floating-point absolute compare greater than
922name = vcagt
923a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
924b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
3c0e092e 925validate !0, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
ba9703b0 926
17df50a5
XL
927aarch64 = facgt
928link-aarch64 = facgt._EXT2_._EXT_
3c0e092e 929generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64
ba9703b0 930
17df50a5
XL
931arm = vacgt.s
932link-arm = vacgt._EXT2_._EXT_
933generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
ba9703b0 934
17df50a5
XL
935/// Floating-point absolute compare greater than or equal
936name = vcage
937a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
938b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
3c0e092e 939validate !0, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
ba9703b0 940
17df50a5
XL
941aarch64 = facge
942link-aarch64 = facge._EXT2_._EXT_
3c0e092e 943generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64
ba9703b0 944
17df50a5
XL
945arm = vacge.s
946link-arm = vacge._EXT2_._EXT_
947generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
ba9703b0 948
17df50a5
XL
949/// Floating-point absolute compare less than
950name = vcalt
951multi_fn = vcagt-self-noext, b, a
952a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
953b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
3c0e092e 954validate 0, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE
ba9703b0 955
17df50a5 956aarch64 = facgt
3c0e092e 957generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64
ba9703b0 958
17df50a5
XL
959arm = vacgt.s
960generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
ba9703b0 961
17df50a5
XL
962/// Floating-point absolute compare less than or equal
963name = vcale
964multi_fn = vcage-self-noext , b, a
965a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
966b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
3c0e092e 967validate 0, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE
ba9703b0 968
17df50a5 969aarch64 = facge
3c0e092e 970generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64
ba9703b0 971
17df50a5
XL
972arm = vacge.s
973generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
974
975/// Insert vector element from another vector element
976name = vcopy
977lane-suffixes
978constn = LANE1:LANE2
979multi_fn = static_assert_imm-in0_exp_len-LANE1
980multi_fn = static_assert_imm-in_exp_len-LANE2
353b0b11 981multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
ba9703b0 982a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
17df50a5
XL
983b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
984n = 0:1
985validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
ba9703b0 986
17df50a5
XL
987aarch64 = mov
988generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x2_t, int32x4_t, int64x2_t
989generate uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x2_t, uint32x4_t, uint64x2_t
990generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t
ba9703b0 991
17df50a5
XL
992/// Insert vector element from another vector element
993name = vcopy
994lane-suffixes
995constn = LANE1:LANE2
996multi_fn = static_assert_imm-in0_exp_len-LANE1
997multi_fn = static_assert_imm-in_exp_len-LANE2
353b0b11 998multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
17df50a5
XL
999a = 1., 2., 3., 4.
1000b = 0., 0.5, 0., 0.
1001n = 0:1
1002validate 0.5, 2., 3., 4.
fc512014 1003
17df50a5
XL
1004aarch64 = mov
1005generate float32x2_t, float32x4_t, float64x2_t
1006
1007/// Insert vector element from another vector element
1008name = vcopy
1009lane-suffixes
1010constn = LANE1:LANE2
1011multi_fn = static_assert_imm-in0_exp_len-LANE1
1012multi_fn = static_assert_imm-in_exp_len-LANE2
353b0b11
FG
1013multi_fn = simd_shuffle!, a:in_t, a, a, {asc-0-in_len}
1014multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in_len-LANE2}
fc512014 1015a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
17df50a5
XL
1016b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1017n = 0:1
1018validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
fc512014 1019
17df50a5
XL
1020aarch64 = mov
1021generate int8x8_t:int8x16_t:int8x8_t, int16x4_t:int16x8_t:int16x4_t, int32x2_t:int32x4_t:int32x2_t
1022generate uint8x8_t:uint8x16_t:uint8x8_t, uint16x4_t:uint16x8_t:uint16x4_t, uint32x2_t:uint32x4_t:uint32x2_t
1023generate poly8x8_t:poly8x16_t:poly8x8_t, poly16x4_t:poly16x8_t:poly16x4_t
fc512014 1024
17df50a5
XL
1025/// Insert vector element from another vector element
1026name = vcopy
1027lane-suffixes
1028constn = LANE1:LANE2
1029multi_fn = static_assert_imm-in0_exp_len-LANE1
1030multi_fn = static_assert_imm-in_exp_len-LANE2
353b0b11
FG
1031multi_fn = simd_shuffle!, a:in_t, a, a, {asc-0-in_len}
1032multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in_len-LANE2}
17df50a5
XL
1033a = 1., 2., 3., 4.
1034b = 0., 0.5, 0., 0.
1035n = 0:1
1036validate 0.5, 2., 3., 4.
fc512014 1037
17df50a5
XL
1038aarch64 = mov
1039generate float32x2_t:float32x4_t:float32x2_t
fc512014 1040
17df50a5
XL
1041/// Insert vector element from another vector element
1042name = vcopy
1043lane-suffixes
1044constn = LANE1:LANE2
1045multi_fn = static_assert_imm-in0_exp_len-LANE1
1046multi_fn = static_assert_imm-in_exp_len-LANE2
353b0b11
FG
1047multi_fn = simd_shuffle!, b:in_t0, b, b, {asc-0-in0_len}
1048multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
17df50a5
XL
1049a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
1050b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1051n = 0:1
1052validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
fc512014 1053
17df50a5
XL
1054aarch64 = mov
1055generate int8x16_t:int8x8_t:int8x16_t, int16x8_t:int16x4_t:int16x8_t, int32x4_t:int32x2_t:int32x4_t
1056generate uint8x16_t:uint8x8_t:uint8x16_t, uint16x8_t:uint16x4_t:uint16x8_t, uint32x4_t:uint32x2_t:uint32x4_t
1057generate poly8x16_t:poly8x8_t:poly8x16_t, poly16x8_t:poly16x4_t:poly16x8_t
fc512014 1058
17df50a5
XL
1059/// Insert vector element from another vector element
1060name = vcopy
1061lane-suffixes
1062constn = LANE1:LANE2
1063multi_fn = static_assert_imm-in0_exp_len-LANE1
1064multi_fn = static_assert_imm-in_exp_len-LANE2
353b0b11
FG
1065multi_fn = simd_shuffle!, b:in_t0, b, b, {asc-0-in0_len}
1066multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
fc512014 1067a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
17df50a5
XL
1068b = MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1069n = 1:0
1070validate 1, MAX
fc512014 1071
5e7ed085 1072aarch64 = mov
17df50a5 1073generate int64x2_t:int64x1_t:int64x2_t, uint64x2_t:uint64x1_t:uint64x2_t, poly64x2_t:poly64x1_t:poly64x2_t
fc512014 1074
17df50a5
XL
1075/// Insert vector element from another vector element
1076name = vcopy
1077lane-suffixes
1078constn = LANE1:LANE2
1079multi_fn = static_assert_imm-in0_exp_len-LANE1
1080multi_fn = static_assert_imm-in_exp_len-LANE2
353b0b11
FG
1081multi_fn = simd_shuffle!, b:in_t0, b, b, {asc-0-in0_len}
1082multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
17df50a5
XL
1083a = 1., 2., 3., 4.
1084b = 0.5, 0., 0., 0.
1085n = 1:0
1086validate 1., 0.5, 3., 4.
fc512014 1087
17df50a5
XL
1088aarch64 = mov
1089generate float32x4_t:float32x2_t:float32x4_t
5e7ed085 1090aarch64 = mov
17df50a5 1091generate float64x2_t:float64x1_t:float64x2_t
fc512014 1092
17df50a5
XL
1093/// Create a new vector from a 64-bit integer value
1094name = vcreate
1095out-suffix
1096multi_fn = transmute, a
1097a = 1
1098validate 1, 0, 0, 0, 0, 0, 0, 0
fc512014 1099
17df50a5
XL
1100aarch64 = nop
1101arm = nop
3c0e092e
XL
1102generate u64:int8x8_t, u64:int16x4_t, u64:int32x2_t, u64:int64x1_t
1103generate u64:uint8x8_t, u64:uint16x4_t, u64:uint32x2_t, u64:uint64x1_t
17df50a5 1104generate u64:poly8x8_t, u64:poly16x4_t
94222f64 1105target = aes
17df50a5
XL
1106generate u64:poly64x1_t
1107
1108/// Create a new vector from a 64-bit integer value
1109name = vcreate
1110out-suffix
1111multi_fn = transmute, a
1112a = 0
1113validate 0., 0.
1114
1115aarch64 = nop
1116generate u64:float64x1_t
1117arm = nop
1118generate u64:float32x2_t
1119
1120/// Fixed-point convert to floating-point
1121name = vcvt
1122double-suffixes
1123fn = simd_cast
1124a = 1, 2, 3, 4
1125validate 1., 2., 3., 4.
1126
1127aarch64 = scvtf
1128generate int64x1_t:float64x1_t, int64x2_t:float64x2_t
1129aarch64 = ucvtf
1130generate uint64x1_t:float64x1_t, uint64x2_t:float64x2_t
1131
1132arm = vcvt
1133aarch64 = scvtf
1134generate int32x2_t:float32x2_t, int32x4_t:float32x4_t
1135aarch64 = ucvtf
1136generate uint32x2_t:float32x2_t, uint32x4_t:float32x4_t
1137
1138/// Floating-point convert to higher precision long
1139name = vcvt
1140double-suffixes
1141fn = simd_cast
1142a = -1.2, 1.2
1143validate -1.2f32 as f64, 1.2f32 as f64
1144
1145aarch64 = fcvtl
1146generate float32x2_t:float64x2_t
1147
1148/// Floating-point convert to higher precision long
1149name = vcvt_high
1150noq-double-suffixes
353b0b11 1151multi_fn = simd_shuffle!, b:float32x2_t, a, a, [2, 3]
17df50a5
XL
1152multi_fn = simd_cast, b
1153a = -1.2, 1.2, 2.3, 3.4
1154validate 2.3f32 as f64, 3.4f32 as f64
1155
1156aarch64 = fcvtl
1157generate float32x4_t:float64x2_t
1158
1159/// Floating-point convert to lower precision narrow
1160name = vcvt
1161double-suffixes
1162fn = simd_cast
1163a = -1.2, 1.2
1164validate -1.2f64 as f32, 1.2f64 as f32
1165
1166aarch64 = fcvtn
1167generate float64x2_t:float32x2_t
1168
1169/// Floating-point convert to lower precision narrow
1170name = vcvt_high
1171noq-double-suffixes
353b0b11 1172multi_fn = simd_shuffle!, a, {simd_cast, b}, [0, 1, 2, 3]
17df50a5
XL
1173a = -1.2, 1.2
1174b = -2.3, 3.4
1175validate -1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32
1176
1177aarch64 = fcvtn
1178generate float32x2_t:float64x2_t:float32x4_t
1179
1180/// Floating-point convert to lower precision narrow, rounding to odd
1181name = vcvtx
1182double-suffixes
1183a = -1.0, 2.0
1184validate -1.0, 2.0
1185
1186aarch64 = fcvtxn
1187link-aarch64 = fcvtxn._EXT2_._EXT_
1188generate float64x2_t:float32x2_t
1189
3c0e092e
XL
1190/// Floating-point convert to lower precision narrow, rounding to odd
1191name = vcvtx
1192double-suffixes
c620b35d 1193multi_fn = simd_extract!, {vcvtx-_f32_f64-noext, {vdupq_n-in_ntt-noext, a}}, 0
3c0e092e
XL
1194a = -1.0
1195validate -1.0
1196
1197aarch64 = fcvtxn
1198generate f64:f32
1199
17df50a5
XL
1200/// Floating-point convert to lower precision narrow, rounding to odd
1201name = vcvtx_high
1202noq-double-suffixes
353b0b11 1203multi_fn = simd_shuffle!, a, {vcvtx-noq_doubleself-noext, b}, [0, 1, 2, 3]
17df50a5
XL
1204a = -1.0, 2.0
1205b = -3.0, 4.0
1206validate -1.0, 2.0, -3.0, 4.0
1207
1208aarch64 = fcvtxn
1209generate float32x2_t:float64x2_t:float32x4_t
1210
1211/// Fixed-point convert to floating-point
1212name = vcvt
1213double-n-suffixes
1214constn = N
1215multi_fn = static_assert-N-1-bits
1216a = 1, 2, 3, 4
1217n = 2
1218validate 0.25, 0.5, 0.75, 1.
c295e0f8 1219arm-aarch64-separate
17df50a5
XL
1220
1221aarch64 = scvtf
1222link-aarch64 = vcvtfxs2fp._EXT2_._EXT_
1223const-aarch64 = N
1224generate int64x1_t:float64x1_t, int64x2_t:float64x2_t, i32:f32, i64:f64
1225
1226aarch64 = ucvtf
1227link-aarch64 = vcvtfxu2fp._EXT2_._EXT_
1228const-aarch64 = N
1229generate uint64x1_t:float64x1_t, uint64x2_t:float64x2_t, u32:f32, u64:f64
1230
1231aarch64 = scvtf
1232link-aarch64 = vcvtfxs2fp._EXT2_._EXT_
1233arm = vcvt
1234link-arm = vcvtfxs2fp._EXT2_._EXT_
1235const-arm = N:i32
c295e0f8 1236
17df50a5
XL
1237generate int32x2_t:float32x2_t, int32x4_t:float32x4_t
1238
1239aarch64 = ucvtf
1240link-aarch64 = vcvtfxu2fp._EXT2_._EXT_
1241arm = vcvt
1242link-arm = vcvtfxu2fp._EXT2_._EXT_
1243const-arm = N:i32
1244generate uint32x2_t:float32x2_t, uint32x4_t:float32x4_t
1245
1246/// Floating-point convert to fixed-point, rounding toward zero
1247name = vcvt
1248double-n-suffixes
1249constn = N
1250multi_fn = static_assert-N-1-bits
1251a = 0.25, 0.5, 0.75, 1.
1252n = 2
1253validate 1, 2, 3, 4
c295e0f8 1254arm-aarch64-separate
17df50a5
XL
1255
1256aarch64 = fcvtzs
1257link-aarch64 = vcvtfp2fxs._EXT2_._EXT_
1258const-aarch64 = N
1259generate float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64
1260
1261aarch64 = fcvtzu
1262link-aarch64 = vcvtfp2fxu._EXT2_._EXT_
1263const-aarch64 = N
1264generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64
1265
1266aarch64 = fcvtzs
1267link-aarch64 = vcvtfp2fxs._EXT2_._EXT_
1268arm = vcvt
1269link-arm = vcvtfp2fxs._EXT2_._EXT_
1270const-arm = N:i32
1271generate float32x2_t:int32x2_t, float32x4_t:int32x4_t
1272
1273aarch64 = fcvtzu
1274link-aarch64 = vcvtfp2fxu._EXT2_._EXT_
1275arm = vcvt
1276link-arm = vcvtfp2fxu._EXT2_._EXT_
1277const-arm = N:i32
1278generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
1279
1280/// Fixed-point convert to floating-point
1281name = vcvt
1282double-suffixes
1283multi_fn = a as out_t
1284a = 1
1285validate 1.
1286
1287aarch64 = scvtf
1288generate i32:f32, i64:f64
1289aarch64 = ucvtf
1290generate u32:f32, u64:f64
1291
1292/// Floating-point convert to fixed-point, rounding toward zero
1293name = vcvt
1294double-suffixes
1295multi_fn = a as out_t
1296a = 1.
1297validate 1
1298
1299aarch64 = fcvtzs
1300generate f32:i32, f64:i64
1301aarch64 = fcvtzu
1302generate f32:u32, f64:u64
1303
1304/// Floating-point convert to signed fixed-point, rounding toward zero
1305name = vcvt
1306double-suffixes
94222f64 1307link-aarch64 = llvm.fptosi.sat._EXT2_._EXT_
17df50a5
XL
1308a = -1.1, 2.1, -2.9, 3.9
1309validate -1, 2, -2, 3
1310
1311aarch64 = fcvtzs
1312generate float64x1_t:int64x1_t, float64x2_t:int64x2_t
1313
94222f64 1314link-arm = llvm.fptosi.sat._EXT2_._EXT_
17df50a5
XL
1315arm = vcvt
1316generate float32x2_t:int32x2_t, float32x4_t:int32x4_t
1317
1318/// Floating-point convert to unsigned fixed-point, rounding toward zero
1319name = vcvt
1320double-suffixes
94222f64 1321link-aarch64 = llvm.fptoui.sat._EXT2_._EXT_
17df50a5
XL
1322a = 1.1, 2.1, 2.9, 3.9
1323validate 1, 2, 2, 3
1324
1325aarch64 = fcvtzu
1326generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
1327
94222f64 1328link-arm = llvm.fptoui.sat._EXT2_._EXT_
17df50a5
XL
1329arm = vcvt
1330generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
1331
1332/// Floating-point convert to signed integer, rounding to nearest with ties to away
1333name = vcvta
1334double-suffixes
1335a = -1.1, 2.1, -2.9, 3.9
1336validate -1, 2, -3, 4
1337
1338aarch64 = fcvtas
1339link-aarch64 = fcvtas._EXT2_._EXT_
1340generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t
1341
1342/// Floating-point convert to integer, rounding to nearest with ties to away
1343name = vcvta
1344double-suffixes
1345a = 2.9
1346validate 3
1347
1348aarch64 = fcvtas
1349link-aarch64 = fcvtas._EXT2_._EXT_
1350generate f32:i32, f64:i64
1351
1352aarch64 = fcvtau
1353link-aarch64 = fcvtau._EXT2_._EXT_
1354generate f32:u32, f64:u64
1355
1356/// Floating-point convert to signed integer, rounding to nearest with ties to even
1357name = vcvtn
1358double-suffixes
1359a = -1.5, 2.1, -2.9, 3.9
1360validate -2, 2, -3, 4
1361
1362aarch64 = fcvtns
1363link-aarch64 = fcvtns._EXT2_._EXT_
1364generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64
1365
1366/// Floating-point convert to signed integer, rounding toward minus infinity
1367name = vcvtm
1368double-suffixes
1369a = -1.1, 2.1, -2.9, 3.9
1370validate -2, 2, -3, 3
1371
1372aarch64 = fcvtms
1373link-aarch64 = fcvtms._EXT2_._EXT_
1374generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64
1375
1376/// Floating-point convert to signed integer, rounding toward plus infinity
1377name = vcvtp
1378double-suffixes
1379a = -1.1, 2.1, -2.9, 3.9
1380validate -1, 3, -2, 4
1381
1382aarch64 = fcvtps
1383link-aarch64 = fcvtps._EXT2_._EXT_
1384generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64
1385
1386/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
1387name = vcvta
1388double-suffixes
1389a = 1.1, 2.1, 2.9, 3.9
1390validate 1, 2, 3, 4
1391
1392aarch64 = fcvtau
1393link-aarch64 = fcvtau._EXT2_._EXT_
1394generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
1395
1396/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
1397name = vcvtn
1398double-suffixes
1399a = 1.5, 2.1, 2.9, 3.9
1400validate 2, 2, 3, 4
1401
1402aarch64 = fcvtnu
1403link-aarch64 = fcvtnu._EXT2_._EXT_
1404generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64
1405
1406/// Floating-point convert to unsigned integer, rounding toward minus infinity
1407name = vcvtm
1408double-suffixes
1409a = 1.1, 2.1, 2.9, 3.9
1410validate 1, 2, 2, 3
1411
1412aarch64 = fcvtmu
1413link-aarch64 = fcvtmu._EXT2_._EXT_
1414generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64
1415
1416/// Floating-point convert to unsigned integer, rounding toward plus infinity
1417name = vcvtp
1418double-suffixes
1419a = 1.1, 2.1, 2.9, 3.9
1420validate 2, 3, 3, 4
1421
1422aarch64 = fcvtpu
1423link-aarch64 = fcvtpu._EXT2_._EXT_
1424generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64
1425
1426/// Set all vector lanes to the same value
1427name = vdup
1428lane-suffixes
1429constn = N
1430multi_fn = static_assert_imm-in_exp_len-N
353b0b11 1431multi_fn = simd_shuffle!, a, a, {dup-out_len-N as u32}
17df50a5
XL
1432a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
1433n = HFLEN
1434validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1435
1436aarch64 = dup
1437generate poly64x2_t, poly64x1_t:poly64x2_t
1438
1439arm = vdup.l
1440generate int*_t
1441generate int8x16_t:int8x8_t, int16x8_t:int16x4_t, int32x4_t:int32x2_t
1442generate int8x8_t:int8x16_t, int16x4_t:int16x8_t, int32x2_t:int32x4_t
1443
1444generate uint*_t
1445generate uint8x16_t:uint8x8_t, uint16x8_t:uint16x4_t, uint32x4_t:uint32x2_t
1446generate uint8x8_t:uint8x16_t, uint16x4_t:uint16x8_t, uint32x2_t:uint32x4_t
1447
1448generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
1449generate poly8x16_t:poly8x8_t, poly16x8_t:poly16x4_t
1450generate poly8x8_t:poly8x16_t, poly16x4_t:poly16x8_t
1451
1452/// Set all vector lanes to the same value
1453name = vdup
1454lane-suffixes
1455constn = N
1456multi_fn = static_assert_imm-in_exp_len-N
353b0b11 1457multi_fn = simd_shuffle!, a, a, {dup-out_len-N as u32}
17df50a5
XL
1458a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
1459n = HFLEN
1460validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1461
1462aarch64 = dup
1463arm = vmov
1464generate int64x2_t, int64x1_t:int64x2_t, uint64x2_t, uint64x1_t:uint64x2_t
1465
1466/// Set all vector lanes to the same value
1467name = vdup
1468lane-suffixes
1469constn = N
1470multi_fn = static_assert_imm-in_exp_len-N
353b0b11 1471multi_fn = simd_shuffle!, a, a, {dup-out_len-N as u32}
17df50a5
XL
1472a = 1., 1., 1., 4.
1473n = HFLEN
1474validate 1., 1., 1., 1.
1475
1476aarch64 = dup
1477generate float64x2_t, float64x1_t:float64x2_t
1478
1479arm = vdup.l
1480generate float*_t, float32x4_t:float32x2_t, float32x2_t:float32x4_t
1481
1482/// Set all vector lanes to the same value
1483name = vdup
1484lane-suffixes
1485constn = N
1486multi_fn = static_assert_imm-in_exp_len-N
1487multi_fn = a
1488a = 0
1489n = HFLEN
1490validate 0
1491
1492aarch64 = nop
1493generate poly64x1_t
1494
1495arm = nop
1496generate int64x1_t, uint64x1_t
1497
1498/// Set all vector lanes to the same value
1499name = vdup
1500lane-suffixes
1501constn = N
1502multi_fn = static_assert_imm-in_exp_len-N
1503multi_fn = a
1504a = 0.
1505n = HFLEN
1506validate 0.
1507
1508aarch64 = nop
1509generate float64x1_t
1510
1511/// Set all vector lanes to the same value
1512name = vdup
1513lane-suffixes
1514constn = N
1515multi_fn = static_assert_imm-in_exp_len-N
c620b35d 1516multi_fn = transmute--<element_t _>, {simd_extract!, a, N as u32}
17df50a5
XL
1517a = 0, 1
1518n = HFLEN
1519validate 1
1520
1521aarch64 = nop
1522generate poly64x2_t:poly64x1_t
1523
1524arm = vmov
1525generate int64x2_t:int64x1_t, uint64x2_t:uint64x1_t
1526
1527/// Set all vector lanes to the same value
1528name = vdup
1529lane-suffixes
1530constn = N
1531multi_fn = static_assert_imm-in_exp_len-N
c620b35d 1532multi_fn = transmute--<element_t _>, {simd_extract!, a, N as u32}
17df50a5
XL
1533a = 0., 1.
1534n = HFLEN
1535validate 1.
1536
1537aarch64 = nop
1538generate float64x2_t:float64x1_t
1539
1540/// Set all vector lanes to the same value
1541name = vdup
1542lane-suffixes
1543constn = N
1544multi_fn = static_assert_imm-in_exp_len-N
c620b35d 1545multi_fn = simd_extract!, a, N as u32
17df50a5
XL
1546a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
1547n = HFLEN
1548validate 1
1549
1550aarch64 = nop
1551generate int8x8_t:i8, int8x16_t:i8, int16x4_t:i16, int16x8_t:i16, int32x2_t:i32, int32x4_t:i32, int64x1_t:i64, int64x2_t:i64
1552generate uint8x8_t:u8, uint8x16_t:u8, uint16x4_t:u16, uint16x8_t:u16, uint32x2_t:u32, uint32x4_t:u32, uint64x1_t:u64, uint64x2_t:u64
1553generate poly8x8_t:p8, poly8x16_t:p8, poly16x4_t:p16, poly16x8_t:p16
1554
1555/// Set all vector lanes to the same value
1556name = vdup
1557lane-suffixes
1558constn = N
1559multi_fn = static_assert_imm-in_exp_len-N
c620b35d 1560multi_fn = simd_extract!, a, N as u32
17df50a5
XL
1561a = 1., 1., 1., 4.
1562n = HFLEN
1563validate 1.
1564
1565aarch64 = nop
1566generate float32x2_t:f32, float32x4_t:f32, float64x1_t:f64, float64x2_t:f64
1567
1568/// Extract vector from pair of vectors
1569name = vext
1570constn = N
1571multi_fn = static_assert_imm-out_exp_len-N
353b0b11 1572multi_fn = matchn-out_exp_len-N, simd_shuffle!, a, b, {asc-n-out_len}
f2b60f7d
FG
1573a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1574b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1575n = LEN_M1
1576validate 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
17df50a5
XL
1577
1578arm = "vext.8"
1579aarch64 = ext
1580generate int*_t, uint*_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
1581
1582/// Extract vector from pair of vectors
1583name = vext
1584constn = N
1585multi_fn = static_assert_imm-out_exp_len-N
353b0b11 1586multi_fn = matchn-out_exp_len-N, simd_shuffle!, a, b, {asc-n-out_len}
f2b60f7d
FG
1587a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1588b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1589n = LEN_M1
1590validate 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
17df50a5
XL
1591
1592aarch64 = ext
1593generate poly64x2_t
1594
1595arm = vmov
1596generate int64x2_t, uint64x2_t
1597
1598/// Extract vector from pair of vectors
1599name = vext
1600constn = N
1601multi_fn = static_assert_imm-out_exp_len-N
353b0b11 1602multi_fn = matchn-out_exp_len-N, simd_shuffle!, a, b, {asc-n-out_len}
f2b60f7d
FG
1603a = 1., 1., 1., 1.
1604b = 2., 2., 2., 2.
1605n = LEN_M1
1606validate 1., 2., 2., 2.
17df50a5
XL
1607
1608aarch64 = ext
1609generate float64x2_t
1610
1611arm = "vext.8"
1612generate float*_t
1613
1614/// Multiply-add to accumulator
1615name = vmla
1616multi_fn = simd_add, a, {simd_mul, b, c}
1617a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1618b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1619c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1620validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1621
1622arm = vmla.
1623aarch64 = mla
1624generate int*_t, uint*_t
1625
1626/// Floating-point multiply-add to accumulator
1627name = vmla
1628multi_fn = simd_add, a, {simd_mul, b, c}
1629a = 0., 1., 2., 3.
1630b = 2., 2., 2., 2.
1631c = 3., 3., 3., 3.
1632validate 6., 7., 8., 9.
1633
1634aarch64 = fmul
1635generate float64x*_t
1636
1637arm = vmla.
1638generate float*_t
1639
1640/// Vector multiply accumulate with scalar
1641name = vmla
1642n-suffix
1643multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
1644a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1645b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1646c = 3
1647validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1648
1649aarch64 = mla
1650arm = vmla.
1651generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
1652generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t
1653
1654/// Vector multiply accumulate with scalar
1655name = vmla
1656n-suffix
1657multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
1658a = 0., 1., 2., 3.
1659b = 2., 2., 2., 2.
1660c = 3.
1661validate 6., 7., 8., 9.
1662
1663aarch64 = fmul
1664arm = vmla.
1665generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
1666
1667/// Vector multiply accumulate with scalar
1668name = vmla
1669in2-lane-suffixes
1670constn = LANE
1671multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 1672multi_fn = vmla-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
17df50a5
XL
1673a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1674b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1675c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1676n = 1
1677validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1678
1679aarch64 = mla
1680arm = vmla.
1681generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
1682generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
1683generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
1684generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
1685
1686/// Vector multiply accumulate with scalar
1687name = vmla
1688in2-lane-suffixes
1689constn = LANE
1690multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 1691multi_fn = vmla-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
17df50a5
XL
1692a = 0., 1., 2., 3.
1693b = 2., 2., 2., 2.
1694c = 0., 3., 0., 0.
1695n = 1
1696validate 6., 7., 8., 9.
1697
1698aarch64 = fmul
1699arm = vmla.
1700generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
1701
1702/// Signed multiply-add long
1703name = vmlal
1704multi_fn = simd_add, a, {vmull-self-noext, b, c}
1705a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1706b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1707c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1708validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1709
1710arm = vmlal.s
1711aarch64 = smlal
1712generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
1713
1714/// Unsigned multiply-add long
1715name = vmlal
1716multi_fn = simd_add, a, {vmull-self-noext, b, c}
1717a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1718b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1719c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1720validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1721
1722arm = vmlal.s
1723aarch64 = umlal
1724generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
1725
1726/// Vector widening multiply accumulate with scalar
1727name = vmlal
1728n-suffix
1729multi_fn = vmlal-self-noext, a, b, {vdup-nself-noext, c}
1730a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1731b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1732c = 3
1733validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1734
1735arm = vmlal.s
1736aarch64 = smlal
1737generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
1738aarch64 = umlal
1739generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t
1740
1741/// Vector widening multiply accumulate with scalar
1742name = vmlal_lane
1743in2-suffix
1744constn = LANE
1745multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 1746multi_fn = vmlal-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
17df50a5
XL
1747a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1748b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1749c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1750n = 1
1751validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1752
1753arm = vmlal.s
1754aarch64 = smlal
1755generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
1756generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
1757aarch64 = umlal
1758generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
1759generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t
1760
1761/// Signed multiply-add long
1762name = vmlal_high
1763no-q
353b0b11
FG
1764multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
1765multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
17df50a5
XL
1766multi_fn = vmlal-noqself-noext, a, b, c
1767a = 8, 7, 6, 5, 4, 3, 2, 1
1768b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1769c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1770fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1771validate 8, 9, 10, 11, 12, 13, 14, 15
1772
1773aarch64 = smlal2
1774generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
1775
1776/// Unsigned multiply-add long
1777name = vmlal_high
1778no-q
353b0b11
FG
1779multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
1780multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
17df50a5
XL
1781multi_fn = vmlal-noqself-noext, a, b, c
1782a = 8, 7, 6, 5, 4, 3, 2, 1
1783b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1784c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1785fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1786validate 8, 9, 10, 11, 12, 13, 14, 15
1787
1788aarch64 = umlal2
1789generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
1790
1791/// Multiply-add long
1792name = vmlal_high_n
1793no-q
1794multi_fn = vmlal_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
1795a = 8, 7, 6, 5, 4, 3, 2, 1
1796b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1797c = 2
1798validate 8, 9, 10, 11, 12, 13, 14, 15
1799
1800aarch64 = smlal2
1801generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
1802aarch64 = umlal2
1803generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t
1804
1805/// Multiply-add long
1806name = vmlal_high_lane
1807in2-suffix
1808constn = LANE
1809multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 1810multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
17df50a5
XL
1811a = 8, 7, 6, 5, 4, 3, 2, 1
1812b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1813c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1814n = 1
1815validate 8, 9, 10, 11, 12, 13, 14, 15
1816
1817aarch64 = smlal2
1818generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
1819generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
1820aarch64 = umlal2
1821generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
1822generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
1823
1824/// Multiply-subtract from accumulator
1825name = vmls
1826multi_fn = simd_sub, a, {simd_mul, b, c}
1827a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1828b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1829c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1830validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1831
1832arm = vmls.
1833aarch64 = mls
1834generate int*_t, uint*_t
1835
1836/// Floating-point multiply-subtract from accumulator
1837name = vmls
1838multi_fn = simd_sub, a, {simd_mul, b, c}
1839a = 6., 7., 8., 9.
1840b = 2., 2., 2., 2.
1841c = 3., 3., 3., 3.
1842validate 0., 1., 2., 3.
1843
1844aarch64 = fmul
1845generate float64x*_t
1846
1847arm = vmls.
1848generate float*_t
1849
1850/// Vector multiply subtract with scalar
1851name = vmls
1852n-suffix
1853multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
1854a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1855b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1856c = 3
1857validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1858
1859aarch64 = mls
1860arm = vmls.
1861generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
1862generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t
1863
1864/// Vector multiply subtract with scalar
1865name = vmls
1866n-suffix
1867multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
1868a = 6., 7., 8., 9.
1869b = 2., 2., 2., 2.
1870c = 3.
1871validate 0., 1., 2., 3.
1872
1873aarch64 = fmul
1874arm = vmls.
1875generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
1876
1877/// Vector multiply subtract with scalar
1878name = vmls
1879in2-lane-suffixes
1880constn = LANE
1881multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 1882multi_fn = vmls-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
17df50a5
XL
1883a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1884b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1885c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1886n = 1
1887validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1888
1889aarch64 = mls
1890arm = vmls.
1891generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
1892generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
1893generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
1894generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
1895
1896/// Vector multiply subtract with scalar
1897name = vmls
1898in2-lane-suffixes
1899constn = LANE
1900multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 1901multi_fn = vmls-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
17df50a5
XL
1902a = 6., 7., 8., 9.
1903b = 2., 2., 2., 2.
1904c = 0., 3., 0., 0.
1905n = 1
1906validate 0., 1., 2., 3.
1907
1908aarch64 = fmul
1909arm = vmls.
1910generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
1911
1912/// Signed multiply-subtract long
1913name = vmlsl
1914multi_fn = simd_sub, a, {vmull-self-noext, b, c}
1915a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1916b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1917c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1918validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1919
1920arm = vmlsl.s
1921aarch64 = smlsl
1922generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
1923
1924/// Unsigned multiply-subtract long
1925name = vmlsl
1926multi_fn = simd_sub, a, {vmull-self-noext, b, c}
1927a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1928b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1929c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1930validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1931
1932arm = vmlsl.s
1933aarch64 = umlsl
1934generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
1935
1936/// Vector widening multiply subtract with scalar
1937name = vmlsl
1938n-suffix
1939multi_fn = vmlsl-self-noext, a, b, {vdup-nself-noext, c}
1940a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1941b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1942c = 3
1943validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1944
1945arm = vmlsl.s
1946aarch64 = smlsl
1947generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
1948aarch64 = umlsl
1949generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t
1950
1951/// Vector widening multiply subtract with scalar
1952name = vmlsl_lane
1953in2-suffix
1954constn = LANE
1955multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 1956multi_fn = vmlsl-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
17df50a5
XL
1957a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1958b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1959c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1960n = 1
1961validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1962
1963arm = vmlsl.s
1964aarch64 = smlsl
1965generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
1966generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
1967aarch64 = umlsl
1968generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
1969generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t
1970
1971/// Signed multiply-subtract long
1972name = vmlsl_high
1973no-q
353b0b11
FG
1974multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
1975multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
17df50a5
XL
1976multi_fn = vmlsl-noqself-noext, a, b, c
1977a = 14, 15, 16, 17, 18, 19, 20, 21
1978b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1979c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1980fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1981validate 14, 13, 12, 11, 10, 9, 8, 7
1982
1983aarch64 = smlsl2
1984generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
1985
1986/// Unsigned multiply-subtract long
1987name = vmlsl_high
1988no-q
353b0b11
FG
1989multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
1990multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
17df50a5
XL
1991multi_fn = vmlsl-noqself-noext, a, b, c
1992a = 14, 15, 16, 17, 18, 19, 20, 21
1993b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1994c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1995fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1996validate 14, 13, 12, 11, 10, 9, 8, 7
1997
1998aarch64 = umlsl2
1999generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
2000
2001/// Multiply-subtract long
2002name = vmlsl_high_n
2003no-q
2004multi_fn = vmlsl_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
2005a = 14, 15, 16, 17, 18, 19, 20, 21
2006b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
2007c = 2
2008validate 14, 13, 12, 11, 10, 9, 8, 7
2009
2010aarch64 = smlsl2
2011generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
2012aarch64 = umlsl2
2013generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t
2014
2015/// Multiply-subtract long
2016name = vmlsl_high_lane
2017in2-suffix
2018constn = LANE
2019multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 2020multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
17df50a5
XL
2021a = 14, 15, 16, 17, 18, 19, 20, 21
2022b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
2023c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2024n = 1
2025validate 14, 13, 12, 11, 10, 9, 8, 7
2026
2027aarch64 = smlsl2
2028generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
2029generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
2030aarch64 = umlsl2
2031generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
2032generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
2033
2034/// Extract narrow
2035name = vmovn_high
2036no-q
2037multi_fn = simd_cast, c:in_t0, b
353b0b11 2038multi_fn = simd_shuffle!, a, c, {asc-0-out_len}
17df50a5
XL
2039a = 0, 1, 2, 3, 2, 3, 4, 5
2040b = 2, 3, 4, 5, 12, 13, 14, 15
2041validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15
2042
2043aarch64 = xtn2
2044generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
2045generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
2046
2047/// Negate
2048name = vneg
2049fn = simd_neg
2050a = 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8
2051validate 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8
2052
2053aarch64 = neg
2054generate int64x*_t
2055
2056arm = vneg.s
2057generate int*_t
2058
3c0e092e
XL
2059/// Negate
2060name = vneg
a2a8927a 2061multi_fn = a.wrapping_neg()
3c0e092e
XL
2062a = 1
2063validate -1
2064
2065aarch64 = neg
2066generate i64
2067
17df50a5
XL
2068/// Negate
2069name = vneg
2070fn = simd_neg
2071a = 0., 1., -1., 2., -2., 3., -3., 4.
2072validate 0., -1., 1., -2., 2., -3., 3., -4.
2073
2074aarch64 = fneg
2075generate float64x*_t
2076
2077arm = vneg.s
2078generate float*_t
2079
2080/// Signed saturating negate
2081name = vqneg
2082a = MIN, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7
2083validate MAX, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7
2084link-arm = vqneg._EXT_
2085link-aarch64 = sqneg._EXT_
2086
2087aarch64 = sqneg
2088generate int64x*_t
2089
2090arm = vqneg.s
2091generate int*_t
2092
3c0e092e
XL
2093/// Signed saturating negate
2094name = vqneg
c620b35d 2095multi_fn = simd_extract!, {vqneg-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
3c0e092e
XL
2096a = 1
2097validate -1
2098
2099aarch64 = sqneg
2100generate i8, i16, i32, i64
2101
17df50a5
XL
2102/// Saturating subtract
2103name = vqsub
2104a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2105b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2106validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26
2107
2108arm = vqsub.s
2109aarch64 = uqsub
2110link-arm = llvm.usub.sat._EXT_
2111link-aarch64 = uqsub._EXT_
2112generate uint*_t, uint64x*_t
2113
2114arm = vqsub.s
2115aarch64 = sqsub
2116link-arm = llvm.ssub.sat._EXT_
2117link-aarch64 = sqsub._EXT_
2118generate int*_t, int64x*_t
2119
2120/// Saturating subtract
2121name = vqsub
2122multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
2123multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
c620b35d 2124multi_fn = simd_extract!, {vqsub-in_ntt-noext, a, b}, 0
17df50a5
XL
2125a = 42
2126b = 1
2127validate 41
2128
2129aarch64 = sqsub
2130generate i8, i16
2131aarch64 = uqsub
2132generate u8, u16
2133
2134/// Saturating subtract
2135name = vqsub
2136a = 42
2137b = 1
2138validate 41
2139
2140aarch64 = uqsub
2141link-aarch64 = uqsub._EXT_
2142generate u32, u64
2143
2144aarch64 = sqsub
2145link-aarch64 = sqsub._EXT_
2146generate i32, i64
2147
2148/// Halving add
2149name = vhadd
2150a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2151b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2152validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
2153
2154arm = vhadd.s
2155aarch64 = uhadd
2156link-aarch64 = uhadd._EXT_
2157link-arm = vhaddu._EXT_
2158generate uint*_t
2159
2160arm = vhadd.s
2161aarch64 = shadd
2162link-aarch64 = shadd._EXT_
2163link-arm = vhadds._EXT_
2164generate int*_t
2165
2166/// Reverse bit order
2167name = vrbit
2168a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
2169validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
2170
2171aarch64 = rbit
2172link-aarch64 = rbit._EXT_
2173
2174generate int8x8_t, int8x16_t
2175
2176/// Reverse bit order
2177name = vrbit
2178multi_fn = transmute, {vrbit-signed-noext, transmute(a)}
2179a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
2180validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
2181
2182aarch64 = rbit
2183
2184generate uint8x8_t, uint8x16_t, poly8x8_t, poly8x16_t
2185
2186/// Rounding halving add
2187name = vrhadd
2188a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2189b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2190validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29
2191
2192arm = vrhadd.s
2193aarch64 = urhadd
2194link-arm = vrhaddu._EXT_
2195link-aarch64 = urhadd._EXT_
2196generate uint*_t
2197
2198arm = vrhadd.s
2199aarch64 = srhadd
2200link-arm = vrhadds._EXT_
2201link-aarch64 = srhadd._EXT_
2202generate int*_t
2203
2204/// Floating-point round to integral exact, using current rounding mode
2205name = vrndx
2206a = -1.5, 0.5, 1.5, 2.5
2207validate -2.0, 0.0, 2.0, 2.0
2208
2209aarch64 = frintx
2210link-aarch64 = llvm.rint._EXT_
2211generate float*_t, float64x*_t
2212
2213/// Floating-point round to integral, to nearest with ties to away
2214name = vrnda
2215a = -1.5, 0.5, 1.5, 2.5
2216validate -2.0, 1.0, 2.0, 3.0
2217
2218aarch64 = frinta
2219link-aarch64 = llvm.round._EXT_
2220generate float*_t, float64x*_t
2221
2222/// Floating-point round to integral, to nearest with ties to even
2223name = vrndn
2224a = -1.5, 0.5, 1.5, 2.5
2225validate -2.0, 0.0, 2.0, 2.0
2226
2227link-aarch64 = frintn._EXT_
2228aarch64 = frintn
2229generate float64x*_t
2230
2231target = fp-armv8
2232arm = vrintn
2233link-arm = vrintn._EXT_
2234generate float*_t
2235
3c0e092e
XL
2236/// Floating-point round to integral, to nearest with ties to even
2237name = vrndn
2238a = -1.5
2239validate -2.0
2240
2241aarch64 = frintn
2242link-aarch64 = llvm.roundeven._EXT_
2243generate f32
2244
17df50a5
XL
2245/// Floating-point round to integral, toward minus infinity
2246name = vrndm
2247a = -1.5, 0.5, 1.5, 2.5
2248validate -2.0, 0.0, 1.0, 2.0
2249
2250aarch64 = frintm
2251link-aarch64 = llvm.floor._EXT_
2252generate float*_t, float64x*_t
2253
2254/// Floating-point round to integral, toward plus infinity
2255name = vrndp
2256a = -1.5, 0.5, 1.5, 2.5
2257validate -1.0, 1.0, 2.0, 3.0
2258
2259aarch64 = frintp
2260link-aarch64 = llvm.ceil._EXT_
2261generate float*_t, float64x*_t
2262
2263/// Floating-point round to integral, toward zero
2264name = vrnd
2265a = -1.5, 0.5, 1.5, 2.5
2266validate -1.0, 0.0, 1.0, 2.0
2267
2268aarch64 = frintz
2269link-aarch64 = llvm.trunc._EXT_
2270generate float*_t, float64x*_t
2271
2272/// Floating-point round to integral, using current rounding mode
2273name = vrndi
2274a = -1.5, 0.5, 1.5, 2.5
2275validate -2.0, 0.0, 2.0, 2.0
2276
2277aarch64 = frinti
2278link-aarch64 = llvm.nearbyint._EXT_
2279generate float*_t, float64x*_t
2280
2281/// Saturating add
2282name = vqadd
2283a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2284b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2285validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
2286
2287arm = vqadd.s
2288aarch64 = uqadd
2289link-arm = llvm.uadd.sat._EXT_
2290link-aarch64 = uqadd._EXT_
2291generate uint*_t, uint64x*_t
2292
2293arm = vqadd.s
2294aarch64 = sqadd
2295link-arm = llvm.sadd.sat._EXT_
2296link-aarch64 = sqadd._EXT_
2297generate int*_t, int64x*_t
2298
2299/// Saturating add
2300name = vqadd
2301multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
2302multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
c620b35d 2303multi_fn = simd_extract!, {vqadd-in_ntt-noext, a, b}, 0
17df50a5
XL
2304a = 42
2305b = 1
2306validate 43
2307
2308aarch64 = sqadd
2309generate i8, i16
2310aarch64 = uqadd
2311generate u8, u16
2312
2313/// Saturating add
2314name = vqadd
2315a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2316b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2317validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
2318
2319aarch64 = uqadd
2320link-aarch64 = uqadd._EXT_
2321generate u32, u64
2322
2323aarch64 = sqadd
2324link-aarch64 = sqadd._EXT_
2325generate i32, i64
2326
c295e0f8
XL
2327/// Load multiple single-element structures to one, two, three, or four registers
2328name = vld1
2329out-suffix
2330a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2331validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2332load_fn
2333
2334aarch64 = ld1
2335link-aarch64 = ld1x2._EXT2_
2336arm = vld1
2337link-arm = vld1x2._EXT2_
2338generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
2339generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
2340
2341link-aarch64 = ld1x3._EXT2_
2342link-arm = vld1x3._EXT2_
2343generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
2344generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t, *const i64:int64x2x3_t
2345
2346link-aarch64 = ld1x4._EXT2_
2347link-arm = vld1x4._EXT2_
2348generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
2349generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t, *const i64:int64x2x4_t
2350
2351/// Load multiple single-element structures to one, two, three, or four registers
2352name = vld1
2353out-suffix
2354multi_fn = transmute, {vld1-outsigned-noext, transmute(a)}
2355a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2356validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2357
2358load_fn
2359aarch64 = ld1
2360arm = vld1
2361generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t
2362generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t
2363generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t
2364generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t, *const u64:uint64x2x3_t
2365generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t
2366generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t, *const u64:uint64x2x4_t
2367generate *const p8:poly8x8x2_t, *const p8:poly8x8x3_t, *const p8:poly8x8x4_t
2368generate *const p8:poly8x16x2_t, *const p8:poly8x16x3_t, *const p8:poly8x16x4_t
2369generate *const p16:poly16x4x2_t, *const p16:poly16x4x3_t, *const p16:poly16x4x4_t
2370generate *const p16:poly16x8x2_t, *const p16:poly16x8x3_t, *const p16:poly16x8x4_t
2371target = aes
2372generate *const p64:poly64x1x2_t
3c0e092e 2373arm = nop
c295e0f8
XL
2374generate *const p64:poly64x1x3_t, *const p64:poly64x1x4_t
2375generate *const p64:poly64x2x2_t, *const p64:poly64x2x3_t, *const p64:poly64x2x4_t
c295e0f8
XL
2376/// Load multiple single-element structures to one, two, three, or four registers
2377name = vld1
2378out-suffix
2379a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
2380validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
2381load_fn
2382
2383aarch64 = ld1
2384link-aarch64 = ld1x2._EXT2_
2385generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
2386
2387link-aarch64 = ld1x3._EXT2_
2388generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
2389
2390link-aarch64 = ld1x4._EXT2_
2391generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
2392
2393arm = vld1
2394link-aarch64 = ld1x2._EXT2_
2395link-arm = vld1x2._EXT2_
2396generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
2397
2398link-aarch64 = ld1x3._EXT2_
2399link-arm = vld1x3._EXT2_
2400generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
2401
2402link-aarch64 = ld1x4._EXT2_
2403link-arm = vld1x4._EXT2_
2404generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
2405
2406/// Load multiple 2-element structures to two registers
2407name = vld2
2408out-nox
2409a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2410validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
2411load_fn
3c0e092e 2412arm-aarch64-separate
c295e0f8
XL
2413
2414aarch64 = ld2
2415link-aarch64 = ld2._EXTv2_
3c0e092e
XL
2416generate *const i64:int64x2x2_t
2417
c295e0f8
XL
2418arm = vld2
2419link-arm = vld2._EXTpi82_
3c0e092e
XL
2420generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
2421generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
2422arm = nop
2423aarch64 = nop
2424generate *const i64:int64x1x2_t
c295e0f8
XL
2425
2426/// Load multiple 2-element structures to two registers
2427name = vld2
2428out-nox
2429multi_fn = transmute, {vld2-outsignednox-noext, transmute(a)}
2430a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2431validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
2432load_fn
2433
2434aarch64 = ld2
3c0e092e
XL
2435generate *const u64:uint64x2x2_t
2436target = aes
2437generate *const p64:poly64x2x2_t
2438
2439target = default
c295e0f8 2440arm = vld2
3c0e092e
XL
2441generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
2442generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
2443generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
2444arm = nop
2445aarch64 = nop
2446generate *const u64:uint64x1x2_t
2447target = aes
2448generate *const p64:poly64x1x2_t
2449
c295e0f8
XL
2450
2451/// Load multiple 2-element structures to two registers
2452name = vld2
2453out-nox
2454a = 0., 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
2455validate 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
2456load_fn
3c0e092e 2457arm-aarch64-separate
c295e0f8 2458
3c0e092e 2459aarch64 = nop
c295e0f8 2460link-aarch64 = ld2._EXTv2_
3c0e092e
XL
2461generate *const f64:float64x1x2_t
2462aarch64 = ld2
2463generate *const f64:float64x2x2_t
c295e0f8
XL
2464
2465arm = vld2
2466link-arm = vld2._EXTpi82_
3c0e092e 2467generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
c295e0f8
XL
2468
2469/// Load single 2-element structure and replicate to all lanes of two registers
2470name = vld2
2471out-dup-nox
2472a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2473validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2474load_fn
3c0e092e 2475arm-aarch64-separate
c295e0f8 2476
c295e0f8
XL
2477aarch64 = ld2r
2478link-aarch64 = ld2r._EXT2_
3c0e092e
XL
2479generate *const i64:int64x2x2_t
2480
2481arm = vld2
2482link-arm = vld2dup._EXTpi82_
2483generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
2484generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
2485arm = nop
2486generate *const i64:int64x1x2_t
c295e0f8
XL
2487
2488/// Load single 2-element structure and replicate to all lanes of two registers
2489name = vld2
2490out-dup-nox
2491multi_fn = transmute, {vld2-outsigneddupnox-noext, transmute(a)}
2492a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2493validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2494load_fn
2495
c295e0f8 2496aarch64 = ld2r
3c0e092e
XL
2497generate *const u64:uint64x2x2_t
2498target = aes
2499generate *const p64:poly64x2x2_t
2500
2501target = default
2502arm = vld2
2503generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
2504generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
2505generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
2506arm = nop
2507generate *const u64:uint64x1x2_t
2508target = aes
2509generate *const p64:poly64x1x2_t
c295e0f8
XL
2510
2511/// Load single 2-element structure and replicate to all lanes of two registers
2512name = vld2
2513out-dup-nox
2514a = 0., 1., 1., 2., 3., 1., 4., 3., 5.
2515validate 1., 1., 1., 1., 1., 1., 1., 1.
2516load_fn
3c0e092e 2517arm-aarch64-separate
c295e0f8
XL
2518
2519aarch64 = ld2r
2520link-aarch64 = ld2r._EXT2_
3c0e092e 2521generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
c295e0f8 2522
3c0e092e 2523arm = vld2
c295e0f8 2524link-arm = vld2dup._EXTpi82_
3c0e092e 2525generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
c295e0f8
XL
2526
2527/// Load multiple 2-element structures to two registers
2528name = vld2
2529out-lane-nox
2530multi_fn = static_assert_imm-in_exp_len-LANE
2531constn = LANE
2532a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2533b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2534n = 0
2535validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2536load_fn
2537arm-aarch64-separate
2538
3c0e092e 2539aarch64 = ld2
c295e0f8
XL
2540const-aarch64 = LANE
2541link-aarch64 = ld2lane._EXTpi82_
3c0e092e 2542generate *const i8:int8x16x2_t:int8x16x2_t, *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t
c295e0f8 2543
3c0e092e 2544arm = vld2
c295e0f8
XL
2545const-arm = LANE
2546link-arm = vld2lane._EXTpi82_
3c0e092e
XL
2547generate *const i8:int8x8x2_t:int8x8x2_t, *const i16:int16x4x2_t:int16x4x2_t, *const i32:int32x2x2_t:int32x2x2_t
2548generate *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t
c295e0f8
XL
2549
2550/// Load multiple 2-element structures to two registers
2551name = vld2
2552out-lane-nox
2553multi_fn = static_assert_imm-in_exp_len-LANE
2554multi_fn = transmute, {vld2-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
2555constn = LANE
2556a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2557b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2558n = 0
2559validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2560load_fn
c295e0f8 2561
3c0e092e 2562aarch64 = ld2
c295e0f8
XL
2563const-aarch64 = LANE
2564
2565target = aes
3c0e092e 2566generate *const p64:poly64x1x2_t:poly64x1x2_t, *const p64:poly64x2x2_t:poly64x2x2_t
c295e0f8
XL
2567
2568target = default
3c0e092e
XL
2569generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t
2570generate *const p8:poly8x16x2_t:poly8x16x2_t
c295e0f8 2571
3c0e092e 2572arm = vld2
c295e0f8 2573const-arm = LANE
3c0e092e
XL
2574generate *const u8:uint8x8x2_t:uint8x8x2_t, *const u16:uint16x4x2_t:uint16x4x2_t, *const u32:uint32x2x2_t:uint32x2x2_t
2575generate *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t
2576generate *const p8:poly8x8x2_t:poly8x8x2_t, *const p16:poly16x4x2_t:poly16x4x2_t
2577generate *const p16:poly16x8x2_t:poly16x8x2_t
c295e0f8
XL
2578
2579/// Load multiple 2-element structures to two registers
2580name = vld2
2581out-lane-nox
2582multi_fn = static_assert_imm-in_exp_len-LANE
2583constn = LANE
2584a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
2585b = 0., 2., 2., 14., 2., 16., 17., 18.
2586n = 0
2587validate 1., 2., 2., 14., 2., 16., 17., 18.
2588load_fn
2589arm-aarch64-separate
2590
3c0e092e 2591aarch64 = ld2
c295e0f8
XL
2592const-aarch64 = LANE
2593link-aarch64 = ld2lane._EXTpi82_
3c0e092e 2594generate *const f64:float64x1x2_t:float64x1x2_t, *const f64:float64x2x2_t:float64x2x2_t
c295e0f8 2595
3c0e092e 2596arm = vld2
c295e0f8
XL
2597const-arm = LANE
2598link-arm = vld2lane._EXTpi82_
3c0e092e 2599generate *const f32:float32x2x2_t:float32x2x2_t, *const f32:float32x4x2_t:float32x4x2_t
c295e0f8 2600
3c0e092e
XL
2601/// Load multiple 3-element structures to three registers
2602name = vld3
2603out-nox
2604a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
2605validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
2606load_fn
c295e0f8
XL
2607arm-aarch64-separate
2608
3c0e092e
XL
2609aarch64 = ld3
2610link-aarch64 = ld3._EXTv2_
2611generate *const i64:int64x2x3_t
c295e0f8 2612
3c0e092e
XL
2613arm = vld3
2614link-arm = vld3._EXTpi82_
2615generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
2616generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
2617arm = nop
2618aarch64 = nop
2619generate *const i64:int64x1x3_t
c295e0f8 2620
3c0e092e
XL
2621/// Load multiple 3-element structures to three registers
2622name = vld3
2623out-nox
2624multi_fn = transmute, {vld3-outsignednox-noext, transmute(a)}
2625a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
2626validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
2627load_fn
2628
2629aarch64 = ld3
2630generate *const u64:uint64x2x3_t
2631target = aes
2632generate *const p64:poly64x2x3_t
2633
2634target = default
2635arm = vld3
2636generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
2637generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
2638generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
2639arm = nop
2640aarch64 = nop
2641generate *const u64:uint64x1x3_t
2642target = aes
2643generate *const p64:poly64x1x3_t
2644
2645/// Load multiple 3-element structures to three registers
2646name = vld3
2647out-nox
2648a = 0., 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.
2649validate 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8.
2650load_fn
2651arm-aarch64-separate
2652
2653aarch64 = nop
2654link-aarch64 = ld3._EXTv2_
2655generate *const f64:float64x1x3_t
2656aarch64 = ld3
2657generate *const f64:float64x2x3_t
2658
2659arm = vld3
2660link-arm = vld3._EXTpi82_
2661generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
2662
2663/// Load single 3-element structure and replicate to all lanes of three registers
2664name = vld3
2665out-dup-nox
2666a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
2667validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2668load_fn
2669arm-aarch64-separate
2670
2671aarch64 = ld3r
2672link-aarch64 = ld3r._EXT2_
2673generate *const i64:int64x2x3_t
2674
2675arm = vld3
2676link-arm = vld3dup._EXTpi82_
2677generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
2678generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
2679arm = nop
2680generate *const i64:int64x1x3_t
2681
2682/// Load single 3-element structure and replicate to all lanes of three registers
2683name = vld3
2684out-dup-nox
2685multi_fn = transmute, {vld3-outsigneddupnox-noext, transmute(a)}
2686a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
2687validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2688load_fn
2689
2690aarch64 = ld3r
2691generate *const u64:uint64x2x3_t
2692target = aes
2693generate *const p64:poly64x2x3_t
2694
2695target = default
2696arm = vld3
2697generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
2698generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
2699generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
2700arm = nop
2701generate *const u64:uint64x1x3_t
2702target = aes
2703generate *const p64:poly64x1x3_t
2704
2705/// Load single 3-element structure and replicate to all lanes of three registers
2706name = vld3
2707out-dup-nox
2708a = 0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5.
2709validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
2710load_fn
2711arm-aarch64-separate
2712
2713aarch64 = ld3r
2714link-aarch64 = ld3r._EXT2_
2715generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
2716
2717arm = vld3
2718link-arm = vld3dup._EXTpi82_
2719generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
2720
2721/// Load multiple 3-element structures to three registers
2722name = vld3
2723out-lane-nox
2724multi_fn = static_assert_imm-in_exp_len-LANE
2725constn = LANE
2726a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2727b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2728n = 0
2729validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2730load_fn
2731arm-aarch64-separate
2732
2733aarch64 = ld3
2734const-aarch64 = LANE
2735link-aarch64 = ld3lane._EXTpi82_
2736generate *const i8:int8x16x3_t:int8x16x3_t, *const i64:int64x1x3_t:int64x1x3_t, *const i64:int64x2x3_t:int64x2x3_t
2737
2738arm = vld3
2739const-arm = LANE
2740link-arm = vld3lane._EXTpi82_
2741generate *const i8:int8x8x3_t:int8x8x3_t, *const i16:int16x4x3_t:int16x4x3_t, *const i32:int32x2x3_t:int32x2x3_t
2742generate *const i16:int16x8x3_t:int16x8x3_t, *const i32:int32x4x3_t:int32x4x3_t
2743
2744/// Load multiple 3-element structures to three registers
2745name = vld3
2746out-lane-nox
2747multi_fn = static_assert_imm-in_exp_len-LANE
2748multi_fn = transmute, {vld3-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
2749constn = LANE
2750a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2751b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2752n = 0
2753validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2754load_fn
2755
2756aarch64 = ld3
2757const-aarch64 = LANE
2758target = aes
2759generate *const p64:poly64x1x3_t:poly64x1x3_t, *const p64:poly64x2x3_t:poly64x2x3_t
2760target = default
2761generate *const p8:poly8x16x3_t:poly8x16x3_t, *const u8:uint8x16x3_t:uint8x16x3_t, *const u64:uint64x1x3_t:uint64x1x3_t, *const u64:uint64x2x3_t:uint64x2x3_t
2762
2763arm = vld3
2764const-arm = LANE
2765generate *const u8:uint8x8x3_t:uint8x8x3_t, *const u16:uint16x4x3_t:uint16x4x3_t, *const u32:uint32x2x3_t:uint32x2x3_t
2766generate *const u16:uint16x8x3_t:uint16x8x3_t, *const u32:uint32x4x3_t:uint32x4x3_t
2767generate *const p8:poly8x8x3_t:poly8x8x3_t, *const p16:poly16x4x3_t:poly16x4x3_t
2768generate *const p16:poly16x8x3_t:poly16x8x3_t
2769
2770/// Load multiple 3-element structures to three registers
2771name = vld3
2772out-lane-nox
2773multi_fn = static_assert_imm-in_exp_len-LANE
2774constn = LANE
2775a = 0., 1., 2., 2., 4., 5., 6., 7., 8., 5., 6., 7., 8.
2776b = 0., 2., 2., 14., 9., 16., 17., 18., 5., 6., 7., 8.
2777n = 0
2778validate 1., 2., 2., 14., 2., 16., 17., 18., 2., 6., 7., 8.
2779load_fn
2780arm-aarch64-separate
2781
2782aarch64 = ld3
2783const-aarch64 = LANE
2784link-aarch64 = ld3lane._EXTpi82_
2785generate *const f64:float64x1x3_t:float64x1x3_t, *const f64:float64x2x3_t:float64x2x3_t
2786
2787arm = vld3
2788const-arm = LANE
2789link-arm = vld3lane._EXTpi82_
2790generate *const f32:float32x2x3_t:float32x2x3_t, *const f32:float32x4x3_t:float32x4x3_t
2791
2792/// Load multiple 4-element structures to four registers
2793name = vld4
2794out-nox
2795a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2796validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2797load_fn
2798arm-aarch64-separate
2799
2800aarch64 = ld4
2801link-aarch64 = ld4._EXTv2_
2802generate *const i64:int64x2x4_t
2803
2804arm = vld4
2805link-arm = vld4._EXTpi82_
2806generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
2807generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
2808aarch64 = nop
2809arm = nop
2810generate *const i64:int64x1x4_t
2811
2812/// Load multiple 4-element structures to four registers
2813name = vld4
2814out-nox
2815multi_fn = transmute, {vld4-outsignednox-noext, transmute(a)}
2816a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2817validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2818load_fn
2819
2820aarch64 = ld4
2821generate *const u64:uint64x2x4_t
2822target = aes
2823generate *const p64:poly64x2x4_t
2824
2825target = default
2826arm = vld4
2827generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
2828generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
2829generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
2830aarch64 = nop
2831arm = nop
2832generate *const u64:uint64x1x4_t
2833target = aes
2834generate *const p64:poly64x1x4_t
2835
2836/// Load multiple 4-element structures to four registers
2837name = vld4
2838out-nox
2839a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 15., 16.
2840validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 15., 6., 8., 8., 16.
2841load_fn
2842arm-aarch64-separate
2843
2844aarch64 = nop
2845link-aarch64 = ld4._EXTv2_
2846generate *const f64:float64x1x4_t
2847aarch64 = ld4
2848generate *const f64:float64x2x4_t
2849
2850arm = vld4
2851link-arm = vld4._EXTpi82_
2852generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
2853
2854/// Load single 4-element structure and replicate to all lanes of four registers
2855name = vld4
2856out-dup-nox
2857a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
2858validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2859load_fn
2860arm-aarch64-separate
2861
2862aarch64 = ld4r
2863link-aarch64 = ld4r._EXT2_
2864generate *const i64:int64x2x4_t
2865
2866arm = vld4
2867link-arm = vld4dup._EXTpi82_
2868generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
2869generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
2870arm = nop
2871generate *const i64:int64x1x4_t
2872
2873/// Load single 4-element structure and replicate to all lanes of four registers
2874name = vld4
2875out-dup-nox
2876multi_fn = transmute, {vld4-outsigneddupnox-noext, transmute(a)}
2877a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
2878validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2879load_fn
2880
2881aarch64 = ld4r
2882generate *const u64:uint64x2x4_t
2883target = aes
2884generate *const p64:poly64x2x4_t
2885
2886target = default
2887arm = vld4
2888generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
2889generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
2890generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
2891arm = nop
2892generate *const u64:uint64x1x4_t
2893target = aes
2894generate *const p64:poly64x1x4_t
2895
2896/// Load single 4-element structure and replicate to all lanes of four registers
2897name = vld4
2898out-dup-nox
2899a = 0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5., 9., 4., 3., 5.
2900validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
2901load_fn
2902arm-aarch64-separate
2903
2904aarch64 = ld4r
2905link-aarch64 = ld4r._EXT2_
2906generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
2907
2908arm = vld4
2909link-arm = vld4dup._EXTpi82_
2910generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
2911
2912/// Load multiple 4-element structures to four registers
2913name = vld4
2914out-lane-nox
2915multi_fn = static_assert_imm-in_exp_len-LANE
2916constn = LANE
2917a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2918b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2919n = 0
2920validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2921load_fn
2922arm-aarch64-separate
2923
2924aarch64 = ld4
2925const-aarch64 = LANE
2926link-aarch64 = ld4lane._EXTpi82_
2927generate *const i8:int8x16x4_t:int8x16x4_t, *const i64:int64x1x4_t:int64x1x4_t, *const i64:int64x2x4_t:int64x2x4_t
2928
2929arm = vld4
2930const-arm = LANE
2931link-arm = vld4lane._EXTpi82_
2932generate *const i8:int8x8x4_t:int8x8x4_t, *const i16:int16x4x4_t:int16x4x4_t, *const i32:int32x2x4_t:int32x2x4_t
2933generate *const i16:int16x8x4_t:int16x8x4_t, *const i32:int32x4x4_t:int32x4x4_t
2934
2935/// Load multiple 4-element structures to four registers
2936name = vld4
2937out-lane-nox
2938multi_fn = static_assert_imm-in_exp_len-LANE
2939multi_fn = transmute, {vld4-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
2940constn = LANE
2941a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2942b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2943n = 0
2944validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2945load_fn
2946
2947aarch64 = ld4
2948const-aarch64 = LANE
2949target = aes
2950generate *const p64:poly64x1x4_t:poly64x1x4_t, *const p64:poly64x2x4_t:poly64x2x4_t
2951target = default
2952generate *const p8:poly8x16x4_t:poly8x16x4_t, *const u8:uint8x16x4_t:uint8x16x4_t, *const u64:uint64x1x4_t:uint64x1x4_t, *const u64:uint64x2x4_t:uint64x2x4_t
2953
2954arm = vld4
2955const-arm = LANE
2956generate *const u8:uint8x8x4_t:uint8x8x4_t, *const u16:uint16x4x4_t:uint16x4x4_t, *const u32:uint32x2x4_t:uint32x2x4_t
2957generate *const u16:uint16x8x4_t:uint16x8x4_t, *const u32:uint32x4x4_t:uint32x4x4_t
2958generate *const p8:poly8x8x4_t:poly8x8x4_t, *const p16:poly16x4x4_t:poly16x4x4_t
2959generate *const p16:poly16x8x4_t:poly16x8x4_t
2960
2961/// Load multiple 4-element structures to four registers
2962name = vld4
2963out-lane-nox
2964multi_fn = static_assert_imm-in_exp_len-LANE
2965constn = LANE
2966a = 0., 1., 2., 2., 2., 5., 6., 7., 8., 5., 6., 7., 8., 1., 4., 3., 5.
2967b = 0., 2., 2., 2., 2., 16., 2., 18., 5., 6., 7., 8., 1., 4., 3., 5.
2968n = 0
2969validate 1., 2., 2., 2., 2., 16., 2., 18., 2., 6., 7., 8., 2., 4., 3., 5.
2970load_fn
2971arm-aarch64-separate
2972
2973aarch64 = ld4
2974const-aarch64 = LANE
2975link-aarch64 = ld4lane._EXTpi82_
2976generate *const f64:float64x1x4_t:float64x1x4_t, *const f64:float64x2x4_t:float64x2x4_t
2977
2978arm = vld4
2979const-arm = LANE
2980link-arm = vld4lane._EXTpi82_
2981generate *const f32:float32x2x4_t:float32x2x4_t, *const f32:float32x4x4_t:float32x4x4_t
2982
2983/// Store multiple single-element structures from one, two, three, or four registers
2984name = vst1
2985in1-lane-nox
2986multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 2987multi_fn = *a, {simd_extract!, b, LANE as u32}
3c0e092e
XL
2988constn = LANE
2989a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2990n = 0
2991validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2992store_fn
2993
2994aarch64 = nop
2995arm = nop
2996generate *mut i8:int8x8_t:void, *mut i16:int16x4_t:void, *mut i32:int32x2_t:void, *mut i64:int64x1_t:void
2997generate *mut i8:int8x16_t:void, *mut i16:int16x8_t:void, *mut i32:int32x4_t:void, *mut i64:int64x2_t:void
2998generate *mut u8:uint8x8_t:void, *mut u16:uint16x4_t:void, *mut u32:uint32x2_t:void, *mut u64:uint64x1_t:void
2999generate *mut u8:uint8x16_t:void, *mut u16:uint16x8_t:void, *mut u32:uint32x4_t:void, *mut u64:uint64x2_t:void
3000generate *mut p8:poly8x8_t:void, *mut p16:poly16x4_t:void, *mut p8:poly8x16_t:void, *mut p16:poly16x8_t:void
3001target = aes
3002generate *mut p64:poly64x1_t:void, *mut p64:poly64x2_t:void
3003
3004/// Store multiple single-element structures from one, two, three, or four registers
3005name = vst1
3006in1-lane-nox
3007multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 3008multi_fn = *a, {simd_extract!, b, LANE as u32}
3c0e092e
XL
3009constn = LANE
3010a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
3011n = 0
3012validate 1., 0., 0., 0., 0., 0., 0., 0.
3013store_fn
3014
3015aarch64 = nop
3016generate *mut f64:float64x1_t:void, *mut f64:float64x2_t:void
3017
3018arm = nop
3019generate *mut f32:float32x2_t:void, *mut f32:float32x4_t:void
3020
3021/// Store multiple single-element structures from one, two, three, or four registers
3022name = vst1
3023a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3024validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3025store_fn
3026arm-aarch64-separate
3027
3028aarch64 = st1
3029link-aarch64 = st1x2._EXT3_
3030arm = vst1
3031link-arm = vst1x2._EXTr3_
3032generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void, *mut i64:int64x1x2_t:void
3033generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void, *mut i64:int64x2x2_t:void
3034
3035link-aarch64 = st1x3._EXT3_
3036link-arm = vst1x3._EXTr3_
3037generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void, *mut i64:int64x1x3_t:void
3038generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void, *mut i64:int64x2x3_t:void
3039
3040link-aarch64 = st1x4._EXT3_
3041link-arm = vst1x4._EXTr3_
3042generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void, *mut i64:int64x1x4_t:void
3043generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void, *mut i64:int64x2x4_t:void
c295e0f8
XL
3044
3045/// Store multiple single-element structures from one, two, three, or four registers
3046name = vst1
3047multi_fn = vst1-signed-noext, transmute(a), transmute(b)
3048a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3049validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3050
3051store_fn
3052aarch64 = st1
3053arm = vst1
3054generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void, *mut u64:uint64x1x2_t:void
3055generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void, *mut u64:uint64x2x2_t:void
3056generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void, *mut u64:uint64x1x3_t:void
3057generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void, *mut u64:uint64x2x3_t:void
3058generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void, *mut u64:uint64x1x4_t:void
3059generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void, *mut u64:uint64x2x4_t:void
3060generate *mut p8:poly8x8x2_t:void, *mut p8:poly8x8x3_t:void, *mut p8:poly8x8x4_t:void
3061generate *mut p8:poly8x16x2_t:void, *mut p8:poly8x16x3_t:void, *mut p8:poly8x16x4_t:void
3062generate *mut p16:poly16x4x2_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x4x4_t:void
3063generate *mut p16:poly16x8x2_t:void, *mut p16:poly16x8x3_t:void, *mut p16:poly16x8x4_t:void
3c0e092e
XL
3064target = aes
3065generate *mut p64:poly64x1x2_t:void
3066arm = nop
3067generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x1x4_t:void
3068generate *mut p64:poly64x2x2_t:void, *mut p64:poly64x2x3_t:void, *mut p64:poly64x2x4_t:void
c295e0f8
XL
3069
3070/// Store multiple single-element structures from one, two, three, or four registers
3071name = vst1
3072a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
3073validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
3074store_fn
3075arm-aarch64-separate
3076
3077aarch64 = st1
3078link-aarch64 = st1x2._EXT3_
3079generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
3080
3081link-aarch64 = st1x3._EXT3_
3082generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
3083
3084link-aarch64 = st1x4._EXT3_
3085generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
3086
3087arm = vst1
3088link-aarch64 = st1x2._EXT3_
3089link-arm = vst1x2._EXTr3_
3090generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
3091
3092link-aarch64 = st1x3._EXT3_
3093link-arm = vst1x3._EXTr3_
3094generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
3095
3096link-aarch64 = st1x4._EXT3_
3097link-arm = vst1x4._EXTr3_
3098generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
3099
3c0e092e
XL
3100/// Store multiple 2-element structures from two registers
3101name = vst2
3102in1-nox
3103a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
3104validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
3105store_fn
3106arm-aarch64-separate
3107
3108aarch64 = st2
3109link-aarch64 = st2._EXTpi8_
3110generate *mut i64:int64x2x2_t:void
3111
3112arm = vst2
3113link-arm = vst2._EXTpi8r_
3114generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
3115generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
3116arm = nop
3117aarch64 = nop
3118generate *mut i64:int64x1x2_t:void
3119
3120/// Store multiple 2-element structures from two registers
3121name = vst2
c620b35d 3122multi_fn = vst2-in1signednox-noext, transmute(a), transmute(b)
3c0e092e
XL
3123in1-nox
3124a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
3125validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
3126store_fn
3127
3128aarch64 = st2
3129generate *mut u64:uint64x2x2_t:void
3130target = aes
3131generate *mut p64:poly64x2x2_t:void
3132
3133target = default
3134arm = vst2
3135generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
3136generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
3137generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p8:poly8x16x2_t:void, *mut p16:poly16x8x2_t:void
3138arm = nop
3139aarch64 = nop
3140generate *mut u64:uint64x1x2_t:void
3141target = aes
3142generate *mut p64:poly64x1x2_t:void
3143
3144/// Store multiple 2-element structures from two registers
3145name = vst2
3146in1-nox
3147a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
3148validate 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
3149store_fn
3150arm-aarch64-separate
3151
3152aarch64 = st1
3153link-aarch64 = st2._EXTpi8_
3154generate *mut f64:float64x1x2_t:void
3155aarch64 = st2
3156generate *mut f64:float64x2x2_t:void
3157
3158arm = vst2
3159link-arm = vst2._EXTpi8r_
3160generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
3161
3162/// Store multiple 2-element structures from two registers
3163name = vst2
3164in1-lane-nox
3165constn = LANE
3166multi_fn = static_assert_imm-in_exp_len-LANE
3167a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
3168n = 0
3169validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3170store_fn
3171arm-aarch64-separate
3172
3173aarch64 = st2
3174link-aarch64 = st2lane._EXTpi8_
3175const-aarch64 = LANE
3176generate *mut i8:int8x16x2_t:void, *mut i64:int64x1x2_t:void, *mut i64:int64x2x2_t:void
3177
3178arm = vst2
3179link-arm = vst2lane._EXTpi8r_
3180const-arm = LANE
3181generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
3182generate *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
3183
3184/// Store multiple 2-element structures from two registers
3185name = vst2
3186in1-lane-nox
3187constn = LANE
3188multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 3189multi_fn = vst2-in1signedlanenox-::<LANE>, transmute(a), transmute(b)
3c0e092e
XL
3190a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
3191n = 0
3192validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3193store_fn
3194
3195aarch64 = st2
3196generate *mut u8:uint8x16x2_t:void, *mut u64:uint64x1x2_t:void, *mut u64:uint64x2x2_t:void, *mut p8:poly8x16x2_t:void
3197target = aes
3198generate *mut p64:poly64x1x2_t:void, *mut p64:poly64x2x2_t:void
3199
3200target = default
3201arm = vst2
3202generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
3203generate *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
3204generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p16:poly16x8x2_t:void
3205
3206/// Store multiple 2-element structures from two registers
3207name = vst2
3208in1-lane-nox
3209constn = LANE
3210multi_fn = static_assert_imm-in_exp_len-LANE
3211a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
3212n = 0
3213validate 1., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
3214store_fn
3215arm-aarch64-separate
3216
3217aarch64 = st2
3218link-aarch64 = st2lane._EXTpi8_
3219const-aarch64 = LANE
3220generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
3221
3222arm = vst2
3223link-arm = vst2lane._EXTpi8r_
3224const-arm = LANE
3225generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
3226
3227/// Store multiple 3-element structures from three registers
3228name = vst3
3229in1-nox
3230a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
3231validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
3232store_fn
3233arm-aarch64-separate
3234
3235aarch64 = st3
3236link-aarch64 = st3._EXTpi8_
3237generate *mut i64:int64x2x3_t:void
3238
3239arm = vst3
3240link-arm = vst3._EXTpi8r_
3241generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
3242generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
3243arm = nop
3244aarch64 = nop
3245generate *mut i64:int64x1x3_t:void
3246
3247/// Store multiple 3-element structures from three registers
3248name = vst3
c620b35d 3249multi_fn = vst3-in1signednox-noext, transmute(a), transmute(b)
3c0e092e
XL
3250in1-nox
3251a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
3252validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
3253store_fn
3254
3255aarch64 = st3
3256generate *mut u64:uint64x2x3_t:void
3257target = aes
3258generate *mut p64:poly64x2x3_t:void
3259
3260target = default
3261arm = vst3
3262generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
3263generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
3264generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p8:poly8x16x3_t:void, *mut p16:poly16x8x3_t:void
3265arm = nop
3266aarch64 = nop
3267generate *mut u64:uint64x1x3_t:void
3268target = aes
3269generate *mut p64:poly64x1x3_t:void
3270
3271/// Store multiple 3-element structures from three registers
3272name = vst3
3273in1-nox
3274a = 0., 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8., 13., 14., 15., 16
3275validate 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8., 2., 13., 13., 4.
3276store_fn
3277arm-aarch64-separate
3278
3279aarch64 = nop
3280link-aarch64 = st3._EXTpi8_
3281generate *mut f64:float64x1x3_t:void
3282aarch64 = st3
3283generate *mut f64:float64x2x3_t:void
3284
3285arm = vst3
3286link-arm = vst3._EXTpi8r_
3287generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
3288
3289/// Store multiple 3-element structures from three registers
3290name = vst3
3291in1-lane-nox
3292constn = LANE
3293multi_fn = static_assert_imm-in_exp_len-LANE
3294a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
3295n = 0
3296validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3297store_fn
3298arm-aarch64-separate
3299
3300aarch64 = st3
3301link-aarch64 = st3lane._EXTpi8_
3302const-aarch64 = LANE
3303generate *mut i8:int8x16x3_t:void, *mut i64:int64x1x3_t:void, *mut i64:int64x2x3_t:void
3304
3305arm = vst3
3306link-arm = vst3lane._EXTpi8r_
3307const-arm = LANE
3308generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
3309generate *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
3310
3311/// Store multiple 3-element structures from three registers
3312name = vst3
3313in1-lane-nox
3314constn = LANE
3315multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 3316multi_fn = vst3-in1signedlanenox-::<LANE>, transmute(a), transmute(b)
3c0e092e
XL
3317a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
3318n = 0
3319validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3320store_fn
3321
3322aarch64 = st3
3323generate *mut u8:uint8x16x3_t:void, *mut u64:uint64x1x3_t:void, *mut u64:uint64x2x3_t:void, *mut p8:poly8x16x3_t:void
3324target = aes
3325generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x2x3_t:void
3326
3327target = default
3328arm = vst3
3329generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
3330generate *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
3331generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x8x3_t:void
3332
3333/// Store multiple 3-element structures from three registers
3334name = vst3
3335in1-lane-nox
3336constn = LANE
3337multi_fn = static_assert_imm-in_exp_len-LANE
3338a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
3339n = 0
3340validate 1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
3341store_fn
3342arm-aarch64-separate
3343
3344aarch64 = st3
3345link-aarch64 = st3lane._EXTpi8_
3346const-aarch64 = LANE
3347generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
3348
3349arm = vst3
3350link-arm = vst3lane._EXTpi8r_
3351const-arm = LANE
3352generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
3353
3354/// Store multiple 4-element structures from four registers
3355name = vst4
3356in1-nox
3357a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3358validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3359store_fn
3360arm-aarch64-separate
3361
3362aarch64 = st4
3363link-aarch64 = st4._EXTpi8_
3364generate *mut i64:int64x2x4_t:void
3365
3366arm = vst4
3367link-arm = vst4._EXTpi8r_
3368generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
3369generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
3370arm = nop
3371aarch64 = nop
3372generate *mut i64:int64x1x4_t:void
3373
3374/// Store multiple 4-element structures from four registers
3375name = vst4
c620b35d 3376multi_fn = vst4-in1signednox-noext, transmute(a), transmute(b)
3c0e092e
XL
3377in1-nox
3378a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3379validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3380store_fn
3381
3382aarch64 = st4
3383generate *mut u64:uint64x2x4_t:void
3384target = aes
3385generate *mut p64:poly64x2x4_t:void
3386
3387target = default
3388arm = vst4
3389generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
3390generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
3391generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p8:poly8x16x4_t:void, *mut p16:poly16x8x4_t:void
3392arm = nop
3393aarch64 = nop
3394generate *mut u64:uint64x1x4_t:void
3395target = aes
3396generate *mut p64:poly64x1x4_t:void
3397
3398/// Store multiple 4-element structures from four registers
3399name = vst4
3400in1-nox
3401a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
3402validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
3403store_fn
3404arm-aarch64-separate
3405
3406aarch64 = nop
3407link-aarch64 = st4._EXTpi8_
3408generate *mut f64:float64x1x4_t:void
3409aarch64 = st4
3410generate *mut f64:float64x2x4_t:void
3411
3412arm = vst4
3413link-arm = vst4._EXTpi8r_
3414generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
3415
3416/// Store multiple 4-element structures from four registers
3417name = vst4
3418in1-lane-nox
3419constn = LANE
3420multi_fn = static_assert_imm-in_exp_len-LANE
3421a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3422n = 0
3423validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3424store_fn
3425arm-aarch64-separate
3426
3427aarch64 = st4
3428link-aarch64 = st4lane._EXTpi8_
3429const-aarch64 = LANE
3430generate *mut i8:int8x16x4_t:void, *mut i64:int64x1x4_t:void, *mut i64:int64x2x4_t:void
3431
3432arm = vst4
3433link-arm = vst4lane._EXTpi8r_
3434const-arm = LANE
3435generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
3436generate *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
3437
3438/// Store multiple 4-element structures from four registers
3439name = vst4
3440in1-lane-nox
3441constn = LANE
3442multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 3443multi_fn = vst4-in1signedlanenox-::<LANE>, transmute(a), transmute(b)
3c0e092e
XL
3444a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3445n = 0
3446validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3447store_fn
3448
3449aarch64 = st4
3450generate *mut u8:uint8x16x4_t:void, *mut u64:uint64x1x4_t:void, *mut u64:uint64x2x4_t:void, *mut p8:poly8x16x4_t:void
3451target = aes
3452generate *mut p64:poly64x1x4_t:void, *mut p64:poly64x2x4_t:void
3453
3454target = default
3455arm = vst4
3456generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
3457generate *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
3458generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p16:poly16x8x4_t:void
3459
3460/// Store multiple 4-element structures from four registers
3461name = vst4
3462in1-lane-nox
3463constn = LANE
3464multi_fn = static_assert_imm-in_exp_len-LANE
3465a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
3466n = 0
3467validate 1., 2., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
3468store_fn
3469arm-aarch64-separate
3470
3471aarch64 = st4
3472link-aarch64 = st4lane._EXTpi8_
3473const-aarch64 = LANE
3474generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
3475
3476arm = vst4
3477link-arm = vst4lane._EXTpi8r_
3478const-arm = LANE
3479generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
3480
781aab86
FG
3481/// Dot product vector form with unsigned and signed integers
3482name = vusdot
3483out-suffix
3484a = 1000, -4200, -1000, 2000
3485b = 100, 205, 110, 195, 120, 185, 130, 175, 140, 165, 150, 155, 160, 145, 170, 135
3486c = 0, 1, 2, 3, -1, -2, -3, -4, 4, 5, 6, 7, -5, -6, -7, -8
3487aarch64 = usdot
3488arm = vusdot
3489target = i8mm
3490
3491// 1000 + (100, 205, 110, 195) . ( 0, 1, 2, 3)
3492// -4200 + (120, 185, 130, 175) . (-1, -2, -3, -4)
3493// ...
3494validate 2010, -5780, 2370, -1940
3495
3496link-arm = usdot._EXT2_._EXT4_:int32x2_t:uint8x8_t:int8x8_t:int32x2_t
3497link-aarch64 = usdot._EXT2_._EXT4_:int32x2_t:uint8x8_t:int8x8_t:int32x2_t
3498generate int32x2_t:uint8x8_t:int8x8_t:int32x2_t
3499
3500link-arm = usdot._EXT2_._EXT4_:int32x4_t:uint8x16_t:int8x16_t:int32x4_t
3501link-aarch64 = usdot._EXT2_._EXT4_:int32x4_t:uint8x16_t:int8x16_t:int32x4_t
3502generate int32x4_t:uint8x16_t:int8x16_t:int32x4_t
3503
3504/// Dot product index form with unsigned and signed integers
3505name = vusdot
3506out-lane-suffixes
3507constn = LANE
3508aarch64 = usdot
3509arm = vusdot
3510target = i8mm
3511multi_fn = static_assert_imm-in2_dot-LANE
3512multi_fn = transmute, c:merge4_t2, c
3513multi_fn = simd_shuffle!, c:out_signed, c, c, {dup-out_len-LANE as u32}
3514multi_fn = vusdot-out-noext, a, b, {transmute, c}
3515a = 1000, -4200, -1000, 2000
3516b = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
3517c = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
3518
3519// 1000 + (100, 110, 120, 130) . (4, 3, 2, 1)
3520// -4200 + (140, 150, 160, 170) . (4, 3, 2, 1)
3521// ...
3522n = 0
3523validate 2100, -2700, 900, 4300
3524
3525// 1000 + (100, 110, 120, 130) . (0, -1, -2, -3)
3526// -4200 + (140, 150, 160, 170) . (0, -1, -2, -3)
3527// ...
3528n = 1
3529validate 260, -5180, -2220, 540
3530
3531generate int32x2_t:uint8x8_t:int8x8_t:int32x2_t
3532generate int32x4_t:uint8x16_t:int8x8_t:int32x4_t
3533
3534/// Dot product index form with unsigned and signed integers
3535name = vusdot
3536out-lane-suffixes
3537constn = LANE
3538// Only AArch64 has the laneq forms.
3539aarch64 = usdot
3540target = i8mm
3541multi_fn = static_assert_imm-in2_dot-LANE
3542multi_fn = transmute, c:merge4_t2, c
3543multi_fn = simd_shuffle!, c:out_signed, c, c, {dup-out_len-LANE as u32}
3544multi_fn = vusdot-out-noext, a, b, {transmute, c}
3545a = 1000, -4200, -1000, 2000
3546b = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
3547c = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
3548
3549// 1000 + (100, 110, 120, 130) . (-4, -5, -6, -7)
3550// -4200 + (140, 150, 160, 170) . (-4, -5, -6, -7)
3551// ...
3552n = 3
3553validate -3420, -10140, -8460, -6980
3554
3555generate int32x2_t:uint8x8_t:int8x16_t:int32x2_t
3556generate int32x4_t:uint8x16_t:int8x16_t:int32x4_t
3557
3c0e092e
XL
3558/// Dot product index form with signed and unsigned integers
3559name = vsudot
3560out-lane-suffixes
3561constn = LANE
781aab86
FG
3562aarch64 = sudot
3563arm = vsudot
3564target = i8mm
3565
3c0e092e 3566multi_fn = static_assert_imm-in2_dot-LANE
781aab86
FG
3567multi_fn = transmute, c:merge4_t2, c
3568multi_fn = simd_shuffle!, c:out_unsigned, c, c, {dup-out_len-LANE as u32}
3569multi_fn = vusdot-out-noext, a, {transmute, c}, b
3570a = -2000, 4200, -1000, 2000
3571b = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
3572c = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
3573
3574// -2000 + (4, 3, 2, 1) . (100, 110, 120, 130)
3575// 4200 + (0, -1, -2, -3) . (100, 110, 120, 130)
3576// ...
3c0e092e 3577n = 0
781aab86
FG
3578validate -900, 3460, -3580, -2420
3579
3580// -2000 + (4, 3, 2, 1) . (140, 150, 160, 170)
3581// 4200 + (0, -1, -2, -3) . (140, 150, 160, 170)
3582// ...
3583n = 1
3584validate -500, 3220, -4460, -3940
3c0e092e 3585
781aab86
FG
3586generate int32x2_t:int8x8_t:uint8x8_t:int32x2_t
3587generate int32x4_t:int8x16_t:uint8x8_t:int32x4_t
3588
3589/// Dot product index form with signed and unsigned integers
3590name = vsudot
3591out-lane-suffixes
3592constn = LANE
3593// Only AArch64 has the laneq forms.
3c0e092e 3594aarch64 = sudot
781aab86
FG
3595target = i8mm
3596
3597multi_fn = static_assert_imm-in2_dot-LANE
3598multi_fn = transmute, c:merge4_t2, c
3599multi_fn = simd_shuffle!, c:out_unsigned, c, c, {dup-out_len-LANE as u32}
3600multi_fn = vusdot-out-noext, a, {transmute, c}, b
3601a = -2000, 4200, -1000, 2000
3602b = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
3603c = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
3604
3605// -2000 + (4, 3, 2, 1) . (220, 230, 240, 250)
3606// 4200 + (0, -1, -2, -3) . (220, 230, 240, 250)
3607// ...
3608n = 3
3609validate 300, 2740, -6220, -6980
3610
3611generate int32x2_t:int8x8_t:uint8x16_t:int32x2_t
3612generate int32x4_t:int8x16_t:uint8x16_t:int32x4_t
3c0e092e 3613
17df50a5
XL
3614/// Multiply
3615name = vmul
3616a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
3617b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3618validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
3619arm = vmul.
3620aarch64 = mul
3621fn = simd_mul
3622generate int*_t, uint*_t
3623
3624/// Polynomial multiply
3625name = vmul
3626a = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
3627b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3628validate 1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48
3629
3630aarch64 = pmul
3631link-aarch64 = pmul._EXT_
3632arm = vmul
3633link-arm = vmulp._EXT_
3634generate poly8x8_t, poly8x16_t
3635
3636/// Multiply
3637name = vmul
3638fn = simd_mul
3639a = 1.0, 2.0, 1.0, 2.0
3640b = 2.0, 3.0, 4.0, 5.0
3641validate 2.0, 6.0, 4.0, 10.0
3642
3643aarch64 = fmul
3644generate float64x*_t
3645
3646arm = vmul.
3647generate float*_t
3648
3649/// Vector multiply by scalar
3650name = vmul
3651out-n-suffix
3652multi_fn = simd_mul, a, {vdup-nout-noext, b}
3653a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3654b = 2
3655validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
3656
3657arm = vmul
3658aarch64 = mul
3659generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t
3660generate uint16x4_t:u16:uint16x4_t, uint16x8_t:u16:uint16x8_t, uint32x2_t:u32:uint32x2_t, uint32x4_t:u32:uint32x4_t
3661
3662/// Vector multiply by scalar
3663name = vmul
3664out-n-suffix
3665multi_fn = simd_mul, a, {vdup-nout-noext, b}
3666a = 1., 2., 3., 4.
3667b = 2.
3668validate 2., 4., 6., 8.
3669
3670aarch64 = fmul
3671generate float64x1_t:f64:float64x1_t, float64x2_t:f64:float64x2_t
3672
3673arm = vmul
3674generate float32x2_t:f32:float32x2_t, float32x4_t:f32:float32x4_t
3675
3676/// Multiply
3677name = vmul
3678lane-suffixes
3679constn = LANE
3680multi_fn = static_assert_imm-in_exp_len-LANE
353b0b11 3681multi_fn = simd_mul, a, {simd_shuffle!, b, b, {dup-out_len-LANE as u32}}
17df50a5
XL
3682a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3683b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3684n = 1
3685validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
3686
3687aarch64 = mul
3688arm = vmul
3689generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
3690generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t
3691generate uint16x4_t, uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
3692generate uint32x2_t, uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
3693
3694/// Floating-point multiply
3695name = vmul
3696lane-suffixes
3697constn = LANE
3698multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 3699multi_fn = simd_mul, a, {transmute--<element_t _>, {simd_extract!, b, LANE as u32}}
17df50a5
XL
3700a = 1., 2., 3., 4.
3701b = 2., 0., 0., 0.
3702n = 0
3703validate 2., 4., 6., 8.
3704
3705aarch64 = fmul
3706generate float64x1_t, float64x1_t:float64x2_t:float64x1_t
3707
3708/// Floating-point multiply
3709name = vmul
3710lane-suffixes
3711constn = LANE
3712multi_fn = static_assert_imm-in_exp_len-LANE
353b0b11 3713multi_fn = simd_mul, a, {simd_shuffle!, b, b, {dup-out_len-LANE as u32}}
17df50a5
XL
3714a = 1., 2., 3., 4.
3715b = 2., 0., 0., 0.
3716n = 0
3717validate 2., 4., 6., 8.
3718
3719aarch64 = fmul
3720generate float64x2_t:float64x1_t:float64x2_t, float64x2_t
3721
3722arm = vmul
3723generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
3724
3725/// Floating-point multiply
3726name = vmuls_lane
3727constn = LANE
3728multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 3729multi_fn = simd_extract!, b:f32, b, LANE as u32
17df50a5
XL
3730multi_fn = a * b
3731a = 1.
3732b = 2., 0., 0., 0.
3733n = 0
3734validate 2.
3735aarch64 = fmul
3736generate f32:float32x2_t:f32, f32:float32x4_t:f32
3737
3738/// Floating-point multiply
3739name = vmuld_lane
3740constn = LANE
3741multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 3742multi_fn = simd_extract!, b:f64, b, LANE as u32
17df50a5
XL
3743multi_fn = a * b
3744a = 1.
3745b = 2., 0.
3746n = 0
3747validate 2.
3748aarch64 = fmul
3749generate f64:float64x1_t:f64, f64:float64x2_t:f64
3750
3751/// Signed multiply long
3752name = vmull
3753a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3754b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
3755validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
3756
3757arm = vmull.s
3758aarch64 = smull
3759link-arm = vmulls._EXT_
3760link-aarch64 = smull._EXT_
3761generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
3762
3763/// Signed multiply long
3764name = vmull_high
3765no-q
353b0b11
FG
3766multi_fn = simd_shuffle!, a:half, a, a, {fixed-half-right}
3767multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
17df50a5
XL
3768multi_fn = vmull-noqself-noext, a, b
3769a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3770b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
3771fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
3772validate 9, 20, 11, 24, 13, 28, 15, 32
3773
3774aarch64 = smull2
3775generate int8x16_t:int8x16_t:int16x8_t, int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
3776
3777/// Unsigned multiply long
3778name = vmull
3779a = 1, 2, 3, 4, 5, 6, 7, 8
3780b = 1, 2, 1, 2, 1, 2, 1, 2
3781validate 1, 4, 3, 8, 5, 12, 7, 16
3782
3783arm = vmull.s
3784aarch64 = umull
3785link-arm = vmullu._EXT_
3786link-aarch64 = umull._EXT_
3787generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t
3788
3789/// Unsigned multiply long
3790name = vmull_high
3791no-q
353b0b11
FG
3792multi_fn = simd_shuffle!, a:half, a, a, {fixed-half-right}
3793multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
17df50a5
XL
3794multi_fn = vmull-noqself-noext, a, b
3795a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3796b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
3797fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
3798validate 9, 20, 11, 24, 13, 28, 15, 32
3799
3800aarch64 = umull2
3801generate uint8x16_t:uint8x16_t:uint16x8_t, uint16x8_t:uint16x8_t:uint32x4_t, uint32x4_t:uint32x4_t:uint64x2_t
3802
3803/// Polynomial multiply long
3804name = vmull
3805a = 1, 2, 3, 4, 5, 6, 7, 8
3806b = 1, 3, 1, 3, 1, 3, 1, 3
3807validate 1, 6, 3, 12, 5, 10, 7, 24
3808
3809arm = vmull.s
3810aarch64 = pmull
3811link-arm = vmullp._EXT_
3812link-aarch64 = pmull._EXT_
3813generate poly8x8_t:poly8x8_t:poly16x8_t
3814
3815/// Polynomial multiply long
3816name = vmull
3817no-q
3818a = 15
3819b = 3
3820validate 17
94222f64 3821target = aes
17df50a5
XL
3822
3823aarch64 = pmull
3824link-aarch64 = pmull64:p64:p64:p64:int8x16_t
3c0e092e 3825// Because of the support status of llvm, vmull_p64 is currently only available on arm
17df50a5
XL
3826// arm = vmull
3827// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
3828generate p64:p64:p128
3829
3830
3831/// Polynomial multiply long
3832name = vmull_high
3833no-q
353b0b11
FG
3834multi_fn = simd_shuffle!, a:half, a, a, {fixed-half-right}
3835multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
17df50a5
XL
3836multi_fn = vmull-noqself-noext, a, b
3837a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3838b = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
3839fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
3840validate 9, 30, 11, 20, 13, 18, 15, 48
3841
3842aarch64 = pmull
3843generate poly8x16_t:poly8x16_t:poly16x8_t
3844
3845/// Polynomial multiply long
3846name = vmull_high
3847no-q
c620b35d 3848multi_fn = vmull-noqself-noext, {simd_extract!, a, 1}, {simd_extract!, b, 1}
17df50a5
XL
3849a = 1, 15
3850b = 1, 3
3851validate 17
94222f64 3852target = aes
17df50a5
XL
3853
3854aarch64 = pmull
3855generate poly64x2_t:poly64x2_t:p128
3856
3857/// Vector long multiply with scalar
c295e0f8
XL
3858name = vmull_n
3859no-q
17df50a5
XL
3860multi_fn = vmull-in0-noext, a, {vdup-nin0-noext, b}
3861a = 1, 2, 3, 4, 5, 6, 7, 8
3862b = 2
3863validate 2, 4, 6, 8, 10, 12, 14, 16
3864
3865arm = vmull
3866aarch64 = smull
3867generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
3868aarch64 = umull
3869generate uint16x4_t:u16:uint32x4_t, uint32x2_t:u32:uint64x2_t
3870
3871/// Vector long multiply by scalar
3872name = vmull_lane
3873constn = LANE
3874multi_fn = static_assert_imm-in_exp_len-LANE
353b0b11 3875multi_fn = vmull-in0-noext, a, {simd_shuffle!, b, b, {dup-in0_len-LANE as u32}}
17df50a5
XL
3876a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3877b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3878n = 1
3879validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
3880
3881arm = vmull
3882aarch64 = smull
3883generate int16x4_t:int16x4_t:int32x4_t, int16x4_t:int16x8_t:int32x4_t
3884generate int32x2_t:int32x2_t:int64x2_t, int32x2_t:int32x4_t:int64x2_t
3885aarch64 = umull
3886generate uint16x4_t:uint16x4_t:uint32x4_t, uint16x4_t:uint16x8_t:uint32x4_t
3887generate uint32x2_t:uint32x2_t:uint64x2_t, uint32x2_t:uint32x4_t:uint64x2_t
3888
3889/// Multiply long
3890name = vmull_high_n
3891no-q
3892multi_fn = vmull_high-noqself-noext, a, {vdup-nin0-noext, b}
3893a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3894b = 2
3895validate 18, 20, 22, 24, 26, 28, 30, 32
3896
3897aarch64 = smull2
3898generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
3899aarch64 = umull2
3900generate uint16x8_t:u16:uint32x4_t, uint32x4_t:u32:uint64x2_t
3901
3902/// Multiply long
3903name = vmull_high_lane
3904constn = LANE
3905multi_fn = static_assert_imm-in_exp_len-LANE
353b0b11 3906multi_fn = vmull_high-noqself-noext, a, {simd_shuffle!, b, b, {dup-in0_len-LANE as u32}}
17df50a5
XL
3907a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3908b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3909n = 1
3910validate 18, 20, 22, 24, 26, 28, 30, 32
3911
3912aarch64 = smull2
3913generate int16x8_t:int16x4_t:int32x4_t, int16x8_t:int16x8_t:int32x4_t
3914generate int32x4_t:int32x2_t:int64x2_t, int32x4_t:int32x4_t:int64x2_t
3915aarch64 = umull2
3916generate uint16x8_t:uint16x4_t:uint32x4_t, uint16x8_t:uint16x8_t:uint32x4_t
3917generate uint32x4_t:uint32x2_t:uint64x2_t, uint32x4_t:uint32x4_t:uint64x2_t
3918
3919/// Floating-point multiply extended
3920name = vmulx
3921a = 1., 2., 3., 4.
3922b = 2., 2., 2., 2.
3923validate 2., 4., 6., 8.
3924
3925aarch64 = fmulx
3926link-aarch64 = fmulx._EXT_
3927generate float*_t, float64x*_t
3928
3929/// Floating-point multiply extended
3930name = vmulx
3931lane-suffixes
3932constn = LANE
3933multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 3934multi_fn = vmulx-in0-noext, a, {transmute--<element_t _>, {simd_extract!, b, LANE as u32}}
17df50a5
XL
3935a = 1.
3936b = 2., 0.
3937n = 0
3938validate 2.
3939
3940aarch64 = fmulx
3941generate float64x1_t, float64x1_t:float64x2_t:float64x1_t
3942
3943/// Floating-point multiply extended
3944name = vmulx
3945lane-suffixes
3946constn = LANE
3947multi_fn = static_assert_imm-in_exp_len-LANE
353b0b11 3948multi_fn = vmulx-in0-noext, a, {simd_shuffle!, b, b, {dup-in0_len-LANE as u32}}
17df50a5
XL
3949a = 1., 2., 3., 4.
3950b = 2., 0., 0., 0.
3951n = 0
3952validate 2., 4., 6., 8.
3953
3954aarch64 = fmulx
3955generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
3956generate float64x2_t:float64x1_t:float64x2_t, float64x2_t
3957
3958/// Floating-point multiply extended
3959name = vmulx
3960a = 2.
3961b = 3.
3962validate 6.
3963
3964aarch64 = fmulx
3965link-aarch64 = fmulx._EXT_
3966generate f32, f64
3967
3968/// Floating-point multiply extended
3969name = vmulx
3970lane-suffixes
3971constn = LANE
3972multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 3973multi_fn = vmulx-out-noext, a, {simd_extract!, b, LANE as u32}
17df50a5
XL
3974
3975a = 2.
3976b = 3., 0., 0., 0.
3977n = 0
3978validate 6.
3979
3980aarch64 = fmulx
3981generate f32:float32x2_t:f32, f32:float32x4_t:f32, f64:float64x1_t:f64, f64:float64x2_t:f64
3982
3983/// Floating-point fused Multiply-Add to accumulator(vector)
3984name = vfma
3985multi_fn = vfma-self-_, b, c, a
3986a = 8.0, 18.0, 12.0, 10.0
3987b = 6.0, 4.0, 7.0, 8.0
3988c = 2.0, 3.0, 4.0, 5.0
3989validate 20.0, 30.0, 40.0, 50.0
3990
3991link-aarch64 = llvm.fma._EXT_
3992aarch64 = fmadd
3993generate float64x1_t
3994aarch64 = fmla
3995generate float64x2_t
3996
c295e0f8 3997target = vfp4
17df50a5
XL
3998arm = vfma
3999link-arm = llvm.fma._EXT_
4000generate float*_t
4001
4002/// Floating-point fused Multiply-Add to accumulator(vector)
4003name = vfma
4004n-suffix
c295e0f8 4005multi_fn = vfma-self-noext, a, b, {vdup-nselfvfp4-noext, c}
17df50a5
XL
4006a = 2.0, 3.0, 4.0, 5.0
4007b = 6.0, 4.0, 7.0, 8.0
4008c = 8.0
4009validate 50.0, 35.0, 60.0, 69.0
4010
4011aarch64 = fmadd
4012generate float64x1_t:float64x1_t:f64:float64x1_t
4013aarch64 = fmla
4014generate float64x2_t:float64x2_t:f64:float64x2_t
4015
c295e0f8 4016target = vfp4
17df50a5
XL
4017arm = vfma
4018generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
4019
4020/// Floating-point fused multiply-add to accumulator
4021name = vfma
4022in2-lane-suffixes
4023constn = LANE
4024multi_fn = static_assert_imm-in2_exp_len-LANE
c620b35d 4025multi_fn = vfma-out-noext, a, b, {vdup-nout-noext, {simd_extract!, c, LANE as u32}}
17df50a5
XL
4026a = 2., 3., 4., 5.
4027b = 6., 4., 7., 8.
4028c = 2., 0., 0., 0.
4029n = 0
4030validate 14., 11., 18., 21.
4031
4032aarch64 = fmla
4033generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4034aarch64 = fmadd
c620b35d 4035generate float64x1_t, float64x1_t:float64x1_t:float64x2_t:float64x1_t
17df50a5 4036aarch64 = fmla
c620b35d 4037generate float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t
17df50a5
XL
4038
4039/// Floating-point fused multiply-add to accumulator
4040name = vfma
4041in2-lane-suffixes
4042constn = LANE
4043multi_fn = static_assert_imm-in2_exp_len-LANE
c620b35d 4044multi_fn = simd_extract!, c:out_t, c, LANE as u32
17df50a5
XL
4045multi_fn = vfma-in2lane-_, b, c, a
4046a = 2.
4047b = 6.
4048c = 3., 0., 0., 0.
4049n = 0
4050validate 20.
4051
c620b35d 4052aarch64 = fmadd
17df50a5
XL
4053link-aarch64 = llvm.fma._EXT_:f32:f32:f32:f32
4054generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32
4055link-aarch64 = llvm.fma._EXT_:f64:f64:f64:f64
c620b35d 4056generate f64:f64:float64x1_t:f64, f64:f64:float64x2_t:f64
17df50a5
XL
4057
4058/// Floating-point fused multiply-subtract from accumulator
4059name = vfms
4060multi_fn = simd_neg, b:in_t, b
4061multi_fn = vfma-self-noext, a, b, c
4062a = 20.0, 30.0, 40.0, 50.0
4063b = 6.0, 4.0, 7.0, 8.0
4064c = 2.0, 3.0, 4.0, 5.0
4065validate 8.0, 18.0, 12.0, 10.0
4066
4067aarch64 = fmsub
4068generate float64x1_t
4069aarch64 = fmls
4070generate float64x2_t
4071
c295e0f8 4072target = vfp4
17df50a5
XL
4073arm = vfms
4074generate float*_t
4075
4076/// Floating-point fused Multiply-subtract to accumulator(vector)
4077name = vfms
4078n-suffix
c295e0f8 4079multi_fn = vfms-self-noext, a, b, {vdup-nselfvfp4-noext, c}
17df50a5
XL
4080a = 50.0, 35.0, 60.0, 69.0
4081b = 6.0, 4.0, 7.0, 8.0
4082c = 8.0
4083validate 2.0, 3.0, 4.0, 5.0
4084
4085aarch64 = fmsub
4086generate float64x1_t:float64x1_t:f64:float64x1_t
4087aarch64 = fmls
4088generate float64x2_t:float64x2_t:f64:float64x2_t
4089
c295e0f8 4090target = vfp4
17df50a5
XL
4091arm = vfms
4092generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
4093
4094/// Floating-point fused multiply-subtract to accumulator
4095name = vfms
4096in2-lane-suffixes
4097constn = LANE
4098multi_fn = static_assert_imm-in2_exp_len-LANE
c620b35d 4099multi_fn = vfms-out-noext, a, b, {vdup-nout-noext, {simd_extract!, c, LANE as u32}}
17df50a5
XL
4100a = 14., 11., 18., 21.
4101b = 6., 4., 7., 8.
4102c = 2., 0., 0., 0.
4103n = 0
4104validate 2., 3., 4., 5.
4105
4106aarch64 = fmls
4107generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4108aarch64 = fmsub
c620b35d 4109generate float64x1_t, float64x1_t:float64x1_t:float64x2_t:float64x1_t
17df50a5 4110aarch64 = fmls
c620b35d 4111generate float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t
17df50a5
XL
4112
4113/// Floating-point fused multiply-subtract to accumulator
4114name = vfms
4115in2-lane-suffixes
4116constn = LANE
4117multi_fn = vfma-in2lane-::<LANE>, a, -b, c
4118a = 14.
4119b = 6.
4120c = 2., 0., 0., 0.
4121n = 0
4122validate 2.
4123
17df50a5 4124aarch64 = fmsub
c620b35d 4125generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32, f64:f64:float64x1_t:f64, f64:f64:float64x2_t:f64
17df50a5
XL
4126
4127/// Divide
4128name = vdiv
4129fn = simd_div
4130a = 2.0, 6.0, 4.0, 10.0
4131b = 1.0, 2.0, 1.0, 2.0
4132validate 2.0, 3.0, 4.0, 5.0
4133
4134aarch64 = fdiv
4135generate float*_t, float64x*_t
4136
4137/// Subtract
4138name = vsub
4139a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4140b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
4141validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
4142arm = vsub.
4143aarch64 = sub
4144fn = simd_sub
4145generate int*_t, uint*_t, int64x*_t, uint64x*_t
4146
4147/// Subtract
4148name = vsub
4149fn = simd_sub
4150a = 1.0, 4.0, 3.0, 8.0
4151b = 1.0, 2.0, 3.0, 4.0
4152validate 0.0, 2.0, 0.0, 4.0
4153
4154aarch64 = fsub
4155generate float64x*_t
4156
4157arm = vsub.
4158generate float*_t
4159
3c0e092e
XL
4160/// Subtract
4161name = vsub
a2a8927a 4162multi_fn = a.wrapping_sub(b)
3c0e092e
XL
4163a = 3
4164b = 2
4165validate 1
4166
4167aarch64 = nop
4168generate i64, u64
4169
4170/// Add
4171name = vadd
a2a8927a 4172multi_fn = a.wrapping_add(b)
3c0e092e
XL
4173a = 1
4174b = 2
4175validate 3
4176
4177aarch64 = nop
4178generate i64, u64
4179
4180/// Bitwise exclusive OR
4181name = vadd
4182multi_fn = simd_xor, a, b
4183a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4184b = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
4185validate 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17
4186
4187aarch64 = nop
4188arm = nop
4189generate poly8x8_t, poly16x4_t, poly8x16_t, poly16x8_t, poly64x1_t, poly64x2_t
4190
4191/// Bitwise exclusive OR
4192name = vaddq
4193no-q
4194multi_fn = a ^ b
4195a = 16
4196b = 1
4197validate 17
4198
4199aarch64 = nop
4200arm = nop
4201generate p128
4202
4203/// Floating-point add across vector
4204name = vaddv
4205a = 1., 2., 0., 0.
4206validate 3.
4207
4208aarch64 = faddp
4209link-aarch64 = faddv._EXT2_._EXT_
4210generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64
4211
17df50a5
XL
4212/// Signed Add Long across Vector
4213name = vaddlv
4214a = 1, 2, 3, 4
4215validate 10
4216
4217aarch64 = saddlv
4218link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
4219generate int16x4_t:i32
4220
4221/// Signed Add Long across Vector
4222name = vaddlv
4223a = 1, 2, 3, 4, 5, 6, 7, 8
4224validate 36
4225
4226aarch64 = saddlv
4227link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
4228generate int16x8_t:i32
4229
4230/// Signed Add Long across Vector
4231name = vaddlv
4232a = 1, 2
4233validate 3
4234
4235aarch64 = saddlp
4236link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
4237generate int32x2_t:i64
4238
4239/// Signed Add Long across Vector
4240name = vaddlv
4241a = 1, 2, 3, 4
4242validate 10
4243
4244aarch64 = saddlv
4245link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
4246generate int32x4_t:i64
4247
4248/// Unsigned Add Long across Vector
4249name = vaddlv
4250a = 1, 2, 3, 4
4251validate 10
4252
4253aarch64 = uaddlv
4254link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
4255generate uint16x4_t:u32
4256
4257/// Unsigned Add Long across Vector
4258name = vaddlv
4259a = 1, 2, 3, 4, 5, 6, 7, 8
4260validate 36
4261
4262aarch64 = uaddlv
4263link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
4264generate uint16x8_t:u32
4265
4266/// Unsigned Add Long across Vector
4267name = vaddlv
4268a = 1, 2
4269validate 3
4270
4271aarch64 = uaddlp
4272link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
4273generate uint32x2_t:u64
4274
4275/// Unsigned Add Long across Vector
4276name = vaddlv
4277a = 1, 2, 3, 4
4278validate 10
4279
4280aarch64 = uaddlv
4281link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
4282generate uint32x4_t:u64
4283
4284/// Subtract returning high narrow
4285name = vsubhn
4286no-q
4287multi_fn = fixed, c:in_t
4288multi_fn = simd_cast, {simd_shr, {simd_sub, a, b}, transmute(c)}
4289a = MAX, MIN, 1, 1, MAX, MIN, 1, 1
4290b = 1, 0, 0, 0, 1, 0, 0, 0
4291fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS
4292validate MAX, MIN, 0, 0, MAX, MIN, 0, 0
4293
4294arm = vsubhn
4295aarch64 = subhn
4296generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
4297generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
4298
4299/// Subtract returning high narrow
4300name = vsubhn_high
4301no-q
4302multi_fn = vsubhn-noqself-noext, d:in_t0, b, c
353b0b11 4303multi_fn = simd_shuffle!, a, d, {asc-0-out_len}
17df50a5
XL
4304a = MAX, 0, MAX, 0, MAX, 0, MAX, 0
4305b = MAX, 1, MAX, 1, MAX, 1, MAX, 1
4306c = 1, 0, 1, 0, 1, 0, 1, 0
4307validate MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0
4308
4309arm = vsubhn
4310aarch64 = subhn2
4311generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
4312generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t
4313
4314/// Signed halving subtract
4315name = vhsub
4316a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4317b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
4318validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7
4319
4320arm = vhsub.s
4321aarch64 = uhsub
4322link-arm = vhsubu._EXT_
4323link-aarch64 = uhsub._EXT_
4324generate uint*_t
4325
4326arm = vhsub.s
4327aarch64 = shsub
4328link-arm = vhsubs._EXT_
4329link-aarch64 = shsub._EXT_
4330generate int*_t
4331
4332/// Signed Subtract Wide
4333name = vsubw
4334no-q
4335multi_fn = simd_sub, a, {simd_cast, b}
4336a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4337b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4338validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4339
4340arm = vsubw
4341aarch64 = ssubw
4342generate int16x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int64x2_t
4343
4344/// Unsigned Subtract Wide
4345name = vsubw
4346no-q
4347multi_fn = simd_sub, a, {simd_cast, b}
4348a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4349b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4350validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4351
4352arm = vsubw
4353aarch64 = usubw
4354generate uint16x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint64x2_t
4355
4356/// Signed Subtract Wide
4357name = vsubw_high
4358no-q
353b0b11 4359multi_fn = simd_shuffle!, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5
XL
4360multi_fn = simd_sub, a, {simd_cast, c}
4361a = 8, 9, 10, 12, 13, 14, 15, 16
4362b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4363validate 0, 0, 0, 0, 0, 0, 0, 0
4364
4365aarch64 = ssubw
4366generate int16x8_t:int8x16_t:int16x8_t
4367
4368/// Signed Subtract Wide
4369name = vsubw_high
4370no-q
353b0b11 4371multi_fn = simd_shuffle!, c:int16x4_t, b, b, [4, 5, 6, 7]
17df50a5
XL
4372multi_fn = simd_sub, a, {simd_cast, c}
4373a = 8, 9, 10, 11
4374b = 0, 1, 2, 3, 8, 9, 10, 11
4375validate 0, 0, 0, 0
4376
4377aarch64 = ssubw
4378generate int32x4_t:int16x8_t:int32x4_t
4379
4380/// Signed Subtract Wide
4381name = vsubw_high
4382no-q
353b0b11 4383multi_fn = simd_shuffle!, c:int32x2_t, b, b, [2, 3]
17df50a5
XL
4384multi_fn = simd_sub, a, {simd_cast, c}
4385a = 8, 9
4386b = 6, 7, 8, 9
4387validate 0, 0
4388
4389aarch64 = ssubw
4390generate int64x2_t:int32x4_t:int64x2_t
4391
4392/// Unsigned Subtract Wide
4393name = vsubw_high
4394no-q
353b0b11 4395multi_fn = simd_shuffle!, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5
XL
4396multi_fn = simd_sub, a, {simd_cast, c}
4397a = 8, 9, 10, 11, 12, 13, 14, 15
4398b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4399validate 0, 0, 0, 0, 0, 0, 0, 0
4400
4401aarch64 = usubw
4402generate uint16x8_t:uint8x16_t:uint16x8_t
4403
4404/// Unsigned Subtract Wide
4405name = vsubw_high
4406no-q
353b0b11 4407multi_fn = simd_shuffle!, c:uint16x4_t, b, b, [4, 5, 6, 7]
17df50a5
XL
4408multi_fn = simd_sub, a, {simd_cast, c}
4409a = 8, 9, 10, 11
4410b = 0, 1, 2, 3, 8, 9, 10, 11
4411validate 0, 0, 0, 0
4412
4413aarch64 = usubw
4414generate uint32x4_t:uint16x8_t:uint32x4_t
4415
4416/// Unsigned Subtract Wide
4417name = vsubw_high
4418no-q
353b0b11 4419multi_fn = simd_shuffle!, c:uint32x2_t, b, b, [2, 3]
17df50a5
XL
4420multi_fn = simd_sub, a, {simd_cast, c}
4421a = 8, 9
4422b = 6, 7, 8, 9
4423validate 0, 0
4424
4425aarch64 = usubw
4426generate uint64x2_t:uint32x4_t:uint64x2_t
4427
4428/// Signed Subtract Long
4429name = vsubl
4430no-q
4431multi_fn = simd_cast, c:out_t, a
4432multi_fn = simd_cast, d:out_t, b
4433multi_fn = simd_sub, c, d
4434
4435a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4436b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4437validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4438
4439arm = vsubl
4440aarch64 = ssubl
4441generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
4442
4443/// Unsigned Subtract Long
4444name = vsubl
4445no-q
4446multi_fn = simd_cast, c:out_t, a
4447multi_fn = simd_cast, d:out_t, b
4448multi_fn = simd_sub, c, d
4449
4450a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4451b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4452validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4453
4454arm = vsubl
4455aarch64 = usubl
4456generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t
4457
4458/// Signed Subtract Long
4459name = vsubl_high
4460no-q
353b0b11 4461multi_fn = simd_shuffle!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5 4462multi_fn = simd_cast, d:out_t, c
353b0b11 4463multi_fn = simd_shuffle!, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5
XL
4464multi_fn = simd_cast, f:out_t, e
4465multi_fn = simd_sub, d, f
4466
4467a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4468b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
4469validate 6, 7, 8, 9, 10, 11, 12, 13
4470
4471aarch64 = ssubl
4472generate int8x16_t:int8x16_t:int16x8_t
4473
4474/// Signed Subtract Long
4475name = vsubl_high
4476no-q
353b0b11 4477multi_fn = simd_shuffle!, c:int16x4_t, a, a, [4, 5, 6, 7]
17df50a5 4478multi_fn = simd_cast, d:out_t, c
353b0b11 4479multi_fn = simd_shuffle!, e:int16x4_t, b, b, [4, 5, 6, 7]
17df50a5
XL
4480multi_fn = simd_cast, f:out_t, e
4481multi_fn = simd_sub, d, f
4482
4483a = 8, 9, 10, 11, 12, 13, 14, 15
4484b = 6, 6, 6, 6, 8, 8, 8, 8
4485validate 4, 5, 6, 7
4486
4487aarch64 = ssubl
4488generate int16x8_t:int16x8_t:int32x4_t
4489
4490/// Signed Subtract Long
4491name = vsubl_high
4492no-q
353b0b11 4493multi_fn = simd_shuffle!, c:int32x2_t, a, a, [2, 3]
17df50a5 4494multi_fn = simd_cast, d:out_t, c
353b0b11 4495multi_fn = simd_shuffle!, e:int32x2_t, b, b, [2, 3]
17df50a5
XL
4496multi_fn = simd_cast, f:out_t, e
4497multi_fn = simd_sub, d, f
4498
4499a = 12, 13, 14, 15
4500b = 6, 6, 8, 8
4501validate 6, 7
4502
4503aarch64 = ssubl
4504generate int32x4_t:int32x4_t:int64x2_t
4505
4506/// Unsigned Subtract Long
4507name = vsubl_high
4508no-q
353b0b11 4509multi_fn = simd_shuffle!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5 4510multi_fn = simd_cast, d:out_t, c
353b0b11 4511multi_fn = simd_shuffle!, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5
XL
4512multi_fn = simd_cast, f:out_t, e
4513multi_fn = simd_sub, d, f
4514
4515a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4516b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
4517validate 6, 7, 8, 9, 10, 11, 12, 13
4518
4519aarch64 = usubl
4520generate uint8x16_t:uint8x16_t:uint16x8_t
4521
4522/// Unsigned Subtract Long
4523name = vsubl_high
4524no-q
353b0b11 4525multi_fn = simd_shuffle!, c:uint16x4_t, a, a, [4, 5, 6, 7]
17df50a5 4526multi_fn = simd_cast, d:out_t, c
353b0b11 4527multi_fn = simd_shuffle!, e:uint16x4_t, b, b, [4, 5, 6, 7]
17df50a5
XL
4528multi_fn = simd_cast, f:out_t, e
4529multi_fn = simd_sub, d, f
4530
4531a = 8, 9, 10, 11, 12, 13, 14, 15
4532b = 6, 6, 6, 6, 8, 8, 8, 8
4533validate 4, 5, 6, 7
4534
4535aarch64 = usubl
4536generate uint16x8_t:uint16x8_t:uint32x4_t
4537
4538/// Unsigned Subtract Long
4539name = vsubl_high
4540no-q
353b0b11 4541multi_fn = simd_shuffle!, c:uint32x2_t, a, a, [2, 3]
17df50a5 4542multi_fn = simd_cast, d:out_t, c
353b0b11 4543multi_fn = simd_shuffle!, e:uint32x2_t, b, b, [2, 3]
17df50a5
XL
4544multi_fn = simd_cast, f:out_t, e
4545multi_fn = simd_sub, d, f
4546
4547a = 12, 13, 14, 15
4548b = 6, 6, 8, 8
4549validate 6, 7
4550
4551aarch64 = usubl
4552generate uint32x4_t:uint32x4_t:uint64x2_t
4553
3c0e092e
XL
4554/// Bit clear and exclusive OR
4555name = vbcax
4556a = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0
4557b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4558c = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
4559validate 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14
4560target = sha3
4561
4562aarch64 = bcax
4563link-aarch64 = llvm.aarch64.crypto.bcaxs._EXT_
4564generate int8x16_t, int16x8_t, int32x4_t, int64x2_t
4565link-aarch64 = llvm.aarch64.crypto.bcaxu._EXT_
4566generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t
4567
4568/// Floating-point complex add
4569name = vcadd_rot270
4570no-q
4571a = 1., -1., 1., -1.
4572b = -1., 1., -1., 1.
4573validate 2., 0., 2., 0.
4574target = fcma
4575
4576aarch64 = fcadd
4577link-aarch64 = vcadd.rot270._EXT_
4578generate float32x2_t
4579name = vcaddq_rot270
4580generate float32x4_t, float64x2_t
4581
4582/// Floating-point complex add
4583name = vcadd_rot90
4584no-q
4585a = 1., -1., 1., -1.
4586b = -1., 1., -1., 1.
4587validate 0., -2., 0., -2.
4588target = fcma
4589
4590aarch64 = fcadd
4591link-aarch64 = vcadd.rot90._EXT_
4592generate float32x2_t
4593name = vcaddq_rot90
4594generate float32x4_t, float64x2_t
4595
4596/// Floating-point complex multiply accumulate
4597name = vcmla
4598a = 1., -1., 1., -1.
4599b = -1., 1., -1., 1.
4600c = 1., 1., -1., -1.
4601validate 0., -2., 2., 0.
4602target = fcma
4603
4604aarch64 = fcmla
4605link-aarch64 = vcmla.rot0._EXT_
4606generate float32x2_t, float32x4_t, float64x2_t
4607
4608/// Floating-point complex multiply accumulate
4609name = vcmla_rot90
4610rot-suffix
4611a = 1., 1., 1., 1.
4612b = 1., -1., 1., -1.
4613c = 1., 1., 1., 1.
4614validate 2., 0., 2., 0.
4615target = fcma
4616
4617aarch64 = fcmla
4618link-aarch64 = vcmla.rot90._EXT_
4619generate float32x2_t, float32x4_t, float64x2_t
4620
4621/// Floating-point complex multiply accumulate
4622name = vcmla_rot180
4623rot-suffix
4624a = 1., 1., 1., 1.
4625b = 1., -1., 1., -1.
4626c = 1., 1., 1., 1.
4627validate 0., 0., 0., 0.
4628target = fcma
4629
4630aarch64 = fcmla
4631link-aarch64 = vcmla.rot180._EXT_
4632generate float32x2_t, float32x4_t, float64x2_t
4633
4634/// Floating-point complex multiply accumulate
4635name = vcmla_rot270
4636rot-suffix
4637a = 1., 1., 1., 1.
4638b = 1., -1., 1., -1.
4639c = 1., 1., 1., 1.
4640validate 0., 2., 0., 2.
4641target = fcma
4642
4643aarch64 = fcmla
4644link-aarch64 = vcmla.rot270._EXT_
4645generate float32x2_t, float32x4_t, float64x2_t
4646
4647/// Floating-point complex multiply accumulate
4648name = vcmla
4649in2-lane-suffixes
4650constn = LANE
4651multi_fn = static_assert_imm-in2_rot-LANE
353b0b11 4652multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
3c0e092e
XL
4653multi_fn = vcmla-self-noext, a, b, c
4654a = 1., -1., 1., -1.
4655b = -1., 1., -1., 1.
4656c = 1., 1., -1., -1.
4657n = 0
4658validate 0., -2., 0., -2.
4659target = fcma
4660
4661aarch64 = fcmla
4662generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
4663generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4664
4665/// Floating-point complex multiply accumulate
4666name = vcmla_rot90
4667rot-lane-suffixes
4668constn = LANE
4669multi_fn = static_assert_imm-in2_rot-LANE
353b0b11 4670multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
3c0e092e
XL
4671multi_fn = vcmla_rot90-rot-noext, a, b, c
4672a = 1., -1., 1., -1.
4673b = -1., 1., -1., 1.
4674c = 1., 1., -1., -1.
4675n = 0
4676validate 0., 0., 0., 0.
4677target = fcma
4678
4679aarch64 = fcmla
4680generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
4681generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4682
4683/// Floating-point complex multiply accumulate
4684name = vcmla_rot180
4685rot-lane-suffixes
4686constn = LANE
4687multi_fn = static_assert_imm-in2_rot-LANE
353b0b11 4688multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
3c0e092e
XL
4689multi_fn = vcmla_rot180-rot-noext, a, b, c
4690a = 1., -1., 1., -1.
4691b = -1., 1., -1., 1.
4692c = 1., 1., -1., -1.
4693n = 0
4694validate 2., 0., 2., 0.
4695target = fcma
4696
4697aarch64 = fcmla
4698generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
4699generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4700
4701/// Floating-point complex multiply accumulate
4702name = vcmla_rot270
4703rot-lane-suffixes
4704constn = LANE
4705multi_fn = static_assert_imm-in2_rot-LANE
353b0b11 4706multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
3c0e092e
XL
4707multi_fn = vcmla_rot270-rot-noext, a, b, c
4708a = 1., -1., 1., -1.
4709b = -1., 1., -1., 1.
4710c = 1., 1., -1., -1.
4711n = 0
4712validate 2., -2., 2., -2.
4713target = fcma
4714
4715aarch64 = fcmla
4716generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
4717generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4718
781aab86 4719/// Dot product arithmetic (vector)
3c0e092e
XL
4720name = vdot
4721out-suffix
4722a = 1, 2, 1, 2
4723b = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4724c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4725validate 31, 176, 31, 176
4726target = dotprod
4727
781aab86 4728arm = vsdot
3c0e092e 4729aarch64 = sdot
781aab86 4730link-arm = sdot._EXT_._EXT3_
3c0e092e
XL
4731link-aarch64 = sdot._EXT_._EXT3_
4732generate int32x2_t:int8x8_t:int8x8_t:int32x2_t, int32x4_t:int8x16_t:int8x16_t:int32x4_t
4733
781aab86 4734arm = vudot
3c0e092e 4735aarch64 = udot
781aab86 4736link-arm = udot._EXT_._EXT3_
3c0e092e
XL
4737link-aarch64 = udot._EXT_._EXT3_
4738generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t, uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t
4739
781aab86 4740/// Dot product arithmetic (indexed)
3c0e092e
XL
4741name = vdot
4742out-lane-suffixes
4743constn = LANE
4744multi_fn = static_assert_imm-in2_dot-LANE
781aab86
FG
4745multi_fn = transmute, c:merge4_t2, c
4746multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
4747multi_fn = vdot-out-noext, a, b, {transmute, c}
3c0e092e 4748a = 1, 2, 1, 2
781aab86 4749b = -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
3c0e092e
XL
4750c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4751n = 0
781aab86 4752validate 29, 72, 31, 72
3c0e092e
XL
4753target = dotprod
4754
781aab86 4755// Only AArch64 has the laneq forms.
3c0e092e 4756aarch64 = sdot
781aab86
FG
4757generate int32x2_t:int8x8_t:int8x16_t:int32x2_t
4758generate int32x4_t:int8x16_t:int8x16_t:int32x4_t
4759
4760arm = vsdot
4761generate int32x2_t:int8x8_t:int8x8_t:int32x2_t
4762generate int32x4_t:int8x16_t:int8x8_t:int32x4_t
4763
4764/// Dot product arithmetic (indexed)
4765name = vdot
4766out-lane-suffixes
4767constn = LANE
4768multi_fn = static_assert_imm-in2_dot-LANE
4769multi_fn = transmute, c:merge4_t2, c
4770multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
4771multi_fn = vdot-out-noext, a, b, {transmute, c}
4772a = 1, 2, 1, 2
4773b = 255, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4774c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4775n = 0
4776validate 285, 72, 31, 72
4777target = dotprod
3c0e092e 4778
781aab86 4779// Only AArch64 has the laneq forms.
3c0e092e 4780aarch64 = udot
781aab86
FG
4781generate uint32x2_t:uint8x8_t:uint8x16_t:uint32x2_t
4782generate uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t
4783
4784arm = vudot
4785generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t
4786generate uint32x4_t:uint8x16_t:uint8x8_t:uint32x4_t
3c0e092e 4787
17df50a5
XL
4788/// Maximum (vector)
4789name = vmax
4790a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4791b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
4792validate 16, 15, 14, 13, 12, 11, 10, 9, 9, 10, 11, 12, 13, 14, 15, 16
4793
4794arm = vmax
4795aarch64 = smax
4796link-arm = vmaxs._EXT_
4797link-aarch64 = smax._EXT_
4798generate int*_t
4799
4800arm = vmax
4801aarch64 = umax
4802link-arm = vmaxu._EXT_
4803link-aarch64 = umax._EXT_
4804generate uint*_t
4805
4806/// Maximum (vector)
4807name = vmax
4808a = 1.0, -2.0, 3.0, -4.0
4809b = 0.0, 3.0, 2.0, 8.0
4810validate 1.0, 3.0, 3.0, 8.0
4811
4812aarch64 = fmax
4813link-aarch64 = fmax._EXT_
4814generate float64x*_t
4815
4816arm = vmax
4817aarch64 = fmax
4818link-arm = vmaxs._EXT_
4819link-aarch64 = fmax._EXT_
4820generate float*_t
4821
a2a8927a 4822/// Floating-point Maximum Number (vector)
17df50a5
XL
4823name = vmaxnm
4824a = 1.0, 2.0, 3.0, -4.0
4825b = 8.0, 16.0, -1.0, 6.0
4826validate 8.0, 16.0, 3.0, 6.0
4827
4828aarch64 = fmaxnm
4829link-aarch64 = fmaxnm._EXT_
4830generate float64x*_t
4831
4832target = fp-armv8
4833arm = vmaxnm
4834aarch64 = fmaxnm
4835link-arm = vmaxnm._EXT_
4836link-aarch64 = fmaxnm._EXT_
4837generate float*_t
4838
3c0e092e
XL
4839/// Floating-point maximum number across vector
4840name = vmaxnmv
4841a = 1., 2., 0., 1.
4842validate 2.
4843
4844aarch64 = fmaxnmp
4845link-aarch64 = fmaxnmv._EXT2_._EXT_
4846generate float32x2_t:f32, float64x2_t:f64
4847aarch64 = fmaxnmv
4848generate float32x4_t:f32
4849
17df50a5
XL
4850/// Floating-point Maximum Number Pairwise (vector).
4851name = vpmaxnm
4852a = 1.0, 2.0
4853b = 6.0, -3.0
4854validate 2.0, 6.0
4855aarch64 = fmaxnmp
4856link-aarch64 = fmaxnmp._EXT_
4857generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t
4858
4859/// Floating-point Maximum Number Pairwise (vector).
4860name = vpmaxnm
4861a = 1.0, 2.0, 3.0, -4.0
4862b = 8.0, 16.0, -1.0, 6.0
4863validate 2.0, 3.0, 16.0, 6.0
4864aarch64 = fmaxnmp
4865link-aarch64 = fmaxnmp._EXT_
4866generate float32x4_t:float32x4_t:float32x4_t
4867
3c0e092e
XL
4868/// Floating-point maximum number pairwise
4869name = vpmaxnm
4870out-suffix
4871a = 1., 2.
4872validate 2.
4873
4874aarch64 = fmaxnmp
4875link-aarch64 = fmaxnmv._EXT2_._EXT_
4876generate float32x2_t:f32
4877name = vpmaxnmq
4878generate float64x2_t:f64
4879
4880/// Floating-point maximum pairwise
4881name = vpmax
4882out-suffix
4883a = 1., 2.
4884validate 2.
4885
4886aarch64 = fmaxp
4887link-aarch64 = fmaxv._EXT2_._EXT_
4888generate float32x2_t:f32
4889name = vpmaxq
4890generate float64x2_t:f64
4891
17df50a5
XL
4892/// Minimum (vector)
4893name = vmin
4894a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4895b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
4896validate 1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1
4897
4898arm = vmin
4899aarch64 = smin
4900link-arm = vmins._EXT_
4901link-aarch64 = smin._EXT_
4902generate int*_t
4903
4904arm = vmin
4905aarch64 = umin
4906link-arm = vminu._EXT_
4907link-aarch64 = umin._EXT_
4908generate uint*_t
4909
4910/// Minimum (vector)
4911name = vmin
4912a = 1.0, -2.0, 3.0, -4.0
4913b = 0.0, 3.0, 2.0, 8.0
4914validate 0.0, -2.0, 2.0, -4.0
4915
4916aarch64 = fmin
4917link-aarch64 = fmin._EXT_
4918generate float64x*_t
4919
4920arm = vmin
4921aarch64 = fmin
fc512014
XL
4922link-arm = vmins._EXT_
4923link-aarch64 = fmin._EXT_
4924generate float*_t
17df50a5 4925
a2a8927a 4926/// Floating-point Minimum Number (vector)
17df50a5
XL
4927name = vminnm
4928a = 1.0, 2.0, 3.0, -4.0
4929b = 8.0, 16.0, -1.0, 6.0
4930validate 1.0, 2.0, -1.0, -4.0
4931
4932aarch64 = fminnm
4933link-aarch64 = fminnm._EXT_
4934generate float64x*_t
4935
4936target = fp-armv8
4937arm = vminnm
4938aarch64 = fminnm
4939link-arm = vminnm._EXT_
4940link-aarch64 = fminnm._EXT_
4941generate float*_t
4942
3c0e092e
XL
4943/// Floating-point minimum number across vector
4944name = vminnmv
4945a = 1., 0., 2., 3.
4946validate 0.
4947
4948aarch64 = fminnmp
4949link-aarch64 = fminnmv._EXT2_._EXT_
4950generate float32x2_t:f32, float64x2_t:f64
4951aarch64 = fminnmv
4952generate float32x4_t:f32
4953
4954/// Vector move
4955name = vmovl_high
4956no-q
353b0b11 4957multi_fn = simd_shuffle!, a:half, a, a, {asc-halflen-halflen}
3c0e092e
XL
4958multi_fn = vmovl-noqself-noext, a
4959a = 1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10
4960validate 3, 4, 5, 6, 7, 8, 9, 10
4961
4962aarch64 = sxtl2
4963generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t
4964
4965aarch64 = uxtl2
4966generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t
4967
4968/// Floating-point add pairwise
4969name = vpadd
4970a = 1., 2., 3., 4.
4971b = 3., 4., 5., 6.
4972validate 3., 7., 7., 11.
4973
4974aarch64 = faddp
4975link-aarch64 = faddp._EXT_
4976generate float32x4_t, float64x2_t
4977
4978arm = vpadd
4979link-arm = vpadd._EXT_
4980generate float32x2_t
4981
4982/// Floating-point add pairwise
4983name = vpadd
4984out-suffix
c620b35d
FG
4985multi_fn = simd_extract!, a1:out_t, a, 0
4986multi_fn = simd_extract!, a2:out_t, a, 1
3c0e092e
XL
4987multi_fn = a1 + a2
4988a = 1., 2.
4989validate 3.
4990
4991aarch64 = nop
4992generate float32x2_t:f32, float64x2_t:f64
4993
17df50a5
XL
4994/// Floating-point Minimum Number Pairwise (vector).
4995name = vpminnm
4996a = 1.0, 2.0
4997b = 6.0, -3.0
4998validate 1.0, -3.0
3c0e092e 4999
17df50a5
XL
5000aarch64 = fminnmp
5001link-aarch64 = fminnmp._EXT_
5002generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t
5003
5004/// Floating-point Minimum Number Pairwise (vector).
5005name = vpminnm
5006a = 1.0, 2.0, 3.0, -4.0
5007b = 8.0, 16.0, -1.0, 6.0
5008validate 1.0, -4.0, 8.0, -1.0
5009aarch64 = fminnmp
5010link-aarch64 = fminnmp._EXT_
5011generate float32x4_t:float32x4_t:float32x4_t
5012
3c0e092e
XL
5013/// Floating-point minimum number pairwise
5014name = vpminnm
5015out-suffix
5016a = 1., 2.
5017validate 1.
5018
5019aarch64 = fminnmp
5020link-aarch64 = fminnmv._EXT2_._EXT_
5021generate float32x2_t:f32
5022name = vpminnmq
5023generate float64x2_t:f64
5024
5025/// Floating-point minimum pairwise
5026name = vpmin
5027out-suffix
5028a = 1., 2.
5029validate 1.
5030
5031aarch64 = fminp
5032link-aarch64 = fminv._EXT2_._EXT_
5033generate float32x2_t:f32
5034name = vpminq
5035generate float64x2_t:f64
5036
17df50a5
XL
5037/// Signed saturating doubling multiply long
5038name = vqdmull
5039a = 0, 1, 2, 3, 4, 5, 6, 7
5040b = 1, 2, 3, 4, 5, 6, 7, 8
5041validate 0, 4, 12, 24, 40, 60, 84, 108
5042
5043aarch64 = sqdmull
5044link-aarch64 = sqdmull._EXT2_
5045arm = vqdmull
5046link-arm = vqdmull._EXT2_
5047generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
5048
5049/// Signed saturating doubling multiply long
5050name = vqdmull
5051multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5052multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
c620b35d 5053multi_fn = simd_extract!, {vqdmull-in_ntt-noext, a, b}, 0
17df50a5
XL
5054a = 2
5055b = 3
5056validate 12
5057
5058aarch64 = sqdmull
5059generate i16:i16:i32
5060
5061/// Signed saturating doubling multiply long
5062name = vqdmull
5063a = 2
5064b = 3
5065validate 12
5066
5067aarch64 = sqdmull
5068link-aarch64 = sqdmulls.scalar
5069generate i32:i32:i64
5070
5071/// Vector saturating doubling long multiply with scalar
5072name = vqdmull_n
5073no-q
5074multi_fn = vqdmull-in_ntt-noext, a, {vdup_n-in_ntt-noext, b}
5075a = 2, 4, 6, 8
5076b = 2
5077validate 8, 16, 24, 32
5078
5079aarch64 = sqdmull
5080arm = vqdmull
5081generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
5082
5083/// Signed saturating doubling multiply long
5084name = vqdmull_high
5085no-q
353b0b11
FG
5086multi_fn = simd_shuffle!, a:half, a, a, {asc-halflen-halflen}
5087multi_fn = simd_shuffle!, b:half, b, b, {asc-halflen-halflen}
17df50a5
XL
5088multi_fn = vqdmull-noqself-noext, a, b
5089a = 0, 1, 4, 5, 4, 5, 6, 7
5090b = 1, 2, 5, 6, 5, 6, 7, 8
5091validate 40, 60, 84, 112
5092
5093aarch64 = sqdmull2
5094generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
5095
5096/// Signed saturating doubling multiply long
5097name = vqdmull_high_n
5098no-q
353b0b11 5099multi_fn = simd_shuffle!, a:in_ntt, a, a, {asc-out_len-out_len}
17df50a5
XL
5100multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5101multi_fn = vqdmull-in_ntt-noext, a, b
5102a = 0, 2, 8, 10, 8, 10, 12, 14
5103b = 2
5104validate 32, 40, 48, 56
5105
5106aarch64 = sqdmull2
5107generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
5108
5109/// Vector saturating doubling long multiply by scalar
5110name = vqdmull_lane
5111constn = N
5112multi_fn = static_assert_imm-in_exp_len-N
353b0b11 5113multi_fn = simd_shuffle!, b:in_t0, b, b, {dup-out_len-N as u32}
17df50a5
XL
5114multi_fn = vqdmull-noqself-noext, a, b
5115a = 1, 2, 3, 4
5116b = 0, 2, 2, 0, 2, 0, 0, 0
5117n = HFLEN
5118validate 4, 8, 12, 16
5119
5120aarch64 = sqdmull
5121generate int16x4_t:int16x8_t:int32x4_t, int32x2_t:int32x4_t:int64x2_t
5122
5123arm = vqdmull
5124generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
5125
5126/// Signed saturating doubling multiply long
5127name = vqdmullh_lane
5128constn = N
5129multi_fn = static_assert_imm-in_exp_len-N
c620b35d 5130multi_fn = simd_extract!, b:in_t0, b, N as u32
17df50a5
XL
5131multi_fn = vqdmullh-noqself-noext, a, b
5132a = 2
5133b = 0, 2, 2, 0, 2, 0, 0, 0
5134n = HFLEN
5135validate 8
5136
5137aarch64 = sqdmull
5138generate i16:int16x4_t:i32, i16:int16x8_t:i32
5139
5140/// Signed saturating doubling multiply long
5141name = vqdmulls_lane
5142constn = N
5143multi_fn = static_assert_imm-in_exp_len-N
c620b35d 5144multi_fn = simd_extract!, b:in_t0, b, N as u32
17df50a5
XL
5145multi_fn = vqdmulls-noqself-noext, a, b
5146a = 2
5147b = 0, 2, 2, 0, 2, 0, 0, 0
5148n = HFLEN
5149validate 8
5150
5151aarch64 = sqdmull
5152generate i32:int32x2_t:i64, i32:int32x4_t:i64
5153
5154/// Signed saturating doubling multiply long
5155name = vqdmull_high_lane
5156constn = N
5157multi_fn = static_assert_imm-in_exp_len-N
353b0b11
FG
5158multi_fn = simd_shuffle!, a:in_t, a, a, {asc-out_len-out_len}
5159multi_fn = simd_shuffle!, b:in_t, b, b, {dup-out_len-N as u32}
17df50a5
XL
5160multi_fn = vqdmull-self-noext, a, b
5161a = 0, 1, 4, 5, 4, 5, 6, 7
5162b = 0, 2, 2, 0, 2, 0, 0, 0
5163n = HFLEN
5164validate 16, 20, 24, 28
5165
5166aarch64 = sqdmull2
5167generate int16x8_t:int16x4_t:int32x4_t, int32x4_t:int32x2_t:int64x2_t
5168
5169/// Signed saturating doubling multiply long
5170name = vqdmull_high_lane
5171constn = N
5172multi_fn = static_assert_imm-in_exp_len-N
353b0b11
FG
5173multi_fn = simd_shuffle!, a:half, a, a, {asc-out_len-out_len}
5174multi_fn = simd_shuffle!, b:half, b, b, {dup-out_len-N as u32}
17df50a5
XL
5175multi_fn = vqdmull-noqself-noext, a, b
5176a = 0, 1, 4, 5, 4, 5, 6, 7
5177b = 0, 2, 2, 0, 2, 0, 0, 0
5178n = HFLEN
5179validate 16, 20, 24, 28
5180
5181aarch64 = sqdmull2
5182generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
5183
5184/// Signed saturating doubling multiply-add long
5185name = vqdmlal
5186multi_fn = vqadd-out-noext, a, {vqdmull-self-noext, b, c}
5187a = 1, 1, 1, 1
5188b = 1, 2, 3, 4
5189c = 2, 2, 2, 2
5190validate 5, 9, 13, 17
5191
5192aarch64 = sqdmlal
5193arm = vqdmlal
5194generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
5195
5196/// Vector widening saturating doubling multiply accumulate with scalar
5197name = vqdmlal
5198n-suffix
5199multi_fn = vqadd-out-noext, a, {vqdmull_n-self-noext, b, c}
5200a = 1, 1, 1, 1
5201b = 1, 2, 3, 4
5202c = 2
5203validate 5, 9, 13, 17
5204
5205aarch64 = sqdmlal
5206arm = vqdmlal
5207generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
5208
5209/// Signed saturating doubling multiply-add long
5210name = vqdmlal_high
5211no-q
5212multi_fn = vqadd-out-noext, a, {vqdmull_high-noqself-noext, b, c}
5213a = 1, 2, 3, 4
5214b = 0, 1, 4, 5, 4, 5, 6, 7
5215c = 1, 2, 5, 6, 5, 6, 7, 8
5216validate 41, 62, 87, 116
5217
5218aarch64 = sqdmlal2
5219generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
5220
5221/// Signed saturating doubling multiply-add long
5222name = vqdmlal_high_n
5223no-q
5224multi_fn = vqadd-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
5225a = 1, 2, 3, 4
5226b = 0, 2, 8, 10, 8, 10, 12, 14
5227c = 2
5228validate 33, 42, 51, 60
5229
5230aarch64 = sqdmlal2
5231generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
5232
5233/// Vector widening saturating doubling multiply accumulate with scalar
5234name = vqdmlal_lane
5235in2-suffix
5236constn = N
5237multi_fn = static_assert_imm-in2_exp_len-N
5238multi_fn = vqadd-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
5239a = 1, 2, 3, 4
5240b = 1, 2, 3, 4
5241c = 0, 2, 2, 0, 2, 0, 0, 0
5242n = HFLEN
5243validate 5, 10, 15, 20
5244
5245aarch64 = sqdmlal
5246generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
5247
5248arm = vqdmlal
5249generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
5250
5251/// Signed saturating doubling multiply-add long
5252name = vqdmlal_high_lane
5253in2-suffix
5254constn = N
5255multi_fn = static_assert_imm-in2_exp_len-N
5256multi_fn = vqadd-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
5257a = 1, 2, 3, 4
5258b = 0, 1, 4, 5, 4, 5, 6, 7
5259c = 0, 2, 0, 0, 0, 0, 0, 0
5260n = 1
5261validate 17, 22, 27, 32
5262
5263aarch64 = sqdmlal2
5264generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t: int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
5265
3c0e092e
XL
5266/// Signed saturating doubling multiply-add long
5267name = vqdmlal
5268multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
c620b35d 5269multi_fn = vqadd-out-noext, a, {simd_extract!, x, 0}
3c0e092e
XL
5270a = 1
5271b = 1
5272c = 2
5273validate 5
5274
49aad941
FG
5275aarch64 = sqdmlal
5276generate i32:i16:i16:i32
5277
5278/// Signed saturating doubling multiply-add long
5279name = vqdmlal
5280multi_fn = vqadd-out-noext, x:out_t, a, {vqdmulls-in_ntt-noext, b, c}
5281multi_fn = x as out_t
5282a = 1
5283b = 1
5284c = 2
5285validate 5
5286
5287aarch64 = sqdmlal
5288generate i64:i32:i32:i64
3c0e092e
XL
5289
5290/// Signed saturating doubling multiply-add long
5291name = vqdmlalh_lane
5292in2-suffix
5293constn = LANE
5294multi_fn = static_assert_imm-in2_exp_len-LANE
c620b35d 5295multi_fn = vqdmlal-self-noext, a, b, {simd_extract!, c, LANE as u32}
3c0e092e
XL
5296a = 1
5297b = 1
5298c = 2, 1, 1, 1, 1, 1, 1, 1
5299n = 0
5300validate 5
5301
5302aarch64 = sqdmlal
5303generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32
5304name = vqdmlals_lane
49aad941 5305aarch64 = sqdmlal
3c0e092e
XL
5306generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64
5307
17df50a5
XL
5308/// Signed saturating doubling multiply-subtract long
5309name = vqdmlsl
5310multi_fn = vqsub-out-noext, a, {vqdmull-self-noext, b, c}
5311a = 3, 7, 11, 15
5312b = 1, 2, 3, 4
5313c = 2, 2, 2, 2
5314validate -1, -1, -1, -1
5315
5316aarch64 = sqdmlsl
5317arm = vqdmlsl
5318generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
5319
5320/// Vector widening saturating doubling multiply subtract with scalar
5321name = vqdmlsl
5322n-suffix
5323multi_fn = vqsub-out-noext, a, {vqdmull_n-self-noext, b, c}
5324a = 3, 7, 11, 15
5325b = 1, 2, 3, 4
5326c = 2
5327validate -1, -1, -1, -1
5328
5329aarch64 = sqdmlsl
5330arm = vqdmlsl
5331generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
5332
5333/// Signed saturating doubling multiply-subtract long
5334name = vqdmlsl_high
5335no-q
5336multi_fn = vqsub-out-noext, a, {vqdmull_high-noqself-noext, b, c}
5337a = 39, 58, 81, 108
5338b = 0, 1, 4, 5, 4, 5, 6, 7
5339c = 1, 2, 5, 6, 5, 6, 7, 8
5340validate -1, -2, -3, -4
5341
5342aarch64 = sqdmlsl2
5343generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
5344
5345/// Signed saturating doubling multiply-subtract long
5346name = vqdmlsl_high_n
5347no-q
5348multi_fn = vqsub-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
5349a = 31, 38, 45, 52
5350b = 0, 2, 8, 10, 8, 10, 12, 14
5351c = 2
5352validate -1, -2, -3, -4
5353
5354aarch64 = sqdmlsl2
5355generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
5356
5357/// Vector widening saturating doubling multiply subtract with scalar
5358name = vqdmlsl_lane
5359in2-suffix
5360constn = N
5361multi_fn = static_assert_imm-in2_exp_len-N
5362multi_fn = vqsub-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
5363a = 3, 6, 9, 12
5364b = 1, 2, 3, 4
5365c = 0, 2, 2, 0, 2, 0, 0, 0
5366n = HFLEN
5367validate -1, -2, -3, -4
5368
5369aarch64 = sqdmlsl
5370generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
5371
5372arm = vqdmlsl
5373generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
5374
5375/// Signed saturating doubling multiply-subtract long
5376name = vqdmlsl_high_lane
5377in2-suffix
5378constn = N
5379multi_fn = static_assert_imm-in2_exp_len-N
5380multi_fn = vqsub-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
5381a = 15, 18, 21, 24
5382b = 0, 1, 4, 5, 4, 5, 6, 7
5383c = 0, 2, 0, 0, 0, 0, 0, 0
5384n = 1
5385validate -1, -2, -3, -4
5386
5387aarch64 = sqdmlsl2
5388generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t: int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
5389
3c0e092e
XL
5390/// Signed saturating doubling multiply-subtract long
5391name = vqdmlsl
5392multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
c620b35d 5393multi_fn = vqsub-out-noext, a, {simd_extract!, x, 0}
3c0e092e
XL
5394a = 10
5395b = 1
5396c = 2
5397validate 6
5398
49aad941
FG
5399aarch64 = sqdmlsl
5400generate i32:i16:i16:i32
5401
5402/// Signed saturating doubling multiply-subtract long
5403name = vqdmlsl
5404multi_fn = vqsub-out-noext, x:out_t, a, {vqdmulls-in_ntt-noext, b, c}
5405multi_fn = x as out_t
5406a = 10
5407b = 1
5408c = 2
5409validate 6
5410
5411aarch64 = sqdmlsl
5412generate i64:i32:i32:i64
3c0e092e
XL
5413
5414/// Signed saturating doubling multiply-subtract long
5415name = vqdmlslh_lane
5416in2-suffix
5417constn = LANE
5418multi_fn = static_assert_imm-in2_exp_len-LANE
c620b35d 5419multi_fn = vqdmlsl-self-noext, a, b, {simd_extract!, c, LANE as u32}
3c0e092e
XL
5420a = 10
5421b = 1
5422c = 2, 1, 1, 1, 1, 1, 1, 1
5423n = 0
5424validate 6
5425
5426aarch64 = sqdmlsl
5427generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32
5428name = vqdmlsls_lane
49aad941 5429aarch64 = sqdmlsl
3c0e092e
XL
5430generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64
5431
17df50a5
XL
5432/// Signed saturating doubling multiply returning high half
5433name = vqdmulh
5434a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5435b = 2, 2, 2, 2, 2, 2, 2, 2
5436validate 1, 1, 1, 1, 1, 1, 1, 1
5437
5438aarch64 = sqdmulh
5439link-aarch64 = sqdmulh._EXT_
5440arm = vqdmulh
5441link-arm = vqdmulh._EXT_
5442generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
5443
5444/// Signed saturating doubling multiply returning high half
5445name = vqdmulh
5446multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5447multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
c620b35d 5448multi_fn = simd_extract!, {vqdmulh-in_ntt-noext, a, b}, 0
17df50a5
XL
5449a = 1
5450b = 2
5451validate 0
5452
5453aarch64 = sqdmulh
5454generate i16, i32
5455
5456/// Vector saturating doubling multiply high with scalar
5457name = vqdmulh_n
5458out-suffix
5459multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5460multi_fn = vqdmulh-out-noext, a, b
5461a = MAX, MAX, MAX, MAX
5462b = 2
5463validate 1, 1, 1, 1
5464
5465aarch64 = sqdmulh
5466arm = vqdmulh
5467generate int16x4_t:i16:int16x4_t, int32x2_t:i32:int32x2_t
5468
5469/// Vector saturating doubling multiply high with scalar
5470name = vqdmulhq_n
c295e0f8 5471no-q
17df50a5
XL
5472multi_fn = vdupq_n-in_ntt-noext, b:out_t, b
5473multi_fn = vqdmulh-out-noext, a, b
5474a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5475b = 2
5476validate 1, 1, 1, 1, 1, 1, 1, 1
5477
5478aarch64 = sqdmulh
5479arm = vqdmulh
5480generate int16x8_t:i16:int16x8_t, int32x4_t:i32:int32x4_t
5481
5482/// Signed saturating doubling multiply returning high half
5483name = vqdmulhh_lane
5484constn = N
5485multi_fn = static_assert_imm-in_exp_len-N
c620b35d 5486multi_fn = simd_extract!, b:in_t0, b, N as u32
17df50a5
XL
5487multi_fn = vqdmulhh-out_ntt-noext, a, b
5488a = 2
5489b = 0, 0, MAX, 0, 0, 0, 0, 0
5490n = 2
5491validate 1
5492
5493aarch64 = sqdmulh
5494generate i16:int16x4_t:i16, i16:int16x8_t:i16
5495
5496/// Signed saturating doubling multiply returning high half
5497name = vqdmulhs_lane
5498constn = N
5499multi_fn = static_assert_imm-in_exp_len-N
c620b35d 5500multi_fn = simd_extract!, b:in_t0, b, N as u32
17df50a5
XL
5501multi_fn = vqdmulhs-out_ntt-noext, a, b
5502a = 2
5503b = 0, MAX, 0, 0
5504n = 1
5505validate 1
5506
5507aarch64 = sqdmulh
5508generate i32:int32x2_t:i32, i32:int32x4_t:i32
5509
3c0e092e
XL
5510/// Vector saturating doubling multiply high by scalar
5511name = vqdmulh
5512lane-suffixes
5513constn = LANE
5514multi_fn = static_assert_imm-in2_exp_len-LANE
c620b35d 5515multi_fn = vqdmulh-out-noext, a, {vdup-nout-noext, {simd_extract!, b, LANE as u32}}
3c0e092e
XL
5516a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5517b = 2, 1, 1, 1, 1, 1, 1, 1
5518n = 0
5519validate 1, 1, 1, 1, 1, 1, 1, 1
5520
5521aarch64 = sqdmulh
5522generate int16x4_t, int16x8_t:int16x4_t:int16x8_t
5523generate int32x2_t, int32x4_t:int32x2_t:int32x4_t
5524arm = vqdmulh
5525generate int16x8_t, int16x4_t:int16x8_t:int16x4_t
5526generate int32x4_t, int32x2_t:int32x4_t:int32x2_t
5527
17df50a5
XL
5528/// Signed saturating extract narrow
5529name = vqmovn
5530no-q
5531a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5532validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5533
5534aarch64 = sqxtn
5535link-aarch64 = sqxtn._EXT2_
5536arm = vqmovn
5537link-arm = vqmovns._EXT2_
5538generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
5539
5540/// Unsigned saturating extract narrow
5541name = vqmovn
5542no-q
5543a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5544validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5545
5546aarch64 = uqxtn
5547link-aarch64 = uqxtn._EXT2_
5548arm = vqmovn
5549link-arm = vqmovnu._EXT2_
5550generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
5551
5552/// Saturating extract narrow
5553name = vqmovn
c620b35d 5554multi_fn = simd_extract!, {vqmovn-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
17df50a5
XL
5555a = 1
5556validate 1
5557
5558aarch64 = sqxtn
5559generate i16:i8, i32:i16
5560aarch64 = uqxtn
5561generate u16:u8, u32:u16
5562
5563/// Saturating extract narrow
5564name = vqmovn
5565a = 1
5566validate 1
5567
5568aarch64 = sqxtn
5569link-aarch64 = scalar.sqxtn._EXT2_._EXT_
5570generate i64:i32
5571
5572aarch64 = uqxtn
5573link-aarch64 = scalar.uqxtn._EXT2_._EXT_
5574generate u64:u32
5575
5576/// Signed saturating extract narrow
5577name = vqmovn_high
5578no-q
353b0b11 5579multi_fn = simd_shuffle!, a, {vqmovn-noqself-noext, b}, {asc-0-out_len}
17df50a5
XL
5580a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5581b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5582validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5583
5584aarch64 = sqxtn2
5585generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
5586aarch64 = uqxtn2
5587generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
5588
5589/// Signed saturating extract unsigned narrow
5590name = vqmovun
5591no-q
5592a = -1, -1, -1, -1, -1, -1, -1, -1
5593validate 0, 0, 0, 0, 0, 0, 0, 0
5594
5595aarch64 = sqxtun
5596link-aarch64 = sqxtun._EXT2_
5597arm = vqmovun
5598link-arm = vqmovnsu._EXT2_
5599generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t
5600
5601/// Signed saturating extract unsigned narrow
5602name = vqmovun
c620b35d 5603multi_fn = simd_extract!, {vqmovun-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
17df50a5
XL
5604a = 1
5605validate 1
5606
5607aarch64 = sqxtun
5608generate i16:u8, i32:u16, i64:u32
5609
5610/// Signed saturating extract unsigned narrow
5611name = vqmovun_high
5612no-q
353b0b11 5613multi_fn = simd_shuffle!, a, {vqmovun-noqself-noext, b}, {asc-0-out_len}
17df50a5
XL
5614a = 0, 0, 0, 0, 0, 0, 0, 0
5615b = -1, -1, -1, -1, -1, -1, -1, -1
5616validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
5617
5618aarch64 = sqxtun2
5619generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
5620
5621/// Signed saturating rounding doubling multiply returning high half
5622name = vqrdmulh
5623a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5624b = 2, 2, 2, 2, 2, 2, 2, 2
5625validate 2, 2, 2, 2, 2, 2, 2, 2
5626
5627aarch64 = sqrdmulh
5628link-aarch64 = sqrdmulh._EXT_
5629arm = vqrdmulh
5630link-arm = vqrdmulh._EXT_
5631generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
5632
5633/// Signed saturating rounding doubling multiply returning high half
5634name = vqrdmulh
c620b35d 5635multi_fn = simd_extract!, {vqrdmulh-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
17df50a5
XL
5636a = 1
5637b = 2
5638validate 0
5639
5640aarch64 = sqrdmulh
5641generate i16, i32
5642
5643/// Vector saturating rounding doubling multiply high with scalar
5644name = vqrdmulh
5645out-n-suffix
5646multi_fn = vqrdmulh-out-noext, a, {vdup-nout-noext, b}
5647a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5648b = 2
5649validate 2, 2, 2, 2, 2, 2, 2, 2
5650
5651aarch64 = sqrdmulh
5652arm = vqrdmulh
5653generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t
5654
5655/// Vector rounding saturating doubling multiply high by scalar
5656name = vqrdmulh
5657lane-suffixes
5658constn = LANE
5659multi_fn = static_assert_imm-in_exp_len-LANE
353b0b11 5660multi_fn = simd_shuffle!, b:out_t, b, b, {dup-out_len-LANE as u32}
17df50a5
XL
5661multi_fn = vqrdmulh-out-noext, a, b
5662a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5663b = 0, 2, 0, 0, 0, 0, 0, 0,
5664n = 1
5665validate 2, 2, 2, 2, 2, 2, 2, 2
5666
5667aarch64 = sqrdmulh
5668arm = vqrdmulh
5669generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
5670generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t
5671
5672/// Signed saturating rounding doubling multiply returning high half
5673name = vqrdmulh
5674lane-suffixes
5675constn = LANE
5676multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 5677multi_fn = vqrdmulh-out-noext, a, {simd_extract!, b, LANE as u32}
17df50a5
XL
5678a = 1
5679b = 0, 2, 0, 0, 0, 0, 0, 0,
5680n = 1
5681validate 0
5682
5683aarch64 = sqrdmulh
5684generate i16:int16x4_t:i16, i16:int16x8_t:i16, i32:int32x2_t:i32, i32:int32x4_t:i32
5685
5686/// Signed saturating rounding doubling multiply accumulate returning high half
5687name = vqrdmlah
17df50a5
XL
5688a = 1, 1, 1, 1, 1, 1, 1, 1
5689b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5690c = 2, 2, 2, 2, 2, 2, 2, 2
5691validate 3, 3, 3, 3, 3, 3, 3, 3
5692
3c0e092e 5693aarch64 = sqrdmlah
5e7ed085 5694link-aarch64 = sqrdmlah._EXT_
3c0e092e 5695target = rdm
17df50a5
XL
5696generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
5697
5698/// Signed saturating rounding doubling multiply accumulate returning high half
5699name = vqrdmlah
3c0e092e
XL
5700multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5701multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5702multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
c620b35d 5703multi_fn = simd_extract!, {vqrdmlah-in_ntt-noext, a, b, c}, 0
17df50a5
XL
5704a = 1
5705b = 1
5706c = 2
5707validate 1
5708
3c0e092e
XL
5709aarch64 = sqrdmlah
5710target = rdm
17df50a5
XL
5711generate i16, i32
5712
5713/// Signed saturating rounding doubling multiply accumulate returning high half
5714name = vqrdmlah
5715in2-lane-suffixes
5716constn = LANE
5717multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 5718multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
5e7ed085 5719multi_fn = vqrdmlah-out-noext, a, b, c
17df50a5
XL
5720a = 1, 1, 1, 1, 1, 1, 1, 1
5721b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5722c = 0, 2, 0, 0, 0, 0, 0, 0
5723n = 1
5724validate 3, 3, 3, 3, 3, 3, 3, 3
5725
3c0e092e
XL
5726aarch64 = sqrdmlah
5727target = rdm
17df50a5
XL
5728generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
5729generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
5730
5731/// Signed saturating rounding doubling multiply accumulate returning high half
5732name = vqrdmlah
5733in2-lane-suffixes
5734constn = LANE
5735multi_fn = static_assert_imm-in2_exp_len-LANE
c620b35d 5736multi_fn = vqrdmlah-self-noext, a, b, {simd_extract!, c, LANE as u32}
17df50a5
XL
5737a = 1
5738b = 1
5739c = 0, 2, 0, 0, 0, 0, 0, 0
5740n = 1
5741validate 1
5742
3c0e092e
XL
5743aarch64 = sqrdmlah
5744target = rdm
17df50a5
XL
5745generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32
5746
5747/// Signed saturating rounding doubling multiply subtract returning high half
5748name = vqrdmlsh
04454e1e 5749link-aarch64 = sqrdmlsh._EXT_
17df50a5
XL
5750a = 1, 1, 1, 1, 1, 1, 1, 1
5751b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5752c = 2, 2, 2, 2, 2, 2, 2, 2
5753validate -1, -1, -1, -1, -1, -1, -1, -1
5754
04454e1e
FG
5755aarch64 = sqrdmlsh
5756target = rdm
17df50a5
XL
5757generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
5758
5759/// Signed saturating rounding doubling multiply subtract returning high half
5760name = vqrdmlsh
04454e1e
FG
5761multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5762multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5763multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
c620b35d 5764multi_fn = simd_extract!, {vqrdmlsh-in_ntt-noext, a, b, c}, 0
17df50a5
XL
5765a = 1
5766b = 1
5767c = 2
5768validate 1
5769
04454e1e
FG
5770aarch64 = sqrdmlsh
5771target = rdm
17df50a5
XL
5772generate i16, i32
5773
5774/// Signed saturating rounding doubling multiply subtract returning high half
5775name = vqrdmlsh
5776in2-lane-suffixes
5777constn = LANE
5778multi_fn = static_assert_imm-in2_exp_len-LANE
353b0b11 5779multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
04454e1e 5780multi_fn = vqrdmlsh-out-noext, a, b, c
17df50a5
XL
5781a = 1, 1, 1, 1, 1, 1, 1, 1
5782b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5783c = 0, 2, 0, 0, 0, 0, 0, 0
5784n = 1
5785validate -1, -1, -1, -1, -1, -1, -1, -1
5786
04454e1e
FG
5787aarch64 = sqrdmlsh
5788target = rdm
17df50a5
XL
5789generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
5790generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
5791
5792/// Signed saturating rounding doubling multiply subtract returning high half
5793name = vqrdmlsh
5794in2-lane-suffixes
5795constn = LANE
5796multi_fn = static_assert_imm-in2_exp_len-LANE
c620b35d 5797multi_fn = vqrdmlsh-self-noext, a, b, {simd_extract!, c, LANE as u32}
17df50a5
XL
5798a = 1
5799b = 1
5800c = 0, 2, 0, 0, 0, 0, 0, 0
5801n = 1
5802validate 1
5803
04454e1e
FG
5804aarch64 = sqrdmlsh
5805target = rdm
17df50a5
XL
5806generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32
5807
5808/// Signed saturating rounding shift left
5809name = vqrshl
5810a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
5811b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
5812validate 8, MIN, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
5813
5814aarch64 = sqrshl
5815link-aarch64 = sqrshl._EXT_
5816generate i32, i64
5817
5818arm = vqrshl
5819link-arm = vqrshifts._EXT_
5820generate int*_t, int64x*_t
5821
5822/// Signed saturating rounding shift left
5823name = vqrshl
5824multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5825multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
c620b35d 5826multi_fn = simd_extract!, {vqrshl-in_ntt-noext, a, b}, 0
17df50a5
XL
5827a = 1
5828b = 2
5829validate 4
5830
5831aarch64 = sqrshl
5832generate i8, i16
5833
5834/// Unsigned signed saturating rounding shift left
5835name = vqrshl
5836out-suffix
5837a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
5838b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
5839validate 8, 0, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
5840
5841aarch64 = uqrshl
5842link-aarch64 = uqrshl._EXT_
5843generate u32:i32:u32, u64:i64:u64
5844
5845arm = vqrshl
5846link-arm = vqrshiftu._EXT_
5847generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
5848generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
5849
5850/// Unsigned signed saturating rounding shift left
5851name = vqrshl
5852out-suffix
5853multi_fn = vdup_n-out_ntt-noext, a:out_ntt, a
5854multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
c620b35d 5855multi_fn = simd_extract!, {vqrshl-out_ntt-noext, a, b}, 0
17df50a5
XL
5856a = 1
5857b = 2
5858validate 4
5859
5860aarch64 = uqrshl
5861generate u8:i8:u8, u16:i16:u16
5862
5863/// Signed saturating rounded shift right narrow
5864name = vqrshrn
5865noq-n-suffix
5866constn = N
5867multi_fn = static_assert-N-1-halfbits
5868a = MIN, 4, 8, 12, 16, 20, 24, 28
5869n = 2
5870validate MIN, 1, 2, 3, 4, 5, 6, 7
5871
5872aarch64 = sqrshrn
5873link-aarch64 = sqrshrn._EXT2_
5874const-aarch64 = N
5875
5876arm = vqrshrn
5877link-arm = vqrshiftns._EXT2_
5878const-arm = -N as ttn
c295e0f8 5879arm-aarch64-separate
17df50a5
XL
5880generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
5881
5882/// Signed saturating rounded shift right narrow
5883name = vqrshrn
5884noq-n-suffix
5885constn = N
5886multi_fn = static_assert-N-1-halfbits
5887multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
c620b35d 5888multi_fn = simd_extract!, {vqrshrn_n-in_ntt-::<N>, a}, 0
17df50a5
XL
5889a = 4
5890n = 2
5891validate 1
5892
5893aarch64 = sqrshrn
5894generate i16:i8, i32:i16, i64:i32
5895
5896/// Signed saturating rounded shift right narrow
5897name = vqrshrn_high
5898noq-n-suffix
5899constn = N
5900multi_fn = static_assert-N-1-halfbits
353b0b11 5901multi_fn = simd_shuffle!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
17df50a5
XL
5902a = 0, 1, 2, 3, 2, 3, 6, 7
5903b = 8, 12, 24, 28, 48, 52, 56, 60
5904n = 2
5905validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
5906
5907aarch64 = sqrshrn2
5908generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
5909
5910/// Unsigned signed saturating rounded shift right narrow
5911name = vqrshrn
5912noq-n-suffix
5913constn = N
5914multi_fn = static_assert-N-1-halfbits
5915a = MIN, 4, 8, 12, 16, 20, 24, 28
5916n = 2
5917validate 0, 1, 2, 3, 4, 5, 6, 7
5918
5919aarch64 = uqrshrn
5920link-aarch64 = uqrshrn._EXT2_
5921const-aarch64 = N
5922
5923arm = vqrshrn
5924link-arm = vqrshiftnu._EXT2_
5925const-arm = -N as ttn
c295e0f8 5926arm-aarch64-separate
17df50a5
XL
5927generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
5928
5929/// Unsigned saturating rounded shift right narrow
5930name = vqrshrn
5931noq-n-suffix
5932constn = N
5933multi_fn = static_assert-N-1-halfbits
5934multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
c620b35d 5935multi_fn = simd_extract!, {vqrshrn_n-in_ntt-::<N>, a}, 0
17df50a5
XL
5936a = 4
5937n = 2
5938validate 1
5939
5940aarch64 = uqrshrn
5941generate u16:u8, u32:u16, u64:u32
5942
5943/// Unsigned saturating rounded shift right narrow
5944name = vqrshrn_high
5945noq-n-suffix
5946constn = N
5947multi_fn = static_assert-N-1-halfbits
353b0b11 5948multi_fn = simd_shuffle!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
17df50a5
XL
5949a = 0, 1, 2, 3, 2, 3, 6, 7
5950b = 8, 12, 24, 28, 48, 52, 56, 60
5951n = 2
5952validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
5953
5954aarch64 = uqrshrn2
5955generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
5956
5957/// Signed saturating rounded shift right unsigned narrow
5958name = vqrshrun
5959noq-n-suffix
5960constn = N
5961multi_fn = static_assert-N-1-halfbits
5962a = 0, 4, 8, 12, 16, 20, 24, 28
5963n = 2
5964validate 0, 1, 2, 3, 4, 5, 6, 7
5965
5966aarch64 = sqrshrun
5967link-aarch64 = sqrshrun._EXT2_
5968const-aarch64 = N
5969
5970arm = vqrshrun
5971link-arm = vqrshiftnsu._EXT2_
5972const-arm = -N as ttn
c295e0f8 5973arm-aarch64-separate
17df50a5
XL
5974generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t
5975
5976/// Signed saturating rounded shift right unsigned narrow
5977name = vqrshrun
5978noq-n-suffix
5979constn = N
5980multi_fn = static_assert-N-1-halfbits
5981multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
c620b35d 5982multi_fn = simd_extract!, {vqrshrun_n-in_ntt-::<N>, a}, 0
17df50a5
XL
5983a = 4
5984n = 2
5985validate 1
5986
5987aarch64 = sqrshrun
5988generate i16:u8, i32:u16, i64:u32
5989
5990/// Signed saturating rounded shift right unsigned narrow
5991name = vqrshrun_high
5992noq-n-suffix
5993constn = N
5994multi_fn = static_assert-N-1-halfbits
353b0b11 5995multi_fn = simd_shuffle!, a, {vqrshrun_n-noqself-::<N>, b}, {asc-0-out_len}
17df50a5
XL
5996a = 0, 1, 2, 3, 2, 3, 6, 7
5997b = 8, 12, 24, 28, 48, 52, 56, 60
5998n = 2
5999validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
6000
6001aarch64 = sqrshrun2
6002generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
6003
6004/// Signed saturating shift left
6005name = vqshl
6006a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6007b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
6008validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6009
6010aarch64 = sqshl
6011link-aarch64 = sqshl._EXT_
6012generate i64
6013
6014arm = vqshl
6015link-arm = vqshifts._EXT_
6016generate int*_t, int64x*_t
6017
6018/// Signed saturating shift left
6019name = vqshl
6020multi_fn = vqshl-in_ntt-noext, c:in_ntt, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
c620b35d 6021multi_fn = simd_extract!, c, 0
17df50a5
XL
6022a = 1
6023b = 2
6024validate 4
6025
6026aarch64 = sqshl
6027generate i8, i16, i32
6028
6029/// Unsigned saturating shift left
6030name = vqshl
6031out-suffix
6032a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6033b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
6034validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6035
6036aarch64 = uqshl
6037link-aarch64 = uqshl._EXT_
6038generate u64:i64:u64
6039
6040arm = vqshl
6041link-arm = vqshiftu._EXT_
6042generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
6043generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
6044
6045/// Unsigned saturating shift left
6046name = vqshl
6047out-suffix
6048multi_fn = vqshl-out_ntt-noext, c:out_ntt, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
c620b35d 6049multi_fn = simd_extract!, c, 0
17df50a5
XL
6050a = 1
6051b = 2
6052validate 4
6053
6054aarch64 = uqshl
6055generate u8:i8:u8, u16:i16:u16, u32:i32:u32
6056
6057/// Signed saturating shift left
6058name = vqshl
6059n-suffix
6060constn = N
6061multi_fn = static_assert_imm-out_bits_exp_len-N
a2a8927a 6062multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N as _}
17df50a5
XL
6063a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6064n = 2
6065validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6066
6067aarch64 = sqshl
6068arm = vqshl
6069generate int*_t, int64x*_t
6070
6071/// Signed saturating shift left
6072name = vqshl
6073n-suffix
6074constn = N
6075multi_fn = static_assert_imm-out_bits_exp_len-N
c620b35d 6076multi_fn = simd_extract!, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
17df50a5
XL
6077a = 1
6078n = 2
6079validate 4
6080
6081aarch64 = sqshl
6082generate i8, i16, i32, i64
6083
6084/// Unsigned saturating shift left
6085name = vqshl
6086n-suffix
6087constn = N
6088multi_fn = static_assert_imm-out_bits_exp_len-N
a2a8927a 6089multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N as _}
17df50a5
XL
6090a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6091n = 2
6092validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6093
6094aarch64 = uqshl
6095arm = vqshl
6096generate uint*_t, uint64x*_t
6097
6098/// Unsigned saturating shift left
6099name = vqshl
6100n-suffix
6101constn = N
6102multi_fn = static_assert_imm-out_bits_exp_len-N
c620b35d 6103multi_fn = simd_extract!, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
17df50a5
XL
6104a = 1
6105n = 2
6106validate 4
6107
6108aarch64 = uqshl
6109generate u8, u16, u32, u64
6110
3c0e092e
XL
6111/// Signed saturating shift left unsigned
6112name = vqshlu
6113n-suffix
6114constn = N
6115multi_fn = static_assert_imm-out_bits_exp_len-N
6116a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6117n = 2
6118validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6119arm-aarch64-separate
6120
6121aarch64 = sqshlu
6122link-aarch64 = sqshlu._EXT_
6123const-aarch64 = {dup-in_len-N as ttn}
6124arm = vqshlu
6125link-arm = vqshiftsu._EXT_
6126const-arm = N as ttn
6127generate int8x8_t:uint8x8_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
6128generate int8x16_t:uint8x16_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t
6129
6130/// Signed saturating shift left unsigned
6131name = vqshlu
6132n-suffix
6133constn = N
6134multi_fn = static_assert_imm-out_bits_exp_len-N
c620b35d 6135multi_fn = simd_extract!, {vqshlu_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
3c0e092e
XL
6136a = 1
6137n = 2
6138validate 4
6139
6140aarch64 = sqshlu
6141generate i8:u8, i16:u16, i32:u32, i64:u64
6142
17df50a5
XL
6143/// Signed saturating shift right narrow
6144name = vqshrn
6145noq-n-suffix
6146constn = N
6147multi_fn = static_assert-N-1-halfbits
6148a = 0, 4, 8, 12, 16, 20, 24, 28
6149n = 2
6150validate 0, 1, 2, 3, 4, 5, 6, 7
c295e0f8 6151arm-aarch64-separate
17df50a5
XL
6152
6153aarch64 = sqshrn
6154link-aarch64 = sqshrn._EXT2_
6155const-aarch64 = N
6156generate i64:i32
6157
6158arm = vqshrn
6159link-arm = vqshiftns._EXT2_
6160const-arm = -N as ttn
6161generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
6162
6163/// Signed saturating shift right narrow
6164name = vqshrn
6165noq-n-suffix
6166constn = N
6167multi_fn = static_assert-N-1-halfbits
c620b35d 6168multi_fn = simd_extract!, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
17df50a5
XL
6169a = 4
6170n = 2
6171validate 1
6172
6173aarch64 = sqshrn
6174generate i16:i8, i32:i16
6175
6176/// Signed saturating shift right narrow
6177name = vqshrn_high
6178noq-n-suffix
6179constn = N
6180multi_fn = static_assert-N-1-halfbits
353b0b11 6181multi_fn = simd_shuffle!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
17df50a5
XL
6182a = 0, 1, 8, 9, 8, 9, 10, 11
6183b = 32, 36, 40, 44, 48, 52, 56, 60
6184n = 2
6185validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
6186
6187aarch64 = sqshrn2
6188generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
6189
6190/// Unsigned saturating shift right narrow
6191name = vqshrn
6192noq-n-suffix
6193constn = N
6194multi_fn = static_assert-N-1-halfbits
6195a = 0, 4, 8, 12, 16, 20, 24, 28
6196n = 2
6197validate 0, 1, 2, 3, 4, 5, 6, 7
c295e0f8 6198arm-aarch64-separate
17df50a5
XL
6199
6200aarch64 = uqshrn
6201link-aarch64 = uqshrn._EXT2_
6202const-aarch64 = N
6203generate u64:u32
6204
6205arm = vqshrn
6206link-arm = vqshiftnu._EXT2_
6207const-arm = -N as ttn
6208generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
6209
6210/// Unsigned saturating shift right narrow
6211name = vqshrn
6212noq-n-suffix
6213constn = N
6214multi_fn = static_assert-N-1-halfbits
c620b35d 6215multi_fn = simd_extract!, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
17df50a5
XL
6216a = 4
6217n = 2
6218validate 1
6219
6220aarch64 = uqshrn
6221generate u16:u8, u32:u16
6222
6223/// Unsigned saturating shift right narrow
6224name = vqshrn_high
6225noq-n-suffix
6226constn = N
6227multi_fn = static_assert-N-1-halfbits
353b0b11 6228multi_fn = simd_shuffle!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
17df50a5
XL
6229a = 0, 1, 8, 9, 8, 9, 10, 11
6230b = 32, 36, 40, 44, 48, 52, 56, 60
6231n = 2
6232validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
6233
6234aarch64 = uqshrn2
6235generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
6236
6237/// Signed saturating shift right unsigned narrow
6238name = vqshrun
6239noq-n-suffix
6240constn = N
6241multi_fn = static_assert-N-1-halfbits
6242a = 0, 4, 8, 12, 16, 20, 24, 28
6243n = 2
6244validate 0, 1, 2, 3, 4, 5, 6, 7
c295e0f8 6245arm-aarch64-separate
17df50a5
XL
6246
6247aarch64 = sqshrun
6248link-aarch64 = sqshrun._EXT2_
6249const-aarch64 = N
6250
6251arm = vqshrun
6252link-arm = vqshiftnsu._EXT2_
6253const-arm = -N as ttn
6254generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t
6255
6256/// Signed saturating shift right unsigned narrow
6257name = vqshrun
6258noq-n-suffix
6259constn = N
6260multi_fn = static_assert-N-1-halfbits
c620b35d 6261multi_fn = simd_extract!, {vqshrun_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
17df50a5
XL
6262a = 4
6263n = 2
6264validate 1
6265
6266aarch64 = sqshrun
6267generate i16:u8, i32:u16, i64:u32
6268
6269/// Signed saturating shift right unsigned narrow
6270name = vqshrun_high
6271noq-n-suffix
6272constn = N
6273multi_fn = static_assert-N-1-halfbits
353b0b11 6274multi_fn = simd_shuffle!, a, {vqshrun_n-noqself-::<N>, b}, {asc-0-out_len}
17df50a5
XL
6275a = 0, 1, 8, 9, 8, 9, 10, 11
6276b = 32, 36, 40, 44, 48, 52, 56, 60
6277n = 2
6278validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
6279
6280aarch64 = sqshrun2
6281generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
6282
3c0e092e
XL
6283/// Unsigned saturating accumulate of signed value
6284name = vsqadd
6285out-suffix
c620b35d 6286multi_fn = simd_extract!, {vsqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
3c0e092e
XL
6287a = 2
6288b = 2
6289validate 4
6290
6291aarch64 = usqadd
6292generate u8:i8:u8, u16:i16:u16
6293
6294/// Unsigned saturating accumulate of signed value
6295name = vsqadd
6296out-suffix
6297a = 2
6298b = 2
6299validate 4
6300
6301aarch64 = usqadd
6302link-aarch64 = usqadd._EXT_
6303generate u32:i32:u32, u64:i64:u64
6304
17df50a5
XL
6305/// Calculates the square root of each lane.
6306name = vsqrt
6307fn = simd_fsqrt
6308a = 4.0, 9.0, 16.0, 25.0
6309validate 2.0, 3.0, 4.0, 5.0
6310
6311aarch64 = fsqrt
6312generate float*_t, float64x*_t
6313
6314/// Reciprocal square-root estimate.
6315name = vrsqrte
6316a = 1.0, 2.0, 3.0, 4.0
6317validate 0.998046875, 0.705078125, 0.576171875, 0.4990234375
6318
6319aarch64 = frsqrte
6320link-aarch64 = frsqrte._EXT_
3c0e092e
XL
6321generate float64x*_t, f32, f64
6322
6323arm = vrsqrte
6324link-arm = vrsqrte._EXT_
6325generate float*_t
6326
6327/// Unsigned reciprocal square root estimate
6328name = vrsqrte
6329a = 1, 2, 3, 4
6330validate 4294967295, 4294967295, 4294967295, 4294967295
17df50a5 6331
3c0e092e
XL
6332aarch64 = ursqrte
6333link-aarch64 = ursqrte._EXT_
17df50a5
XL
6334arm = vrsqrte
6335link-arm = vrsqrte._EXT_
3c0e092e
XL
6336generate uint32x2_t, uint32x4_t
6337
6338/// Floating-point reciprocal square root step
6339name = vrsqrts
6340a = 1.0, 2.0, 3.0, 4.0
6341b = 1.0, 2.0, 3.0, 4.0
6342validate 1., -0.5, -3.0, -6.5
6343
6344aarch64 = frsqrts
6345link-aarch64 = frsqrts._EXT_
6346generate float64x*_t, f32, f64
6347
6348arm = vrsqrts
6349link-arm = vrsqrts._EXT_
17df50a5
XL
6350generate float*_t
6351
6352/// Reciprocal estimate.
6353name = vrecpe
6354a = 4.0, 3.0, 2.0, 1.0
6355validate 0.24951171875, 0.3330078125, 0.4990234375, 0.998046875
6356
6357aarch64 = frecpe
6358link-aarch64 = frecpe._EXT_
3c0e092e 6359generate float64x*_t, f32, f64
17df50a5
XL
6360
6361arm = vrecpe
6362link-arm = vrecpe._EXT_
6363generate float*_t
6364
3c0e092e
XL
6365/// Unsigned reciprocal estimate
6366name = vrecpe
6367a = 4, 3, 2, 1
6368validate 4294967295, 4294967295, 4294967295, 4294967295
6369
6370aarch64 = urecpe
6371link-aarch64 = urecpe._EXT_
6372arm = vrecpe
6373link-arm = vrecpe._EXT_
6374generate uint32x2_t, uint32x4_t
6375
6376/// Floating-point reciprocal step
6377name = vrecps
6378a = 4.0, 3.0, 2.0, 1.0
6379b = 4.0, 3.0, 2.0, 1.0
6380validate -14., -7., -2., 1.
6381
6382aarch64 = frecps
6383link-aarch64 = frecps._EXT_
6384generate float64x*_t, f32, f64
6385
6386arm = vrecps
6387link-arm = vrecps._EXT_
6388generate float*_t
6389
6390/// Floating-point reciprocal exponent
6391name = vrecpx
6392a = 4.0
6393validate 0.5
6394
6395aarch64 = frecpx
6396link-aarch64 = frecpx._EXT_
6397generate f32, f64
6398
17df50a5
XL
6399/// Vector reinterpret cast operation
6400name = vreinterpret
6401double-suffixes
6402fn = transmute
6403a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6404validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6405
c295e0f8 6406aarch64 = nop
17df50a5
XL
6407generate poly64x1_t:int64x1_t, poly64x1_t:uint64x1_t, int64x1_t:poly64x1_t, uint64x1_t:poly64x1_t
6408generate poly64x2_t:int64x2_t, poly64x2_t:uint64x2_t, int64x2_t:poly64x2_t, uint64x2_t:poly64x2_t
6409
c295e0f8 6410arm = nop
17df50a5
XL
6411generate uint8x8_t:int8x8_t, poly8x8_t:int8x8_t, poly16x4_t:int16x4_t, uint16x4_t:int16x4_t, uint32x2_t:int32x2_t, uint64x1_t:int64x1_t
6412generate uint8x16_t:int8x16_t, poly8x16_t:int8x16_t, poly16x8_t:int16x8_t, uint16x8_t:int16x8_t, uint32x4_t:int32x4_t, uint64x2_t:int64x2_t
6413generate poly8x8_t:uint8x8_t, int8x8_t:uint8x8_t, poly16x4_t:uint16x4_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
6414generate poly8x16_t:uint8x16_t, int8x16_t:uint8x16_t, poly16x8_t:uint16x8_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t
6415generate int8x8_t:poly8x8_t, uint8x8_t:poly8x8_t, int16x4_t:poly16x4_t, uint16x4_t:poly16x4_t
6416generate int8x16_t:poly8x16_t, uint8x16_t:poly8x16_t, int16x8_t:poly16x8_t, uint16x8_t:poly16x8_t
6417
6418/// Vector reinterpret cast operation
6419name = vreinterpret
6420double-suffixes
6421fn = transmute
6422a = 0, 1, 2, 3, 4, 5, 6, 7
6423validate 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
6424
c295e0f8 6425aarch64 = nop
c295e0f8 6426arm = nop
17df50a5
XL
6427generate int16x4_t:int8x8_t, uint16x4_t:int8x8_t, poly16x4_t:int8x8_t, int32x2_t:int16x4_t, uint32x2_t:int16x4_t, int64x1_t:int32x2_t, uint64x1_t:int32x2_t
6428generate int16x8_t:int8x16_t, uint16x8_t:int8x16_t, poly16x8_t:int8x16_t, int32x4_t:int16x8_t, uint32x4_t:int16x8_t, int64x2_t:int32x4_t, uint64x2_t:int32x4_t
6429generate poly16x4_t:uint8x8_t, int16x4_t:uint8x8_t, uint16x4_t:uint8x8_t, int32x2_t:uint16x4_t, uint32x2_t:uint16x4_t, int64x1_t:uint32x2_t, uint64x1_t:uint32x2_t
6430generate poly16x8_t:uint8x16_t, int16x8_t:uint8x16_t, uint16x8_t:uint8x16_t, int32x4_t:uint16x8_t, uint32x4_t:uint16x8_t, int64x2_t:uint32x4_t, uint64x2_t:uint32x4_t
6431generate poly16x4_t:poly8x8_t, int16x4_t:poly8x8_t, uint16x4_t:poly8x8_t, int32x2_t:poly16x4_t, uint32x2_t:poly16x4_t
6432generate poly16x8_t:poly8x16_t, int16x8_t:poly8x16_t, uint16x8_t:poly8x16_t, int32x4_t:poly16x8_t, uint32x4_t:poly16x8_t
3c0e092e
XL
6433target = aes
6434generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t
6435generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t
6436generate p128:int64x2_t, p128:uint64x2_t, p128:poly64x2_t
17df50a5
XL
6437
6438/// Vector reinterpret cast operation
6439name = vreinterpret
6440double-suffixes
6441fn = transmute
6442a = 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
6443validate 0, 1, 2, 3, 4, 5, 6, 7
6444
c295e0f8 6445aarch64 = nop
c295e0f8 6446arm = nop
17df50a5
XL
6447generate poly8x8_t:int16x4_t, int8x8_t:int16x4_t, uint8x8_t:int16x4_t, poly16x4_t:int32x2_t, int16x4_t:int32x2_t, uint16x4_t:int32x2_t, int32x2_t:int64x1_t, uint32x2_t:int64x1_t
6448generate poly8x16_t:int16x8_t, int8x16_t:int16x8_t, uint8x16_t:int16x8_t, poly16x8_t:int32x4_t, int16x8_t:int32x4_t, uint16x8_t:int32x4_t, int32x4_t:int64x2_t, uint32x4_t:int64x2_t
6449generate poly8x8_t:uint16x4_t, int8x8_t:uint16x4_t, uint8x8_t:uint16x4_t, poly16x4_t:uint32x2_t, int16x4_t:uint32x2_t, uint16x4_t:uint32x2_t, int32x2_t:uint64x1_t, uint32x2_t:uint64x1_t
6450generate poly8x16_t:uint16x8_t, int8x16_t:uint16x8_t, uint8x16_t:uint16x8_t, poly16x8_t:uint32x4_t, int16x8_t:uint32x4_t, uint16x8_t:uint32x4_t, int32x4_t:uint64x2_t, uint32x4_t:uint64x2_t
6451generate poly8x8_t:poly16x4_t, int8x8_t:poly16x4_t, uint8x8_t:poly16x4_t
6452generate poly8x16_t:poly16x8_t, int8x16_t:poly16x8_t, uint8x16_t:poly16x8_t
3c0e092e
XL
6453target = aes
6454generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t
6455generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t
6456generate int64x2_t:p128, uint64x2_t:p128, poly64x2_t:p128
17df50a5
XL
6457
6458/// Vector reinterpret cast operation
6459name = vreinterpret
6460double-suffixes
6461fn = transmute
6462a = 0, 1, 2, 3
6463validate 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
6464
c295e0f8 6465aarch64 = nop
c295e0f8 6466arm = nop
17df50a5
XL
6467generate int32x2_t:int8x8_t, uint32x2_t:int8x8_t, int64x1_t:int16x4_t, uint64x1_t:int16x4_t
6468generate int32x4_t:int8x16_t, uint32x4_t:int8x16_t, int64x2_t:int16x8_t, uint64x2_t:int16x8_t
6469generate int32x2_t:uint8x8_t, uint32x2_t:uint8x8_t, int64x1_t:uint16x4_t, uint64x1_t:uint16x4_t
6470generate int32x4_t:uint8x16_t, uint32x4_t:uint8x16_t, int64x2_t:uint16x8_t, uint64x2_t:uint16x8_t
6471generate int32x2_t:poly8x8_t, uint32x2_t:poly8x8_t, int64x1_t:poly16x4_t, uint64x1_t:poly16x4_t
6472generate int32x4_t:poly8x16_t, uint32x4_t:poly8x16_t, int64x2_t:poly16x8_t, uint64x2_t:poly16x8_t
3c0e092e
XL
6473target = aes
6474generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t
6475generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t
6476generate p128:int32x4_t, p128:uint32x4_t
17df50a5
XL
6477
6478/// Vector reinterpret cast operation
6479name = vreinterpret
6480double-suffixes
6481fn = transmute
6482a = 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
6483validate 0, 1, 2, 3
6484
c295e0f8 6485aarch64 = nop
c295e0f8 6486arm = nop
17df50a5
XL
6487generate poly8x8_t:int32x2_t, int8x8_t:int32x2_t, uint8x8_t:int32x2_t, poly16x4_t:int64x1_t, int16x4_t:int64x1_t, uint16x4_t:int64x1_t
6488generate poly8x16_t:int32x4_t, int8x16_t:int32x4_t, uint8x16_t:int32x4_t, poly16x8_t:int64x2_t, int16x8_t:int64x2_t, uint16x8_t:int64x2_t
6489generate poly8x8_t:uint32x2_t, int8x8_t:uint32x2_t, uint8x8_t:uint32x2_t, poly16x4_t:uint64x1_t, int16x4_t:uint64x1_t, uint16x4_t:uint64x1_t
6490generate poly8x16_t:uint32x4_t, int8x16_t:uint32x4_t, uint8x16_t:uint32x4_t, poly16x8_t:uint64x2_t, int16x8_t:uint64x2_t, uint16x8_t:uint64x2_t
3c0e092e
XL
6491target = aes
6492generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t
6493generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t
6494generate int32x4_t:p128, uint32x4_t:p128
17df50a5
XL
6495
6496/// Vector reinterpret cast operation
6497name = vreinterpret
6498double-suffixes
6499fn = transmute
6500a = 0, 1
6501validate 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
6502
c295e0f8 6503aarch64 = nop
c295e0f8 6504arm = nop
17df50a5
XL
6505generate int64x1_t:int8x8_t, uint64x1_t:int8x8_t, int64x1_t:uint8x8_t, uint64x1_t:uint8x8_t, int64x1_t:poly8x8_t, uint64x1_t:poly8x8_t
6506generate int64x2_t:int8x16_t, uint64x2_t:int8x16_t, int64x2_t:uint8x16_t, uint64x2_t:uint8x16_t, int64x2_t:poly8x16_t, uint64x2_t:poly8x16_t
3c0e092e
XL
6507target = aes
6508generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t
6509generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t
6510generate p128:int16x8_t, p128:uint16x8_t, p128:poly16x8_t
17df50a5
XL
6511
6512/// Vector reinterpret cast operation
6513name = vreinterpret
6514double-suffixes
6515fn = transmute
6516a = 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
6517validate 0, 1
6518
c295e0f8 6519aarch64 = nop
3c0e092e
XL
6520arm = nop
6521generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t
6522generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t
6523target = aes
17df50a5
XL
6524generate poly8x8_t:poly64x1_t, int8x8_t:poly64x1_t, uint8x8_t:poly64x1_t
6525generate poly8x16_t:poly64x2_t, int8x16_t:poly64x2_t, uint8x16_t:poly64x2_t
3c0e092e
XL
6526generate int16x8_t:p128, uint16x8_t:p128, poly16x8_t:p128
6527
6528/// Vector reinterpret cast operation
6529name = vreinterpret
6530double-suffixes
6531fn = transmute
6532a = 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6533validate 1
6534target = aes
17df50a5 6535
3c0e092e 6536aarch64 = nop
c295e0f8 6537arm = nop
3c0e092e
XL
6538generate int8x16_t:p128, uint8x16_t:p128, poly8x16_t:p128
6539
6540/// Vector reinterpret cast operation
6541name = vreinterpret
6542double-suffixes
6543fn = transmute
6544a = 1
6545validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6546target = aes
6547
6548aarch64 = nop
6549arm = nop
6550generate p128:int8x16_t, p128:uint8x16_t, p128:poly8x16_t
17df50a5
XL
6551
6552/// Vector reinterpret cast operation
6553name = vreinterpret
6554double-suffixes
6555fn = transmute
6556a = 0., 0., 0., 0., 0., 0., 0., 0.
6557validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6558
c295e0f8 6559aarch64 = nop
17df50a5
XL
6560generate float64x1_t:int8x8_t, float64x1_t:int16x4_t, float64x1_t:int32x2_t, float64x1_t:int64x1_t
6561generate float64x2_t:int8x16_t, float64x2_t:int16x8_t, float64x2_t:int32x4_t, float64x2_t:int64x2_t
6562generate float64x1_t:uint8x8_t, float64x1_t:uint16x4_t, float64x1_t:uint32x2_t, float64x1_t:uint64x1_t
6563generate float64x2_t:uint8x16_t, float64x2_t:uint16x8_t, float64x2_t:uint32x4_t, float64x2_t:uint64x2_t
6564generate float64x1_t:poly8x8_t, float64x1_t:poly16x4_t, float32x2_t:poly64x1_t, float64x1_t:poly64x1_t
6565generate float64x2_t:poly8x16_t, float64x2_t:poly16x8_t, float32x4_t:poly64x2_t, float64x2_t:poly64x2_t
3c0e092e 6566generate float64x2_t:p128
17df50a5 6567
c295e0f8 6568arm = nop
17df50a5
XL
6569generate float32x2_t:int8x8_t, float32x2_t:int16x4_t, float32x2_t:int32x2_t, float32x2_t:int64x1_t
6570generate float32x4_t:int8x16_t, float32x4_t:int16x8_t, float32x4_t:int32x4_t, float32x4_t:int64x2_t
6571generate float32x2_t:uint8x8_t, float32x2_t:uint16x4_t, float32x2_t:uint32x2_t, float32x2_t:uint64x1_t
6572generate float32x4_t:uint8x16_t, float32x4_t:uint16x8_t, float32x4_t:uint32x4_t, float32x4_t:uint64x2_t
6573generate float32x2_t:poly8x8_t, float32x2_t:poly16x4_t
6574generate float32x4_t:poly8x16_t, float32x4_t:poly16x8_t
3c0e092e 6575generate float32x4_t:p128
17df50a5
XL
6576
6577/// Vector reinterpret cast operation
6578name = vreinterpret
6579double-suffixes
6580fn = transmute
6581a = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6582validate 0., 0., 0., 0., 0., 0., 0., 0.
6583
c295e0f8 6584aarch64 = nop
17df50a5
XL
6585generate int8x8_t:float64x1_t, int16x4_t:float64x1_t, int32x2_t:float64x1_t, int64x1_t:float64x1_t
6586generate int8x16_t:float64x2_t, int16x8_t:float64x2_t, int32x4_t:float64x2_t, int64x2_t:float64x2_t
6587generate poly8x8_t:float64x1_t, uint16x4_t:float64x1_t, uint32x2_t:float64x1_t, uint64x1_t:float64x1_t
6588generate poly8x16_t:float64x2_t, uint16x8_t:float64x2_t, uint32x4_t:float64x2_t, uint64x2_t:float64x2_t
6589generate uint8x8_t:float64x1_t, poly16x4_t:float64x1_t, poly64x1_t:float64x1_t, poly64x1_t:float32x2_t
6590generate uint8x16_t:float64x2_t, poly16x8_t:float64x2_t, poly64x2_t:float64x2_t, poly64x2_t:float32x4_t
3c0e092e 6591generate p128:float64x2_t
17df50a5 6592
c295e0f8 6593arm = nop
17df50a5
XL
6594generate int8x8_t:float32x2_t, int16x4_t:float32x2_t, int32x2_t:float32x2_t, int64x1_t:float32x2_t
6595generate int8x16_t:float32x4_t, int16x8_t:float32x4_t, int32x4_t:float32x4_t, int64x2_t:float32x4_t
6596generate uint8x8_t:float32x2_t, uint16x4_t:float32x2_t, uint32x2_t:float32x2_t, uint64x1_t:float32x2_t
6597generate uint8x16_t:float32x4_t, uint16x8_t:float32x4_t, uint32x4_t:float32x4_t, uint64x2_t:float32x4_t
6598generate poly8x8_t:float32x2_t, poly16x4_t:float32x2_t
6599generate poly8x16_t:float32x4_t, poly16x8_t:float32x4_t
3c0e092e 6600generate p128:float32x4_t
17df50a5
XL
6601
6602/// Vector reinterpret cast operation
6603name = vreinterpret
6604double-suffixes
6605fn = transmute
6606a = 0., 0., 0., 0., 0., 0., 0., 0.
6607validate 0., 0., 0., 0., 0., 0., 0., 0.
6608
c295e0f8 6609aarch64 = nop
17df50a5
XL
6610generate float32x2_t:float64x1_t, float64x1_t:float32x2_t
6611generate float32x4_t:float64x2_t, float64x2_t:float32x4_t
6612
6613/// Signed rounding shift left
6614name = vrshl
6615a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6616b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
6617validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6618
6619aarch64 = srshl
6620link-aarch64 = srshl._EXT_
6621generate i64
6622
6623arm = vrshl
6624link-arm = vrshifts._EXT_
6625generate int*_t, int64x*_t
6626
6627/// Unsigned rounding shift left
6628name = vrshl
6629out-suffix
6630a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6631b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
6632validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6633
6634aarch64 = urshl
6635link-aarch64 = urshl._EXT_
6636generate u64:i64:u64
6637
6638arm = vrshl
6639link-arm = vrshiftu._EXT_
6640generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
6641generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
6642
6643/// Signed rounding shift right
6644name = vrshr
6645n-suffix
6646constn = N
6647multi_fn = static_assert-N-1-bits
781aab86 6648multi_fn = vrshl-self-noext, a, {vdup-nself-noext, -N as _}
17df50a5
XL
6649a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6650n = 2
6651validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6652
6653aarch64 = srshr
6654arm = vrshr
6655generate int*_t, int64x*_t
6656
6657/// Signed rounding shift right
6658name = vrshr
6659n-suffix
6660constn = N
6661multi_fn = static_assert-N-1-bits
6662multi_fn = vrshl-self-noext, a, -N as i64
6663a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6664n = 2
6665validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6666
6667aarch64 = srshr
6668generate i64
6669
6670/// Unsigned rounding shift right
6671name = vrshr
6672n-suffix
6673constn = N
6674multi_fn = static_assert-N-1-bits
781aab86 6675multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, -N as _}
17df50a5
XL
6676a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6677n = 2
6678validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6679
6680aarch64 = urshr
6681arm = vrshr
6682generate uint*_t, uint64x*_t
6683
6684/// Unsigned rounding shift right
6685name = vrshr
6686n-suffix
6687constn = N
6688multi_fn = static_assert-N-1-bits
6689multi_fn = vrshl-self-noext, a, -N as i64
6690a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6691n = 2
6692validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6693
6694aarch64 = urshr
6695generate u64
6696
6697/// Rounding shift right narrow
6698name = vrshrn
6699noq-n-suffix
6700constn = N
6701multi_fn = static_assert-N-1-halfbits
6702a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6703n = 2
6704validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c295e0f8 6705arm-aarch64-separate
17df50a5
XL
6706
6707aarch64 = rshrn
6708link-aarch64 = rshrn._EXT2_
6709const-aarch64 = N
6710
6711arm = vrshrn
6712link-arm = vrshiftn._EXT2_
6713const-arm = -N as ttn
6714generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
6715
6716/// Rounding shift right narrow
6717name = vrshrn
6718noq-n-suffix
6719constn = N
6720multi_fn = static_assert-N-1-halfbits
6721multi_fn = transmute, {vrshrn_n-noqsigned-::<N>, transmute(a)}
6722a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6723n = 2
6724validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6725
6726aarch64 = rshrn
6727arm = vrshrn
6728generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
6729
6730/// Rounding shift right narrow
6731name = vrshrn_high
6732noq-n-suffix
6733constn = N
6734multi_fn = static_assert-N-1-halfbits
353b0b11 6735multi_fn = simd_shuffle!, a, {vrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
17df50a5
XL
6736a = 0, 1, 8, 9, 8, 9, 10, 11
6737b = 32, 36, 40, 44, 48, 52, 56, 60
6738n = 2
6739validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
6740
6741aarch64 = rshrn2
6742generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
6743generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
6744
6745/// Signed rounding shift right and accumulate
6746name = vrsra
6747n-suffix
6748constn = N
6749multi_fn = static_assert-N-1-bits
6750multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
6751a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
6752b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6753n = 2
6754validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
6755
6756aarch64 = srsra
6757arm = vrsra
6758generate int*_t, int64x*_t
6759
6760/// Unsigned rounding shift right and accumulate
6761name = vrsra
6762n-suffix
6763constn = N
6764multi_fn = static_assert-N-1-bits
6765multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
6766a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
6767b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6768n = 2
6769validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
6770
6771aarch64 = ursra
6772arm = vrsra
6773generate uint*_t, uint64x*_t
6774
6775/// Signed rounding shift right and accumulate.
6776name = vrsra
6777n-suffix
6778constn = N
6779multi_fn = static_assert-N-1-bits
6780multi_fn = vrshr-nself-::<N>, b:in_t, b
a2a8927a 6781multi_fn = a.wrapping_add(b)
17df50a5
XL
6782a = 1
6783b = 4
6784n = 2
6785validate 2
6786
781aab86 6787aarch64 = srshr
17df50a5
XL
6788generate i64
6789
781aab86 6790/// Unsigned rounding shift right and accumulate.
17df50a5
XL
6791name = vrsra
6792n-suffix
6793constn = N
6794multi_fn = static_assert-N-1-bits
6795multi_fn = vrshr-nself-::<N>, b:in_t, b
a2a8927a 6796multi_fn = a.wrapping_add(b)
17df50a5
XL
6797a = 1
6798b = 4
6799n = 2
6800validate 2
6801
781aab86 6802aarch64 = urshr
17df50a5
XL
6803generate u64
6804
3c0e092e
XL
6805/// Rounding subtract returning high narrow
6806name = vrsubhn
6807no-q
6808a = MAX, MIN, 0, 4, 5, 6, 7, 8
6809b = 1, 2, 3, 4, 5, 6, 7, 8
6810validate MIN, MIN, 0, 0, 0, 0, 0, 0
6811
6812aarch64 = rsubhn
6813link-aarch64 = rsubhn._EXT2_
6814arm = vrsubhn
6815link-arm = vrsubhn._EXT2_
6816generate int16x8_t:int16x8_t:int8x8_t, int32x4_t:int32x4_t:int16x4_t, int64x2_t:int64x2_t:int32x2_t
6817
6818/// Rounding subtract returning high narrow
6819name = vrsubhn
6820no-q
6821multi_fn = transmute, {vrsubhn-noqsigned-noext, {transmute, a}, {transmute, b}}
6822a = MAX, MIN, 3, 4, 5, 6, 7, 8
6823b = 1, 2, 3, 4, 5, 6, 7, 8
6824validate 0, 0, 0, 0, 0, 0, 0, 0
6825
6826aarch64 = rsubhn
6827arm = vrsubhn
6828generate uint16x8_t:uint16x8_t:uint8x8_t, uint32x4_t:uint32x4_t:uint16x4_t, uint64x2_t:uint64x2_t:uint32x2_t
6829
6830/// Rounding subtract returning high narrow
6831name = vrsubhn_high
6832no-q
6833multi_fn = vrsubhn-noqself-noext, x:in_t0, b, c
353b0b11 6834multi_fn = simd_shuffle!, a, x, {asc-0-out_len}
3c0e092e
XL
6835a = 1, 2, 0, 0, 0, 0, 0, 0
6836b = 1, 2, 3, 4, 5, 6, 7, 8
6837c = 1, 2, 3, 4, 5, 6, 7, 8
6838validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6839
6840aarch64 = rsubhn2
6841generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
6842generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t
6843
17df50a5
XL
6844/// Insert vector element from another vector element
6845name = vset_lane
6846constn = LANE
6847multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 6848multi_fn = simd_insert!, b, LANE as u32, a
17df50a5
XL
6849a = 1
6850b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6851n = 0
6852validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6853
6854aarch64 = nop
6855arm = nop
6856generate i8:int8x8_t:int8x8_t, i16:int16x4_t:int16x4_t
6857generate i32:int32x2_t:int32x2_t, i64:int64x1_t:int64x1_t
6858generate u8:uint8x8_t:uint8x8_t, u16:uint16x4_t:uint16x4_t
6859generate u32:uint32x2_t:uint32x2_t, u64:uint64x1_t:uint64x1_t
6860generate p8:poly8x8_t:poly8x8_t, p16:poly16x4_t:poly16x4_t
6861
94222f64 6862target = aes
17df50a5
XL
6863generate p64:poly64x1_t:poly64x1_t
6864
6865/// Insert vector element from another vector element
6866name = vsetq_lane
6867no-q
6868constn = LANE
6869multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 6870multi_fn = simd_insert!, b, LANE as u32, a
17df50a5
XL
6871a = 1
6872b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6873n = 0
6874validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6875
6876aarch64 = nop
6877arm = nop
6878generate i8:int8x16_t:int8x16_t, i16:int16x8_t:int16x8_t
6879generate i32:int32x4_t:int32x4_t, i64:int64x2_t:int64x2_t
6880generate u8:uint8x16_t:uint8x16_t, u16:uint16x8_t:uint16x8_t
6881generate u32:uint32x4_t:uint32x4_t, u64:uint64x2_t:uint64x2_t
6882generate p8:poly8x16_t:poly8x16_t, p16:poly16x8_t:poly16x8_t
6883
94222f64 6884target = aes
17df50a5
XL
6885generate p64:poly64x2_t:poly64x2_t
6886
6887/// Insert vector element from another vector element
6888name = vset_lane
6889constn = LANE
6890multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 6891multi_fn = simd_insert!, b, LANE as u32, a
17df50a5
XL
6892a = 1.
6893b = 0., 2., 3., 4.
6894n = 0
6895validate 1., 2., 3., 4.
6896
6897aarch64 = nop
6898generate f64:float64x1_t:float64x1_t
6899
6900arm = nop
6901generate f32:float32x2_t:float32x2_t
6902
6903/// Insert vector element from another vector element
6904name = vsetq_lane
6905no-q
6906constn = LANE
6907multi_fn = static_assert_imm-in_exp_len-LANE
c620b35d 6908multi_fn = simd_insert!, b, LANE as u32, a
17df50a5
XL
6909a = 1.
6910b = 0., 2., 3., 4.
6911n = 0
6912validate 1., 2., 3., 4.
6913
6914aarch64 = nop
6915generate f64:float64x2_t:float64x2_t
6916
6917arm = nop
6918generate f32:float32x4_t:float32x4_t
6919
6920/// Signed Shift left
6921name = vshl
6922a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6923b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
6924validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6925
6926aarch64 = sshl
6927link-aarch64 = sshl._EXT_
6928arm = vshl
6929link-arm = vshifts._EXT_
6930generate int*_t, int64x*_t
6931
6932/// Signed Shift left
6933name = vshl
6934multi_fn = transmute, {vshl-in_ntt-noext, transmute(a), transmute(b)}
6935a = 1
6936b = 2
6937validate 4
6938
6939aarch64 = sshl
6940generate i64
6941
6942/// Unsigned Shift left
6943name = vshl
6944out-suffix
6945a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6946b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
6947validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6948
6949aarch64 = ushl
6950link-aarch64 = ushl._EXT_
6951arm = vshl
6952link-arm = vshiftu._EXT_
6953generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
6954generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
6955
6956/// Unsigned Shift left
6957out-suffix
6958name = vshl
6959multi_fn = transmute, {vshl-out_ntt-noext, transmute(a), transmute(b)}
6960a = 1
6961b = 2
6962validate 4
6963
6964aarch64 = ushl
6965generate u64:i64:u64
6966
6967/// Shift left
6968name = vshl
6969n-suffix
6970constn = N
6971multi_fn = static_assert_imm-out_bits_exp_len-N
a2a8927a 6972multi_fn = simd_shl, a, {vdup-nself-noext, N as _}
17df50a5
XL
6973a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
6974n = 2
6975validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
6976
6977arm = vshl
6978aarch64 = shl
6979generate int*_t, uint*_t, int64x*_t, uint64x*_t
6980
6981/// Signed shift left long
6982name = vshll
6983n-suffix
6984constn = N
6985multi_fn = static_assert-N-0-bits
a2a8927a 6986multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N as _}
17df50a5
XL
6987a = 1, 2, 3, 4, 5, 6, 7, 8
6988n = 2
6989validate 4, 8, 12, 16, 20, 24, 28, 32
6990
6991arm = vshll.s
6992aarch64 = sshll
6993generate int8x8_t:int16x8_t, int16x4_t:int32x4_t, int32x2_t:int64x2_t
6994aarch64 = ushll
6995generate uint8x8_t:uint16x8_t, uint16x4_t:uint32x4_t, uint32x2_t:uint64x2_t
6996
6997/// Signed shift left long
6998name = vshll_high_n
6999no-q
7000constn = N
7001multi_fn = static_assert-N-0-bits
353b0b11 7002multi_fn = simd_shuffle!, b:half, a, a, {asc-halflen-halflen}
17df50a5
XL
7003multi_fn = vshll_n-noqself-::<N>, b
7004a = 0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8
7005n = 2
7006validate 4, 8, 12, 16, 20, 24, 28, 32
7007
7008aarch64 = sshll2
7009generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t
7010aarch64 = ushll2
7011generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t
7012
7013/// Shift right
7014name = vshr
7015n-suffix
7016constn = N
7017multi_fn = static_assert-N-1-bits
3c0e092e 7018multi_fn = fix_right_shift_imm-N-bits
a2a8927a 7019multi_fn = simd_shr, a, {vdup-nself-noext, n as _}
17df50a5
XL
7020a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
7021n = 2
7022validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7023
7024arm = vshr.s
7025aarch64 = sshr
7026generate int*_t, int64x*_t
7027aarch64 = ushr
7028generate uint*_t, uint64x*_t
7029
7030/// Shift right narrow
7031name = vshrn_n
7032no-q
7033constn = N
7034multi_fn = static_assert-N-1-halfbits
a2a8927a 7035multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N as _}}
17df50a5
XL
7036a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
7037n = 2
7038validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7039
7040arm = vshrn.
7041aarch64 = shrn
7042generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
7043generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
7044
7045/// Shift right narrow
7046name = vshrn_high_n
7047no-q
7048constn = N
7049multi_fn = static_assert-N-1-halfbits
353b0b11 7050multi_fn = simd_shuffle!, a, {vshrn_n-noqself-::<N>, b}, {asc-0-out_len}
17df50a5
XL
7051a = 1, 2, 5, 6, 5, 6, 7, 8
7052b = 20, 24, 28, 32, 52, 56, 60, 64
7053n = 2
7054validate 1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16
7055
7056aarch64 = shrn2
7057generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
7058generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
7059
7060/// Signed shift right and accumulate
7061name = vsra
7062n-suffix
7063constn = N
7064multi_fn = static_assert-N-1-bits
7065multi_fn = simd_add, a, {vshr-nself-::<N>, b}
7066a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
7067b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
7068n = 2
7069validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
7070
7071aarch64 = ssra
7072arm = vsra
7073generate int*_t, int64x*_t
7074
7075/// Unsigned shift right and accumulate
7076name = vsra
7077n-suffix
7078constn = N
7079multi_fn = static_assert-N-1-bits
7080multi_fn = simd_add, a, {vshr-nself-::<N>, b}
7081a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
7082b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
7083n = 2
7084validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
7085
7086aarch64 = usra
7087arm = vsra
7088generate uint*_t, uint64x*_t
7089
3c0e092e
XL
7090/// SM3PARTW1
7091name = vsm3partw1
7092a = 1, 2, 3, 4
7093b = 1, 2, 3, 4
7094c = 1, 2, 3, 4
7095validate 2147549312, 3221323968, 131329, 2684362752
7096target = sm4
7097
7098aarch64 = sm3partw1
7099link-aarch64 = llvm.aarch64.crypto.sm3partw1
7100generate uint32x4_t
7101
7102/// SM3PARTW2
7103name = vsm3partw2
7104a = 1, 2, 3, 4
7105b = 1, 2, 3, 4
7106c = 1, 2, 3, 4
7107validate 128, 256, 384, 1077977696
7108target = sm4
7109
7110aarch64 = sm3partw2
7111link-aarch64 = llvm.aarch64.crypto.sm3partw2
7112generate uint32x4_t
7113
7114/// SM3SS1
7115name = vsm3ss1
7116a = 1, 2, 3, 4
7117b = 1, 2, 3, 4
7118c = 1, 2, 3, 4
7119validate 0, 0, 0, 2098176
7120target = sm4
7121
7122aarch64 = sm3ss1
7123link-aarch64 = llvm.aarch64.crypto.sm3ss1
7124generate uint32x4_t
7125
7126/// SM4 key
7127name = vsm4ekey
7128a = 1, 2, 3, 4
7129b = 1, 2, 3, 4
7130validate 1784948604, 136020997, 2940231695, 3789947679
7131target = sm4
7132
7133aarch64 = sm4ekey
7134link-aarch64 = llvm.aarch64.crypto.sm4ekey
7135generate uint32x4_t
7136
7137/// SM4 encode
7138name = vsm4e
7139a = 1, 2, 3, 4
7140b = 1, 2, 3, 4
7141validate 1093874472, 3616769504, 3878330411, 2765298765
7142target = sm4
7143
7144aarch64 = sm4e
7145link-aarch64 = llvm.aarch64.crypto.sm4e
7146generate uint32x4_t
7147
7148/// Rotate and exclusive OR
7149name = vrax1
7150a = 1, 2
7151b = 3, 4
7152validate 7, 10
7153target = sha3
7154
7155aarch64 = rax1
7156link-aarch64 = llvm.aarch64.crypto.rax1
7157generate uint64x2_t
7158
7159/// SHA512 hash update part 1
7160name = vsha512h
7161a = 1, 2
7162b = 3, 4
7163c = 5, 6
7164validate 11189044327219203, 7177611956453380
7165target = sha3
7166
7167aarch64 = sha512h
7168link-aarch64 = llvm.aarch64.crypto.sha512h
7169generate uint64x2_t
7170
7171/// SHA512 hash update part 2
7172name = vsha512h2
7173a = 1, 2
7174b = 3, 4
7175c = 5, 6
7176validate 5770237651009406214, 349133864969
7177target = sha3
7178
7179aarch64 = sha512h2
7180link-aarch64 = llvm.aarch64.crypto.sha512h2
7181generate uint64x2_t
7182
7183/// SHA512 schedule update 0
7184name = vsha512su0
7185a = 1, 2
7186b = 3, 4
7187validate 144115188075855874, 9439544818968559619
7188target = sha3
7189
7190aarch64 = sha512su0
7191link-aarch64 = llvm.aarch64.crypto.sha512su0
7192generate uint64x2_t
7193
7194/// SHA512 schedule update 1
7195name = vsha512su1
7196a = 1, 2
7197b = 3, 4
7198c = 5, 6
7199validate 105553116266526, 140737488355368
7200target = sha3
7201
7202aarch64 = sha512su1
7203link-aarch64 = llvm.aarch64.crypto.sha512su1
7204generate uint64x2_t
7205
7206/// Floating-point round to 32-bit integer, using current rounding mode
7207name = vrnd32x
3c0e092e
XL
7208target = frintts
7209
781aab86
FG
7210// For validation, the rounding mode should be the default: round-to-nearest (ties-to-even).
7211a = -1.5, 2.9, 1.5, -2.5
7212validate -2.0, 3.0, 2.0, -2.0
7213
3c0e092e
XL
7214aarch64 = frint32x
7215link-aarch64 = frint32x._EXT_
7216generate float32x2_t, float32x4_t
7217
781aab86
FG
7218// The float64x1_t form uses a different LLVM link and isn't supported by Clang
7219// (and so has no intrinsic-test), so perform extra validation to make sure
7220// that it matches the float64x2_t form.
7221
7222a = 1.5, -2.5
7223validate 2.0, -2.0
7224// - The biggest f64 that rounds to i32::MAX.
7225// - The smallest positive f64 that rounds out of range.
7226a = 2147483647.499999762, 2147483647.5
7227validate 2147483647.0, -2147483648.0
7228// - The smallest f64 that rounds to i32::MIN + 1.
7229// - The largest negative f64 that rounds out of range.
7230a = -2147483647.499999762, -2147483648.500000477
7231validate -2147483647.0, -2147483648.0
7232generate float64x2_t
7233
7234// Odd-numbered tests for float64x1_t coverage.
7235a = 2.9
7236validate 3.0
7237a = -2.5
7238validate -2.0
7239a = 2147483647.5
7240validate -2147483648.0
7241a = -2147483648.500000477
7242validate -2147483648.0
7243
c620b35d 7244multi_fn = transmute, {self-out-_, {simd_extract!, a, 0}}
781aab86
FG
7245link-aarch64 = llvm.aarch64.frint32x.f64:f64:::f64
7246generate float64x1_t
7247
3c0e092e
XL
7248/// Floating-point round to 32-bit integer toward zero
7249name = vrnd32z
3c0e092e
XL
7250target = frintts
7251
781aab86
FG
7252a = -1.5, 2.9, 1.5, -2.5
7253validate -1.0, 2.0, 1.0, -2.0
7254
3c0e092e
XL
7255aarch64 = frint32z
7256link-aarch64 = frint32z._EXT_
7257generate float32x2_t, float32x4_t
7258
781aab86
FG
7259// The float64x1_t form uses a different LLVM link and isn't supported by Clang
7260// (and so has no intrinsic-test), so perform extra validation to make sure
7261// that it matches the float64x2_t form.
7262
7263a = 1.5, -2.5
7264validate 1.0, -2.0
7265// - The biggest f64 that rounds to i32::MAX.
7266// - The smallest positive f64 that rounds out of range.
7267a = 2147483647.999999762, 2147483648.0
7268validate 2147483647.0, -2147483648.0
7269// - The smallest f64 that rounds to i32::MIN + 1.
7270// - The largest negative f64 that rounds out of range.
7271a = -2147483647.999999762, -2147483649.0
7272validate -2147483647.0, -2147483648.0
7273generate float64x2_t
7274
7275// Odd-numbered tests for float64x1_t coverage.
7276a = 2.9
7277validate 2.0
7278a = -2.5
7279validate -2.0
7280a = 2147483648.0
7281validate -2147483648.0
7282a = -2147483649.0
7283validate -2147483648.0
7284
c620b35d 7285multi_fn = transmute, {self-out-_, {simd_extract!, a, 0}}
781aab86
FG
7286link-aarch64 = llvm.aarch64.frint32z.f64:f64:::f64
7287generate float64x1_t
7288
3c0e092e
XL
7289/// Floating-point round to 64-bit integer, using current rounding mode
7290name = vrnd64x
3c0e092e
XL
7291target = frintts
7292
781aab86
FG
7293// For validation, the rounding mode should be the default: round-to-nearest (ties-to-even).
7294a = -1.5, 2.9, 1.5, -2.5
7295validate -2.0, 3.0, 2.0, -2.0
7296
3c0e092e
XL
7297aarch64 = frint64x
7298link-aarch64 = frint64x._EXT_
7299generate float32x2_t, float32x4_t
7300
781aab86
FG
7301// The float64x1_t form uses a different LLVM link and isn't supported by Clang
7302// (and so has no intrinsic-test), so perform extra validation to make sure
7303// that it matches the float64x2_t form.
7304
7305a = 1.5, -2.5
7306validate 2.0, -2.0
7307// - The biggest f64 representable as an i64 (0x7ffffffffffffc00).
7308// - The smallest positive f64 that is out of range (2^63).
7309a = 9223372036854774784.0, 9223372036854775808.0
7310validate 9223372036854774784.0, -9223372036854775808.0
7311// - The smallest f64 representable as an i64 (i64::MIN).
7312// - The biggest negative f64 that is out of range.
7313a = -9223372036854775808.0, -9223372036854777856.0
7314validate -9223372036854775808.0, -9223372036854775808.0
7315generate float64x2_t
7316
7317// Odd-numbered tests for float64x1_t coverage.
7318a = 2.9
7319validate 3.0
7320a = -2.5
7321validate -2.0
7322a = 9223372036854775808.0
7323validate -9223372036854775808.0
7324a = -9223372036854777856.0
7325validate -9223372036854775808.0
7326
c620b35d 7327multi_fn = transmute, {self-out-_, {simd_extract!, a, 0}}
781aab86
FG
7328link-aarch64 = llvm.aarch64.frint64x.f64:f64:::f64
7329generate float64x1_t
7330
3c0e092e
XL
7331/// Floating-point round to 64-bit integer toward zero
7332name = vrnd64z
3c0e092e
XL
7333target = frintts
7334
781aab86
FG
7335a = -1.5, 2.9, 1.5, -2.5
7336validate -1.0, 2.0, 1.0, -2.0
7337
3c0e092e
XL
7338aarch64 = frint64z
7339link-aarch64 = frint64z._EXT_
7340generate float32x2_t, float32x4_t
7341
781aab86
FG
7342// The float64x1_t form uses a different LLVM link and isn't supported by Clang
7343// (and so has no intrinsic-test), so perform extra validation to make sure
7344// that it matches the float64x2_t form.
7345
7346a = 1.5, -2.5
7347validate 1.0, -2.0
7348// - The biggest f64 representable as an i64 (0x7ffffffffffffc00).
7349// - The smallest positive f64 that is out of range (2^63).
7350a = 9223372036854774784.0, 9223372036854775808.0
7351validate 9223372036854774784.0, -9223372036854775808.0
7352// - The smallest f64 representable as an i64 (i64::MIN).
7353// - The biggest negative f64 that is out of range.
7354a = -9223372036854775808.0, -9223372036854777856.0
7355validate -9223372036854775808.0, -9223372036854775808.0
7356generate float64x2_t
7357
7358// Odd-numbered tests for float64x1_t coverage.
7359a = 2.9
7360validate 2.0
7361a = -2.5
7362validate -2.0
7363a = 9223372036854775808.0
7364validate -9223372036854775808.0
7365a = -9223372036854777856.0
7366validate -9223372036854775808.0
7367
c620b35d 7368multi_fn = transmute, {self-out-_, {simd_extract!, a, 0}}
781aab86
FG
7369link-aarch64 = llvm.aarch64.frint64z.f64:f64:::f64
7370generate float64x1_t
7371
3c0e092e
XL
7372/// Transpose elements
7373name = vtrn
353b0b11
FG
7374multi_fn = simd_shuffle!, a1:in_t, a, b, {transpose-1-in_len}
7375multi_fn = simd_shuffle!, b1:in_t, a, b, {transpose-2-in_len}
3c0e092e
XL
7376multi_fn = transmute, (a1, b1)
7377a = 0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30
7378b = 1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31
7379validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
7380
7381aarch64 = trn
7382arm = vtrn
7383generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
7384generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
7385generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t
7386aarch64 = zip
7387generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t
7388
7389/// Transpose elements
7390name = vtrn
353b0b11
FG
7391multi_fn = simd_shuffle!, a1:in_t, a, b, {transpose-1-in_len}
7392multi_fn = simd_shuffle!, b1:in_t, a, b, {transpose-2-in_len}
3c0e092e
XL
7393multi_fn = transmute, (a1, b1)
7394a = 0., 2., 2., 6.
7395b = 1., 3., 3., 7.
7396validate 0., 1., 2., 3., 2., 3., 6., 7.
7397
7398aarch64 = zip
7399arm = vtrn
7400generate float32x2_t:float32x2_t:float32x2x2_t
7401aarch64 = trn
7402generate float32x4_t:float32x4_t:float32x4x2_t
7403
17df50a5
XL
7404/// Transpose vectors
7405name = vtrn1
353b0b11 7406multi_fn = simd_shuffle!, a, b, {transpose-1-in_len}
17df50a5
XL
7407a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
7408b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
7409validate 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
7410
7411aarch64 = trn1
7412generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
7413
7414aarch64 = zip1
7415generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
7416
7417/// Transpose vectors
7418name = vtrn1
353b0b11 7419multi_fn = simd_shuffle!, a, b, {transpose-1-in_len}
17df50a5
XL
7420a = 0., 2., 4., 6., 8., 10., 12., 14.
7421b = 1., 3., 5., 7., 9., 11., 13., 15.
7422validate 0., 1., 4., 5., 8., 9., 12., 13.
7423
7424aarch64 = trn1
7425generate float32x4_t
7426
7427aarch64 = zip1
7428generate float32x2_t, float64x2_t
7429
7430/// Transpose vectors
7431name = vtrn2
353b0b11 7432multi_fn = simd_shuffle!, a, b, {transpose-2-in_len}
17df50a5
XL
7433a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
7434b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
7435validate 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
7436
7437aarch64 = trn2
7438generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
7439
7440aarch64 = zip2
7441generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
7442
7443/// Transpose vectors
7444name = vtrn2
353b0b11 7445multi_fn = simd_shuffle!, a, b, {transpose-2-in_len}
17df50a5
XL
7446a = 0., 2., 4., 6., 8., 10., 12., 14.
7447b = 1., 3., 5., 7., 9., 11., 13., 15.
7448validate 2., 3., 6., 7., 10., 11., 14., 15.
7449
7450aarch64 = trn2
7451generate float32x4_t
7452
7453aarch64 = zip2
7454generate float32x2_t, float64x2_t
7455
3c0e092e
XL
7456/// Zip vectors
7457name = vzip
353b0b11
FG
7458multi_fn = simd_shuffle!, a0:in_t, a, b, {zip-1-in_len}
7459multi_fn = simd_shuffle!, b0:in_t, a, b, {zip-2-in_len}
3c0e092e
XL
7460multi_fn = transmute, (a0, b0)
7461a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
7462b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
7463validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
7464
7465aarch64 = zip
7466arm = vzip
7467generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t
7468generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t
7469generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t
7470arm = vtrn
7471generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t
781aab86 7472aarch64 = zip
3c0e092e
XL
7473arm = vorr
7474generate int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
7475generate uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
7476generate poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t
7477
7478/// Zip vectors
7479name = vzip
353b0b11
FG
7480multi_fn = simd_shuffle!, a0:in_t, a, b, {zip-1-in_len}
7481multi_fn = simd_shuffle!, b0:in_t, a, b, {zip-2-in_len}
3c0e092e
XL
7482multi_fn = transmute, (a0, b0)
7483a = 1., 2., 3., 4.
7484b = 5., 6., 7., 8.
7485validate 1., 5., 2., 6., 3., 7., 4., 8.
7486
7487aarch64 = zip
7488arm = vtrn
7489generate float32x2_t:float32x2_t:float32x2x2_t
781aab86 7490aarch64 = zip
3c0e092e
XL
7491arm = vorr
7492generate float32x4_t:float32x4_t:float32x4x2_t
7493
17df50a5
XL
7494/// Zip vectors
7495name = vzip1
353b0b11 7496multi_fn = simd_shuffle!, a, b, {zip-1-in_len}
17df50a5
XL
7497a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
7498b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
7499validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
7500
7501aarch64 = zip1
7502generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t
7503
7504/// Zip vectors
7505name = vzip1
353b0b11 7506multi_fn = simd_shuffle!, a, b, {zip-1-in_len}
17df50a5
XL
7507a = 0., 2., 4., 6., 8., 10., 12., 14.
7508b = 1., 3., 5., 7., 9., 11., 13., 15.
7509validate 0., 1., 2., 3., 4., 5., 6., 7.
7510
7511aarch64 = zip1
7512generate float32x2_t, float32x4_t, float64x2_t
7513
7514/// Zip vectors
7515name = vzip2
353b0b11 7516multi_fn = simd_shuffle!, a, b, {zip-2-in_len}
17df50a5
XL
7517a = 0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30
7518b = 1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31
7519validate 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
7520
7521aarch64 = zip2
7522generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t
7523
7524/// Zip vectors
7525name = vzip2
353b0b11 7526multi_fn = simd_shuffle!, a, b, {zip-2-in_len}
17df50a5
XL
7527a = 0., 8., 8., 10., 8., 10., 12., 14.
7528b = 1., 9., 9., 11., 9., 11., 13., 15.
7529validate 8., 9., 10., 11., 12., 13., 14., 15.
7530
7531aarch64 = zip2
7532generate float32x2_t, float32x4_t, float64x2_t
7533
3c0e092e
XL
7534/// Unzip vectors
7535name = vuzp
353b0b11
FG
7536multi_fn = simd_shuffle!, a0:in_t, a, b, {unzip-1-in_len}
7537multi_fn = simd_shuffle!, b0:in_t, a, b, {unzip-2-in_len}
3c0e092e
XL
7538multi_fn = transmute, (a0, b0)
7539a = 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16
7540b = 2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32
7541validate 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32
7542
7543aarch64 = uzp
7544arm = vuzp
7545generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
7546generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
7547generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t
7548aarch64 = zip
7549arm = vtrn
7550generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t
7551
7552/// Unzip vectors
7553name = vuzp
353b0b11
FG
7554multi_fn = simd_shuffle!, a0:in_t, a, b, {unzip-1-in_len}
7555multi_fn = simd_shuffle!, b0:in_t, a, b, {unzip-2-in_len}
3c0e092e
XL
7556multi_fn = transmute, (a0, b0)
7557a = 1., 2., 2., 4.
7558b = 2., 6., 6., 8.
7559validate 1., 2., 2., 6., 2., 4., 6., 8.
7560
7561aarch64 = zip
7562arm = vtrn
7563generate float32x2_t:float32x2_t:float32x2x2_t
7564aarch64 = uzp
7565arm = vuzp
7566generate float32x4_t:float32x4_t:float32x4x2_t
7567
17df50a5
XL
7568/// Unzip vectors
7569name = vuzp1
353b0b11 7570multi_fn = simd_shuffle!, a, b, {unzip-1-in_len}
17df50a5
XL
7571a = 1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0
7572b = 2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0
7573validate 1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16
7574
7575aarch64 = uzp1
7576generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
7577
7578aarch64 = zip1
7579generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
7580
7581/// Unzip vectors
7582name = vuzp1
353b0b11 7583multi_fn = simd_shuffle!, a, b, {unzip-1-in_len}
17df50a5
XL
7584a = 0., 8., 1., 9., 4., 12., 5., 13.
7585b = 1., 10., 3., 11., 6., 14., 7., 15.
7586validate 0., 1., 1., 3., 4., 5., 6., 7.
7587
7588aarch64 = uzp1
7589generate float32x4_t
7590
7591aarch64 = zip1
7592generate float32x2_t, float64x2_t
7593
7594/// Unzip vectors
7595name = vuzp2
353b0b11 7596multi_fn = simd_shuffle!, a, b, {unzip-2-in_len}
17df50a5
XL
7597a = 0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24
7598b = 0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32
7599validate 17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32
7600
7601aarch64 = uzp2
7602generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
7603
7604aarch64 = zip2
7605generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
7606
7607/// Unzip vectors
7608name = vuzp2
353b0b11 7609multi_fn = simd_shuffle!, a, b, {unzip-2-in_len}
17df50a5
XL
7610a = 0., 8., 1., 9., 4., 12., 5., 13.
7611b = 2., 9., 3., 11., 6., 14., 7., 15.
7612validate 8., 9., 9., 11., 12., 13., 14., 15.
7613
7614aarch64 = uzp2
7615generate float32x4_t
7616
7617aarch64 = zip2
7618generate float32x2_t, float64x2_t
7619
7620////////////////////
7621// Unsigned Absolute difference and Accumulate Long
7622////////////////////
7623
7624/// Unsigned Absolute difference and Accumulate Long
7625name = vabal
7626multi_fn = vabd-unsigned-noext, b, c, d:in_t
7627multi_fn = simd_add, a, {simd_cast, d}
7628a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7629b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7630c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
7631validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20
7632
7633arm = vabal.s
7634aarch64 = uabal
7635generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
7636
7637/// Unsigned Absolute difference and Accumulate Long
7638name = vabal_high
7639no-q
353b0b11
FG
7640multi_fn = simd_shuffle!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
7641multi_fn = simd_shuffle!, e:uint8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5
XL
7642multi_fn = vabd_u8, d, e, f:uint8x8_t
7643multi_fn = simd_add, a, {simd_cast, f}
7644a = 9, 10, 11, 12, 13, 14, 15, 16
7645b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7646c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
7647validate 20, 20, 20, 20, 20, 20, 20, 20
7648
7649aarch64 = uabal
7650generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t
7651
7652/// Unsigned Absolute difference and Accumulate Long
7653name = vabal_high
7654no-q
353b0b11
FG
7655multi_fn = simd_shuffle!, d:uint16x4_t, b, b, [4, 5, 6, 7]
7656multi_fn = simd_shuffle!, e:uint16x4_t, c, c, [4, 5, 6, 7]
17df50a5
XL
7657multi_fn = vabd_u16, d, e, f:uint16x4_t
7658multi_fn = simd_add, a, {simd_cast, f}
7659a = 9, 10, 11, 12
7660b = 1, 2, 3, 4, 9, 10, 11, 12
7661c = 10, 10, 10, 10, 20, 0, 2, 4
7662validate 20, 20, 20, 20
7663
7664aarch64 = uabal
7665generate uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
7666
7667/// Unsigned Absolute difference and Accumulate Long
7668name = vabal_high
7669no-q
353b0b11
FG
7670multi_fn = simd_shuffle!, d:uint32x2_t, b, b, [2, 3]
7671multi_fn = simd_shuffle!, e:uint32x2_t, c, c, [2, 3]
17df50a5
XL
7672multi_fn = vabd_u32, d, e, f:uint32x2_t
7673multi_fn = simd_add, a, {simd_cast, f}
7674a = 15, 16
7675b = 1, 2, 15, 16
7676c = 10, 10, 10, 12
7677validate 20, 20
7678
7679aarch64 = uabal
7680generate uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
7681
7682////////////////////
7683// Signed Absolute difference and Accumulate Long
7684////////////////////
7685
7686/// Signed Absolute difference and Accumulate Long
7687name = vabal
7688multi_fn = vabd-signed-noext, b, c, d:int8x8_t
7689multi_fn = simd_cast, e:uint8x8_t, d
7690multi_fn = simd_add, a, {simd_cast, e}
7691a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7692b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7693c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
7694validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20
7695
7696arm = vabal.s
7697aarch64 = sabal
7698generate int16x8_t:int8x8_t:int8x8_t:int16x8_t
7699
7700/// Signed Absolute difference and Accumulate Long
7701name = vabal
7702multi_fn = vabd-signed-noext, b, c, d:int16x4_t
7703multi_fn = simd_cast, e:uint16x4_t, d
7704multi_fn = simd_add, a, {simd_cast, e}
7705a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7706b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7707c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
7708validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20
7709
7710arm = vabal.s
7711aarch64 = sabal
7712generate int32x4_t:int16x4_t:int16x4_t:int32x4_t
7713
7714/// Signed Absolute difference and Accumulate Long
7715name = vabal
7716multi_fn = vabd-signed-noext, b, c, d:int32x2_t
7717multi_fn = simd_cast, e:uint32x2_t, d
7718multi_fn = simd_add, a, {simd_cast, e}
7719a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7720b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7721c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
7722validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20
7723
7724arm = vabal.s
7725aarch64 = sabal
7726generate int64x2_t:int32x2_t:int32x2_t:int64x2_t
7727
7728/// Signed Absolute difference and Accumulate Long
7729name = vabal_high
7730no-q
353b0b11
FG
7731multi_fn = simd_shuffle!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
7732multi_fn = simd_shuffle!, e:int8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
17df50a5
XL
7733multi_fn = vabd_s8, d, e, f:int8x8_t
7734multi_fn = simd_cast, f:uint8x8_t, f
7735multi_fn = simd_add, a, {simd_cast, f}
7736a = 9, 10, 11, 12, 13, 14, 15, 16
7737b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
7738c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
7739validate 20, 20, 20, 20, 20, 20, 20, 20
7740
7741aarch64 = sabal
7742generate int16x8_t:int8x16_t:int8x16_t:int16x8_t
7743
7744/// Signed Absolute difference and Accumulate Long
7745name = vabal_high
7746no-q
353b0b11
FG
7747multi_fn = simd_shuffle!, d:int16x4_t, b, b, [4, 5, 6, 7]
7748multi_fn = simd_shuffle!, e:int16x4_t, c, c, [4, 5, 6, 7]
17df50a5
XL
7749multi_fn = vabd_s16, d, e, f:int16x4_t
7750multi_fn = simd_cast, f:uint16x4_t, f
7751multi_fn = simd_add, a, {simd_cast, f}
7752a = 9, 10, 11, 12
7753b = 1, 2, 3, 4, 9, 10, 11, 12
7754c = 10, 10, 10, 10, 20, 0, 2, 4
7755validate 20, 20, 20, 20
7756
7757aarch64 = sabal
7758generate int32x4_t:int16x8_t:int16x8_t:int32x4_t
7759
7760/// Signed Absolute difference and Accumulate Long
7761name = vabal_high
7762no-q
353b0b11
FG
7763multi_fn = simd_shuffle!, d:int32x2_t, b, b, [2, 3]
7764multi_fn = simd_shuffle!, e:int32x2_t, c, c, [2, 3]
17df50a5
XL
7765multi_fn = vabd_s32, d, e, f:int32x2_t
7766multi_fn = simd_cast, f:uint32x2_t, f
7767multi_fn = simd_add, a, {simd_cast, f}
7768a = 15, 16
7769b = 1, 2, 15, 16
7770c = 10, 10, 10, 12
7771validate 20, 20
7772
7773aarch64 = sabal
7774generate int64x2_t:int32x4_t:int32x4_t:int64x2_t
7775
7776////////////////////
353b0b11 7777// Signed saturating Absolute value
17df50a5
XL
7778////////////////////
7779
353b0b11 7780/// Signed saturating Absolute value
17df50a5
XL
7781name = vqabs
7782a = MIN, MAX, -6, -5, -4, -3, -2, -1, 0, -127, 127, 1, 2, 3, 4, 5
7783validate MAX, MAX, 6, 5, 4, 3, 2, 1, 0, 127, 127, 1, 2, 3, 4, 5
7784
7785arm = vqabs.s
7786aarch64 = sqabs
7787link-arm = vqabs._EXT_
7788link-aarch64 = sqabs._EXT_
7789generate int*_t
7790
353b0b11 7791/// Signed saturating Absolute value
17df50a5
XL
7792name = vqabs
7793a = MIN, -7
7794validate MAX, 7
7795
7796aarch64 = sqabs
7797link-aarch64 = sqabs._EXT_
7798generate int64x*_t
3c0e092e
XL
7799
7800/// Signed saturating absolute value
7801name = vqabs
c620b35d 7802multi_fn = simd_extract!, {vqabs-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
3c0e092e
XL
7803a = -7
7804validate 7
7805
7806aarch64 = sqabs
7807generate i8:i8, i16:i16
7808
7809/// Signed saturating absolute value
7810name = vqabs
7811a = -7
7812validate 7
7813
7814aarch64 = sqabs
7815link-aarch64 = sqabs._EXT_
7816generate i32:i32, i64:i64
7817
7818/// Shift left and insert
7819name = vsli
7820n-suffix
7821constn = N
7822multi_fn = static_assert-N-0-63
7823multi_fn = transmute, {vsli_n-in_ntt-::<N>, transmute(a), transmute(b)}
7824a = 333
7825b = 2042
7826n = 2
7827validate 8169
7828
7829aarch64 = sli
7830generate i64, u64
7831
7832/// Shift right and insert
7833name = vsri
7834n-suffix
7835constn = N
7836multi_fn = static_assert-N-1-bits
7837multi_fn = transmute, {vsri_n-in_ntt-::<N>, transmute(a), transmute(b)}
7838a = 333
7839b = 2042
7840n = 2
7841validate 510
7842
7843aarch64 = sri
a2a8927a 7844generate i64, u64