// ARM Neon intrinsic specification.
//
// This file contains the specification for a number of
// intrinsics that allows us to generate them along with
// their test cases.
//
// A note on the syntax of the file - it's not very intelligently parsed!
//
// # Comments
// Comments start with EXACTLY two slashes, or with four or more,
// so // is a comment and /////// is too.
//
// # Sections
// Sections start with EXACTLY three slashes followed
// by AT LEAST one space. Sections are used for two things:
//
// 1) they serve as the doc comment for the given intrinsics.
// 2) they reset all variables (name, fn, etc.)
//
// # Variables
//
// name - The prefix of the function; suffixes are auto-generated
// from the types it gets passed.
//
// fn - The function to call in rust-land.
//
// aarch64 - The intrinsic to check on the aarch64 architecture.
// If this is given but no arm intrinsic is provided,
// the function will exclusively be generated for
// aarch64.
// This is used to generate both aarch64-specific and
// shared intrinsics, by first specifying only the
// aarch64 variant and then the arm variant.
//
// arm - The arm v7 intrinsic used to check for arm code
// generation. All neon functions available in arm are
// also available in aarch64. If no aarch64 intrinsic was
// set they are assumed to be the same.
// Intrinsics ending with a `.` will have a size suffix
// added (such as `i8` or `i64`) that is not sign-specific.
// Intrinsics ending with a `.s` will have a size suffix
// added (such as `s8` or `u64`) that is sign-specific.
//
// a - First input for tests; it gets scaled to the size of
// the type.
//
// b - Second input for tests; it gets scaled to the size of
// the type.
//
// # special values
//
// TRUE - 'true', all bits are set to 1
// FALSE - 'false', all bits are set to 0
// FF - same as 'true'
// MIN - minimal value (either 0 or the lowest negative number)
// MAX - maximal value, prone to overflow
//
// # validate <values>
// Validates a and b against the expected result of the test.
// The special values 'TRUE' and 'FALSE' can be used to
// represent the correct NEON representation of true and
// false values; they too get scaled to the type (for 8-bit
// lanes, for example, TRUE is 0xFF).
//
// Validate needs to be called before generate as it sets
// up the rules for validation that get generated for each
// type.
//
// # generate <types>
// The generate command generates the intrinsics; it uses the
// variables set so far and can be called multiple times,
// overwriting some of the variables between calls.

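// # Example
//
// The first entry below (`vand`) would make the generator emit, for
// int8x8_t, a Rust intrinsic roughly of this shape (an illustrative
// sketch; the exact output, including the attribute set, is defined by
// stdarch-gen):
//
//     #[inline]
//     #[target_feature(enable = "neon")]
//     // ...plus cfg_attr/assert_instr attributes that check that the
//     // `vand`/`and` instruction is actually emitted
//     pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
//         simd_and(a, b)
//     }
//
// together with a test that feeds in the `a` and `b` values and
// asserts the `validate` values.
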
/// Vector bitwise and
name = vand
fn = simd_and
arm = vand
aarch64 = and
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
generate int*_t, uint*_t, int64x*_t, uint64x*_t
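
// NOTE (editorial, inferred from the generator): a wildcard such as
// `int*_t` expands to the 8-, 16- and 32-bit vector types of that
// class (int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x2_t,
// int32x4_t), while the 64-bit variants are requested separately via
// `int64x*_t`. The line above therefore emits vand_s8, vandq_s8,
// vand_s16, ... plus the unsigned and 64-bit variants listed.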

/// Vector bitwise or (immediate, inclusive)
name = vorr
fn = simd_or
arm = vorr
aarch64 = orr
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t


/// Vector bitwise exclusive or (vector)
name = veor
fn = simd_xor
arm = veor
aarch64 = eor
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Three-way exclusive OR
name = veor3
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
c = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
target = sha3

aarch64 = eor3
link-aarch64 = llvm.aarch64.crypto.eor3s._EXT_
generate int8x16_t, int16x8_t, int32x4_t, int64x2_t
link-aarch64 = llvm.aarch64.crypto.eor3u._EXT_
generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t
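
// NOTE (editorial; inferred from the generator, not normative):
// `target` gates the generated intrinsics behind an additional target
// feature (here `sha3`) on top of `neon`, while `link-aarch64` /
// `link-arm` bind the function body to an LLVM intrinsic, with `_EXT_`
// expanded per generated type.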

////////////////////
// Absolute difference between the arguments
////////////////////

/// Absolute difference between the arguments
name = vabd
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
validate 15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15

arm = vabd.s
aarch64 = sabd
link-arm = vabds._EXT_
link-aarch64 = sabd._EXT_
generate int*_t

arm = vabd.s
aarch64 = uabd
link-arm = vabdu._EXT_
link-aarch64 = uabd._EXT_
generate uint*_t

/// Absolute difference between the arguments (floating-point)
name = vabd
a = 1.0, 2.0, 5.0, -4.0
b = 9.0, 3.0, 2.0, 8.0
validate 8.0, 1.0, 3.0, 12.0

aarch64 = fabd
link-aarch64 = fabd._EXT_
generate float64x*_t

arm = vabd.s
aarch64 = fabd
link-arm = vabds._EXT_
link-aarch64 = fabd._EXT_
generate float*_t

/// Floating-point absolute difference
name = vabd
multi_fn = simd_extract, {vabd-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1.0
b = 9.0
validate 8.0

aarch64 = fabd
generate f32, f64

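// NOTE (editorial; this reading of multi_fn is inferred, not
// normative): `multi_fn` lines compose the body of the generated
// function, arguments in `{...}` are nested calls, and dash-separated
// names such as `vabd-in_ntt-noext` are resolved against the current
// type (with `-noext` suppressing any extension suffix). The entry
// above thus broadcasts both scalars, takes the vector absolute
// difference and extracts lane 0.
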
////////////////////
// Absolute difference long
////////////////////

/// Unsigned Absolute Difference Long
name = vabdl
multi_fn = simd_cast, {vabd-unsigned-noext, a, b}
a = 1, 2, 3, 4, 4, 3, 2, 1
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 9, 8, 7, 6, 6, 7, 8, 9

arm = vabdl.s
aarch64 = uabdl
generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t
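
// NOTE (editorial, inferred): colon-separated type lists spell out the
// whole signature, inputs first and return type last, so
// `uint8x8_t:uint8x8_t:uint16x8_t` means
// fn(uint8x8_t, uint8x8_t) -> uint16x8_t; a single type means all
// inputs and the output share it.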

/// Signed Absolute Difference Long
name = vabdl
multi_fn = simd_cast, c:uint8x8_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 2, 3, 4, 4, 3, 2, 1
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 9, 8, 7, 6, 6, 7, 8, 9

arm = vabdl.s
aarch64 = sabdl
generate int8x8_t:int8x8_t:int16x8_t

/// Signed Absolute Difference Long
name = vabdl
multi_fn = simd_cast, c:uint16x4_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 2, 11, 12
b = 10, 10, 10, 10
validate 9, 8, 1, 2

arm = vabdl.s
aarch64 = sabdl
generate int16x4_t:int16x4_t:int32x4_t

/// Signed Absolute Difference Long
name = vabdl
multi_fn = simd_cast, c:uint32x2_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 11
b = 10, 10
validate 9, 1

arm = vabdl.s
aarch64 = sabdl
generate int32x2_t:int32x2_t:int64x2_t

/// Unsigned Absolute Difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle8!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle8!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, {vabd_u8, c, d}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2, 3, 4, 5, 6

aarch64 = uabdl
generate uint8x16_t:uint8x16_t:uint16x8_t

/// Unsigned Absolute Difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle4!, c:uint16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_shuffle4!, d:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, {vabd_u16, c, d}
a = 1, 2, 3, 4, 8, 9, 11, 12
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 2, 1, 1, 2

aarch64 = uabdl
generate uint16x8_t:uint16x8_t:uint32x4_t

/// Unsigned Absolute Difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle2!, c:uint32x2_t, a, a, [2, 3]
multi_fn = simd_shuffle2!, d:uint32x2_t, b, b, [2, 3]
multi_fn = simd_cast, {vabd_u32, c, d}
a = 1, 2, 3, 4
b = 10, 10, 10, 10
validate 7, 6

aarch64 = uabdl
generate uint32x4_t:uint32x4_t:uint64x2_t

/// Signed Absolute Difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle8!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle8!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, e:uint8x8_t, {vabd_s8, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2, 3, 4, 5, 6

aarch64 = sabdl
generate int8x16_t:int8x16_t:int16x8_t

/// Signed Absolute Difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle4!, c:int16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_shuffle4!, d:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, e:uint16x4_t, {vabd_s16, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4, 9, 10, 11, 12
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2

aarch64 = sabdl
generate int16x8_t:int16x8_t:int32x4_t

/// Signed Absolute Difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle2!, c:int32x2_t, a, a, [2, 3]
multi_fn = simd_shuffle2!, d:int32x2_t, b, b, [2, 3]
multi_fn = simd_cast, e:uint32x2_t, {vabd_s32, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4
b = 10, 10, 10, 10
validate 7, 6

aarch64 = sabdl
generate int32x4_t:int32x4_t:int64x2_t

////////////////////
// equality
////////////////////

/// Compare bitwise Equal (vector)
name = vceq
fn = simd_eq
a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX
b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN
validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE

aarch64 = cmeq
generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t

arm = vceq.
generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t

/// Floating-point compare equal
name = vceq
fn = simd_eq
a = 1.2, 3.4, 5.6, 7.8
b = 1.2, 3.4, 5.6, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmeq
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vceq.
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare bitwise equal
name = vceq
multi_fn = transmute, {vceq-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 1
b = 2
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare equal
name = vceq
multi_fn = simd_extract, {vceq-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1.
b = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Signed compare bitwise equal to zero
name = vceqz
fn = simd_eq
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = cmeq
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t

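// NOTE (editorial, inferred): `fixed` supplies a constant operand for
// the test body; in the vceqz entries it provides the all-zero vector
// that `fn = simd_eq` compares `a` against.
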
/// Unsigned compare bitwise equal to zero
name = vceqz
fn = simd_eq
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = cmeq
generate uint*_t, uint64x*_t

/// Floating-point compare bitwise equal to zero
name = vceqz
fn = simd_eq
a = 0.0, 1.2, 3.4, 5.6
fixed = 0.0, 0.0, 0.0, 0.0
validate TRUE, FALSE, FALSE, FALSE

aarch64 = fcmeq
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare bitwise equal to zero
name = vceqz
multi_fn = transmute, {vceqz-in_ntt-noext, {transmute, a}}
a = 1
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare bitwise equal to zero
name = vceqz
multi_fn = simd_extract, {vceqz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = 1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Signed compare bitwise test bits nonzero
name = vtst
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmtst
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t

arm = vtst
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly16x4_t:uint16x4_t, poly16x8_t:uint16x8_t

/// Unsigned compare bitwise test bits nonzero
name = vtst
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmtst
generate uint64x*_t

arm = vtst
generate uint*_t

/// Compare bitwise test bits nonzero
name = vtst
multi_fn = transmute, {vtst-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 0
b = 0
validate 0

aarch64 = tst
generate i64:i64:u64, u64

/// Signed saturating accumulate of unsigned value
name = vuqadd
out-suffix
a = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
b = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
validate 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8

aarch64 = suqadd
link-aarch64 = suqadd._EXT_
generate i32:u32:i32, i64:u64:i64

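// NOTE (editorial, inferred): directives such as `out-suffix` above
// (and `lane-suffixes`, `n-suffix`, `double-suffixes` elsewhere)
// choose how the type suffix of the generated name is derived;
// `out-suffix` derives it from the output type, giving names like
// vuqadds_s32 and vuqaddd_s64 for the scalar entry above.
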
/// Signed saturating accumulate of unsigned value
name = vuqadd
out-suffix
multi_fn = simd_extract, {vuqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1
b = 2
validate 3

aarch64 = suqadd
generate i8:u8:i8, i16:u16:i16

////////////////////
// Floating-point absolute value
////////////////////

/// Floating-point absolute value
name = vabs
fn = simd_fabs
a = -0.1, -2.2, -3.3, -6.6
validate 0.1, 2.2, 3.3, 6.6
aarch64 = fabs
generate float64x1_t:float64x1_t, float64x2_t:float64x2_t

arm = vabs
generate float32x2_t:float32x2_t, float32x4_t:float32x4_t

////////////////////
// greater than
////////////////////

/// Compare signed greater than
name = vcgt
fn = simd_gt
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcgt.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned higher
name = vcgt
fn = simd_gt
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhi
generate uint64x*_t

arm = vcgt.s
generate uint*_t

/// Floating-point compare greater than
name = vcgt
fn = simd_gt
a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcgt.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare greater than
name = vcgt
multi_fn = transmute, {vcgt-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 1
b = 2
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare greater than
name = vcgt
multi_fn = simd_extract, {vcgt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1.
b = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

////////////////////
// less than
////////////////////

/// Compare signed less than
name = vclt
fn = simd_lt
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcgt.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned less than
name = vclt
fn = simd_lt
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhi
generate uint64x*_t

arm = vcgt.s
generate uint*_t

/// Floating-point compare less than
name = vclt
fn = simd_lt
a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcgt.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare less than
name = vclt
multi_fn = transmute, {vclt-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 2
b = 1
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare less than
name = vclt
multi_fn = simd_extract, {vclt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 2.
b = 1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

////////////////////
// less than or equal
////////////////////

/// Compare signed less than or equal
name = vcle
fn = simd_le
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmge
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcge.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare greater than or equal
name = vcge
multi_fn = transmute, {vcge-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 1
b = 2
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare greater than or equal
name = vcge
multi_fn = simd_extract, {vcge-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1.
b = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Compare unsigned less than or equal
name = vcle
fn = simd_le
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhs
generate uint64x*_t

arm = vcge.s
generate uint*_t

/// Floating-point compare less than or equal
name = vcle
fn = simd_le
a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare less than or equal
name = vcle
multi_fn = transmute, {vcle-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 2
b = 1
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare less than or equal
name = vcle
multi_fn = simd_extract, {vcle-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 2.
b = 1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

////////////////////
// greater than or equal
////////////////////

/// Compare signed greater than or equal
name = vcge
fn = simd_ge
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmge
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcge.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned greater than or equal
name = vcge
fn = simd_ge
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhs
generate uint64x*_t

arm = vcge.s
generate uint*_t

/// Floating-point compare greater than or equal
name = vcge
fn = simd_ge
a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare signed greater than or equal to zero
name = vcgez
fn = simd_ge
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmge
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare greater than or equal to zero
name = vcgez
fn = simd_ge
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmge
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare signed greater than or equal to zero
name = vcgez
multi_fn = transmute, {vcgez-in_ntt-noext, {transmute, a}}
a = -1
validate 0

aarch64 = eor
generate i64:u64

/// Floating-point compare greater than or equal to zero
name = vcgez
multi_fn = simd_extract, {vcgez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = -1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Compare signed greater than zero
name = vcgtz
fn = simd_gt
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmgt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare greater than zero
name = vcgtz
fn = simd_gt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmgt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare signed greater than zero
name = vcgtz
multi_fn = transmute, {vcgtz-in_ntt-noext, {transmute, a}}
a = -1
validate 0

aarch64 = cmp
generate i64:u64

/// Floating-point compare greater than zero
name = vcgtz
multi_fn = simd_extract, {vcgtz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = -1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Compare signed less than or equal to zero
name = vclez
fn = simd_le
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = cmgt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare less than or equal to zero
name = vclez
fn = simd_le
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = fcmle
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare less than or equal to zero
name = vclez
multi_fn = transmute, {vclez-in_ntt-noext, {transmute, a}}
a = 2
validate 0

aarch64 = cmp
generate i64:u64

/// Floating-point compare less than or equal to zero
name = vclez
multi_fn = simd_extract, {vclez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Compare signed less than zero
name = vcltz
fn = simd_lt
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = cmlt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare less than zero
name = vcltz
fn = simd_lt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = fcmlt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare less than zero
name = vcltz
multi_fn = transmute, {vcltz-in_ntt-noext, {transmute, a}}
a = 2
validate 0

aarch64 = asr
generate i64:u64

/// Floating-point compare less than zero
name = vcltz
multi_fn = simd_extract, {vcltz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Count leading sign bits
name = vcls
a = MIN, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
validate 0, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0

arm = vcls.s
aarch64 = cls
link-arm = vcls._EXT_
link-aarch64 = cls._EXT_
generate int*_t

/// Count leading sign bits
name = vcls
multi_fn = transmute, {vcls-signed-noext, {transmute, a}}
a = MIN, MAX, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
validate BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1

arm = vcls
aarch64 = cls
generate uint8x8_t:int8x8_t, uint8x16_t:int8x16_t, uint16x4_t:int16x4_t, uint16x8_t:int16x8_t, uint32x2_t:int32x2_t, uint32x4_t:int32x4_t

/// Count leading zero bits
name = vclz
multi_fn = self-signed-ext, a
a = MIN, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
validate 0, 0, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 1

arm = vclz.
aarch64 = clz
generate int*_t

/// Count leading zero bits
name = vclz
multi_fn = transmute, {self-signed-ext, transmute(a)}
a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
validate BITS, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0

arm = vclz.
aarch64 = clz
generate uint*_t

/// Floating-point absolute compare greater than
name = vcagt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate !0, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE

aarch64 = facgt
link-aarch64 = facgt._EXT2_._EXT_
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

arm = vacgt.s
link-arm = vacgt._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point absolute compare greater than or equal
name = vcage
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate !0, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE

aarch64 = facge
link-aarch64 = facge._EXT2_._EXT_
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

arm = vacge.s
link-arm = vacge._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point absolute compare less than
name = vcalt
multi_fn = vcagt-self-noext, b, a
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate 0, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE

aarch64 = facgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

arm = vacgt.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point absolute compare less than or equal
name = vcale
multi_fn = vcage-self-noext, b, a
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate 0, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE

aarch64 = facge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

arm = vacge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 0:1
validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = mov
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x2_t, int32x4_t, int64x2_t
generate uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x2_t, uint32x4_t, uint64x2_t
generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t
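
// NOTE (editorial, inferred): `constn = LANE1:LANE2` declares two
// const generic parameters on the generated function, `n = 0:1`
// supplies the values used in the test, and the static_assert
// multi_fn lines emit compile-time bounds checks for them.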

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1., 2., 3., 4.
b = 0., 0.5, 0., 0.
n = 0:1
validate 0.5, 2., 3., 4.

aarch64 = mov
generate float32x2_t, float32x4_t, float64x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in_len-!, a:in_t, a, a, {asc-0-in_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 0:1
validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = mov
generate int8x8_t:int8x16_t:int8x8_t, int16x4_t:int16x8_t:int16x4_t, int32x2_t:int32x4_t:int32x2_t
generate uint8x8_t:uint8x16_t:uint8x8_t, uint16x4_t:uint16x8_t:uint16x4_t, uint32x2_t:uint32x4_t:uint32x2_t
generate poly8x8_t:poly8x16_t:poly8x8_t, poly16x4_t:poly16x8_t:poly16x4_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in_len-!, a:in_t, a, a, {asc-0-in_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in_len-LANE2}
a = 1., 2., 3., 4.
b = 0., 0.5, 0., 0.
n = 0:1
validate 0.5, 2., 3., 4.

aarch64 = mov
generate float32x2_t:float32x4_t:float32x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 0:1
validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = mov
generate int8x16_t:int8x8_t:int8x16_t, int16x8_t:int16x4_t:int16x8_t, int32x4_t:int32x2_t:int32x4_t
generate uint8x16_t:uint8x8_t:uint8x16_t, uint16x8_t:uint16x4_t:uint16x8_t, uint32x4_t:uint32x2_t:uint32x4_t
generate poly8x16_t:poly8x8_t:poly8x16_t, poly16x8_t:poly16x4_t:poly16x8_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1:0
validate 1, MAX

aarch64 = mov
generate int64x2_t:int64x1_t:int64x2_t, uint64x2_t:uint64x1_t:uint64x2_t, poly64x2_t:poly64x1_t:poly64x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1., 2., 3., 4.
b = 0.5, 0., 0., 0.
n = 1:0
validate 1., 0.5, 3., 4.

aarch64 = mov
generate float32x4_t:float32x2_t:float32x4_t
aarch64 = mov
generate float64x2_t:float64x1_t:float64x2_t

/// Create a vector from a 64-bit bit pattern
name = vcreate
out-suffix
multi_fn = transmute, a
a = 1
validate 1, 0, 0, 0, 0, 0, 0, 0

aarch64 = nop
arm = nop
generate u64:int8x8_t, u64:int16x4_t, u64:int32x2_t, u64:int64x1_t
generate u64:uint8x8_t, u64:uint16x4_t, u64:uint32x2_t, u64:uint64x1_t
generate u64:poly8x8_t, u64:poly16x4_t
target = aes
generate u64:poly64x1_t

/// Create a vector from a 64-bit bit pattern
name = vcreate
out-suffix
multi_fn = transmute, a
a = 0
validate 0., 0.

aarch64 = nop
generate u64:float64x1_t
arm = nop
generate u64:float32x2_t

/// Fixed-point convert to floating-point
name = vcvt
double-suffixes
fn = simd_cast
a = 1, 2, 3, 4
validate 1., 2., 3., 4.

aarch64 = scvtf
generate int64x1_t:float64x1_t, int64x2_t:float64x2_t
aarch64 = ucvtf
generate uint64x1_t:float64x1_t, uint64x2_t:float64x2_t

arm = vcvt
aarch64 = scvtf
generate int32x2_t:float32x2_t, int32x4_t:float32x4_t
aarch64 = ucvtf
generate uint32x2_t:float32x2_t, uint32x4_t:float32x4_t

/// Floating-point convert to higher precision long
name = vcvt
double-suffixes
fn = simd_cast
a = -1.2, 1.2
validate -1.2f32 as f64, 1.2f32 as f64

aarch64 = fcvtl
generate float32x2_t:float64x2_t

/// Floating-point convert to higher precision long
name = vcvt_high
noq-double-suffixes
multi_fn = simd_shuffle2!, b:float32x2_t, a, a, [2, 3]
multi_fn = simd_cast, b
a = -1.2, 1.2, 2.3, 3.4
validate 2.3f32 as f64, 3.4f32 as f64

aarch64 = fcvtl
generate float32x4_t:float64x2_t

/// Floating-point convert to lower precision narrow
name = vcvt
double-suffixes
fn = simd_cast
a = -1.2, 1.2
validate -1.2f64 as f32, 1.2f64 as f32

aarch64 = fcvtn
generate float64x2_t:float32x2_t

/// Floating-point convert to lower precision narrow
name = vcvt_high
noq-double-suffixes
multi_fn = simd_shuffle4!, a, {simd_cast, b}, [0, 1, 2, 3]
a = -1.2, 1.2
b = -2.3, 3.4
validate -1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32

aarch64 = fcvtn
generate float32x2_t:float64x2_t:float32x4_t

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx
double-suffixes
a = -1.0, 2.0
validate -1.0, 2.0

aarch64 = fcvtxn
link-aarch64 = fcvtxn._EXT2_._EXT_
generate float64x2_t:float32x2_t

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx
double-suffixes
multi_fn = simd_extract, {vcvtx-_f32_f64-noext, {vdupq_n-in_ntt-noext, a}}, 0
a = -1.0
validate -1.0

aarch64 = fcvtxn
generate f64:f32

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx_high
noq-double-suffixes
multi_fn = simd_shuffle4!, a, {vcvtx-noq_doubleself-noext, b}, [0, 1, 2, 3]
a = -1.0, 2.0
b = -3.0, 4.0
validate -1.0, 2.0, -3.0, 4.0

aarch64 = fcvtxn
generate float32x2_t:float64x2_t:float32x4_t

/// Fixed-point convert to floating-point
name = vcvt
double-n-suffixes
constn = N
multi_fn = static_assert-N-1-bits
a = 1, 2, 3, 4
n = 2
validate 0.25, 0.5, 0.75, 1.
arm-aarch64-separate

aarch64 = scvtf
link-aarch64 = vcvtfxs2fp._EXT2_._EXT_
const-aarch64 = N
generate int64x1_t:float64x1_t, int64x2_t:float64x2_t, i32:f32, i64:f64

aarch64 = ucvtf
link-aarch64 = vcvtfxu2fp._EXT2_._EXT_
const-aarch64 = N
generate uint64x1_t:float64x1_t, uint64x2_t:float64x2_t, u32:f32, u64:f64

aarch64 = scvtf
link-aarch64 = vcvtfxs2fp._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfxs2fp._EXT2_._EXT_
const-arm = N:i32

generate int32x2_t:float32x2_t, int32x4_t:float32x4_t

aarch64 = ucvtf
link-aarch64 = vcvtfxu2fp._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfxu2fp._EXT2_._EXT_
const-arm = N:i32
generate uint32x2_t:float32x2_t, uint32x4_t:float32x4_t
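
// NOTE (editorial; an assumption, not confirmed by this file):
// `arm-aarch64-separate` appears to request separate arm and aarch64
// definitions instead of one shared body, which matters here because
// the two sides use different const-argument plumbing (`const-arm` vs
// `const-aarch64`).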

/// Floating-point convert to fixed-point, rounding toward zero
name = vcvt
double-n-suffixes
constn = N
multi_fn = static_assert-N-1-bits
a = 0.25, 0.5, 0.75, 1.
n = 2
validate 1, 2, 3, 4
arm-aarch64-separate

aarch64 = fcvtzs
link-aarch64 = vcvtfp2fxs._EXT2_._EXT_
const-aarch64 = N
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

aarch64 = fcvtzu
link-aarch64 = vcvtfp2fxu._EXT2_._EXT_
const-aarch64 = N
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

aarch64 = fcvtzs
link-aarch64 = vcvtfp2fxs._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfp2fxs._EXT2_._EXT_
const-arm = N:i32
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t

aarch64 = fcvtzu
link-aarch64 = vcvtfp2fxu._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfp2fxu._EXT2_._EXT_
const-arm = N:i32
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Fixed-point convert to floating-point
name = vcvt
double-suffixes
multi_fn = a as out_t
a = 1
validate 1.

aarch64 = scvtf
generate i32:f32, i64:f64
aarch64 = ucvtf
generate u32:f32, u64:f64

/// Floating-point convert to fixed-point, rounding toward zero
name = vcvt
double-suffixes
multi_fn = a as out_t
a = 1.
validate 1

aarch64 = fcvtzs
generate f32:i32, f64:i64
aarch64 = fcvtzu
generate f32:u32, f64:u64

/// Floating-point convert to signed fixed-point, rounding toward zero
name = vcvt
double-suffixes
link-aarch64 = llvm.fptosi.sat._EXT2_._EXT_
a = -1.1, 2.1, -2.9, 3.9
validate -1, 2, -2, 3

aarch64 = fcvtzs
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t

link-arm = llvm.fptosi.sat._EXT2_._EXT_
arm = vcvt
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t

/// Floating-point convert to unsigned fixed-point, rounding toward zero
name = vcvt
double-suffixes
link-aarch64 = llvm.fptoui.sat._EXT2_._EXT_
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 2, 3

aarch64 = fcvtzu
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

link-arm = llvm.fptoui.sat._EXT2_._EXT_
arm = vcvt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point convert to signed integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -1, 2, -3, 4

aarch64 = fcvtas
link-aarch64 = fcvtas._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t

/// Floating-point convert to integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = 2.9
validate 3

aarch64 = fcvtas
link-aarch64 = fcvtas._EXT2_._EXT_
generate f32:i32, f64:i64

aarch64 = fcvtau
link-aarch64 = fcvtau._EXT2_._EXT_
generate f32:u32, f64:u64

/// Floating-point convert to signed integer, rounding to nearest with ties to even
name = vcvtn
double-suffixes
a = -1.5, 2.1, -2.9, 3.9
validate -2, 2, -3, 4

aarch64 = fcvtns
link-aarch64 = fcvtns._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

/// Floating-point convert to signed integer, rounding toward minus infinity
name = vcvtm
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -2, 2, -3, 3

aarch64 = fcvtms
link-aarch64 = fcvtms._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

/// Floating-point convert to signed integer, rounding toward plus infinity
name = vcvtp
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -1, 3, -2, 4

aarch64 = fcvtps
link-aarch64 = fcvtps._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 3, 4

aarch64 = fcvtau
link-aarch64 = fcvtau._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
name = vcvtn
double-suffixes
a = 1.5, 2.1, 2.9, 3.9
validate 2, 2, 3, 4

aarch64 = fcvtnu
link-aarch64 = fcvtnu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

/// Floating-point convert to unsigned integer, rounding toward minus infinity
name = vcvtm
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 2, 3

aarch64 = fcvtmu
link-aarch64 = fcvtmu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

/// Floating-point convert to unsigned integer, rounding toward plus infinity
name = vcvtp
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 2, 3, 3, 4

aarch64 = fcvtpu
link-aarch64 = fcvtpu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32}
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
n = HFLEN
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

aarch64 = dup
generate poly64x2_t, poly64x1_t:poly64x2_t

arm = vdup.l
generate int*_t
generate int8x16_t:int8x8_t, int16x8_t:int16x4_t, int32x4_t:int32x2_t
generate int8x8_t:int8x16_t, int16x4_t:int16x8_t, int32x2_t:int32x4_t

generate uint*_t
generate uint8x16_t:uint8x8_t, uint16x8_t:uint16x4_t, uint32x4_t:uint32x2_t
generate uint8x8_t:uint8x16_t, uint16x4_t:uint16x8_t, uint32x2_t:uint32x4_t

generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
generate poly8x16_t:poly8x8_t, poly16x8_t:poly16x4_t
generate poly8x8_t:poly8x16_t, poly16x4_t:poly16x8_t

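// NOTE (editorial, inferred): `{dup-out_len-N as u32}` expands to a
// shuffle index list that repeats lane N out_len times, so the lane
// duplication is expressed as a simd_shuffle rather than a dedicated
// intrinsic call; `n = HFLEN` picks the lane used by the test
// (apparently half the vector length, which the `a` values above are
// arranged to satisfy).
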
/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32}
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
n = HFLEN
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

aarch64 = dup
arm = vmov
generate int64x2_t, int64x1_t:int64x2_t, uint64x2_t, uint64x1_t:uint64x2_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32}
a = 1., 1., 1., 4.
n = HFLEN
validate 1., 1., 1., 1.

aarch64 = dup
generate float64x2_t, float64x1_t:float64x2_t

arm = vdup.l
generate float*_t, float32x4_t:float32x2_t, float32x2_t:float32x4_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = a
a = 0
n = HFLEN
validate 0

aarch64 = nop
generate poly64x1_t

arm = nop
generate int64x1_t, uint64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = a
a = 0.
n = HFLEN
validate 0.

aarch64 = nop
generate float64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = transmute--<element_t _>, {simd_extract, a, N as u32}
a = 0, 1
n = HFLEN
validate 1

aarch64 = nop
generate poly64x2_t:poly64x1_t

arm = vmov
generate int64x2_t:int64x1_t, uint64x2_t:uint64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = transmute--<element_t _>, {simd_extract, a, N as u32}
a = 0., 1.
n = HFLEN
validate 1.

aarch64 = nop
generate float64x2_t:float64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, a, N as u32
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
n = HFLEN
validate 1

aarch64 = nop
generate int8x8_t:i8, int8x16_t:i8, int16x4_t:i16, int16x8_t:i16, int32x2_t:i32, int32x4_t:i32, int64x1_t:i64, int64x2_t:i64
generate uint8x8_t:u8, uint8x16_t:u8, uint16x4_t:u16, uint16x8_t:u16, uint32x2_t:u32, uint32x4_t:u32, uint64x1_t:u64, uint64x2_t:u64
generate poly8x8_t:p8, poly8x16_t:p8, poly16x4_t:p16, poly16x8_t:p16

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, a, N as u32
a = 1., 1., 1., 4.
n = HFLEN
validate 1.

aarch64 = nop
generate float32x2_t:f32, float32x4_t:f32, float64x1_t:f64, float64x2_t:f64

/// Extract vector from pair of vectors
name = vext
constn = N
multi_fn = static_assert_imm-out_exp_len-N
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len}
a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15
b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11
n = HFLEN
validate 8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19

arm = "vext.8"
aarch64 = ext
generate int*_t, uint*_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

/// Extract vector from pair of vectors
name = vext
constn = N
multi_fn = static_assert_imm-out_exp_len-N
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len}
a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15
b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11
n = HFLEN
validate 8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19

aarch64 = ext
generate poly64x2_t

arm = vmov
generate int64x2_t, uint64x2_t

/// Extract vector from pair of vectors
name = vext
constn = N
multi_fn = static_assert_imm-out_exp_len-N
multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len}
a = 0., 2., 2., 3.
b = 3., 4., 5., 6.
n = HFLEN
validate 2., 3., 3., 4.

aarch64 = ext
generate float64x2_t

arm = "vext.8"
generate float*_t

1614/// Multiply-add to accumulator
1615name = vmla
1616multi_fn = simd_add, a, {simd_mul, b, c}
1617a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1618b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1619c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1620validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1621
1622arm = vmla.
1623aarch64 = mla
1624generate int*_t, uint*_t
1625
1626/// Floating-point multiply-add to accumulator
1627name = vmla
1628multi_fn = simd_add, a, {simd_mul, b, c}
1629a = 0., 1., 2., 3.
1630b = 2., 2., 2., 2.
1631c = 3., 3., 3., 3.
1632validate 6., 7., 8., 9.
1633
1634aarch64 = fmul
1635generate float64x*_t
1636
1637arm = vmla.
1638generate float*_t
1639
1640/// Vector multiply accumulate with scalar
1641name = vmla
1642n-suffix
1643multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
1644a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1645b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1646c = 3
1647validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1648
1649aarch64 = mla
1650arm = vmla.
1651generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
1652generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t
1653
1654/// Vector multiply accumulate with scalar
1655name = vmla
1656n-suffix
1657multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
1658a = 0., 1., 2., 3.
1659b = 2., 2., 2., 2.
1660c = 3.
1661validate 6., 7., 8., 9.
1662
1663aarch64 = fmul
1664arm = vmla.
1665generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
1666
1667/// Vector multiply accumulate with scalar
1668name = vmla
1669in2-lane-suffixes
1670constn = LANE
1671multi_fn = static_assert_imm-in2_exp_len-LANE
1672multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
1673a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1674b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1675c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1676n = 1
1677validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1678
1679aarch64 = mla
1680arm = vmla.
1681generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
1682generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
1683generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
1684generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
1685
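// Illustrative sketch (comment only, not parsed): the lane form first
// broadcasts lane LANE of c with a shuffle, then reuses the plain vmla:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let a = vdup_n_s16(0);
//         let b = vdup_n_s16(2);
//         let c = vld1_s16([0i16, 3, 0, 0].as_ptr());
//         let r = vmla_lane_s16::<1>(a, b, c); // every lane: 0 + 2 * c[1] = 6
//     }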
1686/// Vector multiply accumulate with scalar
1687name = vmla
1688in2-lane-suffixes
1689constn = LANE
1690multi_fn = static_assert_imm-in2_exp_len-LANE
1691multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
1692a = 0., 1., 2., 3.
1693b = 2., 2., 2., 2.
1694c = 0., 3., 0., 0.
1695n = 1
1696validate 6., 7., 8., 9.
1697
1698aarch64 = fmul
1699arm = vmla.
1700generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
1701
1702/// Signed multiply-add long
1703name = vmlal
1704multi_fn = simd_add, a, {vmull-self-noext, b, c}
1705a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1706b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1707c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1708validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1709
1710arm = vmlal.s
1711aarch64 = smlal
1712generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
1713
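// Illustrative sketch (comment only, not parsed): vmlal widens b and c before
// the multiply, so the accumulator has twice the element width:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let a = vdupq_n_s16(1);                    // int16x8_t accumulator
//         let (b, c) = (vdup_n_s8(2), vdup_n_s8(3)); // int8x8_t inputs
//         let r = vmlal_s8(a, b, c);                 // every lane: 1 + 2 * 3 = 7
//     }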
1714/// Unsigned multiply-add long
1715name = vmlal
1716multi_fn = simd_add, a, {vmull-self-noext, b, c}
1717a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1718b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1719c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1720validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1721
1722arm = vmlal.s
1723aarch64 = umlal
1724generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
1725
1726/// Vector widening multiply accumulate with scalar
1727name = vmlal
1728n-suffix
1729multi_fn = vmlal-self-noext, a, b, {vdup-nself-noext, c}
1730a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1731b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1732c = 3
1733validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1734
1735arm = vmlal.s
1736aarch64 = smlal
1737generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
1738aarch64 = umlal
1739generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t
1740
1741/// Vector widening multiply accumulate with scalar
1742name = vmlal_lane
1743in2-suffix
1744constn = LANE
1745multi_fn = static_assert_imm-in2_exp_len-LANE
1746multi_fn = vmlal-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
1747a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1748b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1749c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1750n = 1
1751validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1752
1753arm = vmlal.s
1754aarch64 = smlal
1755generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
1756generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
1757aarch64 = umlal
1758generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
1759generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t
1760
1761/// Signed multiply-add long
1762name = vmlal_high
1763no-q
1764multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
1765multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
1766multi_fn = vmlal-noqself-noext, a, b, c
1767a = 8, 7, 6, 5, 4, 3, 2, 1
1768b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1769c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1770fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1771validate 8, 9, 10, 11, 12, 13, 14, 15
1772
1773aarch64 = smlal2
1774generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
1775
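// Illustrative sketch (comment only, not parsed): the _high form consumes the
// upper halves of the 128-bit b and c inputs (that is what the `fixed`
// half-right shuffle above selects):
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let a = vdupq_n_s16(1);
//         let (b, c) = (vdupq_n_s8(2), vdupq_n_s8(3)); // int8x16_t inputs
//         let r = vmlal_high_s8(a, b, c); // widens lanes 8..15: 1 + 2 * 3 = 7
//     }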
1776/// Unsigned multiply-add long
1777name = vmlal_high
1778no-q
1779multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
1780multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
1781multi_fn = vmlal-noqself-noext, a, b, c
1782a = 8, 7, 6, 5, 4, 3, 2, 1
1783b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1784c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1785fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1786validate 8, 9, 10, 11, 12, 13, 14, 15
1787
1788aarch64 = umlal2
1789generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
1790
1791/// Multiply-add long
1792name = vmlal_high_n
1793no-q
1794multi_fn = vmlal_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
1795a = 8, 7, 6, 5, 4, 3, 2, 1
1796b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1797c = 2
1798validate 8, 9, 10, 11, 12, 13, 14, 15
1799
1800aarch64 = smlal2
1801generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
1802aarch64 = umlal2
1803generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t
1804
1805/// Multiply-add long
1806name = vmlal_high_lane
1807in2-suffix
1808constn = LANE
1809multi_fn = static_assert_imm-in2_exp_len-LANE
1810multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
1811a = 8, 7, 6, 5, 4, 3, 2, 1
1812b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1813c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1814n = 1
1815validate 8, 9, 10, 11, 12, 13, 14, 15
1816
1817aarch64 = smlal2
1818generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
1819generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
1820aarch64 = umlal2
1821generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
1822generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
1823
1824/// Multiply-subtract from accumulator
1825name = vmls
1826multi_fn = simd_sub, a, {simd_mul, b, c}
1827a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1828b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1829c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1830validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1831
1832arm = vmls.
1833aarch64 = mls
1834generate int*_t, uint*_t
1835
1836/// Floating-point multiply-subtract from accumulator
1837name = vmls
1838multi_fn = simd_sub, a, {simd_mul, b, c}
1839a = 6., 7., 8., 9.
1840b = 2., 2., 2., 2.
1841c = 3., 3., 3., 3.
1842validate 0., 1., 2., 3.
1843
1844aarch64 = fmul
1845generate float64x*_t
1846
1847arm = vmls.
1848generate float*_t
1849
1850/// Vector multiply subtract with scalar
1851name = vmls
1852n-suffix
1853multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
1854a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1855b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1856c = 3
1857validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1858
1859aarch64 = mls
1860arm = vmls.
1861generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
1862generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t
1863
1864/// Vector multiply subtract with scalar
1865name = vmls
1866n-suffix
1867multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
1868a = 6., 7., 8., 9.
1869b = 2., 2., 2., 2.
1870c = 3.
1871validate 0., 1., 2., 3.
1872
1873aarch64 = fmul
1874arm = vmls.
1875generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
1876
1877/// Vector multiply subtract with scalar
1878name = vmls
1879in2-lane-suffixes
1880constn = LANE
1881multi_fn = static_assert_imm-in2_exp_len-LANE
1882multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
1883a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1884b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1885c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1886n = 1
1887validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1888
1889aarch64 = mls
1890arm = vmls.
1891generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
1892generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
1893generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
1894generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
1895
1896/// Vector multiply subtract with scalar
1897name = vmls
1898in2-lane-suffixes
1899constn = LANE
1900multi_fn = static_assert_imm-in2_exp_len-LANE
1901multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
1902a = 6., 7., 8., 9.
1903b = 2., 2., 2., 2.
1904c = 0., 3., 0., 0.
1905n = 1
1906validate 0., 1., 2., 3.
1907
1908aarch64 = fmul
1909arm = vmls.
1910generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
1911
1912/// Signed multiply-subtract long
1913name = vmlsl
1914multi_fn = simd_sub, a, {vmull-self-noext, b, c}
1915a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1916b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1917c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1918validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1919
1920arm = vmlsl.s
1921aarch64 = smlsl
1922generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
1923
1924/// Unsigned multiply-subtract long
1925name = vmlsl
1926multi_fn = simd_sub, a, {vmull-self-noext, b, c}
1927a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1928b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1929c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1930validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1931
1932arm = vmlsl.s
1933aarch64 = umlsl
1934generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
1935
1936/// Vector widening multiply subtract with scalar
1937name = vmlsl
1938n-suffix
1939multi_fn = vmlsl-self-noext, a, b, {vdup-nself-noext, c}
1940a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1941b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1942c = 3
1943validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1944
1945arm = vmlsl.s
1946aarch64 = smlsl
1947generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
1948aarch64 = umlsl
1949generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t
1950
1951/// Vector widening multiply subtract with scalar
1952name = vmlsl_lane
1953in2-suffix
1954constn = LANE
1955multi_fn = static_assert_imm-in2_exp_len-LANE
1956multi_fn = vmlsl-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
1957a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1958b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1959c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1960n = 1
1961validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1962
1963arm = vmlsl.s
1964aarch64 = smlsl
1965generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
1966generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
1967aarch64 = umlsl
1968generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
1969generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t
1970
1971/// Signed multiply-subtract long
1972name = vmlsl_high
1973no-q
1974multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
1975multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
1976multi_fn = vmlsl-noqself-noext, a, b, c
1977a = 14, 15, 16, 17, 18, 19, 20, 21
1978b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1979c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1980fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1981validate 14, 13, 12, 11, 10, 9, 8, 7
1982
1983aarch64 = smlsl2
1984generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
1985
1986/// Unsigned multiply-subtract long
1987name = vmlsl_high
1988no-q
1989multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
1990multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right}
1991multi_fn = vmlsl-noqself-noext, a, b, c
1992a = 14, 15, 16, 17, 18, 19, 20, 21
1993b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1994c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1995fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1996validate 14, 13, 12, 11, 10, 9, 8, 7
1997
1998aarch64 = umlsl2
1999generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
2000
2001/// Multiply-subtract long
2002name = vmlsl_high_n
2003no-q
2004multi_fn = vmlsl_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
2005a = 14, 15, 16, 17, 18, 19, 20, 21
2006b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
2007c = 2
2008validate 14, 13, 12, 11, 10, 9, 8, 7
2009
2010aarch64 = smlsl2
2011generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
2012aarch64 = umlsl2
2013generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t
2014
2015/// Multiply-subtract long
2016name = vmlsl_high_lane
2017in2-suffix
2018constn = LANE
2019multi_fn = static_assert_imm-in2_exp_len-LANE
2020multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}}
2021a = 14, 15, 16, 17, 18, 19, 20, 21
2022b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
2023c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2024n = 1
2025validate 14, 13, 12, 11, 10, 9, 8, 7
2026
2027aarch64 = smlsl2
2028generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
2029generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
2030aarch64 = umlsl2
2031generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
2032generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
2033
2034/// Extract narrow
2035name = vmovn_high
2036no-q
2037multi_fn = simd_cast, c:in_t0, b
2038multi_fn = simd_shuffle-out_len-!, a, c, {asc-0-out_len}
2039a = 0, 1, 2, 3, 2, 3, 4, 5
2040b = 2, 3, 4, 5, 12, 13, 14, 15
2041validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15
2042
2043aarch64 = xtn2
2044generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
2045generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
2046
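// Illustrative sketch (comment only, not parsed): vmovn_high truncates each
// element of b and appends the result to the already-narrow vector a:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let a = vdup_n_s8(1);         // low half, already narrow
//         let b = vdupq_n_s16(2);       // each lane truncated to i8
//         let r = vmovn_high_s16(a, b); // int8x16_t: eight 1s then eight 2s
//     }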
2047/// Negate
2048name = vneg
2049fn = simd_neg
2050a = 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8
2051validate 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8
2052
2053aarch64 = neg
2054generate int64x*_t
2055
2056arm = vneg.s
2057generate int*_t
2058
2059/// Negate
2060name = vneg
2061multi_fn = a.wrapping_neg()
2062a = 1
2063validate -1
2064
2065aarch64 = neg
2066generate i64
2067
2068/// Negate
2069name = vneg
2070fn = simd_neg
2071a = 0., 1., -1., 2., -2., 3., -3., 4.
2072validate 0., -1., 1., -2., 2., -3., 3., -4.
2073
2074aarch64 = fneg
2075generate float64x*_t
2076
2077arm = vneg.s
2078generate float*_t
2079
2080/// Signed saturating negate
2081name = vqneg
2082a = MIN, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7
2083validate MAX, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7
2084link-arm = vqneg._EXT_
2085link-aarch64 = sqneg._EXT_
2086
2087aarch64 = sqneg
2088generate int64x*_t
2089
2090arm = vqneg.s
2091generate int*_t
2092
2093/// Signed saturating negate
2094name = vqneg
2095multi_fn = simd_extract, {vqneg-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
2096a = 1
2097validate -1
2098
2099aarch64 = sqneg
2100generate i8, i16, i32, i64
2101
2102/// Saturating subtract
2103name = vqsub
2104a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2105b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2106validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26
2107
2108arm = vqsub.s
2109aarch64 = uqsub
2110link-arm = llvm.usub.sat._EXT_
2111link-aarch64 = uqsub._EXT_
2112generate uint*_t, uint64x*_t
2113
2114arm = vqsub.s
2115aarch64 = sqsub
2116link-arm = llvm.ssub.sat._EXT_
2117link-aarch64 = sqsub._EXT_
2118generate int*_t, int64x*_t
2119
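// Illustrative sketch (comment only, not parsed): unlike a plain wrapping
// subtract, vqsub clamps at the numeric bounds of the element type:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let r = vqsub_u8(vdup_n_u8(0), vdup_n_u8(1)); // saturates to 0, not 255
//     }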
2120/// Saturating subtract
2121name = vqsub
2122multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
2123multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
2124multi_fn = simd_extract, {vqsub-in_ntt-noext, a, b}, 0
2125a = 42
2126b = 1
2127validate 41
2128
2129aarch64 = sqsub
2130generate i8, i16
2131aarch64 = uqsub
2132generate u8, u16
2133
2134/// Saturating subtract
2135name = vqsub
2136a = 42
2137b = 1
2138validate 41
2139
2140aarch64 = uqsub
2141link-aarch64 = uqsub._EXT_
2142generate u32, u64
2143
2144aarch64 = sqsub
2145link-aarch64 = sqsub._EXT_
2146generate i32, i64
2147
2148/// Halving add
2149name = vhadd
2150a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2151b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2152validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
2153
2154arm = vhadd.s
2155aarch64 = uhadd
2156link-aarch64 = uhadd._EXT_
2157link-arm = vhaddu._EXT_
2158generate uint*_t
2159
2160arm = vhadd.s
2161aarch64 = shadd
2162link-aarch64 = shadd._EXT_
2163link-arm = vhadds._EXT_
2164generate int*_t
2165
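// Illustrative sketch (comment only, not parsed): the halving add computes
// (a + b) >> 1 on an internally widened sum, so it cannot overflow:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let r = vhadd_u8(vdup_n_u8(200), vdup_n_u8(100)); // 150, not (44 >> 1)
//     }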
2166/// Reverse bit order
2167name = vrbit
2168a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
2169validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
2170
2171aarch64 = rbit
2172link-aarch64 = rbit._EXT_
2173
2174generate int8x8_t, int8x16_t
2175
2176/// Reverse bit order
2177name = vrbit
2178multi_fn = transmute, {vrbit-signed-noext, transmute(a)}
2179a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
2180validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
2181
2182aarch64 = rbit
2183
2184generate uint8x8_t, uint8x16_t, poly8x8_t, poly8x16_t
2185
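// Illustrative sketch (comment only, not parsed): vrbit mirrors the bits
// within each byte, e.g. 0b0000_0010 becomes 0b0100_0000 (2 -> 64, as in the
// test vectors above):
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let r = vrbit_s8(vdup_n_s8(2)); // every lane: 64
//     }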
2186/// Rounding halving add
2187name = vrhadd
2188a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2189b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2190validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29
2191
2192arm = vrhadd.s
2193aarch64 = urhadd
2194link-arm = vrhaddu._EXT_
2195link-aarch64 = urhadd._EXT_
2196generate uint*_t
2197
2198arm = vrhadd.s
2199aarch64 = srhadd
2200link-arm = vrhadds._EXT_
2201link-aarch64 = srhadd._EXT_
2202generate int*_t
2203
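// Illustrative sketch (comment only, not parsed): the rounding variant
// computes (a + b + 1) >> 1, which is why 42 and 1 average to 22 here rather
// than vhadd's 21:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let r = vrhadd_u8(vdup_n_u8(42), vdup_n_u8(1)); // 22
//     }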
2204/// Floating-point round to integral exact, using current rounding mode
2205name = vrndx
2206a = -1.5, 0.5, 1.5, 2.5
2207validate -2.0, 0.0, 2.0, 2.0
2208
2209aarch64 = frintx
2210link-aarch64 = llvm.rint._EXT_
2211generate float*_t, float64x*_t
2212
2213/// Floating-point round to integral, to nearest with ties to away
2214name = vrnda
2215a = -1.5, 0.5, 1.5, 2.5
2216validate -2.0, 1.0, 2.0, 3.0
2217
2218aarch64 = frinta
2219link-aarch64 = llvm.round._EXT_
2220generate float*_t, float64x*_t
2221
2222/// Floating-point round to integral, to nearest with ties to even
2223name = vrndn
2224a = -1.5, 0.5, 1.5, 2.5
2225validate -2.0, 0.0, 2.0, 2.0
2226
2227link-aarch64 = frintn._EXT_
2228aarch64 = frintn
2229generate float64x*_t
2230
2231target = fp-armv8
2232arm = vrintn
2233link-arm = vrintn._EXT_
2234generate float*_t
2235
2236/// Floating-point round to integral, to nearest with ties to even
2237name = vrndn
2238a = -1.5
2239validate -2.0
2240
2241aarch64 = frintn
2242link-aarch64 = llvm.roundeven._EXT_
2243generate f32
2244
2245/// Floating-point round to integral, toward minus infinity
2246name = vrndm
2247a = -1.5, 0.5, 1.5, 2.5
2248validate -2.0, 0.0, 1.0, 2.0
2249
2250aarch64 = frintm
2251link-aarch64 = llvm.floor._EXT_
2252generate float*_t, float64x*_t
2253
2254/// Floating-point round to integral, toward plus infinity
2255name = vrndp
2256a = -1.5, 0.5, 1.5, 2.5
2257validate -1.0, 1.0, 2.0, 3.0
2258
2259aarch64 = frintp
2260link-aarch64 = llvm.ceil._EXT_
2261generate float*_t, float64x*_t
2262
2263/// Floating-point round to integral, toward zero
2264name = vrnd
2265a = -1.5, 0.5, 1.5, 2.5
2266validate -1.0, 0.0, 1.0, 2.0
2267
2268aarch64 = frintz
2269link-aarch64 = llvm.trunc._EXT_
2270generate float*_t, float64x*_t
2271
2272/// Floating-point round to integral, using current rounding mode
2273name = vrndi
2274a = -1.5, 0.5, 1.5, 2.5
2275validate -2.0, 0.0, 2.0, 2.0
2276
2277aarch64 = frinti
2278link-aarch64 = llvm.nearbyint._EXT_
2279generate float*_t, float64x*_t
2280
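// Illustrative sketch (comment only, not parsed): the frint* family differs
// only in how the rounding direction and ties are handled, as the validate
// rows above show; on the tie value 2.5:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let v = vdupq_n_f32(2.5);
//         let n = vrndnq_f32(v); // ties-to-even -> 2.0
//         let a = vrndaq_f32(v); // ties-to-away -> 3.0
//         let m = vrndmq_f32(v); // toward -inf  -> 2.0
//         let p = vrndpq_f32(v); // toward +inf  -> 3.0
//         let z = vrndq_f32(v);  // toward zero  -> 2.0
//     }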
2281/// Saturating add
2282name = vqadd
2283a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2284b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2285validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
2286
2287arm = vqadd.s
2288aarch64 = uqadd
2289link-arm = llvm.uadd.sat._EXT_
2290link-aarch64 = uqadd._EXT_
2291generate uint*_t, uint64x*_t
2292
2293arm = vqadd.s
2294aarch64 = sqadd
2295link-arm = llvm.sadd.sat._EXT_
2296link-aarch64 = sqadd._EXT_
2297generate int*_t, int64x*_t
2298
2299/// Saturating add
2300name = vqadd
2301multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
2302multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
2303multi_fn = simd_extract, {vqadd-in_ntt-noext, a, b}, 0
2304a = 42
2305b = 1
2306validate 43
2307
2308aarch64 = sqadd
2309generate i8, i16
2310aarch64 = uqadd
2311generate u8, u16
2312
2313/// Saturating add
2314name = vqadd
2315a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2316b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2317validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
2318
2319aarch64 = uqadd
2320link-aarch64 = uqadd._EXT_
2321generate u32, u64
2322
2323aarch64 = sqadd
2324link-aarch64 = sqadd._EXT_
2325generate i32, i64
2326
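// Illustrative sketch (comment only, not parsed): as with vqsub, the
// saturating add clamps at the numeric bounds instead of wrapping:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let r = vqadd_u8(vdup_n_u8(250), vdup_n_u8(10)); // saturates to 255
//     }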
2327/// Load multiple single-element structures to one, two, three, or four registers
2328name = vld1
2329out-suffix
2330a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2331validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2332load_fn
2333
2334aarch64 = ld1
2335link-aarch64 = ld1x2._EXT2_
2336arm = vld1
2337link-arm = vld1x2._EXT2_
2338generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
2339generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
2340
2341link-aarch64 = ld1x3._EXT2_
2342link-arm = vld1x3._EXT2_
2343generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
2344generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t, *const i64:int64x2x3_t
2345
2346link-aarch64 = ld1x4._EXT2_
2347link-arm = vld1x4._EXT2_
2348generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
2349generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t, *const i64:int64x2x4_t
2350
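// Illustrative sketch (comment only, not parsed): the _x2/_x3/_x4 loads read
// that many vectors' worth of contiguous elements and return them as a struct
// of registers, with no interleaving:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let data: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
//         let pair = vld1_s8_x2(data.as_ptr()); // pair.0 = 0..=7, pair.1 = 8..=15
//     }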
2351/// Load multiple single-element structures to one, two, three, or four registers
2352name = vld1
2353out-suffix
2354multi_fn = transmute, {vld1-outsigned-noext, transmute(a)}
2355a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2356validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2357
2358load_fn
2359aarch64 = ld1
2360arm = vld1
2361generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t
2362generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t
2363generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t
2364generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t, *const u64:uint64x2x3_t
2365generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t
2366generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t, *const u64:uint64x2x4_t
2367generate *const p8:poly8x8x2_t, *const p8:poly8x8x3_t, *const p8:poly8x8x4_t
2368generate *const p8:poly8x16x2_t, *const p8:poly8x16x3_t, *const p8:poly8x16x4_t
2369generate *const p16:poly16x4x2_t, *const p16:poly16x4x3_t, *const p16:poly16x4x4_t
2370generate *const p16:poly16x8x2_t, *const p16:poly16x8x3_t, *const p16:poly16x8x4_t
2371target = aes
2372generate *const p64:poly64x1x2_t
2373arm = nop
2374generate *const p64:poly64x1x3_t, *const p64:poly64x1x4_t
2375generate *const p64:poly64x2x2_t, *const p64:poly64x2x3_t, *const p64:poly64x2x4_t
2376/// Load multiple single-element structures to one, two, three, or four registers
2377name = vld1
2378out-suffix
2379a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
2380validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
2381load_fn
2382
2383aarch64 = ld1
2384link-aarch64 = ld1x2._EXT2_
2385generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
2386
2387link-aarch64 = ld1x3._EXT2_
2388generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
2389
2390link-aarch64 = ld1x4._EXT2_
2391generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
2392
2393arm = vld1
2394link-aarch64 = ld1x2._EXT2_
2395link-arm = vld1x2._EXT2_
2396generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
2397
2398link-aarch64 = ld1x3._EXT2_
2399link-arm = vld1x3._EXT2_
2400generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
2401
2402link-aarch64 = ld1x4._EXT2_
2403link-arm = vld1x4._EXT2_
2404generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
2405
2406/// Load multiple 2-element structures to two registers
2407name = vld2
2408out-nox
2409a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2410validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
2411load_fn
2412arm-aarch64-separate
2413
2414aarch64 = ld2
2415link-aarch64 = ld2._EXTv2_
2416generate *const i64:int64x2x2_t
2417
2418arm = vld2
2419link-arm = vld2._EXTpi82_
2420generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
2421generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
2422arm = nop
2423aarch64 = nop
2424generate *const i64:int64x1x2_t
2425
2426/// Load multiple 2-element structures to two registers
2427name = vld2
2428out-nox
2429multi_fn = transmute, {vld2-outsignednox-noext, transmute(a)}
2430a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2431validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
2432load_fn
2433
2434aarch64 = ld2
2435generate *const u64:uint64x2x2_t
2436target = aes
2437generate *const p64:poly64x2x2_t
2438
2439target = default
2440arm = vld2
2441generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
2442generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
2443generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
2444arm = nop
2445aarch64 = nop
2446generate *const u64:uint64x1x2_t
2447target = aes
2448generate *const p64:poly64x1x2_t
2449
2450
2451/// Load multiple 2-element structures to two registers
2452name = vld2
2453out-nox
2454a = 0., 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
2455validate 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
2456load_fn
2457arm-aarch64-separate
2458
2459aarch64 = nop
2460link-aarch64 = ld2._EXTv2_
2461generate *const f64:float64x1x2_t
2462aarch64 = ld2
2463generate *const f64:float64x2x2_t
2464
2465arm = vld2
2466link-arm = vld2._EXTpi82_
2467generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
2468
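// Illustrative sketch (comment only, not parsed): unlike vld1_x2, vld2
// de-interleaves: even-indexed elements go to the first register and
// odd-indexed ones to the second:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let data: [f32; 4] = [1.0, 10.0, 2.0, 20.0]; // x0, y0, x1, y1
//         let xy = vld2_f32(data.as_ptr());
//         // xy.0 = [1.0, 2.0] (x values), xy.1 = [10.0, 20.0] (y values)
//     }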
2469/// Load single 2-element structure and replicate to all lanes of two registers
2470name = vld2
2471out-dup-nox
2472a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2473validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2474load_fn
2475arm-aarch64-separate
2476
2477aarch64 = ld2r
2478link-aarch64 = ld2r._EXT2_
2479generate *const i64:int64x2x2_t
2480
2481arm = vld2
2482link-arm = vld2dup._EXTpi82_
2483generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
2484generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
2485arm = nop
2486generate *const i64:int64x1x2_t
2487
2488/// Load single 2-element structure and replicate to all lanes of two registers
2489name = vld2
2490out-dup-nox
2491multi_fn = transmute, {vld2-outsigneddupnox-noext, transmute(a)}
2492a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2493validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2494load_fn
2495
2496aarch64 = ld2r
2497generate *const u64:uint64x2x2_t
2498target = aes
2499generate *const p64:poly64x2x2_t
2500
2501target = default
2502arm = vld2
2503generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
2504generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
2505generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
2506arm = nop
2507generate *const u64:uint64x1x2_t
2508target = aes
2509generate *const p64:poly64x1x2_t
2510
2511/// Load single 2-element structure and replicate to all lanes of two registers
2512name = vld2
2513out-dup-nox
2514a = 0., 1., 1., 2., 3., 1., 4., 3., 5.
2515validate 1., 1., 1., 1., 1., 1., 1., 1.
2516load_fn
2517arm-aarch64-separate
2518
2519aarch64 = ld2r
2520link-aarch64 = ld2r._EXT2_
2521generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
2522
2523arm = vld2
2524link-arm = vld2dup._EXTpi82_
2525generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
2526
2527/// Load multiple 2-element structures to two registers
2528name = vld2
2529out-lane-nox
2530multi_fn = static_assert_imm-in_exp_len-LANE
2531constn = LANE
2532a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2533b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2534n = 0
2535validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2536load_fn
2537arm-aarch64-separate
2538
2539aarch64 = ld2
2540const-aarch64 = LANE
2541link-aarch64 = ld2lane._EXTpi82_
2542generate *const i8:int8x16x2_t:int8x16x2_t, *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t
2543
2544arm = vld2
2545const-arm = LANE
2546link-arm = vld2lane._EXTpi82_
2547generate *const i8:int8x8x2_t:int8x8x2_t, *const i16:int16x4x2_t:int16x4x2_t, *const i32:int32x2x2_t:int32x2x2_t
2548generate *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t
2549
2550/// Load multiple 2-element structures to two registers
2551name = vld2
2552out-lane-nox
2553multi_fn = static_assert_imm-in_exp_len-LANE
2554multi_fn = transmute, {vld2-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
2555constn = LANE
2556a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2557b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2558n = 0
2559validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2560load_fn
2561
2562aarch64 = ld2
2563const-aarch64 = LANE
2564
2565target = aes
2566generate *const p64:poly64x1x2_t:poly64x1x2_t, *const p64:poly64x2x2_t:poly64x2x2_t
2567
2568target = default
2569generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t
2570generate *const p8:poly8x16x2_t:poly8x16x2_t
2571
2572arm = vld2
2573const-arm = LANE
2574generate *const u8:uint8x8x2_t:uint8x8x2_t, *const u16:uint16x4x2_t:uint16x4x2_t, *const u32:uint32x2x2_t:uint32x2x2_t
2575generate *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t
2576generate *const p8:poly8x8x2_t:poly8x8x2_t, *const p16:poly16x4x2_t:poly16x4x2_t
2577generate *const p16:poly16x8x2_t:poly16x8x2_t
2578
2579/// Load multiple 2-element structures to two registers
2580name = vld2
2581out-lane-nox
2582multi_fn = static_assert_imm-in_exp_len-LANE
2583constn = LANE
2584a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
2585b = 0., 2., 2., 14., 2., 16., 17., 18.
2586n = 0
2587validate 1., 2., 2., 14., 2., 16., 17., 18.
2588load_fn
2589arm-aarch64-separate
2590
2591aarch64 = ld2
2592const-aarch64 = LANE
2593link-aarch64 = ld2lane._EXTpi82_
2594generate *const f64:float64x1x2_t:float64x1x2_t, *const f64:float64x2x2_t:float64x2x2_t
2595
2596arm = vld2
2597const-arm = LANE
2598link-arm = vld2lane._EXTpi82_
2599generate *const f32:float32x2x2_t:float32x2x2_t, *const f32:float32x4x2_t:float32x4x2_t
2600
2601/// Load multiple 3-element structures to three registers
2602name = vld3
2603out-nox
2604a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
2605validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
2606load_fn
2607arm-aarch64-separate
2608
2609aarch64 = ld3
2610link-aarch64 = ld3._EXTv2_
2611generate *const i64:int64x2x3_t
2612
2613arm = vld3
2614link-arm = vld3._EXTpi82_
2615generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
2616generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
2617arm = nop
2618aarch64 = nop
2619generate *const i64:int64x1x3_t
2620
2621/// Load multiple 3-element structures to three registers
2622name = vld3
2623out-nox
2624multi_fn = transmute, {vld3-outsignednox-noext, transmute(a)}
2625a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
2626validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
2627load_fn
2628
2629aarch64 = ld3
2630generate *const u64:uint64x2x3_t
2631target = aes
2632generate *const p64:poly64x2x3_t
2633
2634target = default
2635arm = vld3
2636generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
2637generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
2638generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
2639arm = nop
2640aarch64 = nop
2641generate *const u64:uint64x1x3_t
2642target = aes
2643generate *const p64:poly64x1x3_t
2644
2645/// Load multiple 3-element structures to three registers
2646name = vld3
2647out-nox
2648a = 0., 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.
2649validate 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8.
2650load_fn
2651arm-aarch64-separate
2652
2653aarch64 = nop
2654link-aarch64 = ld3._EXTv2_
2655generate *const f64:float64x1x3_t
2656aarch64 = ld3
2657generate *const f64:float64x2x3_t
2658
2659arm = vld3
2660link-arm = vld3._EXTpi82_
2661generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
2662
2663/// Load single 3-element structure and replicate to all lanes of three registers
2664name = vld3
2665out-dup-nox
2666a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
2667validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2668load_fn
2669arm-aarch64-separate
2670
2671aarch64 = ld3r
2672link-aarch64 = ld3r._EXT2_
2673generate *const i64:int64x2x3_t
2674
2675arm = vld3
2676link-arm = vld3dup._EXTpi82_
2677generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
2678generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
2679arm = nop
2680generate *const i64:int64x1x3_t
2681
2682/// Load single 3-element structure and replicate to all lanes of three registers
2683name = vld3
2684out-dup-nox
2685multi_fn = transmute, {vld3-outsigneddupnox-noext, transmute(a)}
2686a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
2687validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2688load_fn
2689
2690aarch64 = ld3r
2691generate *const u64:uint64x2x3_t
2692target = aes
2693generate *const p64:poly64x2x3_t
2694
2695target = default
2696arm = vld3
2697generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
2698generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
2699generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
2700arm = nop
2701generate *const u64:uint64x1x3_t
2702target = aes
2703generate *const p64:poly64x1x3_t
2704
2705/// Load single 3-element structure and replicate to all lanes of three registers
2706name = vld3
2707out-dup-nox
2708a = 0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5.
2709validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
2710load_fn
2711arm-aarch64-separate
2712
2713aarch64 = ld3r
2714link-aarch64 = ld3r._EXT2_
2715generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
2716
2717arm = vld3
2718link-arm = vld3dup._EXTpi82_
2719generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
2720
2721/// Load multiple 3-element structures to three registers
2722name = vld3
2723out-lane-nox
2724multi_fn = static_assert_imm-in_exp_len-LANE
2725constn = LANE
2726a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2727b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2728n = 0
2729validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2730load_fn
2731arm-aarch64-separate
2732
2733aarch64 = ld3
2734const-aarch64 = LANE
2735link-aarch64 = ld3lane._EXTpi82_
2736generate *const i8:int8x16x3_t:int8x16x3_t, *const i64:int64x1x3_t:int64x1x3_t, *const i64:int64x2x3_t:int64x2x3_t
2737
2738arm = vld3
2739const-arm = LANE
2740link-arm = vld3lane._EXTpi82_
2741generate *const i8:int8x8x3_t:int8x8x3_t, *const i16:int16x4x3_t:int16x4x3_t, *const i32:int32x2x3_t:int32x2x3_t
2742generate *const i16:int16x8x3_t:int16x8x3_t, *const i32:int32x4x3_t:int32x4x3_t
2743
2744/// Load multiple 3-element structures to three registers
2745name = vld3
2746out-lane-nox
2747multi_fn = static_assert_imm-in_exp_len-LANE
2748multi_fn = transmute, {vld3-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
2749constn = LANE
2750a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2751b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2752n = 0
2753validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2754load_fn
2755
2756aarch64 = ld3
2757const-aarch64 = LANE
2758target = aes
2759generate *const p64:poly64x1x3_t:poly64x1x3_t, *const p64:poly64x2x3_t:poly64x2x3_t
2760target = default
2761generate *const p8:poly8x16x3_t:poly8x16x3_t, *const u8:uint8x16x3_t:uint8x16x3_t, *const u64:uint64x1x3_t:uint64x1x3_t, *const u64:uint64x2x3_t:uint64x2x3_t
2762
2763arm = vld3
2764const-arm = LANE
2765generate *const u8:uint8x8x3_t:uint8x8x3_t, *const u16:uint16x4x3_t:uint16x4x3_t, *const u32:uint32x2x3_t:uint32x2x3_t
2766generate *const u16:uint16x8x3_t:uint16x8x3_t, *const u32:uint32x4x3_t:uint32x4x3_t
2767generate *const p8:poly8x8x3_t:poly8x8x3_t, *const p16:poly16x4x3_t:poly16x4x3_t
2768generate *const p16:poly16x8x3_t:poly16x8x3_t
2769
2770/// Load multiple 3-element structures to three registers
2771name = vld3
2772out-lane-nox
2773multi_fn = static_assert_imm-in_exp_len-LANE
2774constn = LANE
2775a = 0., 1., 2., 2., 4., 5., 6., 7., 8., 5., 6., 7., 8.
2776b = 0., 2., 2., 14., 9., 16., 17., 18., 5., 6., 7., 8.
2777n = 0
2778validate 1., 2., 2., 14., 2., 16., 17., 18., 2., 6., 7., 8.
2779load_fn
2780arm-aarch64-separate
2781
2782aarch64 = ld3
2783const-aarch64 = LANE
2784link-aarch64 = ld3lane._EXTpi82_
2785generate *const f64:float64x1x3_t:float64x1x3_t, *const f64:float64x2x3_t:float64x2x3_t
2786
2787arm = vld3
2788const-arm = LANE
2789link-arm = vld3lane._EXTpi82_
2790generate *const f32:float32x2x3_t:float32x2x3_t, *const f32:float32x4x3_t:float32x4x3_t
2791
2792/// Load multiple 4-element structures to four registers
2793name = vld4
2794out-nox
2795a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2796validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2797load_fn
2798arm-aarch64-separate
2799
2800aarch64 = ld4
2801link-aarch64 = ld4._EXTv2_
2802generate *const i64:int64x2x4_t
2803
2804arm = vld4
2805link-arm = vld4._EXTpi82_
2806generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
2807generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
2808aarch64 = nop
2809arm = nop
2810generate *const i64:int64x1x4_t
2811
2812/// Load multiple 4-element structures to four registers
2813name = vld4
2814out-nox
2815multi_fn = transmute, {vld4-outsignednox-noext, transmute(a)}
2816a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2817validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2818load_fn
2819
2820aarch64 = ld4
2821generate *const u64:uint64x2x4_t
2822target = aes
2823generate *const p64:poly64x2x4_t
2824
2825target = default
2826arm = vld4
2827generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
2828generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
2829generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
2830aarch64 = nop
2831arm = nop
2832generate *const u64:uint64x1x4_t
2833target = aes
2834generate *const p64:poly64x1x4_t
2835
2836/// Load multiple 4-element structures to four registers
2837name = vld4
2838out-nox
2839a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 15., 16.
2840validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 15., 6., 8., 8., 16.
2841load_fn
2842arm-aarch64-separate
2843
2844aarch64 = nop
2845link-aarch64 = ld4._EXTv2_
2846generate *const f64:float64x1x4_t
2847aarch64 = ld4
2848generate *const f64:float64x2x4_t
2849
2850arm = vld4
2851link-arm = vld4._EXTpi82_
2852generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
2853
2854/// Load single 4-element structure and replicate to all lanes of four registers
2855name = vld4
2856out-dup-nox
2857a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
2858validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2859load_fn
2860arm-aarch64-separate
2861
2862aarch64 = ld4r
2863link-aarch64 = ld4r._EXT2_
2864generate *const i64:int64x2x4_t
2865
2866arm = vld4
2867link-arm = vld4dup._EXTpi82_
2868generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
2869generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
2870arm = nop
2871generate *const i64:int64x1x4_t
2872
2873/// Load single 4-element structure and replicate to all lanes of four registers
2874name = vld4
2875out-dup-nox
2876multi_fn = transmute, {vld4-outsigneddupnox-noext, transmute(a)}
2877a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
2878validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2879load_fn
2880
2881aarch64 = ld4r
2882generate *const u64:uint64x2x4_t
2883target = aes
2884generate *const p64:poly64x2x4_t
2885
2886target = default
2887arm = vld4
2888generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
2889generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
2890generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
2891arm = nop
2892generate *const u64:uint64x1x4_t
2893target = aes
2894generate *const p64:poly64x1x4_t
2895
2896/// Load single 4-element structure and replicate to all lanes of four registers
2897name = vld4
2898out-dup-nox
2899a = 0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5., 9., 4., 3., 5.
2900validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
2901load_fn
2902arm-aarch64-separate
2903
2904aarch64 = ld4r
2905link-aarch64 = ld4r._EXT2_
2906generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
2907
2908arm = vld4
2909link-arm = vld4dup._EXTpi82_
2910generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
2911
2912/// Load multiple 4-element structures to four registers
2913name = vld4
2914out-lane-nox
2915multi_fn = static_assert_imm-in_exp_len-LANE
2916constn = LANE
2917a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2918b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2919n = 0
2920validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2921load_fn
2922arm-aarch64-separate
2923
2924aarch64 = ld4
2925const-aarch64 = LANE
2926link-aarch64 = ld4lane._EXTpi82_
2927generate *const i8:int8x16x4_t:int8x16x4_t, *const i64:int64x1x4_t:int64x1x4_t, *const i64:int64x2x4_t:int64x2x4_t
2928
2929arm = vld4
2930const-arm = LANE
2931link-arm = vld4lane._EXTpi82_
2932generate *const i8:int8x8x4_t:int8x8x4_t, *const i16:int16x4x4_t:int16x4x4_t, *const i32:int32x2x4_t:int32x2x4_t
2933generate *const i16:int16x8x4_t:int16x8x4_t, *const i32:int32x4x4_t:int32x4x4_t
2934
2935/// Load multiple 4-element structures to four registers
2936name = vld4
2937out-lane-nox
2938multi_fn = static_assert_imm-in_exp_len-LANE
2939multi_fn = transmute, {vld4-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
2940constn = LANE
2941a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2942b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2943n = 0
2944validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2945load_fn
2946
2947aarch64 = ld4
2948const-aarch64 = LANE
2949target = aes
2950generate *const p64:poly64x1x4_t:poly64x1x4_t, *const p64:poly64x2x4_t:poly64x2x4_t
2951target = default
2952generate *const p8:poly8x16x4_t:poly8x16x4_t, *const u8:uint8x16x4_t:uint8x16x4_t, *const u64:uint64x1x4_t:uint64x1x4_t, *const u64:uint64x2x4_t:uint64x2x4_t
2953
2954arm = vld4
2955const-arm = LANE
2956generate *const u8:uint8x8x4_t:uint8x8x4_t, *const u16:uint16x4x4_t:uint16x4x4_t, *const u32:uint32x2x4_t:uint32x2x4_t
2957generate *const u16:uint16x8x4_t:uint16x8x4_t, *const u32:uint32x4x4_t:uint32x4x4_t
2958generate *const p8:poly8x8x4_t:poly8x8x4_t, *const p16:poly16x4x4_t:poly16x4x4_t
2959generate *const p16:poly16x8x4_t:poly16x8x4_t
2960
2961/// Load multiple 4-element structures to four registers
2962name = vld4
2963out-lane-nox
2964multi_fn = static_assert_imm-in_exp_len-LANE
2965constn = LANE
2966a = 0., 1., 2., 2., 2., 5., 6., 7., 8., 5., 6., 7., 8., 1., 4., 3., 5.
2967b = 0., 2., 2., 2., 2., 16., 2., 18., 5., 6., 7., 8., 1., 4., 3., 5.
2968n = 0
2969validate 1., 2., 2., 2., 2., 16., 2., 18., 2., 6., 7., 8., 2., 4., 3., 5.
2970load_fn
2971arm-aarch64-separate
2972
2973aarch64 = ld4
2974const-aarch64 = LANE
2975link-aarch64 = ld4lane._EXTpi82_
2976generate *const f64:float64x1x4_t:float64x1x4_t, *const f64:float64x2x4_t:float64x2x4_t
2977
2978arm = vld4
2979const-arm = LANE
2980link-arm = vld4lane._EXTpi82_
2981generate *const f32:float32x2x4_t:float32x2x4_t, *const f32:float32x4x4_t:float32x4x4_t
2982
2983/// Store multiple single-element structures from one, two, three, or four registers
2984name = vst1
2985in1-lane-nox
2986multi_fn = static_assert_imm-in_exp_len-LANE
2987multi_fn = *a, {simd_extract, b, LANE as u32}
2988constn = LANE
2989a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2990n = 0
2991validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2992store_fn
2993
2994aarch64 = nop
2995arm = nop
2996generate *mut i8:int8x8_t:void, *mut i16:int16x4_t:void, *mut i32:int32x2_t:void, *mut i64:int64x1_t:void
2997generate *mut i8:int8x16_t:void, *mut i16:int16x8_t:void, *mut i32:int32x4_t:void, *mut i64:int64x2_t:void
2998generate *mut u8:uint8x8_t:void, *mut u16:uint16x4_t:void, *mut u32:uint32x2_t:void, *mut u64:uint64x1_t:void
2999generate *mut u8:uint8x16_t:void, *mut u16:uint16x8_t:void, *mut u32:uint32x4_t:void, *mut u64:uint64x2_t:void
3000generate *mut p8:poly8x8_t:void, *mut p16:poly16x4_t:void, *mut p8:poly8x16_t:void, *mut p16:poly16x8_t:void
3001target = aes
3002generate *mut p64:poly64x1_t:void, *mut p64:poly64x2_t:void
3003
3004/// Store multiple single-element structures from one, two, three, or four registers
3005name = vst1
3006in1-lane-nox
3007multi_fn = static_assert_imm-in_exp_len-LANE
3008multi_fn = *a, {simd_extract, b, LANE as u32}
3009constn = LANE
3010a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
3011n = 0
3012validate 1., 0., 0., 0., 0., 0., 0., 0.
3013store_fn
3014
3015aarch64 = nop
3016generate *mut f64:float64x1_t:void, *mut f64:float64x2_t:void
3017
3018arm = nop
3019generate *mut f32:float32x2_t:void, *mut f32:float32x4_t:void
3020
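// Illustrative sketch (comment only, not parsed): the lane store extracts a
// single lane and writes just that one element through the pointer:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let mut out = 0.0f32;
//         let v = vld1_f32([1.0f32, 2.0].as_ptr());
//         vst1_lane_f32::<0>(&mut out, v); // out == 1.0
//     }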
3021/// Store multiple single-element structures from one, two, three, or four registers
3022name = vst1
3023a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3024validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3025store_fn
3026arm-aarch64-separate
3027
3028aarch64 = st1
3029link-aarch64 = st1x2._EXT3_
3030arm = vst1
3031link-arm = vst1x2._EXTr3_
3032generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void, *mut i64:int64x1x2_t:void
3033generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void, *mut i64:int64x2x2_t:void
3034
3035link-aarch64 = st1x3._EXT3_
3036link-arm = vst1x3._EXTr3_
3037generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void, *mut i64:int64x1x3_t:void
3038generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void, *mut i64:int64x2x3_t:void
3039
3040link-aarch64 = st1x4._EXT3_
3041link-arm = vst1x4._EXTr3_
3042generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void, *mut i64:int64x1x4_t:void
3043generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void, *mut i64:int64x2x4_t:void
c295e0f8
XL
3044
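// The st1x2/st1x3/st1x4 links above store two to four whole vectors to
// consecutive memory with no interleaving. A usage sketch (illustrative
// only), assuming core::arch::aarch64:
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let mut buf = [0i8; 16];
//         let v = vdup_n_s8(1);
//         vst1_s8_x2(buf.as_mut_ptr(), int8x8x2_t(v, v)); // 16 contiguous bytes
//     }
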
/// Store multiple single-element structures from one, two, three, or four registers
name = vst1
multi_fn = vst1-signed-noext, transmute(a), transmute(b)
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32

store_fn
aarch64 = st1
arm = vst1
generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void, *mut u64:uint64x1x2_t:void
generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void, *mut u64:uint64x2x2_t:void
generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void, *mut u64:uint64x1x3_t:void
generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void, *mut u64:uint64x2x3_t:void
generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void, *mut u64:uint64x1x4_t:void
generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void, *mut u64:uint64x2x4_t:void
generate *mut p8:poly8x8x2_t:void, *mut p8:poly8x8x3_t:void, *mut p8:poly8x8x4_t:void
generate *mut p8:poly8x16x2_t:void, *mut p8:poly8x16x3_t:void, *mut p8:poly8x16x4_t:void
generate *mut p16:poly16x4x2_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x4x4_t:void
generate *mut p16:poly16x8x2_t:void, *mut p16:poly16x8x3_t:void, *mut p16:poly16x8x4_t:void
target = aes
generate *mut p64:poly64x1x2_t:void
arm = nop
generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x1x4_t:void
generate *mut p64:poly64x2x2_t:void, *mut p64:poly64x2x3_t:void, *mut p64:poly64x2x4_t:void

/// Store multiple single-element structures from one, two, three, or four registers
name = vst1
a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
store_fn
arm-aarch64-separate

aarch64 = st1
link-aarch64 = st1x2._EXT3_
generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void

link-aarch64 = st1x3._EXT3_
generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void

link-aarch64 = st1x4._EXT3_
generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void

arm = vst1
link-aarch64 = st1x2._EXT3_
link-arm = vst1x2._EXTr3_
generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void

link-aarch64 = st1x3._EXT3_
link-arm = vst1x3._EXTr3_
generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void

link-aarch64 = st1x4._EXT3_
link-arm = vst1x4._EXTr3_
generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void

/// Store multiple 2-element structures from two registers
name = vst2
in1-nox
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
store_fn
arm-aarch64-separate

aarch64 = st2
link-aarch64 = st2._EXTpi8_
generate *mut i64:int64x2x2_t:void

arm = vst2
link-arm = vst2._EXTpi8r_
generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
arm = nop
aarch64 = nop
generate *mut i64:int64x1x2_t:void

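// Unlike vst1's x2 form, vst2 interleaves its two registers on the way to
// memory. A sketch (illustrative only), assuming core::arch::aarch64:
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let mut buf = [0i8; 16];
//         let evens = vdup_n_s8(1);
//         let odds = vdup_n_s8(2);
//         vst2_s8(buf.as_mut_ptr(), int8x8x2_t(evens, odds));
//         // buf == [1, 2, 1, 2, ...]: lane i of each vector forms pair i
//     }
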
/// Store multiple 2-element structures from two registers
name = vst2
multi_fn = transmute, {vst2-in1signednox-noext, transmute(a), transmute(b)}
in1-nox
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
store_fn

aarch64 = st2
generate *mut u64:uint64x2x2_t:void
target = aes
generate *mut p64:poly64x2x2_t:void

target = default
arm = vst2
generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p8:poly8x16x2_t:void, *mut p16:poly16x8x2_t:void
arm = nop
aarch64 = nop
generate *mut u64:uint64x1x2_t:void
target = aes
generate *mut p64:poly64x1x2_t:void

/// Store multiple 2-element structures from two registers
name = vst2
in1-nox
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
validate 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
store_fn
arm-aarch64-separate

aarch64 = st1
link-aarch64 = st2._EXTpi8_
generate *mut f64:float64x1x2_t:void
aarch64 = st2
generate *mut f64:float64x2x2_t:void

arm = vst2
link-arm = vst2._EXTpi8r_
generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void

/// Store multiple 2-element structures from two registers
name = vst2
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
n = 0
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
arm-aarch64-separate

aarch64 = st2
link-aarch64 = st2lane._EXTpi8_
const-aarch64 = LANE
generate *mut i8:int8x16x2_t:void, *mut i64:int64x1x2_t:void, *mut i64:int64x2x2_t:void

arm = vst2
link-arm = vst2lane._EXTpi8r_
const-arm = LANE
generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
generate *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void

/// Store multiple 2-element structures from two registers
name = vst2
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vst2-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
n = 0
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn

aarch64 = st2
generate *mut u8:uint8x16x2_t:void, *mut u64:uint64x1x2_t:void, *mut u64:uint64x2x2_t:void, *mut p8:poly8x16x2_t:void
target = aes
generate *mut p64:poly64x1x2_t:void, *mut p64:poly64x2x2_t:void

target = default
arm = vst2
generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
generate *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p16:poly16x8x2_t:void

/// Store multiple 2-element structures from two registers
name = vst2
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
n = 0
validate 1., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
store_fn
arm-aarch64-separate

aarch64 = st2
link-aarch64 = st2lane._EXTpi8_
const-aarch64 = LANE
generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void

arm = vst2
link-arm = vst2lane._EXTpi8r_
const-arm = LANE
generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void

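// The lane variants store just one 2-element pair: element LANE of each
// input register. A sketch (illustrative only), assuming core::arch::aarch64:
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let mut out = [0.0f32; 2];
//         let pair = float32x2x2_t(vdup_n_f32(1.0), vdup_n_f32(2.0));
//         vst2_lane_f32::<0>(out.as_mut_ptr(), pair); // out == [1.0, 2.0]
//     }
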
/// Store multiple 3-element structures from three registers
name = vst3
in1-nox
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
store_fn
arm-aarch64-separate

aarch64 = st3
link-aarch64 = st3._EXTpi8_
generate *mut i64:int64x2x3_t:void

arm = vst3
link-arm = vst3._EXTpi8r_
generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
arm = nop
aarch64 = nop
generate *mut i64:int64x1x3_t:void

/// Store multiple 3-element structures from three registers
name = vst3
multi_fn = transmute, {vst3-in1signednox-noext, transmute(a), transmute(b)}
in1-nox
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
store_fn

aarch64 = st3
generate *mut u64:uint64x2x3_t:void
target = aes
generate *mut p64:poly64x2x3_t:void

target = default
arm = vst3
generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p8:poly8x16x3_t:void, *mut p16:poly16x8x3_t:void
arm = nop
aarch64 = nop
generate *mut u64:uint64x1x3_t:void
target = aes
generate *mut p64:poly64x1x3_t:void

/// Store multiple 3-element structures from three registers
name = vst3
in1-nox
a = 0., 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8., 13., 14., 15., 16.
validate 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8., 2., 13., 13., 4.
store_fn
arm-aarch64-separate

aarch64 = nop
link-aarch64 = st3._EXTpi8_
generate *mut f64:float64x1x3_t:void
aarch64 = st3
generate *mut f64:float64x2x3_t:void

arm = vst3
link-arm = vst3._EXTpi8r_
generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void

/// Store multiple 3-element structures from three registers
name = vst3
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
n = 0
validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
arm-aarch64-separate

aarch64 = st3
link-aarch64 = st3lane._EXTpi8_
const-aarch64 = LANE
generate *mut i8:int8x16x3_t:void, *mut i64:int64x1x3_t:void, *mut i64:int64x2x3_t:void

arm = vst3
link-arm = vst3lane._EXTpi8r_
const-arm = LANE
generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
generate *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void

/// Store multiple 3-element structures from three registers
name = vst3
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vst3-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
n = 0
validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn

aarch64 = st3
generate *mut u8:uint8x16x3_t:void, *mut u64:uint64x1x3_t:void, *mut u64:uint64x2x3_t:void, *mut p8:poly8x16x3_t:void
target = aes
generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x2x3_t:void

target = default
arm = vst3
generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
generate *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x8x3_t:void

/// Store multiple 3-element structures from three registers
name = vst3
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
n = 0
validate 1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
store_fn
arm-aarch64-separate

aarch64 = st3
link-aarch64 = st3lane._EXTpi8_
const-aarch64 = LANE
generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void

arm = vst3
link-arm = vst3lane._EXTpi8r_
const-arm = LANE
generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void

/// Store multiple 4-element structures from four registers
name = vst4
in1-nox
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
store_fn
arm-aarch64-separate

aarch64 = st4
link-aarch64 = st4._EXTpi8_
generate *mut i64:int64x2x4_t:void

arm = vst4
link-arm = vst4._EXTpi8r_
generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
arm = nop
aarch64 = nop
generate *mut i64:int64x1x4_t:void

/// Store multiple 4-element structures from four registers
name = vst4
multi_fn = transmute, {vst4-in1signednox-noext, transmute(a), transmute(b)}
in1-nox
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
store_fn

aarch64 = st4
generate *mut u64:uint64x2x4_t:void
target = aes
generate *mut p64:poly64x2x4_t:void

target = default
arm = vst4
generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p8:poly8x16x4_t:void, *mut p16:poly16x8x4_t:void
arm = nop
aarch64 = nop
generate *mut u64:uint64x1x4_t:void
target = aes
generate *mut p64:poly64x1x4_t:void

/// Store multiple 4-element structures from four registers
name = vst4
in1-nox
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
store_fn
arm-aarch64-separate

aarch64 = nop
link-aarch64 = st4._EXTpi8_
generate *mut f64:float64x1x4_t:void
aarch64 = st4
generate *mut f64:float64x2x4_t:void

arm = vst4
link-arm = vst4._EXTpi8r_
generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void

/// Store multiple 4-element structures from four registers
name = vst4
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
n = 0
validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
arm-aarch64-separate

aarch64 = st4
link-aarch64 = st4lane._EXTpi8_
const-aarch64 = LANE
generate *mut i8:int8x16x4_t:void, *mut i64:int64x1x4_t:void, *mut i64:int64x2x4_t:void

arm = vst4
link-arm = vst4lane._EXTpi8r_
const-arm = LANE
generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
generate *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void

/// Store multiple 4-element structures from four registers
name = vst4
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vst4-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
n = 0
validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn

aarch64 = st4
generate *mut u8:uint8x16x4_t:void, *mut u64:uint64x1x4_t:void, *mut u64:uint64x2x4_t:void, *mut p8:poly8x16x4_t:void
target = aes
generate *mut p64:poly64x1x4_t:void, *mut p64:poly64x2x4_t:void

target = default
arm = vst4
generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
generate *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p16:poly16x8x4_t:void

/// Store multiple 4-element structures from four registers
name = vst4
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
n = 0
validate 1., 2., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
store_fn
arm-aarch64-separate

aarch64 = st4
link-aarch64 = st4lane._EXTpi8_
const-aarch64 = LANE
generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void

arm = vst4
link-arm = vst4lane._EXTpi8r_
const-arm = LANE
generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void

/// Dot product index form with signed and unsigned integers
name = vsudot
out-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_dot-LANE
multi_fn = simd_shuffle-in_len-!, c:unsigned, c, c, {base-4-LANE}
multi_fn = vsudot-outlane-_, a, b, c
a = 1, 2, 1, 2
b = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
n = 0
validate 31, 72, 31, 72
target = dotprod

aarch64 = sudot
link-aarch64 = usdot._EXT2_._EXT4_:int32x2_t:int8x8_t:uint8x8_t:int32x2_t
// LLVM ERROR: Cannot select: intrinsic %llvm.aarch64.neon.usdot
//generate int32x2_t:int8x8_t:uint8x8_t:int32x2_t, int32x2_t:int8x8_t:uint8x16_t:int32x2_t
link-aarch64 = usdot._EXT2_._EXT4_:int32x4_t:int8x16_t:uint8x16_t:int32x4_t
// LLVM ERROR: Cannot select: intrinsic %llvm.aarch64.neon.usdot
//generate int32x4_t:int8x16_t:uint8x8_t:int32x4_t, int32x4_t:int8x16_t:uint8x16_t:int32x4_t

/// Multiply
name = vmul
a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
arm = vmul.
aarch64 = mul
fn = simd_mul
generate int*_t, uint*_t

/// Polynomial multiply
name = vmul
a = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48

aarch64 = pmul
link-aarch64 = pmul._EXT_
arm = vmul
link-arm = vmulp._EXT_
generate poly8x8_t, poly8x16_t

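// Polynomial multiplication is carry-less: partial products are combined
// with XOR rather than addition, and vmul_p8 keeps only the low 8 bits per
// lane. A scalar sketch of the per-lane semantics (pmul8 is a hypothetical
// helper, not part of the generated API):
//
//     fn pmul8(a: u8, b: u8) -> u8 {
//         let mut acc = 0u8;
//         for i in 0..8 {
//             if (b >> i) & 1 != 0 {
//                 acc ^= a << i; // XOR accumulate; bits above bit 7 are dropped
//             }
//         }
//         acc
//     }
//     // pmul8(3, 6) == 10, matching lane 5 of the validate row above
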
/// Multiply
name = vmul
fn = simd_mul
a = 1.0, 2.0, 1.0, 2.0
b = 2.0, 3.0, 4.0, 5.0
validate 2.0, 6.0, 4.0, 10.0

aarch64 = fmul
generate float64x*_t

arm = vmul.
generate float*_t

/// Vector multiply by scalar
name = vmul
out-n-suffix
multi_fn = simd_mul, a, {vdup-nout-noext, b}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32

arm = vmul
aarch64 = mul
generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t
generate uint16x4_t:u16:uint16x4_t, uint16x8_t:u16:uint16x8_t, uint32x2_t:u32:uint32x2_t, uint32x4_t:u32:uint32x4_t

/// Vector multiply by scalar
name = vmul
out-n-suffix
multi_fn = simd_mul, a, {vdup-nout-noext, b}
a = 1., 2., 3., 4.
b = 2.
validate 2., 4., 6., 8.

aarch64 = fmul
generate float64x1_t:f64:float64x1_t, float64x2_t:f64:float64x2_t

arm = vmul
generate float32x2_t:f32:float32x2_t, float32x4_t:f32:float32x4_t

/// Multiply
name = vmul
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_mul, a, {simd_shuffle-out_len-!, b, b, {dup-out_len-LANE as u32}}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32

aarch64 = mul
arm = vmul
generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t
generate uint16x4_t, uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
generate uint32x2_t, uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t

/// Floating-point multiply
name = vmul
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_mul, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
a = 1., 2., 3., 4.
b = 2., 0., 0., 0.
n = 0
validate 2., 4., 6., 8.

aarch64 = fmul
generate float64x1_t, float64x1_t:float64x2_t:float64x1_t

/// Floating-point multiply
name = vmul
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_mul, a, {simd_shuffle-out_len-!, b, b, {dup-out_len-LANE as u32}}
a = 1., 2., 3., 4.
b = 2., 0., 0., 0.
n = 0
validate 2., 4., 6., 8.

aarch64 = fmul
generate float64x2_t:float64x1_t:float64x2_t, float64x2_t

arm = vmul
generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Floating-point multiply
name = vmuls_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_extract, b:f32, b, LANE as u32
multi_fn = a * b
a = 1.
b = 2., 0., 0., 0.
n = 0
validate 2.
aarch64 = fmul
generate f32:float32x2_t:f32, f32:float32x4_t:f32

/// Floating-point multiply
name = vmuld_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_extract, b:f64, b, LANE as u32
multi_fn = a * b
a = 1.
b = 2., 0.
n = 0
validate 2.
aarch64 = fmul
generate f64:float64x1_t:f64, f64:float64x2_t:f64

/// Signed multiply long
name = vmull
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32

arm = vmull.s
aarch64 = smull
link-arm = vmulls._EXT_
link-aarch64 = smull._EXT_
generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t

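// vmull widens before multiplying, so the product cannot wrap. Per-lane
// semantics as a scalar sketch (mull_lane is a hypothetical helper):
//
//     fn mull_lane(a: i8, b: i8) -> i16 {
//         a as i16 * b as i16
//     }
//     // mull_lane(-128, -128) == 16384 — exact, where vmul_s8 would wrap to 0
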
/// Signed multiply long
name = vmull_high
no-q
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right}
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
multi_fn = vmull-noqself-noext, a, b
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 9, 20, 11, 24, 13, 28, 15, 32

aarch64 = smull2
generate int8x16_t:int8x16_t:int16x8_t, int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t

/// Unsigned multiply long
name = vmull
a = 1, 2, 3, 4, 5, 6, 7, 8
b = 1, 2, 1, 2, 1, 2, 1, 2
validate 1, 4, 3, 8, 5, 12, 7, 16

arm = vmull.s
aarch64 = umull
link-arm = vmullu._EXT_
link-aarch64 = umull._EXT_
generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t

/// Unsigned multiply long
name = vmull_high
no-q
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right}
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
multi_fn = vmull-noqself-noext, a, b
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 9, 20, 11, 24, 13, 28, 15, 32

aarch64 = umull2
generate uint8x16_t:uint8x16_t:uint16x8_t, uint16x8_t:uint16x8_t:uint32x4_t, uint32x4_t:uint32x4_t:uint64x2_t

/// Polynomial multiply long
name = vmull
a = 1, 2, 3, 4, 5, 6, 7, 8
b = 1, 3, 1, 3, 1, 3, 1, 3
validate 1, 6, 3, 12, 5, 10, 7, 24

arm = vmull.s
aarch64 = pmull
link-arm = vmullp._EXT_
link-aarch64 = pmull._EXT_
generate poly8x8_t:poly8x8_t:poly16x8_t

/// Polynomial multiply long
name = vmull
no-q
a = 15
b = 3
validate 17
target = aes

aarch64 = pmull
link-aarch64 = pmull64:p64:p64:p64:int8x16_t
// Because of the support status of llvm, vmull_p64 is currently only available on aarch64
// arm = vmull
// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
generate p64:p64:p128


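// vmull_p64 is the 64x64 -> 128-bit carry-less multiply used by GHASH and
// CRC kernels. Its semantics as a plain-Rust sketch (pmull64 is a
// hypothetical helper, not the intrinsic itself):
//
//     fn pmull64(a: u64, b: u64) -> u128 {
//         let mut acc = 0u128;
//         for i in 0..64 {
//             if (b >> i) & 1 != 0 {
//                 acc ^= (a as u128) << i;
//             }
//         }
//         acc
//     }
//     // pmull64(15, 3) == 17, matching the a/b/validate rows above
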
/// Polynomial multiply long
name = vmull_high
no-q
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right}
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right}
multi_fn = vmull-noqself-noext, a, b
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 9, 30, 11, 20, 13, 18, 15, 48

aarch64 = pmull
generate poly8x16_t:poly8x16_t:poly16x8_t

/// Polynomial multiply long
name = vmull_high
no-q
multi_fn = vmull-noqself-noext, {simd_extract, a, 1}, {simd_extract, b, 1}
a = 1, 15
b = 1, 3
validate 17
target = aes

aarch64 = pmull
generate poly64x2_t:poly64x2_t:p128

/// Vector long multiply with scalar
name = vmull_n
no-q
multi_fn = vmull-in0-noext, a, {vdup-nin0-noext, b}
a = 1, 2, 3, 4, 5, 6, 7, 8
b = 2
validate 2, 4, 6, 8, 10, 12, 14, 16

arm = vmull
aarch64 = smull
generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
aarch64 = umull
generate uint16x4_t:u16:uint32x4_t, uint32x2_t:u32:uint64x2_t

/// Vector long multiply by scalar
name = vmull_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmull-in0-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32

arm = vmull
aarch64 = smull
generate int16x4_t:int16x4_t:int32x4_t, int16x4_t:int16x8_t:int32x4_t
generate int32x2_t:int32x2_t:int64x2_t, int32x2_t:int32x4_t:int64x2_t
aarch64 = umull
generate uint16x4_t:uint16x4_t:uint32x4_t, uint16x4_t:uint16x8_t:uint32x4_t
generate uint32x2_t:uint32x2_t:uint64x2_t, uint32x2_t:uint32x4_t:uint64x2_t

/// Multiply long
name = vmull_high_n
no-q
multi_fn = vmull_high-noqself-noext, a, {vdup-nin0-noext, b}
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 2
validate 18, 20, 22, 24, 26, 28, 30, 32

aarch64 = smull2
generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
aarch64 = umull2
generate uint16x8_t:u16:uint32x4_t, uint32x4_t:u32:uint64x2_t

/// Multiply long
name = vmull_high_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmull_high-noqself-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}}
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 18, 20, 22, 24, 26, 28, 30, 32

aarch64 = smull2
generate int16x8_t:int16x4_t:int32x4_t, int16x8_t:int16x8_t:int32x4_t
generate int32x4_t:int32x2_t:int64x2_t, int32x4_t:int32x4_t:int64x2_t
aarch64 = umull2
generate uint16x8_t:uint16x4_t:uint32x4_t, uint16x8_t:uint16x8_t:uint32x4_t
generate uint32x4_t:uint32x2_t:uint64x2_t, uint32x4_t:uint32x4_t:uint64x2_t

/// Floating-point multiply extended
name = vmulx
a = 1., 2., 3., 4.
b = 2., 2., 2., 2.
validate 2., 4., 6., 8.

aarch64 = fmulx
link-aarch64 = fmulx._EXT_
generate float*_t, float64x*_t

/// Floating-point multiply extended
name = vmulx
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmulx-in0-noext, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
a = 1.
b = 2., 0.
n = 0
validate 2.

aarch64 = fmulx
generate float64x1_t, float64x1_t:float64x2_t:float64x1_t

/// Floating-point multiply extended
name = vmulx
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmulx-in0-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}}
a = 1., 2., 3., 4.
b = 2., 0., 0., 0.
n = 0
validate 2., 4., 6., 8.

aarch64 = fmulx
generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
generate float64x2_t:float64x1_t:float64x2_t, float64x2_t

/// Floating-point multiply extended
name = vmulx
a = 2.
b = 3.
validate 6.

aarch64 = fmulx
link-aarch64 = fmulx._EXT_
generate f32, f64

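// fmulx behaves like fmul except that 0 multiplied by infinity returns 2.0
// (with the XOR of the operand signs) instead of NaN, which is what
// reciprocal-step algorithms want. A sketch (illustrative only), assuming
// core::arch::aarch64:
//
//     use core::arch::aarch64::*;
//     unsafe {
//         assert_eq!(vmulxs_f32(0.0, f32::INFINITY), 2.0); // fmul would give NaN
//     }
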
/// Floating-point multiply extended
name = vmulx
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmulx-out-noext, a, {simd_extract, b, LANE as u32}

a = 2.
b = 3., 0., 0., 0.
n = 0
validate 6.

aarch64 = fmulx
generate f32:float32x2_t:f32, f32:float32x4_t:f32, f64:float64x1_t:f64, f64:float64x2_t:f64

/// Floating-point fused Multiply-Add to accumulator (vector)
name = vfma
multi_fn = vfma-self-_, b, c, a
a = 8.0, 18.0, 12.0, 10.0
b = 6.0, 4.0, 7.0, 8.0
c = 2.0, 3.0, 4.0, 5.0
validate 20.0, 30.0, 40.0, 50.0

link-aarch64 = llvm.fma._EXT_
aarch64 = fmadd
generate float64x1_t
aarch64 = fmla
generate float64x2_t

target = vfp4
arm = vfma
link-arm = llvm.fma._EXT_
generate float*_t

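// vfma computes b * c + a per lane with a single rounding. The scalar
// equivalent in Rust is mul_add:
//
//     let (a, b, c) = (8.0f32, 6.0f32, 2.0f32);
//     assert_eq!(b.mul_add(c, a), 20.0); // matches the first validate lane above
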
/// Floating-point fused Multiply-Add to accumulator (vector)
name = vfma
n-suffix
multi_fn = vfma-self-noext, a, b, {vdup-nselfvfp4-noext, c}
a = 2.0, 3.0, 4.0, 5.0
b = 6.0, 4.0, 7.0, 8.0
c = 8.0
validate 50.0, 35.0, 60.0, 69.0

aarch64 = fmadd
generate float64x1_t:float64x1_t:f64:float64x1_t
aarch64 = fmla
generate float64x2_t:float64x2_t:f64:float64x2_t

target = vfp4
arm = vfma
generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t

/// Floating-point fused multiply-add to accumulator
name = vfma
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vfma-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}}
a = 2., 3., 4., 5.
b = 6., 4., 7., 8.
c = 2., 0., 0., 0.
n = 0
validate 14., 11., 18., 21.

aarch64 = fmla
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
aarch64 = fmadd
generate float64x1_t
aarch64 = fmla
generate float64x1_t:float64x1_t:float64x2_t:float64x1_t, float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t

/// Floating-point fused multiply-add to accumulator
name = vfma
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = simd_extract, c:out_t, c, LANE as u32
multi_fn = vfma-in2lane-_, b, c, a
a = 2.
b = 6.
c = 3., 0., 0., 0.
n = 0
validate 20.

aarch64 = fmla
link-aarch64 = llvm.fma._EXT_:f32:f32:f32:f32
generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32
link-aarch64 = llvm.fma._EXT_:f64:f64:f64:f64
aarch64 = fmadd
generate f64:f64:float64x1_t:f64
aarch64 = fmla
generate f64:f64:float64x2_t:f64

/// Floating-point fused multiply-subtract from accumulator
name = vfms
multi_fn = simd_neg, b:in_t, b
multi_fn = vfma-self-noext, a, b, c
a = 20.0, 30.0, 40.0, 50.0
b = 6.0, 4.0, 7.0, 8.0
c = 2.0, 3.0, 4.0, 5.0
validate 8.0, 18.0, 12.0, 10.0

aarch64 = fmsub
generate float64x1_t
aarch64 = fmls
generate float64x2_t

target = vfp4
arm = vfms
generate float*_t

/// Floating-point fused Multiply-subtract to accumulator (vector)
name = vfms
n-suffix
multi_fn = vfms-self-noext, a, b, {vdup-nselfvfp4-noext, c}
a = 50.0, 35.0, 60.0, 69.0
b = 6.0, 4.0, 7.0, 8.0
c = 8.0
validate 2.0, 3.0, 4.0, 5.0

aarch64 = fmsub
generate float64x1_t:float64x1_t:f64:float64x1_t
aarch64 = fmls
generate float64x2_t:float64x2_t:f64:float64x2_t

target = vfp4
arm = vfms
generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t

/// Floating-point fused multiply-subtract to accumulator
name = vfms
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vfms-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}}
a = 14., 11., 18., 21.
b = 6., 4., 7., 8.
c = 2., 0., 0., 0.
n = 0
validate 2., 3., 4., 5.

aarch64 = fmls
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
aarch64 = fmsub
generate float64x1_t
aarch64 = fmls
generate float64x1_t:float64x1_t:float64x2_t:float64x1_t, float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t

/// Floating-point fused multiply-subtract to accumulator
name = vfms
in2-lane-suffixes
constn = LANE
multi_fn = vfma-in2lane-::<LANE>, a, -b, c
a = 14.
b = 6.
c = 2., 0., 0., 0.
n = 0
validate 2.

aarch64 = fmls
generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32
aarch64 = fmsub
generate f64:f64:float64x1_t:f64
aarch64 = fmls
generate f64:f64:float64x2_t:f64

/// Divide
name = vdiv
fn = simd_div
a = 2.0, 6.0, 4.0, 10.0
b = 1.0, 2.0, 1.0, 2.0
validate 2.0, 3.0, 4.0, 5.0

aarch64 = fdiv
generate float*_t, float64x*_t

/// Subtract
name = vsub
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
arm = vsub.
aarch64 = sub
fn = simd_sub
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Subtract
name = vsub
fn = simd_sub
a = 1.0, 4.0, 3.0, 8.0
b = 1.0, 2.0, 3.0, 4.0
validate 0.0, 2.0, 0.0, 4.0

aarch64 = fsub
generate float64x*_t

arm = vsub.
generate float*_t

/// Subtract
name = vsub
multi_fn = a.wrapping_sub(b)
a = 3
b = 2
validate 1

aarch64 = nop
generate i64, u64

/// Add
name = vadd
multi_fn = a.wrapping_add(b)
a = 1
b = 2
validate 3

aarch64 = nop
generate i64, u64

/// Bitwise exclusive OR
name = vadd
multi_fn = simd_xor, a, b
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
validate 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17

aarch64 = nop
arm = nop
generate poly8x8_t, poly16x4_t, poly8x16_t, poly16x8_t, poly64x1_t, poly64x2_t

/// Bitwise exclusive OR
name = vaddq
no-q
multi_fn = a ^ b
a = 16
b = 1
validate 17

aarch64 = nop
arm = nop
generate p128

/// Floating-point add across vector
name = vaddv
a = 1., 2., 0., 0.
validate 3.

aarch64 = faddp
link-aarch64 = faddv._EXT2_._EXT_
generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64

/// Signed Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4
validate 10

aarch64 = saddlv
link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
generate int16x4_t:i32

/// Signed Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4, 5, 6, 7, 8
validate 36

aarch64 = saddlv
link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
generate int16x8_t:i32

/// Signed Add Long across Vector
name = vaddlv
a = 1, 2
validate 3

aarch64 = saddlp
link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
generate int32x2_t:i64

/// Signed Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4
validate 10

aarch64 = saddlv
link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
generate int32x4_t:i64

/// Unsigned Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4
validate 10

aarch64 = uaddlv
link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
generate uint16x4_t:u32

/// Unsigned Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4, 5, 6, 7, 8
validate 36

aarch64 = uaddlv
link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
generate uint16x8_t:u32

/// Unsigned Add Long across Vector
name = vaddlv
a = 1, 2
validate 3

aarch64 = uaddlp
link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
generate uint32x2_t:u64

/// Unsigned Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4
validate 10

aarch64 = uaddlv
link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
generate uint32x4_t:u64

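// vaddlv widens every lane before summing, so the reduction cannot overflow
// the element type. Scalar sketch (addlv_u16 is a hypothetical helper):
//
//     fn addlv_u16(v: [u16; 4]) -> u32 {
//         v.iter().map(|&x| x as u32).sum()
//     }
//     // addlv_u16([1, 2, 3, 4]) == 10, matching the uint16x4_t rows above
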
/// Subtract returning high narrow
name = vsubhn
no-q
multi_fn = fixed, c:in_t
multi_fn = simd_cast, {simd_shr, {simd_sub, a, b}, transmute(c)}
a = MAX, MIN, 1, 1, MAX, MIN, 1, 1
b = 1, 0, 0, 0, 1, 0, 0, 0
fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS
validate MAX, MIN, 0, 0, MAX, MIN, 0, 0

arm = vsubhn
aarch64 = subhn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

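// vsubhn keeps the high half of each difference: subtract at full width,
// then shift right by half the element width and narrow. Scalar sketch
// (subhn_lane is a hypothetical helper):
//
//     fn subhn_lane(a: i16, b: i16) -> i8 {
//         (a.wrapping_sub(b) >> 8) as i8
//     }
//     // subhn_lane(i16::MAX, 1) == i8::MAX, matching the first validate lane
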
/// Subtract returning high narrow
name = vsubhn_high
no-q
multi_fn = vsubhn-noqself-noext, d:in_t0, b, c
multi_fn = simd_shuffle-out_len-!, a, d, {asc-0-out_len}
a = MAX, 0, MAX, 0, MAX, 0, MAX, 0
b = MAX, 1, MAX, 1, MAX, 1, MAX, 1
c = 1, 0, 1, 0, 1, 0, 1, 0
validate MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0

arm = vsubhn
aarch64 = subhn2
generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t

/// Signed halving subtract
name = vhsub
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7

arm = vhsub.s
aarch64 = uhsub
link-arm = vhsubu._EXT_
link-aarch64 = uhsub._EXT_
generate uint*_t

arm = vhsub.s
aarch64 = shsub
link-arm = vhsubs._EXT_
link-aarch64 = shsub._EXT_
generate int*_t

/// Signed Subtract Wide
name = vsubw
no-q
multi_fn = simd_sub, a, {simd_cast, b}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

arm = vsubw
aarch64 = ssubw
generate int16x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int64x2_t

/// Unsigned Subtract Wide
name = vsubw
no-q
multi_fn = simd_sub, a, {simd_cast, b}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

arm = vsubw
aarch64 = usubw
generate uint16x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint64x2_t

/// Signed Subtract Wide
name = vsubw_high
no-q
multi_fn = simd_shuffle8!, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_sub, a, {simd_cast, c}
a = 8, 9, 10, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
validate 0, 0, 0, 0, 0, 0, 0, 0

aarch64 = ssubw
generate int16x8_t:int8x16_t:int16x8_t

/// Signed Subtract Wide
name = vsubw_high
no-q
multi_fn = simd_shuffle4!, c:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_sub, a, {simd_cast, c}
a = 8, 9, 10, 11
b = 0, 1, 2, 3, 8, 9, 10, 11
validate 0, 0, 0, 0

aarch64 = ssubw
generate int32x4_t:int16x8_t:int32x4_t

/// Signed Subtract Wide
name = vsubw_high
no-q
multi_fn = simd_shuffle2!, c:int32x2_t, b, b, [2, 3]
multi_fn = simd_sub, a, {simd_cast, c}
a = 8, 9
b = 6, 7, 8, 9
validate 0, 0

aarch64 = ssubw
generate int64x2_t:int32x4_t:int64x2_t

/// Unsigned Subtract Wide
name = vsubw_high
no-q
multi_fn = simd_shuffle8!, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_sub, a, {simd_cast, c}
a = 8, 9, 10, 11, 12, 13, 14, 15
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 0, 0, 0, 0, 0, 0, 0

aarch64 = usubw
generate uint16x8_t:uint8x16_t:uint16x8_t

/// Unsigned Subtract Wide
name = vsubw_high
no-q
multi_fn = simd_shuffle4!, c:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_sub, a, {simd_cast, c}
a = 8, 9, 10, 11
b = 0, 1, 2, 3, 8, 9, 10, 11
validate 0, 0, 0, 0

aarch64 = usubw
generate uint32x4_t:uint16x8_t:uint32x4_t

/// Unsigned Subtract Wide
name = vsubw_high
no-q
multi_fn = simd_shuffle2!, c:uint32x2_t, b, b, [2, 3]
multi_fn = simd_sub, a, {simd_cast, c}
a = 8, 9
b = 6, 7, 8, 9
validate 0, 0

aarch64 = usubw
generate uint64x2_t:uint32x4_t:uint64x2_t

/// Signed Subtract Long
name = vsubl
no-q
multi_fn = simd_cast, c:out_t, a
multi_fn = simd_cast, d:out_t, b
multi_fn = simd_sub, c, d

a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

arm = vsubl
aarch64 = ssubl
generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t

/// Unsigned Subtract Long
name = vsubl
no-q
multi_fn = simd_cast, c:out_t, a
multi_fn = simd_cast, d:out_t, b
multi_fn = simd_sub, c, d

a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

arm = vsubl
aarch64 = usubl
generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t

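// vsubl widens both inputs before subtracting, so the difference is exact.
// Per-lane scalar sketch (subl_lane is a hypothetical helper):
//
//     fn subl_lane(a: i8, b: i8) -> i16 {
//         a as i16 - b as i16
//     }
//     // subl_lane(i8::MAX, i8::MAX) == 0, matching the first validate lane
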
/// Signed Subtract Long
name = vsubl_high
no-q
multi_fn = simd_shuffle8!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle8!, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f

a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
validate 6, 7, 8, 9, 10, 11, 12, 13

aarch64 = ssubl
generate int8x16_t:int8x16_t:int16x8_t

/// Signed Subtract Long
name = vsubl_high
no-q
multi_fn = simd_shuffle4!, c:int16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle4!, e:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f

a = 8, 9, 10, 11, 12, 13, 14, 15
b = 6, 6, 6, 6, 8, 8, 8, 8
validate 4, 5, 6, 7

aarch64 = ssubl
generate int16x8_t:int16x8_t:int32x4_t

/// Signed Subtract Long
name = vsubl_high
no-q
multi_fn = simd_shuffle2!, c:int32x2_t, a, a, [2, 3]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle2!, e:int32x2_t, b, b, [2, 3]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f

a = 12, 13, 14, 15
b = 6, 6, 8, 8
validate 6, 7

aarch64 = ssubl
generate int32x4_t:int32x4_t:int64x2_t

/// Unsigned Subtract Long
name = vsubl_high
no-q
multi_fn = simd_shuffle8!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle8!, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f

a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
validate 6, 7, 8, 9, 10, 11, 12, 13

aarch64 = usubl
generate uint8x16_t:uint8x16_t:uint16x8_t

/// Unsigned Subtract Long
name = vsubl_high
no-q
multi_fn = simd_shuffle4!, c:uint16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle4!, e:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f

a = 8, 9, 10, 11, 12, 13, 14, 15
b = 6, 6, 6, 6, 8, 8, 8, 8
validate 4, 5, 6, 7

aarch64 = usubl
generate uint16x8_t:uint16x8_t:uint32x4_t

/// Unsigned Subtract Long
name = vsubl_high
no-q
multi_fn = simd_shuffle2!, c:uint32x2_t, a, a, [2, 3]
multi_fn = simd_cast, d:out_t, c
multi_fn = simd_shuffle2!, e:uint32x2_t, b, b, [2, 3]
multi_fn = simd_cast, f:out_t, e
multi_fn = simd_sub, d, f

a = 12, 13, 14, 15
b = 6, 6, 8, 8
validate 6, 7

aarch64 = usubl
generate uint32x4_t:uint32x4_t:uint64x2_t

/// Bit clear and exclusive OR
name = vbcax
a = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
c = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
validate 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14
target = sha3

aarch64 = bcax
link-aarch64 = llvm.aarch64.crypto.bcaxs._EXT_
generate int8x16_t, int16x8_t, int32x4_t, int64x2_t
link-aarch64 = llvm.aarch64.crypto.bcaxu._EXT_
generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t

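// BCAX computes a ^ (b & !c): clear the bits of b that are set in c, then
// XOR into a (one of the SHA-3 helper operations). Scalar sketch (bcax_lane
// is a hypothetical helper):
//
//     fn bcax_lane(a: u8, b: u8, c: u8) -> u8 {
//         a ^ (b & !c)
//     }
//     // bcax_lane(1, 2, 1) == 3, matching lane 2 of the validate row above
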
/// Floating-point complex add
name = vcadd_rot270
no-q
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
validate 2., 0., 2., 0.
target = fcma

aarch64 = fcadd
link-aarch64 = vcadd.rot270._EXT_
generate float32x2_t
name = vcaddq_rot270
generate float32x4_t, float64x2_t

/// Floating-point complex add
name = vcadd_rot90
no-q
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
validate 0., -2., 0., -2.
target = fcma

aarch64 = fcadd
link-aarch64 = vcadd.rot90._EXT_
generate float32x2_t
name = vcaddq_rot90
generate float32x4_t, float64x2_t

/// Floating-point complex multiply accumulate
name = vcmla
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
validate 0., -2., 2., 0.
target = fcma

aarch64 = fcmla
link-aarch64 = vcmla.rot0._EXT_
generate float32x2_t, float32x4_t, float64x2_t

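// FCMLA treats each pair of lanes as a complex number (re, im). The rot0
// form accumulates only the b.re partial products; chaining rot0 with the
// rot90 form below yields a full complex multiply-accumulate. Scalar sketch
// per pair (hypothetical helpers, checked against the validate rows of the
// rot0 and rot90 entries):
//
//     fn cmla_rot0(a: (f32, f32), b: (f32, f32), c: (f32, f32)) -> (f32, f32) {
//         (a.0 + b.0 * c.0, a.1 + b.0 * c.1)
//     }
//     fn cmla_rot90(a: (f32, f32), b: (f32, f32), c: (f32, f32)) -> (f32, f32) {
//         (a.0 - b.1 * c.1, a.1 + b.1 * c.0)
//     }
//     // cmla_rot90(cmla_rot0(acc, b, c), b, c) == acc + b * c (complex product)
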
/// Floating-point complex multiply accumulate
name = vcmla_rot90
rot-suffix
a = 1., 1., 1., 1.
b = 1., -1., 1., -1.
c = 1., 1., 1., 1.
validate 2., 0., 2., 0.
target = fcma

aarch64 = fcmla
link-aarch64 = vcmla.rot90._EXT_
generate float32x2_t, float32x4_t, float64x2_t

/// Floating-point complex multiply accumulate
name = vcmla_rot180
rot-suffix
a = 1., 1., 1., 1.
b = 1., -1., 1., -1.
c = 1., 1., 1., 1.
validate 0., 0., 0., 0.
target = fcma

aarch64 = fcmla
link-aarch64 = vcmla.rot180._EXT_
generate float32x2_t, float32x4_t, float64x2_t

/// Floating-point complex multiply accumulate
name = vcmla_rot270
rot-suffix
a = 1., 1., 1., 1.
b = 1., -1., 1., -1.
c = 1., 1., 1., 1.
validate 0., 2., 0., 2.
target = fcma

aarch64 = fcmla
link-aarch64 = vcmla.rot270._EXT_
generate float32x2_t, float32x4_t, float64x2_t

/// Floating-point complex multiply accumulate
name = vcmla
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_rot-LANE
multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {base-2-LANE}
multi_fn = vcmla-self-noext, a, b, c
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
n = 0
validate 0., -2., 0., -2.
target = fcma

aarch64 = fcmla
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Floating-point complex multiply accumulate
name = vcmla_rot90
rot-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_rot-LANE
multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {base-2-LANE}
multi_fn = vcmla_rot90-rot-noext, a, b, c
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
n = 0
validate 0., 0., 0., 0.
target = fcma

aarch64 = fcmla
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Floating-point complex multiply accumulate
name = vcmla_rot180
rot-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_rot-LANE
multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {base-2-LANE}
multi_fn = vcmla_rot180-rot-noext, a, b, c
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
n = 0
validate 2., 0., 2., 0.
target = fcma

aarch64 = fcmla
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Floating-point complex multiply accumulate
name = vcmla_rot270
rot-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_rot-LANE
multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {base-2-LANE}
multi_fn = vcmla_rot270-rot-noext, a, b, c
a = 1., -1., 1., -1.
b = -1., 1., -1., 1.
c = 1., 1., -1., -1.
n = 0
validate 2., -2., 2., -2.
target = fcma

aarch64 = fcmla
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Dot product arithmetic
name = vdot
out-suffix
a = 1, 2, 1, 2
b = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
validate 31, 176, 31, 176
target = dotprod

aarch64 = sdot
link-aarch64 = sdot._EXT_._EXT3_
generate int32x2_t:int8x8_t:int8x8_t:int32x2_t, int32x4_t:int8x16_t:int8x16_t:int32x4_t

aarch64 = udot
link-aarch64 = udot._EXT_._EXT3_
generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t, uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t

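// Each 32-bit output lane of sdot/udot is the accumulator plus the dot
// product of four adjacent 8-bit lanes. Scalar sketch (sdot_lane is a
// hypothetical helper):
//
//     fn sdot_lane(acc: i32, b: [i8; 4], c: [i8; 4]) -> i32 {
//         acc + b.iter().zip(c.iter()).map(|(&x, &y)| x as i32 * y as i32).sum::<i32>()
//     }
//     // sdot_lane(1, [1, 2, 3, 4], [1, 2, 3, 4]) == 31, matching the first validate lane
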
/// Dot product arithmetic
name = vdot
out-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_dot-LANE
multi_fn = simd_shuffle-in_len-!, c:in_t, c, c, {base-4-LANE}
multi_fn = vdot-out-noext, a, b, c
a = 1, 2, 1, 2
b = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
n = 0
validate 31, 72, 31, 72
target = dotprod

aarch64 = sdot
generate int32x2_t:int8x8_t:int8x8_t:int32x2_t, int32x2_t:int8x8_t:int8x16_t:int32x2_t
generate int32x4_t:int8x16_t:int8x8_t:int32x4_t, int32x4_t:int8x16_t:int8x16_t:int32x4_t

aarch64 = udot
generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t, uint32x2_t:uint8x8_t:uint8x16_t:uint32x2_t
generate uint32x4_t:uint8x16_t:uint8x8_t:uint32x4_t, uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t

/// Maximum (vector)
name = vmax
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
validate 16, 15, 14, 13, 12, 11, 10, 9, 9, 10, 11, 12, 13, 14, 15, 16

arm = vmax
aarch64 = smax
link-arm = vmaxs._EXT_
link-aarch64 = smax._EXT_
generate int*_t

arm = vmax
aarch64 = umax
link-arm = vmaxu._EXT_
link-aarch64 = umax._EXT_
generate uint*_t

/// Maximum (vector)
name = vmax
a = 1.0, -2.0, 3.0, -4.0
b = 0.0, 3.0, 2.0, 8.0
validate 1.0, 3.0, 3.0, 8.0

aarch64 = fmax
link-aarch64 = fmax._EXT_
generate float64x*_t

arm = vmax
aarch64 = fmax
link-arm = vmaxs._EXT_
link-aarch64 = fmax._EXT_
generate float*_t

/// Floating-point Maximum Number (vector)
name = vmaxnm
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 8.0, 16.0, 3.0, 6.0

aarch64 = fmaxnm
link-aarch64 = fmaxnm._EXT_
generate float64x*_t

target = fp-armv8
arm = vmaxnm
aarch64 = fmaxnm
link-arm = vmaxnm._EXT_
link-aarch64 = fmaxnm._EXT_
generate float*_t

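// fmaxnm implements IEEE 754 maxNum: if exactly one operand is a quiet NaN,
// the other operand is returned. Rust's scalar f32::max follows the same
// rule, so as a sketch:
//
//     assert_eq!(f32::NAN.max(3.0), 3.0);
//     assert_eq!(3.0f32.max(f32::NAN), 3.0);
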
/// Floating-point maximum number across vector
name = vmaxnmv
a = 1., 2., 0., 1.
validate 2.

aarch64 = fmaxnmp
link-aarch64 = fmaxnmv._EXT2_._EXT_
generate float32x2_t:f32, float64x2_t:f64
aarch64 = fmaxnmv
generate float32x4_t:f32

/// Floating-point Maximum Number Pairwise (vector).
name = vpmaxnm
a = 1.0, 2.0
b = 6.0, -3.0
validate 2.0, 6.0
aarch64 = fmaxnmp
link-aarch64 = fmaxnmp._EXT_
generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t

/// Floating-point Maximum Number Pairwise (vector).
name = vpmaxnm
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 2.0, 3.0, 16.0, 6.0
aarch64 = fmaxnmp
link-aarch64 = fmaxnmp._EXT_
generate float32x4_t:float32x4_t:float32x4_t

/// Floating-point maximum number pairwise
name = vpmaxnm
out-suffix
a = 1., 2.
validate 2.

aarch64 = fmaxnmp
link-aarch64 = fmaxnmv._EXT2_._EXT_
generate float32x2_t:f32
name = vpmaxnmq
generate float64x2_t:f64

/// Floating-point maximum pairwise
name = vpmax
out-suffix
a = 1., 2.
validate 2.

aarch64 = fmaxp
link-aarch64 = fmaxv._EXT2_._EXT_
generate float32x2_t:f32
name = vpmaxq
generate float64x2_t:f64

4758/// Minimum (vector)
4759name = vmin
4760a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4761b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
4762validate 1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1
4763
4764arm = vmin
4765aarch64 = smin
4766link-arm = vmins._EXT_
4767link-aarch64 = smin._EXT_
4768generate int*_t
4769
4770arm = vmin
4771aarch64 = umin
4772link-arm = vminu._EXT_
4773link-aarch64 = umin._EXT_
4774generate uint*_t
4775
4776/// Minimum (vector)
4777name = vmin
4778a = 1.0, -2.0, 3.0, -4.0
4779b = 0.0, 3.0, 2.0, 8.0
4780validate 0.0, -2.0, 2.0, -4.0
4781
4782aarch64 = fmin
4783link-aarch64 = fmin._EXT_
4784generate float64x*_t
4785
4786arm = vmin
4787aarch64 = fmin
fc512014
XL
4788link-arm = vmins._EXT_
4789link-aarch64 = fmin._EXT_
4790generate float*_t
17df50a5 4791
a2a8927a 4792/// Floating-point Minimum Number (vector)
17df50a5
XL
4793name = vminnm
4794a = 1.0, 2.0, 3.0, -4.0
4795b = 8.0, 16.0, -1.0, 6.0
4796validate 1.0, 2.0, -1.0, -4.0
4797
4798aarch64 = fminnm
4799link-aarch64 = fminnm._EXT_
4800generate float64x*_t
4801
4802target = fp-armv8
4803arm = vminnm
4804aarch64 = fminnm
4805link-arm = vminnm._EXT_
4806link-aarch64 = fminnm._EXT_
4807generate float*_t
4808
3c0e092e
XL
4809/// Floating-point minimum number across vector
4810name = vminnmv
4811a = 1., 0., 2., 3.
4812validate 0.
4813
4814aarch64 = fminnmp
4815link-aarch64 = fminnmv._EXT2_._EXT_
4816generate float32x2_t:f32, float64x2_t:f64
4817aarch64 = fminnmv
4818generate float32x4_t:f32
4819
4820/// Vector move
4821name = vmovl_high
4822no-q
4823multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-halflen-halflen}
4824multi_fn = vmovl-noqself-noext, a
4825a = 1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10
4826validate 3, 4, 5, 6, 7, 8, 9, 10
4827
4828aarch64 = sxtl2
4829generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t
4830
4831aarch64 = uxtl2
4832generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t
4833
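// Illustrative aarch64 usage (a sketch): the high-half widen sign-extends
// lanes 8..16 of a 16-lane vector into eight 16-bit lanes:
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let bytes: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
//         let a = vld1q_s8(bytes.as_ptr());
//         let wide: int16x8_t = vmovl_high_s8(a); // lanes 8..=15, widened
//     }
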
/// Floating-point add pairwise
name = vpadd
a = 1., 2., 3., 4.
b = 3., 4., 5., 6.
validate 3., 7., 7., 11.

aarch64 = faddp
link-aarch64 = faddp._EXT_
generate float32x4_t, float64x2_t

arm = vpadd
link-arm = vpadd._EXT_
generate float32x2_t

/// Floating-point add pairwise
name = vpadd
out-suffix
multi_fn = simd_extract, a1:out_t, a, 0
multi_fn = simd_extract, a2:out_t, a, 1
multi_fn = a1 + a2
a = 1., 2.
validate 3.

aarch64 = nop
generate float32x2_t:f32, float64x2_t:f64

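// Illustrative aarch64 usage of the scalar-reducing form (a sketch):
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let a = vld1_f32([1.0f32, 2.0].as_ptr());
//         assert_eq!(vpadds_f32(a), 3.0); // lane 0 + lane 1
//     }
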
/// Floating-point Minimum Number Pairwise (vector).
name = vpminnm
a = 1.0, 2.0
b = 6.0, -3.0
validate 1.0, -3.0

aarch64 = fminnmp
link-aarch64 = fminnmp._EXT_
generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t

/// Floating-point Minimum Number Pairwise (vector).
name = vpminnm
a = 1.0, 2.0, 3.0, -4.0
b = 8.0, 16.0, -1.0, 6.0
validate 1.0, -4.0, 8.0, -1.0
aarch64 = fminnmp
link-aarch64 = fminnmp._EXT_
generate float32x4_t:float32x4_t:float32x4_t

/// Floating-point minimum number pairwise
name = vpminnm
out-suffix
a = 1., 2.
validate 1.

aarch64 = fminnmp
link-aarch64 = fminnmv._EXT2_._EXT_
generate float32x2_t:f32
name = vpminnmq
generate float64x2_t:f64

/// Floating-point minimum pairwise
name = vpmin
out-suffix
a = 1., 2.
validate 1.

aarch64 = fminp
link-aarch64 = fminv._EXT2_._EXT_
generate float32x2_t:f32
name = vpminq
generate float64x2_t:f64

/// Signed saturating doubling multiply long
name = vqdmull
a = 0, 1, 2, 3, 4, 5, 6, 7
b = 1, 2, 3, 4, 5, 6, 7, 8
validate 0, 4, 12, 24, 40, 60, 84, 112

aarch64 = sqdmull
link-aarch64 = sqdmull._EXT2_
arm = vqdmull
link-arm = vqdmull._EXT2_
generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t

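// Illustrative sketch of the lane semantics: widen, multiply, double, then
// saturate. The only input pair that saturates is MIN * MIN:
//
//     fn qdmull16(a: i16, b: i16) -> i32 {
//         (2 * i64::from(a) * i64::from(b)).clamp(i32::MIN as i64, i32::MAX as i64) as i32
//     }
//     assert_eq!(qdmull16(2, 3), 12);
//     assert_eq!(qdmull16(i16::MIN, i16::MIN), i32::MAX);
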
/// Signed saturating doubling multiply long
name = vqdmull
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqdmull-in_ntt-noext, a, b}, 0
a = 2
b = 3
validate 12

aarch64 = sqdmull
generate i16:i16:i32

/// Signed saturating doubling multiply long
name = vqdmull
a = 2
b = 3
validate 12

aarch64 = sqdmull
link-aarch64 = sqdmulls.scalar
generate i32:i32:i64

/// Vector saturating doubling long multiply with scalar
name = vqdmull_n
no-q
multi_fn = vqdmull-in_ntt-noext, a, {vdup_n-in_ntt-noext, b}
a = 2, 4, 6, 8
b = 2
validate 8, 16, 24, 32

aarch64 = sqdmull
arm = vqdmull
generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t

/// Signed saturating doubling multiply long
name = vqdmull_high
no-q
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-halflen-halflen}
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {asc-halflen-halflen}
multi_fn = vqdmull-noqself-noext, a, b
a = 0, 1, 4, 5, 4, 5, 6, 7
b = 1, 2, 5, 6, 5, 6, 7, 8
validate 40, 60, 84, 112

aarch64 = sqdmull2
generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply long
name = vqdmull_high_n
no-q
multi_fn = simd_shuffle-out_len-!, a:in_ntt, a, a, {asc-out_len-out_len}
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = vqdmull-in_ntt-noext, a, b
a = 0, 2, 8, 10, 8, 10, 12, 14
b = 2
validate 32, 40, 48, 56

aarch64 = sqdmull2
generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t

/// Vector saturating doubling long multiply by scalar
name = vqdmull_lane
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle-out_len-!, b:in_t0, b, b, {dup-out_len-N as u32}
multi_fn = vqdmull-noqself-noext, a, b
a = 1, 2, 3, 4
b = 0, 2, 2, 0, 2, 0, 0, 0
n = HFLEN
validate 4, 8, 12, 16

aarch64 = sqdmull
generate int16x4_t:int16x8_t:int32x4_t, int32x2_t:int32x4_t:int64x2_t

arm = vqdmull
generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t

/// Signed saturating doubling multiply long
name = vqdmullh_lane
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, b:in_t0, b, N as u32
multi_fn = vqdmullh-noqself-noext, a, b
a = 2
b = 0, 2, 2, 0, 2, 0, 0, 0
n = HFLEN
validate 8

aarch64 = sqdmull
generate i16:int16x4_t:i32, i16:int16x8_t:i32

/// Signed saturating doubling multiply long
name = vqdmulls_lane
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, b:in_t0, b, N as u32
multi_fn = vqdmulls-noqself-noext, a, b
a = 2
b = 0, 2, 2, 0, 2, 0, 0, 0
n = HFLEN
validate 8

aarch64 = sqdmull
generate i32:int32x2_t:i64, i32:int32x4_t:i64

/// Signed saturating doubling multiply long
name = vqdmull_high_lane
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle-out_len-!, a:in_t, a, a, {asc-out_len-out_len}
multi_fn = simd_shuffle-out_len-!, b:in_t, b, b, {dup-out_len-N as u32}
multi_fn = vqdmull-self-noext, a, b
a = 0, 1, 4, 5, 4, 5, 6, 7
b = 0, 2, 2, 0, 2, 0, 0, 0
n = HFLEN
validate 16, 20, 24, 28

aarch64 = sqdmull2
generate int16x8_t:int16x4_t:int32x4_t, int32x4_t:int32x2_t:int64x2_t

/// Signed saturating doubling multiply long
name = vqdmull_high_lane
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-out_len-out_len}
multi_fn = simd_shuffle-out_len-!, b:half, b, b, {dup-out_len-N as u32}
multi_fn = vqdmull-noqself-noext, a, b
a = 0, 1, 4, 5, 4, 5, 6, 7
b = 0, 2, 2, 0, 2, 0, 0, 0
n = HFLEN
validate 16, 20, 24, 28

aarch64 = sqdmull2
generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-add long
name = vqdmlal
multi_fn = vqadd-out-noext, a, {vqdmull-self-noext, b, c}
a = 1, 1, 1, 1
b = 1, 2, 3, 4
c = 2, 2, 2, 2
validate 5, 9, 13, 17

aarch64 = sqdmlal
arm = vqdmlal
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Vector widening saturating doubling multiply accumulate with scalar
name = vqdmlal
n-suffix
multi_fn = vqadd-out-noext, a, {vqdmull_n-self-noext, b, c}
a = 1, 1, 1, 1
b = 1, 2, 3, 4
c = 2
validate 5, 9, 13, 17

aarch64 = sqdmlal
arm = vqdmlal
generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t

/// Signed saturating doubling multiply-add long
name = vqdmlal_high
no-q
multi_fn = vqadd-out-noext, a, {vqdmull_high-noqself-noext, b, c}
a = 1, 2, 3, 4
b = 0, 1, 4, 5, 4, 5, 6, 7
c = 1, 2, 5, 6, 5, 6, 7, 8
validate 41, 62, 87, 116

aarch64 = sqdmlal2
generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-add long
name = vqdmlal_high_n
no-q
multi_fn = vqadd-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
a = 1, 2, 3, 4
b = 0, 2, 8, 10, 8, 10, 12, 14
c = 2
validate 33, 42, 51, 60

aarch64 = sqdmlal2
generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t

/// Vector widening saturating doubling multiply accumulate with scalar
name = vqdmlal_lane
in2-suffix
constn = N
multi_fn = static_assert_imm-in2_exp_len-N
multi_fn = vqadd-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
a = 1, 2, 3, 4
b = 1, 2, 3, 4
c = 0, 2, 2, 0, 2, 0, 0, 0
n = HFLEN
validate 5, 10, 15, 20

aarch64 = sqdmlal
generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t

arm = vqdmlal
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Signed saturating doubling multiply-add long
name = vqdmlal_high_lane
in2-suffix
constn = N
multi_fn = static_assert_imm-in2_exp_len-N
multi_fn = vqadd-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
a = 1, 2, 3, 4
b = 0, 1, 4, 5, 4, 5, 6, 7
c = 0, 2, 0, 0, 0, 0, 0, 0
n = 1
validate 17, 22, 27, 32

aarch64 = sqdmlal2
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-add long
name = vqdmlal
multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
multi_fn = vqadd-out-noext, a, {simd_extract, x, 0}
a = 1
b = 1
c = 2
validate 5

aarch64 = sqdmull
generate i32:i16:i16:i32, i64:i32:i32:i64

/// Signed saturating doubling multiply-add long
name = vqdmlalh_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqdmlal-self-noext, a, b, {simd_extract, c, LANE as u32}
a = 1
b = 1
c = 2, 1, 1, 1, 1, 1, 1, 1
n = 0
validate 5

aarch64 = sqdmlal
generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32
name = vqdmlals_lane
aarch64 = sqdmull
generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64

/// Signed saturating doubling multiply-subtract long
name = vqdmlsl
multi_fn = vqsub-out-noext, a, {vqdmull-self-noext, b, c}
a = 3, 7, 11, 15
b = 1, 2, 3, 4
c = 2, 2, 2, 2
validate -1, -1, -1, -1

aarch64 = sqdmlsl
arm = vqdmlsl
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Vector widening saturating doubling multiply subtract with scalar
name = vqdmlsl
n-suffix
multi_fn = vqsub-out-noext, a, {vqdmull_n-self-noext, b, c}
a = 3, 7, 11, 15
b = 1, 2, 3, 4
c = 2
validate -1, -1, -1, -1

aarch64 = sqdmlsl
arm = vqdmlsl
generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t

/// Signed saturating doubling multiply-subtract long
name = vqdmlsl_high
no-q
multi_fn = vqsub-out-noext, a, {vqdmull_high-noqself-noext, b, c}
a = 39, 58, 81, 108
b = 0, 1, 4, 5, 4, 5, 6, 7
c = 1, 2, 5, 6, 5, 6, 7, 8
validate -1, -2, -3, -4

aarch64 = sqdmlsl2
generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-subtract long
name = vqdmlsl_high_n
no-q
multi_fn = vqsub-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
a = 31, 38, 45, 52
b = 0, 2, 8, 10, 8, 10, 12, 14
c = 2
validate -1, -2, -3, -4

aarch64 = sqdmlsl2
generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t

/// Vector widening saturating doubling multiply subtract with scalar
name = vqdmlsl_lane
in2-suffix
constn = N
multi_fn = static_assert_imm-in2_exp_len-N
multi_fn = vqsub-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
a = 3, 6, 9, 12
b = 1, 2, 3, 4
c = 0, 2, 2, 0, 2, 0, 0, 0
n = HFLEN
validate -1, -2, -3, -4

aarch64 = sqdmlsl
generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t

arm = vqdmlsl
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Signed saturating doubling multiply-subtract long
name = vqdmlsl_high_lane
in2-suffix
constn = N
multi_fn = static_assert_imm-in2_exp_len-N
multi_fn = vqsub-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
a = 15, 18, 21, 24
b = 0, 1, 4, 5, 4, 5, 6, 7
c = 0, 2, 0, 0, 0, 0, 0, 0
n = 1
validate -1, -2, -3, -4

aarch64 = sqdmlsl2
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t

/// Signed saturating doubling multiply-subtract long
name = vqdmlsl
multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
multi_fn = vqsub-out-noext, a, {simd_extract, x, 0}
a = 10
b = 1
c = 2
validate 6

aarch64 = sqdmull
generate i32:i16:i16:i32, i64:i32:i32:i64

/// Signed saturating doubling multiply-subtract long
name = vqdmlslh_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqdmlsl-self-noext, a, b, {simd_extract, c, LANE as u32}
a = 10
b = 1
c = 2, 1, 1, 1, 1, 1, 1, 1
n = 0
validate 6

aarch64 = sqdmlsl
generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32
name = vqdmlsls_lane
aarch64 = sqdmull
generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64

/// Signed saturating doubling multiply returning high half
name = vqdmulh
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 2, 2, 2, 2, 2, 2, 2, 2
validate 1, 1, 1, 1, 1, 1, 1, 1

aarch64 = sqdmulh
link-aarch64 = sqdmulh._EXT_
arm = vqdmulh
link-arm = vqdmulh._EXT_
generate int16x4_t, int16x8_t, int32x2_t, int32x4_t

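// Illustrative sketch of the lane semantics: the doubled product is computed
// as for vqdmull, then only the high half is kept (truncating):
//
//     fn qdmulh16(a: i16, b: i16) -> i16 {
//         let wide = (2 * i64::from(a) * i64::from(b))
//             .clamp(i32::MIN as i64, i32::MAX as i64);
//         (wide >> 16) as i16
//     }
//     assert_eq!(qdmulh16(i16::MAX, 2), 1); // matches the `validate` row above
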
/// Signed saturating doubling multiply returning high half
name = vqdmulh
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqdmulh-in_ntt-noext, a, b}, 0
a = 1
b = 2
validate 0

aarch64 = sqdmulh
generate i16, i32

/// Vector saturating doubling multiply high with scalar
name = vqdmulh_n
out-suffix
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = vqdmulh-out-noext, a, b
a = MAX, MAX, MAX, MAX
b = 2
validate 1, 1, 1, 1

aarch64 = sqdmulh
arm = vqdmulh
generate int16x4_t:i16:int16x4_t, int32x2_t:i32:int32x2_t

/// Vector saturating doubling multiply high with scalar
name = vqdmulhq_n
no-q
multi_fn = vdupq_n-in_ntt-noext, b:out_t, b
multi_fn = vqdmulh-out-noext, a, b
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 2
validate 1, 1, 1, 1, 1, 1, 1, 1

aarch64 = sqdmulh
arm = vqdmulh
generate int16x8_t:i16:int16x8_t, int32x4_t:i32:int32x4_t

/// Signed saturating doubling multiply returning high half
name = vqdmulhh_lane
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, b:in_t0, b, N as u32
multi_fn = vqdmulhh-out_ntt-noext, a, b
a = 2
b = 0, 0, MAX, 0, 0, 0, 0, 0
n = 2
validate 1

aarch64 = sqdmulh
generate i16:int16x4_t:i16, i16:int16x8_t:i16

/// Signed saturating doubling multiply returning high half
name = vqdmulhs_lane
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, b:in_t0, b, N as u32
multi_fn = vqdmulhs-out_ntt-noext, a, b
a = 2
b = 0, MAX, 0, 0
n = 1
validate 1

aarch64 = sqdmulh
generate i32:int32x2_t:i32, i32:int32x4_t:i32

/// Vector saturating doubling multiply high by scalar
name = vqdmulh
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqdmulh-out-noext, a, {vdup-nout-noext, {simd_extract, b, LANE as u32}}
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 2, 1, 1, 1, 1, 1, 1, 1
n = 0
validate 1, 1, 1, 1, 1, 1, 1, 1

aarch64 = sqdmulh
generate int16x4_t, int16x8_t:int16x4_t:int16x8_t
generate int32x2_t, int32x4_t:int32x2_t:int32x4_t
arm = vqdmulh
generate int16x8_t, int16x4_t:int16x8_t:int16x4_t
generate int32x4_t, int32x2_t:int32x4_t:int32x2_t

/// Signed saturating extract narrow
name = vqmovn
no-q
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX

aarch64 = sqxtn
link-aarch64 = sqxtn._EXT2_
arm = vqmovn
link-arm = vqmovns._EXT2_
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t

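// Illustrative sketch of the lane semantics: the wide value is clamped to the
// narrow type's range and then truncated:
//
//     fn qmovn32(a: i32) -> i16 {
//         a.clamp(i32::from(i16::MIN), i32::from(i16::MAX)) as i16
//     }
//     assert_eq!(qmovn32(70_000), i16::MAX); // saturates instead of wrapping
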
/// Unsigned saturating extract narrow
name = vqmovn
no-q
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX

aarch64 = uqxtn
link-aarch64 = uqxtn._EXT2_
arm = vqmovn
link-arm = vqmovnu._EXT2_
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Saturating extract narrow
name = vqmovn
multi_fn = simd_extract, {vqmovn-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
a = 1
validate 1

aarch64 = sqxtn
generate i16:i8, i32:i16
aarch64 = uqxtn
generate u16:u8, u32:u16

/// Saturating extract narrow
name = vqmovn
a = 1
validate 1

aarch64 = sqxtn
link-aarch64 = scalar.sqxtn._EXT2_._EXT_
generate i64:i32

aarch64 = uqxtn
link-aarch64 = scalar.uqxtn._EXT2_._EXT_
generate u64:u32

/// Signed saturating extract narrow
name = vqmovn_high
no-q
multi_fn = simd_shuffle-out_len-!, a, {vqmovn-noqself-noext, b}, {asc-0-out_len}
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX

aarch64 = sqxtn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
aarch64 = uqxtn2
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed saturating extract unsigned narrow
name = vqmovun
no-q
a = -1, -1, -1, -1, -1, -1, -1, -1
validate 0, 0, 0, 0, 0, 0, 0, 0

aarch64 = sqxtun
link-aarch64 = sqxtun._EXT2_
arm = vqmovun
link-arm = vqmovnsu._EXT2_
generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t

/// Signed saturating extract unsigned narrow
name = vqmovun
multi_fn = simd_extract, {vqmovun-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
a = 1
validate 1

aarch64 = sqxtun
generate i16:u8, i32:u16, i64:u32

/// Signed saturating extract unsigned narrow
name = vqmovun_high
no-q
multi_fn = simd_shuffle-out_len-!, a, {vqmovun-noqself-noext, b}, {asc-0-out_len}
a = 0, 0, 0, 0, 0, 0, 0, 0
b = -1, -1, -1, -1, -1, -1, -1, -1
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

aarch64 = sqxtun2
generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t

/// Signed saturating rounding doubling multiply returning high half
name = vqrdmulh
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 2, 2, 2, 2, 2, 2, 2, 2
validate 2, 2, 2, 2, 2, 2, 2, 2

aarch64 = sqrdmulh
link-aarch64 = sqrdmulh._EXT_
arm = vqrdmulh
link-arm = vqrdmulh._EXT_
generate int16x4_t, int16x8_t, int32x2_t, int32x4_t

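// Illustrative sketch of the lane semantics: as vqdmulh, but a rounding
// constant is added before the high half is taken:
//
//     fn qrdmulh16(a: i16, b: i16) -> i16 {
//         let wide = 2 * i64::from(a) * i64::from(b) + (1 << 15); // round to nearest
//         (wide >> 16).clamp(i64::from(i16::MIN), i64::from(i16::MAX)) as i16
//     }
//     assert_eq!(qrdmulh16(i16::MAX, 2), 2); // vs. 1 for the truncating vqdmulh
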
/// Signed saturating rounding doubling multiply returning high half
name = vqrdmulh
multi_fn = simd_extract, {vqrdmulh-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1
b = 2
validate 0

aarch64 = sqrdmulh
generate i16, i32

/// Vector saturating rounding doubling multiply high with scalar
name = vqrdmulh
out-n-suffix
multi_fn = vqrdmulh-out-noext, a, {vdup-nout-noext, b}
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 2
validate 2, 2, 2, 2, 2, 2, 2, 2

aarch64 = sqrdmulh
arm = vqrdmulh
generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t

/// Vector rounding saturating doubling multiply high by scalar
name = vqrdmulh
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_shuffle-out_len-!, b:out_t, b, b, {dup-out_len-LANE as u32}
multi_fn = vqrdmulh-out-noext, a, b
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 0, 2, 0, 0, 0, 0, 0, 0
n = 1
validate 2, 2, 2, 2, 2, 2, 2, 2

aarch64 = sqrdmulh
arm = vqrdmulh
generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t

/// Signed saturating rounding doubling multiply returning high half
name = vqrdmulh
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vqrdmulh-out-noext, a, {simd_extract, b, LANE as u32}
a = 1
b = 0, 2, 0, 0, 0, 0, 0, 0
n = 1
validate 0

aarch64 = sqrdmulh
generate i16:int16x4_t:i16, i16:int16x8_t:i16, i32:int32x2_t:i32, i32:int32x4_t:i32

/// Signed saturating rounding doubling multiply accumulate returning high half
name = vqrdmlah
a = 1, 1, 1, 1, 1, 1, 1, 1
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
c = 2, 2, 2, 2, 2, 2, 2, 2
validate 3, 3, 3, 3, 3, 3, 3, 3

aarch64 = sqrdmlah
link-aarch64 = sqrdmlah._EXT_
target = rdm
generate int16x4_t, int16x8_t, int32x2_t, int32x4_t

/// Signed saturating rounding doubling multiply accumulate returning high half
name = vqrdmlah
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
multi_fn = simd_extract, {vqrdmlah-in_ntt-noext, a, b, c}, 0
a = 1
b = 1
c = 2
validate 1

aarch64 = sqrdmlah
target = rdm
generate i16, i32

/// Signed saturating rounding doubling multiply accumulate returning high half
name = vqrdmlah
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {dup-out_len-LANE as u32}
multi_fn = vqrdmlah-out-noext, a, b, c
a = 1, 1, 1, 1, 1, 1, 1, 1
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
c = 0, 2, 0, 0, 0, 0, 0, 0
n = 1
validate 3, 3, 3, 3, 3, 3, 3, 3

aarch64 = sqrdmlah
target = rdm
generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t

/// Signed saturating rounding doubling multiply accumulate returning high half
name = vqrdmlah
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqrdmlah-self-noext, a, b, {simd_extract, c, LANE as u32}
a = 1
b = 1
c = 0, 2, 0, 0, 0, 0, 0, 0
n = 1
validate 1

aarch64 = sqrdmlah
target = rdm
generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32

/// Signed saturating rounding doubling multiply subtract returning high half
name = vqrdmlsh
link-aarch64 = sqrdmlsh._EXT_
a = 1, 1, 1, 1, 1, 1, 1, 1
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
c = 2, 2, 2, 2, 2, 2, 2, 2
validate -1, -1, -1, -1, -1, -1, -1, -1

aarch64 = sqrdmlsh
target = rdm
generate int16x4_t, int16x8_t, int32x2_t, int32x4_t

/// Signed saturating rounding doubling multiply subtract returning high half
name = vqrdmlsh
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
multi_fn = simd_extract, {vqrdmlsh-in_ntt-noext, a, b, c}, 0
a = 1
b = 1
c = 2
validate 1

aarch64 = sqrdmlsh
target = rdm
generate i16, i32

/// Signed saturating rounding doubling multiply subtract returning high half
name = vqrdmlsh
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {dup-out_len-LANE as u32}
multi_fn = vqrdmlsh-out-noext, a, b, c
a = 1, 1, 1, 1, 1, 1, 1, 1
b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
c = 0, 2, 0, 0, 0, 0, 0, 0
n = 1
validate -1, -1, -1, -1, -1, -1, -1, -1

aarch64 = sqrdmlsh
target = rdm
generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t

/// Signed saturating rounding doubling multiply subtract returning high half
name = vqrdmlsh
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vqrdmlsh-self-noext, a, b, {simd_extract, c, LANE as u32}
a = 1
b = 1
c = 0, 2, 0, 0, 0, 0, 0, 0
n = 1
validate 1

aarch64 = sqrdmlsh
target = rdm
generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32

/// Signed saturating rounding shift left
name = vqrshl
a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 8, MIN, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = sqrshl
link-aarch64 = sqrshl._EXT_
generate i32, i64

arm = vqrshl
link-arm = vqrshifts._EXT_
generate int*_t, int64x*_t

/// Signed saturating rounding shift left
name = vqrshl
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqrshl-in_ntt-noext, a, b}, 0
a = 1
b = 2
validate 4

aarch64 = sqrshl
generate i8, i16

/// Unsigned saturating rounding shift left
name = vqrshl
out-suffix
a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 8, 0, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = uqrshl
link-aarch64 = uqrshl._EXT_
generate u32:i32:u32, u64:i64:u64

arm = vqrshl
link-arm = vqrshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

/// Unsigned saturating rounding shift left
name = vqrshl
out-suffix
multi_fn = vdup_n-out_ntt-noext, a:out_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqrshl-out_ntt-noext, a, b}, 0
a = 1
b = 2
validate 4

aarch64 = uqrshl
generate u8:i8:u8, u16:i16:u16

/// Signed saturating rounded shift right narrow
name = vqrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = MIN, 4, 8, 12, 16, 20, 24, 28
n = 2
validate MIN, 1, 2, 3, 4, 5, 6, 7

aarch64 = sqrshrn
link-aarch64 = sqrshrn._EXT2_
const-aarch64 = N

arm = vqrshrn
link-arm = vqrshiftns._EXT2_
const-arm = -N as ttn
arm-aarch64-separate
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t

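// Illustrative sketch of the lane semantics: add the rounding constant for N,
// shift right, then saturate to the narrow type:
//
//     fn qrshrn_n32(a: i32, n: u32) -> i16 {
//         let rounded = (i64::from(a) + (1 << (n - 1))) >> n;
//         rounded.clamp(i64::from(i16::MIN), i64::from(i16::MAX)) as i16
//     }
//     assert_eq!(qrshrn_n32(4, 2), 1); // matches the scalar test below
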
/// Signed saturating rounded shift right narrow
name = vqrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
a = 4
n = 2
validate 1

aarch64 = sqrshrn
generate i16:i8, i32:i16, i64:i32

/// Signed saturating rounded shift right narrow
name = vqrshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 6, 7
b = 8, 12, 24, 28, 48, 52, 56, 60
n = 2
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15

aarch64 = sqrshrn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t

/// Unsigned saturating rounded shift right narrow
name = vqrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = MIN, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7

aarch64 = uqrshrn
link-aarch64 = uqrshrn._EXT2_
const-aarch64 = N

arm = vqrshrn
link-arm = vqrshiftnu._EXT2_
const-arm = -N as ttn
arm-aarch64-separate
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Unsigned saturating rounded shift right narrow
name = vqrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
a = 4
n = 2
validate 1

aarch64 = uqrshrn
generate u16:u8, u32:u16, u64:u32

/// Unsigned saturating rounded shift right narrow
name = vqrshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 6, 7
b = 8, 12, 24, 28, 48, 52, 56, 60
n = 2
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15

aarch64 = uqrshrn2
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed saturating rounded shift right unsigned narrow
name = vqrshrun
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7

aarch64 = sqrshrun
link-aarch64 = sqrshrun._EXT2_
const-aarch64 = N

arm = vqrshrun
link-arm = vqrshiftnsu._EXT2_
const-arm = -N as ttn
arm-aarch64-separate
generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t

/// Signed saturating rounded shift right unsigned narrow
name = vqrshrun
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
multi_fn = simd_extract, {vqrshrun_n-in_ntt-::<N>, a}, 0
a = 4
n = 2
validate 1

aarch64 = sqrshrun
generate i16:u8, i32:u16, i64:u32

/// Signed saturating rounded shift right unsigned narrow
name = vqrshrun_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-!, a, {vqrshrun_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 6, 7
b = 8, 12, 24, 28, 48, 52, 56, 60
n = 2
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15

aarch64 = sqrshrun2
generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t

/// Signed saturating shift left
name = vqshl
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = sqshl
link-aarch64 = sqshl._EXT_
generate i64

arm = vqshl
link-arm = vqshifts._EXT_
generate int*_t, int64x*_t

/// Signed saturating shift left
name = vqshl
multi_fn = vqshl-in_ntt-noext, c:in_ntt, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
multi_fn = simd_extract, c, 0
a = 1
b = 2
validate 4

aarch64 = sqshl
generate i8, i16, i32

/// Unsigned saturating shift left
name = vqshl
out-suffix
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = uqshl
link-aarch64 = uqshl._EXT_
generate u64:i64:u64

arm = vqshl
link-arm = vqshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

/// Unsigned saturating shift left
name = vqshl
out-suffix
multi_fn = vqshl-out_ntt-noext, c:out_ntt, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
multi_fn = simd_extract, c, 0
a = 1
b = 2
validate 4

aarch64 = uqshl
generate u8:i8:u8, u16:i16:u16, u32:i32:u32

/// Signed saturating shift left
name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N as _}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
n = 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = sqshl
arm = vqshl
generate int*_t, int64x*_t

/// Signed saturating shift left
name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
a = 1
n = 2
validate 4

aarch64 = sqshl
generate i8, i16, i32, i64

/// Unsigned saturating shift left
name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N as _}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
n = 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = uqshl
arm = vqshl
generate uint*_t, uint64x*_t

/// Unsigned saturating shift left
name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
a = 1
n = 2
validate 4

aarch64 = uqshl
generate u8, u16, u32, u64

/// Signed saturating shift left unsigned
name = vqshlu
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
n = 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
arm-aarch64-separate

aarch64 = sqshlu
link-aarch64 = sqshlu._EXT_
const-aarch64 = {dup-in_len-N as ttn}
arm = vqshlu
link-arm = vqshiftsu._EXT_
const-arm = N as ttn
generate int8x8_t:uint8x8_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
generate int8x16_t:uint8x16_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t

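// Illustrative sketch of the lane semantics: a signed input is shifted left
// and saturated to the *unsigned* range of the same width:
//
//     fn qshlu_n8(a: i8, n: u32) -> u8 {
//         if a < 0 { 0 } else { ((a as u32) << n).min(u32::from(u8::MAX)) as u8 }
//     }
//     assert_eq!(qshlu_n8(1, 2), 4);
//     assert_eq!(qshlu_n8(-1, 2), 0); // negative inputs clamp to zero
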
/// Signed saturating shift left unsigned
name = vqshlu
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_extract, {vqshlu_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
a = 1
n = 2
validate 4

aarch64 = sqshlu
generate i8:u8, i16:u16, i32:u32, i64:u64

/// Signed saturating shift right narrow
name = vqshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7
arm-aarch64-separate

aarch64 = sqshrn
link-aarch64 = sqshrn._EXT2_
const-aarch64 = N
generate i64:i32

arm = vqshrn
link-arm = vqshiftns._EXT2_
const-arm = -N as ttn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t

/// Signed saturating shift right narrow
name = vqshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
a = 4
n = 2
validate 1

aarch64 = sqshrn
generate i16:i8, i32:i16

/// Signed saturating shift right narrow
name = vqshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
n = 2
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = sqshrn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t

/// Unsigned saturating shift right narrow
name = vqshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7
arm-aarch64-separate

aarch64 = uqshrn
link-aarch64 = uqshrn._EXT2_
const-aarch64 = N
generate u64:u32

arm = vqshrn
link-arm = vqshiftnu._EXT2_
const-arm = -N as ttn
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Unsigned saturating shift right narrow
name = vqshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
a = 4
n = 2
validate 1

aarch64 = uqshrn
generate u16:u8, u32:u16

/// Unsigned saturating shift right narrow
name = vqshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
n = 2
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = uqshrn2
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed saturating shift right unsigned narrow
name = vqshrun
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7
arm-aarch64-separate

aarch64 = sqshrun
link-aarch64 = sqshrun._EXT2_
const-aarch64 = N

arm = vqshrun
link-arm = vqshiftnsu._EXT2_
const-arm = -N as ttn
generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t

/// Signed saturating shift right unsigned narrow
name = vqshrun
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_extract, {vqshrun_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
a = 4
n = 2
validate 1

aarch64 = sqshrun
generate i16:u8, i32:u16, i64:u32

/// Signed saturating shift right unsigned narrow
name = vqshrun_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-!, a, {vqshrun_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
n = 2
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = sqshrun2
generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t

/// Unsigned saturating accumulate of signed value
name = vsqadd
out-suffix
multi_fn = simd_extract, {vsqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 2
b = 2
validate 4

aarch64 = usqadd
generate u8:i8:u8, u16:i16:u16

/// Unsigned saturating accumulate of signed value
name = vsqadd
out-suffix
a = 2
b = 2
validate 4

aarch64 = usqadd
link-aarch64 = usqadd._EXT_
generate u32:i32:u32, u64:i64:u64

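// Illustrative sketch of the lane semantics: a signed addend is folded into
// an unsigned accumulator with saturation at both ends:
//
//     fn sqadd_u8(a: u8, b: i8) -> u8 {
//         (i16::from(a) + i16::from(b)).clamp(0, i16::from(u8::MAX)) as u8
//     }
//     assert_eq!(sqadd_u8(2, 2), 4);
//     assert_eq!(sqadd_u8(1, -5), 0); // saturates rather than wrapping
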
/// Calculates the square root of each lane.
name = vsqrt
fn = simd_fsqrt
a = 4.0, 9.0, 16.0, 25.0
validate 2.0, 3.0, 4.0, 5.0

aarch64 = fsqrt
generate float*_t, float64x*_t

/// Reciprocal square-root estimate.
name = vrsqrte
a = 1.0, 2.0, 3.0, 4.0
validate 0.998046875, 0.705078125, 0.576171875, 0.4990234375

aarch64 = frsqrte
link-aarch64 = frsqrte._EXT_
generate float64x*_t, f32, f64

arm = vrsqrte
link-arm = vrsqrte._EXT_
generate float*_t

/// Unsigned reciprocal square root estimate
name = vrsqrte
a = 1, 2, 3, 4
validate 4294967295, 4294967295, 4294967295, 4294967295

aarch64 = ursqrte
link-aarch64 = ursqrte._EXT_
arm = vrsqrte
link-arm = vrsqrte._EXT_
generate uint32x2_t, uint32x4_t

/// Floating-point reciprocal square root step
name = vrsqrts
a = 1.0, 2.0, 3.0, 4.0
b = 1.0, 2.0, 3.0, 4.0
validate 1., -0.5, -3.0, -6.5

aarch64 = frsqrts
link-aarch64 = frsqrts._EXT_
generate float64x*_t, f32, f64

arm = vrsqrts
link-arm = vrsqrts._EXT_
generate float*_t

/// Reciprocal estimate.
name = vrecpe
a = 4.0, 3.0, 2.0, 1.0
validate 0.24951171875, 0.3330078125, 0.4990234375, 0.998046875

aarch64 = frecpe
link-aarch64 = frecpe._EXT_
generate float64x*_t, f32, f64

arm = vrecpe
link-arm = vrecpe._EXT_
generate float*_t

/// Unsigned reciprocal estimate
name = vrecpe
a = 4, 3, 2, 1
validate 4294967295, 4294967295, 4294967295, 4294967295

aarch64 = urecpe
link-aarch64 = urecpe._EXT_
arm = vrecpe
link-arm = vrecpe._EXT_
generate uint32x2_t, uint32x4_t

/// Floating-point reciprocal step
name = vrecps
a = 4.0, 3.0, 2.0, 1.0
b = 4.0, 3.0, 2.0, 1.0
validate -14., -7., -2., 1.

aarch64 = frecps
link-aarch64 = frecps._EXT_
generate float64x*_t, f32, f64

arm = vrecps
link-arm = vrecps._EXT_
generate float*_t

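// The estimate and step instructions are meant to be combined. A sketch of
// one Newton-Raphson refinement of 1/x on aarch64 (assuming `neon`):
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let x = vdup_n_f32(4.0);
//         let mut e = vrecpe_f32(x);         // rough estimate of 0.25
//         e = vmul_f32(e, vrecps_f32(x, e)); // e *= (2 - x*e), refining the estimate
//     }
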
/// Floating-point reciprocal exponent
name = vrecpx
a = 4.0
validate 0.5

aarch64 = frecpx
link-aarch64 = frecpx._EXT_
generate f32, f64

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = nop
generate poly64x1_t:int64x1_t, poly64x1_t:uint64x1_t, int64x1_t:poly64x1_t, uint64x1_t:poly64x1_t
generate poly64x2_t:int64x2_t, poly64x2_t:uint64x2_t, int64x2_t:poly64x2_t, uint64x2_t:poly64x2_t

arm = nop
generate uint8x8_t:int8x8_t, poly8x8_t:int8x8_t, poly16x4_t:int16x4_t, uint16x4_t:int16x4_t, uint32x2_t:int32x2_t, uint64x1_t:int64x1_t
generate uint8x16_t:int8x16_t, poly8x16_t:int8x16_t, poly16x8_t:int16x8_t, uint16x8_t:int16x8_t, uint32x4_t:int32x4_t, uint64x2_t:int64x2_t
generate poly8x8_t:uint8x8_t, int8x8_t:uint8x8_t, poly16x4_t:uint16x4_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
generate poly8x16_t:uint8x16_t, int8x16_t:uint8x16_t, poly16x8_t:uint16x8_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t
generate int8x8_t:poly8x8_t, uint8x8_t:poly8x8_t, int16x4_t:poly16x4_t, uint16x4_t:poly16x4_t
generate int8x16_t:poly8x16_t, uint8x16_t:poly8x16_t, int16x8_t:poly16x8_t, uint16x8_t:poly16x8_t

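// Reinterpret casts are bit-level: `transmute` re-labels the lanes without
// changing any bits. Illustrative aarch64 usage (a sketch):
//
//     unsafe {
//         use core::arch::aarch64::*;
//         let a = vdup_n_u8(0xFF);
//         let b: int8x8_t = vreinterpret_s8_u8(a); // same bits, lanes now read as -1
//     }
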
6260/// Vector reinterpret cast operation
6261name = vreinterpret
6262double-suffixes
6263fn = transmute
6264a = 0, 1, 2, 3, 4, 5, 6, 7
6265validate 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
6266
c295e0f8 6267aarch64 = nop
c295e0f8 6268arm = nop
17df50a5
XL
6269generate int16x4_t:int8x8_t, uint16x4_t:int8x8_t, poly16x4_t:int8x8_t, int32x2_t:int16x4_t, uint32x2_t:int16x4_t, int64x1_t:int32x2_t, uint64x1_t:int32x2_t
6270generate int16x8_t:int8x16_t, uint16x8_t:int8x16_t, poly16x8_t:int8x16_t, int32x4_t:int16x8_t, uint32x4_t:int16x8_t, int64x2_t:int32x4_t, uint64x2_t:int32x4_t
6271generate poly16x4_t:uint8x8_t, int16x4_t:uint8x8_t, uint16x4_t:uint8x8_t, int32x2_t:uint16x4_t, uint32x2_t:uint16x4_t, int64x1_t:uint32x2_t, uint64x1_t:uint32x2_t
6272generate poly16x8_t:uint8x16_t, int16x8_t:uint8x16_t, uint16x8_t:uint8x16_t, int32x4_t:uint16x8_t, uint32x4_t:uint16x8_t, int64x2_t:uint32x4_t, uint64x2_t:uint32x4_t
6273generate poly16x4_t:poly8x8_t, int16x4_t:poly8x8_t, uint16x4_t:poly8x8_t, int32x2_t:poly16x4_t, uint32x2_t:poly16x4_t
6274generate poly16x8_t:poly8x16_t, int16x8_t:poly8x16_t, uint16x8_t:poly8x16_t, int32x4_t:poly16x8_t, uint32x4_t:poly16x8_t
3c0e092e
XL
6275target = aes
6276generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t
6277generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t
6278generate p128:int64x2_t, p128:uint64x2_t, p128:poly64x2_t
17df50a5
XL
6279
6280/// Vector reinterpret cast operation
6281name = vreinterpret
6282double-suffixes
6283fn = transmute
6284a = 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
6285validate 0, 1, 2, 3, 4, 5, 6, 7
6286
c295e0f8 6287aarch64 = nop
c295e0f8 6288arm = nop
17df50a5
XL
6289generate poly8x8_t:int16x4_t, int8x8_t:int16x4_t, uint8x8_t:int16x4_t, poly16x4_t:int32x2_t, int16x4_t:int32x2_t, uint16x4_t:int32x2_t, int32x2_t:int64x1_t, uint32x2_t:int64x1_t
6290generate poly8x16_t:int16x8_t, int8x16_t:int16x8_t, uint8x16_t:int16x8_t, poly16x8_t:int32x4_t, int16x8_t:int32x4_t, uint16x8_t:int32x4_t, int32x4_t:int64x2_t, uint32x4_t:int64x2_t
6291generate poly8x8_t:uint16x4_t, int8x8_t:uint16x4_t, uint8x8_t:uint16x4_t, poly16x4_t:uint32x2_t, int16x4_t:uint32x2_t, uint16x4_t:uint32x2_t, int32x2_t:uint64x1_t, uint32x2_t:uint64x1_t
6292generate poly8x16_t:uint16x8_t, int8x16_t:uint16x8_t, uint8x16_t:uint16x8_t, poly16x8_t:uint32x4_t, int16x8_t:uint32x4_t, uint16x8_t:uint32x4_t, int32x4_t:uint64x2_t, uint32x4_t:uint64x2_t
6293generate poly8x8_t:poly16x4_t, int8x8_t:poly16x4_t, uint8x8_t:poly16x4_t
6294generate poly8x16_t:poly16x8_t, int8x16_t:poly16x8_t, uint8x16_t:poly16x8_t
3c0e092e
XL
6295target = aes
6296generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t
6297generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t
6298generate int64x2_t:p128, uint64x2_t:p128, poly64x2_t:p128
17df50a5
XL
6299
6300/// Vector reinterpret cast operation
6301name = vreinterpret
6302double-suffixes
6303fn = transmute
6304a = 0, 1, 2, 3
6305validate 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
6306
c295e0f8 6307aarch64 = nop
c295e0f8 6308arm = nop
17df50a5
XL
6309generate int32x2_t:int8x8_t, uint32x2_t:int8x8_t, int64x1_t:int16x4_t, uint64x1_t:int16x4_t
6310generate int32x4_t:int8x16_t, uint32x4_t:int8x16_t, int64x2_t:int16x8_t, uint64x2_t:int16x8_t
6311generate int32x2_t:uint8x8_t, uint32x2_t:uint8x8_t, int64x1_t:uint16x4_t, uint64x1_t:uint16x4_t
6312generate int32x4_t:uint8x16_t, uint32x4_t:uint8x16_t, int64x2_t:uint16x8_t, uint64x2_t:uint16x8_t
6313generate int32x2_t:poly8x8_t, uint32x2_t:poly8x8_t, int64x1_t:poly16x4_t, uint64x1_t:poly16x4_t
6314generate int32x4_t:poly8x16_t, uint32x4_t:poly8x16_t, int64x2_t:poly16x8_t, uint64x2_t:poly16x8_t
3c0e092e
XL
6315target = aes
6316generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t
6317generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t
6318generate p128:int32x4_t, p128:uint32x4_t
17df50a5
XL
6319
6320/// Vector reinterpret cast operation
6321name = vreinterpret
6322double-suffixes
6323fn = transmute
6324a = 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
6325validate 0, 1, 2, 3
6326
c295e0f8 6327aarch64 = nop
c295e0f8 6328arm = nop
17df50a5
XL
6329generate poly8x8_t:int32x2_t, int8x8_t:int32x2_t, uint8x8_t:int32x2_t, poly16x4_t:int64x1_t, int16x4_t:int64x1_t, uint16x4_t:int64x1_t
6330generate poly8x16_t:int32x4_t, int8x16_t:int32x4_t, uint8x16_t:int32x4_t, poly16x8_t:int64x2_t, int16x8_t:int64x2_t, uint16x8_t:int64x2_t
6331generate poly8x8_t:uint32x2_t, int8x8_t:uint32x2_t, uint8x8_t:uint32x2_t, poly16x4_t:uint64x1_t, int16x4_t:uint64x1_t, uint16x4_t:uint64x1_t
6332generate poly8x16_t:uint32x4_t, int8x16_t:uint32x4_t, uint8x16_t:uint32x4_t, poly16x8_t:uint64x2_t, int16x8_t:uint64x2_t, uint16x8_t:uint64x2_t
3c0e092e
XL
6333target = aes
6334generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t
6335generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t
6336generate int32x4_t:p128, uint32x4_t:p128
17df50a5
XL
6337
6338/// Vector reinterpret cast operation
6339name = vreinterpret
6340double-suffixes
6341fn = transmute
6342a = 0, 1
6343validate 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
6344
c295e0f8 6345aarch64 = nop
c295e0f8 6346arm = nop
17df50a5
XL
6347generate int64x1_t:int8x8_t, uint64x1_t:int8x8_t, int64x1_t:uint8x8_t, uint64x1_t:uint8x8_t, int64x1_t:poly8x8_t, uint64x1_t:poly8x8_t
6348generate int64x2_t:int8x16_t, uint64x2_t:int8x16_t, int64x2_t:uint8x16_t, uint64x2_t:uint8x16_t, int64x2_t:poly8x16_t, uint64x2_t:poly8x16_t
3c0e092e
XL
6349target = aes
6350generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t
6351generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t
6352generate p128:int16x8_t, p128:uint16x8_t, p128:poly16x8_t
17df50a5
XL
6353
6354/// Vector reinterpret cast operation
6355name = vreinterpret
6356double-suffixes
6357fn = transmute
6358a = 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
6359validate 0, 1
6360
c295e0f8 6361aarch64 = nop
3c0e092e
XL
6362arm = nop
6363generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t
6364generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t
6365target = aes
17df50a5
XL
6366generate poly8x8_t:poly64x1_t, int8x8_t:poly64x1_t, uint8x8_t:poly64x1_t
6367generate poly8x16_t:poly64x2_t, int8x16_t:poly64x2_t, uint8x16_t:poly64x2_t
3c0e092e
XL
6368generate int16x8_t:p128, uint16x8_t:p128, poly16x8_t:p128
6369
6370/// Vector reinterpret cast operation
6371name = vreinterpret
6372double-suffixes
6373fn = transmute
6374a = 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6375validate 1
6376target = aes
17df50a5 6377
3c0e092e 6378aarch64 = nop
c295e0f8 6379arm = nop
3c0e092e
XL
6380generate int8x16_t:p128, uint8x16_t:p128, poly8x16_t:p128
6381
6382/// Vector reinterpret cast operation
6383name = vreinterpret
6384double-suffixes
6385fn = transmute
6386a = 1
6387validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6388target = aes
6389
6390aarch64 = nop
6391arm = nop
6392generate p128:int8x16_t, p128:uint8x16_t, p128:poly8x16_t
17df50a5
XL
6393
6394/// Vector reinterpret cast operation
6395name = vreinterpret
6396double-suffixes
6397fn = transmute
6398a = 0., 0., 0., 0., 0., 0., 0., 0.
6399validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6400
c295e0f8 6401aarch64 = nop
17df50a5
XL
6402generate float64x1_t:int8x8_t, float64x1_t:int16x4_t, float64x1_t:int32x2_t, float64x1_t:int64x1_t
6403generate float64x2_t:int8x16_t, float64x2_t:int16x8_t, float64x2_t:int32x4_t, float64x2_t:int64x2_t
6404generate float64x1_t:uint8x8_t, float64x1_t:uint16x4_t, float64x1_t:uint32x2_t, float64x1_t:uint64x1_t
6405generate float64x2_t:uint8x16_t, float64x2_t:uint16x8_t, float64x2_t:uint32x4_t, float64x2_t:uint64x2_t
6406generate float64x1_t:poly8x8_t, float64x1_t:poly16x4_t, float32x2_t:poly64x1_t, float64x1_t:poly64x1_t
6407generate float64x2_t:poly8x16_t, float64x2_t:poly16x8_t, float32x4_t:poly64x2_t, float64x2_t:poly64x2_t
3c0e092e 6408generate float64x2_t:p128
17df50a5 6409
c295e0f8 6410arm = nop
17df50a5
XL
6411generate float32x2_t:int8x8_t, float32x2_t:int16x4_t, float32x2_t:int32x2_t, float32x2_t:int64x1_t
6412generate float32x4_t:int8x16_t, float32x4_t:int16x8_t, float32x4_t:int32x4_t, float32x4_t:int64x2_t
6413generate float32x2_t:uint8x8_t, float32x2_t:uint16x4_t, float32x2_t:uint32x2_t, float32x2_t:uint64x1_t
6414generate float32x4_t:uint8x16_t, float32x4_t:uint16x8_t, float32x4_t:uint32x4_t, float32x4_t:uint64x2_t
6415generate float32x2_t:poly8x8_t, float32x2_t:poly16x4_t
6416generate float32x4_t:poly8x16_t, float32x4_t:poly16x8_t
3c0e092e 6417generate float32x4_t:p128
17df50a5
XL
6418
/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate 0., 0., 0., 0., 0., 0., 0., 0.

aarch64 = nop
generate int8x8_t:float64x1_t, int16x4_t:float64x1_t, int32x2_t:float64x1_t, int64x1_t:float64x1_t
generate int8x16_t:float64x2_t, int16x8_t:float64x2_t, int32x4_t:float64x2_t, int64x2_t:float64x2_t
generate poly8x8_t:float64x1_t, uint16x4_t:float64x1_t, uint32x2_t:float64x1_t, uint64x1_t:float64x1_t
generate poly8x16_t:float64x2_t, uint16x8_t:float64x2_t, uint32x4_t:float64x2_t, uint64x2_t:float64x2_t
generate uint8x8_t:float64x1_t, poly16x4_t:float64x1_t, poly64x1_t:float64x1_t, poly64x1_t:float32x2_t
generate uint8x16_t:float64x2_t, poly16x8_t:float64x2_t, poly64x2_t:float64x2_t, poly64x2_t:float32x4_t
generate p128:float64x2_t

arm = nop
generate int8x8_t:float32x2_t, int16x4_t:float32x2_t, int32x2_t:float32x2_t, int64x1_t:float32x2_t
generate int8x16_t:float32x4_t, int16x8_t:float32x4_t, int32x4_t:float32x4_t, int64x2_t:float32x4_t
generate uint8x8_t:float32x2_t, uint16x4_t:float32x2_t, uint32x2_t:float32x2_t, uint64x1_t:float32x2_t
generate uint8x16_t:float32x4_t, uint16x8_t:float32x4_t, uint32x4_t:float32x4_t, uint64x2_t:float32x4_t
generate poly8x8_t:float32x2_t, poly16x4_t:float32x2_t
generate poly8x16_t:float32x4_t, poly16x8_t:float32x4_t
generate p128:float32x4_t

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0., 0., 0., 0., 0., 0., 0., 0.
validate 0., 0., 0., 0., 0., 0., 0., 0.

aarch64 = nop
generate float32x2_t:float64x1_t, float64x1_t:float32x2_t
generate float32x4_t:float64x2_t, float64x2_t:float32x4_t

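// Every vreinterpret entry above is `fn = transmute`: a pure bit-pattern
// cast that compiles to `nop` (no instruction at all). A sketch of what one
// generated intrinsic looks like (Rust, name per the double-suffix rule):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn example(a: float64x2_t) -> int8x16_t {
//         // generated from `generate float64x2_t:int8x16_t`;
//         // the body is just a transmute of the 128-bit register
//         vreinterpretq_s8_f64(a)
//     }
//
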
/// Signed rounding shift left
name = vrshl
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

aarch64 = srshl
link-aarch64 = srshl._EXT_
generate i64

arm = vrshl
link-arm = vrshifts._EXT_
generate int*_t, int64x*_t

/// Unsigned rounding shift left
name = vrshl
out-suffix
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

aarch64 = urshl
link-aarch64 = urshl._EXT_
generate u64:i64:u64

arm = vrshl
link-arm = vrshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

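// The "rounding" in vrshl applies when the per-lane shift count in `b` is
// negative: the lane is then shifted right with the discarded bits rounded.
// A one-lane scalar model (a sketch of the usual Arm semantics, ignoring
// the out-of-range shift counts the instruction also defines):
//
//     fn rshl_lane(a: i64, b: i64) -> i64 {
//         if b >= 0 {
//             a.wrapping_shl(b as u32)
//         } else {
//             let n = (-b) as u32;
//             // add 1 << (n - 1) so the result rounds to nearest
//             (a.wrapping_add(1 << (n - 1))) >> n
//         }
//     }
//
//     // rshl_lane(1, 2) == 4, matching the `validate` rows above;
//     // rshl_lane(5, -1) == 3, i.e. 5/2 rounded to nearest.
//
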
/// Signed rounding shift right
name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, {vdup-nself-noext, (-N) as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = srshr
arm = vrshr
generate int*_t, int64x*_t

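// vrshr_n has no instruction pattern of its own here: the multi_fn above
// rewrites it as a rounding shift left by a splatted -N. A sketch of the
// generated shape for one type (the runtime assert stands in for the
// compile-time static_assert-N-1-bits check):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn vrshr_n_s8_demo<const N: i32>(a: int8x8_t) -> int8x8_t {
//         assert!(N >= 1 && N <= 8);        // checked at compile time in stdarch
//         vrshl_s8(a, vdup_n_s8(-N as i8))  // shift right by N == rounding shift left by -N
//     }
//
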
/// Signed rounding shift right
name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, -N as i64
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = srshr
generate i64

/// Unsigned rounding shift right
name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, (-N) as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = urshr
arm = vrshr
generate uint*_t, uint64x*_t

/// Unsigned rounding shift right
name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, -N as i64
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = urshr
generate u64

/// Rounding shift right narrow
name = vrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
arm-aarch64-separate

aarch64 = rshrn
link-aarch64 = rshrn._EXT2_
const-aarch64 = N

arm = vrshrn
link-arm = vrshiftn._EXT2_
const-arm = -N as ttn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t

/// Rounding shift right narrow
name = vrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = transmute, {vrshrn_n-noqsigned-::<N>, transmute(a)}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = rshrn
arm = vrshrn
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Rounding shift right narrow
name = vrshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-!, a, {vrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
n = 2
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = rshrn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

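// The `_high` narrowing pattern used above: narrow `b` to half width, then
// shuffle it in after the already-narrow low half `a`. An equivalent sketch
// using the public combine intrinsic instead of the raw simd_shuffle:
//
//     use core::arch::aarch64::*;
//
//     unsafe fn vrshrn_high_n_s16_demo(a: int8x8_t, b: int16x8_t) -> int8x16_t {
//         let narrowed: int8x8_t = vrshrn_n_s16::<2>(b);
//         vcombine_s8(a, narrowed)  // same lanes {asc-0-out_len} selects
//     }
//
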
/// Signed rounding shift right and accumulate
name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17

aarch64 = srsra
arm = vrsra
generate int*_t, int64x*_t

/// Unsigned rounding shift right and accumulate
name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17

aarch64 = ursra
arm = vrsra
generate uint*_t, uint64x*_t

/// Signed rounding shift right and accumulate.
name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a.wrapping_add(b)
a = 1
b = 4
n = 2
validate 2

aarch64 = srsra
generate i64

/// Unsigned rounding shift right and accumulate.
name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a.wrapping_add(b)
a = 1
b = 4
n = 2
validate 2

aarch64 = ursra
generate u64

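// vrsra is just vrshr followed by an add; for the scalar i64/u64 forms the
// add is a plain wrapping_add. One-lane model (a sketch):
//
//     fn rsra_lane<const N: u32>(a: i64, b: i64) -> i64 {
//         let rounded = (b.wrapping_add(1 << (N - 1))) >> N;  // rounding shift right
//         a.wrapping_add(rounded)
//     }
//
//     // rsra_lane::<2>(1, 4) == 2: (4 + 2) >> 2 == 1, then 1 + 1 == 2,
//     // matching `validate 2` above.
//
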
/// Rounding subtract returning high narrow
name = vrsubhn
no-q
a = MAX, MIN, 0, 4, 5, 6, 7, 8
b = 1, 2, 3, 4, 5, 6, 7, 8
validate MIN, MIN, 0, 0, 0, 0, 0, 0

aarch64 = rsubhn
link-aarch64 = rsubhn._EXT2_
arm = vrsubhn
link-arm = vrsubhn._EXT2_
generate int16x8_t:int16x8_t:int8x8_t, int32x4_t:int32x4_t:int16x4_t, int64x2_t:int64x2_t:int32x2_t

/// Rounding subtract returning high narrow
name = vrsubhn
no-q
multi_fn = transmute, {vrsubhn-noqsigned-noext, {transmute, a}, {transmute, b}}
a = MAX, MIN, 3, 4, 5, 6, 7, 8
b = 1, 2, 3, 4, 5, 6, 7, 8
validate 0, 0, 0, 0, 0, 0, 0, 0

aarch64 = rsubhn
arm = vrsubhn
generate uint16x8_t:uint16x8_t:uint8x8_t, uint32x4_t:uint32x4_t:uint16x4_t, uint64x2_t:uint64x2_t:uint32x2_t

/// Rounding subtract returning high narrow
name = vrsubhn_high
no-q
multi_fn = vrsubhn-noqself-noext, x:in_t0, b, c
multi_fn = simd_shuffle-out_len-!, a, x, {asc-0-out_len}
a = 1, 2, 0, 0, 0, 0, 0, 0
b = 1, 2, 3, 4, 5, 6, 7, 8
c = 1, 2, 3, 4, 5, 6, 7, 8
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

aarch64 = rsubhn2
generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t

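// vrsubhn computes a - b at full width, adds the rounding constant for the
// half that will be discarded, and keeps the high half. One-lane model for
// the 16-bit to 8-bit case (a sketch):
//
//     fn rsubhn_lane(a: i16, b: i16) -> i8 {
//         (((a as i32) - (b as i32) + (1 << 7)) >> 8) as i8
//     }
//
//     // rsubhn_lane(i16::MAX, 1) == i8::MIN (the high byte is 128, which is
//     // the MIN bit pattern), matching the first `validate` lane above.
//
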
/// Insert vector element from another vector element
name = vset_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
n = 0
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = nop
arm = nop
generate i8:int8x8_t:int8x8_t, i16:int16x4_t:int16x4_t
generate i32:int32x2_t:int32x2_t, i64:int64x1_t:int64x1_t
generate u8:uint8x8_t:uint8x8_t, u16:uint16x4_t:uint16x4_t
generate u32:uint32x2_t:uint32x2_t, u64:uint64x1_t:uint64x1_t
generate p8:poly8x8_t:poly8x8_t, p16:poly16x4_t:poly16x4_t

target = aes
generate p64:poly64x1_t:poly64x1_t

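// vset_lane is a single simd_insert: lane LANE of `b` is replaced by the
// scalar `a`, all other lanes pass through. Usage sketch:
//
//     use core::arch::aarch64::*;
//
//     unsafe fn example(v: int8x8_t) -> int8x8_t {
//         // put 1 into lane 0; compiles to a register move at most
//         vset_lane_s8::<0>(1, v)
//     }
//
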
/// Insert vector element from another vector element
name = vsetq_lane
no-q
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
n = 0
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = nop
arm = nop
generate i8:int8x16_t:int8x16_t, i16:int16x8_t:int16x8_t
generate i32:int32x4_t:int32x4_t, i64:int64x2_t:int64x2_t
generate u8:uint8x16_t:uint8x16_t, u16:uint16x8_t:uint16x8_t
generate u32:uint32x4_t:uint32x4_t, u64:uint64x2_t:uint64x2_t
generate p8:poly8x16_t:poly8x16_t, p16:poly16x8_t:poly16x8_t

target = aes
generate p64:poly64x2_t:poly64x2_t

/// Insert vector element from another vector element
name = vset_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1.
b = 0., 2., 3., 4.
n = 0
validate 1., 2., 3., 4.

aarch64 = nop
generate f64:float64x1_t:float64x1_t

arm = nop
generate f32:float32x2_t:float32x2_t

/// Insert vector element from another vector element
name = vsetq_lane
no-q
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1.
b = 0., 2., 3., 4.
n = 0
validate 1., 2., 3., 4.

aarch64 = nop
generate f64:float64x2_t:float64x2_t

arm = nop
generate f32:float32x4_t:float32x4_t

/// Signed shift left
name = vshl
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

aarch64 = sshl
link-aarch64 = sshl._EXT_
arm = vshl
link-arm = vshifts._EXT_
generate int*_t, int64x*_t

/// Signed shift left
name = vshl
multi_fn = transmute, {vshl-in_ntt-noext, transmute(a), transmute(b)}
a = 1
b = 2
validate 4

aarch64 = sshl
generate i64

/// Unsigned shift left
name = vshl
out-suffix
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

aarch64 = ushl
link-aarch64 = ushl._EXT_
arm = vshl
link-arm = vshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

/// Unsigned shift left
out-suffix
name = vshl
multi_fn = transmute, {vshl-out_ntt-noext, transmute(a), transmute(b)}
a = 1
b = 2
validate 4

aarch64 = ushl
generate u64:i64:u64

/// Shift left
name = vshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_shl, a, {vdup-nself-noext, N as _}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
n = 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

arm = vshl
aarch64 = shl
generate int*_t, uint*_t, int64x*_t, uint64x*_t

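// The const-generic vshl_n splats N and reuses the lane-wise shift, which
// lets the backend pick the immediate form (shl). Usage sketch:
//
//     use core::arch::aarch64::*;
//
//     unsafe fn example(a: int16x4_t) -> int16x4_t {
//         vshl_n_s16::<2>(a)  // every lane shifted left by the constant 2
//     }
//
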
/// Shift left long
name = vshll
n-suffix
constn = N
multi_fn = static_assert-N-0-bits
multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N as _}
a = 1, 2, 3, 4, 5, 6, 7, 8
n = 2
validate 4, 8, 12, 16, 20, 24, 28, 32

arm = vshll.s
aarch64 = sshll
generate int8x8_t:int16x8_t, int16x4_t:int32x4_t, int32x2_t:int64x2_t
aarch64 = ushll
generate uint8x8_t:uint16x8_t, uint16x4_t:uint32x4_t, uint32x2_t:uint64x2_t

/// Shift left long
name = vshll_high_n
no-q
constn = N
multi_fn = static_assert-N-0-bits
multi_fn = simd_shuffle-out_len-!, b:half, a, a, {asc-halflen-halflen}
multi_fn = vshll_n-noqself-::<N>, b
a = 0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8
n = 2
validate 4, 8, 12, 16, 20, 24, 28, 32

aarch64 = sshll2
generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t
aarch64 = ushll2
generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t

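// vshll_n widens each lane first and shifts afterwards, so no bits are
// lost; that is also why the static_assert-N-0-bits range check allows N
// up to the input lane width. One-lane model (a sketch):
//
//     fn shll_lane<const N: u32>(a: i8) -> i16 {
//         (a as i16) << N  // widen, then shift: well-defined even for N == 8
//     }
//
//     // shll_lane::<2>(1) == 4, matching the `validate` rows above.
//
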
/// Shift right
name = vshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = fix_right_shift_imm-N-bits
multi_fn = simd_shr, a, {vdup-nself-noext, n as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

arm = vshr.s
aarch64 = sshr
generate int*_t, int64x*_t
aarch64 = ushr
generate uint*_t, uint64x*_t

/// Shift right narrow
name = vshrn_n
no-q
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N as _}}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

arm = vshrn.
aarch64 = shrn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Shift right narrow
name = vshrn_high_n
no-q
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-!, a, {vshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 1, 2, 5, 6, 5, 6, 7, 8
b = 20, 24, 28, 32, 52, 56, 60, 64
n = 2
validate 1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16

aarch64 = shrn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed shift right and accumulate
name = vsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17

aarch64 = ssra
arm = vsra
generate int*_t, int64x*_t

/// Unsigned shift right and accumulate
name = vsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17

aarch64 = usra
arm = vsra
generate uint*_t, uint64x*_t

/// SM3PARTW1
name = vsm3partw1
a = 1, 2, 3, 4
b = 1, 2, 3, 4
c = 1, 2, 3, 4
validate 2147549312, 3221323968, 131329, 2684362752
target = sm4

aarch64 = sm3partw1
link-aarch64 = llvm.aarch64.crypto.sm3partw1
generate uint32x4_t

/// SM3PARTW2
name = vsm3partw2
a = 1, 2, 3, 4
b = 1, 2, 3, 4
c = 1, 2, 3, 4
validate 128, 256, 384, 1077977696
target = sm4

aarch64 = sm3partw2
link-aarch64 = llvm.aarch64.crypto.sm3partw2
generate uint32x4_t

/// SM3SS1
name = vsm3ss1
a = 1, 2, 3, 4
b = 1, 2, 3, 4
c = 1, 2, 3, 4
validate 0, 0, 0, 2098176
target = sm4

aarch64 = sm3ss1
link-aarch64 = llvm.aarch64.crypto.sm3ss1
generate uint32x4_t

/// SM4 key
name = vsm4ekey
a = 1, 2, 3, 4
b = 1, 2, 3, 4
validate 1784948604, 136020997, 2940231695, 3789947679
target = sm4

aarch64 = sm4ekey
link-aarch64 = llvm.aarch64.crypto.sm4ekey
generate uint32x4_t

/// SM4 encode
name = vsm4e
a = 1, 2, 3, 4
b = 1, 2, 3, 4
validate 1093874472, 3616769504, 3878330411, 2765298765
target = sm4

aarch64 = sm4e
link-aarch64 = llvm.aarch64.crypto.sm4e
generate uint32x4_t

/// Rotate and exclusive OR
name = vrax1
a = 1, 2
b = 3, 4
validate 7, 10
target = sha3

aarch64 = rax1
link-aarch64 = llvm.aarch64.crypto.rax1
generate uint64x2_t

/// SHA512 hash update part 1
name = vsha512h
a = 1, 2
b = 3, 4
c = 5, 6
validate 11189044327219203, 7177611956453380
target = sha3

aarch64 = sha512h
link-aarch64 = llvm.aarch64.crypto.sha512h
generate uint64x2_t

/// SHA512 hash update part 2
name = vsha512h2
a = 1, 2
b = 3, 4
c = 5, 6
validate 5770237651009406214, 349133864969
target = sha3

aarch64 = sha512h2
link-aarch64 = llvm.aarch64.crypto.sha512h2
generate uint64x2_t

/// SHA512 schedule update 0
name = vsha512su0
a = 1, 2
b = 3, 4
validate 144115188075855874, 9439544818968559619
target = sha3

aarch64 = sha512su0
link-aarch64 = llvm.aarch64.crypto.sha512su0
generate uint64x2_t

/// SHA512 schedule update 1
name = vsha512su1
a = 1, 2
b = 3, 4
c = 5, 6
validate 105553116266526, 140737488355368
target = sha3

aarch64 = sha512su1
link-aarch64 = llvm.aarch64.crypto.sha512su1
generate uint64x2_t

/// Floating-point round to 32-bit integer, using current rounding mode
name = vrnd32x
a = 1.1, 1.9, -1.7, -2.3
validate 1.0, 2.0, -2.0, -2.0
target = frintts

aarch64 = frint32x
link-aarch64 = frint32x._EXT_
generate float32x2_t, float32x4_t

/// Floating-point round to 32-bit integer toward zero
name = vrnd32z
a = 1.1, 1.9, -1.7, -2.3
validate 1.0, 1.0, -1.0, -2.0
target = frintts

aarch64 = frint32z
link-aarch64 = frint32z._EXT_
generate float32x2_t, float32x4_t

/// Floating-point round to 64-bit integer, using current rounding mode
name = vrnd64x
a = 1.1, 1.9, -1.7, -2.3
validate 1.0, 2.0, -2.0, -2.0
target = frintts

aarch64 = frint64x
link-aarch64 = frint64x._EXT_
generate float32x2_t, float32x4_t

/// Floating-point round to 64-bit integer toward zero
name = vrnd64z
a = 1.1, 1.9, -1.7, -2.3
validate 1.0, 1.0, -1.0, -2.0
target = frintts

aarch64 = frint64z
link-aarch64 = frint64z._EXT_
generate float32x2_t, float32x4_t

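// The vrnd32x/vrnd64x `validate` rows above assume the default rounding
// mode, round-to-nearest with ties-to-even; the `z` variants always round
// toward zero. Scalar model of one lane (a sketch; round_ties_even needs a
// recent Rust, and the real instructions additionally saturate the result
// to the i32/i64 range):
//
//     fn rnd32x_lane(a: f32) -> f32 {
//         a.round_ties_even()  // 1.1 -> 1.0, 1.9 -> 2.0, -1.7 -> -2.0, -2.3 -> -2.0
//     }
//
//     fn rnd32z_lane(a: f32) -> f32 {
//         a.trunc()            // 1.9 -> 1.0, -1.7 -> -1.0
//     }
//
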
/// Transpose elements
name = vtrn
multi_fn = simd_shuffle-in_len-!, a1:in_t, a, b, {transpose-1-in_len}
multi_fn = simd_shuffle-in_len-!, b1:in_t, a, b, {transpose-2-in_len}
multi_fn = transmute, (a1, b1)
a = 0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30
b = 1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31

aarch64 = trn
arm = vtrn
generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t
aarch64 = zip
generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t

/// Transpose elements
name = vtrn
multi_fn = simd_shuffle-in_len-!, a1:in_t, a, b, {transpose-1-in_len}
multi_fn = simd_shuffle-in_len-!, b1:in_t, a, b, {transpose-2-in_len}
multi_fn = transmute, (a1, b1)
a = 0., 2., 2., 6.
b = 1., 3., 3., 7.
validate 0., 1., 2., 3., 2., 3., 6., 7.

aarch64 = zip
arm = vtrn
generate float32x2_t:float32x2_t:float32x2x2_t
aarch64 = trn
generate float32x4_t:float32x4_t:float32x4x2_t

/// Transpose vectors
name = vtrn1
multi_fn = simd_shuffle-in_len-!, a, b, {transpose-1-in_len}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29

aarch64 = trn1
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

aarch64 = zip1
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

/// Transpose vectors
name = vtrn1
multi_fn = simd_shuffle-in_len-!, a, b, {transpose-1-in_len}
a = 0., 2., 4., 6., 8., 10., 12., 14.
b = 1., 3., 5., 7., 9., 11., 13., 15.
validate 0., 1., 4., 5., 8., 9., 12., 13.

aarch64 = trn1
generate float32x4_t

aarch64 = zip1
generate float32x2_t, float64x2_t

/// Transpose vectors
name = vtrn2
multi_fn = simd_shuffle-in_len-!, a, b, {transpose-2-in_len}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31

aarch64 = trn2
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

aarch64 = zip2
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

/// Transpose vectors
name = vtrn2
multi_fn = simd_shuffle-in_len-!, a, b, {transpose-2-in_len}
a = 0., 2., 4., 6., 8., 10., 12., 14.
b = 1., 3., 5., 7., 9., 11., 13., 15.
validate 2., 3., 6., 7., 10., 11., 14., 15.

aarch64 = trn2
generate float32x4_t

aarch64 = zip2
generate float32x2_t, float64x2_t

/// Zip vectors
name = vzip
multi_fn = simd_shuffle-in_len-!, a0:in_t, a, b, {zip-1-in_len}
multi_fn = simd_shuffle-in_len-!, b0:in_t, a, b, {zip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31

aarch64 = zip
arm = vzip
generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t
generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t
generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t
arm = vtrn
generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t
aarch64 = ext
arm = vorr
generate int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
generate uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
generate poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t

/// Zip vectors
name = vzip
multi_fn = simd_shuffle-in_len-!, a0:in_t, a, b, {zip-1-in_len}
multi_fn = simd_shuffle-in_len-!, b0:in_t, a, b, {zip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 1., 2., 3., 4.
b = 5., 6., 7., 8.
validate 1., 5., 2., 6., 3., 7., 4., 8.

aarch64 = zip
arm = vtrn
generate float32x2_t:float32x2_t:float32x2x2_t
aarch64 = ext
arm = vorr
generate float32x4_t:float32x4_t:float32x4x2_t

/// Zip vectors
name = vzip1
multi_fn = simd_shuffle-in_len-!, a, b, {zip-1-in_len}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = zip1
generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t

/// Zip vectors
name = vzip1
multi_fn = simd_shuffle-in_len-!, a, b, {zip-1-in_len}
a = 0., 2., 4., 6., 8., 10., 12., 14.
b = 1., 3., 5., 7., 9., 11., 13., 15.
validate 0., 1., 2., 3., 4., 5., 6., 7.

aarch64 = zip1
generate float32x2_t, float32x4_t, float64x2_t

/// Zip vectors
name = vzip2
multi_fn = simd_shuffle-in_len-!, a, b, {zip-2-in_len}
a = 0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31
validate 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31

aarch64 = zip2
generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t

/// Zip vectors
name = vzip2
multi_fn = simd_shuffle-in_len-!, a, b, {zip-2-in_len}
a = 0., 8., 8., 10., 8., 10., 12., 14.
b = 1., 9., 9., 11., 9., 11., 13., 15.
validate 8., 9., 10., 11., 12., 13., 14., 15.

aarch64 = zip2
generate float32x2_t, float32x4_t, float64x2_t

/// Unzip vectors
name = vuzp
multi_fn = simd_shuffle-in_len-!, a0:in_t, a, b, {unzip-1-in_len}
multi_fn = simd_shuffle-in_len-!, b0:in_t, a, b, {unzip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16
b = 2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32
validate 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32

aarch64 = uzp
arm = vuzp
generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t
aarch64 = zip
arm = vtrn
generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t

/// Unzip vectors
name = vuzp
multi_fn = simd_shuffle-in_len-!, a0:in_t, a, b, {unzip-1-in_len}
multi_fn = simd_shuffle-in_len-!, b0:in_t, a, b, {unzip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 1., 2., 2., 4.
b = 2., 6., 6., 8.
validate 1., 2., 2., 6., 2., 4., 6., 8.

aarch64 = zip
arm = vtrn
generate float32x2_t:float32x2_t:float32x2x2_t
aarch64 = uzp
arm = vuzp
generate float32x4_t:float32x4_t:float32x4x2_t

/// Unzip vectors
name = vuzp1
multi_fn = simd_shuffle-in_len-!, a, b, {unzip-1-in_len}
a = 1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0
b = 2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0
validate 1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16

aarch64 = uzp1
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

aarch64 = zip1
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

/// Unzip vectors
name = vuzp1
multi_fn = simd_shuffle-in_len-!, a, b, {unzip-1-in_len}
a = 0., 8., 1., 9., 4., 12., 5., 13.
b = 1., 10., 3., 11., 6., 14., 7., 15.
validate 0., 1., 1., 3., 4., 5., 6., 7.

aarch64 = uzp1
generate float32x4_t

aarch64 = zip1
generate float32x2_t, float64x2_t

/// Unzip vectors
name = vuzp2
multi_fn = simd_shuffle-in_len-!, a, b, {unzip-2-in_len}
a = 0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24
b = 0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32
validate 17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32

aarch64 = uzp2
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

aarch64 = zip2
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

/// Unzip vectors
name = vuzp2
multi_fn = simd_shuffle-in_len-!, a, b, {unzip-2-in_len}
a = 0., 8., 1., 9., 4., 12., 5., 13.
b = 2., 9., 3., 11., 6., 14., 7., 15.
validate 8., 9., 9., 11., 12., 13., 14., 15.

aarch64 = uzp2
generate float32x4_t

aarch64 = zip2
generate float32x2_t, float64x2_t

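// Index patterns behind the three permute families above, written out for
// one 4-lane case with a = [a0, a1, a2, a3] and b = [b0, b1, b2, b3]:
//
//     trn1 -> [a0, b0, a2, b2]    trn2 -> [a1, b1, a3, b3]
//     zip1 -> [a0, b0, a1, b1]    zip2 -> [a2, b2, a3, b3]
//     uzp1 -> [a0, a2, b0, b2]    uzp2 -> [a1, a3, b1, b3]
//
// Usage sketch of one of them:
//
//     use core::arch::aarch64::*;
//
//     unsafe fn example(a: int32x4_t, b: int32x4_t) -> int32x4_t {
//         vzip1q_s32(a, b)  // interleave the low halves: [a0, b0, a1, b1]
//     }
//
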
////////////////////
// Unsigned Absolute difference and Accumulate Long
////////////////////

/// Unsigned Absolute difference and Accumulate Long
name = vabal
multi_fn = vabd-unsigned-noext, b, c, d:in_t
multi_fn = simd_add, a, {simd_cast, d}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20

arm = vabal.s
aarch64 = uabal
generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t

/// Unsigned Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle8!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle8!, e:uint8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = vabd_u8, d, e, f:uint8x8_t
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 20, 20, 20, 20, 20, 20, 20, 20

aarch64 = uabal
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t

/// Unsigned Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle4!, d:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_shuffle4!, e:uint16x4_t, c, c, [4, 5, 6, 7]
multi_fn = vabd_u16, d, e, f:uint16x4_t
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12
b = 1, 2, 3, 4, 9, 10, 11, 12
c = 10, 10, 10, 10, 20, 0, 2, 4
validate 20, 20, 20, 20

aarch64 = uabal
generate uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t

/// Unsigned Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle2!, d:uint32x2_t, b, b, [2, 3]
multi_fn = simd_shuffle2!, e:uint32x2_t, c, c, [2, 3]
multi_fn = vabd_u32, d, e, f:uint32x2_t
multi_fn = simd_add, a, {simd_cast, f}
a = 15, 16
b = 1, 2, 15, 16
c = 10, 10, 10, 12
validate 20, 20

aarch64 = uabal
generate uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

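// vabal widens the absolute difference of the two narrow inputs and adds
// it to the wide accumulator. One-lane model (a sketch):
//
//     fn abal_lane(a: u16, b: u8, c: u8) -> u16 {
//         a.wrapping_add(b.abs_diff(c) as u16)
//     }
//
//     // abal_lane(1, 1, 10) == 10, matching the first `validate` lane of
//     // the vabal entry above.
//
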
////////////////////
// Signed Absolute difference and Accumulate Long
////////////////////

/// Signed Absolute difference and Accumulate Long
name = vabal
multi_fn = vabd-signed-noext, b, c, d:int8x8_t
multi_fn = simd_cast, e:uint8x8_t, d
multi_fn = simd_add, a, {simd_cast, e}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20

arm = vabal.s
aarch64 = sabal
generate int16x8_t:int8x8_t:int8x8_t:int16x8_t

/// Signed Absolute difference and Accumulate Long
name = vabal
multi_fn = vabd-signed-noext, b, c, d:int16x4_t
multi_fn = simd_cast, e:uint16x4_t, d
multi_fn = simd_add, a, {simd_cast, e}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20

arm = vabal.s
aarch64 = sabal
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t

/// Signed Absolute difference and Accumulate Long
name = vabal
multi_fn = vabd-signed-noext, b, c, d:int32x2_t
multi_fn = simd_cast, e:uint32x2_t, d
multi_fn = simd_add, a, {simd_cast, e}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20

arm = vabal.s
aarch64 = sabal
generate int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Signed Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle8!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle8!, e:int8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = vabd_s8, d, e, f:int8x8_t
multi_fn = simd_cast, f:uint8x8_t, f
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 20, 20, 20, 20, 20, 20, 20, 20

aarch64 = sabal
generate int16x8_t:int8x16_t:int8x16_t:int16x8_t

/// Signed Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle4!, d:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_shuffle4!, e:int16x4_t, c, c, [4, 5, 6, 7]
multi_fn = vabd_s16, d, e, f:int16x4_t
multi_fn = simd_cast, f:uint16x4_t, f
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12
b = 1, 2, 3, 4, 9, 10, 11, 12
c = 10, 10, 10, 10, 20, 0, 2, 4
validate 20, 20, 20, 20

aarch64 = sabal
generate int32x4_t:int16x8_t:int16x8_t:int32x4_t

/// Signed Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle2!, d:int32x2_t, b, b, [2, 3]
multi_fn = simd_shuffle2!, e:int32x2_t, c, c, [2, 3]
multi_fn = vabd_s32, d, e, f:int32x2_t
multi_fn = simd_cast, f:uint32x2_t, f
multi_fn = simd_add, a, {simd_cast, f}
a = 15, 16
b = 1, 2, 15, 16
c = 10, 10, 10, 12
validate 20, 20

aarch64 = sabal
generate int64x2_t:int32x4_t:int32x4_t:int64x2_t

////////////////////
// Signed saturating Absolute value
////////////////////

/// Signed saturating Absolute value
name = vqabs
a = MIN, MAX, -6, -5, -4, -3, -2, -1, 0, -127, 127, 1, 2, 3, 4, 5
validate MAX, MAX, 6, 5, 4, 3, 2, 1, 0, 127, 127, 1, 2, 3, 4, 5

arm = vqabs.s
aarch64 = sqabs
link-arm = vqabs._EXT_
link-aarch64 = sqabs._EXT_
generate int*_t

/// Signed saturating Absolute value
name = vqabs
a = MIN, -7
validate MAX, 7

aarch64 = sqabs
link-aarch64 = sqabs._EXT_
generate int64x*_t

/// Signed saturating absolute value
name = vqabs
multi_fn = simd_extract, {vqabs-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = -7
validate 7

aarch64 = sqabs
generate i8:i8, i16:i16

/// Signed saturating absolute value
name = vqabs
a = -7
validate 7

aarch64 = sqabs
link-aarch64 = sqabs._EXT_
generate i32:i32, i64:i64

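// Unlike a plain abs, vqabs saturates: the one value with no positive
// counterpart, MIN, maps to MAX instead of overflowing back to itself.
// One-lane model (a sketch):
//
//     fn qabs_lane(a: i8) -> i8 {
//         a.saturating_abs()
//     }
//
//     // qabs_lane(i8::MIN) == i8::MAX, matching the first `validate` lane.
//
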
/// Shift left and insert
name = vsli
n-suffix
constn = N
multi_fn = static_assert-N-0-63
multi_fn = transmute, {vsli_n-in_ntt-::<N>, transmute(a), transmute(b)}
a = 333
b = 2042
n = 2
validate 8169

aarch64 = sli
generate i64, u64

/// Shift right and insert
name = vsri
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = transmute, {vsri_n-in_ntt-::<N>, transmute(a), transmute(b)}
a = 333
b = 2042
n = 2
validate 510

aarch64 = sri
generate i64, u64
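
// The shift-and-insert pair merges the shifted `b` into `a`, which keeps
// only the bits the shift leaves uncovered. One-lane model for the u64
// forms (a sketch; valid for shift amounts below 64):
//
//     fn sli_lane<const N: u32>(a: u64, b: u64) -> u64 {
//         let keep = (1u64 << N) - 1;   // low N bits survive from `a`
//         (b << N) | (a & keep)
//     }
//
//     fn sri_lane<const N: u32>(a: u64, b: u64) -> u64 {
//         let keep = !(u64::MAX >> N);  // high N bits survive from `a`
//         (b >> N) | (a & keep)
//     }
//
//     // sli_lane::<2>(333, 2042) == 8169 and sri_lane::<2>(333, 2042) == 510,
//     // matching the `validate` rows above.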