// ARM Neon intrinsic specification.
//
// This file contains the specification for a number of
// intrinsics that allows us to generate them along with
// their test cases.
//
// A note on the syntax of this file: it is not parsed very
// intelligently!
//
// # Comments
// Comments start with AT LEAST two slashes, or with four or more,
// so `//` is a comment and `///////` is too.
//
// # Sections
// Sections start with EXACTLY three slashes followed
// by AT LEAST one space. Sections are used for two things:
//
// 1) they serve as the doc comment for the given intrinsics.
// 2) they reset all variables (name, fn, etc.)
//
// # Variables
//
// name - The prefix of the function; suffixes are auto-generated
// from the type the function gets passed.
//
// fn - The function to call in Rust-land.
//
// aarch64 - The instruction to check for on the aarch64 architecture.
// If this is given but no arm instruction is provided, the
// function is generated exclusively for aarch64.
// This is used to generate both aarch64-specific and shared
// intrinsics, by first specifying only the aarch64 variant
// and then the arm variant.
//
// arm - The ARMv7 instruction to check for during arm code
// generation. All NEON functions available on arm are also
// available on aarch64. If no aarch64 instruction was set,
// they are assumed to be the same.
// Instructions ending with a `.` get a size suffix appended
// (such as `i8` or `i64`) that is not sign-specific;
// instructions ending with `.s` get a sign-specific size
// suffix (such as `s8` or `u64`). For example, `vabd.s`
// expands to `vabd.s8` for signed types and `vabd.u8` for
// unsigned ones.
//
// a - First input for tests; it gets scaled to the size of
// the type.
//
// b - Second input for tests; it gets scaled to the size of
// the type.
//
// # Special values
//
// TRUE - 'true', all bits are set to 1
// FALSE - 'false', all bits are set to 0
// FF - same as 'true'
// MIN - minimal value (either 0 or the lowest negative number)
// MAX - maximal value (prone to overflow)
//
// # validate <values>
// Validates a and b against the expected result of the test.
// The special values 'TRUE' and 'FALSE' can be used to
// represent the correct NEON representation of true and
// false values. They too get scaled to the type.
//
// validate needs to be called before generate, as it sets up
// the validation rules that get generated for each type.
//
// # generate <types>
// The generate command generates the intrinsics; it uses the
// variables set so far and can be called multiple times while
// overwriting some of the variables.

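// As a rough sketch (not the exact emitted code), the `vand` entry
// below makes the generator emit one function per expanded type,
// along the lines of:
//
//     #[inline]
//     #[target_feature(enable = "neon")]
//     pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
//         simd_and(a, b)
//     }
//
// together with a test that feeds the scaled `a`/`b` inputs and
// asserts the `validate` values, and an instruction check against
// the `arm`/`aarch64` names.
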
/// Vector bitwise and
name = vand
fn = simd_and
arm = vand
aarch64 = and
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Vector bitwise or (inclusive)
name = vorr
fn = simd_or
arm = vorr
aarch64 = orr
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t


/// Vector bitwise exclusive or (vector)
name = veor
fn = simd_xor
arm = veor
aarch64 = eor
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Three-way exclusive OR
name = veor3
a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
c = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
target = sha3

aarch64 = eor3
link-aarch64 = llvm.aarch64.crypto.eor3s._EXT_
generate int8x16_t, int16x8_t, int32x4_t, int64x2_t
link-aarch64 = llvm.aarch64.crypto.eor3u._EXT_
generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t
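
// `target = sha3` presumably gates the generated functions and their
// tests on that target feature (in addition to `neon`), since the
// eor3 instruction is part of the SHA3 extension.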

////////////////////
// Absolute difference between the arguments
////////////////////

/// Absolute difference between the arguments
name = vabd
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
validate 15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15

arm = vabd.s
aarch64 = sabd
link-arm = vabds._EXT_
link-aarch64 = sabd._EXT_
generate int*_t

arm = vabd.s
aarch64 = uabd
link-arm = vabdu._EXT_
link-aarch64 = uabd._EXT_
generate uint*_t

/// Absolute difference between the arguments (floating-point)
name = vabd
a = 1.0, 2.0, 5.0, -4.0
b = 9.0, 3.0, 2.0, 8.0
validate 8.0, 1.0, 3.0, 12.0

aarch64 = fabd
link-aarch64 = fabd._EXT_
generate float64x*_t

arm = vabd.s
aarch64 = fabd
link-arm = vabds._EXT_
link-aarch64 = fabd._EXT_
generate float*_t

/// Floating-point absolute difference
name = vabd
multi_fn = simd_extract, {vabd-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1.0
b = 9.0
validate 8.0

aarch64 = fabd
generate f32, f64
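
// The multi_fn chain above amounts, roughly, to
// `simd_extract(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)` for the
// f32 case: the scalar inputs are duplicated into the narrowest
// vector type, the vector absolute difference is taken, and lane 0
// of the result is extracted again.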

////////////////////
// Absolute difference Long
////////////////////

/// Unsigned Absolute difference Long
name = vabdl
multi_fn = simd_cast, {vabd-unsigned-noext, a, b}
a = 1, 2, 3, 4, 4, 3, 2, 1
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 9, 8, 7, 6, 6, 7, 8, 9

arm = vabdl.s
aarch64 = uabdl
generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t

/// Signed Absolute difference Long
name = vabdl
multi_fn = simd_cast, c:uint8x8_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 2, 3, 4, 4, 3, 2, 1
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 9, 8, 7, 6, 6, 7, 8, 9

arm = vabdl.s
aarch64 = sabdl
generate int8x8_t:int8x8_t:int16x8_t

/// Signed Absolute difference Long
name = vabdl
multi_fn = simd_cast, c:uint16x4_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 2, 11, 12
b = 10, 10, 10, 10
validate 9, 8, 1, 2

arm = vabdl.s
aarch64 = sabdl
generate int16x4_t:int16x4_t:int32x4_t

/// Signed Absolute difference Long
name = vabdl
multi_fn = simd_cast, c:uint32x2_t, {vabd-signed-noext, a, b}
multi_fn = simd_cast, c
a = 1, 11
b = 10, 10
validate 9, 1

arm = vabdl.s
aarch64 = sabdl
generate int32x2_t:int32x2_t:int64x2_t

/// Unsigned Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, {vabd_u8, c, d}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2, 3, 4, 5, 6

aarch64 = uabdl
generate uint8x16_t:uint8x16_t:uint16x8_t

/// Unsigned Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle!, c:uint16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_shuffle!, d:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, {vabd_u16, c, d}
a = 1, 2, 3, 4, 8, 9, 11, 12
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 2, 1, 1, 2

aarch64 = uabdl
generate uint16x8_t:uint16x8_t:uint32x4_t

/// Unsigned Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle!, c:uint32x2_t, a, a, [2, 3]
multi_fn = simd_shuffle!, d:uint32x2_t, b, b, [2, 3]
multi_fn = simd_cast, {vabd_u32, c, d}
a = 1, 2, 3, 4
b = 10, 10, 10, 10
validate 7, 6

aarch64 = uabdl
generate uint32x4_t:uint32x4_t:uint64x2_t

/// Signed Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_cast, e:uint8x8_t, {vabd_s8, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2, 3, 4, 5, 6

aarch64 = sabdl
generate int8x16_t:int8x16_t:int16x8_t

/// Signed Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle!, c:int16x4_t, a, a, [4, 5, 6, 7]
multi_fn = simd_shuffle!, d:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_cast, e:uint16x4_t, {vabd_s16, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4, 9, 10, 11, 12
b = 10, 10, 10, 10, 10, 10, 10, 10
validate 1, 0, 1, 2

aarch64 = sabdl
generate int16x8_t:int16x8_t:int32x4_t

/// Signed Absolute difference Long
name = vabdl_high
no-q
multi_fn = simd_shuffle!, c:int32x2_t, a, a, [2, 3]
multi_fn = simd_shuffle!, d:int32x2_t, b, b, [2, 3]
multi_fn = simd_cast, e:uint32x2_t, {vabd_s32, c, d}
multi_fn = simd_cast, e
a = 1, 2, 3, 4
b = 10, 10, 10, 10
validate 7, 6

aarch64 = sabdl
generate int32x4_t:int32x4_t:int64x2_t

////////////////////
// equality
////////////////////

/// Compare bitwise Equal (vector)
name = vceq
fn = simd_eq
a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX
b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN
validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE

aarch64 = cmeq
generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t

arm = vceq.
generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t

/// Floating-point compare equal
name = vceq
fn = simd_eq
a = 1.2, 3.4, 5.6, 7.8
b = 1.2, 3.4, 5.6, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmeq
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vceq.
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare bitwise equal
name = vceq
multi_fn = transmute, {vceq-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 1
b = 2
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare equal
name = vceq
multi_fn = simd_extract, {vceq-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1.
b = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Signed compare bitwise equal to zero
name = vceqz
fn = simd_eq
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = cmeq
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
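
// `fixed` supplies a constant vector (here all zeroes) that the
// generated code uses as the second comparison operand, so vceqz
// becomes a comparison of `a` against zero.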

/// Unsigned compare bitwise equal to zero
name = vceqz
fn = simd_eq
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = cmeq
generate uint*_t, uint64x*_t

/// Floating-point compare bitwise equal to zero
name = vceqz
fn = simd_eq
a = 0.0, 1.2, 3.4, 5.6
fixed = 0.0, 0.0, 0.0, 0.0
validate TRUE, FALSE, FALSE, FALSE

aarch64 = fcmeq
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare bitwise equal to zero
name = vceqz
multi_fn = transmute, {vceqz-in_ntt-noext, {transmute, a}}
a = 1
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare bitwise equal to zero
name = vceqz
multi_fn = simd_extract, {vceqz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = 1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Signed compare bitwise Test bits nonzero
name = vtst
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmtst
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t

arm = vtst
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly16x4_t:uint16x4_t, poly16x8_t:uint16x8_t

/// Unsigned compare bitwise Test bits nonzero
name = vtst
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
b = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmtst
generate uint64x*_t

arm = vtst
generate uint*_t

/// Compare bitwise test bits nonzero
name = vtst
multi_fn = transmute, {vtst-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 0
b = 0
validate 0

aarch64 = tst
generate i64:i64:u64, u64

/// Signed saturating accumulate of unsigned value
name = vuqadd
out-suffix
a = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
b = 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
validate 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8

aarch64 = suqadd
link-aarch64 = suqadd._EXT_
generate i32:u32:i32, i64:u64:i64

/// Signed saturating accumulate of unsigned value
name = vuqadd
out-suffix
multi_fn = simd_extract, {vuqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1
b = 2
validate 3

aarch64 = suqadd
generate i8:u8:i8, i16:u16:i16

////////////////////
// Floating-point absolute value
////////////////////

/// Floating-point absolute value
name = vabs
fn = simd_fabs
a = -0.1, -2.2, -3.3, -6.6
validate 0.1, 2.2, 3.3, 6.6
aarch64 = fabs
generate float64x1_t:float64x1_t, float64x2_t:float64x2_t

arm = vabs
generate float32x2_t:float32x2_t, float32x4_t:float32x4_t

////////////////////
// greater than
////////////////////

/// Compare signed greater than
name = vcgt
fn = simd_gt
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcgt.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned greater than
name = vcgt
fn = simd_gt
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhi
generate uint64x*_t

arm = vcgt.s
generate uint*_t

/// Floating-point compare greater than
name = vcgt
fn = simd_gt
a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcgt.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare greater than
name = vcgt
multi_fn = transmute, {vcgt-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 1
b = 2
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare greater than
name = vcgt
multi_fn = simd_extract, {vcgt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1.
b = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

////////////////////
// less than
////////////////////

/// Compare signed less than
name = vclt
fn = simd_lt
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcgt.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned less than
name = vclt
fn = simd_lt
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhi
generate uint64x*_t

arm = vcgt.s
generate uint*_t

/// Floating-point compare less than
name = vclt
fn = simd_lt
a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcgt.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare less than
name = vclt
multi_fn = transmute, {vclt-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 2
b = 1
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare less than
name = vclt
multi_fn = simd_extract, {vclt-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 2.
b = 1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

////////////////////
// less than or equal
////////////////////

/// Compare signed less than or equal
name = vcle
fn = simd_le
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmge
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcge.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare greater than or equal
name = vcge
multi_fn = transmute, {vcge-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 1
b = 2
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare greater than or equal
name = vcge
multi_fn = simd_extract, {vcge-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 1.
b = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Compare unsigned less than or equal
name = vcle
fn = simd_le
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhs
generate uint64x*_t

arm = vcge.s
generate uint*_t

/// Floating-point compare less than or equal
name = vcle
fn = simd_le
a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare less than or equal
name = vcle
multi_fn = transmute, {vcle-in_ntt-noext, {transmute, a}, {transmute, b}}
a = 2
b = 1
validate 0

aarch64 = cmp
generate i64:u64, u64

/// Floating-point compare less than or equal
name = vcle
multi_fn = simd_extract, {vcle-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
a = 2.
b = 1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

////////////////////
// greater than or equal
////////////////////

/// Compare signed greater than or equal
name = vcge
fn = simd_ge
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmge
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

arm = vcge.s
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t

/// Compare unsigned greater than or equal
name = vcge
fn = simd_ge
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmhs
generate uint64x*_t

arm = vcge.s
generate uint*_t

/// Floating-point compare greater than or equal
name = vcge
fn = simd_ge
a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Compare signed greater than or equal to zero
name = vcgez
fn = simd_ge
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmgt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare greater than or equal to zero
name = vcgez
fn = simd_ge
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmge
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare signed greater than or equal to zero
name = vcgez
multi_fn = transmute, {vcgez-in_ntt-noext, {transmute, a}}
a = -1
validate 0

aarch64 = nop
generate i64:u64

/// Floating-point compare greater than or equal to zero
name = vcgez
multi_fn = simd_extract, {vcgez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = -1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Compare signed greater than zero
name = vcgtz
fn = simd_gt
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = cmgt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare greater than zero
name = vcgtz
fn = simd_gt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE

aarch64 = fcmgt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare signed greater than zero
name = vcgtz
multi_fn = transmute, {vcgtz-in_ntt-noext, {transmute, a}}
a = -1
validate 0

aarch64 = cmp
generate i64:u64

/// Floating-point compare greater than zero
name = vcgtz
multi_fn = simd_extract, {vcgtz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = -1.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Compare signed less than or equal to zero
name = vclez
fn = simd_le
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = cmgt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare less than or equal to zero
name = vclez
fn = simd_le
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = fcmle
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare less than or equal to zero
name = vclez
multi_fn = transmute, {vclez-in_ntt-noext, {transmute, a}}
a = 2
validate 0

aarch64 = cmp
generate i64:u64

/// Floating-point compare less than or equal to zero
name = vclez
multi_fn = simd_extract, {vclez-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Compare signed less than zero
name = vcltz
fn = simd_lt
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = cmlt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t

/// Floating-point compare less than zero
name = vcltz
fn = simd_lt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE

aarch64 = fcmlt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Compare less than zero
name = vcltz
multi_fn = transmute, {vcltz-in_ntt-noext, {transmute, a}}
a = 2
validate 0

aarch64 = asr
generate i64:u64

/// Floating-point compare less than zero
name = vcltz
multi_fn = simd_extract, {vcltz-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = 2.
validate 0

aarch64 = fcmp
generate f32:u32, f64:u64

/// Count leading sign bits
name = vcls
a = MIN, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
validate 0, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0

arm = vcls.s
aarch64 = cls
link-arm = vcls._EXT_
link-aarch64 = cls._EXT_
generate int*_t

/// Count leading sign bits
name = vcls
multi_fn = transmute, {vcls-signed-noext, {transmute, a}}
a = MIN, MAX, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
validate BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1

arm = vcls
aarch64 = cls
generate uint8x8_t:int8x8_t, uint8x16_t:int8x16_t, uint16x4_t:int16x4_t, uint16x8_t:int16x8_t, uint32x2_t:int32x2_t, uint32x4_t:int32x4_t

/// Count leading zero bits
name = vclz
multi_fn = self-signed-ext, a
a = MIN, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
validate 0, 0, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 1

arm = vclz.
aarch64 = clz
generate int*_t

/// Count leading zero bits
name = vclz
multi_fn = transmute, {self-signed-ext, transmute(a)}
a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
validate BITS, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0

arm = vclz.
aarch64 = clz
generate uint*_t

/// Floating-point absolute compare greater than
name = vcagt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate !0, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE

aarch64 = facgt
link-aarch64 = facgt._EXT2_._EXT_
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

arm = vacgt.s
link-arm = vacgt._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point absolute compare greater than or equal
name = vcage
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate !0, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE

aarch64 = facge
link-aarch64 = facge._EXT2_._EXT_
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

arm = vacge.s
link-arm = vacge._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point absolute compare less than
name = vcalt
multi_fn = vcagt-self-noext, b, a
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate 0, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE

aarch64 = facgt
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

arm = vacgt.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point absolute compare less than or equal
name = vcale
multi_fn = vcage-self-noext, b, a
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
validate 0, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE

aarch64 = facge
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

arm = vacge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 0:1
validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = mov
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x2_t, int32x4_t, int64x2_t
generate uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x2_t, uint32x4_t, uint64x2_t
generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t
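
// `constn = LANE1:LANE2` declares two const generic parameters, and
// the test line `n = 0:1` instantiates them, roughly as
// `vcopy_lane_s8::<0, 1>(a, b)`: lane 0 of `a` is replaced with
// lane 1 of `b`, which is why MAX moves to the front in `validate`.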

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1., 2., 3., 4.
b = 0., 0.5, 0., 0.
n = 0:1
validate 0.5, 2., 3., 4.

aarch64 = mov
generate float32x2_t, float32x4_t, float64x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle!, a:in_t, a, a, {asc-0-in_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 0:1
validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = mov
generate int8x8_t:int8x16_t:int8x8_t, int16x4_t:int16x8_t:int16x4_t, int32x2_t:int32x4_t:int32x2_t
generate uint8x8_t:uint8x16_t:uint8x8_t, uint16x4_t:uint16x8_t:uint16x4_t, uint32x2_t:uint32x4_t:uint32x2_t
generate poly8x8_t:poly8x16_t:poly8x8_t, poly16x4_t:poly16x8_t:poly16x4_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle!, a:in_t, a, a, {asc-0-in_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in_len-LANE2}
a = 1., 2., 3., 4.
b = 0., 0.5, 0., 0.
n = 0:1
validate 0.5, 2., 3., 4.

aarch64 = mov
generate float32x2_t:float32x4_t:float32x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle!, b:in_t0, b, b, {asc-0-in0_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 0:1
validate MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = mov
generate int8x16_t:int8x8_t:int8x16_t, int16x8_t:int16x4_t:int16x8_t, int32x4_t:int32x2_t:int32x4_t
generate uint8x16_t:uint8x8_t:uint8x16_t, uint16x8_t:uint16x4_t:uint16x8_t, uint32x4_t:uint32x2_t:uint32x4_t
generate poly8x16_t:poly8x8_t:poly8x16_t, poly16x8_t:poly16x4_t:poly16x8_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle!, b:in_t0, b, b, {asc-0-in0_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1:0
validate 1, MAX

aarch64 = mov
generate int64x2_t:int64x1_t:int64x2_t, uint64x2_t:uint64x1_t:uint64x2_t, poly64x2_t:poly64x1_t:poly64x2_t

/// Insert vector element from another vector element
name = vcopy
lane-suffixes
constn = LANE1:LANE2
multi_fn = static_assert_imm-in0_exp_len-LANE1
multi_fn = static_assert_imm-in_exp_len-LANE2
multi_fn = simd_shuffle!, b:in_t0, b, b, {asc-0-in0_len}
multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle!, a, b, {ins-in0_len-in0_len-LANE2}
a = 1., 2., 3., 4.
b = 0.5, 0., 0., 0.
n = 1:0
validate 1., 0.5, 3., 4.

aarch64 = mov
generate float32x4_t:float32x2_t:float32x4_t
aarch64 = mov
generate float64x2_t:float64x1_t:float64x2_t

/// Create a vector from a 64-bit bit pattern
name = vcreate
out-suffix
multi_fn = transmute, a
a = 1
validate 1, 0, 0, 0, 0, 0, 0, 0

aarch64 = nop
arm = nop
generate u64:int8x8_t, u64:int16x4_t, u64:int32x2_t, u64:int64x1_t
generate u64:uint8x8_t, u64:uint16x4_t, u64:uint32x2_t, u64:uint64x1_t
generate u64:poly8x8_t, u64:poly16x4_t
target = aes
generate u64:poly64x1_t

/// Create a vector from a 64-bit bit pattern
name = vcreate
out-suffix
multi_fn = transmute, a
a = 0
validate 0., 0.

aarch64 = nop
generate u64:float64x1_t
arm = nop
generate u64:float32x2_t

/// Fixed-point convert to floating-point
name = vcvt
double-suffixes
fn = simd_cast
a = 1, 2, 3, 4
validate 1., 2., 3., 4.

aarch64 = scvtf
generate int64x1_t:float64x1_t, int64x2_t:float64x2_t
aarch64 = ucvtf
generate uint64x1_t:float64x1_t, uint64x2_t:float64x2_t

arm = vcvt
aarch64 = scvtf
generate int32x2_t:float32x2_t, int32x4_t:float32x4_t
aarch64 = ucvtf
generate uint32x2_t:float32x2_t, uint32x4_t:float32x4_t

/// Floating-point convert to higher precision long
name = vcvt
double-suffixes
fn = simd_cast
a = -1.2, 1.2
validate -1.2f32 as f64, 1.2f32 as f64

aarch64 = fcvtl
generate float32x2_t:float64x2_t

/// Floating-point convert to higher precision long
name = vcvt_high
noq-double-suffixes
multi_fn = simd_shuffle!, b:float32x2_t, a, a, [2, 3]
multi_fn = simd_cast, b
a = -1.2, 1.2, 2.3, 3.4
validate 2.3f32 as f64, 3.4f32 as f64

aarch64 = fcvtl
generate float32x4_t:float64x2_t

/// Floating-point convert to lower precision narrow
name = vcvt
double-suffixes
fn = simd_cast
a = -1.2, 1.2
validate -1.2f64 as f32, 1.2f64 as f32

aarch64 = fcvtn
generate float64x2_t:float32x2_t

/// Floating-point convert to lower precision narrow
name = vcvt_high
noq-double-suffixes
multi_fn = simd_shuffle!, a, {simd_cast, b}, [0, 1, 2, 3]
a = -1.2, 1.2
b = -2.3, 3.4
validate -1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32

aarch64 = fcvtn
generate float32x2_t:float64x2_t:float32x4_t

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx
double-suffixes
a = -1.0, 2.0
validate -1.0, 2.0

aarch64 = fcvtxn
link-aarch64 = fcvtxn._EXT2_._EXT_
generate float64x2_t:float32x2_t

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx
double-suffixes
multi_fn = simd_extract, {vcvtx-_f32_f64-noext, {vdupq_n-in_ntt-noext, a}}, 0
a = -1.0
validate -1.0

aarch64 = fcvtxn
generate f64:f32

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx_high
noq-double-suffixes
multi_fn = simd_shuffle!, a, {vcvtx-noq_doubleself-noext, b}, [0, 1, 2, 3]
a = -1.0, 2.0
b = -3.0, 4.0
validate -1.0, 2.0, -3.0, 4.0

aarch64 = fcvtxn
generate float32x2_t:float64x2_t:float32x4_t

/// Fixed-point convert to floating-point
name = vcvt
double-n-suffixes
constn = N
multi_fn = static_assert-N-1-bits
a = 1, 2, 3, 4
n = 2
validate 0.25, 0.5, 0.75, 1.
arm-aarch64-separate

aarch64 = scvtf
link-aarch64 = vcvtfxs2fp._EXT2_._EXT_
const-aarch64 = N
generate int64x1_t:float64x1_t, int64x2_t:float64x2_t, i32:f32, i64:f64

aarch64 = ucvtf
link-aarch64 = vcvtfxu2fp._EXT2_._EXT_
const-aarch64 = N
generate uint64x1_t:float64x1_t, uint64x2_t:float64x2_t, u32:f32, u64:f64

aarch64 = scvtf
link-aarch64 = vcvtfxs2fp._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfxs2fp._EXT2_._EXT_
const-arm = N:i32

generate int32x2_t:float32x2_t, int32x4_t:float32x4_t

aarch64 = ucvtf
link-aarch64 = vcvtfxu2fp._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfxu2fp._EXT2_._EXT_
const-arm = N:i32
generate uint32x2_t:float32x2_t, uint32x4_t:float32x4_t
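
// With `n = 2` the input is interpreted as having N = 2 fractional
// bits, i.e. the result is a / 2^N, so the inputs 1, 2, 3, 4 validate
// as 0.25, 0.5, 0.75 and 1.0.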

/// Floating-point convert to fixed-point, rounding toward zero
name = vcvt
double-n-suffixes
constn = N
multi_fn = static_assert-N-1-bits
a = 0.25, 0.5, 0.75, 1.
n = 2
validate 1, 2, 3, 4
arm-aarch64-separate

aarch64 = fcvtzs
link-aarch64 = vcvtfp2fxs._EXT2_._EXT_
const-aarch64 = N
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

aarch64 = fcvtzu
link-aarch64 = vcvtfp2fxu._EXT2_._EXT_
const-aarch64 = N
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

aarch64 = fcvtzs
link-aarch64 = vcvtfp2fxs._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfp2fxs._EXT2_._EXT_
const-arm = N:i32
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t

aarch64 = fcvtzu
link-aarch64 = vcvtfp2fxu._EXT2_._EXT_
arm = vcvt
link-arm = vcvtfp2fxu._EXT2_._EXT_
const-arm = N:i32
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Fixed-point convert to floating-point
name = vcvt
double-suffixes
multi_fn = a as out_t
a = 1
validate 1.

aarch64 = scvtf
generate i32:f32, i64:f64
aarch64 = ucvtf
generate u32:f32, u64:f64

/// Floating-point convert to fixed-point, rounding toward zero
name = vcvt
double-suffixes
multi_fn = a as out_t
a = 1.
validate 1

aarch64 = fcvtzs
generate f32:i32, f64:i64
aarch64 = fcvtzu
generate f32:u32, f64:u64

/// Floating-point convert to signed fixed-point, rounding toward zero
name = vcvt
double-suffixes
link-aarch64 = llvm.fptosi.sat._EXT2_._EXT_
a = -1.1, 2.1, -2.9, 3.9
validate -1, 2, -2, 3

aarch64 = fcvtzs
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t

link-arm = llvm.fptosi.sat._EXT2_._EXT_
arm = vcvt
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t

/// Floating-point convert to unsigned fixed-point, rounding toward zero
name = vcvt
double-suffixes
link-aarch64 = llvm.fptoui.sat._EXT2_._EXT_
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 2, 3

aarch64 = fcvtzu
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

link-arm = llvm.fptoui.sat._EXT2_._EXT_
arm = vcvt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point convert to signed integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -1, 2, -3, 4

aarch64 = fcvtas
link-aarch64 = fcvtas._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t

/// Floating-point convert to integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = 2.9
validate 3

aarch64 = fcvtas
link-aarch64 = fcvtas._EXT2_._EXT_
generate f32:i32, f64:i64

aarch64 = fcvtau
link-aarch64 = fcvtau._EXT2_._EXT_
generate f32:u32, f64:u64

/// Floating-point convert to signed integer, rounding to nearest with ties to even
name = vcvtn
double-suffixes
a = -1.5, 2.1, -2.9, 3.9
validate -2, 2, -3, 4

aarch64 = fcvtns
link-aarch64 = fcvtns._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

/// Floating-point convert to signed integer, rounding toward minus infinity
name = vcvtm
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -2, 2, -3, 3

aarch64 = fcvtms
link-aarch64 = fcvtms._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

/// Floating-point convert to signed integer, rounding toward plus infinity
name = vcvtp
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -1, 3, -2, 4

aarch64 = fcvtps
link-aarch64 = fcvtps._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t, f32:i32, f64:i64

/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 3, 4

aarch64 = fcvtau
link-aarch64 = fcvtau._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
name = vcvtn
double-suffixes
a = 1.5, 2.1, 2.9, 3.9
validate 2, 2, 3, 4

aarch64 = fcvtnu
link-aarch64 = fcvtnu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

/// Floating-point convert to unsigned integer, rounding toward minus infinity
name = vcvtm
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 2, 3

aarch64 = fcvtmu
link-aarch64 = fcvtmu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

/// Floating-point convert to unsigned integer, rounding toward plus infinity
name = vcvtp
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 2, 3, 3, 4

aarch64 = fcvtpu
link-aarch64 = fcvtpu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t, f32:u32, f64:u64

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle!, a, a, {dup-out_len-N as u32}
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
n = HFLEN
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

aarch64 = dup
generate poly64x2_t, poly64x1_t:poly64x2_t

arm = vdup.l
generate int*_t
generate int8x16_t:int8x8_t, int16x8_t:int16x4_t, int32x4_t:int32x2_t
generate int8x8_t:int8x16_t, int16x4_t:int16x8_t, int32x2_t:int32x4_t

generate uint*_t
generate uint8x16_t:uint8x8_t, uint16x8_t:uint16x4_t, uint32x4_t:uint32x2_t
generate uint8x8_t:uint8x16_t, uint16x4_t:uint16x8_t, uint32x2_t:uint32x4_t

generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t
generate poly8x16_t:poly8x8_t, poly16x8_t:poly16x4_t
generate poly8x8_t:poly8x16_t, poly16x4_t:poly16x8_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle!, a, a, {dup-out_len-N as u32}
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
n = HFLEN
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

aarch64 = dup
arm = vmov
generate int64x2_t, int64x1_t:int64x2_t, uint64x2_t, uint64x1_t:uint64x2_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_shuffle!, a, a, {dup-out_len-N as u32}
a = 1., 1., 1., 4.
n = HFLEN
validate 1., 1., 1., 1.

aarch64 = dup
generate float64x2_t, float64x1_t:float64x2_t

arm = vdup.l
generate float*_t, float32x4_t:float32x2_t, float32x2_t:float32x4_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = a
a = 0
n = HFLEN
validate 0

aarch64 = nop
generate poly64x1_t

arm = nop
generate int64x1_t, uint64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = a
a = 0.
n = HFLEN
validate 0.

aarch64 = nop
generate float64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = transmute--<element_t _>, {simd_extract, a, N as u32}
a = 0, 1
n = HFLEN
validate 1

aarch64 = nop
generate poly64x2_t:poly64x1_t

arm = vmov
generate int64x2_t:int64x1_t, uint64x2_t:uint64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = transmute--<element_t _>, {simd_extract, a, N as u32}
a = 0., 1.
n = HFLEN
validate 1.

aarch64 = nop
generate float64x2_t:float64x1_t

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, a, N as u32
a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16
n = HFLEN
validate 1

aarch64 = nop
generate int8x8_t:i8, int8x16_t:i8, int16x4_t:i16, int16x8_t:i16, int32x2_t:i32, int32x4_t:i32, int64x1_t:i64, int64x2_t:i64
generate uint8x8_t:u8, uint8x16_t:u8, uint16x4_t:u16, uint16x8_t:u16, uint32x2_t:u32, uint32x4_t:u32, uint64x1_t:u64, uint64x2_t:u64
generate poly8x8_t:p8, poly8x16_t:p8, poly16x4_t:p16, poly16x8_t:p16

/// Set all vector lanes to the same value
name = vdup
lane-suffixes
constn = N
multi_fn = static_assert_imm-in_exp_len-N
multi_fn = simd_extract, a, N as u32
a = 1., 1., 1., 4.
n = HFLEN
validate 1.

aarch64 = nop
generate float32x2_t:f32, float32x4_t:f32, float64x1_t:f64, float64x2_t:f64

/// Extract vector from pair of vectors
name = vext
constn = N
multi_fn = static_assert_imm-out_exp_len-N
multi_fn = matchn-out_exp_len-N, simd_shuffle!, a, b, {asc-n-out_len}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
n = LEN_M1
validate 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2

arm = "vext.8"
aarch64 = ext
generate int*_t, uint*_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

/// Extract vector from pair of vectors
name = vext
constn = N
multi_fn = static_assert_imm-out_exp_len-N
multi_fn = matchn-out_exp_len-N, simd_shuffle!, a, b, {asc-n-out_len}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
n = LEN_M1
validate 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2

aarch64 = ext
generate poly64x2_t

arm = vmov
generate int64x2_t, uint64x2_t

/// Extract vector from pair of vectors
name = vext
constn = N
multi_fn = static_assert_imm-out_exp_len-N
multi_fn = matchn-out_exp_len-N, simd_shuffle!, a, b, {asc-n-out_len}
a = 1., 1., 1., 1.
b = 2., 2., 2., 2.
1605 n = LEN_M1
1606 validate 1., 2., 2., 2.
1607
1608 aarch64 = ext
1609 generate float64x2_t
1610
1611 arm = "vext.8"
1612 generate float*_t
1613
1614 /// Multiply-add to accumulator
1615 name = vmla
1616 multi_fn = simd_add, a, {simd_mul, b, c}
1617 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1618 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1619 c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1620 validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1621
1622 arm = vmla.
1623 aarch64 = mla
1624 generate int*_t, uint*_t
1625
1626 /// Floating-point multiply-add to accumulator
1627 name = vmla
1628 multi_fn = simd_add, a, {simd_mul, b, c}
1629 a = 0., 1., 2., 3.
1630 b = 2., 2., 2., 2.
1631 c = 3., 3., 3., 3.
1632 validate 6., 7., 8., 9.
1633
1634 aarch64 = fmul
1635 generate float64x*_t
1636
1637 arm = vmla.
1638 generate float*_t
1639
1640 /// Vector multiply accumulate with scalar
1641 name = vmla
1642 n-suffix
1643 multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
1644 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1645 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1646 c = 3
1647 validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1648
1649 aarch64 = mla
1650 arm = vmla.
1651 generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
1652 generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t
1653
1654 /// Vector multiply accumulate with scalar
1655 name = vmla
1656 n-suffix
1657 multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
1658 a = 0., 1., 2., 3.
1659 b = 2., 2., 2., 2.
1660 c = 3.
1661 validate 6., 7., 8., 9.
1662
1663 aarch64 = fmul
1664 arm = vmla.
1665 generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
1666
1667 /// Vector multiply accumulate with scalar
1668 name = vmla
1669 in2-lane-suffixes
1670 constn = LANE
1671 multi_fn = static_assert_imm-in2_exp_len-LANE
1672 multi_fn = vmla-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
1673 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1674 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1675 c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1676 n = 1
1677 validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1678
1679 aarch64 = mla
1680 arm = vmla.
1681 generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
1682 generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
1683 generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
1684 generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
1685
1686 /// Vector multiply accumulate with scalar
1687 name = vmla
1688 in2-lane-suffixes
1689 constn = LANE
1690 multi_fn = static_assert_imm-in2_exp_len-LANE
1691 multi_fn = vmla-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
1692 a = 0., 1., 2., 3.
1693 b = 2., 2., 2., 2.
1694 c = 0., 3., 0., 0.
1695 n = 1
1696 validate 6., 7., 8., 9.
1697
1698 aarch64 = fmul
1699 arm = vmla.
1700 generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
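
// The _lane forms splat lane LANE of c before the multiply-add. Sketch
// (public stdarch names; the `demo` wrapper and values are illustrative
// only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo() {
//         let a = vdup_n_f32(0.0);
//         let b = vdup_n_f32(2.0);
//         let c = vld1_f32([0.0, 3.0].as_ptr());
//         // lane 1 of c (3.0) is broadcast: every lane is 0.0 + 2.0 * 3.0
//         let _r = vmla_lane_f32::<1>(a, b, c);
//     }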
1701
1702 /// Signed multiply-add long
1703 name = vmlal
1704 multi_fn = simd_add, a, {vmull-self-noext, b, c}
1705 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1706 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1707 c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1708 validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1709
1710 arm = vmlal.s
1711 aarch64 = smlal
1712 generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
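
// vmlal widens b and c before multiplying, so the product accumulates at
// twice the lane width. Sketch (public stdarch names; the `demo` wrapper
// and values are illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo() {
//         let a = vdupq_n_s16(10);
//         let b = vdup_n_s8(100);
//         let c = vdup_n_s8(100);
//         // 100 * 100 overflows i8 but fits the i16 accumulator:
//         let _r = vmlal_s8(a, b, c); // every lane: 10 + 10000 == 10010
//     }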
1713
1714 /// Unsigned multiply-add long
1715 name = vmlal
1716 multi_fn = simd_add, a, {vmull-self-noext, b, c}
1717 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1718 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1719 c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1720 validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1721
1722 arm = vmlal.s
1723 aarch64 = umlal
1724 generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
1725
1726 /// Vector widening multiply accumulate with scalar
1727 name = vmlal
1728 n-suffix
1729 multi_fn = vmlal-self-noext, a, b, {vdup-nself-noext, c}
1730 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1731 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1732 c = 3
1733 validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1734
1735 arm = vmlal.s
1736 aarch64 = smlal
1737 generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
1738 aarch64 = umlal
1739 generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t
1740
1741 /// Vector widening multiply accumulate with scalar
1742 name = vmlal_lane
1743 in2-suffix
1744 constn = LANE
1745 multi_fn = static_assert_imm-in2_exp_len-LANE
1746 multi_fn = vmlal-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
1747 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1748 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1749 c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1750 n = 1
1751 validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1752
1753 arm = vmlal.s
1754 aarch64 = smlal
1755 generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
1756 generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
1757 aarch64 = umlal
1758 generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
1759 generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t
1760
1761 /// Signed multiply-add long
1762 name = vmlal_high
1763 no-q
1764 multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
1765 multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
1766 multi_fn = vmlal-noqself-noext, a, b, c
1767 a = 8, 7, 6, 5, 4, 3, 2, 1
1768 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1769 c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1770 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1771 validate 8, 9, 10, 11, 12, 13, 14, 15
1772
1773 aarch64 = smlal2
1774 generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
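
// The _high forms read only the upper halves of b and c, which is why the
// test vectors above validate against the back half of c. Sketch (public
// stdarch names; the `demo` wrapper is illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
//         // equivalent to vmlal_s8(a, vget_high_s8(b), vget_high_s8(c))
//         vmlal_high_s8(a, b, c)
//     }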
1775
1776 /// Unsigned multiply-add long
1777 name = vmlal_high
1778 no-q
1779 multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
1780 multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
1781 multi_fn = vmlal-noqself-noext, a, b, c
1782 a = 8, 7, 6, 5, 4, 3, 2, 1
1783 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1784 c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1785 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1786 validate 8, 9, 10, 11, 12, 13, 14, 15
1787
1788 aarch64 = umlal2
1789 generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
1790
1791 /// Multiply-add long
1792 name = vmlal_high_n
1793 no-q
1794 multi_fn = vmlal_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
1795 a = 8, 7, 6, 5, 4, 3, 2, 1
1796 b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1797 c = 2
1798 validate 8, 9, 10, 11, 12, 13, 14, 15
1799
1800 aarch64 = smlal2
1801 generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
1802 aarch64 = umlal2
1803 generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t
1804
1805 /// Multiply-add long
1806 name = vmlal_high_lane
1807 in2-suffix
1808 constn = LANE
1809 multi_fn = static_assert_imm-in2_exp_len-LANE
1810 multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
1811 a = 8, 7, 6, 5, 4, 3, 2, 1
1812 b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1813 c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1814 n = 1
1815 validate 8, 9, 10, 11, 12, 13, 14, 15
1816
1817 aarch64 = smlal2
1818 generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
1819 generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
1820 aarch64 = umlal2
1821 generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
1822 generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
1823
1824 /// Multiply-subtract from accumulator
1825 name = vmls
1826 multi_fn = simd_sub, a, {simd_mul, b, c}
1827 a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1828 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1829 c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1830 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1831
1832 arm = vmls.
1833 aarch64 = mls
1834 generate int*_t, uint*_t
1835
1836 /// Floating-point multiply-subtract from accumulator
1837 name = vmls
1838 multi_fn = simd_sub, a, {simd_mul, b, c}
1839 a = 6., 7., 8., 9.
1840 b = 2., 2., 2., 2.
1841 c = 3., 3., 3., 3.
1842 validate 0., 1., 2., 3.
1843
1844 aarch64 = fmul
1845 generate float64x*_t
1846
1847 arm = vmls.
1848 generate float*_t
1849
1850 /// Vector multiply subtract with scalar
1851 name = vmls
1852 n-suffix
1853 multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
1854 a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1855 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1856 c = 3
1857 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1858
1859 aarch64 = mls
1860 arm = vmls.
1861 generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
1862 generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t
1863
1864 /// Vector multiply subtract with scalar
1865 name = vmls
1866 n-suffix
1867 multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
1868 a = 6., 7., 8., 9.
1869 b = 2., 2., 2., 2.
1870 c = 3.
1871 validate 0., 1., 2., 3.
1872
1873 aarch64 = fmul
1874 arm = vmls.
1875 generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
1876
1877 /// Vector multiply subtract with scalar
1878 name = vmls
1879 in2-lane-suffixes
1880 constn = LANE
1881 multi_fn = static_assert_imm-in2_exp_len-LANE
1882 multi_fn = vmls-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
1883 a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1884 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1885 c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1886 n = 1
1887 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1888
1889 aarch64 = mls
1890 arm = vmls.
1891 generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
1892 generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
1893 generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
1894 generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
1895
1896 /// Vector multiply subtract with scalar
1897 name = vmls
1898 in2-lane-suffixes
1899 constn = LANE
1900 multi_fn = static_assert_imm-in2_exp_len-LANE
1901 multi_fn = vmls-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
1902 a = 6., 7., 8., 9.
1903 b = 2., 2., 2., 2.
1904 c = 0., 3., 0., 0.
1905 n = 1
1906 validate 0., 1., 2., 3.
1907
1908 aarch64 = fmul
1909 arm = vmls.
1910 generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
1911
1912 /// Signed multiply-subtract long
1913 name = vmlsl
1914 multi_fn = simd_sub, a, {vmull-self-noext, b, c}
1915 a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1916 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1917 c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1918 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1919
1920 arm = vmlsl.s
1921 aarch64 = smlsl
1922 generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
1923
1924 /// Unsigned multiply-subtract long
1925 name = vmlsl
1926 multi_fn = simd_sub, a, {vmull-self-noext, b, c}
1927 a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1928 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1929 c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
1930 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1931
1932 arm = vmlsl.s
1933 aarch64 = umlsl
1934 generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
1935
1936 /// Vector widening multiply subtract with scalar
1937 name = vmlsl
1938 n-suffix
1939 multi_fn = vmlsl-self-noext, a, b, {vdup-nself-noext, c}
1940 a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1941 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1942 c = 3
1943 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1944
1945 arm = vmlsl.s
1946 aarch64 = smlsl
1947 generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
1948 aarch64 = umlsl
1949 generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t
1950
1951 /// Vector widening multiply subtract with scalar
1952 name = vmlsl_lane
1953 in2-suffix
1954 constn = LANE
1955 multi_fn = static_assert_imm-in2_exp_len-LANE
1956 multi_fn = vmlsl-self-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
1957 a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1958 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1959 c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1960 n = 1
1961 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1962
1963 arm = vmlsl.s
1964 aarch64 = smlsl
1965 generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
1966 generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
1967 aarch64 = umlsl
1968 generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
1969 generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t
1970
1971 /// Signed multiply-subtract long
1972 name = vmlsl_high
1973 no-q
1974 multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
1975 multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
1976 multi_fn = vmlsl-noqself-noext, a, b, c
1977 a = 14, 15, 16, 17, 18, 19, 20, 21
1978 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1979 c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1980 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1981 validate 14, 13, 12, 11, 10, 9, 8, 7
1982
1983 aarch64 = smlsl2
1984 generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
1985
1986 /// Unsigned multiply-subtract long
1987 name = vmlsl_high
1988 no-q
1989 multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
1990 multi_fn = simd_shuffle!, c:half, c, c, {fixed-half-right}
1991 multi_fn = vmlsl-noqself-noext, a, b, c
1992 a = 14, 15, 16, 17, 18, 19, 20, 21
1993 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1994 c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
1995 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1996 validate 14, 13, 12, 11, 10, 9, 8, 7
1997
1998 aarch64 = umlsl2
1999 generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
2000
2001 /// Multiply-subtract long
2002 name = vmlsl_high_n
2003 no-q
2004 multi_fn = vmlsl_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
2005 a = 14, 15, 16, 17, 18, 19, 20, 21
2006 b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
2007 c = 2
2008 validate 14, 13, 12, 11, 10, 9, 8, 7
2009
2010 aarch64 = smlsl2
2011 generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
2012 aarch64 = umlsl2
2013 generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t
2014
2015 /// Multiply-subtract long
2016 name = vmlsl_high_lane
2017 in2-suffix
2018 constn = LANE
2019 multi_fn = static_assert_imm-in2_exp_len-LANE
2020 multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle!, c, c, {dup-in_len-LANE as u32}}
2021 a = 14, 15, 16, 17, 18, 19, 20, 21
2022 b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
2023 c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2024 n = 1
2025 validate 14, 13, 12, 11, 10, 9, 8, 7
2026
2027 aarch64 = smlsl2
2028 generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
2029 generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
2030 aarch64 = umlsl2
2031 generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
2032 generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
2033
2034 /// Extract narrow
2035 name = vmovn_high
2036 no-q
2037 multi_fn = simd_cast, c:in_t0, b
2038 multi_fn = simd_shuffle!, a, c, {asc-0-out_len}
2039 a = 0, 1, 2, 3, 2, 3, 4, 5
2040 b = 2, 3, 4, 5, 12, 13, 14, 15
2041 validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15
2042
2043 aarch64 = xtn2
2044 generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
2045 generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
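
// vmovn_high narrows b and places it in the upper half of the result,
// keeping a as the lower half. Sketch (public stdarch names; the `demo`
// wrapper is illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo(a: int8x8_t, b: int16x8_t) -> int8x16_t {
//         // equivalent to vcombine_s8(a, vmovn_s16(b))
//         vmovn_high_s16(a, b)
//     }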
2046
2047 /// Negate
2048 name = vneg
2049 fn = simd_neg
2050 a = 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8
2051 validate 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8
2052
2053 aarch64 = neg
2054 generate int64x*_t
2055
2056 arm = vneg.s
2057 generate int*_t
2058
2059 /// Negate
2060 name = vneg
2061 multi_fn = a.wrapping_neg()
2062 a = 1
2063 validate -1
2064
2065 aarch64 = neg
2066 generate i64
2067
2068 /// Negate
2069 name = vneg
2070 fn = simd_neg
2071 a = 0., 1., -1., 2., -2., 3., -3., 4.
2072 validate 0., -1., 1., -2., 2., -3., 3., -4.
2073
2074 aarch64 = fneg
2075 generate float64x*_t
2076
2077 arm = vneg.s
2078 generate float*_t
2079
2080 /// Signed saturating negate
2081 name = vqneg
2082 a = MIN, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7
2083 validate MAX, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7
2084 link-arm = vqneg._EXT_
2085 link-aarch64 = sqneg._EXT_
2086
2087 aarch64 = sqneg
2088 generate int64x*_t
2089
2090 arm = vqneg.s
2091 generate int*_t
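
// Unlike plain negation, vqneg saturates: negating MIN cannot wrap back to
// MIN, matching the MIN -> MAX pair in the test vectors above. Sketch
// (public stdarch names; the `demo` wrapper and values are illustrative
// only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo() {
//         let a = vdup_n_s8(i8::MIN);
//         let _r = vqneg_s8(a); // every lane: i8::MAX (127), not -128
//     }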
2092
2093 /// Signed saturating negate
2094 name = vqneg
2095 multi_fn = simd_extract, {vqneg-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
2096 a = 1
2097 validate -1
2098
2099 aarch64 = sqneg
2100 generate i8, i16, i32, i64
2101
2102 /// Saturating subtract
2103 name = vqsub
2104 a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2105 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2106 validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26
2107
2108 arm = vqsub.s
2109 aarch64 = uqsub
2110 link-arm = llvm.usub.sat._EXT_
2111 link-aarch64 = uqsub._EXT_
2112 generate uint*_t, uint64x*_t
2113
2114 arm = vqsub.s
2115 aarch64 = sqsub
2116 link-arm = llvm.ssub.sat._EXT_
2117 link-aarch64 = sqsub._EXT_
2118 generate int*_t, int64x*_t
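
// Saturating subtraction clamps at the type bounds instead of wrapping.
// Sketch (public stdarch names; the `demo` wrapper and values are
// illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo() {
//         let a = vdup_n_u8(10);
//         let b = vdup_n_u8(20);
//         let _r = vqsub_u8(a, b); // every lane: 0, not the wrapped 246
//     }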
2119
2120 /// Saturating subtract
2121 name = vqsub
2122 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
2123 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
2124 multi_fn = simd_extract, {vqsub-in_ntt-noext, a, b}, 0
2125 a = 42
2126 b = 1
2127 validate 41
2128
2129 aarch64 = sqsub
2130 generate i8, i16
2131 aarch64 = uqsub
2132 generate u8, u16
2133
2134 /// Saturating subtract
2135 name = vqsub
2136 a = 42
2137 b = 1
2138 validate 41
2139
2140 aarch64 = uqsub
2141 link-aarch64 = uqsub._EXT_
2142 generate u32, u64
2143
2144 aarch64 = sqsub
2145 link-aarch64 = sqsub._EXT_
2146 generate i32, i64
2147
2148 /// Halving add
2149 name = vhadd
2150 a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2151 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2152 validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
2153
2154 arm = vhadd.s
2155 aarch64 = uhadd
2156 link-aarch64 = uhadd._EXT_
2157 link-arm = vhaddu._EXT_
2158 generate uint*_t
2159
2160 arm = vhadd.s
2161 aarch64 = shadd
2162 link-aarch64 = shadd._EXT_
2163 link-arm = vhadds._EXT_
2164 generate int*_t
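
// vhadd forms the sum at full precision before halving, so (a + b) >> 1
// never loses the carry. Sketch (public stdarch names; the `demo` wrapper
// and values are illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo() {
//         let a = vdup_n_u8(250);
//         let b = vdup_n_u8(250);
//         // every lane: 250, even though 250 + 250 overflows u8
//         let _r = vhadd_u8(a, b);
//     }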
2165
2166 /// Reverse bit order
2167 name = vrbit
2168 a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
2169 validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
2170
2171 aarch64 = rbit
2172 link-aarch64 = rbit._EXT_
2173
2174 generate int8x8_t, int8x16_t
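
// Reversing the bits of 2 (0b0000_0010) within each byte gives 64
// (0b0100_0000), matching the test vectors above. Sketch (public stdarch
// names; the `demo` wrapper and values are illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo() {
//         let a = vdup_n_s8(2);
//         let _r = vrbit_s8(a); // every lane: 64
//     }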
2175
2176 /// Reverse bit order
2177 name = vrbit
2178 multi_fn = transmute, {vrbit-signed-noext, transmute(a)}
2179 a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
2180 validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
2181
2182 aarch64 = rbit
2183
2184 generate uint8x8_t, uint8x16_t, poly8x8_t, poly8x16_t
2185
2186 /// Rounding halving add
2187 name = vrhadd
2188 a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2189 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2190 validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29
2191
2192 arm = vrhadd.s
2193 aarch64 = urhadd
2194 link-arm = vrhaddu._EXT_
2195 link-aarch64 = urhadd._EXT_
2196 generate uint*_t
2197
2198 arm = vrhadd.s
2199 aarch64 = srhadd
2200 link-arm = vrhadds._EXT_
2201 link-aarch64 = srhadd._EXT_
2202 generate int*_t
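
// The rounding variant computes (a + b + 1) >> 1, again at full precision;
// compare vhadd above: 42 and 1 halve to 21, but rounding-halve to 22.
// Sketch (public stdarch names; the `demo` wrapper and values are
// illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo() {
//         let a = vdup_n_u8(42);
//         let b = vdup_n_u8(1);
//         let _r = vrhadd_u8(a, b); // every lane: (42 + 1 + 1) >> 1 == 22
//     }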
2203
2204 /// Floating-point round to integral exact, using current rounding mode
2205 name = vrndx
2206 a = -1.5, 0.5, 1.5, 2.5
2207 validate -2.0, 0.0, 2.0, 2.0
2208
2209 aarch64 = frintx
2210 link-aarch64 = llvm.rint._EXT_
2211 generate float*_t, float64x*_t
2212
2213 /// Floating-point round to integral, to nearest with ties to away
2214 name = vrnda
2215 a = -1.5, 0.5, 1.5, 2.5
2216 validate -2.0, 1.0, 2.0, 3.0
2217
2218 aarch64 = frinta
2219 link-aarch64 = llvm.round._EXT_
2220 generate float*_t, float64x*_t
2221
2222 /// Floating-point round to integral, to nearest with ties to even
2223 name = vrndn
2224 a = -1.5, 0.5, 1.5, 2.5
2225 validate -2.0, 0.0, 2.0, 2.0
2226
2227 link-aarch64 = frintn._EXT_
2228 aarch64 = frintn
2229 generate float64x*_t
2230
2231 target = fp-armv8
2232 arm = vrintn
2233 link-arm = vrintn._EXT_
2234 generate float*_t
2235
2236 /// Floating-point round to integral, to nearest with ties to even
2237 name = vrndn
2238 a = -1.5
2239 validate -2.0
2240
2241 aarch64 = frintn
2242 link-aarch64 = llvm.roundeven._EXT_
2243 generate f32
2244
2245 /// Floating-point round to integral, toward minus infinity
2246 name = vrndm
2247 a = -1.5, 0.5, 1.5, 2.5
2248 validate -2.0, 0.0, 1.0, 2.0
2249
2250 aarch64 = frintm
2251 link-aarch64 = llvm.floor._EXT_
2252 generate float*_t, float64x*_t
2253
2254 /// Floating-point round to integral, toward plus infinity
2255 name = vrndp
2256 a = -1.5, 0.5, 1.5, 2.5
2257 validate -1.0, 1.0, 2.0, 3.0
2258
2259 aarch64 = frintp
2260 link-aarch64 = llvm.ceil._EXT_
2261 generate float*_t, float64x*_t
2262
2263 /// Floating-point round to integral, toward zero
2264 name = vrnd
2265 a = -1.5, 0.5, 1.5, 2.5
2266 validate -1.0, 0.0, 1.0, 2.0
2267
2268 aarch64 = frintz
2269 link-aarch64 = llvm.trunc._EXT_
2270 generate float*_t, float64x*_t
2271
2272 /// Floating-point round to integral, using current rounding mode
2273 name = vrndi
2274 a = -1.5, 0.5, 1.5, 2.5
2275 validate -2.0, 0.0, 2.0, 2.0
2276
2277 aarch64 = frinti
2278 link-aarch64 = llvm.nearbyint._EXT_
2279 generate float*_t, float64x*_t
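
// The explicit-mode rounding forms above differ only in how fractions and
// ties are resolved; for the shared test input [-1.5, 0.5, 1.5, 2.5]:
//
//     vrndn: [-2.0, 0.0, 2.0, 2.0]   (ties to even)
//     vrnda: [-2.0, 1.0, 2.0, 3.0]   (ties away from zero)
//     vrndm: [-2.0, 0.0, 1.0, 2.0]   (toward minus infinity, i.e. floor)
//     vrndp: [-1.0, 1.0, 2.0, 3.0]   (toward plus infinity, i.e. ceil)
//     vrnd:  [-1.0, 0.0, 1.0, 2.0]   (toward zero, i.e. trunc)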
2280
2281 /// Saturating add
2282 name = vqadd
2283 a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2284 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2285 validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
2286
2287 arm = vqadd.s
2288 aarch64 = uqadd
2289 link-arm = llvm.uadd.sat._EXT_
2290 link-aarch64 = uqadd._EXT_
2291 generate uint*_t, uint64x*_t
2292
2293 arm = vqadd.s
2294 aarch64 = sqadd
2295 link-arm = llvm.sadd.sat._EXT_
2296 link-aarch64 = sqadd._EXT_
2297 generate int*_t, int64x*_t
2298
2299 /// Saturating add
2300 name = vqadd
2301 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
2302 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
2303 multi_fn = simd_extract, {vqadd-in_ntt-noext, a, b}, 0
2304 a = 42
2305 b = 1
2306 validate 43
2307
2308 aarch64 = sqadd
2309 generate i8, i16
2310 aarch64 = uqadd
2311 generate u8, u16
2312
2313 /// Saturating add
2314 name = vqadd
2315 a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
2316 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2317 validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
2318
2319 aarch64 = uqadd
2320 link-aarch64 = uqadd._EXT_
2321 generate u32, u64
2322
2323 aarch64 = sqadd
2324 link-aarch64 = sqadd._EXT_
2325 generate i32, i64
2326
2327 /// Load multiple single-element structures to one, two, three, or four registers
2328 name = vld1
2329 out-suffix
2330 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2331 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2332 load_fn
2333
2334 aarch64 = ld1
2335 link-aarch64 = ld1x2._EXT2_
2336 arm = vld1
2337 link-arm = vld1x2._EXT2_
2338 generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
2339 generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
2340
2341 link-aarch64 = ld1x3._EXT2_
2342 link-arm = vld1x3._EXT2_
2343 generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
2344 generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t, *const i64:int64x2x3_t
2345
2346 link-aarch64 = ld1x4._EXT2_
2347 link-arm = vld1x4._EXT2_
2348 generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
2349 generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t, *const i64:int64x2x4_t
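
// The _x2/_x3/_x4 forms load that many registers of contiguous,
// non-interleaved data from the pointer. Sketch (public stdarch names; the
// `demo` wrapper is illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo(p: *const i8) -> int8x8x2_t {
//         // reads 16 consecutive bytes: p[0..8] into .0, p[8..16] into .1
//         vld1_s8_x2(p)
//     }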
2350
2351 /// Load multiple single-element structures to one, two, three, or four registers
2352 name = vld1
2353 out-suffix
2354 multi_fn = transmute, {vld1-outsigned-noext, transmute(a)}
2355 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2356 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
2357
2358 load_fn
2359 aarch64 = ld1
2360 arm = vld1
2361 generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t
2362 generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t
2363 generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t
2364 generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t, *const u64:uint64x2x3_t
2365 generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t
2366 generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t, *const u64:uint64x2x4_t
2367 generate *const p8:poly8x8x2_t, *const p8:poly8x8x3_t, *const p8:poly8x8x4_t
2368 generate *const p8:poly8x16x2_t, *const p8:poly8x16x3_t, *const p8:poly8x16x4_t
2369 generate *const p16:poly16x4x2_t, *const p16:poly16x4x3_t, *const p16:poly16x4x4_t
2370 generate *const p16:poly16x8x2_t, *const p16:poly16x8x3_t, *const p16:poly16x8x4_t
2371 target = aes
2372 generate *const p64:poly64x1x2_t
2373 arm = nop
2374 generate *const p64:poly64x1x3_t, *const p64:poly64x1x4_t
2375 generate *const p64:poly64x2x2_t, *const p64:poly64x2x3_t, *const p64:poly64x2x4_t

2376 /// Load multiple single-element structures to one, two, three, or four registers
2377 name = vld1
2378 out-suffix
2379 a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
2380 validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
2381 load_fn
2382
2383 aarch64 = ld1
2384 link-aarch64 = ld1x2._EXT2_
2385 generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
2386
2387 link-aarch64 = ld1x3._EXT2_
2388 generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
2389
2390 link-aarch64 = ld1x4._EXT2_
2391 generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
2392
2393 arm = vld1
2394 link-aarch64 = ld1x2._EXT2_
2395 link-arm = vld1x2._EXT2_
2396 generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
2397
2398 link-aarch64 = ld1x3._EXT2_
2399 link-arm = vld1x3._EXT2_
2400 generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
2401
2402 link-aarch64 = ld1x4._EXT2_
2403 link-arm = vld1x4._EXT2_
2404 generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
2405
2406 /// Load multiple 2-element structures to two registers
2407 name = vld2
2408 out-nox
2409 a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2410 validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
2411 load_fn
2412 arm-aarch64-separate
2413
2414 aarch64 = ld2
2415 link-aarch64 = ld2._EXTv2_
2416 generate *const i64:int64x2x2_t
2417
2418 arm = vld2
2419 link-arm = vld2._EXTpi82_
2420 generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
2421 generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
2422 arm = nop
2423 aarch64 = nop
2424 generate *const i64:int64x1x2_t
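
// vld2 de-interleaves: element 2*i goes to lane i of the first register,
// element 2*i + 1 to lane i of the second. Sketch (public stdarch names;
// the `demo` wrapper and values are illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo() {
//         let data: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
//         let r: int8x8x2_t = vld2_s8(data.as_ptr());
//         // r.0 == [0, 2, 4, ..., 14]; r.1 == [1, 3, 5, ..., 15]
//     }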
2425
2426 /// Load multiple 2-element structures to two registers
2427 name = vld2
2428 out-nox
2429 multi_fn = transmute, {vld2-outsignednox-noext, transmute(a)}
2430 a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2431 validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
2432 load_fn
2433
2434 aarch64 = ld2
2435 generate *const u64:uint64x2x2_t
2436 target = aes
2437 generate *const p64:poly64x2x2_t
2438
2439 target = default
2440 arm = vld2
2441 generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
2442 generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
2443 generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
2444 arm = nop
2445 aarch64 = nop
2446 generate *const u64:uint64x1x2_t
2447 target = aes
2448 generate *const p64:poly64x1x2_t
2449
2451 /// Load multiple 2-element structures to two registers
2452 name = vld2
2453 out-nox
2454 a = 0., 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
2455 validate 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
2456 load_fn
2457 arm-aarch64-separate
2458
2459 aarch64 = nop
2460 link-aarch64 = ld2._EXTv2_
2461 generate *const f64:float64x1x2_t
2462 aarch64 = ld2
2463 generate *const f64:float64x2x2_t
2464
2465 arm = vld2
2466 link-arm = vld2._EXTpi82_
2467 generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
2468
2469 /// Load single 2-element structure and replicate to all lanes of two registers
2470 name = vld2
2471 out-dup-nox
2472 a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2473 validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2474 load_fn
2475 arm-aarch64-separate
2476
2477 aarch64 = ld2r
2478 link-aarch64 = ld2r._EXT2_
2479 generate *const i64:int64x2x2_t
2480
2481 arm = vld2
2482 link-arm = vld2dup._EXTpi82_
2483 generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
2484 generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
2485 arm = nop
2486 generate *const i64:int64x1x2_t
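
// The dup (ld2r) form reads a single 2-element structure and broadcasts
// its two elements across all lanes of the two registers. Sketch (public
// stdarch names; the `demo` wrapper and values are illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo() {
//         let data: [i8; 2] = [7, 9];
//         let r: int8x8x2_t = vld2_dup_s8(data.as_ptr());
//         // every lane of r.0 is 7; every lane of r.1 is 9
//     }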
2487
2488 /// Load single 2-element structure and replicate to all lanes of two registers
2489 name = vld2
2490 out-dup-nox
2491 multi_fn = transmute, {vld2-outsigneddupnox-noext, transmute(a)}
2492 a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
2493 validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2494 load_fn
2495
2496 aarch64 = ld2r
2497 generate *const u64:uint64x2x2_t
2498 target = aes
2499 generate *const p64:poly64x2x2_t
2500
2501 target = default
2502 arm = vld2
2503 generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
2504 generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
2505 generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
2506 arm = nop
2507 generate *const u64:uint64x1x2_t
2508 target = aes
2509 generate *const p64:poly64x1x2_t
2510
2511 /// Load single 2-element structure and replicate to all lanes of two registers
2512 name = vld2
2513 out-dup-nox
2514 a = 0., 1., 1., 2., 3., 1., 4., 3., 5.
2515 validate 1., 1., 1., 1., 1., 1., 1., 1.
2516 load_fn
2517 arm-aarch64-separate
2518
2519 aarch64 = ld2r
2520 link-aarch64 = ld2r._EXT2_
2521 generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
2522
2523 arm = vld2
2524 link-arm = vld2dup._EXTpi82_
2525 generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
2526
2527 /// Load multiple 2-element structures to two registers
2528 name = vld2
2529 out-lane-nox
2530 multi_fn = static_assert_imm-in_exp_len-LANE
2531 constn = LANE
2532 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2533 b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2534 n = 0
2535 validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2536 load_fn
2537 arm-aarch64-separate
2538
2539 aarch64 = ld2
2540 const-aarch64 = LANE
2541 link-aarch64 = ld2lane._EXTpi82_
2542 generate *const i8:int8x16x2_t:int8x16x2_t, *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t
2543
2544 arm = vld2
2545 const-arm = LANE
2546 link-arm = vld2lane._EXTpi82_
2547 generate *const i8:int8x8x2_t:int8x8x2_t, *const i16:int16x4x2_t:int16x4x2_t, *const i32:int32x2x2_t:int32x2x2_t
2548 generate *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t
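
// The lane form loads one 2-element structure into lane LANE of the two
// vectors passed in, leaving every other lane unchanged. Sketch (public
// stdarch names; the `demo` wrapper is illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo(src: *const i16, b: int16x4x2_t) -> int16x4x2_t {
//         // overwrites only lane 0 of b.0 and b.1 with src[0] and src[1]
//         vld2_lane_s16::<0>(src, b)
//     }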
2549
2550 /// Load multiple 2-element structures to two registers
2551 name = vld2
2552 out-lane-nox
2553 multi_fn = static_assert_imm-in_exp_len-LANE
2554 multi_fn = transmute, {vld2-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
2555 constn = LANE
2556 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2557 b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2558 n = 0
2559 validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
2560 load_fn
2561
2562 aarch64 = ld2
2563 const-aarch64 = LANE
2564
2565 target = aes
2566 generate *const p64:poly64x1x2_t:poly64x1x2_t, *const p64:poly64x2x2_t:poly64x2x2_t
2567
2568 target = default
2569 generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t
2570 generate *const p8:poly8x16x2_t:poly8x16x2_t
2571
2572 arm = vld2
2573 const-arm = LANE
2574 generate *const u8:uint8x8x2_t:uint8x8x2_t, *const u16:uint16x4x2_t:uint16x4x2_t, *const u32:uint32x2x2_t:uint32x2x2_t
2575 generate *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t
2576 generate *const p8:poly8x8x2_t:poly8x8x2_t, *const p16:poly16x4x2_t:poly16x4x2_t
2577 generate *const p16:poly16x8x2_t:poly16x8x2_t
2578
2579 /// Load multiple 2-element structures to two registers
2580 name = vld2
2581 out-lane-nox
2582 multi_fn = static_assert_imm-in_exp_len-LANE
2583 constn = LANE
2584 a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
2585 b = 0., 2., 2., 14., 2., 16., 17., 18.
2586 n = 0
2587 validate 1., 2., 2., 14., 2., 16., 17., 18.
2588 load_fn
2589 arm-aarch64-separate
2590
2591 aarch64 = ld2
2592 const-aarch64 = LANE
2593 link-aarch64 = ld2lane._EXTpi82_
2594 generate *const f64:float64x1x2_t:float64x1x2_t, *const f64:float64x2x2_t:float64x2x2_t
2595
2596 arm = vld2
2597 const-arm = LANE
2598 link-arm = vld2lane._EXTpi82_
2599 generate *const f32:float32x2x2_t:float32x2x2_t, *const f32:float32x4x2_t:float32x4x2_t
2600
2601 /// Load multiple 3-element structures to three registers
2602 name = vld3
2603 out-nox
2604 a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
2605 validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
2606 load_fn
2607 arm-aarch64-separate
2608
2609 aarch64 = ld3
2610 link-aarch64 = ld3._EXTv2_
2611 generate *const i64:int64x2x3_t
2612
2613 arm = vld3
2614 link-arm = vld3._EXTpi82_
2615 generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
2616 generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
2617 arm = nop
2618 aarch64 = nop
2619 generate *const i64:int64x1x3_t
2620
2621 /// Load multiple 3-element structures to three registers
2622 name = vld3
2623 out-nox
2624 multi_fn = transmute, {vld3-outsignednox-noext, transmute(a)}
2625 a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
2626 validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
2627 load_fn
2628
2629 aarch64 = ld3
2630 generate *const u64:uint64x2x3_t
2631 target = aes
2632 generate *const p64:poly64x2x3_t
2633
2634 target = default
2635 arm = vld3
2636 generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
2637 generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
2638 generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
2639 arm = nop
2640 aarch64 = nop
2641 generate *const u64:uint64x1x3_t
2642 target = aes
2643 generate *const p64:poly64x1x3_t
2644
2645 /// Load multiple 3-element structures to three registers
2646 name = vld3
2647 out-nox
2648 a = 0., 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.
2649 validate 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8.
2650 load_fn
2651 arm-aarch64-separate
2652
2653 aarch64 = nop
2654 link-aarch64 = ld3._EXTv2_
2655 generate *const f64:float64x1x3_t
2656 aarch64 = ld3
2657 generate *const f64:float64x2x3_t
2658
2659 arm = vld3
2660 link-arm = vld3._EXTpi82_
2661 generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
2662
2663 /// Load single 3-element structure and replicate to all lanes of three registers
2664 name = vld3
2665 out-dup-nox
2666 a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
2667 validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2668 load_fn
2669 arm-aarch64-separate
2670
2671 aarch64 = ld3r
2672 link-aarch64 = ld3r._EXT2_
2673 generate *const i64:int64x2x3_t
2674
2675 arm = vld3
2676 link-arm = vld3dup._EXTpi82_
2677 generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
2678 generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
2679 arm = nop
2680 generate *const i64:int64x1x3_t
2681
2682 /// Load single 3-element structure and replicate to all lanes of three registers
2683 name = vld3
2684 out-dup-nox
2685 multi_fn = transmute, {vld3-outsigneddupnox-noext, transmute(a)}
2686 a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
2687 validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2688 load_fn
2689
2690 aarch64 = ld3r
2691 generate *const u64:uint64x2x3_t
2692 target = aes
2693 generate *const p64:poly64x2x3_t
2694
2695 target = default
2696 arm = vld3
2697 generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
2698 generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
2699 generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
2700 arm = nop
2701 generate *const u64:uint64x1x3_t
2702 target = aes
2703 generate *const p64:poly64x1x3_t
2704
2705 /// Load single 3-element structure and replicate to all lanes of three registers
2706 name = vld3
2707 out-dup-nox
2708 a = 0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5.
2709 validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
2710 load_fn
2711 arm-aarch64-separate
2712
2713 aarch64 = ld3r
2714 link-aarch64 = ld3r._EXT2_
2715 generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
2716
2717 arm = vld3
2718 link-arm = vld3dup._EXTpi82_
2719 generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
2720
2721 /// Load multiple 3-element structures to three registers
2722 name = vld3
2723 out-lane-nox
2724 multi_fn = static_assert_imm-in_exp_len-LANE
2725 constn = LANE
2726 a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2727 b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2728 n = 0
2729 validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2730 load_fn
2731 arm-aarch64-separate
2732
2733 aarch64 = ld3
2734 const-aarch64 = LANE
2735 link-aarch64 = ld3lane._EXTpi82_
2736 generate *const i8:int8x16x3_t:int8x16x3_t, *const i64:int64x1x3_t:int64x1x3_t, *const i64:int64x2x3_t:int64x2x3_t
2737
2738 arm = vld3
2739 const-arm = LANE
2740 link-arm = vld3lane._EXTpi82_
2741 generate *const i8:int8x8x3_t:int8x8x3_t, *const i16:int16x4x3_t:int16x4x3_t, *const i32:int32x2x3_t:int32x2x3_t
2742 generate *const i16:int16x8x3_t:int16x8x3_t, *const i32:int32x4x3_t:int32x4x3_t
2743
2744 /// Load multiple 3-element structures to three registers
2745 name = vld3
2746 out-lane-nox
2747 multi_fn = static_assert_imm-in_exp_len-LANE
2748 multi_fn = transmute, {vld3-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
2749 constn = LANE
2750 a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2751 b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2752 n = 0
2753 validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
2754 load_fn
2755
2756 aarch64 = ld3
2757 const-aarch64 = LANE
2758 target = aes
2759 generate *const p64:poly64x1x3_t:poly64x1x3_t, *const p64:poly64x2x3_t:poly64x2x3_t
2760 target = default
2761 generate *const p8:poly8x16x3_t:poly8x16x3_t, *const u8:uint8x16x3_t:uint8x16x3_t, *const u64:uint64x1x3_t:uint64x1x3_t, *const u64:uint64x2x3_t:uint64x2x3_t
2762
2763 arm = vld3
2764 const-arm = LANE
2765 generate *const u8:uint8x8x3_t:uint8x8x3_t, *const u16:uint16x4x3_t:uint16x4x3_t, *const u32:uint32x2x3_t:uint32x2x3_t
2766 generate *const u16:uint16x8x3_t:uint16x8x3_t, *const u32:uint32x4x3_t:uint32x4x3_t
2767 generate *const p8:poly8x8x3_t:poly8x8x3_t, *const p16:poly16x4x3_t:poly16x4x3_t
2768 generate *const p16:poly16x8x3_t:poly16x8x3_t
2769
2770 /// Load multiple 3-element structures to three registers
2771 name = vld3
2772 out-lane-nox
2773 multi_fn = static_assert_imm-in_exp_len-LANE
2774 constn = LANE
2775 a = 0., 1., 2., 2., 4., 5., 6., 7., 8., 5., 6., 7., 8.
2776 b = 0., 2., 2., 14., 9., 16., 17., 18., 5., 6., 7., 8.
2777 n = 0
2778 validate 1., 2., 2., 14., 2., 16., 17., 18., 2., 6., 7., 8.
2779 load_fn
2780 arm-aarch64-separate
2781
2782 aarch64 = ld3
2783 const-aarch64 = LANE
2784 link-aarch64 = ld3lane._EXTpi82_
2785 generate *const f64:float64x1x3_t:float64x1x3_t, *const f64:float64x2x3_t:float64x2x3_t
2786
2787 arm = vld3
2788 const-arm = LANE
2789 link-arm = vld3lane._EXTpi82_
2790 generate *const f32:float32x2x3_t:float32x2x3_t, *const f32:float32x4x3_t:float32x4x3_t
2791
2792 /// Load multiple 4-element structures to four registers
2793 name = vld4
2794 out-nox
2795 a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2796 validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2797 load_fn
2798 arm-aarch64-separate
2799
2800 aarch64 = ld4
2801 link-aarch64 = ld4._EXTv2_
2802 generate *const i64:int64x2x4_t
2803
2804 arm = vld4
2805 link-arm = vld4._EXTpi82_
2806 generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
2807 generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
2808 aarch64 = nop
2809 arm = nop
2810 generate *const i64:int64x1x4_t
2811
2812 /// Load multiple 4-element structures to four registers
2813 name = vld4
2814 out-nox
2815 multi_fn = transmute, {vld4-outsignednox-noext, transmute(a)}
2816 a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2817 validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
2818 load_fn
2819
2820 aarch64 = ld4
2821 generate *const u64:uint64x2x4_t
2822 target = aes
2823 generate *const p64:poly64x2x4_t
2824
2825 target = default
2826 arm = vld4
2827 generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
2828 generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
2829 generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
2830 aarch64 = nop
2831 arm = nop
2832 generate *const u64:uint64x1x4_t
2833 target = aes
2834 generate *const p64:poly64x1x4_t
2835
2836 /// Load multiple 4-element structures to four registers
2837 name = vld4
2838 out-nox
2839 a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 15., 16.
2840 validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 15., 6., 8., 8., 16.
2841 load_fn
2842 arm-aarch64-separate
2843
2844 aarch64 = nop
2845 link-aarch64 = ld4._EXTv2_
2846 generate *const f64:float64x1x4_t
2847 aarch64 = ld4
2848 generate *const f64:float64x2x4_t
2849
2850 arm = vld4
2851 link-arm = vld4._EXTpi82_
2852 generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
2853
2854 /// Load single 4-element structure and replicate to all lanes of four registers
2855 name = vld4
2856 out-dup-nox
2857 a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
2858 validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2859 load_fn
2860 arm-aarch64-separate
2861
2862 aarch64 = ld4r
2863 link-aarch64 = ld4r._EXT2_
2864 generate *const i64:int64x2x4_t
2865
2866 arm = vld4
2867 link-arm = vld4dup._EXTpi82_
2868 generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
2869 generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
2870 arm = nop
2871 generate *const i64:int64x1x4_t
2872
2873 /// Load single 4-element structure and replicate to all lanes of four registers
2874 name = vld4
2875 out-dup-nox
2876 multi_fn = transmute, {vld4-outsigneddupnox-noext, transmute(a)}
2877 a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
2878 validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2879 load_fn
2880
2881 aarch64 = ld4r
2882 generate *const u64:uint64x2x4_t
2883 target = aes
2884 generate *const p64:poly64x2x4_t
2885
2886 target = default
2887 arm = vld4
2888 generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
2889 generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
2890 generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
2891 arm = nop
2892 generate *const u64:uint64x1x4_t
2893 target = aes
2894 generate *const p64:poly64x1x4_t
2895
2896 /// Load single 4-element structure and replicate to all lanes of four registers
2897 name = vld4
2898 out-dup-nox
2899 a = 0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5., 9., 4., 3., 5.
2900 validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
2901 load_fn
2902 arm-aarch64-separate
2903
2904 aarch64 = ld4r
2905 link-aarch64 = ld4r._EXT2_
2906 generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
2907
2908 arm = vld4
2909 link-arm = vld4dup._EXTpi82_
2910 generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
2911
2912 /// Load multiple 4-element structures to four registers
2913 name = vld4
2914 out-lane-nox
2915 multi_fn = static_assert_imm-in_exp_len-LANE
2916 constn = LANE
2917 a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2918 b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2919 n = 0
2920 validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2921 load_fn
2922 arm-aarch64-separate
2923
2924 aarch64 = ld4
2925 const-aarch64 = LANE
2926 link-aarch64 = ld4lane._EXTpi82_
2927 generate *const i8:int8x16x4_t:int8x16x4_t, *const i64:int64x1x4_t:int64x1x4_t, *const i64:int64x2x4_t:int64x2x4_t
2928
2929 arm = vld4
2930 const-arm = LANE
2931 link-arm = vld4lane._EXTpi82_
2932 generate *const i8:int8x8x4_t:int8x8x4_t, *const i16:int16x4x4_t:int16x4x4_t, *const i32:int32x2x4_t:int32x2x4_t
2933 generate *const i16:int16x8x4_t:int16x8x4_t, *const i32:int32x4x4_t:int32x4x4_t
2934
2935 /// Load multiple 4-element structures to four registers
2936 name = vld4
2937 out-lane-nox
2938 multi_fn = static_assert_imm-in_exp_len-LANE
2939 multi_fn = transmute, {vld4-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
2940 constn = LANE
2941 a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2942 b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2943 n = 0
2944 validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
2945 load_fn
2946
2947 aarch64 = ld4
2948 const-aarch64 = LANE
2949 target = aes
2950 generate *const p64:poly64x1x4_t:poly64x1x4_t, *const p64:poly64x2x4_t:poly64x2x4_t
2951 target = default
2952 generate *const p8:poly8x16x4_t:poly8x16x4_t, *const u8:uint8x16x4_t:uint8x16x4_t, *const u64:uint64x1x4_t:uint64x1x4_t, *const u64:uint64x2x4_t:uint64x2x4_t
2953
2954 arm = vld4
2955 const-arm = LANE
2956 generate *const u8:uint8x8x4_t:uint8x8x4_t, *const u16:uint16x4x4_t:uint16x4x4_t, *const u32:uint32x2x4_t:uint32x2x4_t
2957 generate *const u16:uint16x8x4_t:uint16x8x4_t, *const u32:uint32x4x4_t:uint32x4x4_t
2958 generate *const p8:poly8x8x4_t:poly8x8x4_t, *const p16:poly16x4x4_t:poly16x4x4_t
2959 generate *const p16:poly16x8x4_t:poly16x8x4_t
2960
2961 /// Load multiple 4-element structures to four registers
2962 name = vld4
2963 out-lane-nox
2964 multi_fn = static_assert_imm-in_exp_len-LANE
2965 constn = LANE
2966 a = 0., 1., 2., 2., 2., 5., 6., 7., 8., 5., 6., 7., 8., 1., 4., 3., 5.
2967 b = 0., 2., 2., 2., 2., 16., 2., 18., 5., 6., 7., 8., 1., 4., 3., 5.
2968 n = 0
2969 validate 1., 2., 2., 2., 2., 16., 2., 18., 2., 6., 7., 8., 2., 4., 3., 5.
2970 load_fn
2971 arm-aarch64-separate
2972
2973 aarch64 = ld4
2974 const-aarch64 = LANE
2975 link-aarch64 = ld4lane._EXTpi82_
2976 generate *const f64:float64x1x4_t:float64x1x4_t, *const f64:float64x2x4_t:float64x2x4_t
2977
2978 arm = vld4
2979 const-arm = LANE
2980 link-arm = vld4lane._EXTpi82_
2981 generate *const f32:float32x2x4_t:float32x2x4_t, *const f32:float32x4x4_t:float32x4x4_t
2982
2983 /// Store multiple single-element structures from one, two, three, or four registers
2984 name = vst1
2985 in1-lane-nox
2986 multi_fn = static_assert_imm-in_exp_len-LANE
2987 multi_fn = *a, {simd_extract, b, LANE as u32}
2988 constn = LANE
2989 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2990 n = 0
2991 validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2992 store_fn
2993
2994 aarch64 = nop
2995 arm = nop
2996 generate *mut i8:int8x8_t:void, *mut i16:int16x4_t:void, *mut i32:int32x2_t:void, *mut i64:int64x1_t:void
2997 generate *mut i8:int8x16_t:void, *mut i16:int16x8_t:void, *mut i32:int32x4_t:void, *mut i64:int64x2_t:void
2998 generate *mut u8:uint8x8_t:void, *mut u16:uint16x4_t:void, *mut u32:uint32x2_t:void, *mut u64:uint64x1_t:void
2999 generate *mut u8:uint8x16_t:void, *mut u16:uint16x8_t:void, *mut u32:uint32x4_t:void, *mut u64:uint64x2_t:void
3000 generate *mut p8:poly8x8_t:void, *mut p16:poly16x4_t:void, *mut p8:poly8x16_t:void, *mut p16:poly16x8_t:void
3001 target = aes
3002 generate *mut p64:poly64x1_t:void, *mut p64:poly64x2_t:void
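
// The lane form stores only the element at lane LANE through the pointer.
// Sketch (public stdarch names; the `demo` wrapper is illustrative only):
//
//     use core::arch::aarch64::*;
//
//     unsafe fn demo(dst: *mut i32, b: int32x4_t) {
//         vst1q_lane_s32::<1>(dst, b); // writes just lane 1 of b to *dst
//     }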
3003
3004 /// Store multiple single-element structures from one, two, three, or four registers
3005 name = vst1
3006 in1-lane-nox
3007 multi_fn = static_assert_imm-in_exp_len-LANE
3008 multi_fn = *a, {simd_extract, b, LANE as u32}
3009 constn = LANE
3010 a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
3011 n = 0
3012 validate 1., 0., 0., 0., 0., 0., 0., 0.
3013 store_fn
3014
3015 aarch64 = nop
3016 generate *mut f64:float64x1_t:void, *mut f64:float64x2_t:void
3017
3018 arm = nop
3019 generate *mut f32:float32x2_t:void, *mut f32:float32x4_t:void
3020
3021 /// Store multiple single-element structures from one, two, three, or four registers
3022 name = vst1
3023 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3024 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3025 store_fn
3026 arm-aarch64-separate
3027
3028 aarch64 = st1
3029 link-aarch64 = st1x2._EXT3_
3030 arm = vst1
3031 link-arm = vst1x2._EXTr3_
3032 generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void, *mut i64:int64x1x2_t:void
3033 generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void, *mut i64:int64x2x2_t:void
3034
3035 link-aarch64 = st1x3._EXT3_
3036 link-arm = vst1x3._EXTr3_
3037 generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void, *mut i64:int64x1x3_t:void
3038 generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void, *mut i64:int64x2x3_t:void
3039
3040 link-aarch64 = st1x4._EXT3_
3041 link-arm = vst1x4._EXTr3_
3042 generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void, *mut i64:int64x1x4_t:void
3043 generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void, *mut i64:int64x2x4_t:void
3044
3045 /// Store multiple single-element structures from one, two, three, or four registers
3046 name = vst1
3047 multi_fn = vst1-signed-noext, transmute(a), transmute(b)
3048 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3049 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
3050
3051 store_fn
3052 aarch64 = st1
3053 arm = vst1
3054 generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void, *mut u64:uint64x1x2_t:void
3055 generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void, *mut u64:uint64x2x2_t:void
3056 generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void, *mut u64:uint64x1x3_t:void
3057 generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void, *mut u64:uint64x2x3_t:void
3058 generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void, *mut u64:uint64x1x4_t:void
3059 generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void, *mut u64:uint64x2x4_t:void
3060 generate *mut p8:poly8x8x2_t:void, *mut p8:poly8x8x3_t:void, *mut p8:poly8x8x4_t:void
3061 generate *mut p8:poly8x16x2_t:void, *mut p8:poly8x16x3_t:void, *mut p8:poly8x16x4_t:void
3062 generate *mut p16:poly16x4x2_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x4x4_t:void
3063 generate *mut p16:poly16x8x2_t:void, *mut p16:poly16x8x3_t:void, *mut p16:poly16x8x4_t:void
3064 target = aes
3065 generate *mut p64:poly64x1x2_t:void
3066 arm = nop
3067 generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x1x4_t:void
3068 generate *mut p64:poly64x2x2_t:void, *mut p64:poly64x2x3_t:void, *mut p64:poly64x2x4_t:void
3069
3070 /// Store multiple single-element structures from one, two, three, or four registers
3071 name = vst1
3072 a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
3073 validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
3074 store_fn
3075 arm-aarch64-separate
3076
3077 aarch64 = st1
3078 link-aarch64 = st1x2._EXT3_
3079 generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
3080
3081 link-aarch64 = st1x3._EXT3_
3082 generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
3083
3084 link-aarch64 = st1x4._EXT3_
3085 generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
3086
3087 arm = vst1
3088 link-aarch64 = st1x2._EXT3_
3089 link-arm = vst1x2._EXTr3_
3090 generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
3091
3092 link-aarch64 = st1x3._EXT3_
3093 link-arm = vst1x3._EXTr3_
3094 generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
3095
3096 link-aarch64 = st1x4._EXT3_
3097 link-arm = vst1x4._EXTr3_
3098 generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
3099
3100 /// Store multiple 2-element structures from two registers
3101 name = vst2
3102 in1-nox
3103 a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
3104 validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
3105 store_fn
3106 arm-aarch64-separate
3107
3108 aarch64 = st2
3109 link-aarch64 = st2._EXTpi8_
3110 generate *mut i64:int64x2x2_t:void
3111
3112 arm = vst2
3113 link-arm = vst2._EXTpi8r_
3114 generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
3115 generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
3116 arm = nop
3117 aarch64 = nop
3118 generate *mut i64:int64x1x2_t:void
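
// A minimal scalar model of the assumed interleaving (arrays stand in for the
// NEON register types): vst2 alternates elements of the two registers:
// fn vst2_s16_model(p: *mut i16, v: ([i16; 4], [i16; 4])) {
//     unsafe {
//         for i in 0..4 {
//             *p.add(2 * i) = v.0[i];     // element i of the first register
//             *p.add(2 * i + 1) = v.1[i]; // element i of the second register
//         }
//     }
// }
// vst3 and vst4 below generalize this to strides of 3 and 4.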
3119
3120 /// Store multiple 2-element structures from two registers
3121 name = vst2
3122 multi_fn = transmute, {vst2-in1signednox-noext, transmute(a), transmute(b)}
3123 in1-nox
3124 a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
3125 validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
3126 store_fn
3127
3128 aarch64 = st2
3129 generate *mut u64:uint64x2x2_t:void
3130 target = aes
3131 generate *mut p64:poly64x2x2_t:void
3132
3133 target = default
3134 arm = vst2
3135 generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
3136 generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
3137 generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p8:poly8x16x2_t:void, *mut p16:poly16x8x2_t:void
3138 arm = nop
3139 aarch64 = nop
3140 generate *mut u64:uint64x1x2_t:void
3141 target = aes
3142 generate *mut p64:poly64x1x2_t:void
3143
3144 /// Store multiple 2-element structures from two registers
3145 name = vst2
3146 in1-nox
3147 a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
3148 validate 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
3149 store_fn
3150 arm-aarch64-separate
3151
3152 aarch64 = st1
3153 link-aarch64 = st2._EXTpi8_
3154 generate *mut f64:float64x1x2_t:void
3155 aarch64 = st2
3156 generate *mut f64:float64x2x2_t:void
3157
3158 arm = vst2
3159 link-arm = vst2._EXTpi8r_
3160 generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
3161
3162 /// Store multiple 2-element structures from two registers
3163 name = vst2
3164 in1-lane-nox
3165 constn = LANE
3166 multi_fn = static_assert_imm-in_exp_len-LANE
3167 a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
3168 n = 0
3169 validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3170 store_fn
3171 arm-aarch64-separate
3172
3173 aarch64 = st2
3174 link-aarch64 = st2lane._EXTpi8_
3175 const-aarch64 = LANE
3176 generate *mut i8:int8x16x2_t:void, *mut i64:int64x1x2_t:void, *mut i64:int64x2x2_t:void
3177
3178 arm = vst2
3179 link-arm = vst2lane._EXTpi8r_
3180 const-arm = LANE
3181 generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
3182 generate *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
3183
3184 /// Store multiple 2-element structures from two registers
3185 name = vst2
3186 in1-lane-nox
3187 constn = LANE
3188 multi_fn = static_assert_imm-in_exp_len-LANE
3189 multi_fn = transmute, {vst2-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
3190 a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
3191 n = 0
3192 validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3193 store_fn
3194
3195 aarch64 = st2
3196 generate *mut u8:uint8x16x2_t:void, *mut u64:uint64x1x2_t:void, *mut u64:uint64x2x2_t:void, *mut p8:poly8x16x2_t:void
3197 target = aes
3198 generate *mut p64:poly64x1x2_t:void, *mut p64:poly64x2x2_t:void
3199
3200 target = default
3201 arm = vst2
3202 generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
3203 generate *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
3204 generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p16:poly16x8x2_t:void
3205
3206 /// Store multiple 2-element structures from two registers
3207 name = vst2
3208 in1-lane-nox
3209 constn = LANE
3210 multi_fn = static_assert_imm-in_exp_len-LANE
3211 a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
3212 n = 0
3213 validate 1., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
3214 store_fn
3215 arm-aarch64-separate
3216
3217 aarch64 = st2
3218 link-aarch64 = st2lane._EXTpi8_
3219 const-aarch64 = LANE
3220 generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
3221
3222 arm = vst2
3223 link-arm = vst2lane._EXTpi8r_
3224 const-arm = LANE
3225 generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
3226
3227 /// Store multiple 3-element structures from three registers
3228 name = vst3
3229 in1-nox
3230 a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
3231 validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
3232 store_fn
3233 arm-aarch64-separate
3234
3235 aarch64 = st3
3236 link-aarch64 = st3._EXTpi8_
3237 generate *mut i64:int64x2x3_t:void
3238
3239 arm = vst3
3240 link-arm = vst3._EXTpi8r_
3241 generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
3242 generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
3243 arm = nop
3244 aarch64 = nop
3245 generate *mut i64:int64x1x3_t:void
3246
3247 /// Store multiple 3-element structures from three registers
3248 name = vst3
3249 multi_fn = transmute, {vst3-in1signednox-noext, transmute(a), transmute(b)}
3250 in1-nox
3251 a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
3252 validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
3253 store_fn
3254
3255 aarch64 = st3
3256 generate *mut u64:uint64x2x3_t:void
3257 target = aes
3258 generate *mut p64:poly64x2x3_t:void
3259
3260 target = default
3261 arm = vst3
3262 generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
3263 generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
3264 generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p8:poly8x16x3_t:void, *mut p16:poly16x8x3_t:void
3265 arm = nop
3266 aarch64 = nop
3267 generate *mut u64:uint64x1x3_t:void
3268 target = aes
3269 generate *mut p64:poly64x1x3_t:void
3270
3271 /// Store multiple 3-element structures from three registers
3272 name = vst3
3273 in1-nox
3274 a = 0., 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8., 13., 14., 15., 16.
3275 validate 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8., 2., 13., 13., 4.
3276 store_fn
3277 arm-aarch64-separate
3278
3279 aarch64 = nop
3280 link-aarch64 = st3._EXTpi8_
3281 generate *mut f64:float64x1x3_t:void
3282 aarch64 = st3
3283 generate *mut f64:float64x2x3_t:void
3284
3285 arm = vst3
3286 link-arm = vst3._EXTpi8r_
3287 generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
3288
3289 /// Store multiple 3-element structures from three registers
3290 name = vst3
3291 in1-lane-nox
3292 constn = LANE
3293 multi_fn = static_assert_imm-in_exp_len-LANE
3294 a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
3295 n = 0
3296 validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3297 store_fn
3298 arm-aarch64-separate
3299
3300 aarch64 = st3
3301 link-aarch64 = st3lane._EXTpi8_
3302 const-aarch64 = LANE
3303 generate *mut i8:int8x16x3_t:void, *mut i64:int64x1x3_t:void, *mut i64:int64x2x3_t:void
3304
3305 arm = vst3
3306 link-arm = vst3lane._EXTpi8r_
3307 const-arm = LANE
3308 generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
3309 generate *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
3310
3311 /// Store multiple 3-element structures from three registers
3312 name = vst3
3313 in1-lane-nox
3314 constn = LANE
3315 multi_fn = static_assert_imm-in_exp_len-LANE
3316 multi_fn = transmute, {vst3-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
3317 a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
3318 n = 0
3319 validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3320 store_fn
3321
3322 aarch64 = st3
3323 generate *mut u8:uint8x16x3_t:void, *mut u64:uint64x1x3_t:void, *mut u64:uint64x2x3_t:void, *mut p8:poly8x16x3_t:void
3324 target = aes
3325 generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x2x3_t:void
3326
3327 target = default
3328 arm = vst3
3329 generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
3330 generate *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
3331 generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x8x3_t:void
3332
3333 /// Store multiple 3-element structures from three registers
3334 name = vst3
3335 in1-lane-nox
3336 constn = LANE
3337 multi_fn = static_assert_imm-in_exp_len-LANE
3338 a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
3339 n = 0
3340 validate 1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
3341 store_fn
3342 arm-aarch64-separate
3343
3344 aarch64 = st3
3345 link-aarch64 = st3lane._EXTpi8_
3346 const-aarch64 = LANE
3347 generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
3348
3349 arm = vst3
3350 link-arm = vst3lane._EXTpi8r_
3351 const-arm = LANE
3352 generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
3353
3354 /// Store multiple 4-element structures from four registers
3355 name = vst4
3356 in1-nox
3357 a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3358 validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3359 store_fn
3360 arm-aarch64-separate
3361
3362 aarch64 = st4
3363 link-aarch64 = st4._EXTpi8_
3364 generate *mut i64:int64x2x4_t:void
3365
3366 arm = vst4
3367 link-arm = vst4._EXTpi8r_
3368 generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
3369 generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
3370 arm = nop
3371 aarch64 = nop
3372 generate *mut i64:int64x1x4_t:void
3373
3374 /// Store multiple 4-element structures from four registers
3375 name = vst4
3376 multi_fn = transmute, {vst4-in1signednox-noext, transmute(a), transmute(b)}
3377 in1-nox
3378 a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3379 validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3380 store_fn
3381
3382 aarch64 = st4
3383 generate *mut u64:uint64x2x4_t:void
3384 target = aes
3385 generate *mut p64:poly64x2x4_t:void
3386
3387 target = default
3388 arm = vst4
3389 generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
3390 generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
3391 generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p8:poly8x16x4_t:void, *mut p16:poly16x8x4_t:void
3392 arm = nop
3393 aarch64 = nop
3394 generate *mut u64:uint64x1x4_t:void
3395 target = aes
3396 generate *mut p64:poly64x1x4_t:void
3397
3398 /// Store multiple 4-element structures from four registers
3399 name = vst4
3400 in1-nox
3401 a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
3402 validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
3403 store_fn
3404 arm-aarch64-separate
3405
3406 aarch64 = nop
3407 link-aarch64 = st4._EXTpi8_
3408 generate *mut f64:float64x1x4_t:void
3409 aarch64 = st4
3410 generate *mut f64:float64x2x4_t:void
3411
3412 arm = vst4
3413 link-arm = vst4._EXTpi8r_
3414 generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
3415
3416 /// Store multiple 4-element structures from four registers
3417 name = vst4
3418 in1-lane-nox
3419 constn = LANE
3420 multi_fn = static_assert_imm-in_exp_len-LANE
3421 a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3422 n = 0
3423 validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3424 store_fn
3425 arm-aarch64-separate
3426
3427 aarch64 = st4
3428 link-aarch64 = st4lane._EXTpi8_
3429 const-aarch64 = LANE
3430 generate *mut i8:int8x16x4_t:void, *mut i64:int64x1x4_t:void, *mut i64:int64x2x4_t:void
3431
3432 arm = vst4
3433 link-arm = vst4lane._EXTpi8r_
3434 const-arm = LANE
3435 generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
3436 generate *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
3437
3438 /// Store multiple 4-element structures from four registers
3439 name = vst4
3440 in1-lane-nox
3441 constn = LANE
3442 multi_fn = static_assert_imm-in_exp_len-LANE
3443 multi_fn = transmute, {vst4-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
3444 a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
3445 n = 0
3446 validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3447 store_fn
3448
3449 aarch64 = st4
3450 generate *mut u8:uint8x16x4_t:void, *mut u64:uint64x1x4_t:void, *mut u64:uint64x2x4_t:void, *mut p8:poly8x16x4_t:void
3451 target = aes
3452 generate *mut p64:poly64x1x4_t:void, *mut p64:poly64x2x4_t:void
3453
3454 target = default
3455 arm = vst4
3456 generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
3457 generate *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
3458 generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p16:poly16x8x4_t:void
3459
3460 /// Store multiple 4-element structures from four registers
3461 name = vst4
3462 in1-lane-nox
3463 constn = LANE
3464 multi_fn = static_assert_imm-in_exp_len-LANE
3465 a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
3466 n = 0
3467 validate 1., 2., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
3468 store_fn
3469 arm-aarch64-separate
3470
3471 aarch64 = st4
3472 link-aarch64 = st4lane._EXTpi8_
3473 const-aarch64 = LANE
3474 generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
3475
3476 arm = vst4
3477 link-arm = vst4lane._EXTpi8r_
3478 const-arm = LANE
3479 generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
3480
3481 /// Dot product vector form with unsigned and signed integers
3482 name = vusdot
3483 out-suffix
3484 a = 1000, -4200, -1000, 2000
3485 b = 100, 205, 110, 195, 120, 185, 130, 175, 140, 165, 150, 155, 160, 145, 170, 135
3486 c = 0, 1, 2, 3, -1, -2, -3, -4, 4, 5, 6, 7, -5, -6, -7, -8
3487 aarch64 = usdot
3488 arm = vusdot
3489 target = i8mm
3490
3491 // 1000 + (100, 205, 110, 195) . ( 0, 1, 2, 3)
3492 // -4200 + (120, 185, 130, 175) . (-1, -2, -3, -4)
3493 // ...
3494 validate 2010, -5780, 2370, -1940
3495
3496 link-arm = usdot._EXT2_._EXT4_:int32x2_t:uint8x8_t:int8x8_t:int32x2_t
3497 link-aarch64 = usdot._EXT2_._EXT4_:int32x2_t:uint8x8_t:int8x8_t:int32x2_t
3498 generate int32x2_t:uint8x8_t:int8x8_t:int32x2_t
3499
3500 link-arm = usdot._EXT2_._EXT4_:int32x4_t:uint8x16_t:int8x16_t:int32x4_t
3501 link-aarch64 = usdot._EXT2_._EXT4_:int32x4_t:uint8x16_t:int8x16_t:int32x4_t
3502 generate int32x4_t:uint8x16_t:int8x16_t:int32x4_t
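
// A minimal scalar model of the assumed semantics: each i32 lane accumulates
// the dot product of one 4-byte group, with b zero-extended and c sign-extended:
// fn usdot_lane(acc: i32, b: [u8; 4], c: [i8; 4]) -> i32 {
//     acc + (0..4).map(|i| b[i] as i32 * c[i] as i32).sum::<i32>()
// }
// e.g. the first validated lane: 1000 + (100*0 + 205*1 + 110*2 + 195*3) = 2010.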
3503
3504 /// Dot product index form with unsigned and signed integers
3505 name = vusdot
3506 out-lane-suffixes
3507 constn = LANE
3508 aarch64 = usdot
3509 arm = vusdot
3510 target = i8mm
3511 multi_fn = static_assert_imm-in2_dot-LANE
3512 multi_fn = transmute, c:merge4_t2, c
3513 multi_fn = simd_shuffle!, c:out_signed, c, c, {dup-out_len-LANE as u32}
3514 multi_fn = vusdot-out-noext, a, b, {transmute, c}
3515 a = 1000, -4200, -1000, 2000
3516 b = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
3517 c = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
3518
3519 // 1000 + (100, 110, 120, 130) . (4, 3, 2, 1)
3520 // -4200 + (140, 150, 160, 170) . (4, 3, 2, 1)
3521 // ...
3522 n = 0
3523 validate 2100, -2700, 900, 4300
3524
3525 // 1000 + (100, 110, 120, 130) . (0, -1, -2, -3)
3526 // -4200 + (140, 150, 160, 170) . (0, -1, -2, -3)
3527 // ...
3528 n = 1
3529 validate 260, -5180, -2220, 540
3530
3531 generate int32x2_t:uint8x8_t:int8x8_t:int32x2_t
3532 generate int32x4_t:uint8x16_t:int8x8_t:int32x4_t
3533
3534 /// Dot product index form with unsigned and signed integers
3535 name = vusdot
3536 out-lane-suffixes
3537 constn = LANE
3538 // Only AArch64 has the laneq forms.
3539 aarch64 = usdot
3540 target = i8mm
3541 multi_fn = static_assert_imm-in2_dot-LANE
3542 multi_fn = transmute, c:merge4_t2, c
3543 multi_fn = simd_shuffle!, c:out_signed, c, c, {dup-out_len-LANE as u32}
3544 multi_fn = vusdot-out-noext, a, b, {transmute, c}
3545 a = 1000, -4200, -1000, 2000
3546 b = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
3547 c = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
3548
3549 // 1000 + (100, 110, 120, 130) . (-8, -9, -10, -11)
3550 // -4200 + (140, 150, 160, 170) . (-8, -9, -10, -11)
3551 // ...
3552 n = 3
3553 validate -3420, -10140, -8460, -6980
3554
3555 generate int32x2_t:uint8x8_t:int8x16_t:int32x2_t
3556 generate int32x4_t:uint8x16_t:int8x16_t:int32x4_t
3557
3558 /// Dot product index form with signed and unsigned integers
3559 name = vsudot
3560 out-lane-suffixes
3561 constn = LANE
3562 aarch64 = sudot
3563 arm = vsudot
3564 target = i8mm
3565
3566 multi_fn = static_assert_imm-in2_dot-LANE
3567 multi_fn = transmute, c:merge4_t2, c
3568 multi_fn = simd_shuffle!, c:out_unsigned, c, c, {dup-out_len-LANE as u32}
3569 multi_fn = vusdot-out-noext, a, {transmute, c}, b
3570 a = -2000, 4200, -1000, 2000
3571 b = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
3572 c = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
3573
3574 // -2000 + (4, 3, 2, 1) . (100, 110, 120, 130)
3575 // 4200 + (0, -1, -2, -3) . (100, 110, 120, 130)
3576 // ...
3577 n = 0
3578 validate -900, 3460, -3580, -2420
3579
3580 // -2000 + (4, 3, 2, 1) . (140, 150, 160, 170)
3581 // 4200 + (0, -1, -2, -3) . (140, 150, 160, 170)
3582 // ...
3583 n = 1
3584 validate -500, 3220, -4460, -3940
3585
3586 generate int32x2_t:int8x8_t:uint8x8_t:int32x2_t
3587 generate int32x4_t:int8x16_t:uint8x8_t:int32x4_t
3588
3589 /// Dot product index form with signed and unsigned integers
3590 name = vsudot
3591 out-lane-suffixes
3592 constn = LANE
3593 // Only AArch64 has the laneq forms.
3594 aarch64 = sudot
3595 target = i8mm
3596
3597 multi_fn = static_assert_imm-in2_dot-LANE
3598 multi_fn = transmute, c:merge4_t2, c
3599 multi_fn = simd_shuffle!, c:out_unsigned, c, c, {dup-out_len-LANE as u32}
3600 multi_fn = vusdot-out-noext, a, {transmute, c}, b
3601 a = -2000, 4200, -1000, 2000
3602 b = 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11
3603 c = 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250
3604
3605 // -2000 + (4, 3, 2, 1) . (220, 230, 240, 250)
3606 // 4200 + (0, -1, -2, -3) . (220, 230, 240, 250)
3607 // ...
3608 n = 3
3609 validate 300, 2740, -6220, -6980
3610
3611 generate int32x2_t:int8x8_t:uint8x16_t:int32x2_t
3612 generate int32x4_t:int8x16_t:uint8x16_t:int32x4_t
3613
3614 /// Multiply
3615 name = vmul
3616 a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
3617 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3618 validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
3619 arm = vmul.
3620 aarch64 = mul
3621 fn = simd_mul
3622 generate int*_t, uint*_t
3623
3624 /// Polynomial multiply
3625 name = vmul
3626 a = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
3627 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3628 validate 1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48
3629
3630 aarch64 = pmul
3631 link-aarch64 = pmul._EXT_
3632 arm = vmul
3633 link-arm = vmulp._EXT_
3634 generate poly8x8_t, poly8x16_t
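
// Polynomial multiplication works over GF(2): partial products combine with
// XOR instead of addition. A minimal scalar sketch (assumed), keeping only the
// low 8 bits as pmul does:
// fn pmul8(a: u8, b: u8) -> u8 {
//     (0u32..8).filter(|i| (b >> i) & 1 == 1).fold(0, |r, i| r ^ (a << i))
// }
// e.g. pmul8(3, 6) = (3 << 1) ^ (3 << 2) = 6 ^ 12 = 10, matching lane 5 above.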
3635
3636 /// Multiply
3637 name = vmul
3638 fn = simd_mul
3639 a = 1.0, 2.0, 1.0, 2.0
3640 b = 2.0, 3.0, 4.0, 5.0
3641 validate 2.0, 6.0, 4.0, 10.0
3642
3643 aarch64 = fmul
3644 generate float64x*_t
3645
3646 arm = vmul.
3647 generate float*_t
3648
3649 /// Vector multiply by scalar
3650 name = vmul
3651 out-n-suffix
3652 multi_fn = simd_mul, a, {vdup-nout-noext, b}
3653 a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3654 b = 2
3655 validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
3656
3657 arm = vmul
3658 aarch64 = mul
3659 generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t
3660 generate uint16x4_t:u16:uint16x4_t, uint16x8_t:u16:uint16x8_t, uint32x2_t:u32:uint32x2_t, uint32x4_t:u32:uint32x4_t
3661
3662 /// Vector multiply by scalar
3663 name = vmul
3664 out-n-suffix
3665 multi_fn = simd_mul, a, {vdup-nout-noext, b}
3666 a = 1., 2., 3., 4.
3667 b = 2.
3668 validate 2., 4., 6., 8.
3669
3670 aarch64 = fmul
3671 generate float64x1_t:f64:float64x1_t, float64x2_t:f64:float64x2_t
3672
3673 arm = vmul
3674 generate float32x2_t:f32:float32x2_t, float32x4_t:f32:float32x4_t
3675
3676 /// Multiply
3677 name = vmul
3678 lane-suffixes
3679 constn = LANE
3680 multi_fn = static_assert_imm-in_exp_len-LANE
3681 multi_fn = simd_mul, a, {simd_shuffle!, b, b, {dup-out_len-LANE as u32}}
3682 a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3683 b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3684 n = 1
3685 validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
3686
3687 aarch64 = mul
3688 arm = vmul
3689 generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
3690 generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t
3691 generate uint16x4_t, uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
3692 generate uint32x2_t, uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
3693
3694 /// Floating-point multiply
3695 name = vmul
3696 lane-suffixes
3697 constn = LANE
3698 multi_fn = static_assert_imm-in_exp_len-LANE
3699 multi_fn = simd_mul, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
3700 a = 1., 2., 3., 4.
3701 b = 2., 0., 0., 0.
3702 n = 0
3703 validate 2., 4., 6., 8.
3704
3705 aarch64 = fmul
3706 generate float64x1_t, float64x1_t:float64x2_t:float64x1_t
3707
3708 /// Floating-point multiply
3709 name = vmul
3710 lane-suffixes
3711 constn = LANE
3712 multi_fn = static_assert_imm-in_exp_len-LANE
3713 multi_fn = simd_mul, a, {simd_shuffle!, b, b, {dup-out_len-LANE as u32}}
3714 a = 1., 2., 3., 4.
3715 b = 2., 0., 0., 0.
3716 n = 0
3717 validate 2., 4., 6., 8.
3718
3719 aarch64 = fmul
3720 generate float64x2_t:float64x1_t:float64x2_t, float64x2_t
3721
3722 arm = vmul
3723 generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
3724
3725 /// Floating-point multiply
3726 name = vmuls_lane
3727 constn = LANE
3728 multi_fn = static_assert_imm-in_exp_len-LANE
3729 multi_fn = simd_extract, b:f32, b, LANE as u32
3730 multi_fn = a * b
3731 a = 1.
3732 b = 2., 0., 0., 0.
3733 n = 0
3734 validate 2.
3735 aarch64 = fmul
3736 generate f32:float32x2_t:f32, f32:float32x4_t:f32
3737
3738 /// Floating-point multiply
3739 name = vmuld_lane
3740 constn = LANE
3741 multi_fn = static_assert_imm-in_exp_len-LANE
3742 multi_fn = simd_extract, b:f64, b, LANE as u32
3743 multi_fn = a * b
3744 a = 1.
3745 b = 2., 0.
3746 n = 0
3747 validate 2.
3748 aarch64 = fmul
3749 generate f64:float64x1_t:f64, f64:float64x2_t:f64
3750
3751 /// Signed multiply long
3752 name = vmull
3753 a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3754 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
3755 validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
3756
3757 arm = vmull.s
3758 aarch64 = smull
3759 link-arm = vmulls._EXT_
3760 link-aarch64 = smull._EXT_
3761 generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
3762
3763 /// Signed multiply long
3764 name = vmull_high
3765 no-q
3766 multi_fn = simd_shuffle!, a:half, a, a, {fixed-half-right}
3767 multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
3768 multi_fn = vmull-noqself-noext, a, b
3769 a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3770 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
3771 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
3772 validate 9, 20, 11, 24, 13, 28, 15, 32
3773
3774 aarch64 = smull2
3775 generate int8x16_t:int8x16_t:int16x8_t, int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
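
// The {fixed-half-right} shuffle above appears to select the upper half of
// each input (e.g. lanes 8..16 of an int8x16_t), which is then widened and
// multiplied, so validate holds only the products of the upper halves.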
3776
3777 /// Unsigned multiply long
3778 name = vmull
3779 a = 1, 2, 3, 4, 5, 6, 7, 8
3780 b = 1, 2, 1, 2, 1, 2, 1, 2
3781 validate 1, 4, 3, 8, 5, 12, 7, 16
3782
3783 arm = vmull.s
3784 aarch64 = umull
3785 link-arm = vmullu._EXT_
3786 link-aarch64 = umull._EXT_
3787 generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t
3788
3789 /// Unsigned multiply long
3790 name = vmull_high
3791 no-q
3792 multi_fn = simd_shuffle!, a:half, a, a, {fixed-half-right}
3793 multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
3794 multi_fn = vmull-noqself-noext, a, b
3795 a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3796 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
3797 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
3798 validate 9, 20, 11, 24, 13, 28, 15, 32
3799
3800 aarch64 = umull2
3801 generate uint8x16_t:uint8x16_t:uint16x8_t, uint16x8_t:uint16x8_t:uint32x4_t, uint32x4_t:uint32x4_t:uint64x2_t
3802
3803 /// Polynomial multiply long
3804 name = vmull
3805 a = 1, 2, 3, 4, 5, 6, 7, 8
3806 b = 1, 3, 1, 3, 1, 3, 1, 3
3807 validate 1, 6, 3, 12, 5, 10, 7, 24
3808
3809 arm = vmull.s
3810 aarch64 = pmull
3811 link-arm = vmullp._EXT_
3812 link-aarch64 = pmull._EXT_
3813 generate poly8x8_t:poly8x8_t:poly16x8_t
3814
3815 /// Polynomial multiply long
3816 name = vmull
3817 no-q
3818 a = 15
3819 b = 3
3820 validate 17
3821 target = aes
3822
3823 aarch64 = pmull
3824 link-aarch64 = pmull64:p64:p64:p64:int8x16_t
3825 // Because of the support status of llvm, vmull_p64 is currently only available on aarch64;
3826 // the arm variant below therefore stays commented out.
3826 // arm = vmull
3827 // link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
3828 generate p64:p64:p128
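
// Worked example for the values above: 15 (0b1111) carry-less-multiplied by
// 3 (0b11) is (15 << 1) ^ 15 = 30 ^ 15 = 0b10001 = 17.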
3829
3831 /// Polynomial multiply long
3832 name = vmull_high
3833 no-q
3834 multi_fn = simd_shuffle!, a:half, a, a, {fixed-half-right}
3835 multi_fn = simd_shuffle!, b:half, b, b, {fixed-half-right}
3836 multi_fn = vmull-noqself-noext, a, b
3837 a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3838 b = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
3839 fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
3840 validate 9, 30, 11, 20, 13, 18, 15, 48
3841
3842 aarch64 = pmull
3843 generate poly8x16_t:poly8x16_t:poly16x8_t
3844
3845 /// Polynomial multiply long
3846 name = vmull_high
3847 no-q
3848 multi_fn = vmull-noqself-noext, {simd_extract, a, 1}, {simd_extract, b, 1}
3849 a = 1, 15
3850 b = 1, 3
3851 validate 17
3852 target = aes
3853
3854 aarch64 = pmull
3855 generate poly64x2_t:poly64x2_t:p128
3856
3857 /// Vector long multiply with scalar
3858 name = vmull_n
3859 no-q
3860 multi_fn = vmull-in0-noext, a, {vdup-nin0-noext, b}
3861 a = 1, 2, 3, 4, 5, 6, 7, 8
3862 b = 2
3863 validate 2, 4, 6, 8, 10, 12, 14, 16
3864
3865 arm = vmull
3866 aarch64 = smull
3867 generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
3868 aarch64 = umull
3869 generate uint16x4_t:u16:uint32x4_t, uint32x2_t:u32:uint64x2_t
3870
3871 /// Vector long multiply by scalar
3872 name = vmull_lane
3873 constn = LANE
3874 multi_fn = static_assert_imm-in_exp_len-LANE
3875 multi_fn = vmull-in0-noext, a, {simd_shuffle!, b, b, {dup-in0_len-LANE as u32}}
3876 a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
3877 b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3878 n = 1
3879 validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
3880
3881 arm = vmull
3882 aarch64 = smull
3883 generate int16x4_t:int16x4_t:int32x4_t, int16x4_t:int16x8_t:int32x4_t
3884 generate int32x2_t:int32x2_t:int64x2_t, int32x2_t:int32x4_t:int64x2_t
3885 aarch64 = umull
3886 generate uint16x4_t:uint16x4_t:uint32x4_t, uint16x4_t:uint16x8_t:uint32x4_t
3887 generate uint32x2_t:uint32x2_t:uint64x2_t, uint32x2_t:uint32x4_t:uint64x2_t
3888
3889 /// Multiply long
3890 name = vmull_high_n
3891 no-q
3892 multi_fn = vmull_high-noqself-noext, a, {vdup-nin0-noext, b}
3893 a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3894 b = 2
3895 validate 18, 20, 22, 24, 26, 28, 30, 32
3896
3897 aarch64 = smull2
3898 generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
3899 aarch64 = umull2
3900 generate uint16x8_t:u16:uint32x4_t, uint32x4_t:u32:uint64x2_t
3901
3902 /// Multiply long
3903 name = vmull_high_lane
3904 constn = LANE
3905 multi_fn = static_assert_imm-in_exp_len-LANE
3906 multi_fn = vmull_high-noqself-noext, a, {simd_shuffle!, b, b, {dup-in0_len-LANE as u32}}
3907 a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
3908 b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3909 n = 1
3910 validate 18, 20, 22, 24, 26, 28, 30, 32
3911
3912 aarch64 = smull2
3913 generate int16x8_t:int16x4_t:int32x4_t, int16x8_t:int16x8_t:int32x4_t
3914 generate int32x4_t:int32x2_t:int64x2_t, int32x4_t:int32x4_t:int64x2_t
3915 aarch64 = umull2
3916 generate uint16x8_t:uint16x4_t:uint32x4_t, uint16x8_t:uint16x8_t:uint32x4_t
3917 generate uint32x4_t:uint32x2_t:uint64x2_t, uint32x4_t:uint32x4_t:uint64x2_t
3918
3919 /// Floating-point multiply extended
3920 name = vmulx
3921 a = 1., 2., 3., 4.
3922 b = 2., 2., 2., 2.
3923 validate 2., 4., 6., 8.
3924
3925 aarch64 = fmulx
3926 link-aarch64 = fmulx._EXT_
3927 generate float*_t, float64x*_t
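
// FMULX behaves like an ordinary floating-point multiply except that
// 0.0 * infinity (in either order) returns +/-2.0 instead of NaN; the finite
// inputs above do not exercise that special case.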
3928
3929 /// Floating-point multiply extended
3930 name = vmulx
3931 lane-suffixes
3932 constn = LANE
3933 multi_fn = static_assert_imm-in_exp_len-LANE
3934 multi_fn = vmulx-in0-noext, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
3935 a = 1.
3936 b = 2., 0.
3937 n = 0
3938 validate 2.
3939
3940 aarch64 = fmulx
3941 generate float64x1_t, float64x1_t:float64x2_t:float64x1_t
3942
3943 /// Floating-point multiply extended
3944 name = vmulx
3945 lane-suffixes
3946 constn = LANE
3947 multi_fn = static_assert_imm-in_exp_len-LANE
3948 multi_fn = vmulx-in0-noext, a, {simd_shuffle!, b, b, {dup-in0_len-LANE as u32}}
3949 a = 1., 2., 3., 4.
3950 b = 2., 0., 0., 0.
3951 n = 0
3952 validate 2., 4., 6., 8.
3953
3954 aarch64 = fmulx
3955 generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
3956 generate float64x2_t:float64x1_t:float64x2_t, float64x2_t
3957
3958 /// Floating-point multiply extended
3959 name = vmulx
3960 a = 2.
3961 b = 3.
3962 validate 6.
3963
3964 aarch64 = fmulx
3965 link-aarch64 = fmulx._EXT_
3966 generate f32, f64
3967
3968 /// Floating-point multiply extended
3969 name = vmulx
3970 lane-suffixes
3971 constn = LANE
3972 multi_fn = static_assert_imm-in_exp_len-LANE
3973 multi_fn = vmulx-out-noext, a, {simd_extract, b, LANE as u32}
3974
3975 a = 2.
3976 b = 3., 0., 0., 0.
3977 n = 0
3978 validate 6.
3979
3980 aarch64 = fmulx
3981 generate f32:float32x2_t:f32, f32:float32x4_t:f32, f64:float64x1_t:f64, f64:float64x2_t:f64
3982
3983 /// Floating-point fused Multiply-Add to accumulator (vector)
3984 name = vfma
3985 multi_fn = vfma-self-_, b, c, a
3986 a = 8.0, 18.0, 12.0, 10.0
3987 b = 6.0, 4.0, 7.0, 8.0
3988 c = 2.0, 3.0, 4.0, 5.0
3989 validate 20.0, 30.0, 40.0, 50.0
3990
3991 link-aarch64 = llvm.fma._EXT_
3992 aarch64 = fmadd
3993 generate float64x1_t
3994 aarch64 = fmla
3995 generate float64x2_t
3996
3997 target = vfp4
3998 arm = vfma
3999 link-arm = llvm.fma._EXT_
4000 generate float*_t
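
// Assumed lane-wise semantics: vfma(a, b, c) = a + b * c with a single
// rounding; llvm.fma computes x * y + z, hence the `b, c, a` argument order in
// the multi_fn above. e.g. the first validated lane: 8.0 + 6.0 * 2.0 = 20.0.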
4001
4002 /// Floating-point fused Multiply-Add to accumulator (vector)
4003 name = vfma
4004 n-suffix
4005 multi_fn = vfma-self-noext, a, b, {vdup-nselfvfp4-noext, c}
4006 a = 2.0, 3.0, 4.0, 5.0
4007 b = 6.0, 4.0, 7.0, 8.0
4008 c = 8.0
4009 validate 50.0, 35.0, 60.0, 69.0
4010
4011 aarch64 = fmadd
4012 generate float64x1_t:float64x1_t:f64:float64x1_t
4013 aarch64 = fmla
4014 generate float64x2_t:float64x2_t:f64:float64x2_t
4015
4016 target = vfp4
4017 arm = vfma
4018 generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
4019
4020 /// Floating-point fused multiply-add to accumulator
4021 name = vfma
4022 in2-lane-suffixes
4023 constn = LANE
4024 multi_fn = static_assert_imm-in2_exp_len-LANE
4025 multi_fn = vfma-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}}
4026 a = 2., 3., 4., 5.
4027 b = 6., 4., 7., 8.
4028 c = 2., 0., 0., 0.
4029 n = 0
4030 validate 14., 11., 18., 21.
4031
4032 aarch64 = fmla
4033 generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4034 aarch64 = fmadd
4035 generate float64x1_t
4036 aarch64 = fmla
4037 generate float64x1_t:float64x1_t:float64x2_t:float64x1_t, float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t
4038
4039 /// Floating-point fused multiply-add to accumulator
4040 name = vfma
4041 in2-lane-suffixes
4042 constn = LANE
4043 multi_fn = static_assert_imm-in2_exp_len-LANE
4044 multi_fn = simd_extract, c:out_t, c, LANE as u32
4045 multi_fn = vfma-in2lane-_, b, c, a
4046 a = 2.
4047 b = 6.
4048 c = 3., 0., 0., 0.
4049 n = 0
4050 validate 20.
4051
4052 aarch64 = fmla
4053 link-aarch64 = llvm.fma._EXT_:f32:f32:f32:f32
4054 generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32
4055 link-aarch64 = llvm.fma._EXT_:f64:f64:f64:f64
4056 aarch64 = fmadd
4057 generate f64:f64:float64x1_t:f64
4058 aarch64 = fmla
4059 generate f64:f64:float64x2_t:f64
4060
4061 /// Floating-point fused multiply-subtract from accumulator
4062 name = vfms
4063 multi_fn = simd_neg, b:in_t, b
4064 multi_fn = vfma-self-noext, a, b, c
4065 a = 20.0, 30.0, 40.0, 50.0
4066 b = 6.0, 4.0, 7.0, 8.0
4067 c = 2.0, 3.0, 4.0, 5.0
4068 validate 8.0, 18.0, 12.0, 10.0
4069
4070 aarch64 = fmsub
4071 generate float64x1_t
4072 aarch64 = fmls
4073 generate float64x2_t
4074
4075 target = vfp4
4076 arm = vfms
4077 generate float*_t
4078
4079 /// Floating-point fused Multiply-Subtract from accumulator (vector)
4080 name = vfms
4081 n-suffix
4082 multi_fn = vfms-self-noext, a, b, {vdup-nselfvfp4-noext, c}
4083 a = 50.0, 35.0, 60.0, 69.0
4084 b = 6.0, 4.0, 7.0, 8.0
4085 c = 8.0
4086 validate 2.0, 3.0, 4.0, 5.0
4087
4088 aarch64 = fmsub
4089 generate float64x1_t:float64x1_t:f64:float64x1_t
4090 aarch64 = fmls
4091 generate float64x2_t:float64x2_t:f64:float64x2_t
4092
4093 target = vfp4
4094 arm = vfms
4095 generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
4096
4097 /// Floating-point fused multiply-subtract from accumulator
4098 name = vfms
4099 in2-lane-suffixes
4100 constn = LANE
4101 multi_fn = static_assert_imm-in2_exp_len-LANE
4102 multi_fn = vfms-out-noext, a, b, {vdup-nout-noext, {simd_extract, c, LANE as u32}}
4103 a = 14., 11., 18., 21.
4104 b = 6., 4., 7., 8.
4105 c = 2., 0., 0., 0.
4106 n = 0
4107 validate 2., 3., 4., 5.
4108
4109 aarch64 = fmls
4110 generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4111 aarch64 = fmsub
4112 generate float64x1_t
4113 aarch64 = fmls
4114 generate float64x1_t:float64x1_t:float64x2_t:float64x1_t, float64x2_t:float64x2_t:float64x1_t:float64x2_t, float64x2_t
4115
4116 /// Floating-point fused multiply-subtract from accumulator
4117 name = vfms
4118 in2-lane-suffixes
4119 constn = LANE
4120 multi_fn = vfma-in2lane-::<LANE>, a, -b, c
4121 a = 14.
4122 b = 6.
4123 c = 2., 0., 0., 0.
4124 n = 0
4125 validate 2.
4126
4127 aarch64 = fmls
4128 generate f32:f32:float32x2_t:f32, f32:f32:float32x4_t:f32
4129 aarch64 = fmsub
4130 generate f64:f64:float64x1_t:f64
4131 aarch64 = fmls
4132 generate f64:f64:float64x2_t:f64
4133
4134 /// Divide
4135 name = vdiv
4136 fn = simd_div
4137 a = 2.0, 6.0, 4.0, 10.0
4138 b = 1.0, 2.0, 1.0, 2.0
4139 validate 2.0, 3.0, 4.0, 5.0
4140
4141 aarch64 = fdiv
4142 generate float*_t, float64x*_t
4143
4144 /// Subtract
4145 name = vsub
4146 a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4147 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
4148 validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
4149 arm = vsub.
4150 aarch64 = sub
4151 fn = simd_sub
4152 generate int*_t, uint*_t, int64x*_t, uint64x*_t
4153
4154 /// Subtract
4155 name = vsub
4156 fn = simd_sub
4157 a = 1.0, 4.0, 3.0, 8.0
4158 b = 1.0, 2.0, 3.0, 4.0
4159 validate 0.0, 2.0, 0.0, 4.0
4160
4161 aarch64 = fsub
4162 generate float64x*_t
4163
4164 arm = vsub.
4165 generate float*_t
4166
4167 /// Subtract
4168 name = vsub
4169 multi_fn = a.wrapping_sub(b)
4170 a = 3
4171 b = 2
4172 validate 1
4173
4174 aarch64 = nop
4175 generate i64, u64
4176
4177 /// Add
4178 name = vadd
4179 multi_fn = a.wrapping_add(b)
4180 a = 1
4181 b = 2
4182 validate 3
4183
4184 aarch64 = nop
4185 generate i64, u64
4186
4187 /// Bitwise exclusive OR
4188 name = vadd
4189 multi_fn = simd_xor, a, b
4190 a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4191 b = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
4192 validate 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17
4193
4194 aarch64 = nop
4195 arm = nop
4196 generate poly8x8_t, poly16x4_t, poly8x16_t, poly16x8_t, poly64x1_t, poly64x2_t
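
// Addition of polynomials over GF(2) is a coefficient-wise XOR with no
// carries, which is why vadd on the poly types reduces to simd_xor,
// e.g. 1 + 1 = 0 and 2 + 1 = 3 in the validate above.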
4197
4198 /// Bitwise exclusive OR
4199 name = vaddq
4200 no-q
4201 multi_fn = a ^ b
4202 a = 16
4203 b = 1
4204 validate 17
4205
4206 aarch64 = nop
4207 arm = nop
4208 generate p128
4209
4210 /// Floating-point add across vector
4211 name = vaddv
4212 a = 1., 2., 0., 0.
4213 validate 3.
4214
4215 aarch64 = faddp
4216 link-aarch64 = faddv._EXT2_._EXT_
4217 generate float32x2_t:f32, float32x4_t:f32, float64x2_t:f64
4218
4219 /// Signed Add Long across Vector
4220 name = vaddlv
4221 a = 1, 2, 3, 4
4222 validate 10
4223
4224 aarch64 = saddlv
4225 link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
4226 generate int16x4_t:i32
4227
4228 /// Signed Add Long across Vector
4229 name = vaddlv
4230 a = 1, 2, 3, 4, 5, 6, 7, 8
4231 validate 36
4232
4233 aarch64 = saddlv
4234 link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
4235 generate int16x8_t:i32
4236
4237 /// Signed Add Long across Vector
4238 name = vaddlv
4239 a = 1, 2
4240 validate 3
4241
4242 aarch64 = saddlp
4243 link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
4244 generate int32x2_t:i64
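
// For the two-element input LLVM lowers the across-vector add to the pairwise
// saddlp rather than saddlv, hence the instruction asserted above (and
// likewise uaddlp for the unsigned variant below).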
4245
4246 /// Signed Add Long across Vector
4247 name = vaddlv
4248 a = 1, 2, 3, 4
4249 validate 10
4250
4251 aarch64 = saddlv
4252 link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
4253 generate int32x4_t:i64
4254
4255 /// Unsigned Add Long across Vector
4256 name = vaddlv
4257 a = 1, 2, 3, 4
4258 validate 10
4259
4260 aarch64 = uaddlv
4261 link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
4262 generate uint16x4_t:u32
4263
4264 /// Unsigned Add Long across Vector
4265 name = vaddlv
4266 a = 1, 2, 3, 4, 5, 6, 7, 8
4267 validate 36
4268
4269 aarch64 = uaddlv
4270 link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
4271 generate uint16x8_t:u32
4272
4273 /// Unsigned Add Long across Vector
4274 name = vaddlv
4275 a = 1, 2
4276 validate 3
4277
4278 aarch64 = uaddlp
4279 link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
4280 generate uint32x2_t:u64
4281
4282 /// Unsigned Add Long across Vector
4283 name = vaddlv
4284 a = 1, 2, 3, 4
4285 validate 10
4286
4287 aarch64 = uaddlv
4288 link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
4289 generate uint32x4_t:u64
4290
4291 /// Subtract returning high narrow
4292 name = vsubhn
4293 no-q
4294 multi_fn = fixed, c:in_t
4295 multi_fn = simd_cast, {simd_shr, {simd_sub, a, b}, transmute(c)}
4296 a = MAX, MIN, 1, 1, MAX, MIN, 1, 1
4297 b = 1, 0, 0, 0, 1, 0, 0, 0
4298 fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS
4299 validate MAX, MIN, 0, 0, MAX, MIN, 0, 0
4300
4301 arm = vsubhn
4302 aarch64 = subhn
4303 generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
4304 generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
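
// A minimal scalar model of the assumed per-lane semantics for the i16 -> i8
// case (HFBITS is half the input element width, here 8):
// fn subhn_lane(a: i16, b: i16) -> i8 {
//     (a.wrapping_sub(b) >> 8) as i8
// }
// e.g. (i16::MAX - 1) >> 8 = 0x7ffe >> 8 = 0x7f = i8::MAX, matching validate.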
4305
4306 /// Subtract returning high narrow
4307 name = vsubhn_high
4308 no-q
4309 multi_fn = vsubhn-noqself-noext, d:in_t0, b, c
4310 multi_fn = simd_shuffle!, a, d, {asc-0-out_len}
4311 a = MAX, 0, MAX, 0, MAX, 0, MAX, 0
4312 b = MAX, 1, MAX, 1, MAX, 1, MAX, 1
4313 c = 1, 0, 1, 0, 1, 0, 1, 0
4314 validate MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0
4315
4316 arm = vsubhn
4317 aarch64 = subhn2
4318 generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
4319 generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t
4320
4321 /// Signed halving subtract
4322 name = vhsub
4323 a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4324 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
4325 validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7
4326
4327 arm = vhsub.s
4328 aarch64 = uhsub
4329 link-arm = vhsubu._EXT_
4330 link-aarch64 = uhsub._EXT_
4331 generate uint*_t
4332
4333 arm = vhsub.s
4334 aarch64 = shsub
4335 link-arm = vhsubs._EXT_
4336 link-aarch64 = shsub._EXT_
4337 generate int*_t
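
// Assumed per-lane semantics, sketched for i8: ((a as i16 - b as i16) >> 1) as i8,
// i.e. the halved difference computed without intermediate overflow,
// e.g. lane 2 above: (3 - 1) >> 1 = 1.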
4338
4339 /// Signed Subtract Wide
4340 name = vsubw
4341 no-q
4342 multi_fn = simd_sub, a, {simd_cast, b}
4343 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4344 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4345 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4346
4347 arm = vsubw
4348 aarch64 = ssubw
4349 generate int16x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int64x2_t
4350
4351 /// Unsigned Subtract Wide
4352 name = vsubw
4353 no-q
4354 multi_fn = simd_sub, a, {simd_cast, b}
4355 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4356 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4357 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4358
4359 arm = vsubw
4360 aarch64 = usubw
4361 generate uint16x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint64x2_t
4362
4363 /// Signed Subtract Wide
4364 name = vsubw_high
4365 no-q
4366 multi_fn = simd_shuffle!, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
4367 multi_fn = simd_sub, a, {simd_cast, c}
4368 a = 8, 9, 10, 12, 13, 14, 15, 16
4369 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16
4370 validate 0, 0, 0, 0, 0, 0, 0, 0
4371
4372 aarch64 = ssubw
4373 generate int16x8_t:int8x16_t:int16x8_t
4374
4375 /// Signed Subtract Wide
4376 name = vsubw_high
4377 no-q
4378 multi_fn = simd_shuffle!, c:int16x4_t, b, b, [4, 5, 6, 7]
4379 multi_fn = simd_sub, a, {simd_cast, c}
4380 a = 8, 9, 10, 11
4381 b = 0, 1, 2, 3, 8, 9, 10, 11
4382 validate 0, 0, 0, 0
4383
4384 aarch64 = ssubw
4385 generate int32x4_t:int16x8_t:int32x4_t
4386
4387 /// Signed Subtract Wide
4388 name = vsubw_high
4389 no-q
4390 multi_fn = simd_shuffle!, c:int32x2_t, b, b, [2, 3]
4391 multi_fn = simd_sub, a, {simd_cast, c}
4392 a = 8, 9
4393 b = 6, 7, 8, 9
4394 validate 0, 0
4395
4396 aarch64 = ssubw
4397 generate int64x2_t:int32x4_t:int64x2_t
4398
4399 /// Unsigned Subtract Wide
4400 name = vsubw_high
4401 no-q
4402 multi_fn = simd_shuffle!, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
4403 multi_fn = simd_sub, a, {simd_cast, c}
4404 a = 8, 9, 10, 11, 12, 13, 14, 15
4405 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4406 validate 0, 0, 0, 0, 0, 0, 0, 0
4407
4408 aarch64 = usubw
4409 generate uint16x8_t:uint8x16_t:uint16x8_t
4410
4411 /// Unsigned Subtract Wide
4412 name = vsubw_high
4413 no-q
4414 multi_fn = simd_shuffle!, c:uint16x4_t, b, b, [4, 5, 6, 7]
4415 multi_fn = simd_sub, a, {simd_cast, c}
4416 a = 8, 9, 10, 11
4417 b = 0, 1, 2, 3, 8, 9, 10, 11
4418 validate 0, 0, 0, 0
4419
4420 aarch64 = usubw
4421 generate uint32x4_t:uint16x8_t:uint32x4_t
4422
4423 /// Unsigned Subtract Wide
4424 name = vsubw_high
4425 no-q
4426 multi_fn = simd_shuffle!, c:uint32x2_t, b, b, [2, 3]
4427 multi_fn = simd_sub, a, {simd_cast, c}
4428 a = 8, 9
4429 b = 6, 7, 8, 9
4430 validate 0, 0
4431
4432 aarch64 = usubw
4433 generate uint64x2_t:uint32x4_t:uint64x2_t
4434
4435 /// Signed Subtract Long
4436 name = vsubl
4437 no-q
4438 multi_fn = simd_cast, c:out_t, a
4439 multi_fn = simd_cast, d:out_t, b
4440 multi_fn = simd_sub, c, d
4441
4442 a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4443 b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4444 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4445
4446 arm = vsubl
4447 aarch64 = ssubl
4448 generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
4449
4450 /// Unsigned Subtract Long
4451 name = vsubl
4452 no-q
4453 multi_fn = simd_cast, c:out_t, a
4454 multi_fn = simd_cast, d:out_t, b
4455 multi_fn = simd_sub, c, d
4456
4457 a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4458 b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4459 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4460
4461 arm = vsubl
4462 aarch64 = usubl
4463 generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t
4464
4465 /// Signed Subtract Long
4466 name = vsubl_high
4467 no-q
4468 multi_fn = simd_shuffle!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
4469 multi_fn = simd_cast, d:out_t, c
4470 multi_fn = simd_shuffle!, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
4471 multi_fn = simd_cast, f:out_t, e
4472 multi_fn = simd_sub, d, f
4473
4474 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4475 b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
4476 validate 6, 7, 8, 9, 10, 11, 12, 13
4477
4478 aarch64 = ssubl
4479 generate int8x16_t:int8x16_t:int16x8_t
4480
4481 /// Signed Subtract Long
4482 name = vsubl_high
4483 no-q
4484 multi_fn = simd_shuffle!, c:int16x4_t, a, a, [4, 5, 6, 7]
4485 multi_fn = simd_cast, d:out_t, c
4486 multi_fn = simd_shuffle!, e:int16x4_t, b, b, [4, 5, 6, 7]
4487 multi_fn = simd_cast, f:out_t, e
4488 multi_fn = simd_sub, d, f
4489
4490 a = 8, 9, 10, 11, 12, 13, 14, 15
4491 b = 6, 6, 6, 6, 8, 8, 8, 8
4492 validate 4, 5, 6, 7
4493
4494 aarch64 = ssubl
4495 generate int16x8_t:int16x8_t:int32x4_t
4496
4497 /// Signed Subtract Long
4498 name = vsubl_high
4499 no-q
4500 multi_fn = simd_shuffle!, c:int32x2_t, a, a, [2, 3]
4501 multi_fn = simd_cast, d:out_t, c
4502 multi_fn = simd_shuffle!, e:int32x2_t, b, b, [2, 3]
4503 multi_fn = simd_cast, f:out_t, e
4504 multi_fn = simd_sub, d, f
4505
4506 a = 12, 13, 14, 15
4507 b = 6, 6, 8, 8
4508 validate 6, 7
4509
4510 aarch64 = ssubl
4511 generate int32x4_t:int32x4_t:int64x2_t
4512
4513 /// Unsigned Subtract Long
4514 name = vsubl_high
4515 no-q
4516 multi_fn = simd_shuffle!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15]
4517 multi_fn = simd_cast, d:out_t, c
4518 multi_fn = simd_shuffle!, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
4519 multi_fn = simd_cast, f:out_t, e
4520 multi_fn = simd_sub, d, f
4521
4522 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4523 b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
4524 validate 6, 7, 8, 9, 10, 11, 12, 13
4525
4526 aarch64 = usubl
4527 generate uint8x16_t:uint8x16_t:uint16x8_t
4528
4529 /// Unsigned Subtract Long
4530 name = vsubl_high
4531 no-q
4532 multi_fn = simd_shuffle!, c:uint16x4_t, a, a, [4, 5, 6, 7]
4533 multi_fn = simd_cast, d:out_t, c
4534 multi_fn = simd_shuffle!, e:uint16x4_t, b, b, [4, 5, 6, 7]
4535 multi_fn = simd_cast, f:out_t, e
4536 multi_fn = simd_sub, d, f
4537
4538 a = 8, 9, 10, 11, 12, 13, 14, 15
4539 b = 6, 6, 6, 6, 8, 8, 8, 8
4540 validate 4, 5, 6, 7
4541
4542 aarch64 = usubl
4543 generate uint16x8_t:uint16x8_t:uint32x4_t
4544
4545 /// Unsigned Subtract Long
4546 name = vsubl_high
4547 no-q
4548 multi_fn = simd_shuffle!, c:uint32x2_t, a, a, [2, 3]
4549 multi_fn = simd_cast, d:out_t, c
4550 multi_fn = simd_shuffle!, e:uint32x2_t, b, b, [2, 3]
4551 multi_fn = simd_cast, f:out_t, e
4552 multi_fn = simd_sub, d, f
4553
4554 a = 12, 13, 14, 15
4555 b = 6, 6, 8, 8
4556 validate 6, 7
4557
4558 aarch64 = usubl
4559 generate uint32x4_t:uint32x4_t:uint64x2_t
4560
4561 /// Bit clear and exclusive OR
4562 name = vbcax
4563 a = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0
4564 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
4565 c = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
4566 validate 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14
4567 target = sha3
4568
4569 aarch64 = bcax
4570 link-aarch64 = llvm.aarch64.crypto.bcaxs._EXT_
4571 generate int8x16_t, int16x8_t, int32x4_t, int64x2_t
4572 link-aarch64 = llvm.aarch64.crypto.bcaxu._EXT_
4573 generate uint8x16_t, uint16x8_t, uint32x4_t, uint64x2_t
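
// Assumed per-lane semantics: a ^ (b & !c), i.e. the bits of b selected by c
// are cleared before the XOR into a. With c = 1 in every lane, bit 0 of b is
// cleared first, e.g. lane 2 above: 1 ^ (2 & !1) = 1 ^ 2 = 3.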
4574
4575 /// Floating-point complex add
4576 name = vcadd_rot270
4577 no-q
4578 a = 1., -1., 1., -1.
4579 b = -1., 1., -1., 1.
4580 validate 2., 0., 2., 0.
4581 target = fcma
4582
4583 aarch64 = fcadd
4584 link-aarch64 = vcadd.rot270._EXT_
4585 generate float32x2_t
4586 name = vcaddq_rot270
4587 generate float32x4_t, float64x2_t
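
// Lanes hold interleaved complex numbers (re, im). Assumed per-pair semantics:
// rot270 adds b rotated by -90 degrees (i.e. multiplied by -i):
//     out.re = a.re + b.im;  out.im = a.im - b.re
// e.g. a = (1, -1), b = (-1, 1): out = (1 + 1, -1 - (-1)) = (2, 0), as
// validated. The rot90 section below instead rotates b by +90 degrees (+i).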
4588
4589 /// Floating-point complex add
4590 name = vcadd_rot90
4591 no-q
4592 a = 1., -1., 1., -1.
4593 b = -1., 1., -1., 1.
4594 validate 0., -2., 0., -2.
4595 target = fcma
4596
4597 aarch64 = fcadd
4598 link-aarch64 = vcadd.rot90._EXT_
4599 generate float32x2_t
4600 name = vcaddq_rot90
4601 generate float32x4_t, float64x2_t
4602
4603 /// Floating-point complex multiply accumulate
4604 name = vcmla
4605 a = 1., -1., 1., -1.
4606 b = -1., 1., -1., 1.
4607 c = 1., 1., -1., -1.
4608 validate 0., -2., 2., 0.
4609 target = fcma
4610
4611 aarch64 = fcmla
4612 link-aarch64 = vcmla.rot0._EXT_
4613 generate float32x2_t, float32x4_t, float64x2_t
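
// Lanes hold interleaved complex numbers (re, im). Assumed per-pair semantics
// of the four rotations (rot0 together with rot90 below composes a full
// complex multiply-accumulate):
//     rot0:   out = a + (b.re * c.re,  b.re * c.im)
//     rot90:  out = a + (-b.im * c.im, b.im * c.re)
//     rot180: out = a + (-b.re * c.re, -b.re * c.im)
//     rot270: out = a + (b.im * c.im,  -b.im * c.re)
// e.g. rot0 with a = (1, -1), b = (-1, 1), c = (1, 1): out = (0, -2), as
// validated.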
4614
4615 /// Floating-point complex multiply accumulate
4616 name = vcmla_rot90
4617 rot-suffix
4618 a = 1., 1., 1., 1.
4619 b = 1., -1., 1., -1.
4620 c = 1., 1., 1., 1.
4621 validate 2., 0., 2., 0.
4622 target = fcma
4623
4624 aarch64 = fcmla
4625 link-aarch64 = vcmla.rot90._EXT_
4626 generate float32x2_t, float32x4_t, float64x2_t
4627
4628 /// Floating-point complex multiply accumulate
4629 name = vcmla_rot180
4630 rot-suffix
4631 a = 1., 1., 1., 1.
4632 b = 1., -1., 1., -1.
4633 c = 1., 1., 1., 1.
4634 validate 0., 0., 0., 0.
4635 target = fcma
4636
4637 aarch64 = fcmla
4638 link-aarch64 = vcmla.rot180._EXT_
4639 generate float32x2_t, float32x4_t, float64x2_t
4640
4641 /// Floating-point complex multiply accumulate
4642 name = vcmla_rot270
4643 rot-suffix
4644 a = 1., 1., 1., 1.
4645 b = 1., -1., 1., -1.
4646 c = 1., 1., 1., 1.
4647 validate 0., 2., 0., 2.
4648 target = fcma
4649
4650 aarch64 = fcmla
4651 link-aarch64 = vcmla.rot270._EXT_
4652 generate float32x2_t, float32x4_t, float64x2_t
4653
4654 /// Floating-point complex multiply accumulate
4655 name = vcmla
4656 in2-lane-suffixes
4657 constn = LANE
4658 multi_fn = static_assert_imm-in2_rot-LANE
4659 multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
4660 multi_fn = vcmla-self-noext, a, b, c
4661 a = 1., -1., 1., -1.
4662 b = -1., 1., -1., 1.
4663 c = 1., 1., -1., -1.
4664 n = 0
4665 validate 0., -2., 0., -2.
4666 target = fcma
4667
4668 aarch64 = fcmla
4669 generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
4670 generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4671
4672 /// Floating-point complex multiply accumulate
4673 name = vcmla_rot90
4674 rot-lane-suffixes
4675 constn = LANE
4676 multi_fn = static_assert_imm-in2_rot-LANE
4677 multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
4678 multi_fn = vcmla_rot90-rot-noext, a, b, c
4679 a = 1., -1., 1., -1.
4680 b = -1., 1., -1., 1.
4681 c = 1., 1., -1., -1.
4682 n = 0
4683 validate 0., 0., 0., 0.
4684 target = fcma
4685
4686 aarch64 = fcmla
4687 generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
4688 generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4689
4690 /// Floating-point complex multiply accumulate
4691 name = vcmla_rot180
4692 rot-lane-suffixes
4693 constn = LANE
4694 multi_fn = static_assert_imm-in2_rot-LANE
4695 multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
4696 multi_fn = vcmla_rot180-rot-noext, a, b, c
4697 a = 1., -1., 1., -1.
4698 b = -1., 1., -1., 1.
4699 c = 1., 1., -1., -1.
4700 n = 0
4701 validate 2., 0., 2., 0.
4702 target = fcma
4703
4704 aarch64 = fcmla
4705 generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
4706 generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4707
4708 /// Floating-point complex multiply accumulate
4709 name = vcmla_rot270
4710 rot-lane-suffixes
4711 constn = LANE
4712 multi_fn = static_assert_imm-in2_rot-LANE
4713 multi_fn = simd_shuffle!, c:out_t, c, c, {base-2-LANE}
4714 multi_fn = vcmla_rot270-rot-noext, a, b, c
4715 a = 1., -1., 1., -1.
4716 b = -1., 1., -1., 1.
4717 c = 1., 1., -1., -1.
4718 n = 0
4719 validate 2., -2., 2., -2.
4720 target = fcma
4721
4722 aarch64 = fcmla
4723 generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t
4724 generate float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
4725
4726 /// Dot product arithmetic (vector)
4727 name = vdot
4728 out-suffix
4729 a = 1, 2, 1, 2
4730 b = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4731 c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4732 validate 31, 176, 31, 176
4733 target = dotprod
4734
4735 arm = vsdot
4736 aarch64 = sdot
4737 link-arm = sdot._EXT_._EXT3_
4738 link-aarch64 = sdot._EXT_._EXT3_
4739 generate int32x2_t:int8x8_t:int8x8_t:int32x2_t, int32x4_t:int8x16_t:int8x16_t:int32x4_t
4740
4741 arm = vudot
4742 aarch64 = udot
4743 link-arm = udot._EXT_._EXT3_
4744 link-aarch64 = udot._EXT_._EXT3_
4745 generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t, uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t
4746
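// Editorial note: each 32-bit lane of the result is a[i] plus the dot product
// of the corresponding groups of four bytes: out[i] = a[i] + sum over j of
// b[4i+j] * c[4i+j]. For the test vectors above: 1 + (1+4+9+16) = 31 and
// 2 + (25+36+49+64) = 176, matching the validate line.
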
4747 /// Dot product arithmetic (indexed)
4748 name = vdot
4749 out-lane-suffixes
4750 constn = LANE
4751 multi_fn = static_assert_imm-in2_dot-LANE
4752 multi_fn = transmute, c:merge4_t2, c
4753 multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
4754 multi_fn = vdot-out-noext, a, b, {transmute, c}
4755 a = 1, 2, 1, 2
4756 b = -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4757 c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4758 n = 0
4759 validate 29, 72, 31, 72
4760 target = dotprod
4761
4762 // Only AArch64 has the laneq forms.
4763 aarch64 = sdot
4764 generate int32x2_t:int8x8_t:int8x16_t:int32x2_t
4765 generate int32x4_t:int8x16_t:int8x16_t:int32x4_t
4766
4767 arm = vsdot
4768 generate int32x2_t:int8x8_t:int8x8_t:int32x2_t
4769 generate int32x4_t:int8x16_t:int8x8_t:int32x4_t
4770
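// Editorial note: in the indexed forms, LANE selects one 32-bit group of four
// bytes from c, which is broadcast to every lane before the dot product. With
// LANE = 0 the group (1, 2, 3, 4) is used throughout, so out[0] =
// 1 + (-1*1 + 2*2 + 3*3 + 4*4) = 29 and out[1] = 2 + (5*1 + 6*2 + 7*3 + 8*4) = 72.
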
4771 /// Dot product arithmetic (indexed)
4772 name = vdot
4773 out-lane-suffixes
4774 constn = LANE
4775 multi_fn = static_assert_imm-in2_dot-LANE
4776 multi_fn = transmute, c:merge4_t2, c
4777 multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
4778 multi_fn = vdot-out-noext, a, b, {transmute, c}
4779 a = 1, 2, 1, 2
4780 b = 255, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4781 c = 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
4782 n = 0
4783 validate 285, 72, 31, 72
4784 target = dotprod
4785
4786 // Only AArch64 has the laneq forms.
4787 aarch64 = udot
4788 generate uint32x2_t:uint8x8_t:uint8x16_t:uint32x2_t
4789 generate uint32x4_t:uint8x16_t:uint8x16_t:uint32x4_t
4790
4791 arm = vudot
4792 generate uint32x2_t:uint8x8_t:uint8x8_t:uint32x2_t
4793 generate uint32x4_t:uint8x16_t:uint8x8_t:uint32x4_t
4794
4795 /// Maximum (vector)
4796 name = vmax
4797 a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4798 b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
4799 validate 16, 15, 14, 13, 12, 11, 10, 9, 9, 10, 11, 12, 13, 14, 15, 16
4800
4801 arm = vmax
4802 aarch64 = smax
4803 link-arm = vmaxs._EXT_
4804 link-aarch64 = smax._EXT_
4805 generate int*_t
4806
4807 arm = vmax
4808 aarch64 = umax
4809 link-arm = vmaxu._EXT_
4810 link-aarch64 = umax._EXT_
4811 generate uint*_t
4812
4813 /// Maximum (vector)
4814 name = vmax
4815 a = 1.0, -2.0, 3.0, -4.0
4816 b = 0.0, 3.0, 2.0, 8.0
4817 validate 1.0, 3.0, 3.0, 8.0
4818
4819 aarch64 = fmax
4820 link-aarch64 = fmax._EXT_
4821 generate float64x*_t
4822
4823 arm = vmax
4824 aarch64 = fmax
4825 link-arm = vmaxs._EXT_
4826 link-aarch64 = fmax._EXT_
4827 generate float*_t
4828
4829 /// Floating-point Maximum Number (vector)
4830 name = vmaxnm
4831 a = 1.0, 2.0, 3.0, -4.0
4832 b = 8.0, 16.0, -1.0, 6.0
4833 validate 8.0, 16.0, 3.0, 6.0
4834
4835 aarch64 = fmaxnm
4836 link-aarch64 = fmaxnm._EXT_
4837 generate float64x*_t
4838
4839 target = fp-armv8
4840 arm = vmaxnm
4841 aarch64 = fmaxnm
4842 link-arm = vmaxnm._EXT_
4843 link-aarch64 = fmaxnm._EXT_
4844 generate float*_t
4845
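// Editorial note: unlike vmax, the "maximum number" forms follow the IEEE 754
// maxNum rule: if exactly one operand is a NaN, the other (numeric) operand
// is returned instead of NaN.
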
4846 /// Floating-point maximum number across vector
4847 name = vmaxnmv
4848 a = 1., 2., 0., 1.
4849 validate 2.
4850
4851 aarch64 = fmaxnmp
4852 link-aarch64 = fmaxnmv._EXT2_._EXT_
4853 generate float32x2_t:f32, float64x2_t:f64
4854 aarch64 = fmaxnmv
4855 generate float32x4_t:f32
4856
/// Floating-point maximum number pairwise (vector)
4858 name = vpmaxnm
4859 a = 1.0, 2.0
4860 b = 6.0, -3.0
4861 validate 2.0, 6.0
4862 aarch64 = fmaxnmp
4863 link-aarch64 = fmaxnmp._EXT_
4864 generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t
4865
/// Floating-point maximum number pairwise (vector)
4867 name = vpmaxnm
4868 a = 1.0, 2.0, 3.0, -4.0
4869 b = 8.0, 16.0, -1.0, 6.0
4870 validate 2.0, 3.0, 16.0, 6.0
4871 aarch64 = fmaxnmp
4872 link-aarch64 = fmaxnmp._EXT_
4873 generate float32x4_t:float32x4_t:float32x4_t
4874
4875 /// Floating-point maximum number pairwise
4876 name = vpmaxnm
4877 out-suffix
4878 a = 1., 2.
4879 validate 2.
4880
4881 aarch64 = fmaxnmp
4882 link-aarch64 = fmaxnmv._EXT2_._EXT_
4883 generate float32x2_t:f32
4884 name = vpmaxnmq
4885 generate float64x2_t:f64
4886
4887 /// Floating-point maximum pairwise
4888 name = vpmax
4889 out-suffix
4890 a = 1., 2.
4891 validate 2.
4892
4893 aarch64 = fmaxp
4894 link-aarch64 = fmaxv._EXT2_._EXT_
4895 generate float32x2_t:f32
4896 name = vpmaxq
4897 generate float64x2_t:f64
4898
4899 /// Minimum (vector)
4900 name = vmin
4901 a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
4902 b = 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
4903 validate 1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1
4904
4905 arm = vmin
4906 aarch64 = smin
4907 link-arm = vmins._EXT_
4908 link-aarch64 = smin._EXT_
4909 generate int*_t
4910
4911 arm = vmin
4912 aarch64 = umin
4913 link-arm = vminu._EXT_
4914 link-aarch64 = umin._EXT_
4915 generate uint*_t
4916
4917 /// Minimum (vector)
4918 name = vmin
4919 a = 1.0, -2.0, 3.0, -4.0
4920 b = 0.0, 3.0, 2.0, 8.0
4921 validate 0.0, -2.0, 2.0, -4.0
4922
4923 aarch64 = fmin
4924 link-aarch64 = fmin._EXT_
4925 generate float64x*_t
4926
4927 arm = vmin
4928 aarch64 = fmin
4929 link-arm = vmins._EXT_
4930 link-aarch64 = fmin._EXT_
4931 generate float*_t
4932
4933 /// Floating-point Minimum Number (vector)
4934 name = vminnm
4935 a = 1.0, 2.0, 3.0, -4.0
4936 b = 8.0, 16.0, -1.0, 6.0
4937 validate 1.0, 2.0, -1.0, -4.0
4938
4939 aarch64 = fminnm
4940 link-aarch64 = fminnm._EXT_
4941 generate float64x*_t
4942
4943 target = fp-armv8
4944 arm = vminnm
4945 aarch64 = fminnm
4946 link-arm = vminnm._EXT_
4947 link-aarch64 = fminnm._EXT_
4948 generate float*_t
4949
4950 /// Floating-point minimum number across vector
4951 name = vminnmv
4952 a = 1., 0., 2., 3.
4953 validate 0.
4954
4955 aarch64 = fminnmp
4956 link-aarch64 = fminnmv._EXT2_._EXT_
4957 generate float32x2_t:f32, float64x2_t:f64
4958 aarch64 = fminnmv
4959 generate float32x4_t:f32
4960
4961 /// Vector move
4962 name = vmovl_high
4963 no-q
4964 multi_fn = simd_shuffle!, a:half, a, a, {asc-halflen-halflen}
4965 multi_fn = vmovl-noqself-noext, a
4966 a = 1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10
4967 validate 3, 4, 5, 6, 7, 8, 9, 10
4968
4969 aarch64 = sxtl2
4970 generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t
4971
4972 aarch64 = uxtl2
4973 generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t
4974
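// Editorial note: the _high widening moves operate on the upper half of the
// input: the shuffle above extracts lanes len/2..len and the regular vmovl
// then sign- or zero-extends them, mirroring the sxtl2/uxtl2 instructions.
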
4975 /// Floating-point add pairwise
4976 name = vpadd
4977 a = 1., 2., 3., 4.
4978 b = 3., 4., 5., 6.
4979 validate 3., 7., 7., 11.
4980
4981 aarch64 = faddp
4982 link-aarch64 = faddp._EXT_
4983 generate float32x4_t, float64x2_t
4984
4985 arm = vpadd
4986 link-arm = vpadd._EXT_
4987 generate float32x2_t
4988
4989 /// Floating-point add pairwise
4990 name = vpadd
4991 out-suffix
4992 multi_fn = simd_extract, a1:out_t, a, 0
4993 multi_fn = simd_extract, a2:out_t, a, 1
4994 multi_fn = a1 + a2
4995 a = 1., 2.
4996 validate 3.
4997
4998 aarch64 = nop
4999 generate float32x2_t:f32, float64x2_t:f64
5000
/// Floating-point minimum number pairwise (vector)
5002 name = vpminnm
5003 a = 1.0, 2.0
5004 b = 6.0, -3.0
5005 validate 1.0, -3.0
5006
5007 aarch64 = fminnmp
5008 link-aarch64 = fminnmp._EXT_
5009 generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t
5010
/// Floating-point minimum number pairwise (vector)
5012 name = vpminnm
5013 a = 1.0, 2.0, 3.0, -4.0
5014 b = 8.0, 16.0, -1.0, 6.0
5015 validate 1.0, -4.0, 8.0, -1.0
5016 aarch64 = fminnmp
5017 link-aarch64 = fminnmp._EXT_
5018 generate float32x4_t:float32x4_t:float32x4_t
5019
5020 /// Floating-point minimum number pairwise
5021 name = vpminnm
5022 out-suffix
5023 a = 1., 2.
5024 validate 1.
5025
5026 aarch64 = fminnmp
5027 link-aarch64 = fminnmv._EXT2_._EXT_
5028 generate float32x2_t:f32
5029 name = vpminnmq
5030 generate float64x2_t:f64
5031
5032 /// Floating-point minimum pairwise
5033 name = vpmin
5034 out-suffix
5035 a = 1., 2.
5036 validate 1.
5037
5038 aarch64 = fminp
5039 link-aarch64 = fminv._EXT2_._EXT_
5040 generate float32x2_t:f32
5041 name = vpminq
5042 generate float64x2_t:f64
5043
5044 /// Signed saturating doubling multiply long
5045 name = vqdmull
5046 a = 0, 1, 2, 3, 4, 5, 6, 7
5047 b = 1, 2, 3, 4, 5, 6, 7, 8
5048 validate 0, 4, 12, 24, 40, 60, 84, 108
5049
5050 aarch64 = sqdmull
5051 link-aarch64 = sqdmull._EXT2_
5052 arm = vqdmull
5053 link-arm = vqdmull._EXT2_
5054 generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
5055
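// Editorial note: the operation is r = sat(2 * a * b), widened to the
// double-width type; e.g. the lane with a = 3, b = 4 above gives 2*3*4 = 24.
// Saturation only triggers for a = b = MIN, where 2 * MIN * MIN overflows
// even the wide result type.
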
5056 /// Signed saturating doubling multiply long
5057 name = vqdmull
5058 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5059 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5060 multi_fn = simd_extract, {vqdmull-in_ntt-noext, a, b}, 0
5061 a = 2
5062 b = 3
5063 validate 12
5064
5065 aarch64 = sqdmull
5066 generate i16:i16:i32
5067
5068 /// Signed saturating doubling multiply long
5069 name = vqdmull
5070 a = 2
5071 b = 3
5072 validate 12
5073
5074 aarch64 = sqdmull
5075 link-aarch64 = sqdmulls.scalar
5076 generate i32:i32:i64
5077
5078 /// Vector saturating doubling long multiply with scalar
5079 name = vqdmull_n
5080 no-q
5081 multi_fn = vqdmull-in_ntt-noext, a, {vdup_n-in_ntt-noext, b}
5082 a = 2, 4, 6, 8
5083 b = 2
5084 validate 8, 16, 24, 32
5085
5086 aarch64 = sqdmull
5087 arm = vqdmull
5088 generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
5089
5090 /// Signed saturating doubling multiply long
5091 name = vqdmull_high
5092 no-q
5093 multi_fn = simd_shuffle!, a:half, a, a, {asc-halflen-halflen}
5094 multi_fn = simd_shuffle!, b:half, b, b, {asc-halflen-halflen}
5095 multi_fn = vqdmull-noqself-noext, a, b
5096 a = 0, 1, 4, 5, 4, 5, 6, 7
5097 b = 1, 2, 5, 6, 5, 6, 7, 8
5098 validate 40, 60, 84, 112
5099
5100 aarch64 = sqdmull2
5101 generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
5102
5103 /// Signed saturating doubling multiply long
5104 name = vqdmull_high_n
5105 no-q
5106 multi_fn = simd_shuffle!, a:in_ntt, a, a, {asc-out_len-out_len}
5107 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5108 multi_fn = vqdmull-in_ntt-noext, a, b
5109 a = 0, 2, 8, 10, 8, 10, 12, 14
5110 b = 2
5111 validate 32, 40, 48, 56
5112
5113 aarch64 = sqdmull2
5114 generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
5115
5116 /// Vector saturating doubling long multiply by scalar
5117 name = vqdmull_lane
5118 constn = N
5119 multi_fn = static_assert_imm-in_exp_len-N
5120 multi_fn = simd_shuffle!, b:in_t0, b, b, {dup-out_len-N as u32}
5121 multi_fn = vqdmull-noqself-noext, a, b
5122 a = 1, 2, 3, 4
5123 b = 0, 2, 2, 0, 2, 0, 0, 0
5124 n = HFLEN
5125 validate 4, 8, 12, 16
5126
5127 aarch64 = sqdmull
5128 generate int16x4_t:int16x8_t:int32x4_t, int32x2_t:int32x4_t:int64x2_t
5129
5130 arm = vqdmull
5131 generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
5132
5133 /// Signed saturating doubling multiply long
5134 name = vqdmullh_lane
5135 constn = N
5136 multi_fn = static_assert_imm-in_exp_len-N
5137 multi_fn = simd_extract, b:in_t0, b, N as u32
5138 multi_fn = vqdmullh-noqself-noext, a, b
5139 a = 2
5140 b = 0, 2, 2, 0, 2, 0, 0, 0
5141 n = HFLEN
5142 validate 8
5143
5144 aarch64 = sqdmull
5145 generate i16:int16x4_t:i32, i16:int16x8_t:i32
5146
5147 /// Signed saturating doubling multiply long
5148 name = vqdmulls_lane
5149 constn = N
5150 multi_fn = static_assert_imm-in_exp_len-N
5151 multi_fn = simd_extract, b:in_t0, b, N as u32
5152 multi_fn = vqdmulls-noqself-noext, a, b
5153 a = 2
5154 b = 0, 2, 2, 0, 2, 0, 0, 0
5155 n = HFLEN
5156 validate 8
5157
5158 aarch64 = sqdmull
5159 generate i32:int32x2_t:i64, i32:int32x4_t:i64
5160
5161 /// Signed saturating doubling multiply long
5162 name = vqdmull_high_lane
5163 constn = N
5164 multi_fn = static_assert_imm-in_exp_len-N
5165 multi_fn = simd_shuffle!, a:in_t, a, a, {asc-out_len-out_len}
5166 multi_fn = simd_shuffle!, b:in_t, b, b, {dup-out_len-N as u32}
5167 multi_fn = vqdmull-self-noext, a, b
5168 a = 0, 1, 4, 5, 4, 5, 6, 7
5169 b = 0, 2, 2, 0, 2, 0, 0, 0
5170 n = HFLEN
5171 validate 16, 20, 24, 28
5172
5173 aarch64 = sqdmull2
5174 generate int16x8_t:int16x4_t:int32x4_t, int32x4_t:int32x2_t:int64x2_t
5175
5176 /// Signed saturating doubling multiply long
5177 name = vqdmull_high_lane
5178 constn = N
5179 multi_fn = static_assert_imm-in_exp_len-N
5180 multi_fn = simd_shuffle!, a:half, a, a, {asc-out_len-out_len}
5181 multi_fn = simd_shuffle!, b:half, b, b, {dup-out_len-N as u32}
5182 multi_fn = vqdmull-noqself-noext, a, b
5183 a = 0, 1, 4, 5, 4, 5, 6, 7
5184 b = 0, 2, 2, 0, 2, 0, 0, 0
5185 n = HFLEN
5186 validate 16, 20, 24, 28
5187
5188 aarch64 = sqdmull2
5189 generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
5190
5191 /// Signed saturating doubling multiply-add long
5192 name = vqdmlal
5193 multi_fn = vqadd-out-noext, a, {vqdmull-self-noext, b, c}
5194 a = 1, 1, 1, 1
5195 b = 1, 2, 3, 4
5196 c = 2, 2, 2, 2
5197 validate 5, 9, 13, 17
5198
5199 aarch64 = sqdmlal
5200 arm = vqdmlal
5201 generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
5202
5203 /// Vector widening saturating doubling multiply accumulate with scalar
5204 name = vqdmlal
5205 n-suffix
5206 multi_fn = vqadd-out-noext, a, {vqdmull_n-self-noext, b, c}
5207 a = 1, 1, 1, 1
5208 b = 1, 2, 3, 4
5209 c = 2
5210 validate 5, 9, 13, 17
5211
5212 aarch64 = sqdmlal
5213 arm = vqdmlal
5214 generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
5215
5216 /// Signed saturating doubling multiply-add long
5217 name = vqdmlal_high
5218 no-q
5219 multi_fn = vqadd-out-noext, a, {vqdmull_high-noqself-noext, b, c}
5220 a = 1, 2, 3, 4
5221 b = 0, 1, 4, 5, 4, 5, 6, 7
5222 c = 1, 2, 5, 6, 5, 6, 7, 8
5223 validate 41, 62, 87, 116
5224
5225 aarch64 = sqdmlal2
5226 generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
5227
5228 /// Signed saturating doubling multiply-add long
5229 name = vqdmlal_high_n
5230 no-q
5231 multi_fn = vqadd-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
5232 a = 1, 2, 3, 4
5233 b = 0, 2, 8, 10, 8, 10, 12, 14
5234 c = 2
5235 validate 33, 42, 51, 60
5236
5237 aarch64 = sqdmlal2
5238 generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
5239
5240 /// Vector widening saturating doubling multiply accumulate with scalar
5241 name = vqdmlal_lane
5242 in2-suffix
5243 constn = N
5244 multi_fn = static_assert_imm-in2_exp_len-N
5245 multi_fn = vqadd-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
5246 a = 1, 2, 3, 4
5247 b = 1, 2, 3, 4
5248 c = 0, 2, 2, 0, 2, 0, 0, 0
5249 n = HFLEN
5250 validate 5, 10, 15, 20
5251
5252 aarch64 = sqdmlal
5253 generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
5254
5255 arm = vqdmlal
5256 generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
5257
5258 /// Signed saturating doubling multiply-add long
5259 name = vqdmlal_high_lane
5260 in2-suffix
5261 constn = N
5262 multi_fn = static_assert_imm-in2_exp_len-N
5263 multi_fn = vqadd-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
5264 a = 1, 2, 3, 4
5265 b = 0, 1, 4, 5, 4, 5, 6, 7
5266 c = 0, 2, 0, 0, 0, 0, 0, 0
5267 n = 1
5268 validate 17, 22, 27, 32
5269
5270 aarch64 = sqdmlal2
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
5272
5273 /// Signed saturating doubling multiply-add long
5274 name = vqdmlal
5275 multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
5276 multi_fn = vqadd-out-noext, a, {simd_extract, x, 0}
5277 a = 1
5278 b = 1
5279 c = 2
5280 validate 5
5281
5282 aarch64 = sqdmlal
5283 generate i32:i16:i16:i32
5284
5285 /// Signed saturating doubling multiply-add long
5286 name = vqdmlal
5287 multi_fn = vqadd-out-noext, x:out_t, a, {vqdmulls-in_ntt-noext, b, c}
5288 multi_fn = x as out_t
5289 a = 1
5290 b = 1
5291 c = 2
5292 validate 5
5293
5294 aarch64 = sqdmlal
5295 generate i64:i32:i32:i64
5296
5297 /// Signed saturating doubling multiply-add long
5298 name = vqdmlalh_lane
5299 in2-suffix
5300 constn = LANE
5301 multi_fn = static_assert_imm-in2_exp_len-LANE
5302 multi_fn = vqdmlal-self-noext, a, b, {simd_extract, c, LANE as u32}
5303 a = 1
5304 b = 1
5305 c = 2, 1, 1, 1, 1, 1, 1, 1
5306 n = 0
5307 validate 5
5308
5309 aarch64 = sqdmlal
5310 generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32
5311 name = vqdmlals_lane
5312 aarch64 = sqdmlal
5313 generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64
5314
5315 /// Signed saturating doubling multiply-subtract long
5316 name = vqdmlsl
5317 multi_fn = vqsub-out-noext, a, {vqdmull-self-noext, b, c}
5318 a = 3, 7, 11, 15
5319 b = 1, 2, 3, 4
5320 c = 2, 2, 2, 2
5321 validate -1, -1, -1, -1
5322
5323 aarch64 = sqdmlsl
5324 arm = vqdmlsl
5325 generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
5326
5327 /// Vector widening saturating doubling multiply subtract with scalar
5328 name = vqdmlsl
5329 n-suffix
5330 multi_fn = vqsub-out-noext, a, {vqdmull_n-self-noext, b, c}
5331 a = 3, 7, 11, 15
5332 b = 1, 2, 3, 4
5333 c = 2
5334 validate -1, -1, -1, -1
5335
5336 aarch64 = sqdmlsl
5337 arm = vqdmlsl
5338 generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
5339
5340 /// Signed saturating doubling multiply-subtract long
5341 name = vqdmlsl_high
5342 no-q
5343 multi_fn = vqsub-out-noext, a, {vqdmull_high-noqself-noext, b, c}
5344 a = 39, 58, 81, 108
5345 b = 0, 1, 4, 5, 4, 5, 6, 7
5346 c = 1, 2, 5, 6, 5, 6, 7, 8
5347 validate -1, -2, -3, -4
5348
5349 aarch64 = sqdmlsl2
5350 generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
5351
5352 /// Signed saturating doubling multiply-subtract long
5353 name = vqdmlsl_high_n
5354 no-q
5355 multi_fn = vqsub-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
5356 a = 31, 38, 45, 52
5357 b = 0, 2, 8, 10, 8, 10, 12, 14
5358 c = 2
5359 validate -1, -2, -3, -4
5360
5361 aarch64 = sqdmlsl2
5362 generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
5363
5364 /// Vector widening saturating doubling multiply subtract with scalar
5365 name = vqdmlsl_lane
5366 in2-suffix
5367 constn = N
5368 multi_fn = static_assert_imm-in2_exp_len-N
5369 multi_fn = vqsub-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
5370 a = 3, 6, 9, 12
5371 b = 1, 2, 3, 4
5372 c = 0, 2, 2, 0, 2, 0, 0, 0
5373 n = HFLEN
5374 validate -1, -2, -3, -4
5375
5376 aarch64 = sqdmlsl
5377 generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
5378
5379 arm = vqdmlsl
5380 generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
5381
5382 /// Signed saturating doubling multiply-subtract long
5383 name = vqdmlsl_high_lane
5384 in2-suffix
5385 constn = N
5386 multi_fn = static_assert_imm-in2_exp_len-N
5387 multi_fn = vqsub-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
5388 a = 15, 18, 21, 24
5389 b = 0, 1, 4, 5, 4, 5, 6, 7
5390 c = 0, 2, 0, 0, 0, 0, 0, 0
5391 n = 1
5392 validate -1, -2, -3, -4
5393
5394 aarch64 = sqdmlsl2
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
5396
5397 /// Signed saturating doubling multiply-subtract long
5398 name = vqdmlsl
5399 multi_fn = vqdmull-in_ntt-noext, x:out_long_ntt, {vdup_n-in_ntt-noext, b}, {vdup_n-in_ntt-noext, c}
5400 multi_fn = vqsub-out-noext, a, {simd_extract, x, 0}
5401 a = 10
5402 b = 1
5403 c = 2
5404 validate 6
5405
5406 aarch64 = sqdmlsl
5407 generate i32:i16:i16:i32
5408
5409 /// Signed saturating doubling multiply-subtract long
5410 name = vqdmlsl
5411 multi_fn = vqsub-out-noext, x:out_t, a, {vqdmulls-in_ntt-noext, b, c}
5412 multi_fn = x as out_t
5413 a = 10
5414 b = 1
5415 c = 2
5416 validate 6
5417
5418 aarch64 = sqdmlsl
5419 generate i64:i32:i32:i64
5420
5421 /// Signed saturating doubling multiply-subtract long
5422 name = vqdmlslh_lane
5423 in2-suffix
5424 constn = LANE
5425 multi_fn = static_assert_imm-in2_exp_len-LANE
5426 multi_fn = vqdmlsl-self-noext, a, b, {simd_extract, c, LANE as u32}
5427 a = 10
5428 b = 1
5429 c = 2, 1, 1, 1, 1, 1, 1, 1
5430 n = 0
5431 validate 6
5432
5433 aarch64 = sqdmlsl
5434 generate i32:i16:int16x4_t:i32, i32:i16:int16x8_t:i32
5435 name = vqdmlsls_lane
5436 aarch64 = sqdmlsl
5437 generate i64:i32:int32x2_t:i64, i64:i32:int32x4_t:i64
5438
5439 /// Signed saturating doubling multiply returning high half
5440 name = vqdmulh
5441 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5442 b = 2, 2, 2, 2, 2, 2, 2, 2
5443 validate 1, 1, 1, 1, 1, 1, 1, 1
5444
5445 aarch64 = sqdmulh
5446 link-aarch64 = sqdmulh._EXT_
5447 arm = vqdmulh
5448 link-arm = vqdmulh._EXT_
5449 generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
5450
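// Editorial note: the operation is r = sat((2 * a * b) >> bits), i.e. the
// high half of the doubled product. For a = MAX = 0x7fff and b = 2:
// (2 * 0x7fff * 2) >> 16 = 1, matching the validate line.
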
5451 /// Signed saturating doubling multiply returning high half
5452 name = vqdmulh
5453 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5454 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5455 multi_fn = simd_extract, {vqdmulh-in_ntt-noext, a, b}, 0
5456 a = 1
5457 b = 2
5458 validate 0
5459
5460 aarch64 = sqdmulh
5461 generate i16, i32
5462
5463 /// Vector saturating doubling multiply high with scalar
5464 name = vqdmulh_n
5465 out-suffix
5466 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5467 multi_fn = vqdmulh-out-noext, a, b
5468 a = MAX, MAX, MAX, MAX
5469 b = 2
5470 validate 1, 1, 1, 1
5471
5472 aarch64 = sqdmulh
5473 arm = vqdmulh
5474 generate int16x4_t:i16:int16x4_t, int32x2_t:i32:int32x2_t
5475
5476 /// Vector saturating doubling multiply high with scalar
5477 name = vqdmulhq_n
5478 no-q
5479 multi_fn = vdupq_n-in_ntt-noext, b:out_t, b
5480 multi_fn = vqdmulh-out-noext, a, b
5481 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5482 b = 2
5483 validate 1, 1, 1, 1, 1, 1, 1, 1
5484
5485 aarch64 = sqdmulh
5486 arm = vqdmulh
5487 generate int16x8_t:i16:int16x8_t, int32x4_t:i32:int32x4_t
5488
5489 /// Signed saturating doubling multiply returning high half
5490 name = vqdmulhh_lane
5491 constn = N
5492 multi_fn = static_assert_imm-in_exp_len-N
5493 multi_fn = simd_extract, b:in_t0, b, N as u32
5494 multi_fn = vqdmulhh-out_ntt-noext, a, b
5495 a = 2
5496 b = 0, 0, MAX, 0, 0, 0, 0, 0
5497 n = 2
5498 validate 1
5499
5500 aarch64 = sqdmulh
5501 generate i16:int16x4_t:i16, i16:int16x8_t:i16
5502
5503 /// Signed saturating doubling multiply returning high half
5504 name = vqdmulhs_lane
5505 constn = N
5506 multi_fn = static_assert_imm-in_exp_len-N
5507 multi_fn = simd_extract, b:in_t0, b, N as u32
5508 multi_fn = vqdmulhs-out_ntt-noext, a, b
5509 a = 2
5510 b = 0, MAX, 0, 0
5511 n = 1
5512 validate 1
5513
5514 aarch64 = sqdmulh
5515 generate i32:int32x2_t:i32, i32:int32x4_t:i32
5516
5517 /// Vector saturating doubling multiply high by scalar
5518 name = vqdmulh
5519 lane-suffixes
5520 constn = LANE
5521 multi_fn = static_assert_imm-in2_exp_len-LANE
5522 multi_fn = vqdmulh-out-noext, a, {vdup-nout-noext, {simd_extract, b, LANE as u32}}
5523 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5524 b = 2, 1, 1, 1, 1, 1, 1, 1
5525 n = 0
5526 validate 1, 1, 1, 1, 1, 1, 1, 1
5527
5528 aarch64 = sqdmulh
5529 generate int16x4_t, int16x8_t:int16x4_t:int16x8_t
5530 generate int32x2_t, int32x4_t:int32x2_t:int32x4_t
5531 arm = vqdmulh
5532 generate int16x8_t, int16x4_t:int16x8_t:int16x4_t
5533 generate int32x4_t, int32x2_t:int32x4_t:int32x2_t
5534
5535 /// Signed saturating extract narrow
5536 name = vqmovn
5537 no-q
5538 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5539 validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5540
5541 aarch64 = sqxtn
5542 link-aarch64 = sqxtn._EXT2_
5543 arm = vqmovn
5544 link-arm = vqmovns._EXT2_
5545 generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
5546
5547 /// Unsigned saturating extract narrow
5548 name = vqmovn
5549 no-q
5550 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5551 validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5552
5553 aarch64 = uqxtn
5554 link-aarch64 = uqxtn._EXT2_
5555 arm = vqmovn
5556 link-arm = vqmovnu._EXT2_
5557 generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
5558
5559 /// Saturating extract narrow
5560 name = vqmovn
5561 multi_fn = simd_extract, {vqmovn-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
5562 a = 1
5563 validate 1
5564
5565 aarch64 = sqxtn
5566 generate i16:i8, i32:i16
5567 aarch64 = uqxtn
5568 generate u16:u8, u32:u16
5569
5570 /// Saturating extract narrow
5571 name = vqmovn
5572 a = 1
5573 validate 1
5574
5575 aarch64 = sqxtn
5576 link-aarch64 = scalar.sqxtn._EXT2_._EXT_
5577 generate i64:i32
5578
5579 aarch64 = uqxtn
5580 link-aarch64 = scalar.uqxtn._EXT2_._EXT_
5581 generate u64:u32
5582
/// Saturating extract narrow
5584 name = vqmovn_high
5585 no-q
5586 multi_fn = simd_shuffle!, a, {vqmovn-noqself-noext, b}, {asc-0-out_len}
5587 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5588 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5589 validate MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5590
5591 aarch64 = sqxtn2
5592 generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
5593 aarch64 = uqxtn2
5594 generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
5595
5596 /// Signed saturating extract unsigned narrow
5597 name = vqmovun
5598 no-q
5599 a = -1, -1, -1, -1, -1, -1, -1, -1
5600 validate 0, 0, 0, 0, 0, 0, 0, 0
5601
5602 aarch64 = sqxtun
5603 link-aarch64 = sqxtun._EXT2_
5604 arm = vqmovun
5605 link-arm = vqmovnsu._EXT2_
5606 generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t
5607
5608 /// Signed saturating extract unsigned narrow
5609 name = vqmovun
5610 multi_fn = simd_extract, {vqmovun-in_ntt-noext, {vdupq_n-in_ntt-noext, a}}, 0
5611 a = 1
5612 validate 1
5613
5614 aarch64 = sqxtun
5615 generate i16:u8, i32:u16, i64:u32
5616
5617 /// Signed saturating extract unsigned narrow
5618 name = vqmovun_high
5619 no-q
5620 multi_fn = simd_shuffle!, a, {vqmovun-noqself-noext, b}, {asc-0-out_len}
5621 a = 0, 0, 0, 0, 0, 0, 0, 0
5622 b = -1, -1, -1, -1, -1, -1, -1, -1
5623 validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
5624
5625 aarch64 = sqxtun2
5626 generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
5627
5628 /// Signed saturating rounding doubling multiply returning high half
5629 name = vqrdmulh
5630 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5631 b = 2, 2, 2, 2, 2, 2, 2, 2
5632 validate 2, 2, 2, 2, 2, 2, 2, 2
5633
5634 aarch64 = sqrdmulh
5635 link-aarch64 = sqrdmulh._EXT_
5636 arm = vqrdmulh
5637 link-arm = vqrdmulh._EXT_
5638 generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
5639
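// Editorial note: the rounding variant adds 1 << (bits - 1) before taking the
// high half: r = sat((2 * a * b + (1 << (bits - 1))) >> bits). For a = MAX,
// b = 2 this rounds 0x1fffc + 0x8000 up, giving 2 where vqdmulh gives 1.
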
5640 /// Signed saturating rounding doubling multiply returning high half
5641 name = vqrdmulh
5642 multi_fn = simd_extract, {vqrdmulh-in_ntt-noext, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
5643 a = 1
5644 b = 2
5645 validate 0
5646
5647 aarch64 = sqrdmulh
5648 generate i16, i32
5649
5650 /// Vector saturating rounding doubling multiply high with scalar
5651 name = vqrdmulh
5652 out-n-suffix
5653 multi_fn = vqrdmulh-out-noext, a, {vdup-nout-noext, b}
5654 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5655 b = 2
5656 validate 2, 2, 2, 2, 2, 2, 2, 2
5657
5658 aarch64 = sqrdmulh
5659 arm = vqrdmulh
5660 generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t
5661
/// Vector saturating rounding doubling multiply high by scalar
5663 name = vqrdmulh
5664 lane-suffixes
5665 constn = LANE
5666 multi_fn = static_assert_imm-in_exp_len-LANE
5667 multi_fn = simd_shuffle!, b:out_t, b, b, {dup-out_len-LANE as u32}
5668 multi_fn = vqrdmulh-out-noext, a, b
5669 a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
b = 0, 2, 0, 0, 0, 0, 0, 0
5671 n = 1
5672 validate 2, 2, 2, 2, 2, 2, 2, 2
5673
5674 aarch64 = sqrdmulh
5675 arm = vqrdmulh
5676 generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
5677 generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t
5678
5679 /// Signed saturating rounding doubling multiply returning high half
5680 name = vqrdmulh
5681 lane-suffixes
5682 constn = LANE
5683 multi_fn = static_assert_imm-in_exp_len-LANE
5684 multi_fn = vqrdmulh-out-noext, a, {simd_extract, b, LANE as u32}
5685 a = 1
b = 0, 2, 0, 0, 0, 0, 0, 0
5687 n = 1
5688 validate 0
5689
5690 aarch64 = sqrdmulh
5691 generate i16:int16x4_t:i16, i16:int16x8_t:i16, i32:int32x2_t:i32, i32:int32x4_t:i32
5692
5693 /// Signed saturating rounding doubling multiply accumulate returning high half
5694 name = vqrdmlah
5695 a = 1, 1, 1, 1, 1, 1, 1, 1
5696 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5697 c = 2, 2, 2, 2, 2, 2, 2, 2
5698 validate 3, 3, 3, 3, 3, 3, 3, 3
5699
5700 aarch64 = sqrdmlah
5701 link-aarch64 = sqrdmlah._EXT_
5702 target = rdm
5703 generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
5704
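// Editorial note: vqrdmlah fuses the accumulate into the rounding-doubling
// multiply before saturation, so results can differ from a separate
// vqadd(a, vqrdmulh(b, c)) near the saturation boundaries; it requires the
// rdm target feature (ARMv8.1). For the vectors above: 1 + 2 = 3.
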
5705 /// Signed saturating rounding doubling multiply accumulate returning high half
5706 name = vqrdmlah
5707 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5708 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5709 multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
5710 multi_fn = simd_extract, {vqrdmlah-in_ntt-noext, a, b, c}, 0
5711 a = 1
5712 b = 1
5713 c = 2
5714 validate 1
5715
5716 aarch64 = sqrdmlah
5717 target = rdm
5718 generate i16, i32
5719
5720 /// Signed saturating rounding doubling multiply accumulate returning high half
5721 name = vqrdmlah
5722 in2-lane-suffixes
5723 constn = LANE
5724 multi_fn = static_assert_imm-in2_exp_len-LANE
5725 multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
5726 multi_fn = vqrdmlah-out-noext, a, b, c
5727 a = 1, 1, 1, 1, 1, 1, 1, 1
5728 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5729 c = 0, 2, 0, 0, 0, 0, 0, 0
5730 n = 1
5731 validate 3, 3, 3, 3, 3, 3, 3, 3
5732
5733 aarch64 = sqrdmlah
5734 target = rdm
5735 generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
5736 generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
5737
5738 /// Signed saturating rounding doubling multiply accumulate returning high half
5739 name = vqrdmlah
5740 in2-lane-suffixes
5741 constn = LANE
5742 multi_fn = static_assert_imm-in2_exp_len-LANE
5743 multi_fn = vqrdmlah-self-noext, a, b, {simd_extract, c, LANE as u32}
5744 a = 1
5745 b = 1
5746 c = 0, 2, 0, 0, 0, 0, 0, 0
5747 n = 1
5748 validate 1
5749
5750 aarch64 = sqrdmlah
5751 target = rdm
5752 generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32
5753
5754 /// Signed saturating rounding doubling multiply subtract returning high half
5755 name = vqrdmlsh
5756 link-aarch64 = sqrdmlsh._EXT_
5757 a = 1, 1, 1, 1, 1, 1, 1, 1
5758 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5759 c = 2, 2, 2, 2, 2, 2, 2, 2
5760 validate -1, -1, -1, -1, -1, -1, -1, -1
5761
5762 aarch64 = sqrdmlsh
5763 target = rdm
5764 generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
5765
5766 /// Signed saturating rounding doubling multiply subtract returning high half
5767 name = vqrdmlsh
5768 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5769 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5770 multi_fn = vdup_n-in_ntt-noext, c:in_ntt, c
5771 multi_fn = simd_extract, {vqrdmlsh-in_ntt-noext, a, b, c}, 0
5772 a = 1
5773 b = 1
5774 c = 2
5775 validate 1
5776
5777 aarch64 = sqrdmlsh
5778 target = rdm
5779 generate i16, i32
5780
5781 /// Signed saturating rounding doubling multiply subtract returning high half
5782 name = vqrdmlsh
5783 in2-lane-suffixes
5784 constn = LANE
5785 multi_fn = static_assert_imm-in2_exp_len-LANE
5786 multi_fn = simd_shuffle!, c:out_t, c, c, {dup-out_len-LANE as u32}
5787 multi_fn = vqrdmlsh-out-noext, a, b, c
5788 a = 1, 1, 1, 1, 1, 1, 1, 1
5789 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
5790 c = 0, 2, 0, 0, 0, 0, 0, 0
5791 n = 1
5792 validate -1, -1, -1, -1, -1, -1, -1, -1
5793
5794 aarch64 = sqrdmlsh
5795 target = rdm
5796 generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
5797 generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
5798
5799 /// Signed saturating rounding doubling multiply subtract returning high half
5800 name = vqrdmlsh
5801 in2-lane-suffixes
5802 constn = LANE
5803 multi_fn = static_assert_imm-in2_exp_len-LANE
5804 multi_fn = vqrdmlsh-self-noext, a, b, {simd_extract, c, LANE as u32}
5805 a = 1
5806 b = 1
5807 c = 0, 2, 0, 0, 0, 0, 0, 0
5808 n = 1
5809 validate 1
5810
5811 aarch64 = sqrdmlsh
5812 target = rdm
5813 generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i32:i32:int32x4_t:i32
5814
5815 /// Signed saturating rounding shift left
5816 name = vqrshl
5817 a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
5818 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
5819 validate 8, MIN, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
5820
5821 aarch64 = sqrshl
5822 link-aarch64 = sqrshl._EXT_
5823 generate i32, i64
5824
5825 arm = vqrshl
5826 link-arm = vqrshifts._EXT_
5827 generate int*_t, int64x*_t
5828
5829 /// Signed saturating rounding shift left
5830 name = vqrshl
5831 multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
5832 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5833 multi_fn = simd_extract, {vqrshl-in_ntt-noext, a, b}, 0
5834 a = 1
5835 b = 2
5836 validate 4
5837
5838 aarch64 = sqrshl
5839 generate i8, i16
5840
/// Unsigned saturating rounding shift left (signed shift amount)
5842 name = vqrshl
5843 out-suffix
5844 a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
5845 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
5846 validate 8, 0, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
5847
5848 aarch64 = uqrshl
5849 link-aarch64 = uqrshl._EXT_
5850 generate u32:i32:u32, u64:i64:u64
5851
5852 arm = vqrshl
5853 link-arm = vqrshiftu._EXT_
5854 generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
5855 generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
5856
/// Unsigned saturating rounding shift left (signed shift amount)
5858 name = vqrshl
5859 out-suffix
5860 multi_fn = vdup_n-out_ntt-noext, a:out_ntt, a
5861 multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
5862 multi_fn = simd_extract, {vqrshl-out_ntt-noext, a, b}, 0
5863 a = 1
5864 b = 2
5865 validate 4
5866
5867 aarch64 = uqrshl
5868 generate u8:i8:u8, u16:i16:u16
5869
5870 /// Signed saturating rounded shift right narrow
5871 name = vqrshrn
5872 noq-n-suffix
5873 constn = N
5874 multi_fn = static_assert-N-1-halfbits
5875 a = MIN, 4, 8, 12, 16, 20, 24, 28
5876 n = 2
5877 validate MIN, 1, 2, 3, 4, 5, 6, 7
5878
5879 aarch64 = sqrshrn
5880 link-aarch64 = sqrshrn._EXT2_
5881 const-aarch64 = N
5882
5883 arm = vqrshrn
5884 link-arm = vqrshiftns._EXT2_
5885 const-arm = -N as ttn
5886 arm-aarch64-separate
5887 generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
5888
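// Editorial note: the rounded narrowing shift computes
// sat_narrow((a + (1 << (N - 1))) >> N). With N = 2, the lane a = 4 above is
// (4 + 2) >> 2 = 1, and the MIN lane saturates to the narrow type's MIN.
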
5889 /// Signed saturating rounded shift right narrow
5890 name = vqrshrn
5891 noq-n-suffix
5892 constn = N
5893 multi_fn = static_assert-N-1-halfbits
5894 multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
5895 multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
5896 a = 4
5897 n = 2
5898 validate 1
5899
5900 aarch64 = sqrshrn
5901 generate i16:i8, i32:i16, i64:i32
5902
5903 /// Signed saturating rounded shift right narrow
5904 name = vqrshrn_high
5905 noq-n-suffix
5906 constn = N
5907 multi_fn = static_assert-N-1-halfbits
5908 multi_fn = simd_shuffle!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
5909 a = 0, 1, 2, 3, 2, 3, 6, 7
5910 b = 8, 12, 24, 28, 48, 52, 56, 60
5911 n = 2
5912 validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
5913
5914 aarch64 = sqrshrn2
5915 generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
5916
/// Unsigned saturating rounded shift right narrow
5918 name = vqrshrn
5919 noq-n-suffix
5920 constn = N
5921 multi_fn = static_assert-N-1-halfbits
5922 a = MIN, 4, 8, 12, 16, 20, 24, 28
5923 n = 2
5924 validate 0, 1, 2, 3, 4, 5, 6, 7
5925
5926 aarch64 = uqrshrn
5927 link-aarch64 = uqrshrn._EXT2_
5928 const-aarch64 = N
5929
5930 arm = vqrshrn
5931 link-arm = vqrshiftnu._EXT2_
5932 const-arm = -N as ttn
5933 arm-aarch64-separate
5934 generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
5935
5936 /// Unsigned saturating rounded shift right narrow
5937 name = vqrshrn
5938 noq-n-suffix
5939 constn = N
5940 multi_fn = static_assert-N-1-halfbits
5941 multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
5942 multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
5943 a = 4
5944 n = 2
5945 validate 1
5946
5947 aarch64 = uqrshrn
5948 generate u16:u8, u32:u16, u64:u32
5949
5950 /// Unsigned saturating rounded shift right narrow
5951 name = vqrshrn_high
5952 noq-n-suffix
5953 constn = N
5954 multi_fn = static_assert-N-1-halfbits
5955 multi_fn = simd_shuffle!, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
5956 a = 0, 1, 2, 3, 2, 3, 6, 7
5957 b = 8, 12, 24, 28, 48, 52, 56, 60
5958 n = 2
5959 validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
5960
5961 aarch64 = uqrshrn2
5962 generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
5963
5964 /// Signed saturating rounded shift right unsigned narrow
5965 name = vqrshrun
5966 noq-n-suffix
5967 constn = N
5968 multi_fn = static_assert-N-1-halfbits
5969 a = 0, 4, 8, 12, 16, 20, 24, 28
5970 n = 2
5971 validate 0, 1, 2, 3, 4, 5, 6, 7
5972
5973 aarch64 = sqrshrun
5974 link-aarch64 = sqrshrun._EXT2_
5975 const-aarch64 = N
5976
5977 arm = vqrshrun
5978 link-arm = vqrshiftnsu._EXT2_
5979 const-arm = -N as ttn
5980 arm-aarch64-separate
5981 generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t
5982
5983 /// Signed saturating rounded shift right unsigned narrow
5984 name = vqrshrun
5985 noq-n-suffix
5986 constn = N
5987 multi_fn = static_assert-N-1-halfbits
5988 multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
5989 multi_fn = simd_extract, {vqrshrun_n-in_ntt-::<N>, a}, 0
5990 a = 4
5991 n = 2
5992 validate 1
5993
5994 aarch64 = sqrshrun
5995 generate i16:u8, i32:u16, i64:u32
5996
5997 /// Signed saturating rounded shift right unsigned narrow
5998 name = vqrshrun_high
5999 noq-n-suffix
6000 constn = N
6001 multi_fn = static_assert-N-1-halfbits
6002 multi_fn = simd_shuffle!, a, {vqrshrun_n-noqself-::<N>, b}, {asc-0-out_len}
6003 a = 0, 1, 2, 3, 2, 3, 6, 7
6004 b = 8, 12, 24, 28, 48, 52, 56, 60
6005 n = 2
6006 validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
6007
6008 aarch64 = sqrshrun2
6009 generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
6010
6011 /// Signed saturating shift left
6012 name = vqshl
6013 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6014 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
6015 validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6016
6017 aarch64 = sqshl
6018 link-aarch64 = sqshl._EXT_
6019 generate i64
6020
6021 arm = vqshl
6022 link-arm = vqshifts._EXT_
6023 generate int*_t, int64x*_t
6024
6025 /// Signed saturating shift left
6026 name = vqshl
6027 multi_fn = vqshl-in_ntt-noext, c:in_ntt, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
6028 multi_fn = simd_extract, c, 0
6029 a = 1
6030 b = 2
6031 validate 4
6032
6033 aarch64 = sqshl
6034 generate i8, i16, i32
6035
6036 /// Unsigned saturating shift left
6037 name = vqshl
6038 out-suffix
6039 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6040 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
6041 validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6042
6043 aarch64 = uqshl
6044 link-aarch64 = uqshl._EXT_
6045 generate u64:i64:u64
6046
6047 arm = vqshl
6048 link-arm = vqshiftu._EXT_
6049 generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
6050 generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
6051
6052 /// Unsigned saturating shift left
6053 name = vqshl
6054 out-suffix
6055 multi_fn = vqshl-out_ntt-noext, c:out_ntt, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
6056 multi_fn = simd_extract, c, 0
6057 a = 1
6058 b = 2
6059 validate 4
6060
6061 aarch64 = uqshl
6062 generate u8:i8:u8, u16:i16:u16, u32:i32:u32
6063
6064 /// Signed saturating shift left
6065 name = vqshl
6066 n-suffix
6067 constn = N
6068 multi_fn = static_assert_imm-out_bits_exp_len-N
6069 multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N as _}
6070 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6071 n = 2
6072 validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6073
6074 aarch64 = sqshl
6075 arm = vqshl
6076 generate int*_t, int64x*_t
6077
6078 /// Signed saturating shift left
6079 name = vqshl
6080 n-suffix
6081 constn = N
6082 multi_fn = static_assert_imm-out_bits_exp_len-N
6083 multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
6084 a = 1
6085 n = 2
6086 validate 4
6087
6088 aarch64 = sqshl
6089 generate i8, i16, i32, i64
6090
6091 /// Unsigned saturating shift left
6092 name = vqshl
6093 n-suffix
6094 constn = N
6095 multi_fn = static_assert_imm-out_bits_exp_len-N
6096 multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N as _}
6097 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6098 n = 2
6099 validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6100
6101 aarch64 = uqshl
6102 arm = vqshl
6103 generate uint*_t, uint64x*_t
6104
6105 /// Unsigned saturating shift left
6106 name = vqshl
6107 n-suffix
6108 constn = N
6109 multi_fn = static_assert_imm-out_bits_exp_len-N
6110 multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
6111 a = 1
6112 n = 2
6113 validate 4
6114
6115 aarch64 = uqshl
6116 generate u8, u16, u32, u64
6117
6118 /// Signed saturating shift left unsigned
6119 name = vqshlu
6120 n-suffix
6121 constn = N
6122 multi_fn = static_assert_imm-out_bits_exp_len-N
6123 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6124 n = 2
6125 validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
6126 arm-aarch64-separate
6127
6128 aarch64 = sqshlu
6129 link-aarch64 = sqshlu._EXT_
6130 const-aarch64 = {dup-in_len-N as ttn}
6131 arm = vqshlu
6132 link-arm = vqshiftsu._EXT_
6133 const-arm = N as ttn
6134 generate int8x8_t:uint8x8_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
6135 generate int8x16_t:uint8x16_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t
6136
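// Editorial note: vqshlu shifts a signed value left by an immediate and
// saturates to the corresponding unsigned range; negative inputs therefore
// saturate to 0, and results above the unsigned MAX clamp to MAX.
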
6137 /// Signed saturating shift left unsigned
6138 name = vqshlu
6139 n-suffix
6140 constn = N
6141 multi_fn = static_assert_imm-out_bits_exp_len-N
6142 multi_fn = simd_extract, {vqshlu_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
6143 a = 1
6144 n = 2
6145 validate 4
6146
6147 aarch64 = sqshlu
6148 generate i8:u8, i16:u16, i32:u32, i64:u64
6149
6150 /// Signed saturating shift right narrow
6151 name = vqshrn
6152 noq-n-suffix
6153 constn = N
6154 multi_fn = static_assert-N-1-halfbits
6155 a = 0, 4, 8, 12, 16, 20, 24, 28
6156 n = 2
6157 validate 0, 1, 2, 3, 4, 5, 6, 7
6158 arm-aarch64-separate
6159
6160 aarch64 = sqshrn
6161 link-aarch64 = sqshrn._EXT2_
6162 const-aarch64 = N
6163 generate i64:i32
6164
6165 arm = vqshrn
6166 link-arm = vqshiftns._EXT2_
6167 const-arm = -N as ttn
6168 generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
6169
6170 /// Signed saturating shift right narrow
6171 name = vqshrn
6172 noq-n-suffix
6173 constn = N
6174 multi_fn = static_assert-N-1-halfbits
6175 multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
6176 a = 4
6177 n = 2
6178 validate 1
6179
6180 aarch64 = sqshrn
6181 generate i16:i8, i32:i16
6182
6183 /// Signed saturating shift right narrow
6184 name = vqshrn_high
6185 noq-n-suffix
6186 constn = N
6187 multi_fn = static_assert-N-1-halfbits
6188 multi_fn = simd_shuffle!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
6189 a = 0, 1, 8, 9, 8, 9, 10, 11
6190 b = 32, 36, 40, 44, 48, 52, 56, 60
6191 n = 2
6192 validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
6193
6194 aarch64 = sqshrn2
6195 generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
6196
6197 /// Unsigned saturating shift right narrow
6198 name = vqshrn
6199 noq-n-suffix
6200 constn = N
6201 multi_fn = static_assert-N-1-halfbits
6202 a = 0, 4, 8, 12, 16, 20, 24, 28
6203 n = 2
6204 validate 0, 1, 2, 3, 4, 5, 6, 7
6205 arm-aarch64-separate
6206
6207 aarch64 = uqshrn
6208 link-aarch64 = uqshrn._EXT2_
6209 const-aarch64 = N
6210 generate u64:u32
6211
6212 arm = vqshrn
6213 link-arm = vqshiftnu._EXT2_
6214 const-arm = -N as ttn
6215 generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
6216
6217 /// Unsigned saturating shift right narrow
6218 name = vqshrn
6219 noq-n-suffix
6220 constn = N
6221 multi_fn = static_assert-N-1-halfbits
6222 multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
6223 a = 4
6224 n = 2
6225 validate 1
6226
6227 aarch64 = uqshrn
6228 generate u16:u8, u32:u16
6229
6230 /// Unsigned saturating shift right narrow
6231 name = vqshrn_high
6232 noq-n-suffix
6233 constn = N
6234 multi_fn = static_assert-N-1-halfbits
6235 multi_fn = simd_shuffle!, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
6236 a = 0, 1, 8, 9, 8, 9, 10, 11
6237 b = 32, 36, 40, 44, 48, 52, 56, 60
6238 n = 2
6239 validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
6240
6241 aarch64 = uqshrn2
6242 generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
6243
6244 /// Signed saturating shift right unsigned narrow
6245 name = vqshrun
6246 noq-n-suffix
6247 constn = N
6248 multi_fn = static_assert-N-1-halfbits
6249 a = 0, 4, 8, 12, 16, 20, 24, 28
6250 n = 2
6251 validate 0, 1, 2, 3, 4, 5, 6, 7
6252 arm-aarch64-separate
6253
6254 aarch64 = sqshrun
6255 link-aarch64 = sqshrun._EXT2_
6256 const-aarch64 = N
6257
6258 arm = vqshrun
6259 link-arm = vqshiftnsu._EXT2_
6260 const-arm = -N as ttn
6261 generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t
6262
6263 /// Signed saturating shift right unsigned narrow
6264 name = vqshrun
6265 noq-n-suffix
6266 constn = N
6267 multi_fn = static_assert-N-1-halfbits
6268 multi_fn = simd_extract, {vqshrun_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
6269 a = 4
6270 n = 2
6271 validate 1
6272
6273 aarch64 = sqshrun
6274 generate i16:u8, i32:u16, i64:u32
6275
6276 /// Signed saturating shift right unsigned narrow
6277 name = vqshrun_high
6278 noq-n-suffix
6279 constn = N
6280 multi_fn = static_assert-N-1-halfbits
6281 multi_fn = simd_shuffle!, a, {vqshrun_n-noqself-::<N>, b}, {asc-0-out_len}
6282 a = 0, 1, 8, 9, 8, 9, 10, 11
6283 b = 32, 36, 40, 44, 48, 52, 56, 60
6284 n = 2
6285 validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
6286
6287 aarch64 = sqshrun2
6288 generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
6289
6290 /// Unsigned saturating accumulate of signed value
6291 name = vsqadd
6292 out-suffix
6293 multi_fn = simd_extract, {vsqadd-out_ntt-noext, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}}, 0
6294 a = 2
6295 b = 2
6296 validate 4
6297
6298 aarch64 = usqadd
6299 generate u8:i8:u8, u16:i16:u16
6300
6301 /// Unsigned saturating accumulate of signed value
6302 name = vsqadd
6303 out-suffix
6304 a = 2
6305 b = 2
6306 validate 4
6307
6308 aarch64 = usqadd
6309 link-aarch64 = usqadd._EXT_
6310 generate u32:i32:u32, u64:i64:u64
6311
6312 /// Calculates the square root of each lane.
6313 name = vsqrt
6314 fn = simd_fsqrt
6315 a = 4.0, 9.0, 16.0, 25.0
6316 validate 2.0, 3.0, 4.0, 5.0
6317
6318 aarch64 = fsqrt
6319 generate float*_t, float64x*_t
6320
6321 /// Reciprocal square-root estimate.
6322 name = vrsqrte
6323 a = 1.0, 2.0, 3.0, 4.0
6324 validate 0.998046875, 0.705078125, 0.576171875, 0.4990234375
6325
6326 aarch64 = frsqrte
6327 link-aarch64 = frsqrte._EXT_
6328 generate float64x*_t, f32, f64
6329
6330 arm = vrsqrte
6331 link-arm = vrsqrte._EXT_
6332 generate float*_t
6333
6334 /// Unsigned reciprocal square root estimate
6335 name = vrsqrte
6336 a = 1, 2, 3, 4
6337 validate 4294967295, 4294967295, 4294967295, 4294967295
6338
6339 aarch64 = ursqrte
6340 link-aarch64 = ursqrte._EXT_
6341 arm = vrsqrte
6342 link-arm = vrsqrte._EXT_
6343 generate uint32x2_t, uint32x4_t
6344
6345 /// Floating-point reciprocal square root step
6346 name = vrsqrts
6347 a = 1.0, 2.0, 3.0, 4.0
6348 b = 1.0, 2.0, 3.0, 4.0
6349 validate 1., -0.5, -3.0, -6.5
6350
6351 aarch64 = frsqrts
6352 link-aarch64 = frsqrts._EXT_
6353 generate float64x*_t, f32, f64
6354
6355 arm = vrsqrts
6356 link-arm = vrsqrts._EXT_
6357 generate float*_t
6358
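// Editorial note: vrsqrts computes (3 - a*b) / 2, the correction factor for a
// Newton-Raphson step on 1/sqrt(x); checking the vectors above, (3 - 4*4)/2 =
// -6.5. A minimal refinement sketch using the generated intrinsics
// (illustrative only, not parsed by this generator):
//
//     use core::arch::aarch64::*;
//     // One Newton-Raphson step: e' = e * (3 - x*e*e) / 2
//     unsafe fn rsqrt_step(x: float32x4_t) -> float32x4_t {
//         let e = vrsqrteq_f32(x);
//         vmulq_f32(e, vrsqrtsq_f32(x, vmulq_f32(e, e)))
//     }
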
6359 /// Reciprocal estimate.
6360 name = vrecpe
6361 a = 4.0, 3.0, 2.0, 1.0
6362 validate 0.24951171875, 0.3330078125, 0.4990234375, 0.998046875
6363
6364 aarch64 = frecpe
6365 link-aarch64 = frecpe._EXT_
6366 generate float64x*_t, f32, f64
6367
6368 arm = vrecpe
6369 link-arm = vrecpe._EXT_
6370 generate float*_t
6371
6372 /// Unsigned reciprocal estimate
6373 name = vrecpe
6374 a = 4, 3, 2, 1
6375 validate 4294967295, 4294967295, 4294967295, 4294967295
6376
6377 aarch64 = urecpe
6378 link-aarch64 = urecpe._EXT_
6379 arm = vrecpe
6380 link-arm = vrecpe._EXT_
6381 generate uint32x2_t, uint32x4_t
6382
6383 /// Floating-point reciprocal step
6384 name = vrecps
6385 a = 4.0, 3.0, 2.0, 1.0
6386 b = 4.0, 3.0, 2.0, 1.0
6387 validate -14., -7., -2., 1.
6388
6389 aarch64 = frecps
6390 link-aarch64 = frecps._EXT_
6391 generate float64x*_t, f32, f64
6392
6393 arm = vrecps
6394 link-arm = vrecps._EXT_
6395 generate float*_t
6396
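// Editorial note: vrecps computes 2 - a*b, the Newton-Raphson correction for
// 1/x; checking the vectors above, 2 - 4*4 = -14. The analogous refinement
// sketch (illustrative only):
//
//     use core::arch::aarch64::*;
//     // One Newton-Raphson step: e' = e * (2 - x*e)
//     unsafe fn recip_step(x: float32x4_t) -> float32x4_t {
//         let e = vrecpeq_f32(x);
//         vmulq_f32(e, vrecpsq_f32(x, e))
//     }
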
6397 /// Floating-point reciprocal exponent
6398 name = vrecpx
6399 a = 4.0
6400 validate 0.5
6401
6402 aarch64 = frecpx
6403 link-aarch64 = frecpx._EXT_
6404 generate f32, f64
6405
6406 /// Vector reinterpret cast operation
6407 name = vreinterpret
6408 double-suffixes
6409 fn = transmute
6410 a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6411 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
6412
6413 aarch64 = nop
6414 generate poly64x1_t:int64x1_t, poly64x1_t:uint64x1_t, int64x1_t:poly64x1_t, uint64x1_t:poly64x1_t
6415 generate poly64x2_t:int64x2_t, poly64x2_t:uint64x2_t, int64x2_t:poly64x2_t, uint64x2_t:poly64x2_t
6416
6417 arm = nop
6418 generate uint8x8_t:int8x8_t, poly8x8_t:int8x8_t, poly16x4_t:int16x4_t, uint16x4_t:int16x4_t, uint32x2_t:int32x2_t, uint64x1_t:int64x1_t
6419 generate uint8x16_t:int8x16_t, poly8x16_t:int8x16_t, poly16x8_t:int16x8_t, uint16x8_t:int16x8_t, uint32x4_t:int32x4_t, uint64x2_t:int64x2_t
6420 generate poly8x8_t:uint8x8_t, int8x8_t:uint8x8_t, poly16x4_t:uint16x4_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
6421 generate poly8x16_t:uint8x16_t, int8x16_t:uint8x16_t, poly16x8_t:uint16x8_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t
6422 generate int8x8_t:poly8x8_t, uint8x8_t:poly8x8_t, int16x4_t:poly16x4_t, uint16x4_t:poly16x4_t
6423 generate int8x16_t:poly8x16_t, uint8x16_t:poly8x16_t, int16x8_t:poly16x8_t, uint16x8_t:poly16x8_t
6424
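// Editorial note: all vreinterpret casts are bit-level transmutes between
// equally sized vector types; they compile to no instruction (hence the
// aarch64 = nop / arm = nop lines) and only change how lanes are interpreted.
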
6425 /// Vector reinterpret cast operation
6426 name = vreinterpret
6427 double-suffixes
6428 fn = transmute
6429 a = 0, 1, 2, 3, 4, 5, 6, 7
6430 validate 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
6431
6432 aarch64 = nop
6433 arm = nop
6434 generate int16x4_t:int8x8_t, uint16x4_t:int8x8_t, poly16x4_t:int8x8_t, int32x2_t:int16x4_t, uint32x2_t:int16x4_t, int64x1_t:int32x2_t, uint64x1_t:int32x2_t
6435 generate int16x8_t:int8x16_t, uint16x8_t:int8x16_t, poly16x8_t:int8x16_t, int32x4_t:int16x8_t, uint32x4_t:int16x8_t, int64x2_t:int32x4_t, uint64x2_t:int32x4_t
6436 generate poly16x4_t:uint8x8_t, int16x4_t:uint8x8_t, uint16x4_t:uint8x8_t, int32x2_t:uint16x4_t, uint32x2_t:uint16x4_t, int64x1_t:uint32x2_t, uint64x1_t:uint32x2_t
6437 generate poly16x8_t:uint8x16_t, int16x8_t:uint8x16_t, uint16x8_t:uint8x16_t, int32x4_t:uint16x8_t, uint32x4_t:uint16x8_t, int64x2_t:uint32x4_t, uint64x2_t:uint32x4_t
6438 generate poly16x4_t:poly8x8_t, int16x4_t:poly8x8_t, uint16x4_t:poly8x8_t, int32x2_t:poly16x4_t, uint32x2_t:poly16x4_t
6439 generate poly16x8_t:poly8x16_t, int16x8_t:poly8x16_t, uint16x8_t:poly8x16_t, int32x4_t:poly16x8_t, uint32x4_t:poly16x8_t
6440 target = aes
6441 generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t
6442 generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t
6443 generate p128:int64x2_t, p128:uint64x2_t, p128:poly64x2_t
6444
6445 /// Vector reinterpret cast operation
6446 name = vreinterpret
6447 double-suffixes
6448 fn = transmute
6449 a = 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
6450 validate 0, 1, 2, 3, 4, 5, 6, 7

aarch64 = nop
arm = nop
generate poly8x8_t:int16x4_t, int8x8_t:int16x4_t, uint8x8_t:int16x4_t, poly16x4_t:int32x2_t, int16x4_t:int32x2_t, uint16x4_t:int32x2_t, int32x2_t:int64x1_t, uint32x2_t:int64x1_t
generate poly8x16_t:int16x8_t, int8x16_t:int16x8_t, uint8x16_t:int16x8_t, poly16x8_t:int32x4_t, int16x8_t:int32x4_t, uint16x8_t:int32x4_t, int32x4_t:int64x2_t, uint32x4_t:int64x2_t
generate poly8x8_t:uint16x4_t, int8x8_t:uint16x4_t, uint8x8_t:uint16x4_t, poly16x4_t:uint32x2_t, int16x4_t:uint32x2_t, uint16x4_t:uint32x2_t, int32x2_t:uint64x1_t, uint32x2_t:uint64x1_t
generate poly8x16_t:uint16x8_t, int8x16_t:uint16x8_t, uint8x16_t:uint16x8_t, poly16x8_t:uint32x4_t, int16x8_t:uint32x4_t, uint16x8_t:uint32x4_t, int32x4_t:uint64x2_t, uint32x4_t:uint64x2_t
generate poly8x8_t:poly16x4_t, int8x8_t:poly16x4_t, uint8x8_t:poly16x4_t
generate poly8x16_t:poly16x8_t, int8x16_t:poly16x8_t, uint8x16_t:poly16x8_t
target = aes
generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t
generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t
generate int64x2_t:p128, uint64x2_t:p128, poly64x2_t:p128

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0, 1, 2, 3
validate 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0

aarch64 = nop
arm = nop
generate int32x2_t:int8x8_t, uint32x2_t:int8x8_t, int64x1_t:int16x4_t, uint64x1_t:int16x4_t
generate int32x4_t:int8x16_t, uint32x4_t:int8x16_t, int64x2_t:int16x8_t, uint64x2_t:int16x8_t
generate int32x2_t:uint8x8_t, uint32x2_t:uint8x8_t, int64x1_t:uint16x4_t, uint64x1_t:uint16x4_t
generate int32x4_t:uint8x16_t, uint32x4_t:uint8x16_t, int64x2_t:uint16x8_t, uint64x2_t:uint16x8_t
generate int32x2_t:poly8x8_t, uint32x2_t:poly8x8_t, int64x1_t:poly16x4_t, uint64x1_t:poly16x4_t
generate int32x4_t:poly8x16_t, uint32x4_t:poly8x16_t, int64x2_t:poly16x8_t, uint64x2_t:poly16x8_t
target = aes
generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t
generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t
generate p128:int32x4_t, p128:uint32x4_t

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
validate 0, 1, 2, 3

aarch64 = nop
arm = nop
generate poly8x8_t:int32x2_t, int8x8_t:int32x2_t, uint8x8_t:int32x2_t, poly16x4_t:int64x1_t, int16x4_t:int64x1_t, uint16x4_t:int64x1_t
generate poly8x16_t:int32x4_t, int8x16_t:int32x4_t, uint8x16_t:int32x4_t, poly16x8_t:int64x2_t, int16x8_t:int64x2_t, uint16x8_t:int64x2_t
generate poly8x8_t:uint32x2_t, int8x8_t:uint32x2_t, uint8x8_t:uint32x2_t, poly16x4_t:uint64x1_t, int16x4_t:uint64x1_t, uint16x4_t:uint64x1_t
generate poly8x16_t:uint32x4_t, int8x16_t:uint32x4_t, uint8x16_t:uint32x4_t, poly16x8_t:uint64x2_t, int16x8_t:uint64x2_t, uint16x8_t:uint64x2_t
target = aes
generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t
generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t
generate int32x4_t:p128, uint32x4_t:p128

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0, 1
validate 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0

aarch64 = nop
arm = nop
generate int64x1_t:int8x8_t, uint64x1_t:int8x8_t, int64x1_t:uint8x8_t, uint64x1_t:uint8x8_t, int64x1_t:poly8x8_t, uint64x1_t:poly8x8_t
generate int64x2_t:int8x16_t, uint64x2_t:int8x16_t, int64x2_t:uint8x16_t, uint64x2_t:uint8x16_t, int64x2_t:poly8x16_t, uint64x2_t:poly8x16_t
target = aes
generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t
generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t
generate p128:int16x8_t, p128:uint16x8_t, p128:poly16x8_t

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
validate 0, 1

aarch64 = nop
arm = nop
generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t
generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t
target = aes
generate poly8x8_t:poly64x1_t, int8x8_t:poly64x1_t, uint8x8_t:poly64x1_t
generate poly8x16_t:poly64x2_t, int8x16_t:poly64x2_t, uint8x16_t:poly64x2_t
generate int16x8_t:p128, uint16x8_t:p128, poly16x8_t:p128

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate 1
target = aes

aarch64 = nop
arm = nop
generate int8x16_t:p128, uint8x16_t:p128, poly8x16_t:p128

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 1
validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
target = aes

aarch64 = nop
arm = nop
generate p128:int8x16_t, p128:uint8x16_t, p128:poly8x16_t

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0., 0., 0., 0., 0., 0., 0., 0.
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

aarch64 = nop
generate float64x1_t:int8x8_t, float64x1_t:int16x4_t, float64x1_t:int32x2_t, float64x1_t:int64x1_t
generate float64x2_t:int8x16_t, float64x2_t:int16x8_t, float64x2_t:int32x4_t, float64x2_t:int64x2_t
generate float64x1_t:uint8x8_t, float64x1_t:uint16x4_t, float64x1_t:uint32x2_t, float64x1_t:uint64x1_t
generate float64x2_t:uint8x16_t, float64x2_t:uint16x8_t, float64x2_t:uint32x4_t, float64x2_t:uint64x2_t
generate float64x1_t:poly8x8_t, float64x1_t:poly16x4_t, float32x2_t:poly64x1_t, float64x1_t:poly64x1_t
generate float64x2_t:poly8x16_t, float64x2_t:poly16x8_t, float32x4_t:poly64x2_t, float64x2_t:poly64x2_t
generate float64x2_t:p128

arm = nop
generate float32x2_t:int8x8_t, float32x2_t:int16x4_t, float32x2_t:int32x2_t, float32x2_t:int64x1_t
generate float32x4_t:int8x16_t, float32x4_t:int16x8_t, float32x4_t:int32x4_t, float32x4_t:int64x2_t
generate float32x2_t:uint8x8_t, float32x2_t:uint16x4_t, float32x2_t:uint32x2_t, float32x2_t:uint64x1_t
generate float32x4_t:uint8x16_t, float32x4_t:uint16x8_t, float32x4_t:uint32x4_t, float32x4_t:uint64x2_t
generate float32x2_t:poly8x8_t, float32x2_t:poly16x4_t
generate float32x4_t:poly8x16_t, float32x4_t:poly16x8_t
generate float32x4_t:p128

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate 0., 0., 0., 0., 0., 0., 0., 0.

aarch64 = nop
generate int8x8_t:float64x1_t, int16x4_t:float64x1_t, int32x2_t:float64x1_t, int64x1_t:float64x1_t
generate int8x16_t:float64x2_t, int16x8_t:float64x2_t, int32x4_t:float64x2_t, int64x2_t:float64x2_t
generate poly8x8_t:float64x1_t, uint16x4_t:float64x1_t, uint32x2_t:float64x1_t, uint64x1_t:float64x1_t
generate poly8x16_t:float64x2_t, uint16x8_t:float64x2_t, uint32x4_t:float64x2_t, uint64x2_t:float64x2_t
generate uint8x8_t:float64x1_t, poly16x4_t:float64x1_t, poly64x1_t:float64x1_t, poly64x1_t:float32x2_t
generate uint8x16_t:float64x2_t, poly16x8_t:float64x2_t, poly64x2_t:float64x2_t, poly64x2_t:float32x4_t
generate p128:float64x2_t

arm = nop
generate int8x8_t:float32x2_t, int16x4_t:float32x2_t, int32x2_t:float32x2_t, int64x1_t:float32x2_t
generate int8x16_t:float32x4_t, int16x8_t:float32x4_t, int32x4_t:float32x4_t, int64x2_t:float32x4_t
generate uint8x8_t:float32x2_t, uint16x4_t:float32x2_t, uint32x2_t:float32x2_t, uint64x1_t:float32x2_t
generate uint8x16_t:float32x4_t, uint16x8_t:float32x4_t, uint32x4_t:float32x4_t, uint64x2_t:float32x4_t
generate poly8x8_t:float32x2_t, poly16x4_t:float32x2_t
generate poly8x16_t:float32x4_t, poly16x8_t:float32x4_t
generate p128:float32x4_t

/// Vector reinterpret cast operation
name = vreinterpret
double-suffixes
fn = transmute
a = 0., 0., 0., 0., 0., 0., 0., 0.
validate 0., 0., 0., 0., 0., 0., 0., 0.

aarch64 = nop
generate float32x2_t:float64x1_t, float64x1_t:float32x2_t
generate float32x4_t:float64x2_t, float64x2_t:float32x4_t

/// Signed rounding shift left
name = vrshl
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

aarch64 = srshl
link-aarch64 = srshl._EXT_
generate i64

arm = vrshl
link-arm = vrshifts._EXT_
generate int*_t, int64x*_t

/// Unsigned rounding shift left
name = vrshl
out-suffix
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

aarch64 = urshl
link-aarch64 = urshl._EXT_
generate u64:i64:u64

arm = vrshl
link-arm = vrshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
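
// Note: as the signatures above show, the shift-count operand stays *signed*
// even for unsigned data; a negative count element shifts that lane right
// with rounding instead of left. For example, a vrshl_u8 lane shifted by -2
// computes (x + 2) >> 2, i.e. x/4 rounded with halfway cases going up.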

/// Signed rounding shift right
name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, {vdup-nself-noext, -N as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = srshr
arm = vrshr
generate int*_t, int64x*_t
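
// Worked example (informative): vrshr adds the rounding constant
// 1 << (N - 1) before shifting, so with N = 2 an input lane of 5 gives
// (5 + 2) >> 2 = 1 (5/4 = 1.25 rounds to 1) while 6 gives (6 + 2) >> 2 = 2
// (6/4 = 1.5 rounds up to 2).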

/// Signed rounding shift right
name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, -N as i64
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = srshr
generate i64

/// Unsigned rounding shift right
name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, -N as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = urshr
arm = vrshr
generate uint*_t, uint64x*_t

/// Unsigned rounding shift right
name = vrshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshl-self-noext, a, -N as i64
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = urshr
generate u64

/// Rounding shift right narrow
name = vrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
arm-aarch64-separate

aarch64 = rshrn
link-aarch64 = rshrn._EXT2_
const-aarch64 = N

arm = vrshrn
link-arm = vrshiftn._EXT2_
const-arm = -N as ttn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t

/// Rounding shift right narrow
name = vrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = transmute, {vrshrn_n-noqsigned-::<N>, transmute(a)}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = rshrn
arm = vrshrn
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Rounding shift right narrow
name = vrshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
n = 2
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = rshrn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed rounding shift right and accumulate
name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17

aarch64 = srsra
arm = vrsra
generate int*_t, int64x*_t

/// Unsigned rounding shift right and accumulate
name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17

aarch64 = ursra
arm = vrsra
generate uint*_t, uint64x*_t

/// Signed rounding shift right and accumulate
name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a.wrapping_add(b)
a = 1
b = 4
n = 2
validate 2

aarch64 = srshr
generate i64

/// Unsigned rounding shift right and accumulate
name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a.wrapping_add(b)
a = 1
b = 4
n = 2
validate 2

aarch64 = urshr
generate u64

/// Rounding subtract returning high narrow
name = vrsubhn
no-q
a = MAX, MIN, 0, 4, 5, 6, 7, 8
b = 1, 2, 3, 4, 5, 6, 7, 8
validate MIN, MIN, 0, 0, 0, 0, 0, 0

aarch64 = rsubhn
link-aarch64 = rsubhn._EXT2_
arm = vrsubhn
link-arm = vrsubhn._EXT2_
generate int16x8_t:int16x8_t:int8x8_t, int32x4_t:int32x4_t:int16x4_t, int64x2_t:int64x2_t:int32x2_t
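
// Informative: the MIN results above fall out of the rounding high-half
// narrow. For int16x8_t lanes, (MAX - 1 + 0x80) >> 8 = 0x80, which wraps to
// MIN when truncated to the i8 result lane; MIN - 2 rounds and truncates to
// the same bit pattern.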

/// Rounding subtract returning high narrow
name = vrsubhn
no-q
multi_fn = transmute, {vrsubhn-noqsigned-noext, {transmute, a}, {transmute, b}}
a = MAX, MIN, 3, 4, 5, 6, 7, 8
b = 1, 2, 3, 4, 5, 6, 7, 8
validate 0, 0, 0, 0, 0, 0, 0, 0

aarch64 = rsubhn
arm = vrsubhn
generate uint16x8_t:uint16x8_t:uint8x8_t, uint32x4_t:uint32x4_t:uint16x4_t, uint64x2_t:uint64x2_t:uint32x2_t

/// Rounding subtract returning high narrow
name = vrsubhn_high
no-q
multi_fn = vrsubhn-noqself-noext, x:in_t0, b, c
multi_fn = simd_shuffle!, a, x, {asc-0-out_len}
a = 1, 2, 0, 0, 0, 0, 0, 0
b = 1, 2, 3, 4, 5, 6, 7, 8
c = 1, 2, 3, 4, 5, 6, 7, 8
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

aarch64 = rsubhn2
generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t

/// Insert vector element from another vector element
name = vset_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
n = 0
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = nop
arm = nop
generate i8:int8x8_t:int8x8_t, i16:int16x4_t:int16x4_t
generate i32:int32x2_t:int32x2_t, i64:int64x1_t:int64x1_t
generate u8:uint8x8_t:uint8x8_t, u16:uint16x4_t:uint16x4_t
generate u32:uint32x2_t:uint32x2_t, u64:uint64x1_t:uint64x1_t
generate p8:poly8x8_t:poly8x8_t, p16:poly16x4_t:poly16x4_t

target = aes
generate p64:poly64x1_t:poly64x1_t

/// Insert vector element from another vector element
name = vsetq_lane
no-q
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
n = 0
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

aarch64 = nop
arm = nop
generate i8:int8x16_t:int8x16_t, i16:int16x8_t:int16x8_t
generate i32:int32x4_t:int32x4_t, i64:int64x2_t:int64x2_t
generate u8:uint8x16_t:uint8x16_t, u16:uint16x8_t:uint16x8_t
generate u32:uint32x4_t:uint32x4_t, u64:uint64x2_t:uint64x2_t
generate p8:poly8x16_t:poly8x16_t, p16:poly16x8_t:poly16x8_t

target = aes
generate p64:poly64x2_t:poly64x2_t

/// Insert vector element from another vector element
name = vset_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1.
b = 0., 2., 3., 4.
n = 0
validate 1., 2., 3., 4.

aarch64 = nop
generate f64:float64x1_t:float64x1_t

arm = nop
generate f32:float32x2_t:float32x2_t

/// Insert vector element from another vector element
name = vsetq_lane
no-q
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1.
b = 0., 2., 3., 4.
n = 0
validate 1., 2., 3., 4.

aarch64 = nop
generate f64:float64x2_t:float64x2_t

arm = nop
generate f32:float32x4_t:float32x4_t

/// Signed shift left
name = vshl
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

aarch64 = sshl
link-aarch64 = sshl._EXT_
arm = vshl
link-arm = vshifts._EXT_
generate int*_t, int64x*_t

/// Signed shift left
name = vshl
multi_fn = transmute, {vshl-in_ntt-noext, transmute(a), transmute(b)}
a = 1
b = 2
validate 4

aarch64 = sshl
generate i64

/// Unsigned shift left
name = vshl
out-suffix
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

aarch64 = ushl
link-aarch64 = ushl._EXT_
arm = vshl
link-arm = vshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

/// Unsigned shift left
out-suffix
name = vshl
multi_fn = transmute, {vshl-out_ntt-noext, transmute(a), transmute(b)}
a = 1
b = 2
validate 4

aarch64 = ushl
generate u64:i64:u64

/// Shift left
name = vshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_shl, a, {vdup-nself-noext, N as _}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
n = 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64

arm = vshl
aarch64 = shl
generate int*_t, uint*_t, int64x*_t, uint64x*_t

/// Shift left long
name = vshll
n-suffix
constn = N
multi_fn = static_assert-N-0-bits
multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N as _}
a = 1, 2, 3, 4, 5, 6, 7, 8
n = 2
validate 4, 8, 12, 16, 20, 24, 28, 32

arm = vshll.s
aarch64 = sshll
generate int8x8_t:int16x8_t, int16x4_t:int32x4_t, int32x2_t:int64x2_t
aarch64 = ushll
generate uint8x8_t:uint16x8_t, uint16x4_t:uint32x4_t, uint32x2_t:uint64x2_t

/// Shift left long
name = vshll_high_n
no-q
constn = N
multi_fn = static_assert-N-0-bits
multi_fn = simd_shuffle!, b:half, a, a, {asc-halflen-halflen}
multi_fn = vshll_n-noqself-::<N>, b
a = 0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8
n = 2
validate 4, 8, 12, 16, 20, 24, 28, 32

aarch64 = sshll2
generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t
aarch64 = ushll2
generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t

/// Shift right
name = vshr
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = fix_right_shift_imm-N-bits
multi_fn = simd_shr, a, {vdup-nself-noext, n as _}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

arm = vshr.s
aarch64 = sshr
generate int*_t, int64x*_t
aarch64 = ushr
generate uint*_t, uint64x*_t

/// Shift right narrow
name = vshrn_n
no-q
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N as _}}
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16

arm = vshrn.
aarch64 = shrn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Shift right narrow
name = vshrn_high_n
no-q
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle!, a, {vshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 1, 2, 5, 6, 5, 6, 7, 8
b = 20, 24, 28, 32, 52, 56, 60, 64
n = 2
validate 1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16

aarch64 = shrn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed shift right and accumulate
name = vsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17

aarch64 = ssra
arm = vsra
generate int*_t, int64x*_t

/// Unsigned shift right and accumulate
name = vsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = simd_add, a, {vshr-nself-::<N>, b}
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
n = 2
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17

aarch64 = usra
arm = vsra
generate uint*_t, uint64x*_t

/// SM3PARTW1
name = vsm3partw1
a = 1, 2, 3, 4
b = 1, 2, 3, 4
c = 1, 2, 3, 4
validate 2147549312, 3221323968, 131329, 2684362752
target = sm4

aarch64 = sm3partw1
link-aarch64 = llvm.aarch64.crypto.sm3partw1
generate uint32x4_t

/// SM3PARTW2
name = vsm3partw2
a = 1, 2, 3, 4
b = 1, 2, 3, 4
c = 1, 2, 3, 4
validate 128, 256, 384, 1077977696
target = sm4

aarch64 = sm3partw2
link-aarch64 = llvm.aarch64.crypto.sm3partw2
generate uint32x4_t

/// SM3SS1
name = vsm3ss1
a = 1, 2, 3, 4
b = 1, 2, 3, 4
c = 1, 2, 3, 4
validate 0, 0, 0, 2098176
target = sm4

aarch64 = sm3ss1
link-aarch64 = llvm.aarch64.crypto.sm3ss1
generate uint32x4_t

/// SM4 key
name = vsm4ekey
a = 1, 2, 3, 4
b = 1, 2, 3, 4
validate 1784948604, 136020997, 2940231695, 3789947679
target = sm4

aarch64 = sm4ekey
link-aarch64 = llvm.aarch64.crypto.sm4ekey
generate uint32x4_t

/// SM4 encode
name = vsm4e
a = 1, 2, 3, 4
b = 1, 2, 3, 4
validate 1093874472, 3616769504, 3878330411, 2765298765
target = sm4

aarch64 = sm4e
link-aarch64 = llvm.aarch64.crypto.sm4e
generate uint32x4_t

/// Rotate and exclusive OR
name = vrax1
a = 1, 2
b = 3, 4
validate 7, 10
target = sha3

aarch64 = rax1
link-aarch64 = llvm.aarch64.crypto.rax1
generate uint64x2_t

/// SHA512 hash update part 1
name = vsha512h
a = 1, 2
b = 3, 4
c = 5, 6
validate 11189044327219203, 7177611956453380
target = sha3

aarch64 = sha512h
link-aarch64 = llvm.aarch64.crypto.sha512h
generate uint64x2_t

/// SHA512 hash update part 2
name = vsha512h2
a = 1, 2
b = 3, 4
c = 5, 6
validate 5770237651009406214, 349133864969
target = sha3

aarch64 = sha512h2
link-aarch64 = llvm.aarch64.crypto.sha512h2
generate uint64x2_t

/// SHA512 schedule update 0
name = vsha512su0
a = 1, 2
b = 3, 4
validate 144115188075855874, 9439544818968559619
target = sha3

aarch64 = sha512su0
link-aarch64 = llvm.aarch64.crypto.sha512su0
generate uint64x2_t

/// SHA512 schedule update 1
name = vsha512su1
a = 1, 2
b = 3, 4
c = 5, 6
validate 105553116266526, 140737488355368
target = sha3

aarch64 = sha512su1
link-aarch64 = llvm.aarch64.crypto.sha512su1
generate uint64x2_t

/// Floating-point round to 32-bit integer, using current rounding mode
name = vrnd32x
target = frintts

// For validation, the rounding mode should be the default: round-to-nearest (ties-to-even).
a = -1.5, 2.9, 1.5, -2.5
validate -2.0, 3.0, 2.0, -2.0

aarch64 = frint32x
link-aarch64 = frint32x._EXT_
generate float32x2_t, float32x4_t

// The float64x1_t form uses a different LLVM link and isn't supported by Clang
// (and so has no intrinsic-test), so perform extra validation to make sure
// that it matches the float64x2_t form.

a = 1.5, -2.5
validate 2.0, -2.0
// - The biggest f64 that rounds to i32::MAX.
// - The smallest positive f64 that rounds out of range.
a = 2147483647.499999762, 2147483647.5
validate 2147483647.0, -2147483648.0
// - The smallest f64 that rounds to i32::MIN + 1.
// - The largest negative f64 that rounds out of range.
a = -2147483647.499999762, -2147483648.500000477
validate -2147483647.0, -2147483648.0
generate float64x2_t

// Odd-numbered tests for float64x1_t coverage.
a = 2.9
validate 3.0
a = -2.5
validate -2.0
a = 2147483647.5
validate -2147483648.0
a = -2147483648.500000477
validate -2147483648.0

multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
link-aarch64 = llvm.aarch64.frint32x.f64:f64:::f64
generate float64x1_t

/// Floating-point round to 32-bit integer toward zero
name = vrnd32z
target = frintts

a = -1.5, 2.9, 1.5, -2.5
validate -1.0, 2.0, 1.0, -2.0

aarch64 = frint32z
link-aarch64 = frint32z._EXT_
generate float32x2_t, float32x4_t

// The float64x1_t form uses a different LLVM link and isn't supported by Clang
// (and so has no intrinsic-test), so perform extra validation to make sure
// that it matches the float64x2_t form.

a = 1.5, -2.5
validate 1.0, -2.0
// - The biggest f64 that rounds to i32::MAX.
// - The smallest positive f64 that rounds out of range.
a = 2147483647.999999762, 2147483648.0
validate 2147483647.0, -2147483648.0
// - The smallest f64 that rounds to i32::MIN + 1.
// - The largest negative f64 that rounds out of range.
a = -2147483647.999999762, -2147483649.0
validate -2147483647.0, -2147483648.0
generate float64x2_t

// Odd-numbered tests for float64x1_t coverage.
a = 2.9
validate 2.0
a = -2.5
validate -2.0
a = 2147483648.0
validate -2147483648.0
a = -2147483649.0
validate -2147483648.0

multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
link-aarch64 = llvm.aarch64.frint32z.f64:f64:::f64
generate float64x1_t

/// Floating-point round to 64-bit integer, using current rounding mode
name = vrnd64x
target = frintts

// For validation, the rounding mode should be the default: round-to-nearest (ties-to-even).
a = -1.5, 2.9, 1.5, -2.5
validate -2.0, 3.0, 2.0, -2.0

aarch64 = frint64x
link-aarch64 = frint64x._EXT_
generate float32x2_t, float32x4_t

// The float64x1_t form uses a different LLVM link and isn't supported by Clang
// (and so has no intrinsic-test), so perform extra validation to make sure
// that it matches the float64x2_t form.

a = 1.5, -2.5
validate 2.0, -2.0
// - The biggest f64 representable as an i64 (0x7ffffffffffffc00).
// - The smallest positive f64 that is out of range (2^63).
a = 9223372036854774784.0, 9223372036854775808.0
validate 9223372036854774784.0, -9223372036854775808.0
// - The smallest f64 representable as an i64 (i64::MIN).
// - The biggest negative f64 that is out of range.
a = -9223372036854775808.0, -9223372036854777856.0
validate -9223372036854775808.0, -9223372036854775808.0
generate float64x2_t

// Odd-numbered tests for float64x1_t coverage.
a = 2.9
validate 3.0
a = -2.5
validate -2.0
a = 9223372036854775808.0
validate -9223372036854775808.0
a = -9223372036854777856.0
validate -9223372036854775808.0

multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
link-aarch64 = llvm.aarch64.frint64x.f64:f64:::f64
generate float64x1_t

/// Floating-point round to 64-bit integer toward zero
name = vrnd64z
target = frintts

a = -1.5, 2.9, 1.5, -2.5
validate -1.0, 2.0, 1.0, -2.0

aarch64 = frint64z
link-aarch64 = frint64z._EXT_
generate float32x2_t, float32x4_t

// The float64x1_t form uses a different LLVM link and isn't supported by Clang
// (and so has no intrinsic-test), so perform extra validation to make sure
// that it matches the float64x2_t form.

a = 1.5, -2.5
validate 1.0, -2.0
// - The biggest f64 representable as an i64 (0x7ffffffffffffc00).
// - The smallest positive f64 that is out of range (2^63).
a = 9223372036854774784.0, 9223372036854775808.0
validate 9223372036854774784.0, -9223372036854775808.0
// - The smallest f64 representable as an i64 (i64::MIN).
// - The biggest negative f64 that is out of range.
a = -9223372036854775808.0, -9223372036854777856.0
validate -9223372036854775808.0, -9223372036854775808.0
generate float64x2_t

// Odd-numbered tests for float64x1_t coverage.
a = 2.9
validate 2.0
a = -2.5
validate -2.0
a = 9223372036854775808.0
validate -9223372036854775808.0
a = -9223372036854777856.0
validate -9223372036854775808.0

multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
link-aarch64 = llvm.aarch64.frint64z.f64:f64:::f64
generate float64x1_t

/// Transpose elements
name = vtrn
multi_fn = simd_shuffle!, a1:in_t, a, b, {transpose-1-in_len}
multi_fn = simd_shuffle!, b1:in_t, a, b, {transpose-2-in_len}
multi_fn = transmute, (a1, b1)
a = 0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30
b = 1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31

aarch64 = trn
arm = vtrn
generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t
aarch64 = zip
generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t

/// Transpose elements
name = vtrn
multi_fn = simd_shuffle!, a1:in_t, a, b, {transpose-1-in_len}
multi_fn = simd_shuffle!, b1:in_t, a, b, {transpose-2-in_len}
multi_fn = transmute, (a1, b1)
a = 0., 2., 2., 6.
b = 1., 3., 3., 7.
validate 0., 1., 2., 3., 2., 3., 6., 7.

aarch64 = zip
arm = vtrn
generate float32x2_t:float32x2_t:float32x2x2_t
aarch64 = trn
generate float32x4_t:float32x4_t:float32x4x2_t

/// Transpose vectors
name = vtrn1
multi_fn = simd_shuffle!, a, b, {transpose-1-in_len}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29

aarch64 = trn1
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

aarch64 = zip1
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
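
// Informative: vtrn1 keeps the even-numbered lanes of each input,
// interleaved as [a0, b0, a2, b2, ...]. For two-element vectors this
// degenerates to [a0, b0], which is exactly zip1, hence the zip1 codegen
// check above.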

/// Transpose vectors
name = vtrn1
multi_fn = simd_shuffle!, a, b, {transpose-1-in_len}
a = 0., 2., 4., 6., 8., 10., 12., 14.
b = 1., 3., 5., 7., 9., 11., 13., 15.
validate 0., 1., 4., 5., 8., 9., 12., 13.

aarch64 = trn1
generate float32x4_t

aarch64 = zip1
generate float32x2_t, float64x2_t

/// Transpose vectors
name = vtrn2
multi_fn = simd_shuffle!, a, b, {transpose-2-in_len}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31

aarch64 = trn2
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

aarch64 = zip2
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

/// Transpose vectors
name = vtrn2
multi_fn = simd_shuffle!, a, b, {transpose-2-in_len}
a = 0., 2., 4., 6., 8., 10., 12., 14.
b = 1., 3., 5., 7., 9., 11., 13., 15.
validate 2., 3., 6., 7., 10., 11., 14., 15.

aarch64 = trn2
generate float32x4_t

aarch64 = zip2
generate float32x2_t, float64x2_t

/// Zip vectors
name = vzip
multi_fn = simd_shuffle!, a0:in_t, a, b, {zip-1-in_len}
multi_fn = simd_shuffle!, b0:in_t, a, b, {zip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31

aarch64 = zip
arm = vzip
generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t
generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t
generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t
arm = vtrn
generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t
aarch64 = zip
arm = vorr
generate int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
generate uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
generate poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t

/// Zip vectors
name = vzip
multi_fn = simd_shuffle!, a0:in_t, a, b, {zip-1-in_len}
multi_fn = simd_shuffle!, b0:in_t, a, b, {zip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 1., 2., 3., 4.
b = 5., 6., 7., 8.
validate 1., 5., 2., 6., 3., 7., 4., 8.

aarch64 = zip
arm = vtrn
generate float32x2_t:float32x2_t:float32x2x2_t
aarch64 = zip
arm = vorr
generate float32x4_t:float32x4_t:float32x4x2_t

/// Zip vectors
name = vzip1
multi_fn = simd_shuffle!, a, b, {zip-1-in_len}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = zip1
generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t
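
// Informative: vzip1 interleaves the low halves of the two inputs,
// producing [a0, b0, a1, b1, ...], as the expected values above show.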

/// Zip vectors
name = vzip1
multi_fn = simd_shuffle!, a, b, {zip-1-in_len}
a = 0., 2., 4., 6., 8., 10., 12., 14.
b = 1., 3., 5., 7., 9., 11., 13., 15.
validate 0., 1., 2., 3., 4., 5., 6., 7.

aarch64 = zip1
generate float32x2_t, float32x4_t, float64x2_t

/// Zip vectors
name = vzip2
multi_fn = simd_shuffle!, a, b, {zip-2-in_len}
a = 0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30
b = 1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31
validate 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31

aarch64 = zip2
generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t, poly64x2_t

/// Zip vectors
name = vzip2
multi_fn = simd_shuffle!, a, b, {zip-2-in_len}
a = 0., 8., 8., 10., 8., 10., 12., 14.
b = 1., 9., 9., 11., 9., 11., 13., 15.
validate 8., 9., 10., 11., 12., 13., 14., 15.

aarch64 = zip2
generate float32x2_t, float32x4_t, float64x2_t

/// Unzip vectors
name = vuzp
multi_fn = simd_shuffle!, a0:in_t, a, b, {unzip-1-in_len}
multi_fn = simd_shuffle!, b0:in_t, a, b, {unzip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16
b = 2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32
validate 1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32

aarch64 = uzp
arm = vuzp
generate int8x8_t:int8x8_t:int8x8x2_t, int16x4_t:int16x4_t:int16x4x2_t, int8x16_t:int8x16_t:int8x16x2_t, int16x8_t:int16x8_t:int16x8x2_t, int32x4_t:int32x4_t:int32x4x2_t
generate uint8x8_t:uint8x8_t:uint8x8x2_t, uint16x4_t:uint16x4_t:uint16x4x2_t, uint8x16_t:uint8x16_t:uint8x16x2_t, uint16x8_t:uint16x8_t:uint16x8x2_t, uint32x4_t:uint32x4_t:uint32x4x2_t
generate poly8x8_t:poly8x8_t:poly8x8x2_t, poly16x4_t:poly16x4_t:poly16x4x2_t, poly8x16_t:poly8x16_t:poly8x16x2_t, poly16x8_t:poly16x8_t:poly16x8x2_t
aarch64 = zip
arm = vtrn
generate int32x2_t:int32x2_t:int32x2x2_t, uint32x2_t:uint32x2_t:uint32x2x2_t

/// Unzip vectors
name = vuzp
multi_fn = simd_shuffle!, a0:in_t, a, b, {unzip-1-in_len}
multi_fn = simd_shuffle!, b0:in_t, a, b, {unzip-2-in_len}
multi_fn = transmute, (a0, b0)
a = 1., 2., 2., 4.
b = 2., 6., 6., 8.
validate 1., 2., 2., 6., 2., 4., 6., 8.

aarch64 = zip
arm = vtrn
generate float32x2_t:float32x2_t:float32x2x2_t
aarch64 = uzp
arm = vuzp
generate float32x4_t:float32x4_t:float32x4x2_t

/// Unzip vectors
name = vuzp1
multi_fn = simd_shuffle!, a, b, {unzip-1-in_len}
a = 1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0
b = 2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0
validate 1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16

aarch64 = uzp1
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

aarch64 = zip1
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t
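
// Informative: vuzp1 concatenates the even-numbered lanes of a and b,
// giving [a0, a2, ..., b0, b2, ...]; for two-element vectors that is
// [a0, b0] again, so the two-element forms check for zip1 instead.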

/// Unzip vectors
name = vuzp1
multi_fn = simd_shuffle!, a, b, {unzip-1-in_len}
a = 0., 8., 1., 9., 4., 12., 5., 13.
b = 1., 10., 3., 11., 6., 14., 7., 15.
validate 0., 1., 1., 3., 4., 5., 6., 7.

aarch64 = uzp1
generate float32x4_t

aarch64 = zip1
generate float32x2_t, float64x2_t

/// Unzip vectors
name = vuzp2
multi_fn = simd_shuffle!, a, b, {unzip-2-in_len}
a = 0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24
b = 0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32
validate 17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32

aarch64 = uzp2
generate int8x8_t, int8x16_t, int16x4_t, int16x8_t, int32x4_t, uint8x8_t, uint8x16_t, uint16x4_t, uint16x8_t, uint32x4_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t

aarch64 = zip2
generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t

/// Unzip vectors
name = vuzp2
multi_fn = simd_shuffle!, a, b, {unzip-2-in_len}
a = 0., 8., 1., 9., 4., 12., 5., 13.
b = 2., 9., 3., 11., 6., 14., 7., 15.
validate 8., 9., 9., 11., 12., 13., 14., 15.

aarch64 = uzp2
generate float32x4_t

aarch64 = zip2
generate float32x2_t, float64x2_t

////////////////////
// Unsigned Absolute difference and Accumulate Long
////////////////////

/// Unsigned Absolute difference and Accumulate Long
name = vabal
multi_fn = vabd-unsigned-noext, b, c, d:in_t
multi_fn = simd_add, a, {simd_cast, d}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20

arm = vabal.s
aarch64 = uabal
generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
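
// Worked example (informative): each output lane is a + |b - c| with the
// absolute difference zero-extended to the wider lane, e.g. lane 0 above is
// 1 + |1 - 10| = 10 and lane 8 is 9 + |9 - 20| = 20.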

/// Unsigned Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle!, e:uint8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = vabd_u8, d, e, f:uint8x8_t
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 20, 20, 20, 20, 20, 20, 20, 20

aarch64 = uabal
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t

/// Unsigned Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle!, d:uint16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_shuffle!, e:uint16x4_t, c, c, [4, 5, 6, 7]
multi_fn = vabd_u16, d, e, f:uint16x4_t
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12
b = 1, 2, 3, 4, 9, 10, 11, 12
c = 10, 10, 10, 10, 20, 0, 2, 4
validate 20, 20, 20, 20

aarch64 = uabal
generate uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t

/// Unsigned Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle!, d:uint32x2_t, b, b, [2, 3]
multi_fn = simd_shuffle!, e:uint32x2_t, c, c, [2, 3]
multi_fn = vabd_u32, d, e, f:uint32x2_t
multi_fn = simd_add, a, {simd_cast, f}
a = 15, 16
b = 1, 2, 15, 16
c = 10, 10, 10, 12
validate 20, 20

aarch64 = uabal
generate uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

////////////////////
// Signed Absolute difference and Accumulate Long
////////////////////

/// Signed Absolute difference and Accumulate Long
name = vabal
multi_fn = vabd-signed-noext, b, c, d:int8x8_t
multi_fn = simd_cast, e:uint8x8_t, d
multi_fn = simd_add, a, {simd_cast, e}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20

arm = vabal.s
aarch64 = sabal
generate int16x8_t:int8x8_t:int8x8_t:int16x8_t

/// Signed Absolute difference and Accumulate Long
name = vabal
multi_fn = vabd-signed-noext, b, c, d:int16x4_t
multi_fn = simd_cast, e:uint16x4_t, d
multi_fn = simd_add, a, {simd_cast, e}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20

arm = vabal.s
aarch64 = sabal
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t

/// Signed Absolute difference and Accumulate Long
name = vabal
multi_fn = vabd-signed-noext, b, c, d:int32x2_t
multi_fn = simd_cast, e:uint32x2_t, d
multi_fn = simd_add, a, {simd_cast, e}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20

arm = vabal.s
aarch64 = sabal
generate int64x2_t:int32x2_t:int32x2_t:int64x2_t

/// Signed Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = simd_shuffle!, e:int8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15]
multi_fn = vabd_s8, d, e, f:int8x8_t
multi_fn = simd_cast, f:uint8x8_t, f
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12, 13, 14, 15, 16
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
c = 10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12
validate 20, 20, 20, 20, 20, 20, 20, 20

aarch64 = sabal
generate int16x8_t:int8x16_t:int8x16_t:int16x8_t

/// Signed Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle!, d:int16x4_t, b, b, [4, 5, 6, 7]
multi_fn = simd_shuffle!, e:int16x4_t, c, c, [4, 5, 6, 7]
multi_fn = vabd_s16, d, e, f:int16x4_t
multi_fn = simd_cast, f:uint16x4_t, f
multi_fn = simd_add, a, {simd_cast, f}
a = 9, 10, 11, 12
b = 1, 2, 3, 4, 9, 10, 11, 12
c = 10, 10, 10, 10, 20, 0, 2, 4
validate 20, 20, 20, 20

aarch64 = sabal
generate int32x4_t:int16x8_t:int16x8_t:int32x4_t

/// Signed Absolute difference and Accumulate Long
name = vabal_high
no-q
multi_fn = simd_shuffle!, d:int32x2_t, b, b, [2, 3]
multi_fn = simd_shuffle!, e:int32x2_t, c, c, [2, 3]
multi_fn = vabd_s32, d, e, f:int32x2_t
multi_fn = simd_cast, f:uint32x2_t, f
multi_fn = simd_add, a, {simd_cast, f}
a = 15, 16
b = 1, 2, 15, 16
c = 10, 10, 10, 12
validate 20, 20

aarch64 = sabal
generate int64x2_t:int32x4_t:int32x4_t:int64x2_t

////////////////////
// Signed saturating absolute value
////////////////////

/// Signed saturating absolute value
name = vqabs
a = MIN, MAX, -6, -5, -4, -3, -2, -1, 0, -127, 127, 1, 2, 3, 4, 5
validate MAX, MAX, 6, 5, 4, 3, 2, 1, 0, 127, 127, 1, 2, 3, 4, 5

arm = vqabs.s
aarch64 = sqabs
link-arm = vqabs._EXT_
link-aarch64 = sqabs._EXT_
generate int*_t
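
// Informative: the "saturating" part only matters for MIN, whose absolute
// value is not representable; vqabs clamps it to MAX (for i8, |-128|
// saturates to 127) where a plain vabs would wrap back to MIN.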

/// Signed saturating absolute value
name = vqabs
a = MIN, -7
validate MAX, 7

aarch64 = sqabs
link-aarch64 = sqabs._EXT_
generate int64x*_t

/// Signed saturating absolute value
name = vqabs
multi_fn = simd_extract, {vqabs-in_ntt-noext, {vdup_n-in_ntt-noext, a}}, 0
a = -7
validate 7

aarch64 = sqabs
generate i8:i8, i16:i16

/// Signed saturating absolute value
name = vqabs
a = -7
validate 7

aarch64 = sqabs
link-aarch64 = sqabs._EXT_
generate i32:i32, i64:i64

/// Shift left and insert
name = vsli
n-suffix
constn = N
multi_fn = static_assert-N-0-63
multi_fn = transmute, {vsli_n-in_ntt-::<N>, transmute(a), transmute(b)}
a = 333
b = 2042
n = 2
validate 8169

aarch64 = sli
generate i64, u64
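
// Worked example (informative): with N = 2, vsli keeps the low N bits of a
// and inserts b shifted left, i.e. (b << N) | (a & ((1 << N) - 1)):
// (2042 << 2) | (333 & 3) = 8168 | 1 = 8169, matching the test above.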

/// Shift right and insert
name = vsri
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = transmute, {vsri_n-in_ntt-::<N>, transmute(a), transmute(b)}
a = 333
b = 2042
n = 2
validate 510

aarch64 = sri
generate i64, u64
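
// Worked example (informative): vsri keeps the high N bits of a and inserts
// b shifted right, i.e. (b >> N) | (a & !(MAX >> N)). With N = 2,
// 2042 >> 2 = 510 and the top two bits of a = 333 are zero in a 64-bit
// lane, giving the expected 510.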