/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "tcg-op.h"
#include "tcg-mo.h"

/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif
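
/*
 * The link-error indirection above turns any use of TCGV_LOW/TCGV_HIGH
 * that is *not* eliminated on a 64-bit host into a link failure rather
 * than a runtime bug.  For example, in tcg_gen_dup_i64_vec() below the
 * branches calling TCGV_LOW() are only reachable when
 * TCG_TARGET_REG_BITS == 32, so on 64-bit hosts the compiler removes
 * them and the _link_error symbols are never referenced.
 */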

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
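
/*
 * Illustrative sketch only (not part of this file; the helper name
 * gen_helper_gvec_neg8 is hypothetical): a front end pairs a custom
 * .fniv expander with the list of optional opcodes it may emit:
 *
 *     static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, 0 };
 *     static const GVecGen2 g = {
 *         .fni8 = tcg_gen_neg_i64,
 *         .fniv = tcg_gen_neg_vec,
 *         .fno = gen_helper_gvec_neg8,
 *         .opt_opc = vecop_list,
 *         .vece = MO_8,
 *     };
 *     tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
 *
 * The gvec expander consults tcg_can_emit_vecop_list() below before
 * choosing the .fniv path, and under CONFIG_DEBUG_TCG the list is
 * installed so that tcg_assert_listed_vecop() can cross-check each
 * opcode actually emitted.
 */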
#ifdef CONFIG_DEBUG_TCG
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
}
#endif

bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dupi_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}

void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}
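
/*
 * In the emitters above, TCGOP_VECL() encodes the vector type as an
 * offset from TCG_TYPE_V64 (0 = 64-bit, 1 = 128-bit, 2 = 256-bit) and
 * TCGOP_VECE() holds the log2 of the element size in bytes, i.e. one
 * of the MO_8 ... MO_64 constants.
 */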

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

#define MO_REG  (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)

static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
}

TCGv_vec tcg_const_zeros_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, 0);
    return ret;
}

TCGv_vec tcg_const_ones_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, -1);
    return ret;
}

TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_zeros_vec(t->base_type);
}

TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_ones_vec(t->base_type);
}

void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
{
    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
        do_dupi_vec(r, MO_32, a);
    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
        do_dupi_vec(r, MO_64, a);
    } else {
        TCGv_i64 c = tcg_const_i64(a);
        tcg_gen_dup_i64_vec(MO_64, r, c);
        tcg_temp_free_i64(c);
    }
}
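
/*
 * Two concrete cases of the above on a 32-bit host: for
 * a = 0xdeadbeefdeadbeef the two halves are equal, so the deposit64()
 * test succeeds and a single MO_32 dupi suffices; for
 * a = 0x0000000100000000 the halves differ and the value is not a
 * sign-extended 32-bit immediate, so it is materialized through a
 * TCGv_i64 and broadcast with tcg_gen_dup_i64_vec().
 */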

void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
}

void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
}

void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(vece, a));
}

void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}
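
/*
 * Note the 32-bit host path above: a 64-bit element cannot be passed
 * as a single TCGArg, so INDEX_op_dup2_vec assembles it from the two
 * i32 halves of the source.
 */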

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}
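
/*
 * tcg_gen_stl_vec() stores only the low low_type part of the vector,
 * e.g. the low 64 bits of a V128 temporary when low_type is
 * TCG_TYPE_V64.
 */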

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend that
       supports it.  */
    tcg_gen_and_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend that
       supports it.  */
    tcg_gen_or_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend that
       supports it.  */
    tcg_gen_xor_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        TCGv_vec t = tcg_const_ones_vec_matching(r);
        tcg_gen_xor_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}
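
/*
 * The fallback above uses the identity ~a == a ^ -1, with -1 provided
 * as an all-ones vector.
 */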

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        TCGv_vec t = tcg_const_zeros_vec_matching(r);
        tcg_gen_sub_vec(vece, r, t, a);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                do_dupi_vec(t, MO_REG, 0);
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t);
            }
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
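
/*
 * The second expansion above is the classic sign-mask trick: with
 * s = a >> (bits-1), i.e. all ones where a < 0 and zero elsewhere,
 * abs(a) == (a ^ s) - s.  E.g. for an 8-bit lane a = -5, s = -1:
 * (-5 ^ -1) - (-1) == 4 + 1 == 5; for a >= 0, s == 0 and the xor and
 * subtract are no-ops.
 */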

static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift more easily than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}
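
/*
 * Per TCG's vector comparison convention, each element of the result
 * is all ones where cond holds and all zeros where it does not, which
 * is what lets cmp_vec feed bitsel_vec in the cmpsel expansion below.
 */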

static void do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_ssadd_vec);
}

void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_usadd_vec);
}

void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_ussub_vec);
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_smin_vec);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_umin_vec);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_smax_vec);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_umax_vec);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3(vece, r, a, b, INDEX_op_sarv_vec);
}

static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc_s);
    hold_list = tcg_swap_vecop_list(NULL);

    can = tcg_can_emit_vec_op(opc_s, type, vece);
    if (can > 0) {
        vec_gen_3(opc_s, type, vece, ri, ai, si);
    } else if (can < 0) {
        tcg_expand_vec_op(opc_s, type, vece, ri, ai, si);
    } else {
        TCGv_vec vec_s = tcg_temp_new_vec(type);

        if (vece == MO_64) {
            TCGv_i64 s64 = tcg_temp_new_i64();
            tcg_gen_extu_i32_i64(s64, s);
            tcg_gen_dup_i64_vec(MO_64, vec_s, s64);
            tcg_temp_free_i64(s64);
        } else {
            tcg_gen_dup_i32_vec(vece, vec_s, s);
        }
        do_op3(vece, r, a, vec_s, opc_v);
        tcg_temp_free_vec(vec_s);
    }
    tcg_swap_vecop_list(hold_list);
}
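
/*
 * The final fallback above broadcasts the scalar shift count into a
 * vector and reuses the per-element variable-shift opcode (opc_v).
 * For MO_64 elements the i32 count is first zero-extended to i64,
 * since duplicating a 64-bit lane requires a 64-bit source.
 */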

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec);
}

void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}
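
/*
 * The expansion above computes r = (a & b) | (c & ~a): each result bit
 * is taken from b where the selector a is one and from c where it is
 * zero.  MO_8 is used throughout because the operation is bitwise and
 * the element size is irrelevant.
 */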

void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
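
/*
 * The fallback above works because tcg_gen_cmp_vec() produces an
 * all-ones/all-zeros mask per element (see the note after it), which
 * is exactly the selector tcg_gen_bitsel_vec() expects.
 */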