tcg/optimize: Split out fold_addsub2_i32
tcg/optimize.c
1 /*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu/osdep.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg-internal.h"
29
30 #define CASE_OP_32_64(x) \
31 glue(glue(case INDEX_op_, x), _i32): \
32 glue(glue(case INDEX_op_, x), _i64)
33
34 #define CASE_OP_32_64_VEC(x) \
35 glue(glue(case INDEX_op_, x), _i32): \
36 glue(glue(case INDEX_op_, x), _i64): \
37 glue(glue(case INDEX_op_, x), _vec)
38
39 typedef struct TempOptInfo {
40 bool is_const;
41 TCGTemp *prev_copy;
42 TCGTemp *next_copy;
43 uint64_t val;
44 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
45 } TempOptInfo;
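/*
 * Illustrative example: a temp known to hold the constant 0x00f0 has
 * val == 0x00f0 and z_mask == 0x00f0.  Any bit that is clear in z_mask
 * is guaranteed to be zero in the value; a set bit may be either 0 or 1.
 * A temp about which nothing is known has z_mask == -1 (all bits set).
 */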
46
47 typedef struct OptContext {
48 TCGContext *tcg;
49 TCGOp *prev_mb;
50 TCGTempSet temps_used;
51
52     /* In-flight values from optimization. */
53 uint64_t z_mask;
54 } OptContext;
55
56 static inline TempOptInfo *ts_info(TCGTemp *ts)
57 {
58 return ts->state_ptr;
59 }
60
61 static inline TempOptInfo *arg_info(TCGArg arg)
62 {
63 return ts_info(arg_temp(arg));
64 }
65
66 static inline bool ts_is_const(TCGTemp *ts)
67 {
68 return ts_info(ts)->is_const;
69 }
70
71 static inline bool arg_is_const(TCGArg arg)
72 {
73 return ts_is_const(arg_temp(arg));
74 }
75
76 static inline bool ts_is_copy(TCGTemp *ts)
77 {
78 return ts_info(ts)->next_copy != ts;
79 }
80
81 /* Reset TEMP's state, possibly removing the temp from the list of copies.  */
82 static void reset_ts(TCGTemp *ts)
83 {
84 TempOptInfo *ti = ts_info(ts);
85 TempOptInfo *pi = ts_info(ti->prev_copy);
86 TempOptInfo *ni = ts_info(ti->next_copy);
87
88 ni->prev_copy = ti->prev_copy;
89 pi->next_copy = ti->next_copy;
90 ti->next_copy = ts;
91 ti->prev_copy = ts;
92 ti->is_const = false;
93 ti->z_mask = -1;
94 }
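/*
 * Illustrative example of the copy tracking: copies of a temp form a
 * doubly linked circular list through next_copy/prev_copy, and a temp
 * with no copies points at itself.  After "mov t1, t0" the list is
 * t0 -> t1 -> t0; reset_ts(t1) unlinks t1, leaving both temps as
 * singleton lists again.
 */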
95
96 static void reset_temp(TCGArg arg)
97 {
98 reset_ts(arg_temp(arg));
99 }
100
101 /* Initialize and activate a temporary. */
102 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
103 {
104 size_t idx = temp_idx(ts);
105 TempOptInfo *ti;
106
107 if (test_bit(idx, ctx->temps_used.l)) {
108 return;
109 }
110 set_bit(idx, ctx->temps_used.l);
111
112 ti = ts->state_ptr;
113 if (ti == NULL) {
114 ti = tcg_malloc(sizeof(TempOptInfo));
115 ts->state_ptr = ti;
116 }
117
118 ti->next_copy = ts;
119 ti->prev_copy = ts;
120 if (ts->kind == TEMP_CONST) {
121 ti->is_const = true;
122 ti->val = ts->val;
123 ti->z_mask = ts->val;
124 if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
125 /* High bits of a 32-bit quantity are garbage. */
126 ti->z_mask |= ~0xffffffffull;
127 }
128 } else {
129 ti->is_const = false;
130 ti->z_mask = -1;
131 }
132 }
133
134 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
135 {
136 TCGTemp *i, *g, *l;
137
138 /* If this is already readonly, we can't do better. */
139 if (temp_readonly(ts)) {
140 return ts;
141 }
142
143 g = l = NULL;
144 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
145 if (temp_readonly(i)) {
146 return i;
147 } else if (i->kind > ts->kind) {
148 if (i->kind == TEMP_GLOBAL) {
149 g = i;
150 } else if (i->kind == TEMP_LOCAL) {
151 l = i;
152 }
153 }
154 }
155
156 /* If we didn't find a better representation, return the same temp. */
157 return g ? g : l ? l : ts;
158 }
159
160 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
161 {
162 TCGTemp *i;
163
164 if (ts1 == ts2) {
165 return true;
166 }
167
168 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
169 return false;
170 }
171
172 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
173 if (i == ts2) {
174 return true;
175 }
176 }
177
178 return false;
179 }
180
181 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
182 {
183 return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
184 }
185
186 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
187 {
188 TCGTemp *dst_ts = arg_temp(dst);
189 TCGTemp *src_ts = arg_temp(src);
190 const TCGOpDef *def;
191 TempOptInfo *di;
192 TempOptInfo *si;
193 uint64_t z_mask;
194 TCGOpcode new_op;
195
196 if (ts_are_copies(dst_ts, src_ts)) {
197 tcg_op_remove(ctx->tcg, op);
198 return true;
199 }
200
201 reset_ts(dst_ts);
202 di = ts_info(dst_ts);
203 si = ts_info(src_ts);
204 def = &tcg_op_defs[op->opc];
205 if (def->flags & TCG_OPF_VECTOR) {
206 new_op = INDEX_op_mov_vec;
207 } else if (def->flags & TCG_OPF_64BIT) {
208 new_op = INDEX_op_mov_i64;
209 } else {
210 new_op = INDEX_op_mov_i32;
211 }
212 op->opc = new_op;
213 /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
214 op->args[0] = dst;
215 op->args[1] = src;
216
217 z_mask = si->z_mask;
218 if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
219 /* High bits of the destination are now garbage. */
220 z_mask |= ~0xffffffffull;
221 }
222 di->z_mask = z_mask;
223
224 if (src_ts->type == dst_ts->type) {
225 TempOptInfo *ni = ts_info(si->next_copy);
226
227 di->next_copy = si->next_copy;
228 di->prev_copy = src_ts;
229 ni->prev_copy = dst_ts;
230 si->next_copy = dst_ts;
231 di->is_const = si->is_const;
232 di->val = si->val;
233 }
234 return true;
235 }
236
237 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
238 TCGArg dst, uint64_t val)
239 {
240 const TCGOpDef *def = &tcg_op_defs[op->opc];
241 TCGType type;
242 TCGTemp *tv;
243
244 if (def->flags & TCG_OPF_VECTOR) {
245 type = TCGOP_VECL(op) + TCG_TYPE_V64;
246 } else if (def->flags & TCG_OPF_64BIT) {
247 type = TCG_TYPE_I64;
248 } else {
249 type = TCG_TYPE_I32;
250 }
251
252 /* Convert movi to mov with constant temp. */
253 tv = tcg_constant_internal(type, val);
254 init_ts_info(ctx, tv);
255 return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
256 }
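/*
 * Illustrative example: folding "add_i32 r, a, b" when a == 5 and
 * b == 7 calls tcg_opt_gen_movi(ctx, op, r, 12), which rewrites the
 * op in place as "mov_i32 r, $0xc" using a constant temp of type
 * TCG_TYPE_I32.
 */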
257
258 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
259 {
260 uint64_t l64, h64;
261
262 switch (op) {
263 CASE_OP_32_64(add):
264 return x + y;
265
266 CASE_OP_32_64(sub):
267 return x - y;
268
269 CASE_OP_32_64(mul):
270 return x * y;
271
272 CASE_OP_32_64(and):
273 return x & y;
274
275 CASE_OP_32_64(or):
276 return x | y;
277
278 CASE_OP_32_64(xor):
279 return x ^ y;
280
281 case INDEX_op_shl_i32:
282 return (uint32_t)x << (y & 31);
283
284 case INDEX_op_shl_i64:
285 return (uint64_t)x << (y & 63);
286
287 case INDEX_op_shr_i32:
288 return (uint32_t)x >> (y & 31);
289
290 case INDEX_op_shr_i64:
291 return (uint64_t)x >> (y & 63);
292
293 case INDEX_op_sar_i32:
294 return (int32_t)x >> (y & 31);
295
296 case INDEX_op_sar_i64:
297 return (int64_t)x >> (y & 63);
298
299 case INDEX_op_rotr_i32:
300 return ror32(x, y & 31);
301
302 case INDEX_op_rotr_i64:
303 return ror64(x, y & 63);
304
305 case INDEX_op_rotl_i32:
306 return rol32(x, y & 31);
307
308 case INDEX_op_rotl_i64:
309 return rol64(x, y & 63);
310
311 CASE_OP_32_64(not):
312 return ~x;
313
314 CASE_OP_32_64(neg):
315 return -x;
316
317 CASE_OP_32_64(andc):
318 return x & ~y;
319
320 CASE_OP_32_64(orc):
321 return x | ~y;
322
323 CASE_OP_32_64(eqv):
324 return ~(x ^ y);
325
326 CASE_OP_32_64(nand):
327 return ~(x & y);
328
329 CASE_OP_32_64(nor):
330 return ~(x | y);
331
332 case INDEX_op_clz_i32:
333 return (uint32_t)x ? clz32(x) : y;
334
335 case INDEX_op_clz_i64:
336 return x ? clz64(x) : y;
337
338 case INDEX_op_ctz_i32:
339 return (uint32_t)x ? ctz32(x) : y;
340
341 case INDEX_op_ctz_i64:
342 return x ? ctz64(x) : y;
343
344 case INDEX_op_ctpop_i32:
345 return ctpop32(x);
346
347 case INDEX_op_ctpop_i64:
348 return ctpop64(x);
349
350 CASE_OP_32_64(ext8s):
351 return (int8_t)x;
352
353 CASE_OP_32_64(ext16s):
354 return (int16_t)x;
355
356 CASE_OP_32_64(ext8u):
357 return (uint8_t)x;
358
359 CASE_OP_32_64(ext16u):
360 return (uint16_t)x;
361
362 CASE_OP_32_64(bswap16):
363 x = bswap16(x);
364 return y & TCG_BSWAP_OS ? (int16_t)x : x;
365
366 CASE_OP_32_64(bswap32):
367 x = bswap32(x);
368 return y & TCG_BSWAP_OS ? (int32_t)x : x;
369
370 case INDEX_op_bswap64_i64:
371 return bswap64(x);
372
373 case INDEX_op_ext_i32_i64:
374 case INDEX_op_ext32s_i64:
375 return (int32_t)x;
376
377 case INDEX_op_extu_i32_i64:
378 case INDEX_op_extrl_i64_i32:
379 case INDEX_op_ext32u_i64:
380 return (uint32_t)x;
381
382 case INDEX_op_extrh_i64_i32:
383 return (uint64_t)x >> 32;
384
385 case INDEX_op_muluh_i32:
386 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
387 case INDEX_op_mulsh_i32:
388 return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
389
390 case INDEX_op_muluh_i64:
391 mulu64(&l64, &h64, x, y);
392 return h64;
393 case INDEX_op_mulsh_i64:
394 muls64(&l64, &h64, x, y);
395 return h64;
396
397 case INDEX_op_div_i32:
398 /* Avoid crashing on divide by zero, otherwise undefined. */
399 return (int32_t)x / ((int32_t)y ? : 1);
400 case INDEX_op_divu_i32:
401 return (uint32_t)x / ((uint32_t)y ? : 1);
402 case INDEX_op_div_i64:
403 return (int64_t)x / ((int64_t)y ? : 1);
404 case INDEX_op_divu_i64:
405 return (uint64_t)x / ((uint64_t)y ? : 1);
406
407 case INDEX_op_rem_i32:
408 return (int32_t)x % ((int32_t)y ? : 1);
409 case INDEX_op_remu_i32:
410 return (uint32_t)x % ((uint32_t)y ? : 1);
411 case INDEX_op_rem_i64:
412 return (int64_t)x % ((int64_t)y ? : 1);
413 case INDEX_op_remu_i64:
414 return (uint64_t)x % ((uint64_t)y ? : 1);
415
416 default:
417 fprintf(stderr,
418 "Unrecognized operation %d in do_constant_folding.\n", op);
419 tcg_abort();
420 }
421 }
422
423 static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
424 {
425 const TCGOpDef *def = &tcg_op_defs[op];
426 uint64_t res = do_constant_folding_2(op, x, y);
427 if (!(def->flags & TCG_OPF_64BIT)) {
428 res = (int32_t)res;
429 }
430 return res;
431 }
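/*
 * Worked example: do_constant_folding(INDEX_op_add_i32, 0xffffffff, 1)
 * computes 0x100000000 in 64-bit arithmetic; the !TCG_OPF_64BIT path
 * then truncates and sign-extends the low 32 bits, returning 0.
 * Likewise 0x7fffffff + 1 yields 0xffffffff80000000, the sign-extended
 * INT32_MIN.
 */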
432
433 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
434 {
435 switch (c) {
436 case TCG_COND_EQ:
437 return x == y;
438 case TCG_COND_NE:
439 return x != y;
440 case TCG_COND_LT:
441 return (int32_t)x < (int32_t)y;
442 case TCG_COND_GE:
443 return (int32_t)x >= (int32_t)y;
444 case TCG_COND_LE:
445 return (int32_t)x <= (int32_t)y;
446 case TCG_COND_GT:
447 return (int32_t)x > (int32_t)y;
448 case TCG_COND_LTU:
449 return x < y;
450 case TCG_COND_GEU:
451 return x >= y;
452 case TCG_COND_LEU:
453 return x <= y;
454 case TCG_COND_GTU:
455 return x > y;
456 default:
457 tcg_abort();
458 }
459 }
460
461 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
462 {
463 switch (c) {
464 case TCG_COND_EQ:
465 return x == y;
466 case TCG_COND_NE:
467 return x != y;
468 case TCG_COND_LT:
469 return (int64_t)x < (int64_t)y;
470 case TCG_COND_GE:
471 return (int64_t)x >= (int64_t)y;
472 case TCG_COND_LE:
473 return (int64_t)x <= (int64_t)y;
474 case TCG_COND_GT:
475 return (int64_t)x > (int64_t)y;
476 case TCG_COND_LTU:
477 return x < y;
478 case TCG_COND_GEU:
479 return x >= y;
480 case TCG_COND_LEU:
481 return x <= y;
482 case TCG_COND_GTU:
483 return x > y;
484 default:
485 tcg_abort();
486 }
487 }
488
489 static bool do_constant_folding_cond_eq(TCGCond c)
490 {
491 switch (c) {
492 case TCG_COND_GT:
493 case TCG_COND_LTU:
494 case TCG_COND_LT:
495 case TCG_COND_GTU:
496 case TCG_COND_NE:
497 return 0;
498 case TCG_COND_GE:
499 case TCG_COND_GEU:
500 case TCG_COND_LE:
501 case TCG_COND_LEU:
502 case TCG_COND_EQ:
503 return 1;
504 default:
505 tcg_abort();
506 }
507 }
508
509 /*
510 * Return -1 if the condition can't be simplified,
511 * and the result of the condition (0 or 1) if it can.
512 */
513 static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
514 TCGArg y, TCGCond c)
515 {
516 uint64_t xv = arg_info(x)->val;
517 uint64_t yv = arg_info(y)->val;
518
519 if (arg_is_const(x) && arg_is_const(y)) {
520 const TCGOpDef *def = &tcg_op_defs[op];
521 tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
522 if (def->flags & TCG_OPF_64BIT) {
523 return do_constant_folding_cond_64(xv, yv, c);
524 } else {
525 return do_constant_folding_cond_32(xv, yv, c);
526 }
527 } else if (args_are_copies(x, y)) {
528 return do_constant_folding_cond_eq(c);
529 } else if (arg_is_const(y) && yv == 0) {
530 switch (c) {
531 case TCG_COND_LTU:
532 return 0;
533 case TCG_COND_GEU:
534 return 1;
535 default:
536 return -1;
537 }
538 }
539 return -1;
540 }
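/*
 * Illustrative example: for "setcond_i32 r, a, $0, ltu" the value of a
 * is irrelevant; an unsigned comparison "< 0" can never be true, so the
 * helper returns 0 and the caller folds the op to "movi r, $0".
 * Conversely "geu $0" always returns 1.
 */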
541
542 /*
543 * Return -1 if the condition can't be simplified,
544 * and the result of the condition (0 or 1) if it can.
545 */
546 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
547 {
548 TCGArg al = p1[0], ah = p1[1];
549 TCGArg bl = p2[0], bh = p2[1];
550
551 if (arg_is_const(bl) && arg_is_const(bh)) {
552 tcg_target_ulong blv = arg_info(bl)->val;
553 tcg_target_ulong bhv = arg_info(bh)->val;
554 uint64_t b = deposit64(blv, 32, 32, bhv);
555
556 if (arg_is_const(al) && arg_is_const(ah)) {
557 tcg_target_ulong alv = arg_info(al)->val;
558 tcg_target_ulong ahv = arg_info(ah)->val;
559 uint64_t a = deposit64(alv, 32, 32, ahv);
560 return do_constant_folding_cond_64(a, b, c);
561 }
562 if (b == 0) {
563 switch (c) {
564 case TCG_COND_LTU:
565 return 0;
566 case TCG_COND_GEU:
567 return 1;
568 default:
569 break;
570 }
571 }
572 }
573 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
574 return do_constant_folding_cond_eq(c);
575 }
576 return -1;
577 }
578
579 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
580 {
581 TCGArg a1 = *p1, a2 = *p2;
582 int sum = 0;
583 sum += arg_is_const(a1);
584 sum -= arg_is_const(a2);
585
586     /* Prefer the constant in the second argument, and then the form
587 op a, a, b, which is better handled on non-RISC hosts. */
588 if (sum > 0 || (sum == 0 && dest == a2)) {
589 *p1 = a2;
590 *p2 = a1;
591 return true;
592 }
593 return false;
594 }
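/*
 * Illustrative example: "add_i32 r, $5, a" is canonicalized to
 * "add_i32 r, a, $5", and "and_i32 r, b, r" becomes "and_i32 r, r, b"
 * so the destination matches the first source, a form that two-operand
 * hosts such as x86 can use without an extra mov.
 */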
595
596 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
597 {
598 int sum = 0;
599 sum += arg_is_const(p1[0]);
600 sum += arg_is_const(p1[1]);
601 sum -= arg_is_const(p2[0]);
602 sum -= arg_is_const(p2[1]);
603 if (sum > 0) {
604 TCGArg t;
605 t = p1[0], p1[0] = p2[0], p2[0] = t;
606 t = p1[1], p1[1] = p2[1], p2[1] = t;
607 return true;
608 }
609 return false;
610 }
611
612 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
613 {
614 for (int i = 0; i < nb_args; i++) {
615 TCGTemp *ts = arg_temp(op->args[i]);
616 if (ts) {
617 init_ts_info(ctx, ts);
618 }
619 }
620 }
621
622 static void copy_propagate(OptContext *ctx, TCGOp *op,
623 int nb_oargs, int nb_iargs)
624 {
625 TCGContext *s = ctx->tcg;
626
627 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
628 TCGTemp *ts = arg_temp(op->args[i]);
629 if (ts && ts_is_copy(ts)) {
630 op->args[i] = temp_arg(find_better_copy(s, ts));
631 }
632 }
633 }
634
635 static void finish_folding(OptContext *ctx, TCGOp *op)
636 {
637 const TCGOpDef *def = &tcg_op_defs[op->opc];
638 int i, nb_oargs;
639
640 /*
641 * For an opcode that ends a BB, reset all temp data.
642 * We do no cross-BB optimization.
643 */
644 if (def->flags & TCG_OPF_BB_END) {
645 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
646 ctx->prev_mb = NULL;
647 return;
648 }
649
650 nb_oargs = def->nb_oargs;
651 for (i = 0; i < nb_oargs; i++) {
652 reset_temp(op->args[i]);
653 /*
654 * Save the corresponding known-zero bits mask for the
655 * first output argument (only one supported so far).
656 */
657 if (i == 0) {
658 arg_info(op->args[i])->z_mask = ctx->z_mask;
659 }
660 }
661 }
662
663 /*
664 * The fold_* functions return true when processing is complete,
665 * usually by folding the operation to a constant or to a copy,
666 * and calling tcg_opt_gen_{mov,movi}. They may do other things,
667 * like collect information about the value produced, for use in
668 * optimizing a subsequent operation.
669 *
670 * These first fold_* functions are all helpers, used by other
671 * folders for more specific operations.
672 */
673
674 static bool fold_const1(OptContext *ctx, TCGOp *op)
675 {
676 if (arg_is_const(op->args[1])) {
677 uint64_t t;
678
679 t = arg_info(op->args[1])->val;
680 t = do_constant_folding(op->opc, t, 0);
681 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
682 }
683 return false;
684 }
685
686 static bool fold_const2(OptContext *ctx, TCGOp *op)
687 {
688 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
689 uint64_t t1 = arg_info(op->args[1])->val;
690 uint64_t t2 = arg_info(op->args[2])->val;
691
692 t1 = do_constant_folding(op->opc, t1, t2);
693 return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
694 }
695 return false;
696 }
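/*
 * Illustrative example: for "mul_i64 r, a, b" where copy propagation has
 * shown both inputs to be the constants 6 and 7, fold_const2 computes 42
 * and replaces the op with "mov_i64 r, $42", returning true so the caller
 * does no further processing.
 */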
697
698 /*
699 * These outermost fold_<op> functions are sorted alphabetically.
700 */
701
702 static bool fold_add(OptContext *ctx, TCGOp *op)
703 {
704 return fold_const2(ctx, op);
705 }
706
707 static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
708 {
709 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
710 arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
711 uint32_t al = arg_info(op->args[2])->val;
712 uint32_t ah = arg_info(op->args[3])->val;
713 uint32_t bl = arg_info(op->args[4])->val;
714 uint32_t bh = arg_info(op->args[5])->val;
715 uint64_t a = ((uint64_t)ah << 32) | al;
716 uint64_t b = ((uint64_t)bh << 32) | bl;
717 TCGArg rl, rh;
718 TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
719
720 if (add) {
721 a += b;
722 } else {
723 a -= b;
724 }
725
726 rl = op->args[0];
727 rh = op->args[1];
728 tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
729 tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
730 return true;
731 }
732 return false;
733 }
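/*
 * Worked example: for "add2_i32 rl, rh, al, ah, bl, bh" with constant
 * inputs forming a = 0x00000001ffffffff and b = 1, the 64-bit sum is
 * 0x0000000200000000, so the op becomes "mov_i32 rl, $0" and a second
 * op is inserted for "mov_i32 rh, $2", propagating the carry into the
 * high half.
 */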
734
735 static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
736 {
737 return fold_addsub2_i32(ctx, op, true);
738 }
739
740 static bool fold_and(OptContext *ctx, TCGOp *op)
741 {
742 return fold_const2(ctx, op);
743 }
744
745 static bool fold_andc(OptContext *ctx, TCGOp *op)
746 {
747 return fold_const2(ctx, op);
748 }
749
750 static bool fold_brcond(OptContext *ctx, TCGOp *op)
751 {
752 TCGCond cond = op->args[2];
753 int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
754
755 if (i == 0) {
756 tcg_op_remove(ctx->tcg, op);
757 return true;
758 }
759 if (i > 0) {
760 op->opc = INDEX_op_br;
761 op->args[0] = op->args[3];
762 }
763 return false;
764 }
765
766 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
767 {
768 TCGCond cond = op->args[4];
769 int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
770 TCGArg label = op->args[5];
771 int inv = 0;
772
773 if (i >= 0) {
774 goto do_brcond_const;
775 }
776
777 switch (cond) {
778 case TCG_COND_LT:
779 case TCG_COND_GE:
780 /*
781 * Simplify LT/GE comparisons vs zero to a single compare
782 * vs the high word of the input.
783 */
784 if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
785 arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
786 goto do_brcond_high;
787 }
788 break;
789
790 case TCG_COND_NE:
791 inv = 1;
792 QEMU_FALLTHROUGH;
793 case TCG_COND_EQ:
794 /*
795 * Simplify EQ/NE comparisons where one of the pairs
796 * can be simplified.
797 */
798 i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
799 op->args[2], cond);
800 switch (i ^ inv) {
801 case 0:
802 goto do_brcond_const;
803 case 1:
804 goto do_brcond_high;
805 }
806
807 i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
808 op->args[3], cond);
809 switch (i ^ inv) {
810 case 0:
811 goto do_brcond_const;
812 case 1:
813 op->opc = INDEX_op_brcond_i32;
814 op->args[1] = op->args[2];
815 op->args[2] = cond;
816 op->args[3] = label;
817 break;
818 }
819 break;
820
821 default:
822 break;
823
824 do_brcond_high:
825 op->opc = INDEX_op_brcond_i32;
826 op->args[0] = op->args[1];
827 op->args[1] = op->args[3];
828 op->args[2] = cond;
829 op->args[3] = label;
830 break;
831
832 do_brcond_const:
833 if (i == 0) {
834 tcg_op_remove(ctx->tcg, op);
835 return true;
836 }
837 op->opc = INDEX_op_br;
838 op->args[0] = label;
839 break;
840 }
841 return false;
842 }
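/*
 * Illustrative example: "brcond2_i32 al, ah, $0, $0, lt, label" tests
 * whether the 64-bit value ah:al is negative, which depends only on the
 * sign of the high word, so it is rewritten as
 * "brcond_i32 ah, $0, lt, label".
 */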
843
844 static bool fold_call(OptContext *ctx, TCGOp *op)
845 {
846 TCGContext *s = ctx->tcg;
847 int nb_oargs = TCGOP_CALLO(op);
848 int nb_iargs = TCGOP_CALLI(op);
849 int flags, i;
850
851 init_arguments(ctx, op, nb_oargs + nb_iargs);
852 copy_propagate(ctx, op, nb_oargs, nb_iargs);
853
854 /* If the function reads or writes globals, reset temp data. */
855 flags = tcg_call_flags(op);
856 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
857 int nb_globals = s->nb_globals;
858
859 for (i = 0; i < nb_globals; i++) {
860 if (test_bit(i, ctx->temps_used.l)) {
861 reset_ts(&ctx->tcg->temps[i]);
862 }
863 }
864 }
865
866 /* Reset temp data for outputs. */
867 for (i = 0; i < nb_oargs; i++) {
868 reset_temp(op->args[i]);
869 }
870
871 /* Stop optimizing MB across calls. */
872 ctx->prev_mb = NULL;
873 return true;
874 }
875
876 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
877 {
878 return fold_const1(ctx, op);
879 }
880
881 static bool fold_divide(OptContext *ctx, TCGOp *op)
882 {
883 return fold_const2(ctx, op);
884 }
885
886 static bool fold_eqv(OptContext *ctx, TCGOp *op)
887 {
888 return fold_const2(ctx, op);
889 }
890
891 static bool fold_exts(OptContext *ctx, TCGOp *op)
892 {
893 return fold_const1(ctx, op);
894 }
895
896 static bool fold_extu(OptContext *ctx, TCGOp *op)
897 {
898 return fold_const1(ctx, op);
899 }
900
901 static bool fold_mb(OptContext *ctx, TCGOp *op)
902 {
903 /* Eliminate duplicate and redundant fence instructions. */
904 if (ctx->prev_mb) {
905 /*
906 * Merge two barriers of the same type into one,
907 * or a weaker barrier into a stronger one,
908 * or two weaker barriers into a stronger one.
909 * mb X; mb Y => mb X|Y
910 * mb; strl => mb; st
911 * ldaq; mb => ld; mb
912 * ldaq; strl => ld; mb; st
913 * Other combinations are also merged into a strong
914 * barrier. This is stricter than specified but for
915 * the purposes of TCG is better than not optimizing.
916 */
917 ctx->prev_mb->args[0] |= op->args[0];
918 tcg_op_remove(ctx->tcg, op);
919 } else {
920 ctx->prev_mb = op;
921 }
922 return true;
923 }
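/*
 * Illustrative example: two back-to-back barriers "mb TCG_MO_LD_LD" and
 * "mb TCG_MO_ST_ST" merge into a single "mb TCG_MO_LD_LD|TCG_MO_ST_ST";
 * the second op is removed and the surviving barrier is at least as
 * strong as either original.
 */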
924
925 static bool fold_mul(OptContext *ctx, TCGOp *op)
926 {
927 return fold_const2(ctx, op);
928 }
929
930 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
931 {
932 return fold_const2(ctx, op);
933 }
934
935 static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
936 {
937 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
938 uint32_t a = arg_info(op->args[2])->val;
939 uint32_t b = arg_info(op->args[3])->val;
940 uint64_t r = (uint64_t)a * b;
941 TCGArg rl, rh;
942 TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
943
944 rl = op->args[0];
945 rh = op->args[1];
946 tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
947 tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
948 return true;
949 }
950 return false;
951 }
952
953 static bool fold_nand(OptContext *ctx, TCGOp *op)
954 {
955 return fold_const2(ctx, op);
956 }
957
958 static bool fold_neg(OptContext *ctx, TCGOp *op)
959 {
960 return fold_const1(ctx, op);
961 }
962
963 static bool fold_nor(OptContext *ctx, TCGOp *op)
964 {
965 return fold_const2(ctx, op);
966 }
967
968 static bool fold_not(OptContext *ctx, TCGOp *op)
969 {
970 return fold_const1(ctx, op);
971 }
972
973 static bool fold_or(OptContext *ctx, TCGOp *op)
974 {
975 return fold_const2(ctx, op);
976 }
977
978 static bool fold_orc(OptContext *ctx, TCGOp *op)
979 {
980 return fold_const2(ctx, op);
981 }
982
983 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
984 {
985 /* Opcodes that touch guest memory stop the mb optimization. */
986 ctx->prev_mb = NULL;
987 return false;
988 }
989
990 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
991 {
992 /* Opcodes that touch guest memory stop the mb optimization. */
993 ctx->prev_mb = NULL;
994 return false;
995 }
996
997 static bool fold_remainder(OptContext *ctx, TCGOp *op)
998 {
999 return fold_const2(ctx, op);
1000 }
1001
1002 static bool fold_setcond(OptContext *ctx, TCGOp *op)
1003 {
1004 TCGCond cond = op->args[3];
1005 int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
1006
1007 if (i >= 0) {
1008 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1009 }
1010 return false;
1011 }
1012
1013 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1014 {
1015 TCGCond cond = op->args[5];
1016 int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1017 int inv = 0;
1018
1019 if (i >= 0) {
1020 goto do_setcond_const;
1021 }
1022
1023 switch (cond) {
1024 case TCG_COND_LT:
1025 case TCG_COND_GE:
1026 /*
1027 * Simplify LT/GE comparisons vs zero to a single compare
1028 * vs the high word of the input.
1029 */
1030 if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
1031 arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
1032 goto do_setcond_high;
1033 }
1034 break;
1035
1036 case TCG_COND_NE:
1037 inv = 1;
1038 QEMU_FALLTHROUGH;
1039 case TCG_COND_EQ:
1040 /*
1041 * Simplify EQ/NE comparisons where one of the pairs
1042 * can be simplified.
1043 */
1044 i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
1045 op->args[3], cond);
1046 switch (i ^ inv) {
1047 case 0:
1048 goto do_setcond_const;
1049 case 1:
1050 goto do_setcond_high;
1051 }
1052
1053 i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
1054 op->args[4], cond);
1055 switch (i ^ inv) {
1056 case 0:
1057 goto do_setcond_const;
1058 case 1:
1059 op->args[2] = op->args[3];
1060 op->args[3] = cond;
1061 op->opc = INDEX_op_setcond_i32;
1062 break;
1063 }
1064 break;
1065
1066 default:
1067 break;
1068
1069 do_setcond_high:
1070 op->args[1] = op->args[2];
1071 op->args[2] = op->args[4];
1072 op->args[3] = cond;
1073 op->opc = INDEX_op_setcond_i32;
1074 break;
1075 }
1076 return false;
1077
1078 do_setcond_const:
1079 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1080 }
1081
1082 static bool fold_shift(OptContext *ctx, TCGOp *op)
1083 {
1084 return fold_const2(ctx, op);
1085 }
1086
1087 static bool fold_sub(OptContext *ctx, TCGOp *op)
1088 {
1089 return fold_const2(ctx, op);
1090 }
1091
1092 static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
1093 {
1094 return fold_addsub2_i32(ctx, op, false);
1095 }
1096
1097 static bool fold_xor(OptContext *ctx, TCGOp *op)
1098 {
1099 return fold_const2(ctx, op);
1100 }
1101
1102 /* Propagate constants and copies, fold constant expressions. */
1103 void tcg_optimize(TCGContext *s)
1104 {
1105 int nb_temps, i;
1106 TCGOp *op, *op_next;
1107 OptContext ctx = { .tcg = s };
1108
1109     /* Each temp has a TempOptInfo reachable through state_ptr.
1110        If the temp holds a constant then its value is kept there.
1111        If the temp is a copy of other ones then the other copies are
1112        available through a doubly linked circular list. */
1113
1114 nb_temps = s->nb_temps;
1115 for (i = 0; i < nb_temps; ++i) {
1116 s->temps[i].state_ptr = NULL;
1117 }
1118
1119 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
1120 uint64_t z_mask, partmask, affected, tmp;
1121 TCGOpcode opc = op->opc;
1122 const TCGOpDef *def;
1123 bool done = false;
1124
1125 /* Calls are special. */
1126 if (opc == INDEX_op_call) {
1127 fold_call(&ctx, op);
1128 continue;
1129 }
1130
1131 def = &tcg_op_defs[opc];
1132 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
1133 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
1134
1135 /* For commutative operations make constant second argument */
1136 switch (opc) {
1137 CASE_OP_32_64_VEC(add):
1138 CASE_OP_32_64_VEC(mul):
1139 CASE_OP_32_64_VEC(and):
1140 CASE_OP_32_64_VEC(or):
1141 CASE_OP_32_64_VEC(xor):
1142 CASE_OP_32_64(eqv):
1143 CASE_OP_32_64(nand):
1144 CASE_OP_32_64(nor):
1145 CASE_OP_32_64(muluh):
1146 CASE_OP_32_64(mulsh):
1147 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1148 break;
1149 CASE_OP_32_64(brcond):
1150 if (swap_commutative(-1, &op->args[0], &op->args[1])) {
1151 op->args[2] = tcg_swap_cond(op->args[2]);
1152 }
1153 break;
1154 CASE_OP_32_64(setcond):
1155 if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1156 op->args[3] = tcg_swap_cond(op->args[3]);
1157 }
1158 break;
1159 CASE_OP_32_64(movcond):
1160 if (swap_commutative(-1, &op->args[1], &op->args[2])) {
1161 op->args[5] = tcg_swap_cond(op->args[5]);
1162 }
1163 /* For movcond, we canonicalize the "false" input reg to match
1164 the destination reg so that the tcg backend can implement
1165 a "move if true" operation. */
1166 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1167 op->args[5] = tcg_invert_cond(op->args[5]);
1168 }
1169 break;
1170 CASE_OP_32_64(add2):
1171 swap_commutative(op->args[0], &op->args[2], &op->args[4]);
1172 swap_commutative(op->args[1], &op->args[3], &op->args[5]);
1173 break;
1174 CASE_OP_32_64(mulu2):
1175 CASE_OP_32_64(muls2):
1176 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1177 break;
1178 case INDEX_op_brcond2_i32:
1179 if (swap_commutative2(&op->args[0], &op->args[2])) {
1180 op->args[4] = tcg_swap_cond(op->args[4]);
1181 }
1182 break;
1183 case INDEX_op_setcond2_i32:
1184 if (swap_commutative2(&op->args[1], &op->args[3])) {
1185 op->args[5] = tcg_swap_cond(op->args[5]);
1186 }
1187 break;
1188 default:
1189 break;
1190 }
1191
1192 /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
1193 and "sub r, 0, a => neg r, a" case. */
1194 switch (opc) {
1195 CASE_OP_32_64(shl):
1196 CASE_OP_32_64(shr):
1197 CASE_OP_32_64(sar):
1198 CASE_OP_32_64(rotl):
1199 CASE_OP_32_64(rotr):
1200 if (arg_is_const(op->args[1])
1201 && arg_info(op->args[1])->val == 0) {
1202 tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
1203 continue;
1204 }
1205 break;
1206 CASE_OP_32_64_VEC(sub):
1207 {
1208 TCGOpcode neg_op;
1209 bool have_neg;
1210
1211 if (arg_is_const(op->args[2])) {
1212 /* Proceed with possible constant folding. */
1213 break;
1214 }
1215 if (opc == INDEX_op_sub_i32) {
1216 neg_op = INDEX_op_neg_i32;
1217 have_neg = TCG_TARGET_HAS_neg_i32;
1218 } else if (opc == INDEX_op_sub_i64) {
1219 neg_op = INDEX_op_neg_i64;
1220 have_neg = TCG_TARGET_HAS_neg_i64;
1221 } else if (TCG_TARGET_HAS_neg_vec) {
1222 TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
1223 unsigned vece = TCGOP_VECE(op);
1224 neg_op = INDEX_op_neg_vec;
1225 have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
1226 } else {
1227 break;
1228 }
1229 if (!have_neg) {
1230 break;
1231 }
1232 if (arg_is_const(op->args[1])
1233 && arg_info(op->args[1])->val == 0) {
1234 op->opc = neg_op;
1235 reset_temp(op->args[0]);
1236 op->args[1] = op->args[2];
1237 continue;
1238 }
1239 }
1240 break;
1241 CASE_OP_32_64_VEC(xor):
1242 CASE_OP_32_64(nand):
1243 if (!arg_is_const(op->args[1])
1244 && arg_is_const(op->args[2])
1245 && arg_info(op->args[2])->val == -1) {
1246 i = 1;
1247 goto try_not;
1248 }
1249 break;
1250 CASE_OP_32_64(nor):
1251 if (!arg_is_const(op->args[1])
1252 && arg_is_const(op->args[2])
1253 && arg_info(op->args[2])->val == 0) {
1254 i = 1;
1255 goto try_not;
1256 }
1257 break;
1258 CASE_OP_32_64_VEC(andc):
1259 if (!arg_is_const(op->args[2])
1260 && arg_is_const(op->args[1])
1261 && arg_info(op->args[1])->val == -1) {
1262 i = 2;
1263 goto try_not;
1264 }
1265 break;
1266 CASE_OP_32_64_VEC(orc):
1267 CASE_OP_32_64(eqv):
1268 if (!arg_is_const(op->args[2])
1269 && arg_is_const(op->args[1])
1270 && arg_info(op->args[1])->val == 0) {
1271 i = 2;
1272 goto try_not;
1273 }
1274 break;
1275 try_not:
1276 {
1277 TCGOpcode not_op;
1278 bool have_not;
1279
1280 if (def->flags & TCG_OPF_VECTOR) {
1281 not_op = INDEX_op_not_vec;
1282 have_not = TCG_TARGET_HAS_not_vec;
1283 } else if (def->flags & TCG_OPF_64BIT) {
1284 not_op = INDEX_op_not_i64;
1285 have_not = TCG_TARGET_HAS_not_i64;
1286 } else {
1287 not_op = INDEX_op_not_i32;
1288 have_not = TCG_TARGET_HAS_not_i32;
1289 }
1290 if (!have_not) {
1291 break;
1292 }
1293 op->opc = not_op;
1294 reset_temp(op->args[0]);
1295 op->args[1] = op->args[i];
1296 continue;
1297 }
1298 default:
1299 break;
1300 }
1301
1302 /* Simplify expression for "op r, a, const => mov r, a" cases */
1303 switch (opc) {
1304 CASE_OP_32_64_VEC(add):
1305 CASE_OP_32_64_VEC(sub):
1306 CASE_OP_32_64_VEC(or):
1307 CASE_OP_32_64_VEC(xor):
1308 CASE_OP_32_64_VEC(andc):
1309 CASE_OP_32_64(shl):
1310 CASE_OP_32_64(shr):
1311 CASE_OP_32_64(sar):
1312 CASE_OP_32_64(rotl):
1313 CASE_OP_32_64(rotr):
1314 if (!arg_is_const(op->args[1])
1315 && arg_is_const(op->args[2])
1316 && arg_info(op->args[2])->val == 0) {
1317 tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
1318 continue;
1319 }
1320 break;
1321 CASE_OP_32_64_VEC(and):
1322 CASE_OP_32_64_VEC(orc):
1323 CASE_OP_32_64(eqv):
1324 if (!arg_is_const(op->args[1])
1325 && arg_is_const(op->args[2])
1326 && arg_info(op->args[2])->val == -1) {
1327 tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
1328 continue;
1329 }
1330 break;
1331 default:
1332 break;
1333 }
1334
1335 /* Simplify using known-zero bits. Currently only ops with a single
1336    output argument are supported. */
1337 z_mask = -1;
1338 affected = -1;
1339 switch (opc) {
1340 CASE_OP_32_64(ext8s):
1341 if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
1342 break;
1343 }
1344 QEMU_FALLTHROUGH;
1345 CASE_OP_32_64(ext8u):
1346 z_mask = 0xff;
1347 goto and_const;
1348 CASE_OP_32_64(ext16s):
1349 if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
1350 break;
1351 }
1352 QEMU_FALLTHROUGH;
1353 CASE_OP_32_64(ext16u):
1354 z_mask = 0xffff;
1355 goto and_const;
1356 case INDEX_op_ext32s_i64:
1357 if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
1358 break;
1359 }
1360 QEMU_FALLTHROUGH;
1361 case INDEX_op_ext32u_i64:
1362 z_mask = 0xffffffffU;
1363 goto and_const;
1364
1365 CASE_OP_32_64(and):
1366 z_mask = arg_info(op->args[2])->z_mask;
1367 if (arg_is_const(op->args[2])) {
1368 and_const:
1369 affected = arg_info(op->args[1])->z_mask & ~z_mask;
1370 }
1371 z_mask = arg_info(op->args[1])->z_mask & z_mask;
1372 break;
1373
1374 case INDEX_op_ext_i32_i64:
1375 if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
1376 break;
1377 }
1378 QEMU_FALLTHROUGH;
1379 case INDEX_op_extu_i32_i64:
1380             /* We do not compute affected as it is a size-changing op. */
1381 z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
1382 break;
1383
1384 CASE_OP_32_64(andc):
1385 /* Known-zeros does not imply known-ones. Therefore unless
1386 op->args[2] is constant, we can't infer anything from it. */
1387 if (arg_is_const(op->args[2])) {
1388 z_mask = ~arg_info(op->args[2])->z_mask;
1389 goto and_const;
1390 }
1391 /* But we certainly know nothing outside args[1] may be set. */
1392 z_mask = arg_info(op->args[1])->z_mask;
1393 break;
1394
1395 case INDEX_op_sar_i32:
1396 if (arg_is_const(op->args[2])) {
1397 tmp = arg_info(op->args[2])->val & 31;
1398 z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
1399 }
1400 break;
1401 case INDEX_op_sar_i64:
1402 if (arg_is_const(op->args[2])) {
1403 tmp = arg_info(op->args[2])->val & 63;
1404 z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
1405 }
1406 break;
1407
1408 case INDEX_op_shr_i32:
1409 if (arg_is_const(op->args[2])) {
1410 tmp = arg_info(op->args[2])->val & 31;
1411 z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
1412 }
1413 break;
1414 case INDEX_op_shr_i64:
1415 if (arg_is_const(op->args[2])) {
1416 tmp = arg_info(op->args[2])->val & 63;
1417 z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
1418 }
1419 break;
1420
1421 case INDEX_op_extrl_i64_i32:
1422 z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
1423 break;
1424 case INDEX_op_extrh_i64_i32:
1425 z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
1426 break;
1427
1428 CASE_OP_32_64(shl):
1429 if (arg_is_const(op->args[2])) {
1430 tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
1431 z_mask = arg_info(op->args[1])->z_mask << tmp;
1432 }
1433 break;
1434
1435 CASE_OP_32_64(neg):
1436 /* Set to 1 all bits to the left of the rightmost. */
1437 z_mask = -(arg_info(op->args[1])->z_mask
1438 & -arg_info(op->args[1])->z_mask);
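            /*
             * Illustrative example: if the input's z_mask is 0x6 (only
             * bits 1 and 2 may be set), the value is a multiple of 2,
             * so its negation is too: -(0x6 & -0x6) == ~1, i.e. bit 0
             * of the result is known to be zero.
             */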
1439 break;
1440
1441 CASE_OP_32_64(deposit):
1442 z_mask = deposit64(arg_info(op->args[1])->z_mask,
1443 op->args[3], op->args[4],
1444 arg_info(op->args[2])->z_mask);
1445 break;
1446
1447 CASE_OP_32_64(extract):
1448 z_mask = extract64(arg_info(op->args[1])->z_mask,
1449 op->args[2], op->args[3]);
1450 if (op->args[2] == 0) {
1451 affected = arg_info(op->args[1])->z_mask & ~z_mask;
1452 }
1453 break;
1454 CASE_OP_32_64(sextract):
1455 z_mask = sextract64(arg_info(op->args[1])->z_mask,
1456 op->args[2], op->args[3]);
1457 if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
1458 affected = arg_info(op->args[1])->z_mask & ~z_mask;
1459 }
1460 break;
1461
1462 CASE_OP_32_64(or):
1463 CASE_OP_32_64(xor):
1464 z_mask = arg_info(op->args[1])->z_mask
1465 | arg_info(op->args[2])->z_mask;
1466 break;
1467
1468 case INDEX_op_clz_i32:
1469 case INDEX_op_ctz_i32:
1470 z_mask = arg_info(op->args[2])->z_mask | 31;
1471 break;
1472
1473 case INDEX_op_clz_i64:
1474 case INDEX_op_ctz_i64:
1475 z_mask = arg_info(op->args[2])->z_mask | 63;
1476 break;
1477
1478 case INDEX_op_ctpop_i32:
1479 z_mask = 32 | 31;
1480 break;
1481 case INDEX_op_ctpop_i64:
1482 z_mask = 64 | 63;
1483 break;
1484
1485 CASE_OP_32_64(setcond):
1486 case INDEX_op_setcond2_i32:
1487 z_mask = 1;
1488 break;
1489
1490 CASE_OP_32_64(movcond):
1491 z_mask = arg_info(op->args[3])->z_mask
1492 | arg_info(op->args[4])->z_mask;
1493 break;
1494
1495 CASE_OP_32_64(ld8u):
1496 z_mask = 0xff;
1497 break;
1498 CASE_OP_32_64(ld16u):
1499 z_mask = 0xffff;
1500 break;
1501 case INDEX_op_ld32u_i64:
1502 z_mask = 0xffffffffu;
1503 break;
1504
1505 CASE_OP_32_64(qemu_ld):
1506 {
1507 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1508 MemOp mop = get_memop(oi);
1509 if (!(mop & MO_SIGN)) {
1510 z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
1511 }
1512 }
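            /*
             * Illustrative example: an unsigned MO_UB load has
             * MO_SIZE == 0, so the mask is (2 << 7) - 1 == 0xff;
             * MO_UW gives 0xffff and MO_UL 0xffffffff.  Sign-extending
             * loads leave z_mask at -1.
             */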
1513 break;
1514
1515 CASE_OP_32_64(bswap16):
1516 z_mask = arg_info(op->args[1])->z_mask;
1517 if (z_mask <= 0xffff) {
1518 op->args[2] |= TCG_BSWAP_IZ;
1519 }
1520 z_mask = bswap16(z_mask);
1521 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1522 case TCG_BSWAP_OZ:
1523 break;
1524 case TCG_BSWAP_OS:
1525 z_mask = (int16_t)z_mask;
1526 break;
1527 default: /* undefined high bits */
1528 z_mask |= MAKE_64BIT_MASK(16, 48);
1529 break;
1530 }
1531 break;
1532
1533 case INDEX_op_bswap32_i64:
1534 z_mask = arg_info(op->args[1])->z_mask;
1535 if (z_mask <= 0xffffffffu) {
1536 op->args[2] |= TCG_BSWAP_IZ;
1537 }
1538 z_mask = bswap32(z_mask);
1539 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1540 case TCG_BSWAP_OZ:
1541 break;
1542 case TCG_BSWAP_OS:
1543 z_mask = (int32_t)z_mask;
1544 break;
1545 default: /* undefined high bits */
1546 z_mask |= MAKE_64BIT_MASK(32, 32);
1547 break;
1548 }
1549 break;
1550
1551 default:
1552 break;
1553 }
1554
1555         /* 32-bit ops generate 32-bit results.  For the result-is-zero test
1556            below, we can ignore the high bits, but for further optimizations we
1557            need to record that the high bits contain garbage. */
1558 partmask = z_mask;
1559 if (!(def->flags & TCG_OPF_64BIT)) {
1560 z_mask |= ~(tcg_target_ulong)0xffffffffu;
1561 partmask &= 0xffffffffu;
1562 affected &= 0xffffffffu;
1563 }
1564 ctx.z_mask = z_mask;
1565
1566 if (partmask == 0) {
1567 tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
1568 continue;
1569 }
1570 if (affected == 0) {
1571 tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
1572 continue;
1573 }
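        /*
         * Worked example: for "and_i32 r, a, $0xff00" where a's z_mask is
         * 0x00ff, the result mask is 0x00ff & 0xff00 == 0, so partmask == 0
         * and the op folds to "movi r, $0".  For "and_i32 r, a, $0xff" with
         * the same a, the constant clears no bit that could be set in a,
         * so affected == 0 and the op folds to "mov r, a".
         */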
1574
1575 /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
1576 switch (opc) {
1577 CASE_OP_32_64_VEC(and):
1578 CASE_OP_32_64_VEC(mul):
1579 CASE_OP_32_64(muluh):
1580 CASE_OP_32_64(mulsh):
1581 if (arg_is_const(op->args[2])
1582 && arg_info(op->args[2])->val == 0) {
1583 tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
1584 continue;
1585 }
1586 break;
1587 default:
1588 break;
1589 }
1590
1591 /* Simplify expression for "op r, a, a => mov r, a" cases */
1592 switch (opc) {
1593 CASE_OP_32_64_VEC(or):
1594 CASE_OP_32_64_VEC(and):
1595 if (args_are_copies(op->args[1], op->args[2])) {
1596 tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
1597 continue;
1598 }
1599 break;
1600 default:
1601 break;
1602 }
1603
1604 /* Simplify expression for "op r, a, a => movi r, 0" cases */
1605 switch (opc) {
1606 CASE_OP_32_64_VEC(andc):
1607 CASE_OP_32_64_VEC(sub):
1608 CASE_OP_32_64_VEC(xor):
1609 if (args_are_copies(op->args[1], op->args[2])) {
1610 tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
1611 continue;
1612 }
1613 break;
1614 default:
1615 break;
1616 }
1617
1618         /* Propagate constants through copy operations and do constant
1619            folding.  Constants will be substituted for arguments by the register
1620            allocator where needed and possible.  Also detect copies. */
1621 switch (opc) {
1622 CASE_OP_32_64_VEC(mov):
1623 done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
1624 break;
1625
1626 case INDEX_op_dup_vec:
1627 if (arg_is_const(op->args[1])) {
1628 tmp = arg_info(op->args[1])->val;
1629 tmp = dup_const(TCGOP_VECE(op), tmp);
1630 tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
1631 continue;
1632 }
1633 break;
1634
1635 case INDEX_op_dup2_vec:
1636 assert(TCG_TARGET_REG_BITS == 32);
1637 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1638 tcg_opt_gen_movi(&ctx, op, op->args[0],
1639 deposit64(arg_info(op->args[1])->val, 32, 32,
1640 arg_info(op->args[2])->val));
1641 continue;
1642 } else if (args_are_copies(op->args[1], op->args[2])) {
1643 op->opc = INDEX_op_dup_vec;
1644 TCGOP_VECE(op) = MO_32;
1645 }
1646 break;
1647
1648 CASE_OP_32_64(bswap16):
1649 CASE_OP_32_64(bswap32):
1650 case INDEX_op_bswap64_i64:
1651 if (arg_is_const(op->args[1])) {
1652 tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
1653 op->args[2]);
1654 tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
1655 continue;
1656 }
1657 break;
1658
1659 CASE_OP_32_64(clz):
1660 CASE_OP_32_64(ctz):
1661 if (arg_is_const(op->args[1])) {
1662 TCGArg v = arg_info(op->args[1])->val;
1663 if (v != 0) {
1664 tmp = do_constant_folding(opc, v, 0);
1665 tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
1666 } else {
1667 tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
1668 }
1669 continue;
1670 }
1671 break;
1672
1673 CASE_OP_32_64(deposit):
1674 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1675 tmp = deposit64(arg_info(op->args[1])->val,
1676 op->args[3], op->args[4],
1677 arg_info(op->args[2])->val);
1678 tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
1679 continue;
1680 }
1681 break;
1682
1683 CASE_OP_32_64(extract):
1684 if (arg_is_const(op->args[1])) {
1685 tmp = extract64(arg_info(op->args[1])->val,
1686 op->args[2], op->args[3]);
1687 tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
1688 continue;
1689 }
1690 break;
1691
1692 CASE_OP_32_64(sextract):
1693 if (arg_is_const(op->args[1])) {
1694 tmp = sextract64(arg_info(op->args[1])->val,
1695 op->args[2], op->args[3]);
1696 tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
1697 continue;
1698 }
1699 break;
1700
1701 CASE_OP_32_64(extract2):
1702 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1703 uint64_t v1 = arg_info(op->args[1])->val;
1704 uint64_t v2 = arg_info(op->args[2])->val;
1705 int shr = op->args[3];
1706
1707 if (opc == INDEX_op_extract2_i64) {
1708 tmp = (v1 >> shr) | (v2 << (64 - shr));
1709 } else {
1710 tmp = (int32_t)(((uint32_t)v1 >> shr) |
1711 ((uint32_t)v2 << (32 - shr)));
1712 }
1713 tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
1714 continue;
1715 }
1716 break;
1717
1718 CASE_OP_32_64(movcond):
1719 i = do_constant_folding_cond(opc, op->args[1],
1720 op->args[2], op->args[5]);
1721 if (i >= 0) {
1722 tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
1723 continue;
1724 }
1725 if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1726 uint64_t tv = arg_info(op->args[3])->val;
1727 uint64_t fv = arg_info(op->args[4])->val;
1728 TCGCond cond = op->args[5];
1729
1730 if (fv == 1 && tv == 0) {
1731 cond = tcg_invert_cond(cond);
1732 } else if (!(tv == 1 && fv == 0)) {
1733 break;
1734 }
1735 op->args[3] = cond;
1736 op->opc = opc = (opc == INDEX_op_movcond_i32
1737 ? INDEX_op_setcond_i32
1738 : INDEX_op_setcond_i64);
1739 }
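                /*
                 * Illustrative example: "movcond_i32 r, a, b, $1, $0, lt"
                 * selects 1 when a < b and 0 otherwise, which is exactly
                 * "setcond_i32 r, a, b, lt"; with the constants swapped
                 * ($0, $1) the condition is inverted instead.
                 */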
1740 break;
1741
1742
1743 default:
1744 break;
1745
1746 /* ---------------------------------------------------------- */
1747 /* Sorted alphabetically by opcode as much as possible. */
1748
1749 CASE_OP_32_64_VEC(add):
1750 done = fold_add(&ctx, op);
1751 break;
1752 case INDEX_op_add2_i32:
1753 done = fold_add2_i32(&ctx, op);
1754 break;
1755 CASE_OP_32_64_VEC(and):
1756 done = fold_and(&ctx, op);
1757 break;
1758 CASE_OP_32_64_VEC(andc):
1759 done = fold_andc(&ctx, op);
1760 break;
1761 CASE_OP_32_64(brcond):
1762 done = fold_brcond(&ctx, op);
1763 break;
1764 case INDEX_op_brcond2_i32:
1765 done = fold_brcond2(&ctx, op);
1766 break;
1767 CASE_OP_32_64(ctpop):
1768 done = fold_ctpop(&ctx, op);
1769 break;
1770 CASE_OP_32_64(div):
1771 CASE_OP_32_64(divu):
1772 done = fold_divide(&ctx, op);
1773 break;
1774 CASE_OP_32_64(eqv):
1775 done = fold_eqv(&ctx, op);
1776 break;
1777 CASE_OP_32_64(ext8s):
1778 CASE_OP_32_64(ext16s):
1779 case INDEX_op_ext32s_i64:
1780 case INDEX_op_ext_i32_i64:
1781 done = fold_exts(&ctx, op);
1782 break;
1783 CASE_OP_32_64(ext8u):
1784 CASE_OP_32_64(ext16u):
1785 case INDEX_op_ext32u_i64:
1786 case INDEX_op_extu_i32_i64:
1787 case INDEX_op_extrl_i64_i32:
1788 case INDEX_op_extrh_i64_i32:
1789 done = fold_extu(&ctx, op);
1790 break;
1791 case INDEX_op_mb:
1792 done = fold_mb(&ctx, op);
1793 break;
1794 CASE_OP_32_64(mul):
1795 done = fold_mul(&ctx, op);
1796 break;
1797 CASE_OP_32_64(mulsh):
1798 CASE_OP_32_64(muluh):
1799 done = fold_mul_highpart(&ctx, op);
1800 break;
1801 case INDEX_op_mulu2_i32:
1802 done = fold_mulu2_i32(&ctx, op);
1803 break;
1804 CASE_OP_32_64(nand):
1805 done = fold_nand(&ctx, op);
1806 break;
1807 CASE_OP_32_64(neg):
1808 done = fold_neg(&ctx, op);
1809 break;
1810 CASE_OP_32_64(nor):
1811 done = fold_nor(&ctx, op);
1812 break;
1813 CASE_OP_32_64_VEC(not):
1814 done = fold_not(&ctx, op);
1815 break;
1816 CASE_OP_32_64_VEC(or):
1817 done = fold_or(&ctx, op);
1818 break;
1819 CASE_OP_32_64_VEC(orc):
1820 done = fold_orc(&ctx, op);
1821 break;
1822 case INDEX_op_qemu_ld_i32:
1823 case INDEX_op_qemu_ld_i64:
1824 done = fold_qemu_ld(&ctx, op);
1825 break;
1826 case INDEX_op_qemu_st_i32:
1827 case INDEX_op_qemu_st8_i32:
1828 case INDEX_op_qemu_st_i64:
1829 done = fold_qemu_st(&ctx, op);
1830 break;
1831 CASE_OP_32_64(rem):
1832 CASE_OP_32_64(remu):
1833 done = fold_remainder(&ctx, op);
1834 break;
1835 CASE_OP_32_64(rotl):
1836 CASE_OP_32_64(rotr):
1837 CASE_OP_32_64(sar):
1838 CASE_OP_32_64(shl):
1839 CASE_OP_32_64(shr):
1840 done = fold_shift(&ctx, op);
1841 break;
1842 CASE_OP_32_64(setcond):
1843 done = fold_setcond(&ctx, op);
1844 break;
1845 case INDEX_op_setcond2_i32:
1846 done = fold_setcond2(&ctx, op);
1847 break;
1848 CASE_OP_32_64_VEC(sub):
1849 done = fold_sub(&ctx, op);
1850 break;
1851 case INDEX_op_sub2_i32:
1852 done = fold_sub2_i32(&ctx, op);
1853 break;
1854 CASE_OP_32_64_VEC(xor):
1855 done = fold_xor(&ctx, op);
1856 break;
1857 }
1858
1859 if (!done) {
1860 finish_folding(&ctx, op);
1861 }
1862 }
1863 }