/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                                \
        glue(glue(case INDEX_op_, x), _i32):            \
        glue(glue(case INDEX_op_, x), _i64)

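/* As an illustration, CASE_OP_32_64(add) expands to
 *     case INDEX_op_add_i32:
 *     case INDEX_op_add_i64
 * so a single case body below handles both widths of an opcode.
 */
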
struct tcg_temp_info {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};
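
/* A note on MASK polarity: a 0 bit means the corresponding bit of the
 * temp is known to be zero, while a 1 bit means the bit may hold
 * anything.  For example, the result of an ext8u is given mask = 0xff
 * below.
 */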

static struct tcg_temp_info temps[TCG_MAX_TEMPS];
static TCGTempSet temps_used;

static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}
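
/* Note that the unlink in reset_ts is safe even when TS is alone in its
 * list: then prev_copy == next_copy == ts, so PI and NI alias TI and the
 * first two stores are self-assignments.
 */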

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(int nb_temps)
{
    bitmap_zero(temps_used.l, nb_temps);
}

/* Initialize and activate a temporary. */
static void init_ts_info(TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    if (!test_bit(idx, temps_used.l)) {
        struct tcg_temp_info *ti = &temps[idx];

        ts->state_ptr = ti;
        ti->next_copy = ts;
        ti->prev_copy = ts;
        ti->is_const = false;
        ti->mask = -1;
        set_bit(idx, temps_used.l);
    }
}

static void init_arg_info(TCGArg arg)
{
    init_ts_info(arg_temp(arg));
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i;

    /* If this is already a global, we can't do better. */
    if (ts->temp_global) {
        return ts;
    }

    /* Search for a global first. */
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (i->temp_global) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!ts->temp_local) {
        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
            if (i->temp_local) {
                return i;
            }
        }
    }

    /* Failing to find a better representation, return the same temp. */
    return ts;
}
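
/* The preference order above (global, then temp local, then plain temp)
 * picks the copy with the longest lifetime, which presumably makes the
 * chosen representative more likely to remain usable at later uses.
 */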

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    TCGOpcode new_op = op_to_movi(op->opc);
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    op->opc = new_op;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    op->args[0] = dst;
    op->args[1] = val;
}
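
/* For example, on a 64-bit host, folding an op into "movi_i32 r, $0x1"
 * records mask = 0xffffffff00000001: the low word is the known value and
 * every high bit is treated as garbage.
 */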

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    new_op = op_to_mov(op->opc);

    op->opc = new_op;
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage. */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

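    /* Note: "y ? : 1" above is the GNU C conditional with an omitted
       middle operand, equivalent to "y ? y : 1"; it keeps the folding
       well defined on the host when the guest divides by zero. */
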
    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res = (int32_t)res;
    }
    return res;
}
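
/* The (int32_t) cast canonicalizes 32-bit results on a 64-bit host: e.g.
 * folding add_i32 0x7fffffff + 1 yields 0x80000000, which is stored
 * sign-extended as 0xffffffff80000000.
 */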

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    tcg_target_ulong xv = arg_info(x)->val;
    tcg_target_ulong yv = arg_info(y)->val;
    if (arg_is_const(x) && arg_is_const(y)) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(xv, yv, c);
        case 64:
            return do_constant_folding_cond_64(xv, yv, c);
        default:
            tcg_abort();
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}
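
/* The unsigned-versus-zero special case above holds even when X is
 * unknown: no unsigned value is below zero, so LTU against 0 folds to 0
 * and GEU against 0 folds to 1.
 */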

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}
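
/* For example, "add t0, $5, t1" becomes "add t0, t1, $5"; and when
 * neither input is constant but the destination matches the second
 * input, "add t0, t1, t0" becomes "add t0, t0, t1".
 */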

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int oi, oi_next, nb_temps, nb_globals;
    TCGOp *prev_mb = NULL;

    /* The TEMPS array has an element for each temp.
       If a temp holds a constant then its value is kept in the element.
       If a temp is a copy of other temps then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_next = op->next;

        /* Count the arguments, and initialize the temps that are
           going to be used. */
        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(op->args[i]);
            }
        }

        /* Do copy propagation. */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument. */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op. */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it. */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits from the rightmost possibly-set bit up. */
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;
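            /* Example for the neg case above: with an input mask of 0b0110
               the lowest possibly-set bit is 0b0010, so the result mask is
               ~(tcg_target_ulong)1: every bit from bit 1 upward may be set
               in the negation. */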

        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;
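            /* For the ctpop cases above: ctpop32 returns 0..32 and ctpop64
               returns 0..64, so the results fit in the low 6 and 7 bits
               respectively: 32 | 31 == 0x3f and 64 | 63 == 0x7f. */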

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;
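            /* For the qemu_ld case above: an unsigned 16-bit load has
               MO_SIZE == 1, so 8 << 1 == 16 and the recorded mask is
               (2ULL << 15) - 1 == 0xffff. */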

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result-is-zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases. */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases. */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases. */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted into arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                tcg_target_ulong tv = arg_info(op->args[3])->val;
                tcg_target_ulong fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];
                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
        do_brcond_true:
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
        do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_brcond_high:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_brcond_low:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
        do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
        do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
        do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute the operation result), so no propagation
               is done.  We trash everything if the operation is the end
               of a basic block, otherwise we only trash the output args.
               "mask" is the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions. */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization. */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization. */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}