]> git.proxmox.com Git - qemu.git/blob - tcg/optimize.c
optimize: only write to state when clearing optimizer data
[qemu.git] / tcg / optimize.c
1 /*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "config.h"
27
28 #include <stdlib.h>
29 #include <stdio.h>
30
31 #include "qemu-common.h"
32 #include "tcg-op.h"
33
/* Expand to the pair of case labels INDEX_op_<x>_i32 and INDEX_op_<x>_i64.
   The trailing ':' of the second label is supplied at the use site, so the
   macro is written as "CASE_OP_32_64(add):" inside a switch. */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
37
/* What the optimizer currently knows about a temp. */
typedef enum {
    /* Nothing is known; this is the state after a reset. */
    TCG_TEMP_UNDEF = 0,
    /* The temp holds a known constant, stored in tcg_temp_info.val. */
    TCG_TEMP_CONST,
    /* The temp is a member of a circular list of temps that are copies
       of each other (linked through prev_copy / next_copy). */
    TCG_TEMP_COPY,
} tcg_temp_state;
43
/* Per-temp optimizer bookkeeping. */
struct tcg_temp_info {
    /* Current knowledge about the temp. */
    tcg_temp_state state;
    /* Indices into temps[] of the neighbours in the circular doubly
       linked copy list; used while state == TCG_TEMP_COPY. */
    uint16_t prev_copy;
    uint16_t next_copy;
    /* Constant value; set when state becomes TCG_TEMP_CONST. */
    tcg_target_ulong val;
};

/* Optimizer state for every temp, indexed by temp number. */
static struct tcg_temp_info temps[TCG_MAX_TEMPS];
52
53 /* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove
54 the copy flag from the left temp. */
55 static void reset_temp(TCGArg temp)
56 {
57 if (temps[temp].state == TCG_TEMP_COPY) {
58 if (temps[temp].prev_copy == temps[temp].next_copy) {
59 temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
60 } else {
61 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
62 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
63 }
64 }
65 temps[temp].state = TCG_TEMP_UNDEF;
66 }
67
68 /* Reset all temporaries, given that there are NB_TEMPS of them. */
69 static void reset_all_temps(int nb_temps)
70 {
71 int i;
72 for (i = 0; i < nb_temps; i++) {
73 temps[i].state = TCG_TEMP_UNDEF;
74 }
75 }
76
77 static int op_bits(TCGOpcode op)
78 {
79 const TCGOpDef *def = &tcg_op_defs[op];
80 return def->flags & TCG_OPF_64BIT ? 64 : 32;
81 }
82
83 static TCGOpcode op_to_movi(TCGOpcode op)
84 {
85 switch (op_bits(op)) {
86 case 32:
87 return INDEX_op_movi_i32;
88 case 64:
89 return INDEX_op_movi_i64;
90 default:
91 fprintf(stderr, "op_to_movi: unexpected return value of "
92 "function op_bits.\n");
93 tcg_abort();
94 }
95 }
96
97 static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
98 {
99 TCGArg i;
100
101 /* If this is already a global, we can't do better. */
102 if (temp < s->nb_globals) {
103 return temp;
104 }
105
106 /* Search for a global first. */
107 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
108 if (i < s->nb_globals) {
109 return i;
110 }
111 }
112
113 /* If it is a temp, search for a temp local. */
114 if (!s->temps[temp].temp_local) {
115 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
116 if (s->temps[i].temp_local) {
117 return i;
118 }
119 }
120 }
121
122 /* Failure to find a better representation, return the same temp. */
123 return temp;
124 }
125
126 static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
127 {
128 TCGArg i;
129
130 if (arg1 == arg2) {
131 return true;
132 }
133
134 if (temps[arg1].state != TCG_TEMP_COPY
135 || temps[arg2].state != TCG_TEMP_COPY) {
136 return false;
137 }
138
139 for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
140 if (i == arg2) {
141 return true;
142 }
143 }
144
145 return false;
146 }
147
148 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
149 TCGArg dst, TCGArg src)
150 {
151 reset_temp(dst);
152 assert(temps[src].state != TCG_TEMP_CONST);
153
154 if (s->temps[src].type == s->temps[dst].type) {
155 if (temps[src].state != TCG_TEMP_COPY) {
156 temps[src].state = TCG_TEMP_COPY;
157 temps[src].next_copy = src;
158 temps[src].prev_copy = src;
159 }
160 temps[dst].state = TCG_TEMP_COPY;
161 temps[dst].next_copy = temps[src].next_copy;
162 temps[dst].prev_copy = src;
163 temps[temps[dst].next_copy].prev_copy = dst;
164 temps[src].next_copy = dst;
165 }
166
167 gen_args[0] = dst;
168 gen_args[1] = src;
169 }
170
171 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
172 {
173 reset_temp(dst);
174 temps[dst].state = TCG_TEMP_CONST;
175 temps[dst].val = val;
176 gen_args[0] = dst;
177 gen_args[1] = val;
178 }
179
180 static TCGOpcode op_to_mov(TCGOpcode op)
181 {
182 switch (op_bits(op)) {
183 case 32:
184 return INDEX_op_mov_i32;
185 case 64:
186 return INDEX_op_mov_i64;
187 default:
188 fprintf(stderr, "op_to_mov: unexpected return value of "
189 "function op_bits.\n");
190 tcg_abort();
191 }
192 }
193
194 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
195 {
196 switch (op) {
197 CASE_OP_32_64(add):
198 return x + y;
199
200 CASE_OP_32_64(sub):
201 return x - y;
202
203 CASE_OP_32_64(mul):
204 return x * y;
205
206 CASE_OP_32_64(and):
207 return x & y;
208
209 CASE_OP_32_64(or):
210 return x | y;
211
212 CASE_OP_32_64(xor):
213 return x ^ y;
214
215 case INDEX_op_shl_i32:
216 return (uint32_t)x << (uint32_t)y;
217
218 case INDEX_op_shl_i64:
219 return (uint64_t)x << (uint64_t)y;
220
221 case INDEX_op_shr_i32:
222 return (uint32_t)x >> (uint32_t)y;
223
224 case INDEX_op_shr_i64:
225 return (uint64_t)x >> (uint64_t)y;
226
227 case INDEX_op_sar_i32:
228 return (int32_t)x >> (int32_t)y;
229
230 case INDEX_op_sar_i64:
231 return (int64_t)x >> (int64_t)y;
232
233 case INDEX_op_rotr_i32:
234 x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
235 return x;
236
237 case INDEX_op_rotr_i64:
238 x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
239 return x;
240
241 case INDEX_op_rotl_i32:
242 x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
243 return x;
244
245 case INDEX_op_rotl_i64:
246 x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
247 return x;
248
249 CASE_OP_32_64(not):
250 return ~x;
251
252 CASE_OP_32_64(neg):
253 return -x;
254
255 CASE_OP_32_64(andc):
256 return x & ~y;
257
258 CASE_OP_32_64(orc):
259 return x | ~y;
260
261 CASE_OP_32_64(eqv):
262 return ~(x ^ y);
263
264 CASE_OP_32_64(nand):
265 return ~(x & y);
266
267 CASE_OP_32_64(nor):
268 return ~(x | y);
269
270 CASE_OP_32_64(ext8s):
271 return (int8_t)x;
272
273 CASE_OP_32_64(ext16s):
274 return (int16_t)x;
275
276 CASE_OP_32_64(ext8u):
277 return (uint8_t)x;
278
279 CASE_OP_32_64(ext16u):
280 return (uint16_t)x;
281
282 case INDEX_op_ext32s_i64:
283 return (int32_t)x;
284
285 case INDEX_op_ext32u_i64:
286 return (uint32_t)x;
287
288 default:
289 fprintf(stderr,
290 "Unrecognized operation %d in do_constant_folding.\n", op);
291 tcg_abort();
292 }
293 }
294
295 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
296 {
297 TCGArg res = do_constant_folding_2(op, x, y);
298 if (op_bits(op) == 32) {
299 res &= 0xffffffff;
300 }
301 return res;
302 }
303
304 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
305 {
306 switch (c) {
307 case TCG_COND_EQ:
308 return x == y;
309 case TCG_COND_NE:
310 return x != y;
311 case TCG_COND_LT:
312 return (int32_t)x < (int32_t)y;
313 case TCG_COND_GE:
314 return (int32_t)x >= (int32_t)y;
315 case TCG_COND_LE:
316 return (int32_t)x <= (int32_t)y;
317 case TCG_COND_GT:
318 return (int32_t)x > (int32_t)y;
319 case TCG_COND_LTU:
320 return x < y;
321 case TCG_COND_GEU:
322 return x >= y;
323 case TCG_COND_LEU:
324 return x <= y;
325 case TCG_COND_GTU:
326 return x > y;
327 default:
328 tcg_abort();
329 }
330 }
331
332 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
333 {
334 switch (c) {
335 case TCG_COND_EQ:
336 return x == y;
337 case TCG_COND_NE:
338 return x != y;
339 case TCG_COND_LT:
340 return (int64_t)x < (int64_t)y;
341 case TCG_COND_GE:
342 return (int64_t)x >= (int64_t)y;
343 case TCG_COND_LE:
344 return (int64_t)x <= (int64_t)y;
345 case TCG_COND_GT:
346 return (int64_t)x > (int64_t)y;
347 case TCG_COND_LTU:
348 return x < y;
349 case TCG_COND_GEU:
350 return x >= y;
351 case TCG_COND_LEU:
352 return x <= y;
353 case TCG_COND_GTU:
354 return x > y;
355 default:
356 tcg_abort();
357 }
358 }
359
360 static bool do_constant_folding_cond_eq(TCGCond c)
361 {
362 switch (c) {
363 case TCG_COND_GT:
364 case TCG_COND_LTU:
365 case TCG_COND_LT:
366 case TCG_COND_GTU:
367 case TCG_COND_NE:
368 return 0;
369 case TCG_COND_GE:
370 case TCG_COND_GEU:
371 case TCG_COND_LE:
372 case TCG_COND_LEU:
373 case TCG_COND_EQ:
374 return 1;
375 default:
376 tcg_abort();
377 }
378 }
379
380 /* Return 2 if the condition can't be simplified, and the result
381 of the condition (0 or 1) if it can */
382 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
383 TCGArg y, TCGCond c)
384 {
385 if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
386 switch (op_bits(op)) {
387 case 32:
388 return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
389 case 64:
390 return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
391 default:
392 tcg_abort();
393 }
394 } else if (temps_are_copies(x, y)) {
395 return do_constant_folding_cond_eq(c);
396 } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
397 switch (c) {
398 case TCG_COND_LTU:
399 return 0;
400 case TCG_COND_GEU:
401 return 1;
402 default:
403 return 2;
404 }
405 } else {
406 return 2;
407 }
408 }
409
410 /* Return 2 if the condition can't be simplified, and the result
411 of the condition (0 or 1) if it can */
412 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
413 {
414 TCGArg al = p1[0], ah = p1[1];
415 TCGArg bl = p2[0], bh = p2[1];
416
417 if (temps[bl].state == TCG_TEMP_CONST
418 && temps[bh].state == TCG_TEMP_CONST) {
419 uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
420
421 if (temps[al].state == TCG_TEMP_CONST
422 && temps[ah].state == TCG_TEMP_CONST) {
423 uint64_t a;
424 a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
425 return do_constant_folding_cond_64(a, b, c);
426 }
427 if (b == 0) {
428 switch (c) {
429 case TCG_COND_LTU:
430 return 0;
431 case TCG_COND_GEU:
432 return 1;
433 default:
434 break;
435 }
436 }
437 }
438 if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
439 return do_constant_folding_cond_eq(c);
440 }
441 return 2;
442 }
443
444 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
445 {
446 TCGArg a1 = *p1, a2 = *p2;
447 int sum = 0;
448 sum += temps[a1].state == TCG_TEMP_CONST;
449 sum -= temps[a2].state == TCG_TEMP_CONST;
450
451 /* Prefer the constant in second argument, and then the form
452 op a, a, b, which is better handled on non-RISC hosts. */
453 if (sum > 0 || (sum == 0 && dest == a2)) {
454 *p1 = a2;
455 *p2 = a1;
456 return true;
457 }
458 return false;
459 }
460
461 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
462 {
463 int sum = 0;
464 sum += temps[p1[0]].state == TCG_TEMP_CONST;
465 sum += temps[p1[1]].state == TCG_TEMP_CONST;
466 sum -= temps[p2[0]].state == TCG_TEMP_CONST;
467 sum -= temps[p2[1]].state == TCG_TEMP_CONST;
468 if (sum > 0) {
469 TCGArg t;
470 t = p1[0], p1[0] = p2[0], p2[0] = t;
471 t = p1[1], p1[1] = p2[1], p2[1] = t;
472 return true;
473 }
474 return false;
475 }
476
477 /* Propagate constants and copies, fold constant expressions. */
478 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
479 TCGArg *args, TCGOpDef *tcg_op_defs)
480 {
481 int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
482 TCGOpcode op;
483 const TCGOpDef *def;
484 TCGArg *gen_args;
485 TCGArg tmp;
486
487 /* Array VALS has an element for each temp.
488 If this temp holds a constant then its value is kept in VALS' element.
489 If this temp is a copy of other ones then the other copies are
490 available through the doubly linked circular list. */
491
492 nb_temps = s->nb_temps;
493 nb_globals = s->nb_globals;
494 reset_all_temps(nb_temps);
495
496 nb_ops = tcg_opc_ptr - s->gen_opc_buf;
497 gen_args = args;
498 for (op_index = 0; op_index < nb_ops; op_index++) {
499 op = s->gen_opc_buf[op_index];
500 def = &tcg_op_defs[op];
501 /* Do copy propagation */
502 if (op == INDEX_op_call) {
503 int nb_oargs = args[0] >> 16;
504 int nb_iargs = args[0] & 0xffff;
505 for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
506 if (temps[args[i]].state == TCG_TEMP_COPY) {
507 args[i] = find_better_copy(s, args[i]);
508 }
509 }
510 } else {
511 for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
512 if (temps[args[i]].state == TCG_TEMP_COPY) {
513 args[i] = find_better_copy(s, args[i]);
514 }
515 }
516 }
517
518 /* For commutative operations make constant second argument */
519 switch (op) {
520 CASE_OP_32_64(add):
521 CASE_OP_32_64(mul):
522 CASE_OP_32_64(and):
523 CASE_OP_32_64(or):
524 CASE_OP_32_64(xor):
525 CASE_OP_32_64(eqv):
526 CASE_OP_32_64(nand):
527 CASE_OP_32_64(nor):
528 swap_commutative(args[0], &args[1], &args[2]);
529 break;
530 CASE_OP_32_64(brcond):
531 if (swap_commutative(-1, &args[0], &args[1])) {
532 args[2] = tcg_swap_cond(args[2]);
533 }
534 break;
535 CASE_OP_32_64(setcond):
536 if (swap_commutative(args[0], &args[1], &args[2])) {
537 args[3] = tcg_swap_cond(args[3]);
538 }
539 break;
540 CASE_OP_32_64(movcond):
541 if (swap_commutative(-1, &args[1], &args[2])) {
542 args[5] = tcg_swap_cond(args[5]);
543 }
544 /* For movcond, we canonicalize the "false" input reg to match
545 the destination reg so that the tcg backend can implement
546 a "move if true" operation. */
547 if (swap_commutative(args[0], &args[4], &args[3])) {
548 args[5] = tcg_invert_cond(args[5]);
549 }
550 break;
551 case INDEX_op_add2_i32:
552 swap_commutative(args[0], &args[2], &args[4]);
553 swap_commutative(args[1], &args[3], &args[5]);
554 break;
555 case INDEX_op_mulu2_i32:
556 swap_commutative(args[0], &args[2], &args[3]);
557 break;
558 case INDEX_op_brcond2_i32:
559 if (swap_commutative2(&args[0], &args[2])) {
560 args[4] = tcg_swap_cond(args[4]);
561 }
562 break;
563 case INDEX_op_setcond2_i32:
564 if (swap_commutative2(&args[1], &args[3])) {
565 args[5] = tcg_swap_cond(args[5]);
566 }
567 break;
568 default:
569 break;
570 }
571
572 /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
573 switch (op) {
574 CASE_OP_32_64(shl):
575 CASE_OP_32_64(shr):
576 CASE_OP_32_64(sar):
577 CASE_OP_32_64(rotl):
578 CASE_OP_32_64(rotr):
579 if (temps[args[1]].state == TCG_TEMP_CONST
580 && temps[args[1]].val == 0) {
581 s->gen_opc_buf[op_index] = op_to_movi(op);
582 tcg_opt_gen_movi(gen_args, args[0], 0);
583 args += 3;
584 gen_args += 2;
585 continue;
586 }
587 break;
588 default:
589 break;
590 }
591
592 /* Simplify expression for "op r, a, 0 => mov r, a" cases */
593 switch (op) {
594 CASE_OP_32_64(add):
595 CASE_OP_32_64(sub):
596 CASE_OP_32_64(shl):
597 CASE_OP_32_64(shr):
598 CASE_OP_32_64(sar):
599 CASE_OP_32_64(rotl):
600 CASE_OP_32_64(rotr):
601 CASE_OP_32_64(or):
602 CASE_OP_32_64(xor):
603 if (temps[args[1]].state == TCG_TEMP_CONST) {
604 /* Proceed with possible constant folding. */
605 break;
606 }
607 if (temps[args[2]].state == TCG_TEMP_CONST
608 && temps[args[2]].val == 0) {
609 if (temps_are_copies(args[0], args[1])) {
610 s->gen_opc_buf[op_index] = INDEX_op_nop;
611 } else {
612 s->gen_opc_buf[op_index] = op_to_mov(op);
613 tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
614 gen_args += 2;
615 }
616 args += 3;
617 continue;
618 }
619 break;
620 default:
621 break;
622 }
623
624 /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
625 switch (op) {
626 CASE_OP_32_64(and):
627 CASE_OP_32_64(mul):
628 if ((temps[args[2]].state == TCG_TEMP_CONST
629 && temps[args[2]].val == 0)) {
630 s->gen_opc_buf[op_index] = op_to_movi(op);
631 tcg_opt_gen_movi(gen_args, args[0], 0);
632 args += 3;
633 gen_args += 2;
634 continue;
635 }
636 break;
637 default:
638 break;
639 }
640
641 /* Simplify expression for "op r, a, a => mov r, a" cases */
642 switch (op) {
643 CASE_OP_32_64(or):
644 CASE_OP_32_64(and):
645 if (temps_are_copies(args[1], args[2])) {
646 if (temps_are_copies(args[0], args[1])) {
647 s->gen_opc_buf[op_index] = INDEX_op_nop;
648 } else {
649 s->gen_opc_buf[op_index] = op_to_mov(op);
650 tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
651 gen_args += 2;
652 }
653 args += 3;
654 continue;
655 }
656 break;
657 default:
658 break;
659 }
660
661 /* Simplify expression for "op r, a, a => movi r, 0" cases */
662 switch (op) {
663 CASE_OP_32_64(sub):
664 CASE_OP_32_64(xor):
665 if (temps_are_copies(args[1], args[2])) {
666 s->gen_opc_buf[op_index] = op_to_movi(op);
667 tcg_opt_gen_movi(gen_args, args[0], 0);
668 gen_args += 2;
669 args += 3;
670 continue;
671 }
672 break;
673 default:
674 break;
675 }
676
677 /* Propagate constants through copy operations and do constant
678 folding. Constants will be substituted to arguments by register
679 allocator where needed and possible. Also detect copies. */
680 switch (op) {
681 CASE_OP_32_64(mov):
682 if (temps_are_copies(args[0], args[1])) {
683 args += 2;
684 s->gen_opc_buf[op_index] = INDEX_op_nop;
685 break;
686 }
687 if (temps[args[1]].state != TCG_TEMP_CONST) {
688 tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
689 gen_args += 2;
690 args += 2;
691 break;
692 }
693 /* Source argument is constant. Rewrite the operation and
694 let movi case handle it. */
695 op = op_to_movi(op);
696 s->gen_opc_buf[op_index] = op;
697 args[1] = temps[args[1]].val;
698 /* fallthrough */
699 CASE_OP_32_64(movi):
700 tcg_opt_gen_movi(gen_args, args[0], args[1]);
701 gen_args += 2;
702 args += 2;
703 break;
704
705 CASE_OP_32_64(not):
706 CASE_OP_32_64(neg):
707 CASE_OP_32_64(ext8s):
708 CASE_OP_32_64(ext8u):
709 CASE_OP_32_64(ext16s):
710 CASE_OP_32_64(ext16u):
711 case INDEX_op_ext32s_i64:
712 case INDEX_op_ext32u_i64:
713 if (temps[args[1]].state == TCG_TEMP_CONST) {
714 s->gen_opc_buf[op_index] = op_to_movi(op);
715 tmp = do_constant_folding(op, temps[args[1]].val, 0);
716 tcg_opt_gen_movi(gen_args, args[0], tmp);
717 gen_args += 2;
718 args += 2;
719 break;
720 }
721 goto do_default;
722
723 CASE_OP_32_64(add):
724 CASE_OP_32_64(sub):
725 CASE_OP_32_64(mul):
726 CASE_OP_32_64(or):
727 CASE_OP_32_64(and):
728 CASE_OP_32_64(xor):
729 CASE_OP_32_64(shl):
730 CASE_OP_32_64(shr):
731 CASE_OP_32_64(sar):
732 CASE_OP_32_64(rotl):
733 CASE_OP_32_64(rotr):
734 CASE_OP_32_64(andc):
735 CASE_OP_32_64(orc):
736 CASE_OP_32_64(eqv):
737 CASE_OP_32_64(nand):
738 CASE_OP_32_64(nor):
739 if (temps[args[1]].state == TCG_TEMP_CONST
740 && temps[args[2]].state == TCG_TEMP_CONST) {
741 s->gen_opc_buf[op_index] = op_to_movi(op);
742 tmp = do_constant_folding(op, temps[args[1]].val,
743 temps[args[2]].val);
744 tcg_opt_gen_movi(gen_args, args[0], tmp);
745 gen_args += 2;
746 args += 3;
747 break;
748 }
749 goto do_default;
750
751 CASE_OP_32_64(deposit):
752 if (temps[args[1]].state == TCG_TEMP_CONST
753 && temps[args[2]].state == TCG_TEMP_CONST) {
754 s->gen_opc_buf[op_index] = op_to_movi(op);
755 tmp = ((1ull << args[4]) - 1);
756 tmp = (temps[args[1]].val & ~(tmp << args[3]))
757 | ((temps[args[2]].val & tmp) << args[3]);
758 tcg_opt_gen_movi(gen_args, args[0], tmp);
759 gen_args += 2;
760 args += 5;
761 break;
762 }
763 goto do_default;
764
765 CASE_OP_32_64(setcond):
766 tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
767 if (tmp != 2) {
768 s->gen_opc_buf[op_index] = op_to_movi(op);
769 tcg_opt_gen_movi(gen_args, args[0], tmp);
770 gen_args += 2;
771 args += 4;
772 break;
773 }
774 goto do_default;
775
776 CASE_OP_32_64(brcond):
777 tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
778 if (tmp != 2) {
779 if (tmp) {
780 reset_all_temps(nb_temps);
781 s->gen_opc_buf[op_index] = INDEX_op_br;
782 gen_args[0] = args[3];
783 gen_args += 1;
784 } else {
785 s->gen_opc_buf[op_index] = INDEX_op_nop;
786 }
787 args += 4;
788 break;
789 }
790 goto do_default;
791
792 CASE_OP_32_64(movcond):
793 tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
794 if (tmp != 2) {
795 if (temps_are_copies(args[0], args[4-tmp])) {
796 s->gen_opc_buf[op_index] = INDEX_op_nop;
797 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
798 s->gen_opc_buf[op_index] = op_to_movi(op);
799 tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
800 gen_args += 2;
801 } else {
802 s->gen_opc_buf[op_index] = op_to_mov(op);
803 tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
804 gen_args += 2;
805 }
806 args += 6;
807 break;
808 }
809 goto do_default;
810
811 case INDEX_op_add2_i32:
812 case INDEX_op_sub2_i32:
813 if (temps[args[2]].state == TCG_TEMP_CONST
814 && temps[args[3]].state == TCG_TEMP_CONST
815 && temps[args[4]].state == TCG_TEMP_CONST
816 && temps[args[5]].state == TCG_TEMP_CONST) {
817 uint32_t al = temps[args[2]].val;
818 uint32_t ah = temps[args[3]].val;
819 uint32_t bl = temps[args[4]].val;
820 uint32_t bh = temps[args[5]].val;
821 uint64_t a = ((uint64_t)ah << 32) | al;
822 uint64_t b = ((uint64_t)bh << 32) | bl;
823 TCGArg rl, rh;
824
825 if (op == INDEX_op_add2_i32) {
826 a += b;
827 } else {
828 a -= b;
829 }
830
831 /* We emit the extra nop when we emit the add2/sub2. */
832 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
833
834 rl = args[0];
835 rh = args[1];
836 s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
837 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
838 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
839 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
840 gen_args += 4;
841 args += 6;
842 break;
843 }
844 goto do_default;
845
846 case INDEX_op_mulu2_i32:
847 if (temps[args[2]].state == TCG_TEMP_CONST
848 && temps[args[3]].state == TCG_TEMP_CONST) {
849 uint32_t a = temps[args[2]].val;
850 uint32_t b = temps[args[3]].val;
851 uint64_t r = (uint64_t)a * b;
852 TCGArg rl, rh;
853
854 /* We emit the extra nop when we emit the mulu2. */
855 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
856
857 rl = args[0];
858 rh = args[1];
859 s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
860 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
861 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
862 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
863 gen_args += 4;
864 args += 4;
865 break;
866 }
867 goto do_default;
868
869 case INDEX_op_brcond2_i32:
870 tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
871 if (tmp != 2) {
872 if (tmp) {
873 reset_all_temps(nb_temps);
874 s->gen_opc_buf[op_index] = INDEX_op_br;
875 gen_args[0] = args[5];
876 gen_args += 1;
877 } else {
878 s->gen_opc_buf[op_index] = INDEX_op_nop;
879 }
880 } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
881 && temps[args[2]].state == TCG_TEMP_CONST
882 && temps[args[3]].state == TCG_TEMP_CONST
883 && temps[args[2]].val == 0
884 && temps[args[3]].val == 0) {
885 /* Simplify LT/GE comparisons vs zero to a single compare
886 vs the high word of the input. */
887 reset_all_temps(nb_temps);
888 s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
889 gen_args[0] = args[1];
890 gen_args[1] = args[3];
891 gen_args[2] = args[4];
892 gen_args[3] = args[5];
893 gen_args += 4;
894 } else {
895 goto do_default;
896 }
897 args += 6;
898 break;
899
900 case INDEX_op_setcond2_i32:
901 tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
902 if (tmp != 2) {
903 s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
904 tcg_opt_gen_movi(gen_args, args[0], tmp);
905 gen_args += 2;
906 } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
907 && temps[args[3]].state == TCG_TEMP_CONST
908 && temps[args[4]].state == TCG_TEMP_CONST
909 && temps[args[3]].val == 0
910 && temps[args[4]].val == 0) {
911 /* Simplify LT/GE comparisons vs zero to a single compare
912 vs the high word of the input. */
913 s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
914 gen_args[0] = args[0];
915 gen_args[1] = args[2];
916 gen_args[2] = args[4];
917 gen_args[3] = args[5];
918 gen_args += 4;
919 } else {
920 goto do_default;
921 }
922 args += 6;
923 break;
924
925 case INDEX_op_call:
926 nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
927 if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
928 TCG_CALL_NO_WRITE_GLOBALS))) {
929 for (i = 0; i < nb_globals; i++) {
930 reset_temp(i);
931 }
932 }
933 for (i = 0; i < (args[0] >> 16); i++) {
934 reset_temp(args[i + 1]);
935 }
936 i = nb_call_args + 3;
937 while (i) {
938 *gen_args = *args;
939 args++;
940 gen_args++;
941 i--;
942 }
943 break;
944
945 default:
946 do_default:
947 /* Default case: we know nothing about operation (or were unable
948 to compute the operation result) so no propagation is done.
949 We trash everything if the operation is the end of a basic
950 block, otherwise we only trash the output args. */
951 if (def->flags & TCG_OPF_BB_END) {
952 reset_all_temps(nb_temps);
953 } else {
954 for (i = 0; i < def->nb_oargs; i++) {
955 reset_temp(args[i]);
956 }
957 }
958 for (i = 0; i < def->nb_args; i++) {
959 gen_args[i] = args[i];
960 }
961 args += def->nb_args;
962 gen_args += def->nb_args;
963 break;
964 }
965 }
966
967 return gen_args;
968 }
969
970 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
971 TCGArg *args, TCGOpDef *tcg_op_defs)
972 {
973 TCGArg *res;
974 res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
975 return res;
976 }