git.proxmox.com Git - qemu.git/blob - tcg/optimize.c
tcg/optimize: add constant folding for setcond
[qemu.git] / tcg / optimize.c
1 /*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "config.h"
27
28 #include <stdlib.h>
29 #include <stdio.h>
30
31 #include "qemu-common.h"
32 #include "tcg-op.h"
33
/* Expand to the pair of case labels for both width variants of a TCG
   opcode, e.g. CASE_OP_32_64(add) yields
   "case INDEX_op_add_i32: case INDEX_op_add_i64".  */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
37
/* Dataflow state tracked for each TCG temporary during the pass.  */
typedef enum {
    TCG_TEMP_UNDEF = 0,   /* nothing known yet (zero-initialized state) */
    TCG_TEMP_CONST,       /* temp holds the constant stored in 'val' */
    TCG_TEMP_COPY,        /* temp is a copy; 'val' is the representative */
    TCG_TEMP_HAS_COPY,    /* temp is the representative of a copy class */
    TCG_TEMP_ANY          /* value unknown */
} tcg_temp_state;
45
/* Per-temp optimizer bookkeeping.  'prev_copy'/'next_copy' are temp
   indices forming a circular doubly linked list of temps known to hold
   the same value; 'val' holds either a constant or the index of the
   class representative, depending on 'state'.  */
struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
};
52
/* State for every temp, indexed by temp number; cleared at the start of
   the pass and again at each basic-block boundary.  */
static struct tcg_temp_info temps[TCG_MAX_TEMPS];
54
/* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
   class of equivalent temp's, a new representative should be chosen in this
   class. */
static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
{
    int i;
    TCGArg new_base = (TCGArg)-1;
    if (temps[temp].state == TCG_TEMP_HAS_COPY) {
        /* TEMP was the class representative: look for a non-global member
           to take over.  Only non-globals (index >= nb_globals) may anchor
           a class, matching the restriction in tcg_opt_gen_mov().  */
        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
            if (i >= nb_globals) {
                temps[i].state = TCG_TEMP_HAS_COPY;
                new_base = i;
                break;
            }
        }
        /* Re-point the remaining members at the new representative, or
           dissolve the whole class if no replacement was found.  */
        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
            if (new_base == (TCGArg)-1) {
                temps[i].state = TCG_TEMP_ANY;
            } else {
                temps[i].val = new_base;
            }
        }
        /* Unlink TEMP from the circular copy list.  */
        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
    } else if (temps[temp].state == TCG_TEMP_COPY) {
        /* TEMP was an ordinary member: unlink it and remember its
           representative so we can check for a now-singleton class.  */
        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
        new_base = temps[temp].val;
    }
    temps[temp].state = TCG_TEMP_ANY;
    /* A representative with no remaining copies loses HAS_COPY status.  */
    if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
        temps[new_base].state = TCG_TEMP_ANY;
    }
}
89
90 static int op_bits(TCGOpcode op)
91 {
92 const TCGOpDef *def = &tcg_op_defs[op];
93 return def->flags & TCG_OPF_64BIT ? 64 : 32;
94 }
95
96 static TCGOpcode op_to_movi(TCGOpcode op)
97 {
98 switch (op_bits(op)) {
99 case 32:
100 return INDEX_op_movi_i32;
101 case 64:
102 return INDEX_op_movi_i64;
103 default:
104 fprintf(stderr, "op_to_movi: unexpected return value of "
105 "function op_bits.\n");
106 tcg_abort();
107 }
108 }
109
/* Emit the arguments for a "mov DST, SRC" and record DST as a copy of
   SRC, linking DST into SRC's circular copy list when that is legal.  */
static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
                            TCGArg src, int nb_temps, int nb_globals)
{
    reset_temp(dst, nb_temps, nb_globals);
    /* Copy propagation should already have replaced SRC by its class
       representative, so SRC is never itself a plain copy here.  */
    assert(temps[src].state != TCG_TEMP_COPY);
    /* Don't try to copy if one of temps is a global or either one
       is local and another is register */
    if (src >= nb_globals && dst >= nb_globals &&
        tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
        assert(temps[src].state != TCG_TEMP_CONST);
        /* Make SRC the representative of a fresh singleton class if it
           is not already representing one.  */
        if (temps[src].state != TCG_TEMP_HAS_COPY) {
            temps[src].state = TCG_TEMP_HAS_COPY;
            temps[src].next_copy = src;
            temps[src].prev_copy = src;
        }
        /* Splice DST into the circular list immediately after SRC.  */
        temps[dst].state = TCG_TEMP_COPY;
        temps[dst].val = src;
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
    }
    /* Emit the mov operands.  */
    gen_args[0] = dst;
    gen_args[1] = src;
}
135
136 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
137 int nb_temps, int nb_globals)
138 {
139 reset_temp(dst, nb_temps, nb_globals);
140 temps[dst].state = TCG_TEMP_CONST;
141 temps[dst].val = val;
142 gen_args[0] = dst;
143 gen_args[1] = val;
144 }
145
146 static TCGOpcode op_to_mov(TCGOpcode op)
147 {
148 switch (op_bits(op)) {
149 case 32:
150 return INDEX_op_mov_i32;
151 case 64:
152 return INDEX_op_mov_i64;
153 default:
154 fprintf(stderr, "op_to_mov: unexpected return value of "
155 "function op_bits.\n");
156 tcg_abort();
157 }
158 }
159
160 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
161 {
162 switch (op) {
163 CASE_OP_32_64(add):
164 return x + y;
165
166 CASE_OP_32_64(sub):
167 return x - y;
168
169 CASE_OP_32_64(mul):
170 return x * y;
171
172 CASE_OP_32_64(and):
173 return x & y;
174
175 CASE_OP_32_64(or):
176 return x | y;
177
178 CASE_OP_32_64(xor):
179 return x ^ y;
180
181 case INDEX_op_shl_i32:
182 return (uint32_t)x << (uint32_t)y;
183
184 case INDEX_op_shl_i64:
185 return (uint64_t)x << (uint64_t)y;
186
187 case INDEX_op_shr_i32:
188 return (uint32_t)x >> (uint32_t)y;
189
190 case INDEX_op_shr_i64:
191 return (uint64_t)x >> (uint64_t)y;
192
193 case INDEX_op_sar_i32:
194 return (int32_t)x >> (int32_t)y;
195
196 case INDEX_op_sar_i64:
197 return (int64_t)x >> (int64_t)y;
198
199 case INDEX_op_rotr_i32:
200 x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
201 return x;
202
203 case INDEX_op_rotr_i64:
204 x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
205 return x;
206
207 case INDEX_op_rotl_i32:
208 x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
209 return x;
210
211 case INDEX_op_rotl_i64:
212 x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
213 return x;
214
215 CASE_OP_32_64(not):
216 return ~x;
217
218 CASE_OP_32_64(neg):
219 return -x;
220
221 CASE_OP_32_64(andc):
222 return x & ~y;
223
224 CASE_OP_32_64(orc):
225 return x | ~y;
226
227 CASE_OP_32_64(eqv):
228 return ~(x ^ y);
229
230 CASE_OP_32_64(nand):
231 return ~(x & y);
232
233 CASE_OP_32_64(nor):
234 return ~(x | y);
235
236 CASE_OP_32_64(ext8s):
237 return (int8_t)x;
238
239 CASE_OP_32_64(ext16s):
240 return (int16_t)x;
241
242 CASE_OP_32_64(ext8u):
243 return (uint8_t)x;
244
245 CASE_OP_32_64(ext16u):
246 return (uint16_t)x;
247
248 case INDEX_op_ext32s_i64:
249 return (int32_t)x;
250
251 case INDEX_op_ext32u_i64:
252 return (uint32_t)x;
253
254 default:
255 fprintf(stderr,
256 "Unrecognized operation %d in do_constant_folding.\n", op);
257 tcg_abort();
258 }
259 }
260
261 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
262 {
263 TCGArg res = do_constant_folding_2(op, x, y);
264 if (op_bits(op) == 32) {
265 res &= 0xffffffff;
266 }
267 return res;
268 }
269
270 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
271 TCGArg y, TCGCond c)
272 {
273 switch (op_bits(op)) {
274 case 32:
275 switch (c) {
276 case TCG_COND_EQ:
277 return (uint32_t)x == (uint32_t)y;
278 case TCG_COND_NE:
279 return (uint32_t)x != (uint32_t)y;
280 case TCG_COND_LT:
281 return (int32_t)x < (int32_t)y;
282 case TCG_COND_GE:
283 return (int32_t)x >= (int32_t)y;
284 case TCG_COND_LE:
285 return (int32_t)x <= (int32_t)y;
286 case TCG_COND_GT:
287 return (int32_t)x > (int32_t)y;
288 case TCG_COND_LTU:
289 return (uint32_t)x < (uint32_t)y;
290 case TCG_COND_GEU:
291 return (uint32_t)x >= (uint32_t)y;
292 case TCG_COND_LEU:
293 return (uint32_t)x <= (uint32_t)y;
294 case TCG_COND_GTU:
295 return (uint32_t)x > (uint32_t)y;
296 }
297 break;
298 case 64:
299 switch (c) {
300 case TCG_COND_EQ:
301 return (uint64_t)x == (uint64_t)y;
302 case TCG_COND_NE:
303 return (uint64_t)x != (uint64_t)y;
304 case TCG_COND_LT:
305 return (int64_t)x < (int64_t)y;
306 case TCG_COND_GE:
307 return (int64_t)x >= (int64_t)y;
308 case TCG_COND_LE:
309 return (int64_t)x <= (int64_t)y;
310 case TCG_COND_GT:
311 return (int64_t)x > (int64_t)y;
312 case TCG_COND_LTU:
313 return (uint64_t)x < (uint64_t)y;
314 case TCG_COND_GEU:
315 return (uint64_t)x >= (uint64_t)y;
316 case TCG_COND_LEU:
317 return (uint64_t)x <= (uint64_t)y;
318 case TCG_COND_GTU:
319 return (uint64_t)x > (uint64_t)y;
320 }
321 break;
322 }
323
324 fprintf(stderr,
325 "Unrecognized bitness %d or condition %d in "
326 "do_constant_folding_cond.\n", op_bits(op), c);
327 tcg_abort();
328 }
329
330
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                    TCGArg *args, TCGOpDef *tcg_op_defs)
{
    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
    TCGOpcode op;
    const TCGOpDef *def;
    TCGArg *gen_args;
    TCGArg tmp;
    /* Array VALS has an element for each temp.
       If this temp holds a constant then its value is kept in VALS' element.
       If this temp is a copy of other ones then this equivalence class'
       representative is kept in VALS' element.
       If this temp is neither copy nor constant then corresponding VALS'
       element is unused. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));

    /* The pass rewrites the argument stream in place: ARGS reads the
       original operands, GEN_ARGS writes the (possibly shorter) output.
       GEN_ARGS never gets ahead of ARGS, so the overwrite is safe.  */
    nb_ops = tcg_opc_ptr - gen_opc_buf;
    gen_args = args;
    for (op_index = 0; op_index < nb_ops; op_index++) {
        op = gen_opc_buf[op_index];
        def = &tcg_op_defs[op];
        /* Do copy propagation */
        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
            assert(op != INDEX_op_call);
            /* Replace each input temp that is a known copy by its class
               representative.  */
            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = temps[args[i]].val;
                }
            }
        }

        /* For commutative operations make constant second argument */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                tmp = args[1];
                args[1] = args[2];
                args[2] = tmp;
            }
            break;
        CASE_OP_32_64(brcond):
            /* brcond compares args[0] with args[1]; swapping them must
               also swap the condition code.  */
            if (temps[args[0]].state == TCG_TEMP_CONST
                && temps[args[1]].state != TCG_TEMP_CONST) {
                tmp = args[0];
                args[0] = args[1];
                args[1] = tmp;
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            /* setcond compares args[1] with args[2]; same swap rule.  */
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state != TCG_TEMP_CONST) {
                tmp = args[1];
                args[1] = args[2];
                args[2] = tmp;
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
        switch (op) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                /* Proceed with possible constant folding. */
                break;
            }
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                /* If the destination is already the same value as the
                   source, the whole op degenerates to a nop.  */
                if ((temps[args[0]].state == TCG_TEMP_COPY
                    && temps[args[0]].val == args[1])
                    || args[0] == args[1]) {
                    args += 3;
                    gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1],
                                    nb_temps, nb_globals);
                    gen_args += 2;
                    args += 3;
                }
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
            if ((temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0)) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            /* x & x == x and x | x == x.  */
            if (args[1] == args[2]) {
                if (args[1] == args[0]) {
                    args += 3;
                    gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
                                    nb_globals);
                    gen_args += 2;
                    args += 3;
                }
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding. Constants will be substituted to arguments by register
           allocator where needed and possible. Also detect copies. */
        switch (op) {
        CASE_OP_32_64(mov):
            /* mov of a temp onto itself (directly or via a known copy
               relation) is a nop.  */
            if ((temps[args[1]].state == TCG_TEMP_COPY
                && temps[args[1]].val == args[0])
                || args[0] == args[1]) {
                args += 2;
                gen_opc_buf[op_index] = INDEX_op_nop;
                break;
            }
            if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(s, gen_args, args[0], args[1],
                                nb_temps, nb_globals);
                gen_args += 2;
                args += 2;
                break;
            }
            /* Source argument is constant. Rewrite the operation and
               let movi case handle it. */
            op = op_to_movi(op);
            gen_opc_buf[op_index] = op;
            args[1] = temps[args[1]].val;
            /* fallthrough */
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
            gen_args += 2;
            args += 2;
            break;
        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            /* Unary op with a constant operand: fold to a movi.  */
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, 0);
                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
                gen_args += 2;
                args += 2;
                break;
            } else {
                /* Non-constant operand: just pass the op through, but
                   the output temp's old value is now stale.  */
                reset_temp(args[0], nb_temps, nb_globals);
                gen_args[0] = args[0];
                gen_args[1] = args[1];
                gen_args += 2;
                args += 2;
                break;
            }
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
            /* Binary op with two constant operands: fold to a movi.  */
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
                gen_args += 2;
                args += 3;
                break;
            } else {
                reset_temp(args[0], nb_temps, nb_globals);
                gen_args[0] = args[0];
                gen_args[1] = args[1];
                gen_args[2] = args[2];
                gen_args += 3;
                args += 3;
                break;
            }
        CASE_OP_32_64(setcond):
            /* setcond with two constant operands: fold to movi 0/1.  */
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding_cond(op, temps[args[1]].val,
                                               temps[args[2]].val, args[3]);
                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
                gen_args += 2;
                args += 4;
                break;
            } else {
                reset_temp(args[0], nb_temps, nb_globals);
                gen_args[0] = args[0];
                gen_args[1] = args[1];
                gen_args[2] = args[2];
                gen_args[3] = args[3];
                gen_args += 4;
                args += 4;
                break;
            }
        case INDEX_op_call:
            /* args[0] packs the input/output argument counts.  */
            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
            if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
                /* The call may clobber any global.  */
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i, nb_temps, nb_globals);
                }
            }
            /* The call's output temps are trashed.  */
            for (i = 0; i < (args[0] >> 16); i++) {
                reset_temp(args[i + 1], nb_temps, nb_globals);
            }
            /* Copy the whole call op through unchanged; it occupies
               nb_call_args + 3 argument words in total.  */
            i = nb_call_args + 3;
            while (i) {
                *gen_args = *args;
                args++;
                gen_args++;
                i--;
            }
            break;
        case INDEX_op_set_label:
        case INDEX_op_jmp:
        case INDEX_op_br:
        CASE_OP_32_64(brcond):
            /* Basic-block boundary: forget everything we know, and copy
               the op's arguments through unchanged.  */
            memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
            for (i = 0; i < def->nb_args; i++) {
                *gen_args = *args;
                args++;
                gen_args++;
            }
            break;
        default:
            /* Default case: we do know nothing about operation so no
               propagation is done. We only trash output args. */
            for (i = 0; i < def->nb_oargs; i++) {
                reset_temp(args[i], nb_temps, nb_globals);
            }
            for (i = 0; i < def->nb_args; i++) {
                gen_args[i] = args[i];
            }
            args += def->nb_args;
            gen_args += def->nb_args;
            break;
        }
    }

    return gen_args;
}
652
653 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
654 TCGArg *args, TCGOpDef *tcg_op_defs)
655 {
656 TCGArg *res;
657 res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
658 return res;
659 }