qemu.git / tcg / optimize.c  (blob at commit 'revert "TCG: fix copy propagation"')
/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

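/* For example, CASE_OP_32_64(add) expands to the pair of case labels
 *     case INDEX_op_add_i32: case INDEX_op_add_i64
 * so a single case body handles both the 32-bit and the 64-bit variant of
 * an operation.
 */
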
typedef enum {
    TCG_TEMP_UNDEF = 0,
    TCG_TEMP_CONST,
    TCG_TEMP_COPY,
    TCG_TEMP_HAS_COPY,
    TCG_TEMP_ANY
} tcg_temp_state;

struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];

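/* How copies are tracked (a sketch; the temp numbers below are hypothetical):
 * temps known to hold the same value are linked into a circular doubly
 * linked list through next_copy/prev_copy.  One non-global member, the
 * representative, is in state TCG_TEMP_HAS_COPY; every other member is in
 * state TCG_TEMP_COPY and stores the representative's index in val.
 * E.g. after "mov_i32 t5, t2" followed by "mov_i32 t7, t2" (t2, t5, t7
 * non-global):
 *     temps[2]: TCG_TEMP_HAS_COPY, next_copy = 7, prev_copy = 5
 *     temps[7]: TCG_TEMP_COPY of 2
 *     temps[5]: TCG_TEMP_COPY of 2
 */
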
/* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
   class of equivalent temps, a new representative should be chosen in this
   class. */
static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
{
    int i;
    TCGArg new_base = (TCGArg)-1;
    if (temps[temp].state == TCG_TEMP_HAS_COPY) {
        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
            if (i >= nb_globals) {
                temps[i].state = TCG_TEMP_HAS_COPY;
                new_base = i;
                break;
            }
        }
        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
            if (new_base == (TCGArg)-1) {
                temps[i].state = TCG_TEMP_ANY;
            } else {
                temps[i].val = new_base;
            }
        }
        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
    } else if (temps[temp].state == TCG_TEMP_COPY) {
        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
        new_base = temps[temp].val;
    }
    temps[temp].state = TCG_TEMP_ANY;
    if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
        temps[new_base].state = TCG_TEMP_ANY;
    }
}

/* Return the operand width, in bits, of the given opcode.  */
static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

/* Return the movi opcode of the same width as OP.  */
static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

/* Emit the arguments of a mov DST, SRC (the caller rewrites the opcode
   itself) and, when SRC is not a global, record DST as a copy of SRC.  */
static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
                            int nb_temps, int nb_globals)
{
    reset_temp(dst, nb_temps, nb_globals);
    assert(temps[src].state != TCG_TEMP_COPY);
    if (src >= nb_globals) {
        assert(temps[src].state != TCG_TEMP_CONST);
        if (temps[src].state != TCG_TEMP_HAS_COPY) {
            temps[src].state = TCG_TEMP_HAS_COPY;
            temps[src].next_copy = src;
            temps[src].prev_copy = src;
        }
        temps[dst].state = TCG_TEMP_COPY;
        temps[dst].val = src;
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
    }
    gen_args[0] = dst;
    gen_args[1] = src;
}

/* Emit the arguments of a movi DST, VAL and record that DST now holds the
   constant VAL.  */
static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
                             int nb_temps, int nb_globals)
{
    reset_temp(dst, nb_temps, nb_globals);
    temps[dst].state = TCG_TEMP_CONST;
    temps[dst].val = val;
    gen_args[0] = dst;
    gen_args[1] = val;
}

/* Return the mov opcode of the same width as OP.  */
static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

/* Fold OP applied to the constant arguments X and Y.  The result is not
   truncated to the operand width here; do_constant_folding() below takes
   care of that.  */
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (uint32_t)y;

    case INDEX_op_shl_i64:
        return (uint64_t)x << (uint64_t)y;

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (uint32_t)y;

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (uint64_t)y;

    case INDEX_op_sar_i32:
        return (int32_t)x >> (int32_t)y;

    case INDEX_op_sar_i64:
        return (int64_t)x >> (int64_t)y;

    case INDEX_op_rotr_i32:
        x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
        return x;

    case INDEX_op_rotr_i64:
        x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
        return x;

    case INDEX_op_rotl_i32:
        x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
        return x;

    case INDEX_op_rotl_i64:
        x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
        return x;

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res &= 0xffffffff;
    }
    return res;
}

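/* For example, on a 64-bit host do_constant_folding(INDEX_op_add_i32,
 * 0xffffffff, 1) returns 0: the intermediate sum 0x100000000 is masked back
 * to the 32-bit operand width above.
 */
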
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    switch (op_bits(op)) {
    case 32:
        switch (c) {
        case TCG_COND_EQ:
            return (uint32_t)x == (uint32_t)y;
        case TCG_COND_NE:
            return (uint32_t)x != (uint32_t)y;
        case TCG_COND_LT:
            return (int32_t)x < (int32_t)y;
        case TCG_COND_GE:
            return (int32_t)x >= (int32_t)y;
        case TCG_COND_LE:
            return (int32_t)x <= (int32_t)y;
        case TCG_COND_GT:
            return (int32_t)x > (int32_t)y;
        case TCG_COND_LTU:
            return (uint32_t)x < (uint32_t)y;
        case TCG_COND_GEU:
            return (uint32_t)x >= (uint32_t)y;
        case TCG_COND_LEU:
            return (uint32_t)x <= (uint32_t)y;
        case TCG_COND_GTU:
            return (uint32_t)x > (uint32_t)y;
        }
        break;
    case 64:
        switch (c) {
        case TCG_COND_EQ:
            return (uint64_t)x == (uint64_t)y;
        case TCG_COND_NE:
            return (uint64_t)x != (uint64_t)y;
        case TCG_COND_LT:
            return (int64_t)x < (int64_t)y;
        case TCG_COND_GE:
            return (int64_t)x >= (int64_t)y;
        case TCG_COND_LE:
            return (int64_t)x <= (int64_t)y;
        case TCG_COND_GT:
            return (int64_t)x > (int64_t)y;
        case TCG_COND_LTU:
            return (uint64_t)x < (uint64_t)y;
        case TCG_COND_GEU:
            return (uint64_t)x >= (uint64_t)y;
        case TCG_COND_LEU:
            return (uint64_t)x <= (uint64_t)y;
        case TCG_COND_GTU:
            return (uint64_t)x > (uint64_t)y;
        }
        break;
    }

    fprintf(stderr,
            "Unrecognized bitness %d or condition %d in "
            "do_constant_folding_cond.\n", op_bits(op), c);
    tcg_abort();
}


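/* Note that the signedness of the comparison comes from the condition, not
 * from the operands.  E.g. for a 32-bit op with x = 0xffffffff and y = 1,
 * TCG_COND_LT (signed) yields 1 because (int32_t)x is -1, while
 * TCG_COND_LTU (unsigned) yields 0.
 */
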
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                    TCGArg *args, TCGOpDef *tcg_op_defs)
{
    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
    TCGOpcode op;
    const TCGOpDef *def;
    TCGArg *gen_args;
    TCGArg tmp;
    /* The TEMPS array has an element for each temp.
       If a temp holds a constant, its value is kept in the element's val
       field.  If a temp is a copy of other temps, the representative of its
       equivalence class is kept in val.  If a temp is neither a copy nor a
       constant, val is unused. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));

    nb_ops = tcg_opc_ptr - gen_opc_buf;
    gen_args = args;
    for (op_index = 0; op_index < nb_ops; op_index++) {
        op = gen_opc_buf[op_index];
        def = &tcg_op_defs[op];
        /* Do copy propagation */
        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
            assert(op != INDEX_op_call);
            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = temps[args[i]].val;
                }
            }
        }

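        /* E.g. (hypothetical temps) once "mov_i32 t5, t2" has made t5 a
           copy of t2, a later "add_i32 t0, t5, t1" has its t5 input operand
           rewritten to t2 here, so the code below only ever sees the class
           representative for such inputs. */
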
        /* For commutative operations make constant second argument */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                tmp = args[1];
                args[1] = args[2];
                args[2] = tmp;
            }
            break;
        CASE_OP_32_64(brcond):
            if (temps[args[0]].state == TCG_TEMP_CONST
                && temps[args[1]].state != TCG_TEMP_CONST) {
                tmp = args[0];
                args[0] = args[1];
                args[1] = tmp;
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state != TCG_TEMP_CONST) {
                tmp = args[1];
                args[1] = args[2];
                args[2] = tmp;
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        default:
            break;
        }

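        /* E.g. if the first source operand of an add is known to be constant
           and the second is not, the two are swapped above (with the
           condition swapped accordingly for brcond/setcond), so later checks
           can assume that a constant operand of a commutative operation sits
           in the second position. */
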
        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
        switch (op) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                /* Proceed with possible constant folding. */
                break;
            }
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                if ((temps[args[0]].state == TCG_TEMP_COPY
                     && temps[args[0]].val == args[1])
                    || args[0] == args[1]) {
                    gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(gen_args, args[0], args[1],
                                    nb_temps, nb_globals);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
            if ((temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0)) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (args[1] == args[2]) {
                if (args[1] == args[0]) {
                    gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
                                    nb_globals);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
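        /* E.g. (hypothetical temps) after "movi_i32 t1, $2; movi_i32 t2, $3",
           t1 and t2 are recorded as constants, so a following
           "add_i32 t0, t1, t2" is rewritten below into "movi_i32 t0, $5";
           the earlier movi ops themselves are left in place by this pass. */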
        switch (op) {
        CASE_OP_32_64(mov):
            if ((temps[args[1]].state == TCG_TEMP_COPY
                && temps[args[1]].val == args[0])
                || args[0] == args[1]) {
                args += 2;
                gen_opc_buf[op_index] = INDEX_op_nop;
                break;
            }
            if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(gen_args, args[0], args[1],
                                nb_temps, nb_globals);
                gen_args += 2;
                args += 2;
                break;
            }
            /* Source argument is constant.  Rewrite the operation and
               let the movi case below handle it. */
            op = op_to_movi(op);
            gen_opc_buf[op_index] = op;
            args[1] = temps[args[1]].val;
            /* fallthrough */
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
            gen_args += 2;
            args += 2;
            break;
        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, 0);
                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
            } else {
                reset_temp(args[0], nb_temps, nb_globals);
                gen_args[0] = args[0];
                gen_args[1] = args[1];
            }
            gen_args += 2;
            args += 2;
            break;
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
                gen_args += 2;
            } else {
                reset_temp(args[0], nb_temps, nb_globals);
                gen_args[0] = args[0];
                gen_args[1] = args[1];
                gen_args[2] = args[2];
                gen_args += 3;
            }
            args += 3;
            break;
        CASE_OP_32_64(setcond):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding_cond(op, temps[args[1]].val,
                                               temps[args[2]].val, args[3]);
                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
                gen_args += 2;
            } else {
                reset_temp(args[0], nb_temps, nb_globals);
                gen_args[0] = args[0];
                gen_args[1] = args[1];
                gen_args[2] = args[2];
                gen_args[3] = args[3];
                gen_args += 4;
            }
            args += 4;
            break;
        CASE_OP_32_64(brcond):
            if (temps[args[0]].state == TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST) {
                if (do_constant_folding_cond(op, temps[args[0]].val,
                                             temps[args[1]].val, args[2])) {
                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
                    gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[3];
                    gen_args += 1;
                } else {
                    gen_opc_buf[op_index] = INDEX_op_nop;
                }
            } else {
                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
                reset_temp(args[0], nb_temps, nb_globals);
                gen_args[0] = args[0];
                gen_args[1] = args[1];
                gen_args[2] = args[2];
                gen_args[3] = args[3];
                gen_args += 4;
            }
            args += 4;
            break;
        case INDEX_op_call:
            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
            if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i, nb_temps, nb_globals);
                }
            }
            for (i = 0; i < (args[0] >> 16); i++) {
                reset_temp(args[i + 1], nb_temps, nb_globals);
            }
            i = nb_call_args + 3;
            while (i) {
                *gen_args = *args;
                args++;
                gen_args++;
                i--;
            }
            break;
        case INDEX_op_set_label:
        case INDEX_op_jmp:
        case INDEX_op_br:
            memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
            for (i = 0; i < def->nb_args; i++) {
                *gen_args = *args;
                args++;
                gen_args++;
            }
            break;
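        /* Note: knowledge about constants and copies is not carried across
           branch or label ops; the brcond case above and the
           set_label/jmp/br case here conservatively clear the whole temps
           array rather than trying to track values across basic-block
           boundaries. */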
        default:
            /* Default case: we know nothing about the operation, so no
               propagation is done.  We only trash the output arguments. */
            for (i = 0; i < def->nb_oargs; i++) {
                reset_temp(args[i], nb_temps, nb_globals);
            }
            for (i = 0; i < def->nb_args; i++) {
                gen_args[i] = args[i];
            }
            args += def->nb_args;
            gen_args += def->nb_args;
            break;
        }
    }

    return gen_args;
}

TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
                     TCGArg *args, TCGOpDef *tcg_op_defs)
{
    TCGArg *res;
    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
    return res;
}