]> git.proxmox.com Git - qemu.git/blob - tcg/optimize.c
tcg/optimize: swap brcond/setcond arguments when possible
[qemu.git] / tcg / optimize.c
1 /*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "config.h"
27
28 #include <stdlib.h>
29 #include <stdio.h>
30
31 #include "qemu-common.h"
32 #include "tcg-op.h"
33
/*
 * Expand to the pair of case labels for the 32-bit and 64-bit variants of
 * opcode X.  Usage is "CASE_OP_32_64(add):" -- the expansion supplies the
 * colon after the _i32 label, the caller supplies the one after _i64.
 */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
37
/* What the optimizer currently knows about the contents of a temp. */
typedef enum {
    /* Zero value: the state every temp has after temps[] is memset to 0. */
    TCG_TEMP_UNDEF = 0,
    /* The temp holds a known constant, stored in tcg_temp_info.val. */
    TCG_TEMP_CONST,
    /* The temp is a copy; tcg_temp_info.val names the representative. */
    TCG_TEMP_COPY,
    /* The temp is the representative of a class of equivalent temps. */
    TCG_TEMP_HAS_COPY,
    /* Set by reset_temp(): no constant or copy information is tracked. */
    TCG_TEMP_ANY
} tcg_temp_state;
45
46 struct tcg_temp_info {
47 tcg_temp_state state;
48 uint16_t prev_copy;
49 uint16_t next_copy;
50 tcg_target_ulong val;
51 };
52
53 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
54
55 /* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
56 class of equivalent temp's, a new representative should be chosen in this
57 class. */
58 static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
59 {
60 int i;
61 TCGArg new_base = (TCGArg)-1;
62 if (temps[temp].state == TCG_TEMP_HAS_COPY) {
63 for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
64 if (i >= nb_globals) {
65 temps[i].state = TCG_TEMP_HAS_COPY;
66 new_base = i;
67 break;
68 }
69 }
70 for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
71 if (new_base == (TCGArg)-1) {
72 temps[i].state = TCG_TEMP_ANY;
73 } else {
74 temps[i].val = new_base;
75 }
76 }
77 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
78 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
79 } else if (temps[temp].state == TCG_TEMP_COPY) {
80 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
81 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
82 new_base = temps[temp].val;
83 }
84 temps[temp].state = TCG_TEMP_ANY;
85 if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
86 temps[new_base].state = TCG_TEMP_ANY;
87 }
88 }
89
90 static int op_bits(TCGOpcode op)
91 {
92 const TCGOpDef *def = &tcg_op_defs[op];
93 return def->flags & TCG_OPF_64BIT ? 64 : 32;
94 }
95
96 static TCGOpcode op_to_movi(TCGOpcode op)
97 {
98 switch (op_bits(op)) {
99 case 32:
100 return INDEX_op_movi_i32;
101 case 64:
102 return INDEX_op_movi_i64;
103 default:
104 fprintf(stderr, "op_to_movi: unexpected return value of "
105 "function op_bits.\n");
106 tcg_abort();
107 }
108 }
109
110 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
111 TCGArg src, int nb_temps, int nb_globals)
112 {
113 reset_temp(dst, nb_temps, nb_globals);
114 assert(temps[src].state != TCG_TEMP_COPY);
115 /* Don't try to copy if one of temps is a global or either one
116 is local and another is register */
117 if (src >= nb_globals && dst >= nb_globals &&
118 tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
119 assert(temps[src].state != TCG_TEMP_CONST);
120 if (temps[src].state != TCG_TEMP_HAS_COPY) {
121 temps[src].state = TCG_TEMP_HAS_COPY;
122 temps[src].next_copy = src;
123 temps[src].prev_copy = src;
124 }
125 temps[dst].state = TCG_TEMP_COPY;
126 temps[dst].val = src;
127 temps[dst].next_copy = temps[src].next_copy;
128 temps[dst].prev_copy = src;
129 temps[temps[dst].next_copy].prev_copy = dst;
130 temps[src].next_copy = dst;
131 }
132 gen_args[0] = dst;
133 gen_args[1] = src;
134 }
135
136 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
137 int nb_temps, int nb_globals)
138 {
139 reset_temp(dst, nb_temps, nb_globals);
140 temps[dst].state = TCG_TEMP_CONST;
141 temps[dst].val = val;
142 gen_args[0] = dst;
143 gen_args[1] = val;
144 }
145
146 static TCGOpcode op_to_mov(TCGOpcode op)
147 {
148 switch (op_bits(op)) {
149 case 32:
150 return INDEX_op_mov_i32;
151 case 64:
152 return INDEX_op_mov_i64;
153 default:
154 fprintf(stderr, "op_to_mov: unexpected return value of "
155 "function op_bits.\n");
156 tcg_abort();
157 }
158 }
159
160 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
161 {
162 switch (op) {
163 CASE_OP_32_64(add):
164 return x + y;
165
166 CASE_OP_32_64(sub):
167 return x - y;
168
169 CASE_OP_32_64(mul):
170 return x * y;
171
172 CASE_OP_32_64(and):
173 return x & y;
174
175 CASE_OP_32_64(or):
176 return x | y;
177
178 CASE_OP_32_64(xor):
179 return x ^ y;
180
181 case INDEX_op_shl_i32:
182 return (uint32_t)x << (uint32_t)y;
183
184 case INDEX_op_shl_i64:
185 return (uint64_t)x << (uint64_t)y;
186
187 case INDEX_op_shr_i32:
188 return (uint32_t)x >> (uint32_t)y;
189
190 case INDEX_op_shr_i64:
191 return (uint64_t)x >> (uint64_t)y;
192
193 case INDEX_op_sar_i32:
194 return (int32_t)x >> (int32_t)y;
195
196 case INDEX_op_sar_i64:
197 return (int64_t)x >> (int64_t)y;
198
199 case INDEX_op_rotr_i32:
200 x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
201 return x;
202
203 case INDEX_op_rotr_i64:
204 x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
205 return x;
206
207 case INDEX_op_rotl_i32:
208 x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
209 return x;
210
211 case INDEX_op_rotl_i64:
212 x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
213 return x;
214
215 CASE_OP_32_64(not):
216 return ~x;
217
218 CASE_OP_32_64(neg):
219 return -x;
220
221 CASE_OP_32_64(andc):
222 return x & ~y;
223
224 CASE_OP_32_64(orc):
225 return x | ~y;
226
227 CASE_OP_32_64(eqv):
228 return ~(x ^ y);
229
230 CASE_OP_32_64(nand):
231 return ~(x & y);
232
233 CASE_OP_32_64(nor):
234 return ~(x | y);
235
236 CASE_OP_32_64(ext8s):
237 return (int8_t)x;
238
239 CASE_OP_32_64(ext16s):
240 return (int16_t)x;
241
242 CASE_OP_32_64(ext8u):
243 return (uint8_t)x;
244
245 CASE_OP_32_64(ext16u):
246 return (uint16_t)x;
247
248 case INDEX_op_ext32s_i64:
249 return (int32_t)x;
250
251 case INDEX_op_ext32u_i64:
252 return (uint32_t)x;
253
254 default:
255 fprintf(stderr,
256 "Unrecognized operation %d in do_constant_folding.\n", op);
257 tcg_abort();
258 }
259 }
260
261 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
262 {
263 TCGArg res = do_constant_folding_2(op, x, y);
264 if (op_bits(op) == 32) {
265 res &= 0xffffffff;
266 }
267 return res;
268 }
269
270 /* Propagate constants and copies, fold constant expressions. */
271 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
272 TCGArg *args, TCGOpDef *tcg_op_defs)
273 {
274 int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
275 TCGOpcode op;
276 const TCGOpDef *def;
277 TCGArg *gen_args;
278 TCGArg tmp;
279 /* Array VALS has an element for each temp.
280 If this temp holds a constant then its value is kept in VALS' element.
281 If this temp is a copy of other ones then this equivalence class'
282 representative is kept in VALS' element.
283 If this temp is neither copy nor constant then corresponding VALS'
284 element is unused. */
285
286 nb_temps = s->nb_temps;
287 nb_globals = s->nb_globals;
288 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
289
290 nb_ops = tcg_opc_ptr - gen_opc_buf;
291 gen_args = args;
292 for (op_index = 0; op_index < nb_ops; op_index++) {
293 op = gen_opc_buf[op_index];
294 def = &tcg_op_defs[op];
295 /* Do copy propagation */
296 if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
297 assert(op != INDEX_op_call);
298 for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
299 if (temps[args[i]].state == TCG_TEMP_COPY) {
300 args[i] = temps[args[i]].val;
301 }
302 }
303 }
304
305 /* For commutative operations make constant second argument */
306 switch (op) {
307 CASE_OP_32_64(add):
308 CASE_OP_32_64(mul):
309 CASE_OP_32_64(and):
310 CASE_OP_32_64(or):
311 CASE_OP_32_64(xor):
312 CASE_OP_32_64(eqv):
313 CASE_OP_32_64(nand):
314 CASE_OP_32_64(nor):
315 if (temps[args[1]].state == TCG_TEMP_CONST) {
316 tmp = args[1];
317 args[1] = args[2];
318 args[2] = tmp;
319 }
320 break;
321 CASE_OP_32_64(brcond):
322 if (temps[args[0]].state == TCG_TEMP_CONST
323 && temps[args[1]].state != TCG_TEMP_CONST) {
324 tmp = args[0];
325 args[0] = args[1];
326 args[1] = tmp;
327 args[2] = tcg_swap_cond(args[2]);
328 }
329 break;
330 CASE_OP_32_64(setcond):
331 if (temps[args[1]].state == TCG_TEMP_CONST
332 && temps[args[2]].state != TCG_TEMP_CONST) {
333 tmp = args[1];
334 args[1] = args[2];
335 args[2] = tmp;
336 args[3] = tcg_swap_cond(args[3]);
337 }
338 break;
339 default:
340 break;
341 }
342
343 /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
344 switch (op) {
345 CASE_OP_32_64(shl):
346 CASE_OP_32_64(shr):
347 CASE_OP_32_64(sar):
348 CASE_OP_32_64(rotl):
349 CASE_OP_32_64(rotr):
350 if (temps[args[1]].state == TCG_TEMP_CONST
351 && temps[args[1]].val == 0) {
352 gen_opc_buf[op_index] = op_to_movi(op);
353 tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
354 args += 3;
355 gen_args += 2;
356 continue;
357 }
358 break;
359 default:
360 break;
361 }
362
363 /* Simplify expression for "op r, a, 0 => mov r, a" cases */
364 switch (op) {
365 CASE_OP_32_64(add):
366 CASE_OP_32_64(sub):
367 CASE_OP_32_64(shl):
368 CASE_OP_32_64(shr):
369 CASE_OP_32_64(sar):
370 CASE_OP_32_64(rotl):
371 CASE_OP_32_64(rotr):
372 CASE_OP_32_64(or):
373 CASE_OP_32_64(xor):
374 if (temps[args[1]].state == TCG_TEMP_CONST) {
375 /* Proceed with possible constant folding. */
376 break;
377 }
378 if (temps[args[2]].state == TCG_TEMP_CONST
379 && temps[args[2]].val == 0) {
380 if ((temps[args[0]].state == TCG_TEMP_COPY
381 && temps[args[0]].val == args[1])
382 || args[0] == args[1]) {
383 args += 3;
384 gen_opc_buf[op_index] = INDEX_op_nop;
385 } else {
386 gen_opc_buf[op_index] = op_to_mov(op);
387 tcg_opt_gen_mov(s, gen_args, args[0], args[1],
388 nb_temps, nb_globals);
389 gen_args += 2;
390 args += 3;
391 }
392 continue;
393 }
394 break;
395 default:
396 break;
397 }
398
399 /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
400 switch (op) {
401 CASE_OP_32_64(and):
402 CASE_OP_32_64(mul):
403 if ((temps[args[2]].state == TCG_TEMP_CONST
404 && temps[args[2]].val == 0)) {
405 gen_opc_buf[op_index] = op_to_movi(op);
406 tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
407 args += 3;
408 gen_args += 2;
409 continue;
410 }
411 break;
412 default:
413 break;
414 }
415
416 /* Simplify expression for "op r, a, a => mov r, a" cases */
417 switch (op) {
418 CASE_OP_32_64(or):
419 CASE_OP_32_64(and):
420 if (args[1] == args[2]) {
421 if (args[1] == args[0]) {
422 args += 3;
423 gen_opc_buf[op_index] = INDEX_op_nop;
424 } else {
425 gen_opc_buf[op_index] = op_to_mov(op);
426 tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
427 nb_globals);
428 gen_args += 2;
429 args += 3;
430 }
431 continue;
432 }
433 break;
434 default:
435 break;
436 }
437
438 /* Propagate constants through copy operations and do constant
439 folding. Constants will be substituted to arguments by register
440 allocator where needed and possible. Also detect copies. */
441 switch (op) {
442 CASE_OP_32_64(mov):
443 if ((temps[args[1]].state == TCG_TEMP_COPY
444 && temps[args[1]].val == args[0])
445 || args[0] == args[1]) {
446 args += 2;
447 gen_opc_buf[op_index] = INDEX_op_nop;
448 break;
449 }
450 if (temps[args[1]].state != TCG_TEMP_CONST) {
451 tcg_opt_gen_mov(s, gen_args, args[0], args[1],
452 nb_temps, nb_globals);
453 gen_args += 2;
454 args += 2;
455 break;
456 }
457 /* Source argument is constant. Rewrite the operation and
458 let movi case handle it. */
459 op = op_to_movi(op);
460 gen_opc_buf[op_index] = op;
461 args[1] = temps[args[1]].val;
462 /* fallthrough */
463 CASE_OP_32_64(movi):
464 tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
465 gen_args += 2;
466 args += 2;
467 break;
468 CASE_OP_32_64(not):
469 CASE_OP_32_64(neg):
470 CASE_OP_32_64(ext8s):
471 CASE_OP_32_64(ext8u):
472 CASE_OP_32_64(ext16s):
473 CASE_OP_32_64(ext16u):
474 case INDEX_op_ext32s_i64:
475 case INDEX_op_ext32u_i64:
476 if (temps[args[1]].state == TCG_TEMP_CONST) {
477 gen_opc_buf[op_index] = op_to_movi(op);
478 tmp = do_constant_folding(op, temps[args[1]].val, 0);
479 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
480 gen_args += 2;
481 args += 2;
482 break;
483 } else {
484 reset_temp(args[0], nb_temps, nb_globals);
485 gen_args[0] = args[0];
486 gen_args[1] = args[1];
487 gen_args += 2;
488 args += 2;
489 break;
490 }
491 CASE_OP_32_64(add):
492 CASE_OP_32_64(sub):
493 CASE_OP_32_64(mul):
494 CASE_OP_32_64(or):
495 CASE_OP_32_64(and):
496 CASE_OP_32_64(xor):
497 CASE_OP_32_64(shl):
498 CASE_OP_32_64(shr):
499 CASE_OP_32_64(sar):
500 CASE_OP_32_64(rotl):
501 CASE_OP_32_64(rotr):
502 CASE_OP_32_64(andc):
503 CASE_OP_32_64(orc):
504 CASE_OP_32_64(eqv):
505 CASE_OP_32_64(nand):
506 CASE_OP_32_64(nor):
507 if (temps[args[1]].state == TCG_TEMP_CONST
508 && temps[args[2]].state == TCG_TEMP_CONST) {
509 gen_opc_buf[op_index] = op_to_movi(op);
510 tmp = do_constant_folding(op, temps[args[1]].val,
511 temps[args[2]].val);
512 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
513 gen_args += 2;
514 args += 3;
515 break;
516 } else {
517 reset_temp(args[0], nb_temps, nb_globals);
518 gen_args[0] = args[0];
519 gen_args[1] = args[1];
520 gen_args[2] = args[2];
521 gen_args += 3;
522 args += 3;
523 break;
524 }
525 case INDEX_op_call:
526 nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
527 if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
528 for (i = 0; i < nb_globals; i++) {
529 reset_temp(i, nb_temps, nb_globals);
530 }
531 }
532 for (i = 0; i < (args[0] >> 16); i++) {
533 reset_temp(args[i + 1], nb_temps, nb_globals);
534 }
535 i = nb_call_args + 3;
536 while (i) {
537 *gen_args = *args;
538 args++;
539 gen_args++;
540 i--;
541 }
542 break;
543 case INDEX_op_set_label:
544 case INDEX_op_jmp:
545 case INDEX_op_br:
546 CASE_OP_32_64(brcond):
547 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
548 for (i = 0; i < def->nb_args; i++) {
549 *gen_args = *args;
550 args++;
551 gen_args++;
552 }
553 break;
554 default:
555 /* Default case: we do know nothing about operation so no
556 propagation is done. We only trash output args. */
557 for (i = 0; i < def->nb_oargs; i++) {
558 reset_temp(args[i], nb_temps, nb_globals);
559 }
560 for (i = 0; i < def->nb_args; i++) {
561 gen_args[i] = args[i];
562 }
563 args += def->nb_args;
564 gen_args += def->nb_args;
565 break;
566 }
567 }
568
569 return gen_args;
570 }
571
572 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
573 TCGArg *args, TCGOpDef *tcg_op_defs)
574 {
575 TCGArg *res;
576 res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
577 return res;
578 }