]> git.proxmox.com Git - mirror_qemu.git/blame - tcg/optimize.c
optimize: only write to state when clearing optimizer data
[mirror_qemu.git] / tcg / optimize.c
CommitLineData
8f2e8c07
KB
1/*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26#include "config.h"
27
28#include <stdlib.h>
29#include <stdio.h>
30
31#include "qemu-common.h"
32#include "tcg-op.h"
33
8f2e8c07
KB
/* Expand to the two case labels for the _i32 and _i64 variants of opcode X.
   The caller supplies the colon after the second label. */
#define CASE_OP_32_64(x)                        \
        case glue(glue(INDEX_op_, x), _i32):    \
        case glue(glue(INDEX_op_, x), _i64)
8f2e8c07 37
22613af4
KB
/* What the optimizer currently knows about a temporary. */
typedef enum {
    TCG_TEMP_UNDEF = 0,     /* nothing is known about the temp */
    TCG_TEMP_CONST = 1,     /* the temp holds a known constant */
    TCG_TEMP_COPY = 2,      /* the temp is linked into a list of copies */
} tcg_temp_state;
43
44struct tcg_temp_info {
45 tcg_temp_state state;
46 uint16_t prev_copy;
47 uint16_t next_copy;
48 tcg_target_ulong val;
49};
50
51static struct tcg_temp_info temps[TCG_MAX_TEMPS];
52
e590d4e6
AJ
53/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove
54 the copy flag from the left temp. */
55static void reset_temp(TCGArg temp)
22613af4 56{
e590d4e6
AJ
57 if (temps[temp].state == TCG_TEMP_COPY) {
58 if (temps[temp].prev_copy == temps[temp].next_copy) {
59 temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
60 } else {
61 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
62 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
22613af4 63 }
22613af4 64 }
48b56ce1 65 temps[temp].state = TCG_TEMP_UNDEF;
22613af4
KB
66}
67
d193a14a
PB
68/* Reset all temporaries, given that there are NB_TEMPS of them. */
69static void reset_all_temps(int nb_temps)
70{
71 int i;
72 for (i = 0; i < nb_temps; i++) {
73 temps[i].state = TCG_TEMP_UNDEF;
74 }
75}
76
fe0de7aa 77static int op_bits(TCGOpcode op)
22613af4 78{
8399ad59
RH
79 const TCGOpDef *def = &tcg_op_defs[op];
80 return def->flags & TCG_OPF_64BIT ? 64 : 32;
22613af4
KB
81}
82
fe0de7aa 83static TCGOpcode op_to_movi(TCGOpcode op)
22613af4
KB
84{
85 switch (op_bits(op)) {
86 case 32:
87 return INDEX_op_movi_i32;
22613af4
KB
88 case 64:
89 return INDEX_op_movi_i64;
22613af4
KB
90 default:
91 fprintf(stderr, "op_to_movi: unexpected return value of "
92 "function op_bits.\n");
93 tcg_abort();
94 }
95}
96
e590d4e6
AJ
97static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
98{
99 TCGArg i;
100
101 /* If this is already a global, we can't do better. */
102 if (temp < s->nb_globals) {
103 return temp;
104 }
105
106 /* Search for a global first. */
107 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
108 if (i < s->nb_globals) {
109 return i;
110 }
111 }
112
113 /* If it is a temp, search for a temp local. */
114 if (!s->temps[temp].temp_local) {
115 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
116 if (s->temps[i].temp_local) {
117 return i;
118 }
119 }
120 }
121
122 /* Failure to find a better representation, return the same temp. */
123 return temp;
124}
125
126static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
127{
128 TCGArg i;
129
130 if (arg1 == arg2) {
131 return true;
132 }
133
134 if (temps[arg1].state != TCG_TEMP_COPY
135 || temps[arg2].state != TCG_TEMP_COPY) {
136 return false;
137 }
138
139 for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
140 if (i == arg2) {
141 return true;
142 }
143 }
144
145 return false;
146}
147
b80bb016
AJ
148static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
149 TCGArg dst, TCGArg src)
22613af4 150{
e590d4e6
AJ
151 reset_temp(dst);
152 assert(temps[src].state != TCG_TEMP_CONST);
153
154 if (s->temps[src].type == s->temps[dst].type) {
155 if (temps[src].state != TCG_TEMP_COPY) {
156 temps[src].state = TCG_TEMP_COPY;
22613af4
KB
157 temps[src].next_copy = src;
158 temps[src].prev_copy = src;
159 }
160 temps[dst].state = TCG_TEMP_COPY;
22613af4
KB
161 temps[dst].next_copy = temps[src].next_copy;
162 temps[dst].prev_copy = src;
163 temps[temps[dst].next_copy].prev_copy = dst;
164 temps[src].next_copy = dst;
165 }
e590d4e6 166
22613af4
KB
167 gen_args[0] = dst;
168 gen_args[1] = src;
169}
170
e590d4e6 171static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
22613af4 172{
e590d4e6 173 reset_temp(dst);
22613af4
KB
174 temps[dst].state = TCG_TEMP_CONST;
175 temps[dst].val = val;
176 gen_args[0] = dst;
177 gen_args[1] = val;
178}
179
fe0de7aa 180static TCGOpcode op_to_mov(TCGOpcode op)
53108fb5
KB
181{
182 switch (op_bits(op)) {
183 case 32:
184 return INDEX_op_mov_i32;
53108fb5
KB
185 case 64:
186 return INDEX_op_mov_i64;
53108fb5
KB
187 default:
188 fprintf(stderr, "op_to_mov: unexpected return value of "
189 "function op_bits.\n");
190 tcg_abort();
191 }
192}
193
fe0de7aa 194static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
53108fb5
KB
195{
196 switch (op) {
197 CASE_OP_32_64(add):
198 return x + y;
199
200 CASE_OP_32_64(sub):
201 return x - y;
202
203 CASE_OP_32_64(mul):
204 return x * y;
205
9a81090b
KB
206 CASE_OP_32_64(and):
207 return x & y;
208
209 CASE_OP_32_64(or):
210 return x | y;
211
212 CASE_OP_32_64(xor):
213 return x ^ y;
214
55c0975c
KB
215 case INDEX_op_shl_i32:
216 return (uint32_t)x << (uint32_t)y;
217
55c0975c
KB
218 case INDEX_op_shl_i64:
219 return (uint64_t)x << (uint64_t)y;
55c0975c
KB
220
221 case INDEX_op_shr_i32:
222 return (uint32_t)x >> (uint32_t)y;
223
55c0975c
KB
224 case INDEX_op_shr_i64:
225 return (uint64_t)x >> (uint64_t)y;
55c0975c
KB
226
227 case INDEX_op_sar_i32:
228 return (int32_t)x >> (int32_t)y;
229
55c0975c
KB
230 case INDEX_op_sar_i64:
231 return (int64_t)x >> (int64_t)y;
55c0975c
KB
232
233 case INDEX_op_rotr_i32:
25c4d9cc 234 x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
55c0975c
KB
235 return x;
236
55c0975c 237 case INDEX_op_rotr_i64:
25c4d9cc 238 x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
55c0975c 239 return x;
55c0975c
KB
240
241 case INDEX_op_rotl_i32:
25c4d9cc 242 x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
55c0975c
KB
243 return x;
244
55c0975c 245 case INDEX_op_rotl_i64:
25c4d9cc 246 x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
55c0975c 247 return x;
25c4d9cc
RH
248
249 CASE_OP_32_64(not):
a640f031 250 return ~x;
25c4d9cc 251
cb25c80a
RH
252 CASE_OP_32_64(neg):
253 return -x;
254
255 CASE_OP_32_64(andc):
256 return x & ~y;
257
258 CASE_OP_32_64(orc):
259 return x | ~y;
260
261 CASE_OP_32_64(eqv):
262 return ~(x ^ y);
263
264 CASE_OP_32_64(nand):
265 return ~(x & y);
266
267 CASE_OP_32_64(nor):
268 return ~(x | y);
269
25c4d9cc 270 CASE_OP_32_64(ext8s):
a640f031 271 return (int8_t)x;
25c4d9cc
RH
272
273 CASE_OP_32_64(ext16s):
a640f031 274 return (int16_t)x;
25c4d9cc
RH
275
276 CASE_OP_32_64(ext8u):
a640f031 277 return (uint8_t)x;
25c4d9cc
RH
278
279 CASE_OP_32_64(ext16u):
a640f031
KB
280 return (uint16_t)x;
281
a640f031
KB
282 case INDEX_op_ext32s_i64:
283 return (int32_t)x;
284
285 case INDEX_op_ext32u_i64:
286 return (uint32_t)x;
a640f031 287
53108fb5
KB
288 default:
289 fprintf(stderr,
290 "Unrecognized operation %d in do_constant_folding.\n", op);
291 tcg_abort();
292 }
293}
294
fe0de7aa 295static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
53108fb5
KB
296{
297 TCGArg res = do_constant_folding_2(op, x, y);
53108fb5
KB
298 if (op_bits(op) == 32) {
299 res &= 0xffffffff;
300 }
53108fb5
KB
301 return res;
302}
303
9519da7e
RH
304static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
305{
306 switch (c) {
307 case TCG_COND_EQ:
308 return x == y;
309 case TCG_COND_NE:
310 return x != y;
311 case TCG_COND_LT:
312 return (int32_t)x < (int32_t)y;
313 case TCG_COND_GE:
314 return (int32_t)x >= (int32_t)y;
315 case TCG_COND_LE:
316 return (int32_t)x <= (int32_t)y;
317 case TCG_COND_GT:
318 return (int32_t)x > (int32_t)y;
319 case TCG_COND_LTU:
320 return x < y;
321 case TCG_COND_GEU:
322 return x >= y;
323 case TCG_COND_LEU:
324 return x <= y;
325 case TCG_COND_GTU:
326 return x > y;
327 default:
328 tcg_abort();
329 }
330}
331
332static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
333{
334 switch (c) {
335 case TCG_COND_EQ:
336 return x == y;
337 case TCG_COND_NE:
338 return x != y;
339 case TCG_COND_LT:
340 return (int64_t)x < (int64_t)y;
341 case TCG_COND_GE:
342 return (int64_t)x >= (int64_t)y;
343 case TCG_COND_LE:
344 return (int64_t)x <= (int64_t)y;
345 case TCG_COND_GT:
346 return (int64_t)x > (int64_t)y;
347 case TCG_COND_LTU:
348 return x < y;
349 case TCG_COND_GEU:
350 return x >= y;
351 case TCG_COND_LEU:
352 return x <= y;
353 case TCG_COND_GTU:
354 return x > y;
355 default:
356 tcg_abort();
357 }
358}
359
360static bool do_constant_folding_cond_eq(TCGCond c)
361{
362 switch (c) {
363 case TCG_COND_GT:
364 case TCG_COND_LTU:
365 case TCG_COND_LT:
366 case TCG_COND_GTU:
367 case TCG_COND_NE:
368 return 0;
369 case TCG_COND_GE:
370 case TCG_COND_GEU:
371 case TCG_COND_LE:
372 case TCG_COND_LEU:
373 case TCG_COND_EQ:
374 return 1;
375 default:
376 tcg_abort();
377 }
378}
379
b336ceb6
AJ
380/* Return 2 if the condition can't be simplified, and the result
381 of the condition (0 or 1) if it can */
f8dd19e5
AJ
382static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
383 TCGArg y, TCGCond c)
384{
b336ceb6
AJ
385 if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
386 switch (op_bits(op)) {
387 case 32:
9519da7e 388 return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
b336ceb6 389 case 64:
9519da7e 390 return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
0aed257f 391 default:
9519da7e 392 tcg_abort();
b336ceb6 393 }
9519da7e
RH
394 } else if (temps_are_copies(x, y)) {
395 return do_constant_folding_cond_eq(c);
b336ceb6
AJ
396 } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
397 switch (c) {
f8dd19e5 398 case TCG_COND_LTU:
b336ceb6 399 return 0;
f8dd19e5 400 case TCG_COND_GEU:
b336ceb6
AJ
401 return 1;
402 default:
403 return 2;
f8dd19e5 404 }
b336ceb6
AJ
405 } else {
406 return 2;
f8dd19e5 407 }
f8dd19e5
AJ
408}
409
6c4382f8
RH
410/* Return 2 if the condition can't be simplified, and the result
411 of the condition (0 or 1) if it can */
412static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
413{
414 TCGArg al = p1[0], ah = p1[1];
415 TCGArg bl = p2[0], bh = p2[1];
416
417 if (temps[bl].state == TCG_TEMP_CONST
418 && temps[bh].state == TCG_TEMP_CONST) {
419 uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
420
421 if (temps[al].state == TCG_TEMP_CONST
422 && temps[ah].state == TCG_TEMP_CONST) {
423 uint64_t a;
424 a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
425 return do_constant_folding_cond_64(a, b, c);
426 }
427 if (b == 0) {
428 switch (c) {
429 case TCG_COND_LTU:
430 return 0;
431 case TCG_COND_GEU:
432 return 1;
433 default:
434 break;
435 }
436 }
437 }
438 if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
439 return do_constant_folding_cond_eq(c);
440 }
441 return 2;
442}
443
24c9ae4e
RH
444static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
445{
446 TCGArg a1 = *p1, a2 = *p2;
447 int sum = 0;
448 sum += temps[a1].state == TCG_TEMP_CONST;
449 sum -= temps[a2].state == TCG_TEMP_CONST;
450
451 /* Prefer the constant in second argument, and then the form
452 op a, a, b, which is better handled on non-RISC hosts. */
453 if (sum > 0 || (sum == 0 && dest == a2)) {
454 *p1 = a2;
455 *p2 = a1;
456 return true;
457 }
458 return false;
459}
460
0bfcb865
RH
461static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
462{
463 int sum = 0;
464 sum += temps[p1[0]].state == TCG_TEMP_CONST;
465 sum += temps[p1[1]].state == TCG_TEMP_CONST;
466 sum -= temps[p2[0]].state == TCG_TEMP_CONST;
467 sum -= temps[p2[1]].state == TCG_TEMP_CONST;
468 if (sum > 0) {
469 TCGArg t;
470 t = p1[0], p1[0] = p2[0], p2[0] = t;
471 t = p1[1], p1[1] = p2[1], p2[1] = t;
472 return true;
473 }
474 return false;
475}
476
22613af4 477/* Propagate constants and copies, fold constant expressions. */
8f2e8c07
KB
478static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
479 TCGArg *args, TCGOpDef *tcg_op_defs)
480{
fe0de7aa
BS
481 int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
482 TCGOpcode op;
8f2e8c07
KB
483 const TCGOpDef *def;
484 TCGArg *gen_args;
53108fb5 485 TCGArg tmp;
5d8f5363 486
22613af4
KB
487 /* Array VALS has an element for each temp.
488 If this temp holds a constant then its value is kept in VALS' element.
e590d4e6
AJ
489 If this temp is a copy of other ones then the other copies are
490 available through the doubly linked circular list. */
8f2e8c07
KB
491
492 nb_temps = s->nb_temps;
493 nb_globals = s->nb_globals;
d193a14a 494 reset_all_temps(nb_temps);
8f2e8c07 495
92414b31 496 nb_ops = tcg_opc_ptr - s->gen_opc_buf;
8f2e8c07
KB
497 gen_args = args;
498 for (op_index = 0; op_index < nb_ops; op_index++) {
92414b31 499 op = s->gen_opc_buf[op_index];
8f2e8c07 500 def = &tcg_op_defs[op];
22613af4 501 /* Do copy propagation */
1ff8c541
AJ
502 if (op == INDEX_op_call) {
503 int nb_oargs = args[0] >> 16;
504 int nb_iargs = args[0] & 0xffff;
505 for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
506 if (temps[args[i]].state == TCG_TEMP_COPY) {
507 args[i] = find_better_copy(s, args[i]);
508 }
509 }
510 } else {
22613af4
KB
511 for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
512 if (temps[args[i]].state == TCG_TEMP_COPY) {
e590d4e6 513 args[i] = find_better_copy(s, args[i]);
22613af4
KB
514 }
515 }
516 }
517
53108fb5
KB
518 /* For commutative operations make constant second argument */
519 switch (op) {
520 CASE_OP_32_64(add):
521 CASE_OP_32_64(mul):
9a81090b
KB
522 CASE_OP_32_64(and):
523 CASE_OP_32_64(or):
524 CASE_OP_32_64(xor):
cb25c80a
RH
525 CASE_OP_32_64(eqv):
526 CASE_OP_32_64(nand):
527 CASE_OP_32_64(nor):
24c9ae4e 528 swap_commutative(args[0], &args[1], &args[2]);
53108fb5 529 break;
65a7cce1 530 CASE_OP_32_64(brcond):
24c9ae4e 531 if (swap_commutative(-1, &args[0], &args[1])) {
65a7cce1
AJ
532 args[2] = tcg_swap_cond(args[2]);
533 }
534 break;
535 CASE_OP_32_64(setcond):
24c9ae4e 536 if (swap_commutative(args[0], &args[1], &args[2])) {
65a7cce1
AJ
537 args[3] = tcg_swap_cond(args[3]);
538 }
539 break;
fa01a208 540 CASE_OP_32_64(movcond):
24c9ae4e
RH
541 if (swap_commutative(-1, &args[1], &args[2])) {
542 args[5] = tcg_swap_cond(args[5]);
5d8f5363
RH
543 }
544 /* For movcond, we canonicalize the "false" input reg to match
545 the destination reg so that the tcg backend can implement
546 a "move if true" operation. */
24c9ae4e
RH
547 if (swap_commutative(args[0], &args[4], &args[3])) {
548 args[5] = tcg_invert_cond(args[5]);
fa01a208 549 }
1e484e61
RH
550 break;
551 case INDEX_op_add2_i32:
552 swap_commutative(args[0], &args[2], &args[4]);
553 swap_commutative(args[1], &args[3], &args[5]);
554 break;
1414968a
RH
555 case INDEX_op_mulu2_i32:
556 swap_commutative(args[0], &args[2], &args[3]);
557 break;
0bfcb865
RH
558 case INDEX_op_brcond2_i32:
559 if (swap_commutative2(&args[0], &args[2])) {
560 args[4] = tcg_swap_cond(args[4]);
561 }
562 break;
563 case INDEX_op_setcond2_i32:
564 if (swap_commutative2(&args[1], &args[3])) {
565 args[5] = tcg_swap_cond(args[5]);
566 }
567 break;
53108fb5
KB
568 default:
569 break;
570 }
571
01ee5282
AJ
572 /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
573 switch (op) {
574 CASE_OP_32_64(shl):
575 CASE_OP_32_64(shr):
576 CASE_OP_32_64(sar):
577 CASE_OP_32_64(rotl):
578 CASE_OP_32_64(rotr):
579 if (temps[args[1]].state == TCG_TEMP_CONST
580 && temps[args[1]].val == 0) {
92414b31 581 s->gen_opc_buf[op_index] = op_to_movi(op);
e590d4e6 582 tcg_opt_gen_movi(gen_args, args[0], 0);
01ee5282
AJ
583 args += 3;
584 gen_args += 2;
585 continue;
586 }
587 break;
588 default:
589 break;
590 }
591
56e49438 592 /* Simplify expression for "op r, a, 0 => mov r, a" cases */
53108fb5
KB
593 switch (op) {
594 CASE_OP_32_64(add):
595 CASE_OP_32_64(sub):
55c0975c
KB
596 CASE_OP_32_64(shl):
597 CASE_OP_32_64(shr):
598 CASE_OP_32_64(sar):
25c4d9cc
RH
599 CASE_OP_32_64(rotl):
600 CASE_OP_32_64(rotr):
38ee188b
AJ
601 CASE_OP_32_64(or):
602 CASE_OP_32_64(xor):
53108fb5
KB
603 if (temps[args[1]].state == TCG_TEMP_CONST) {
604 /* Proceed with possible constant folding. */
605 break;
606 }
607 if (temps[args[2]].state == TCG_TEMP_CONST
608 && temps[args[2]].val == 0) {
e590d4e6 609 if (temps_are_copies(args[0], args[1])) {
92414b31 610 s->gen_opc_buf[op_index] = INDEX_op_nop;
53108fb5 611 } else {
92414b31 612 s->gen_opc_buf[op_index] = op_to_mov(op);
b80bb016 613 tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
53108fb5 614 gen_args += 2;
53108fb5 615 }
fedc0da2 616 args += 3;
53108fb5
KB
617 continue;
618 }
619 break;
56e49438
AJ
620 default:
621 break;
622 }
623
624 /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
625 switch (op) {
61251c0c 626 CASE_OP_32_64(and):
53108fb5
KB
627 CASE_OP_32_64(mul):
628 if ((temps[args[2]].state == TCG_TEMP_CONST
629 && temps[args[2]].val == 0)) {
92414b31 630 s->gen_opc_buf[op_index] = op_to_movi(op);
e590d4e6 631 tcg_opt_gen_movi(gen_args, args[0], 0);
53108fb5
KB
632 args += 3;
633 gen_args += 2;
634 continue;
635 }
636 break;
56e49438
AJ
637 default:
638 break;
639 }
640
641 /* Simplify expression for "op r, a, a => mov r, a" cases */
642 switch (op) {
9a81090b
KB
643 CASE_OP_32_64(or):
644 CASE_OP_32_64(and):
0aba1c73 645 if (temps_are_copies(args[1], args[2])) {
e590d4e6 646 if (temps_are_copies(args[0], args[1])) {
92414b31 647 s->gen_opc_buf[op_index] = INDEX_op_nop;
9a81090b 648 } else {
92414b31 649 s->gen_opc_buf[op_index] = op_to_mov(op);
b80bb016 650 tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
9a81090b 651 gen_args += 2;
9a81090b 652 }
fedc0da2 653 args += 3;
9a81090b
KB
654 continue;
655 }
656 break;
fe0de7aa
BS
657 default:
658 break;
53108fb5
KB
659 }
660
3c94193e
AJ
661 /* Simplify expression for "op r, a, a => movi r, 0" cases */
662 switch (op) {
663 CASE_OP_32_64(sub):
664 CASE_OP_32_64(xor):
665 if (temps_are_copies(args[1], args[2])) {
92414b31 666 s->gen_opc_buf[op_index] = op_to_movi(op);
3c94193e
AJ
667 tcg_opt_gen_movi(gen_args, args[0], 0);
668 gen_args += 2;
669 args += 3;
670 continue;
671 }
672 break;
673 default:
674 break;
675 }
676
22613af4
KB
677 /* Propagate constants through copy operations and do constant
678 folding. Constants will be substituted to arguments by register
679 allocator where needed and possible. Also detect copies. */
8f2e8c07 680 switch (op) {
22613af4 681 CASE_OP_32_64(mov):
e590d4e6 682 if (temps_are_copies(args[0], args[1])) {
22613af4 683 args += 2;
92414b31 684 s->gen_opc_buf[op_index] = INDEX_op_nop;
22613af4
KB
685 break;
686 }
687 if (temps[args[1]].state != TCG_TEMP_CONST) {
b80bb016 688 tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
22613af4
KB
689 gen_args += 2;
690 args += 2;
691 break;
692 }
693 /* Source argument is constant. Rewrite the operation and
694 let movi case handle it. */
695 op = op_to_movi(op);
92414b31 696 s->gen_opc_buf[op_index] = op;
22613af4
KB
697 args[1] = temps[args[1]].val;
698 /* fallthrough */
699 CASE_OP_32_64(movi):
e590d4e6 700 tcg_opt_gen_movi(gen_args, args[0], args[1]);
22613af4
KB
701 gen_args += 2;
702 args += 2;
703 break;
6e14e91b 704
a640f031 705 CASE_OP_32_64(not):
cb25c80a 706 CASE_OP_32_64(neg):
25c4d9cc
RH
707 CASE_OP_32_64(ext8s):
708 CASE_OP_32_64(ext8u):
709 CASE_OP_32_64(ext16s):
710 CASE_OP_32_64(ext16u):
a640f031
KB
711 case INDEX_op_ext32s_i64:
712 case INDEX_op_ext32u_i64:
a640f031 713 if (temps[args[1]].state == TCG_TEMP_CONST) {
92414b31 714 s->gen_opc_buf[op_index] = op_to_movi(op);
a640f031 715 tmp = do_constant_folding(op, temps[args[1]].val, 0);
e590d4e6 716 tcg_opt_gen_movi(gen_args, args[0], tmp);
6e14e91b
RH
717 gen_args += 2;
718 args += 2;
719 break;
a640f031 720 }
6e14e91b
RH
721 goto do_default;
722
53108fb5
KB
723 CASE_OP_32_64(add):
724 CASE_OP_32_64(sub):
725 CASE_OP_32_64(mul):
9a81090b
KB
726 CASE_OP_32_64(or):
727 CASE_OP_32_64(and):
728 CASE_OP_32_64(xor):
55c0975c
KB
729 CASE_OP_32_64(shl):
730 CASE_OP_32_64(shr):
731 CASE_OP_32_64(sar):
25c4d9cc
RH
732 CASE_OP_32_64(rotl):
733 CASE_OP_32_64(rotr):
cb25c80a
RH
734 CASE_OP_32_64(andc):
735 CASE_OP_32_64(orc):
736 CASE_OP_32_64(eqv):
737 CASE_OP_32_64(nand):
738 CASE_OP_32_64(nor):
53108fb5
KB
739 if (temps[args[1]].state == TCG_TEMP_CONST
740 && temps[args[2]].state == TCG_TEMP_CONST) {
92414b31 741 s->gen_opc_buf[op_index] = op_to_movi(op);
53108fb5
KB
742 tmp = do_constant_folding(op, temps[args[1]].val,
743 temps[args[2]].val);
e590d4e6 744 tcg_opt_gen_movi(gen_args, args[0], tmp);
53108fb5 745 gen_args += 2;
6e14e91b
RH
746 args += 3;
747 break;
53108fb5 748 }
6e14e91b
RH
749 goto do_default;
750
7ef55fc9
AJ
751 CASE_OP_32_64(deposit):
752 if (temps[args[1]].state == TCG_TEMP_CONST
753 && temps[args[2]].state == TCG_TEMP_CONST) {
92414b31 754 s->gen_opc_buf[op_index] = op_to_movi(op);
7ef55fc9
AJ
755 tmp = ((1ull << args[4]) - 1);
756 tmp = (temps[args[1]].val & ~(tmp << args[3]))
757 | ((temps[args[2]].val & tmp) << args[3]);
758 tcg_opt_gen_movi(gen_args, args[0], tmp);
759 gen_args += 2;
6e14e91b
RH
760 args += 5;
761 break;
7ef55fc9 762 }
6e14e91b
RH
763 goto do_default;
764
f8dd19e5 765 CASE_OP_32_64(setcond):
b336ceb6
AJ
766 tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
767 if (tmp != 2) {
92414b31 768 s->gen_opc_buf[op_index] = op_to_movi(op);
e590d4e6 769 tcg_opt_gen_movi(gen_args, args[0], tmp);
f8dd19e5 770 gen_args += 2;
6e14e91b
RH
771 args += 4;
772 break;
f8dd19e5 773 }
6e14e91b
RH
774 goto do_default;
775
fbeaa26c 776 CASE_OP_32_64(brcond):
b336ceb6
AJ
777 tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
778 if (tmp != 2) {
779 if (tmp) {
d193a14a 780 reset_all_temps(nb_temps);
92414b31 781 s->gen_opc_buf[op_index] = INDEX_op_br;
fbeaa26c
AJ
782 gen_args[0] = args[3];
783 gen_args += 1;
fbeaa26c 784 } else {
92414b31 785 s->gen_opc_buf[op_index] = INDEX_op_nop;
fbeaa26c 786 }
6e14e91b
RH
787 args += 4;
788 break;
fbeaa26c 789 }
6e14e91b
RH
790 goto do_default;
791
fa01a208 792 CASE_OP_32_64(movcond):
b336ceb6
AJ
793 tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
794 if (tmp != 2) {
e590d4e6 795 if (temps_are_copies(args[0], args[4-tmp])) {
92414b31 796 s->gen_opc_buf[op_index] = INDEX_op_nop;
fa01a208 797 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
92414b31 798 s->gen_opc_buf[op_index] = op_to_movi(op);
e590d4e6 799 tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
fa01a208
RH
800 gen_args += 2;
801 } else {
92414b31 802 s->gen_opc_buf[op_index] = op_to_mov(op);
e590d4e6 803 tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
fa01a208
RH
804 gen_args += 2;
805 }
6e14e91b
RH
806 args += 6;
807 break;
fa01a208 808 }
6e14e91b 809 goto do_default;
212c328d
RH
810
811 case INDEX_op_add2_i32:
812 case INDEX_op_sub2_i32:
813 if (temps[args[2]].state == TCG_TEMP_CONST
814 && temps[args[3]].state == TCG_TEMP_CONST
815 && temps[args[4]].state == TCG_TEMP_CONST
816 && temps[args[5]].state == TCG_TEMP_CONST) {
817 uint32_t al = temps[args[2]].val;
818 uint32_t ah = temps[args[3]].val;
819 uint32_t bl = temps[args[4]].val;
820 uint32_t bh = temps[args[5]].val;
821 uint64_t a = ((uint64_t)ah << 32) | al;
822 uint64_t b = ((uint64_t)bh << 32) | bl;
823 TCGArg rl, rh;
824
825 if (op == INDEX_op_add2_i32) {
826 a += b;
827 } else {
828 a -= b;
829 }
830
831 /* We emit the extra nop when we emit the add2/sub2. */
92414b31 832 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
212c328d
RH
833
834 rl = args[0];
835 rh = args[1];
92414b31
EV
836 s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
837 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
212c328d
RH
838 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
839 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
840 gen_args += 4;
841 args += 6;
842 break;
843 }
844 goto do_default;
1414968a
RH
845
846 case INDEX_op_mulu2_i32:
847 if (temps[args[2]].state == TCG_TEMP_CONST
848 && temps[args[3]].state == TCG_TEMP_CONST) {
849 uint32_t a = temps[args[2]].val;
850 uint32_t b = temps[args[3]].val;
851 uint64_t r = (uint64_t)a * b;
852 TCGArg rl, rh;
853
854 /* We emit the extra nop when we emit the mulu2. */
92414b31 855 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
1414968a
RH
856
857 rl = args[0];
858 rh = args[1];
92414b31
EV
859 s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
860 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
1414968a
RH
861 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
862 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
863 gen_args += 4;
864 args += 4;
865 break;
866 }
867 goto do_default;
6e14e91b 868
bc1473ef 869 case INDEX_op_brcond2_i32:
6c4382f8
RH
870 tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
871 if (tmp != 2) {
872 if (tmp) {
d193a14a 873 reset_all_temps(nb_temps);
92414b31 874 s->gen_opc_buf[op_index] = INDEX_op_br;
6c4382f8
RH
875 gen_args[0] = args[5];
876 gen_args += 1;
877 } else {
92414b31 878 s->gen_opc_buf[op_index] = INDEX_op_nop;
6c4382f8
RH
879 }
880 } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
881 && temps[args[2]].state == TCG_TEMP_CONST
882 && temps[args[3]].state == TCG_TEMP_CONST
883 && temps[args[2]].val == 0
884 && temps[args[3]].val == 0) {
885 /* Simplify LT/GE comparisons vs zero to a single compare
886 vs the high word of the input. */
d193a14a 887 reset_all_temps(nb_temps);
92414b31 888 s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
bc1473ef
RH
889 gen_args[0] = args[1];
890 gen_args[1] = args[3];
891 gen_args[2] = args[4];
892 gen_args[3] = args[5];
893 gen_args += 4;
6c4382f8
RH
894 } else {
895 goto do_default;
bc1473ef 896 }
6c4382f8
RH
897 args += 6;
898 break;
bc1473ef
RH
899
900 case INDEX_op_setcond2_i32:
6c4382f8
RH
901 tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
902 if (tmp != 2) {
92414b31 903 s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
6c4382f8
RH
904 tcg_opt_gen_movi(gen_args, args[0], tmp);
905 gen_args += 2;
906 } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
907 && temps[args[3]].state == TCG_TEMP_CONST
908 && temps[args[4]].state == TCG_TEMP_CONST
909 && temps[args[3]].val == 0
910 && temps[args[4]].val == 0) {
911 /* Simplify LT/GE comparisons vs zero to a single compare
912 vs the high word of the input. */
92414b31 913 s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
bc1473ef
RH
914 gen_args[0] = args[0];
915 gen_args[1] = args[2];
916 gen_args[2] = args[4];
917 gen_args[3] = args[5];
918 gen_args += 4;
6c4382f8
RH
919 } else {
920 goto do_default;
bc1473ef 921 }
6c4382f8
RH
922 args += 6;
923 break;
bc1473ef 924
8f2e8c07 925 case INDEX_op_call:
22613af4 926 nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
78505279
AJ
927 if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
928 TCG_CALL_NO_WRITE_GLOBALS))) {
22613af4 929 for (i = 0; i < nb_globals; i++) {
e590d4e6 930 reset_temp(i);
22613af4
KB
931 }
932 }
933 for (i = 0; i < (args[0] >> 16); i++) {
e590d4e6 934 reset_temp(args[i + 1]);
22613af4
KB
935 }
936 i = nb_call_args + 3;
8f2e8c07
KB
937 while (i) {
938 *gen_args = *args;
939 args++;
940 gen_args++;
941 i--;
942 }
943 break;
6e14e91b 944
8f2e8c07 945 default:
6e14e91b
RH
946 do_default:
947 /* Default case: we know nothing about operation (or were unable
948 to compute the operation result) so no propagation is done.
949 We trash everything if the operation is the end of a basic
950 block, otherwise we only trash the output args. */
a2550660 951 if (def->flags & TCG_OPF_BB_END) {
d193a14a 952 reset_all_temps(nb_temps);
a2550660
AJ
953 } else {
954 for (i = 0; i < def->nb_oargs; i++) {
e590d4e6 955 reset_temp(args[i]);
a2550660 956 }
22613af4 957 }
8f2e8c07
KB
958 for (i = 0; i < def->nb_args; i++) {
959 gen_args[i] = args[i];
960 }
961 args += def->nb_args;
962 gen_args += def->nb_args;
963 break;
964 }
965 }
966
967 return gen_args;
968}
969
970TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
971 TCGArg *args, TCGOpDef *tcg_op_defs)
972{
973 TCGArg *res;
974 res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
975 return res;
976}