]>
Commit | Line | Data |
---|---|---|
8f2e8c07 KB |
1 | /* |
2 | * Optimizations for Tiny Code Generator for QEMU | |
3 | * | |
4 | * Copyright (c) 2010 Samsung Electronics. | |
5 | * Contributed by Kirill Batuzov <batuzovk@ispras.ru> | |
6 | * | |
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
8 | * of this software and associated documentation files (the "Software"), to deal | |
9 | * in the Software without restriction, including without limitation the rights | |
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
11 | * copies of the Software, and to permit persons to whom the Software is | |
12 | * furnished to do so, subject to the following conditions: | |
13 | * | |
14 | * The above copyright notice and this permission notice shall be included in | |
15 | * all copies or substantial portions of the Software. | |
16 | * | |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
23 | * THE SOFTWARE. | |
24 | */ | |
25 | ||
26 | #include "config.h" | |
27 | ||
28 | #include <stdlib.h> | |
29 | #include <stdio.h> | |
30 | ||
31 | #include "qemu-common.h" | |
32 | #include "tcg-op.h" | |
33 | ||
8f2e8c07 KB |
/* Expand to the case labels for both the _i32 and the _i64 variant of
   opcode X, for use inside a switch on a TCGOpcode.  The second label is
   deliberately left without its trailing colon: the use site writes
   "CASE_OP_32_64(add):" and supplies it.
   NOTE(review): relies on the project's glue() macro pasting its two
   arguments into one token -- confirm against its definition. */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
8f2e8c07 | 37 | |
22613af4 KB |
38 | typedef enum { |
39 | TCG_TEMP_UNDEF = 0, | |
40 | TCG_TEMP_CONST, | |
41 | TCG_TEMP_COPY, | |
22613af4 KB |
42 | } tcg_temp_state; |
43 | ||
44 | struct tcg_temp_info { | |
45 | tcg_temp_state state; | |
46 | uint16_t prev_copy; | |
47 | uint16_t next_copy; | |
48 | tcg_target_ulong val; | |
49 | }; | |
50 | ||
51 | static struct tcg_temp_info temps[TCG_MAX_TEMPS]; | |
52 | ||
e590d4e6 AJ |
53 | /* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove |
54 | the copy flag from the left temp. */ | |
55 | static void reset_temp(TCGArg temp) | |
22613af4 | 56 | { |
e590d4e6 AJ |
57 | if (temps[temp].state == TCG_TEMP_COPY) { |
58 | if (temps[temp].prev_copy == temps[temp].next_copy) { | |
59 | temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; | |
60 | } else { | |
61 | temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; | |
62 | temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; | |
22613af4 | 63 | } |
22613af4 | 64 | } |
48b56ce1 | 65 | temps[temp].state = TCG_TEMP_UNDEF; |
22613af4 KB |
66 | } |
67 | ||
d193a14a PB |
68 | /* Reset all temporaries, given that there are NB_TEMPS of them. */ |
69 | static void reset_all_temps(int nb_temps) | |
70 | { | |
71 | int i; | |
72 | for (i = 0; i < nb_temps; i++) { | |
73 | temps[i].state = TCG_TEMP_UNDEF; | |
74 | } | |
75 | } | |
76 | ||
fe0de7aa | 77 | static int op_bits(TCGOpcode op) |
22613af4 | 78 | { |
8399ad59 RH |
79 | const TCGOpDef *def = &tcg_op_defs[op]; |
80 | return def->flags & TCG_OPF_64BIT ? 64 : 32; | |
22613af4 KB |
81 | } |
82 | ||
fe0de7aa | 83 | static TCGOpcode op_to_movi(TCGOpcode op) |
22613af4 KB |
84 | { |
85 | switch (op_bits(op)) { | |
86 | case 32: | |
87 | return INDEX_op_movi_i32; | |
22613af4 KB |
88 | case 64: |
89 | return INDEX_op_movi_i64; | |
22613af4 KB |
90 | default: |
91 | fprintf(stderr, "op_to_movi: unexpected return value of " | |
92 | "function op_bits.\n"); | |
93 | tcg_abort(); | |
94 | } | |
95 | } | |
96 | ||
e590d4e6 AJ |
97 | static TCGArg find_better_copy(TCGContext *s, TCGArg temp) |
98 | { | |
99 | TCGArg i; | |
100 | ||
101 | /* If this is already a global, we can't do better. */ | |
102 | if (temp < s->nb_globals) { | |
103 | return temp; | |
104 | } | |
105 | ||
106 | /* Search for a global first. */ | |
107 | for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { | |
108 | if (i < s->nb_globals) { | |
109 | return i; | |
110 | } | |
111 | } | |
112 | ||
113 | /* If it is a temp, search for a temp local. */ | |
114 | if (!s->temps[temp].temp_local) { | |
115 | for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { | |
116 | if (s->temps[i].temp_local) { | |
117 | return i; | |
118 | } | |
119 | } | |
120 | } | |
121 | ||
122 | /* Failure to find a better representation, return the same temp. */ | |
123 | return temp; | |
124 | } | |
125 | ||
126 | static bool temps_are_copies(TCGArg arg1, TCGArg arg2) | |
127 | { | |
128 | TCGArg i; | |
129 | ||
130 | if (arg1 == arg2) { | |
131 | return true; | |
132 | } | |
133 | ||
134 | if (temps[arg1].state != TCG_TEMP_COPY | |
135 | || temps[arg2].state != TCG_TEMP_COPY) { | |
136 | return false; | |
137 | } | |
138 | ||
139 | for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) { | |
140 | if (i == arg2) { | |
141 | return true; | |
142 | } | |
143 | } | |
144 | ||
145 | return false; | |
146 | } | |
147 | ||
b80bb016 AJ |
148 | static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, |
149 | TCGArg dst, TCGArg src) | |
22613af4 | 150 | { |
e590d4e6 AJ |
151 | reset_temp(dst); |
152 | assert(temps[src].state != TCG_TEMP_CONST); | |
153 | ||
154 | if (s->temps[src].type == s->temps[dst].type) { | |
155 | if (temps[src].state != TCG_TEMP_COPY) { | |
156 | temps[src].state = TCG_TEMP_COPY; | |
22613af4 KB |
157 | temps[src].next_copy = src; |
158 | temps[src].prev_copy = src; | |
159 | } | |
160 | temps[dst].state = TCG_TEMP_COPY; | |
22613af4 KB |
161 | temps[dst].next_copy = temps[src].next_copy; |
162 | temps[dst].prev_copy = src; | |
163 | temps[temps[dst].next_copy].prev_copy = dst; | |
164 | temps[src].next_copy = dst; | |
165 | } | |
e590d4e6 | 166 | |
22613af4 KB |
167 | gen_args[0] = dst; |
168 | gen_args[1] = src; | |
169 | } | |
170 | ||
e590d4e6 | 171 | static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val) |
22613af4 | 172 | { |
e590d4e6 | 173 | reset_temp(dst); |
22613af4 KB |
174 | temps[dst].state = TCG_TEMP_CONST; |
175 | temps[dst].val = val; | |
176 | gen_args[0] = dst; | |
177 | gen_args[1] = val; | |
178 | } | |
179 | ||
fe0de7aa | 180 | static TCGOpcode op_to_mov(TCGOpcode op) |
53108fb5 KB |
181 | { |
182 | switch (op_bits(op)) { | |
183 | case 32: | |
184 | return INDEX_op_mov_i32; | |
53108fb5 KB |
185 | case 64: |
186 | return INDEX_op_mov_i64; | |
53108fb5 KB |
187 | default: |
188 | fprintf(stderr, "op_to_mov: unexpected return value of " | |
189 | "function op_bits.\n"); | |
190 | tcg_abort(); | |
191 | } | |
192 | } | |
193 | ||
fe0de7aa | 194 | static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) |
53108fb5 KB |
195 | { |
196 | switch (op) { | |
197 | CASE_OP_32_64(add): | |
198 | return x + y; | |
199 | ||
200 | CASE_OP_32_64(sub): | |
201 | return x - y; | |
202 | ||
203 | CASE_OP_32_64(mul): | |
204 | return x * y; | |
205 | ||
9a81090b KB |
206 | CASE_OP_32_64(and): |
207 | return x & y; | |
208 | ||
209 | CASE_OP_32_64(or): | |
210 | return x | y; | |
211 | ||
212 | CASE_OP_32_64(xor): | |
213 | return x ^ y; | |
214 | ||
55c0975c KB |
215 | case INDEX_op_shl_i32: |
216 | return (uint32_t)x << (uint32_t)y; | |
217 | ||
55c0975c KB |
218 | case INDEX_op_shl_i64: |
219 | return (uint64_t)x << (uint64_t)y; | |
55c0975c KB |
220 | |
221 | case INDEX_op_shr_i32: | |
222 | return (uint32_t)x >> (uint32_t)y; | |
223 | ||
55c0975c KB |
224 | case INDEX_op_shr_i64: |
225 | return (uint64_t)x >> (uint64_t)y; | |
55c0975c KB |
226 | |
227 | case INDEX_op_sar_i32: | |
228 | return (int32_t)x >> (int32_t)y; | |
229 | ||
55c0975c KB |
230 | case INDEX_op_sar_i64: |
231 | return (int64_t)x >> (int64_t)y; | |
55c0975c KB |
232 | |
233 | case INDEX_op_rotr_i32: | |
25c4d9cc | 234 | x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y); |
55c0975c KB |
235 | return x; |
236 | ||
55c0975c | 237 | case INDEX_op_rotr_i64: |
25c4d9cc | 238 | x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y); |
55c0975c | 239 | return x; |
55c0975c KB |
240 | |
241 | case INDEX_op_rotl_i32: | |
25c4d9cc | 242 | x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y)); |
55c0975c KB |
243 | return x; |
244 | ||
55c0975c | 245 | case INDEX_op_rotl_i64: |
25c4d9cc | 246 | x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y)); |
55c0975c | 247 | return x; |
25c4d9cc RH |
248 | |
249 | CASE_OP_32_64(not): | |
a640f031 | 250 | return ~x; |
25c4d9cc | 251 | |
cb25c80a RH |
252 | CASE_OP_32_64(neg): |
253 | return -x; | |
254 | ||
255 | CASE_OP_32_64(andc): | |
256 | return x & ~y; | |
257 | ||
258 | CASE_OP_32_64(orc): | |
259 | return x | ~y; | |
260 | ||
261 | CASE_OP_32_64(eqv): | |
262 | return ~(x ^ y); | |
263 | ||
264 | CASE_OP_32_64(nand): | |
265 | return ~(x & y); | |
266 | ||
267 | CASE_OP_32_64(nor): | |
268 | return ~(x | y); | |
269 | ||
25c4d9cc | 270 | CASE_OP_32_64(ext8s): |
a640f031 | 271 | return (int8_t)x; |
25c4d9cc RH |
272 | |
273 | CASE_OP_32_64(ext16s): | |
a640f031 | 274 | return (int16_t)x; |
25c4d9cc RH |
275 | |
276 | CASE_OP_32_64(ext8u): | |
a640f031 | 277 | return (uint8_t)x; |
25c4d9cc RH |
278 | |
279 | CASE_OP_32_64(ext16u): | |
a640f031 KB |
280 | return (uint16_t)x; |
281 | ||
a640f031 KB |
282 | case INDEX_op_ext32s_i64: |
283 | return (int32_t)x; | |
284 | ||
285 | case INDEX_op_ext32u_i64: | |
286 | return (uint32_t)x; | |
a640f031 | 287 | |
53108fb5 KB |
288 | default: |
289 | fprintf(stderr, | |
290 | "Unrecognized operation %d in do_constant_folding.\n", op); | |
291 | tcg_abort(); | |
292 | } | |
293 | } | |
294 | ||
fe0de7aa | 295 | static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) |
53108fb5 KB |
296 | { |
297 | TCGArg res = do_constant_folding_2(op, x, y); | |
53108fb5 KB |
298 | if (op_bits(op) == 32) { |
299 | res &= 0xffffffff; | |
300 | } | |
53108fb5 KB |
301 | return res; |
302 | } | |
303 | ||
9519da7e RH |
304 | static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) |
305 | { | |
306 | switch (c) { | |
307 | case TCG_COND_EQ: | |
308 | return x == y; | |
309 | case TCG_COND_NE: | |
310 | return x != y; | |
311 | case TCG_COND_LT: | |
312 | return (int32_t)x < (int32_t)y; | |
313 | case TCG_COND_GE: | |
314 | return (int32_t)x >= (int32_t)y; | |
315 | case TCG_COND_LE: | |
316 | return (int32_t)x <= (int32_t)y; | |
317 | case TCG_COND_GT: | |
318 | return (int32_t)x > (int32_t)y; | |
319 | case TCG_COND_LTU: | |
320 | return x < y; | |
321 | case TCG_COND_GEU: | |
322 | return x >= y; | |
323 | case TCG_COND_LEU: | |
324 | return x <= y; | |
325 | case TCG_COND_GTU: | |
326 | return x > y; | |
327 | default: | |
328 | tcg_abort(); | |
329 | } | |
330 | } | |
331 | ||
332 | static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) | |
333 | { | |
334 | switch (c) { | |
335 | case TCG_COND_EQ: | |
336 | return x == y; | |
337 | case TCG_COND_NE: | |
338 | return x != y; | |
339 | case TCG_COND_LT: | |
340 | return (int64_t)x < (int64_t)y; | |
341 | case TCG_COND_GE: | |
342 | return (int64_t)x >= (int64_t)y; | |
343 | case TCG_COND_LE: | |
344 | return (int64_t)x <= (int64_t)y; | |
345 | case TCG_COND_GT: | |
346 | return (int64_t)x > (int64_t)y; | |
347 | case TCG_COND_LTU: | |
348 | return x < y; | |
349 | case TCG_COND_GEU: | |
350 | return x >= y; | |
351 | case TCG_COND_LEU: | |
352 | return x <= y; | |
353 | case TCG_COND_GTU: | |
354 | return x > y; | |
355 | default: | |
356 | tcg_abort(); | |
357 | } | |
358 | } | |
359 | ||
360 | static bool do_constant_folding_cond_eq(TCGCond c) | |
361 | { | |
362 | switch (c) { | |
363 | case TCG_COND_GT: | |
364 | case TCG_COND_LTU: | |
365 | case TCG_COND_LT: | |
366 | case TCG_COND_GTU: | |
367 | case TCG_COND_NE: | |
368 | return 0; | |
369 | case TCG_COND_GE: | |
370 | case TCG_COND_GEU: | |
371 | case TCG_COND_LE: | |
372 | case TCG_COND_LEU: | |
373 | case TCG_COND_EQ: | |
374 | return 1; | |
375 | default: | |
376 | tcg_abort(); | |
377 | } | |
378 | } | |
379 | ||
b336ceb6 AJ |
380 | /* Return 2 if the condition can't be simplified, and the result |
381 | of the condition (0 or 1) if it can */ | |
f8dd19e5 AJ |
382 | static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, |
383 | TCGArg y, TCGCond c) | |
384 | { | |
b336ceb6 AJ |
385 | if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { |
386 | switch (op_bits(op)) { | |
387 | case 32: | |
9519da7e | 388 | return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); |
b336ceb6 | 389 | case 64: |
9519da7e | 390 | return do_constant_folding_cond_64(temps[x].val, temps[y].val, c); |
0aed257f | 391 | default: |
9519da7e | 392 | tcg_abort(); |
b336ceb6 | 393 | } |
9519da7e RH |
394 | } else if (temps_are_copies(x, y)) { |
395 | return do_constant_folding_cond_eq(c); | |
b336ceb6 AJ |
396 | } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { |
397 | switch (c) { | |
f8dd19e5 | 398 | case TCG_COND_LTU: |
b336ceb6 | 399 | return 0; |
f8dd19e5 | 400 | case TCG_COND_GEU: |
b336ceb6 AJ |
401 | return 1; |
402 | default: | |
403 | return 2; | |
f8dd19e5 | 404 | } |
b336ceb6 AJ |
405 | } else { |
406 | return 2; | |
f8dd19e5 | 407 | } |
f8dd19e5 AJ |
408 | } |
409 | ||
6c4382f8 RH |
410 | /* Return 2 if the condition can't be simplified, and the result |
411 | of the condition (0 or 1) if it can */ | |
412 | static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) | |
413 | { | |
414 | TCGArg al = p1[0], ah = p1[1]; | |
415 | TCGArg bl = p2[0], bh = p2[1]; | |
416 | ||
417 | if (temps[bl].state == TCG_TEMP_CONST | |
418 | && temps[bh].state == TCG_TEMP_CONST) { | |
419 | uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; | |
420 | ||
421 | if (temps[al].state == TCG_TEMP_CONST | |
422 | && temps[ah].state == TCG_TEMP_CONST) { | |
423 | uint64_t a; | |
424 | a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; | |
425 | return do_constant_folding_cond_64(a, b, c); | |
426 | } | |
427 | if (b == 0) { | |
428 | switch (c) { | |
429 | case TCG_COND_LTU: | |
430 | return 0; | |
431 | case TCG_COND_GEU: | |
432 | return 1; | |
433 | default: | |
434 | break; | |
435 | } | |
436 | } | |
437 | } | |
438 | if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) { | |
439 | return do_constant_folding_cond_eq(c); | |
440 | } | |
441 | return 2; | |
442 | } | |
443 | ||
24c9ae4e RH |
444 | static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) |
445 | { | |
446 | TCGArg a1 = *p1, a2 = *p2; | |
447 | int sum = 0; | |
448 | sum += temps[a1].state == TCG_TEMP_CONST; | |
449 | sum -= temps[a2].state == TCG_TEMP_CONST; | |
450 | ||
451 | /* Prefer the constant in second argument, and then the form | |
452 | op a, a, b, which is better handled on non-RISC hosts. */ | |
453 | if (sum > 0 || (sum == 0 && dest == a2)) { | |
454 | *p1 = a2; | |
455 | *p2 = a1; | |
456 | return true; | |
457 | } | |
458 | return false; | |
459 | } | |
460 | ||
0bfcb865 RH |
461 | static bool swap_commutative2(TCGArg *p1, TCGArg *p2) |
462 | { | |
463 | int sum = 0; | |
464 | sum += temps[p1[0]].state == TCG_TEMP_CONST; | |
465 | sum += temps[p1[1]].state == TCG_TEMP_CONST; | |
466 | sum -= temps[p2[0]].state == TCG_TEMP_CONST; | |
467 | sum -= temps[p2[1]].state == TCG_TEMP_CONST; | |
468 | if (sum > 0) { | |
469 | TCGArg t; | |
470 | t = p1[0], p1[0] = p2[0], p2[0] = t; | |
471 | t = p1[1], p1[1] = p2[1], p2[1] = t; | |
472 | return true; | |
473 | } | |
474 | return false; | |
475 | } | |
476 | ||
22613af4 | 477 | /* Propagate constants and copies, fold constant expressions. */ |
8f2e8c07 KB |
478 | static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, |
479 | TCGArg *args, TCGOpDef *tcg_op_defs) | |
480 | { | |
fe0de7aa BS |
481 | int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args; |
482 | TCGOpcode op; | |
8f2e8c07 KB |
483 | const TCGOpDef *def; |
484 | TCGArg *gen_args; | |
53108fb5 | 485 | TCGArg tmp; |
5d8f5363 | 486 | |
22613af4 KB |
487 | /* Array VALS has an element for each temp. |
488 | If this temp holds a constant then its value is kept in VALS' element. | |
e590d4e6 AJ |
489 | If this temp is a copy of other ones then the other copies are |
490 | available through the doubly linked circular list. */ | |
8f2e8c07 KB |
491 | |
492 | nb_temps = s->nb_temps; | |
493 | nb_globals = s->nb_globals; | |
d193a14a | 494 | reset_all_temps(nb_temps); |
8f2e8c07 | 495 | |
92414b31 | 496 | nb_ops = tcg_opc_ptr - s->gen_opc_buf; |
8f2e8c07 KB |
497 | gen_args = args; |
498 | for (op_index = 0; op_index < nb_ops; op_index++) { | |
92414b31 | 499 | op = s->gen_opc_buf[op_index]; |
8f2e8c07 | 500 | def = &tcg_op_defs[op]; |
22613af4 | 501 | /* Do copy propagation */ |
1ff8c541 AJ |
502 | if (op == INDEX_op_call) { |
503 | int nb_oargs = args[0] >> 16; | |
504 | int nb_iargs = args[0] & 0xffff; | |
505 | for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) { | |
506 | if (temps[args[i]].state == TCG_TEMP_COPY) { | |
507 | args[i] = find_better_copy(s, args[i]); | |
508 | } | |
509 | } | |
510 | } else { | |
22613af4 KB |
511 | for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { |
512 | if (temps[args[i]].state == TCG_TEMP_COPY) { | |
e590d4e6 | 513 | args[i] = find_better_copy(s, args[i]); |
22613af4 KB |
514 | } |
515 | } | |
516 | } | |
517 | ||
53108fb5 KB |
518 | /* For commutative operations make constant second argument */ |
519 | switch (op) { | |
520 | CASE_OP_32_64(add): | |
521 | CASE_OP_32_64(mul): | |
9a81090b KB |
522 | CASE_OP_32_64(and): |
523 | CASE_OP_32_64(or): | |
524 | CASE_OP_32_64(xor): | |
cb25c80a RH |
525 | CASE_OP_32_64(eqv): |
526 | CASE_OP_32_64(nand): | |
527 | CASE_OP_32_64(nor): | |
24c9ae4e | 528 | swap_commutative(args[0], &args[1], &args[2]); |
53108fb5 | 529 | break; |
65a7cce1 | 530 | CASE_OP_32_64(brcond): |
24c9ae4e | 531 | if (swap_commutative(-1, &args[0], &args[1])) { |
65a7cce1 AJ |
532 | args[2] = tcg_swap_cond(args[2]); |
533 | } | |
534 | break; | |
535 | CASE_OP_32_64(setcond): | |
24c9ae4e | 536 | if (swap_commutative(args[0], &args[1], &args[2])) { |
65a7cce1 AJ |
537 | args[3] = tcg_swap_cond(args[3]); |
538 | } | |
539 | break; | |
fa01a208 | 540 | CASE_OP_32_64(movcond): |
24c9ae4e RH |
541 | if (swap_commutative(-1, &args[1], &args[2])) { |
542 | args[5] = tcg_swap_cond(args[5]); | |
5d8f5363 RH |
543 | } |
544 | /* For movcond, we canonicalize the "false" input reg to match | |
545 | the destination reg so that the tcg backend can implement | |
546 | a "move if true" operation. */ | |
24c9ae4e RH |
547 | if (swap_commutative(args[0], &args[4], &args[3])) { |
548 | args[5] = tcg_invert_cond(args[5]); | |
fa01a208 | 549 | } |
1e484e61 RH |
550 | break; |
551 | case INDEX_op_add2_i32: | |
552 | swap_commutative(args[0], &args[2], &args[4]); | |
553 | swap_commutative(args[1], &args[3], &args[5]); | |
554 | break; | |
1414968a RH |
555 | case INDEX_op_mulu2_i32: |
556 | swap_commutative(args[0], &args[2], &args[3]); | |
557 | break; | |
0bfcb865 RH |
558 | case INDEX_op_brcond2_i32: |
559 | if (swap_commutative2(&args[0], &args[2])) { | |
560 | args[4] = tcg_swap_cond(args[4]); | |
561 | } | |
562 | break; | |
563 | case INDEX_op_setcond2_i32: | |
564 | if (swap_commutative2(&args[1], &args[3])) { | |
565 | args[5] = tcg_swap_cond(args[5]); | |
566 | } | |
567 | break; | |
53108fb5 KB |
568 | default: |
569 | break; | |
570 | } | |
571 | ||
01ee5282 AJ |
572 | /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */ |
573 | switch (op) { | |
574 | CASE_OP_32_64(shl): | |
575 | CASE_OP_32_64(shr): | |
576 | CASE_OP_32_64(sar): | |
577 | CASE_OP_32_64(rotl): | |
578 | CASE_OP_32_64(rotr): | |
579 | if (temps[args[1]].state == TCG_TEMP_CONST | |
580 | && temps[args[1]].val == 0) { | |
92414b31 | 581 | s->gen_opc_buf[op_index] = op_to_movi(op); |
e590d4e6 | 582 | tcg_opt_gen_movi(gen_args, args[0], 0); |
01ee5282 AJ |
583 | args += 3; |
584 | gen_args += 2; | |
585 | continue; | |
586 | } | |
587 | break; | |
588 | default: | |
589 | break; | |
590 | } | |
591 | ||
56e49438 | 592 | /* Simplify expression for "op r, a, 0 => mov r, a" cases */ |
53108fb5 KB |
593 | switch (op) { |
594 | CASE_OP_32_64(add): | |
595 | CASE_OP_32_64(sub): | |
55c0975c KB |
596 | CASE_OP_32_64(shl): |
597 | CASE_OP_32_64(shr): | |
598 | CASE_OP_32_64(sar): | |
25c4d9cc RH |
599 | CASE_OP_32_64(rotl): |
600 | CASE_OP_32_64(rotr): | |
38ee188b AJ |
601 | CASE_OP_32_64(or): |
602 | CASE_OP_32_64(xor): | |
53108fb5 KB |
603 | if (temps[args[1]].state == TCG_TEMP_CONST) { |
604 | /* Proceed with possible constant folding. */ | |
605 | break; | |
606 | } | |
607 | if (temps[args[2]].state == TCG_TEMP_CONST | |
608 | && temps[args[2]].val == 0) { | |
e590d4e6 | 609 | if (temps_are_copies(args[0], args[1])) { |
92414b31 | 610 | s->gen_opc_buf[op_index] = INDEX_op_nop; |
53108fb5 | 611 | } else { |
92414b31 | 612 | s->gen_opc_buf[op_index] = op_to_mov(op); |
b80bb016 | 613 | tcg_opt_gen_mov(s, gen_args, args[0], args[1]); |
53108fb5 | 614 | gen_args += 2; |
53108fb5 | 615 | } |
fedc0da2 | 616 | args += 3; |
53108fb5 KB |
617 | continue; |
618 | } | |
619 | break; | |
56e49438 AJ |
620 | default: |
621 | break; | |
622 | } | |
623 | ||
624 | /* Simplify expression for "op r, a, 0 => movi r, 0" cases */ | |
625 | switch (op) { | |
61251c0c | 626 | CASE_OP_32_64(and): |
53108fb5 KB |
627 | CASE_OP_32_64(mul): |
628 | if ((temps[args[2]].state == TCG_TEMP_CONST | |
629 | && temps[args[2]].val == 0)) { | |
92414b31 | 630 | s->gen_opc_buf[op_index] = op_to_movi(op); |
e590d4e6 | 631 | tcg_opt_gen_movi(gen_args, args[0], 0); |
53108fb5 KB |
632 | args += 3; |
633 | gen_args += 2; | |
634 | continue; | |
635 | } | |
636 | break; | |
56e49438 AJ |
637 | default: |
638 | break; | |
639 | } | |
640 | ||
641 | /* Simplify expression for "op r, a, a => mov r, a" cases */ | |
642 | switch (op) { | |
9a81090b KB |
643 | CASE_OP_32_64(or): |
644 | CASE_OP_32_64(and): | |
0aba1c73 | 645 | if (temps_are_copies(args[1], args[2])) { |
e590d4e6 | 646 | if (temps_are_copies(args[0], args[1])) { |
92414b31 | 647 | s->gen_opc_buf[op_index] = INDEX_op_nop; |
9a81090b | 648 | } else { |
92414b31 | 649 | s->gen_opc_buf[op_index] = op_to_mov(op); |
b80bb016 | 650 | tcg_opt_gen_mov(s, gen_args, args[0], args[1]); |
9a81090b | 651 | gen_args += 2; |
9a81090b | 652 | } |
fedc0da2 | 653 | args += 3; |
9a81090b KB |
654 | continue; |
655 | } | |
656 | break; | |
fe0de7aa BS |
657 | default: |
658 | break; | |
53108fb5 KB |
659 | } |
660 | ||
3c94193e AJ |
661 | /* Simplify expression for "op r, a, a => movi r, 0" cases */ |
662 | switch (op) { | |
663 | CASE_OP_32_64(sub): | |
664 | CASE_OP_32_64(xor): | |
665 | if (temps_are_copies(args[1], args[2])) { | |
92414b31 | 666 | s->gen_opc_buf[op_index] = op_to_movi(op); |
3c94193e AJ |
667 | tcg_opt_gen_movi(gen_args, args[0], 0); |
668 | gen_args += 2; | |
669 | args += 3; | |
670 | continue; | |
671 | } | |
672 | break; | |
673 | default: | |
674 | break; | |
675 | } | |
676 | ||
22613af4 KB |
677 | /* Propagate constants through copy operations and do constant |
678 | folding. Constants will be substituted to arguments by register | |
679 | allocator where needed and possible. Also detect copies. */ | |
8f2e8c07 | 680 | switch (op) { |
22613af4 | 681 | CASE_OP_32_64(mov): |
e590d4e6 | 682 | if (temps_are_copies(args[0], args[1])) { |
22613af4 | 683 | args += 2; |
92414b31 | 684 | s->gen_opc_buf[op_index] = INDEX_op_nop; |
22613af4 KB |
685 | break; |
686 | } | |
687 | if (temps[args[1]].state != TCG_TEMP_CONST) { | |
b80bb016 | 688 | tcg_opt_gen_mov(s, gen_args, args[0], args[1]); |
22613af4 KB |
689 | gen_args += 2; |
690 | args += 2; | |
691 | break; | |
692 | } | |
693 | /* Source argument is constant. Rewrite the operation and | |
694 | let movi case handle it. */ | |
695 | op = op_to_movi(op); | |
92414b31 | 696 | s->gen_opc_buf[op_index] = op; |
22613af4 KB |
697 | args[1] = temps[args[1]].val; |
698 | /* fallthrough */ | |
699 | CASE_OP_32_64(movi): | |
e590d4e6 | 700 | tcg_opt_gen_movi(gen_args, args[0], args[1]); |
22613af4 KB |
701 | gen_args += 2; |
702 | args += 2; | |
703 | break; | |
6e14e91b | 704 | |
a640f031 | 705 | CASE_OP_32_64(not): |
cb25c80a | 706 | CASE_OP_32_64(neg): |
25c4d9cc RH |
707 | CASE_OP_32_64(ext8s): |
708 | CASE_OP_32_64(ext8u): | |
709 | CASE_OP_32_64(ext16s): | |
710 | CASE_OP_32_64(ext16u): | |
a640f031 KB |
711 | case INDEX_op_ext32s_i64: |
712 | case INDEX_op_ext32u_i64: | |
a640f031 | 713 | if (temps[args[1]].state == TCG_TEMP_CONST) { |
92414b31 | 714 | s->gen_opc_buf[op_index] = op_to_movi(op); |
a640f031 | 715 | tmp = do_constant_folding(op, temps[args[1]].val, 0); |
e590d4e6 | 716 | tcg_opt_gen_movi(gen_args, args[0], tmp); |
6e14e91b RH |
717 | gen_args += 2; |
718 | args += 2; | |
719 | break; | |
a640f031 | 720 | } |
6e14e91b RH |
721 | goto do_default; |
722 | ||
53108fb5 KB |
723 | CASE_OP_32_64(add): |
724 | CASE_OP_32_64(sub): | |
725 | CASE_OP_32_64(mul): | |
9a81090b KB |
726 | CASE_OP_32_64(or): |
727 | CASE_OP_32_64(and): | |
728 | CASE_OP_32_64(xor): | |
55c0975c KB |
729 | CASE_OP_32_64(shl): |
730 | CASE_OP_32_64(shr): | |
731 | CASE_OP_32_64(sar): | |
25c4d9cc RH |
732 | CASE_OP_32_64(rotl): |
733 | CASE_OP_32_64(rotr): | |
cb25c80a RH |
734 | CASE_OP_32_64(andc): |
735 | CASE_OP_32_64(orc): | |
736 | CASE_OP_32_64(eqv): | |
737 | CASE_OP_32_64(nand): | |
738 | CASE_OP_32_64(nor): | |
53108fb5 KB |
739 | if (temps[args[1]].state == TCG_TEMP_CONST |
740 | && temps[args[2]].state == TCG_TEMP_CONST) { | |
92414b31 | 741 | s->gen_opc_buf[op_index] = op_to_movi(op); |
53108fb5 KB |
742 | tmp = do_constant_folding(op, temps[args[1]].val, |
743 | temps[args[2]].val); | |
e590d4e6 | 744 | tcg_opt_gen_movi(gen_args, args[0], tmp); |
53108fb5 | 745 | gen_args += 2; |
6e14e91b RH |
746 | args += 3; |
747 | break; | |
53108fb5 | 748 | } |
6e14e91b RH |
749 | goto do_default; |
750 | ||
7ef55fc9 AJ |
751 | CASE_OP_32_64(deposit): |
752 | if (temps[args[1]].state == TCG_TEMP_CONST | |
753 | && temps[args[2]].state == TCG_TEMP_CONST) { | |
92414b31 | 754 | s->gen_opc_buf[op_index] = op_to_movi(op); |
7ef55fc9 AJ |
755 | tmp = ((1ull << args[4]) - 1); |
756 | tmp = (temps[args[1]].val & ~(tmp << args[3])) | |
757 | | ((temps[args[2]].val & tmp) << args[3]); | |
758 | tcg_opt_gen_movi(gen_args, args[0], tmp); | |
759 | gen_args += 2; | |
6e14e91b RH |
760 | args += 5; |
761 | break; | |
7ef55fc9 | 762 | } |
6e14e91b RH |
763 | goto do_default; |
764 | ||
f8dd19e5 | 765 | CASE_OP_32_64(setcond): |
b336ceb6 AJ |
766 | tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); |
767 | if (tmp != 2) { | |
92414b31 | 768 | s->gen_opc_buf[op_index] = op_to_movi(op); |
e590d4e6 | 769 | tcg_opt_gen_movi(gen_args, args[0], tmp); |
f8dd19e5 | 770 | gen_args += 2; |
6e14e91b RH |
771 | args += 4; |
772 | break; | |
f8dd19e5 | 773 | } |
6e14e91b RH |
774 | goto do_default; |
775 | ||
fbeaa26c | 776 | CASE_OP_32_64(brcond): |
b336ceb6 AJ |
777 | tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); |
778 | if (tmp != 2) { | |
779 | if (tmp) { | |
d193a14a | 780 | reset_all_temps(nb_temps); |
92414b31 | 781 | s->gen_opc_buf[op_index] = INDEX_op_br; |
fbeaa26c AJ |
782 | gen_args[0] = args[3]; |
783 | gen_args += 1; | |
fbeaa26c | 784 | } else { |
92414b31 | 785 | s->gen_opc_buf[op_index] = INDEX_op_nop; |
fbeaa26c | 786 | } |
6e14e91b RH |
787 | args += 4; |
788 | break; | |
fbeaa26c | 789 | } |
6e14e91b RH |
790 | goto do_default; |
791 | ||
fa01a208 | 792 | CASE_OP_32_64(movcond): |
b336ceb6 AJ |
793 | tmp = do_constant_folding_cond(op, args[1], args[2], args[5]); |
794 | if (tmp != 2) { | |
e590d4e6 | 795 | if (temps_are_copies(args[0], args[4-tmp])) { |
92414b31 | 796 | s->gen_opc_buf[op_index] = INDEX_op_nop; |
fa01a208 | 797 | } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { |
92414b31 | 798 | s->gen_opc_buf[op_index] = op_to_movi(op); |
e590d4e6 | 799 | tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val); |
fa01a208 RH |
800 | gen_args += 2; |
801 | } else { | |
92414b31 | 802 | s->gen_opc_buf[op_index] = op_to_mov(op); |
e590d4e6 | 803 | tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); |
fa01a208 RH |
804 | gen_args += 2; |
805 | } | |
6e14e91b RH |
806 | args += 6; |
807 | break; | |
fa01a208 | 808 | } |
6e14e91b | 809 | goto do_default; |
212c328d RH |
810 | |
811 | case INDEX_op_add2_i32: | |
812 | case INDEX_op_sub2_i32: | |
813 | if (temps[args[2]].state == TCG_TEMP_CONST | |
814 | && temps[args[3]].state == TCG_TEMP_CONST | |
815 | && temps[args[4]].state == TCG_TEMP_CONST | |
816 | && temps[args[5]].state == TCG_TEMP_CONST) { | |
817 | uint32_t al = temps[args[2]].val; | |
818 | uint32_t ah = temps[args[3]].val; | |
819 | uint32_t bl = temps[args[4]].val; | |
820 | uint32_t bh = temps[args[5]].val; | |
821 | uint64_t a = ((uint64_t)ah << 32) | al; | |
822 | uint64_t b = ((uint64_t)bh << 32) | bl; | |
823 | TCGArg rl, rh; | |
824 | ||
825 | if (op == INDEX_op_add2_i32) { | |
826 | a += b; | |
827 | } else { | |
828 | a -= b; | |
829 | } | |
830 | ||
831 | /* We emit the extra nop when we emit the add2/sub2. */ | |
92414b31 | 832 | assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); |
212c328d RH |
833 | |
834 | rl = args[0]; | |
835 | rh = args[1]; | |
92414b31 EV |
836 | s->gen_opc_buf[op_index] = INDEX_op_movi_i32; |
837 | s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; | |
212c328d RH |
838 | tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a); |
839 | tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32)); | |
840 | gen_args += 4; | |
841 | args += 6; | |
842 | break; | |
843 | } | |
844 | goto do_default; | |
1414968a RH |
845 | |
846 | case INDEX_op_mulu2_i32: | |
847 | if (temps[args[2]].state == TCG_TEMP_CONST | |
848 | && temps[args[3]].state == TCG_TEMP_CONST) { | |
849 | uint32_t a = temps[args[2]].val; | |
850 | uint32_t b = temps[args[3]].val; | |
851 | uint64_t r = (uint64_t)a * b; | |
852 | TCGArg rl, rh; | |
853 | ||
854 | /* We emit the extra nop when we emit the mulu2. */ | |
92414b31 | 855 | assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); |
1414968a RH |
856 | |
857 | rl = args[0]; | |
858 | rh = args[1]; | |
92414b31 EV |
859 | s->gen_opc_buf[op_index] = INDEX_op_movi_i32; |
860 | s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; | |
1414968a RH |
861 | tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r); |
862 | tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32)); | |
863 | gen_args += 4; | |
864 | args += 4; | |
865 | break; | |
866 | } | |
867 | goto do_default; | |
6e14e91b | 868 | |
bc1473ef | 869 | case INDEX_op_brcond2_i32: |
6c4382f8 RH |
870 | tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); |
871 | if (tmp != 2) { | |
872 | if (tmp) { | |
d193a14a | 873 | reset_all_temps(nb_temps); |
92414b31 | 874 | s->gen_opc_buf[op_index] = INDEX_op_br; |
6c4382f8 RH |
875 | gen_args[0] = args[5]; |
876 | gen_args += 1; | |
877 | } else { | |
92414b31 | 878 | s->gen_opc_buf[op_index] = INDEX_op_nop; |
6c4382f8 RH |
879 | } |
880 | } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) | |
881 | && temps[args[2]].state == TCG_TEMP_CONST | |
882 | && temps[args[3]].state == TCG_TEMP_CONST | |
883 | && temps[args[2]].val == 0 | |
884 | && temps[args[3]].val == 0) { | |
885 | /* Simplify LT/GE comparisons vs zero to a single compare | |
886 | vs the high word of the input. */ | |
d193a14a | 887 | reset_all_temps(nb_temps); |
92414b31 | 888 | s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; |
bc1473ef RH |
889 | gen_args[0] = args[1]; |
890 | gen_args[1] = args[3]; | |
891 | gen_args[2] = args[4]; | |
892 | gen_args[3] = args[5]; | |
893 | gen_args += 4; | |
6c4382f8 RH |
894 | } else { |
895 | goto do_default; | |
bc1473ef | 896 | } |
6c4382f8 RH |
897 | args += 6; |
898 | break; | |
bc1473ef RH |
899 | |
900 | case INDEX_op_setcond2_i32: | |
6c4382f8 RH |
901 | tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); |
902 | if (tmp != 2) { | |
92414b31 | 903 | s->gen_opc_buf[op_index] = INDEX_op_movi_i32; |
6c4382f8 RH |
904 | tcg_opt_gen_movi(gen_args, args[0], tmp); |
905 | gen_args += 2; | |
906 | } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) | |
907 | && temps[args[3]].state == TCG_TEMP_CONST | |
908 | && temps[args[4]].state == TCG_TEMP_CONST | |
909 | && temps[args[3]].val == 0 | |
910 | && temps[args[4]].val == 0) { | |
911 | /* Simplify LT/GE comparisons vs zero to a single compare | |
912 | vs the high word of the input. */ | |
92414b31 | 913 | s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; |
bc1473ef RH |
914 | gen_args[0] = args[0]; |
915 | gen_args[1] = args[2]; | |
916 | gen_args[2] = args[4]; | |
917 | gen_args[3] = args[5]; | |
918 | gen_args += 4; | |
6c4382f8 RH |
919 | } else { |
920 | goto do_default; | |
bc1473ef | 921 | } |
6c4382f8 RH |
922 | args += 6; |
923 | break; | |
bc1473ef | 924 | |
8f2e8c07 | 925 | case INDEX_op_call: |
22613af4 | 926 | nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); |
78505279 AJ |
927 | if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS | |
928 | TCG_CALL_NO_WRITE_GLOBALS))) { | |
22613af4 | 929 | for (i = 0; i < nb_globals; i++) { |
e590d4e6 | 930 | reset_temp(i); |
22613af4 KB |
931 | } |
932 | } | |
933 | for (i = 0; i < (args[0] >> 16); i++) { | |
e590d4e6 | 934 | reset_temp(args[i + 1]); |
22613af4 KB |
935 | } |
936 | i = nb_call_args + 3; | |
8f2e8c07 KB |
937 | while (i) { |
938 | *gen_args = *args; | |
939 | args++; | |
940 | gen_args++; | |
941 | i--; | |
942 | } | |
943 | break; | |
6e14e91b | 944 | |
8f2e8c07 | 945 | default: |
6e14e91b RH |
946 | do_default: |
947 | /* Default case: we know nothing about operation (or were unable | |
948 | to compute the operation result) so no propagation is done. | |
949 | We trash everything if the operation is the end of a basic | |
950 | block, otherwise we only trash the output args. */ | |
a2550660 | 951 | if (def->flags & TCG_OPF_BB_END) { |
d193a14a | 952 | reset_all_temps(nb_temps); |
a2550660 AJ |
953 | } else { |
954 | for (i = 0; i < def->nb_oargs; i++) { | |
e590d4e6 | 955 | reset_temp(args[i]); |
a2550660 | 956 | } |
22613af4 | 957 | } |
8f2e8c07 KB |
958 | for (i = 0; i < def->nb_args; i++) { |
959 | gen_args[i] = args[i]; | |
960 | } | |
961 | args += def->nb_args; | |
962 | gen_args += def->nb_args; | |
963 | break; | |
964 | } | |
965 | } | |
966 | ||
967 | return gen_args; | |
968 | } | |
969 | ||
970 | TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, | |
971 | TCGArg *args, TCGOpDef *tcg_op_defs) | |
972 | { | |
973 | TCGArg *res; | |
974 | res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs); | |
975 | return res; | |
976 | } |