mirror_qemu.git: tcg/tcg.c
1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27
28 #include "qemu/osdep.h"
29
30 /* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
32
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40
41 /* Note: the long-term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions. */
44 #define NO_CPU_IO_DEFS
45
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS ELFCLASS32
51 #else
52 # define ELF_CLASS ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA ELFDATA2MSB
56 #else
57 # define ELF_DATA ELFDATA2LSB
58 #endif
59
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64
65 #ifdef CONFIG_TCG_INTERPRETER
66 #include <ffi.h>
67 #endif
68
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70 used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74 intptr_t value, intptr_t addend);
75
76 /* The CIE and FDE header definitions will be common to all hosts. */
77 typedef struct {
78 uint32_t len __attribute__((aligned((sizeof(void *)))));
79 uint32_t id;
80 uint8_t version;
81 char augmentation[1];
82 uint8_t code_align;
83 uint8_t data_align;
84 uint8_t return_column;
85 } DebugFrameCIE;
86
87 typedef struct QEMU_PACKED {
88 uint32_t len __attribute__((aligned((sizeof(void *)))));
89 uint32_t cie_offset;
90 uintptr_t func_start;
91 uintptr_t func_len;
92 } DebugFrameFDEHeader;
93
94 typedef struct QEMU_PACKED {
95 DebugFrameCIE cie;
96 DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98
99 static void tcg_register_jit_int(const void *buf, size_t size,
100 const void *debug_frame,
101 size_t debug_frame_size)
102 __attribute__((unused));
103
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106 intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109 TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111 const TCGArg args[TCG_MAX_OP_ARGS],
112 const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115 TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121 unsigned vecl, unsigned vece,
122 const TCGArg args[TCG_MAX_OP_ARGS],
123 const int const_args[TCG_MAX_OP_ARGS]);
124 #else
125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126 TCGReg dst, TCGReg src)
127 {
128 g_assert_not_reached();
129 }
130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131 TCGReg dst, TCGReg base, intptr_t offset)
132 {
133 g_assert_not_reached();
134 }
135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
136 TCGReg dst, int64_t arg)
137 {
138 g_assert_not_reached();
139 }
140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
141 unsigned vecl, unsigned vece,
142 const TCGArg args[TCG_MAX_OP_ARGS],
143 const int const_args[TCG_MAX_OP_ARGS])
144 {
145 g_assert_not_reached();
146 }
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149 intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151 TCGReg base, intptr_t ofs);
152 #ifdef CONFIG_TCG_INTERPRETER
153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
154 ffi_cif *cif);
155 #else
156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
157 #endif
158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
159 #ifdef TCG_TARGET_NEED_LDST_LABELS
160 static int tcg_out_ldst_finalize(TCGContext *s);
161 #endif
162
163 TCGContext tcg_init_ctx;
164 __thread TCGContext *tcg_ctx;
165
166 TCGContext **tcg_ctxs;
167 unsigned int tcg_cur_ctxs;
168 unsigned int tcg_max_ctxs;
169 TCGv_env cpu_env = 0;
170 const void *tcg_code_gen_epilogue;
171 uintptr_t tcg_splitwx_diff;
172
173 #ifndef CONFIG_TCG_INTERPRETER
174 tcg_prologue_fn *tcg_qemu_tb_exec;
175 #endif
176
177 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
178 static TCGRegSet tcg_target_call_clobber_regs;
179
180 #if TCG_TARGET_INSN_UNIT_SIZE == 1
181 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
182 {
183 *s->code_ptr++ = v;
184 }
185
186 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
187 uint8_t v)
188 {
189 *p = v;
190 }
191 #endif
192
193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
195 {
196 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
197 *s->code_ptr++ = v;
198 } else {
199 tcg_insn_unit *p = s->code_ptr;
200 memcpy(p, &v, sizeof(v));
201 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
202 }
203 }
204
205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
206 uint16_t v)
207 {
208 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
209 *p = v;
210 } else {
211 memcpy(p, &v, sizeof(v));
212 }
213 }
214 #endif
215
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
218 {
219 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
220 *s->code_ptr++ = v;
221 } else {
222 tcg_insn_unit *p = s->code_ptr;
223 memcpy(p, &v, sizeof(v));
224 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
225 }
226 }
227
228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
229 uint32_t v)
230 {
231 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
232 *p = v;
233 } else {
234 memcpy(p, &v, sizeof(v));
235 }
236 }
237 #endif
238
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
241 {
242 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
243 *s->code_ptr++ = v;
244 } else {
245 tcg_insn_unit *p = s->code_ptr;
246 memcpy(p, &v, sizeof(v));
247 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
248 }
249 }
250
251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
252 uint64_t v)
253 {
254 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
255 *p = v;
256 } else {
257 memcpy(p, &v, sizeof(v));
258 }
259 }
260 #endif
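/*
 * Editorial sketch (not QEMU code): the tcg_outN emitters and tcg_patchN
 * patchers above all follow one pattern -- if the value is exactly one
 * instruction unit wide it is stored directly, otherwise it is memcpy'd,
 * and the emitters then advance code_ptr by the equivalent number of
 * units.  A minimal stand-alone version, assuming a hypothetical 2-byte
 * insn unit:
 *
 *     #include <stdint.h>
 *     #include <string.h>
 *
 *     typedef uint16_t insn_unit;
 *
 *     static void out32(insn_unit **pp, uint32_t v)
 *     {
 *         memcpy(*pp, &v, sizeof(v));              // unaligned-safe store
 *         *pp += sizeof(v) / sizeof(insn_unit);    // advance by 2 units
 *     }
 *
 * Because TCG_TARGET_INSN_UNIT_SIZE is a compile-time constant, the
 * if/else in the real helpers folds away, and the memcpy compiles to a
 * plain store on hosts that tolerate unaligned access.
 */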
261
262 /* label relocation processing */
263
264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
265 TCGLabel *l, intptr_t addend)
266 {
267 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
268
269 r->type = type;
270 r->ptr = code_ptr;
271 r->addend = addend;
272 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
273 }
274
275 static void tcg_out_label(TCGContext *s, TCGLabel *l)
276 {
277 tcg_debug_assert(!l->has_value);
278 l->has_value = 1;
279 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
280 }
281
282 TCGLabel *gen_new_label(void)
283 {
284 TCGContext *s = tcg_ctx;
285 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
286
287 memset(l, 0, sizeof(TCGLabel));
288 l->id = s->nb_labels++;
289 QSIMPLEQ_INIT(&l->relocs);
290
291 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
292
293 return l;
294 }
295
296 static bool tcg_resolve_relocs(TCGContext *s)
297 {
298 TCGLabel *l;
299
300 QSIMPLEQ_FOREACH(l, &s->labels, next) {
301 TCGRelocation *r;
302 uintptr_t value = l->u.value;
303
304 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
305 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
306 return false;
307 }
308 }
309 }
310 return true;
311 }
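/*
 * Editorial sketch (not QEMU code): this is the classic two-phase fixup
 * for forward branches.  A jump to a not-yet-bound label records a
 * (code_ptr, type, addend) triple on the label's list; once labels have
 * values, tcg_resolve_relocs() revisits each site and lets the backend's
 * patch_reloc() rewrite it.  In miniature, for a hypothetical one-byte
 * displacement jump:
 *
 *     struct reloc { uint8_t *site; struct reloc *next; };
 *     struct label { uint8_t *target; struct reloc *relocs; };
 *
 *     static void emit_jmp(uint8_t **code, struct label *l, struct reloc *r)
 *     {
 *         *(*code)++ = 0xEB;        // hypothetical JMP opcode
 *         r->site = (*code)++;      // displacement byte, patched later
 *         r->next = l->relocs;
 *         l->relocs = r;
 *     }
 *
 *     static void bind_label(struct label *l, uint8_t *target)
 *     {
 *         l->target = target;
 *         for (struct reloc *r = l->relocs; r; r = r->next) {
 *             *r->site = (uint8_t)(target - (r->site + 1));  // rel8
 *         }
 *     }
 *
 * The real patch_reloc() can also fail when a displacement does not fit,
 * making tcg_resolve_relocs() return false so the caller retranslates.
 */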
312
313 static void set_jmp_reset_offset(TCGContext *s, int which)
314 {
315 /*
316 * We will check for overflow at the end of the opcode loop in
317 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
318 */
319 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
320 }
321
322 /* Signal overflow, starting over with fewer guest insns. */
323 static G_NORETURN
324 void tcg_raise_tb_overflow(TCGContext *s)
325 {
326 siglongjmp(s->jmp_trans, -2);
327 }
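/*
 * Editorial note: this is the sigsetjmp/siglongjmp escape pattern.
 * Translation runs under a sigsetjmp on s->jmp_trans, and any point that
 * exhausts temps, labels or code space unwinds straight back to it.
 * Roughly, on the caller's side:
 *
 *     if (sigsetjmp(s->jmp_trans, 0) != 0) {
 *         max_insns /= 2;     // retry the block with fewer guest insns
 *         goto restart;
 *     }
 */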
328
329 #define C_PFX1(P, A) P##A
330 #define C_PFX2(P, A, B) P##A##_##B
331 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
332 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
333 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
334 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
335
336 /* Define an enumeration for the various combinations. */
337
338 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
339 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
340 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
341 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
342
343 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
344 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
345 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
346 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
347
348 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
349
350 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
351 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
352 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
353 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
354
355 typedef enum {
356 #include "tcg-target-con-set.h"
357 } TCGConstraintSetIndex;
358
359 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
360
361 #undef C_O0_I1
362 #undef C_O0_I2
363 #undef C_O0_I3
364 #undef C_O0_I4
365 #undef C_O1_I1
366 #undef C_O1_I2
367 #undef C_O1_I3
368 #undef C_O1_I4
369 #undef C_N1_I2
370 #undef C_O2_I1
371 #undef C_O2_I2
372 #undef C_O2_I3
373 #undef C_O2_I4
374
375 /* Put all of the constraint sets into an array, indexed by the enum. */
376
377 #define C_O0_I1(I1) { .args_ct_str = { #I1 } },
378 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
379 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
380 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
381
382 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
383 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
384 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
385 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
386
387 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
388
389 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
390 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
391 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
392 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
393
394 static const TCGTargetOpDef constraint_sets[] = {
395 #include "tcg-target-con-set.h"
396 };
397
398
399 #undef C_O0_I1
400 #undef C_O0_I2
401 #undef C_O0_I3
402 #undef C_O0_I4
403 #undef C_O1_I1
404 #undef C_O1_I2
405 #undef C_O1_I3
406 #undef C_O1_I4
407 #undef C_N1_I2
408 #undef C_O2_I1
409 #undef C_O2_I2
410 #undef C_O2_I3
411 #undef C_O2_I4
412
413 /* Expand the enumerator to be returned from tcg_target_op_def(). */
414
415 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
416 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
417 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
418 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
419
420 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
421 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
422 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
423 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
424
425 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
426
427 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
428 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
429 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
430 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
431
432 #include "tcg-target.c.inc"
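/*
 * Editorial note: the C_O*_I* machinery above is the "X-macro" trick --
 * tcg-target-con-set.h is expanded twice under different macro
 * definitions, once to build the TCGConstraintSetIndex enum and once to
 * build the matching constraint_sets[] table, and the macros are then
 * redefined a third time so the backend's tcg_target_op_def() can return
 * the enumerators.  A toy version of the technique, with hypothetical
 * names:
 *
 *     // colors.def contains:
 *     //     X(RED, "red")
 *     //     X(GREEN, "green")
 *
 *     #define X(sym, str) COLOR_##sym,
 *     enum color {
 *     #include "colors.def"
 *     };
 *     #undef X
 *
 *     #define X(sym, str) [COLOR_##sym] = str,
 *     static const char *color_name[] = {
 *     #include "colors.def"
 *     };
 *     #undef X
 *
 * The enum and the table cannot drift apart, because both are generated
 * from the single list in the included header.
 */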
433
434 static void alloc_tcg_plugin_context(TCGContext *s)
435 {
436 #ifdef CONFIG_PLUGIN
437 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
438 s->plugin_tb->insns =
439 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
440 #endif
441 }
442
443 /*
444 * All TCG threads except the parent (i.e. the one that called tcg_context_init
445 * and registered the target's TCG globals) must register with this function
446 * before initiating translation.
447 *
448 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
449 * of tcg_region_init() for the reasoning behind this.
450 *
451 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
452 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
453 * is not used anymore for translation once this function is called.
454 *
455 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
456 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
457 */
458 #ifdef CONFIG_USER_ONLY
459 void tcg_register_thread(void)
460 {
461 tcg_ctx = &tcg_init_ctx;
462 }
463 #else
464 void tcg_register_thread(void)
465 {
466 TCGContext *s = g_malloc(sizeof(*s));
467 unsigned int i, n;
468
469 *s = tcg_init_ctx;
470
471 /* Relink mem_base. */
472 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
473 if (tcg_init_ctx.temps[i].mem_base) {
474 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
475 tcg_debug_assert(b >= 0 && b < n);
476 s->temps[i].mem_base = &s->temps[b];
477 }
478 }
479
480 /* Claim an entry in tcg_ctxs */
481 n = qatomic_fetch_inc(&tcg_cur_ctxs);
482 g_assert(n < tcg_max_ctxs);
483 qatomic_set(&tcg_ctxs[n], s);
484
485 if (n > 0) {
486 alloc_tcg_plugin_context(s);
487 tcg_region_initial_alloc(s);
488 }
489
490 tcg_ctx = s;
491 }
492 #endif /* !CONFIG_USER_ONLY */
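/*
 * Editorial sketch (not QEMU code): the mem_base relink above is the
 * general fix-up needed whenever a struct containing pointers into one
 * of its own arrays is copied by value -- recompute each pointer from
 * its offset in the source array:
 *
 *     // new_elem = new_base + (old_elem - old_base)
 *     static void relink(Temp *dst, const Temp *src, size_t n)
 *     {
 *         for (size_t i = 0; i < n; i++) {
 *             if (src[i].mem_base) {
 *                 dst[i].mem_base = dst + (src[i].mem_base - src);
 *             }
 *         }
 *     }
 *
 * where Temp is a stand-in for TCGTemp and src plays the role of
 * tcg_init_ctx.temps.
 */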
493
494 /* pool based memory allocation */
495 void *tcg_malloc_internal(TCGContext *s, int size)
496 {
497 TCGPool *p;
498 int pool_size;
499
500 if (size > TCG_POOL_CHUNK_SIZE) {
501 /* big malloc: insert a new pool (XXX: could optimize) */
502 p = g_malloc(sizeof(TCGPool) + size);
503 p->size = size;
504 p->next = s->pool_first_large;
505 s->pool_first_large = p;
506 return p->data;
507 } else {
508 p = s->pool_current;
509 if (!p) {
510 p = s->pool_first;
511 if (!p)
512 goto new_pool;
513 } else {
514 if (!p->next) {
515 new_pool:
516 pool_size = TCG_POOL_CHUNK_SIZE;
517 p = g_malloc(sizeof(TCGPool) + pool_size);
518 p->size = pool_size;
519 p->next = NULL;
520 if (s->pool_current) {
521 s->pool_current->next = p;
522 } else {
523 s->pool_first = p;
524 }
525 } else {
526 p = p->next;
527 }
528 }
529 }
530 s->pool_current = p;
531 s->pool_cur = p->data + size;
532 s->pool_end = p->data + p->size;
533 return p->data;
534 }
535
536 void tcg_pool_reset(TCGContext *s)
537 {
538 TCGPool *p, *t;
539 for (p = s->pool_first_large; p; p = t) {
540 t = p->next;
541 g_free(p);
542 }
543 s->pool_first_large = NULL;
544 s->pool_cur = s->pool_end = NULL;
545 s->pool_current = NULL;
546 }
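/*
 * Editorial sketch (not QEMU code): this is a bump-pointer arena.  The
 * inline fast path (tcg_malloc() in the headers) only advances pool_cur;
 * the slow path above chains or reuses fixed-size chunks, keeps a
 * separate list for oversized allocations, and tcg_pool_reset() recycles
 * everything at once between translations instead of freeing objects
 * individually.  The essence of the fast path:
 *
 *     static char pool[CHUNK_SIZE];   // one chunk, for illustration
 *     static size_t pool_off;
 *
 *     static void *pool_alloc(size_t size)
 *     {
 *         size = (size + 7) & ~(size_t)7;          // keep alignment
 *         if (pool_off + size > sizeof(pool)) {
 *             return NULL;   // real slow path would chain a new chunk
 *         }
 *         void *p = pool + pool_off;
 *         pool_off += size;
 *         return p;
 *     }
 *
 *     static void pool_reset(void) { pool_off = 0; }
 *
 * Allocation costs a few arithmetic ops, and freeing a whole
 * translation's worth of objects is O(1), at the price of having no
 * per-object free.
 */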
547
548 #include "exec/helper-proto.h"
549
550 static const TCGHelperInfo all_helpers[] = {
551 #include "exec/helper-tcg.h"
552 };
553 static GHashTable *helper_table;
554
555 #ifdef CONFIG_TCG_INTERPRETER
556 static GHashTable *ffi_table;
557
558 static ffi_type * const typecode_to_ffi[8] = {
559 [dh_typecode_void] = &ffi_type_void,
560 [dh_typecode_i32] = &ffi_type_uint32,
561 [dh_typecode_s32] = &ffi_type_sint32,
562 [dh_typecode_i64] = &ffi_type_uint64,
563 [dh_typecode_s64] = &ffi_type_sint64,
564 [dh_typecode_ptr] = &ffi_type_pointer,
565 };
566 #endif
567
568 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
569 static void process_op_defs(TCGContext *s);
570 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
571 TCGReg reg, const char *name);
572
573 static void tcg_context_init(unsigned max_cpus)
574 {
575 TCGContext *s = &tcg_init_ctx;
576 int op, total_args, n, i;
577 TCGOpDef *def;
578 TCGArgConstraint *args_ct;
579 TCGTemp *ts;
580
581 memset(s, 0, sizeof(*s));
582 s->nb_globals = 0;
583
584 /* Count total number of arguments and allocate the corresponding
585    space. */
586 total_args = 0;
587 for (op = 0; op < NB_OPS; op++) {
588 def = &tcg_op_defs[op];
589 n = def->nb_iargs + def->nb_oargs;
590 total_args += n;
591 }
592
593 args_ct = g_new0(TCGArgConstraint, total_args);
594
595 for (op = 0; op < NB_OPS; op++) {
596 def = &tcg_op_defs[op];
597 def->args_ct = args_ct;
598 n = def->nb_iargs + def->nb_oargs;
599 args_ct += n;
600 }
601
602 /* Register helpers. */
603 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
604 helper_table = g_hash_table_new(NULL, NULL);
605
606 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
607 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
608 (gpointer)&all_helpers[i]);
609 }
610
611 #ifdef CONFIG_TCG_INTERPRETER
612 /* g_direct_hash/equal for direct comparisons on uint32_t. */
613 ffi_table = g_hash_table_new(NULL, NULL);
614 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
615 struct {
616 ffi_cif cif;
617 ffi_type *args[];
618 } *ca;
619 uint32_t typemask = all_helpers[i].typemask;
620 gpointer hash = (gpointer)(uintptr_t)typemask;
621 ffi_status status;
622 int nargs;
623
624 if (g_hash_table_lookup(ffi_table, hash)) {
625 continue;
626 }
627
628 /* Ignoring the return type, find the last non-zero field. */
629 nargs = 32 - clz32(typemask >> 3);
630 nargs = DIV_ROUND_UP(nargs, 3);
631
632 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
633 ca->cif.rtype = typecode_to_ffi[typemask & 7];
634 ca->cif.nargs = nargs;
635
636 if (nargs != 0) {
637 ca->cif.arg_types = ca->args;
638 for (int j = 0; j < nargs; ++j) {
639 int typecode = extract32(typemask, (j + 1) * 3, 3);
640 ca->args[j] = typecode_to_ffi[typecode];
641 }
642 }
643
644 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
645 ca->cif.rtype, ca->cif.arg_types);
646 assert(status == FFI_OK);
647
648 g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
649 }
650 #endif
651
652 tcg_target_init(s);
653 process_op_defs(s);
654
655 /* Reverse the order of the saved registers, assuming they're all at
656 the start of tcg_target_reg_alloc_order. */
657 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
658 int r = tcg_target_reg_alloc_order[n];
659 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
660 break;
661 }
662 }
663 for (i = 0; i < n; ++i) {
664 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
665 }
666 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
667 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
668 }
669
670 alloc_tcg_plugin_context(s);
671
672 tcg_ctx = s;
673 /*
674 * In user-mode we simply share the init context among threads, since we
675 * use a single region. See the documentation of tcg_region_init() for the
676 * reasoning behind this.
677 * In softmmu we will have at most max_cpus TCG threads.
678 */
679 #ifdef CONFIG_USER_ONLY
680 tcg_ctxs = &tcg_ctx;
681 tcg_cur_ctxs = 1;
682 tcg_max_ctxs = 1;
683 #else
684 tcg_max_ctxs = max_cpus;
685 tcg_ctxs = g_new0(TCGContext *, max_cpus);
686 #endif
687
688 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
689 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
690 cpu_env = temp_tcgv_ptr(ts);
691 }
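/*
 * Editorial note: the typemask decoded in tcg_context_init() above packs
 * one 3-bit type code per helper argument -- the return type in bits
 * [0,3) and argument i in bits [3*(i+1), 3*(i+2)).  The argument count
 * falls out of the highest non-zero field:
 *
 *     nargs_bits = 32 - clz32(typemask >> 3);   // bits above the return field
 *     nargs      = DIV_ROUND_UP(nargs_bits, 3); // 3-bit fields in use
 *
 * For instance, a helper with two arguments has its highest set bit in
 * (typemask >> 3) at position 5 or below, giving nargs_bits <= 6 and
 * nargs = 2.  This works because every argument type code is non-zero;
 * only the void return type encodes as 0.
 */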
692
693 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
694 {
695 tcg_context_init(max_cpus);
696 tcg_region_init(tb_size, splitwx, max_cpus);
697 }
698
699 /*
700 * Allocate TBs right before their corresponding translated code, making
701 * sure that TBs and code are on different cache lines.
702 */
703 TranslationBlock *tcg_tb_alloc(TCGContext *s)
704 {
705 uintptr_t align = qemu_icache_linesize;
706 TranslationBlock *tb;
707 void *next;
708
709 retry:
710 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
711 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
712
713 if (unlikely(next > s->code_gen_highwater)) {
714 if (tcg_region_alloc(s)) {
715 return NULL;
716 }
717 goto retry;
718 }
719 qatomic_set(&s->code_gen_ptr, next);
720 s->data_gen_ptr = NULL;
721 return tb;
722 }
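/*
 * Editorial note: ROUND_UP(x, a) for power-of-two a is
 * (x + a - 1) & ~(a - 1).  With a 64-byte icache line and
 * code_gen_ptr == 0x1008, the TB struct lands at 0x1040 and the
 * translated code begins at the next 64-byte boundary after it, so the
 * frequently written TB and the executed code never share a line.
 */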
723
724 void tcg_prologue_init(TCGContext *s)
725 {
726 size_t prologue_size;
727
728 s->code_ptr = s->code_gen_ptr;
729 s->code_buf = s->code_gen_ptr;
730 s->data_gen_ptr = NULL;
731
732 #ifndef CONFIG_TCG_INTERPRETER
733 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
734 #endif
735
736 #ifdef TCG_TARGET_NEED_POOL_LABELS
737 s->pool_labels = NULL;
738 #endif
739
740 qemu_thread_jit_write();
741 /* Generate the prologue. */
742 tcg_target_qemu_prologue(s);
743
744 #ifdef TCG_TARGET_NEED_POOL_LABELS
745 /* Allow the prologue to put e.g. guest_base into a pool entry. */
746 {
747 int result = tcg_out_pool_finalize(s);
748 tcg_debug_assert(result == 0);
749 }
750 #endif
751
752 prologue_size = tcg_current_code_size(s);
753
754 #ifndef CONFIG_TCG_INTERPRETER
755 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
756 (uintptr_t)s->code_buf, prologue_size);
757 #endif
758
759 #ifdef DEBUG_DISAS
760 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
761 FILE *logfile = qemu_log_trylock();
762 if (logfile) {
763 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
764 if (s->data_gen_ptr) {
765 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
766 size_t data_size = prologue_size - code_size;
767 size_t i;
768
769 disas(logfile, s->code_gen_ptr, code_size);
770
771 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
772 if (sizeof(tcg_target_ulong) == 8) {
773 fprintf(logfile,
774 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
775 (uintptr_t)s->data_gen_ptr + i,
776 *(uint64_t *)(s->data_gen_ptr + i));
777 } else {
778 fprintf(logfile,
779 "0x%08" PRIxPTR ": .long 0x%08x\n",
780 (uintptr_t)s->data_gen_ptr + i,
781 *(uint32_t *)(s->data_gen_ptr + i));
782 }
783 }
784 } else {
785 disas(logfile, s->code_gen_ptr, prologue_size);
786 }
787 fprintf(logfile, "\n");
788 qemu_log_unlock(logfile);
789 }
790 }
791 #endif
792
793 #ifndef CONFIG_TCG_INTERPRETER
794 /*
795 * Assert that goto_ptr is implemented completely, setting an epilogue.
796 * For tci, we use NULL as the signal to return from the interpreter,
797 * so skip this check.
798 */
799 tcg_debug_assert(tcg_code_gen_epilogue != NULL);
800 #endif
801
802 tcg_region_prologue_set(s);
803 }
804
805 void tcg_func_start(TCGContext *s)
806 {
807 tcg_pool_reset(s);
808 s->nb_temps = s->nb_globals;
809
810 /* No temps have been previously allocated for size or locality. */
811 memset(s->free_temps, 0, sizeof(s->free_temps));
812
813 /* No constant temps have been previously allocated. */
814 for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
815 if (s->const_table[i]) {
816 g_hash_table_remove_all(s->const_table[i]);
817 }
818 }
819
820 s->nb_ops = 0;
821 s->nb_labels = 0;
822 s->current_frame_offset = s->frame_start;
823
824 #ifdef CONFIG_DEBUG_TCG
825 s->goto_tb_issue_mask = 0;
826 #endif
827
828 QTAILQ_INIT(&s->ops);
829 QTAILQ_INIT(&s->free_ops);
830 QSIMPLEQ_INIT(&s->labels);
831 }
832
833 static TCGTemp *tcg_temp_alloc(TCGContext *s)
834 {
835 int n = s->nb_temps++;
836
837 if (n >= TCG_MAX_TEMPS) {
838 tcg_raise_tb_overflow(s);
839 }
840 return memset(&s->temps[n], 0, sizeof(TCGTemp));
841 }
842
843 static TCGTemp *tcg_global_alloc(TCGContext *s)
844 {
845 TCGTemp *ts;
846
847 tcg_debug_assert(s->nb_globals == s->nb_temps);
848 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
849 s->nb_globals++;
850 ts = tcg_temp_alloc(s);
851 ts->kind = TEMP_GLOBAL;
852
853 return ts;
854 }
855
856 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
857 TCGReg reg, const char *name)
858 {
859 TCGTemp *ts;
860
861 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
862 tcg_abort();
863 }
864
865 ts = tcg_global_alloc(s);
866 ts->base_type = type;
867 ts->type = type;
868 ts->kind = TEMP_FIXED;
869 ts->reg = reg;
870 ts->name = name;
871 tcg_regset_set_reg(s->reserved_regs, reg);
872
873 return ts;
874 }
875
876 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
877 {
878 s->frame_start = start;
879 s->frame_end = start + size;
880 s->frame_temp
881 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
882 }
883
884 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
885 intptr_t offset, const char *name)
886 {
887 TCGContext *s = tcg_ctx;
888 TCGTemp *base_ts = tcgv_ptr_temp(base);
889 TCGTemp *ts = tcg_global_alloc(s);
890 int indirect_reg = 0, bigendian = 0;
891 #if HOST_BIG_ENDIAN
892 bigendian = 1;
893 #endif
894
895 switch (base_ts->kind) {
896 case TEMP_FIXED:
897 break;
898 case TEMP_GLOBAL:
899 /* We do not support double-indirect registers. */
900 tcg_debug_assert(!base_ts->indirect_reg);
901 base_ts->indirect_base = 1;
902 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
903 ? 2 : 1);
904 indirect_reg = 1;
905 break;
906 default:
907 g_assert_not_reached();
908 }
909
910 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
911 TCGTemp *ts2 = tcg_global_alloc(s);
912 char buf[64];
913
914 ts->base_type = TCG_TYPE_I64;
915 ts->type = TCG_TYPE_I32;
916 ts->indirect_reg = indirect_reg;
917 ts->mem_allocated = 1;
918 ts->mem_base = base_ts;
919 ts->mem_offset = offset + bigendian * 4;
920 pstrcpy(buf, sizeof(buf), name);
921 pstrcat(buf, sizeof(buf), "_0");
922 ts->name = strdup(buf);
923
924 tcg_debug_assert(ts2 == ts + 1);
925 ts2->base_type = TCG_TYPE_I64;
926 ts2->type = TCG_TYPE_I32;
927 ts2->indirect_reg = indirect_reg;
928 ts2->mem_allocated = 1;
929 ts2->mem_base = base_ts;
930 ts2->mem_offset = offset + (1 - bigendian) * 4;
931 ts2->temp_subindex = 1;
932 pstrcpy(buf, sizeof(buf), name);
933 pstrcat(buf, sizeof(buf), "_1");
934 ts2->name = strdup(buf);
935 } else {
936 ts->base_type = type;
937 ts->type = type;
938 ts->indirect_reg = indirect_reg;
939 ts->mem_allocated = 1;
940 ts->mem_base = base_ts;
941 ts->mem_offset = offset;
942 ts->name = name;
943 }
944 return ts;
945 }
946
947 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
948 {
949 TCGContext *s = tcg_ctx;
950 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
951 TCGTemp *ts;
952 int idx, k;
953
954 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
955 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
956 if (idx < TCG_MAX_TEMPS) {
957 /* There is already an available temp with the right type. */
958 clear_bit(idx, s->free_temps[k].l);
959
960 ts = &s->temps[idx];
961 ts->temp_allocated = 1;
962 tcg_debug_assert(ts->base_type == type);
963 tcg_debug_assert(ts->kind == kind);
964 } else {
965 ts = tcg_temp_alloc(s);
966 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
967 TCGTemp *ts2 = tcg_temp_alloc(s);
968
969 ts->base_type = type;
970 ts->type = TCG_TYPE_I32;
971 ts->temp_allocated = 1;
972 ts->kind = kind;
973
974 tcg_debug_assert(ts2 == ts + 1);
975 ts2->base_type = TCG_TYPE_I64;
976 ts2->type = TCG_TYPE_I32;
977 ts2->temp_allocated = 1;
978 ts2->temp_subindex = 1;
979 ts2->kind = kind;
980 } else {
981 ts->base_type = type;
982 ts->type = type;
983 ts->temp_allocated = 1;
984 ts->kind = kind;
985 }
986 }
987
988 #if defined(CONFIG_DEBUG_TCG)
989 s->temps_in_use++;
990 #endif
991 return ts;
992 }
993
994 TCGv_vec tcg_temp_new_vec(TCGType type)
995 {
996 TCGTemp *t;
997
998 #ifdef CONFIG_DEBUG_TCG
999 switch (type) {
1000 case TCG_TYPE_V64:
1001 assert(TCG_TARGET_HAS_v64);
1002 break;
1003 case TCG_TYPE_V128:
1004 assert(TCG_TARGET_HAS_v128);
1005 break;
1006 case TCG_TYPE_V256:
1007 assert(TCG_TARGET_HAS_v256);
1008 break;
1009 default:
1010 g_assert_not_reached();
1011 }
1012 #endif
1013
1014 t = tcg_temp_new_internal(type, 0);
1015 return temp_tcgv_vec(t);
1016 }
1017
1018 /* Create a new temp of the same type as an existing temp. */
1019 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1020 {
1021 TCGTemp *t = tcgv_vec_temp(match);
1022
1023 tcg_debug_assert(t->temp_allocated != 0);
1024
1025 t = tcg_temp_new_internal(t->base_type, 0);
1026 return temp_tcgv_vec(t);
1027 }
1028
1029 void tcg_temp_free_internal(TCGTemp *ts)
1030 {
1031 TCGContext *s = tcg_ctx;
1032 int k, idx;
1033
1034 switch (ts->kind) {
1035 case TEMP_CONST:
1036 /*
1037 * In order to simplify users of tcg_constant_*,
1038 * silently ignore free.
1039 */
1040 return;
1041 case TEMP_NORMAL:
1042 case TEMP_LOCAL:
1043 break;
1044 default:
1045 g_assert_not_reached();
1046 }
1047
1048 #if defined(CONFIG_DEBUG_TCG)
1049 s->temps_in_use--;
1050 if (s->temps_in_use < 0) {
1051 fprintf(stderr, "More temporaries freed than allocated!\n");
1052 }
1053 #endif
1054
1055 tcg_debug_assert(ts->temp_allocated != 0);
1056 ts->temp_allocated = 0;
1057
1058 idx = temp_idx(ts);
1059 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1060 set_bit(idx, s->free_temps[k].l);
1061 }
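/*
 * Editorial sketch (not QEMU code): temp recycling is a bitmap free list
 * over the temps array.  tcg_temp_free_internal() sets bit temp_idx(ts)
 * in free_temps[k], and tcg_temp_new_internal() reclaims a slot with
 * find_first_bit()/clear_bit() before growing the array.  Stripped to a
 * single word:
 *
 *     static unsigned long free_map;   // 1 bit per slot, assuming <= 64 slots
 *
 *     static int alloc_slot(int *nb_slots)
 *     {
 *         if (free_map) {
 *             int idx = __builtin_ctzl(free_map);   // first free slot
 *             free_map &= free_map - 1;             // claim it
 *             return idx;
 *         }
 *         return (*nb_slots)++;                     // otherwise extend
 *     }
 *
 *     static void free_slot(int idx)
 *     {
 *         free_map |= 1UL << idx;
 *     }
 *
 * The index k folds together base type and kind, so e.g. a freed I32
 * local is never handed back as an I64 normal temp.
 */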
1062
1063 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1064 {
1065 TCGContext *s = tcg_ctx;
1066 GHashTable *h = s->const_table[type];
1067 TCGTemp *ts;
1068
1069 if (h == NULL) {
1070 h = g_hash_table_new(g_int64_hash, g_int64_equal);
1071 s->const_table[type] = h;
1072 }
1073
1074 ts = g_hash_table_lookup(h, &val);
1075 if (ts == NULL) {
1076 ts = tcg_temp_alloc(s);
1077
1078 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1079 TCGTemp *ts2 = tcg_temp_alloc(s);
1080
1081 ts->base_type = TCG_TYPE_I64;
1082 ts->type = TCG_TYPE_I32;
1083 ts->kind = TEMP_CONST;
1084 ts->temp_allocated = 1;
1085 /*
1086 * Retain the full value of the 64-bit constant in the low
1087 * part, so that the hash table works. Actual uses will
1088 * truncate the value to the low part.
1089 */
1090 ts->val = val;
1091
1092 tcg_debug_assert(ts2 == ts + 1);
1093 ts2->base_type = TCG_TYPE_I64;
1094 ts2->type = TCG_TYPE_I32;
1095 ts2->kind = TEMP_CONST;
1096 ts2->temp_allocated = 1;
1097 ts2->temp_subindex = 1;
1098 ts2->val = val >> 32;
1099 } else {
1100 ts->base_type = type;
1101 ts->type = type;
1102 ts->kind = TEMP_CONST;
1103 ts->temp_allocated = 1;
1104 ts->val = val;
1105 }
1106 g_hash_table_insert(h, &ts->val, ts);
1107 }
1108
1109 return ts;
1110 }
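/*
 * Editorial sketch (not QEMU code): tcg_constant_internal() interns
 * ("hash-conses") constants, one temp per (type, value) pair, so
 * repeated uses of the same constant share a single read-only temp.
 * Note that the hash key is a pointer to the stored value (&ts->val),
 * which therefore must remain valid for the table's lifetime.  The
 * pattern in isolation, with the same GLib calls:
 *
 *     typedef struct { int64_t val; } Const;   // stand-in for TCGTemp
 *
 *     static GHashTable *interned;
 *
 *     static Const *intern_const(int64_t val)
 *     {
 *         if (!interned) {
 *             interned = g_hash_table_new(g_int64_hash, g_int64_equal);
 *         }
 *         Const *c = g_hash_table_lookup(interned, &val);
 *         if (!c) {
 *             c = g_new0(Const, 1);
 *             c->val = val;   // stable storage doubles as the key
 *             g_hash_table_insert(interned, &c->val, c);
 *         }
 *         return c;
 *     }
 */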
1111
1112 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1113 {
1114 val = dup_const(vece, val);
1115 return temp_tcgv_vec(tcg_constant_internal(type, val));
1116 }
1117
1118 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1119 {
1120 TCGTemp *t = tcgv_vec_temp(match);
1121
1122 tcg_debug_assert(t->temp_allocated != 0);
1123 return tcg_constant_vec(t->base_type, vece, val);
1124 }
1125
1126 TCGv_i32 tcg_const_i32(int32_t val)
1127 {
1128 TCGv_i32 t0;
1129 t0 = tcg_temp_new_i32();
1130 tcg_gen_movi_i32(t0, val);
1131 return t0;
1132 }
1133
1134 TCGv_i64 tcg_const_i64(int64_t val)
1135 {
1136 TCGv_i64 t0;
1137 t0 = tcg_temp_new_i64();
1138 tcg_gen_movi_i64(t0, val);
1139 return t0;
1140 }
1141
1142 TCGv_i32 tcg_const_local_i32(int32_t val)
1143 {
1144 TCGv_i32 t0;
1145 t0 = tcg_temp_local_new_i32();
1146 tcg_gen_movi_i32(t0, val);
1147 return t0;
1148 }
1149
1150 TCGv_i64 tcg_const_local_i64(int64_t val)
1151 {
1152 TCGv_i64 t0;
1153 t0 = tcg_temp_local_new_i64();
1154 tcg_gen_movi_i64(t0, val);
1155 return t0;
1156 }
1157
1158 #if defined(CONFIG_DEBUG_TCG)
1159 void tcg_clear_temp_count(void)
1160 {
1161 TCGContext *s = tcg_ctx;
1162 s->temps_in_use = 0;
1163 }
1164
1165 int tcg_check_temp_count(void)
1166 {
1167 TCGContext *s = tcg_ctx;
1168 if (s->temps_in_use) {
1169 /* Clear the count so that we don't give another
1170 * warning immediately next time around.
1171 */
1172 s->temps_in_use = 0;
1173 return 1;
1174 }
1175 return 0;
1176 }
1177 #endif
1178
1179 /* Return true if OP may appear in the opcode stream.
1180 Test the runtime variable that controls each opcode. */
1181 bool tcg_op_supported(TCGOpcode op)
1182 {
1183 const bool have_vec
1184 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1185
1186 switch (op) {
1187 case INDEX_op_discard:
1188 case INDEX_op_set_label:
1189 case INDEX_op_call:
1190 case INDEX_op_br:
1191 case INDEX_op_mb:
1192 case INDEX_op_insn_start:
1193 case INDEX_op_exit_tb:
1194 case INDEX_op_goto_tb:
1195 case INDEX_op_goto_ptr:
1196 case INDEX_op_qemu_ld_i32:
1197 case INDEX_op_qemu_st_i32:
1198 case INDEX_op_qemu_ld_i64:
1199 case INDEX_op_qemu_st_i64:
1200 return true;
1201
1202 case INDEX_op_qemu_st8_i32:
1203 return TCG_TARGET_HAS_qemu_st8_i32;
1204
1205 case INDEX_op_mov_i32:
1206 case INDEX_op_setcond_i32:
1207 case INDEX_op_brcond_i32:
1208 case INDEX_op_ld8u_i32:
1209 case INDEX_op_ld8s_i32:
1210 case INDEX_op_ld16u_i32:
1211 case INDEX_op_ld16s_i32:
1212 case INDEX_op_ld_i32:
1213 case INDEX_op_st8_i32:
1214 case INDEX_op_st16_i32:
1215 case INDEX_op_st_i32:
1216 case INDEX_op_add_i32:
1217 case INDEX_op_sub_i32:
1218 case INDEX_op_mul_i32:
1219 case INDEX_op_and_i32:
1220 case INDEX_op_or_i32:
1221 case INDEX_op_xor_i32:
1222 case INDEX_op_shl_i32:
1223 case INDEX_op_shr_i32:
1224 case INDEX_op_sar_i32:
1225 return true;
1226
1227 case INDEX_op_movcond_i32:
1228 return TCG_TARGET_HAS_movcond_i32;
1229 case INDEX_op_div_i32:
1230 case INDEX_op_divu_i32:
1231 return TCG_TARGET_HAS_div_i32;
1232 case INDEX_op_rem_i32:
1233 case INDEX_op_remu_i32:
1234 return TCG_TARGET_HAS_rem_i32;
1235 case INDEX_op_div2_i32:
1236 case INDEX_op_divu2_i32:
1237 return TCG_TARGET_HAS_div2_i32;
1238 case INDEX_op_rotl_i32:
1239 case INDEX_op_rotr_i32:
1240 return TCG_TARGET_HAS_rot_i32;
1241 case INDEX_op_deposit_i32:
1242 return TCG_TARGET_HAS_deposit_i32;
1243 case INDEX_op_extract_i32:
1244 return TCG_TARGET_HAS_extract_i32;
1245 case INDEX_op_sextract_i32:
1246 return TCG_TARGET_HAS_sextract_i32;
1247 case INDEX_op_extract2_i32:
1248 return TCG_TARGET_HAS_extract2_i32;
1249 case INDEX_op_add2_i32:
1250 return TCG_TARGET_HAS_add2_i32;
1251 case INDEX_op_sub2_i32:
1252 return TCG_TARGET_HAS_sub2_i32;
1253 case INDEX_op_mulu2_i32:
1254 return TCG_TARGET_HAS_mulu2_i32;
1255 case INDEX_op_muls2_i32:
1256 return TCG_TARGET_HAS_muls2_i32;
1257 case INDEX_op_muluh_i32:
1258 return TCG_TARGET_HAS_muluh_i32;
1259 case INDEX_op_mulsh_i32:
1260 return TCG_TARGET_HAS_mulsh_i32;
1261 case INDEX_op_ext8s_i32:
1262 return TCG_TARGET_HAS_ext8s_i32;
1263 case INDEX_op_ext16s_i32:
1264 return TCG_TARGET_HAS_ext16s_i32;
1265 case INDEX_op_ext8u_i32:
1266 return TCG_TARGET_HAS_ext8u_i32;
1267 case INDEX_op_ext16u_i32:
1268 return TCG_TARGET_HAS_ext16u_i32;
1269 case INDEX_op_bswap16_i32:
1270 return TCG_TARGET_HAS_bswap16_i32;
1271 case INDEX_op_bswap32_i32:
1272 return TCG_TARGET_HAS_bswap32_i32;
1273 case INDEX_op_not_i32:
1274 return TCG_TARGET_HAS_not_i32;
1275 case INDEX_op_neg_i32:
1276 return TCG_TARGET_HAS_neg_i32;
1277 case INDEX_op_andc_i32:
1278 return TCG_TARGET_HAS_andc_i32;
1279 case INDEX_op_orc_i32:
1280 return TCG_TARGET_HAS_orc_i32;
1281 case INDEX_op_eqv_i32:
1282 return TCG_TARGET_HAS_eqv_i32;
1283 case INDEX_op_nand_i32:
1284 return TCG_TARGET_HAS_nand_i32;
1285 case INDEX_op_nor_i32:
1286 return TCG_TARGET_HAS_nor_i32;
1287 case INDEX_op_clz_i32:
1288 return TCG_TARGET_HAS_clz_i32;
1289 case INDEX_op_ctz_i32:
1290 return TCG_TARGET_HAS_ctz_i32;
1291 case INDEX_op_ctpop_i32:
1292 return TCG_TARGET_HAS_ctpop_i32;
1293
1294 case INDEX_op_brcond2_i32:
1295 case INDEX_op_setcond2_i32:
1296 return TCG_TARGET_REG_BITS == 32;
1297
1298 case INDEX_op_mov_i64:
1299 case INDEX_op_setcond_i64:
1300 case INDEX_op_brcond_i64:
1301 case INDEX_op_ld8u_i64:
1302 case INDEX_op_ld8s_i64:
1303 case INDEX_op_ld16u_i64:
1304 case INDEX_op_ld16s_i64:
1305 case INDEX_op_ld32u_i64:
1306 case INDEX_op_ld32s_i64:
1307 case INDEX_op_ld_i64:
1308 case INDEX_op_st8_i64:
1309 case INDEX_op_st16_i64:
1310 case INDEX_op_st32_i64:
1311 case INDEX_op_st_i64:
1312 case INDEX_op_add_i64:
1313 case INDEX_op_sub_i64:
1314 case INDEX_op_mul_i64:
1315 case INDEX_op_and_i64:
1316 case INDEX_op_or_i64:
1317 case INDEX_op_xor_i64:
1318 case INDEX_op_shl_i64:
1319 case INDEX_op_shr_i64:
1320 case INDEX_op_sar_i64:
1321 case INDEX_op_ext_i32_i64:
1322 case INDEX_op_extu_i32_i64:
1323 return TCG_TARGET_REG_BITS == 64;
1324
1325 case INDEX_op_movcond_i64:
1326 return TCG_TARGET_HAS_movcond_i64;
1327 case INDEX_op_div_i64:
1328 case INDEX_op_divu_i64:
1329 return TCG_TARGET_HAS_div_i64;
1330 case INDEX_op_rem_i64:
1331 case INDEX_op_remu_i64:
1332 return TCG_TARGET_HAS_rem_i64;
1333 case INDEX_op_div2_i64:
1334 case INDEX_op_divu2_i64:
1335 return TCG_TARGET_HAS_div2_i64;
1336 case INDEX_op_rotl_i64:
1337 case INDEX_op_rotr_i64:
1338 return TCG_TARGET_HAS_rot_i64;
1339 case INDEX_op_deposit_i64:
1340 return TCG_TARGET_HAS_deposit_i64;
1341 case INDEX_op_extract_i64:
1342 return TCG_TARGET_HAS_extract_i64;
1343 case INDEX_op_sextract_i64:
1344 return TCG_TARGET_HAS_sextract_i64;
1345 case INDEX_op_extract2_i64:
1346 return TCG_TARGET_HAS_extract2_i64;
1347 case INDEX_op_extrl_i64_i32:
1348 return TCG_TARGET_HAS_extrl_i64_i32;
1349 case INDEX_op_extrh_i64_i32:
1350 return TCG_TARGET_HAS_extrh_i64_i32;
1351 case INDEX_op_ext8s_i64:
1352 return TCG_TARGET_HAS_ext8s_i64;
1353 case INDEX_op_ext16s_i64:
1354 return TCG_TARGET_HAS_ext16s_i64;
1355 case INDEX_op_ext32s_i64:
1356 return TCG_TARGET_HAS_ext32s_i64;
1357 case INDEX_op_ext8u_i64:
1358 return TCG_TARGET_HAS_ext8u_i64;
1359 case INDEX_op_ext16u_i64:
1360 return TCG_TARGET_HAS_ext16u_i64;
1361 case INDEX_op_ext32u_i64:
1362 return TCG_TARGET_HAS_ext32u_i64;
1363 case INDEX_op_bswap16_i64:
1364 return TCG_TARGET_HAS_bswap16_i64;
1365 case INDEX_op_bswap32_i64:
1366 return TCG_TARGET_HAS_bswap32_i64;
1367 case INDEX_op_bswap64_i64:
1368 return TCG_TARGET_HAS_bswap64_i64;
1369 case INDEX_op_not_i64:
1370 return TCG_TARGET_HAS_not_i64;
1371 case INDEX_op_neg_i64:
1372 return TCG_TARGET_HAS_neg_i64;
1373 case INDEX_op_andc_i64:
1374 return TCG_TARGET_HAS_andc_i64;
1375 case INDEX_op_orc_i64:
1376 return TCG_TARGET_HAS_orc_i64;
1377 case INDEX_op_eqv_i64:
1378 return TCG_TARGET_HAS_eqv_i64;
1379 case INDEX_op_nand_i64:
1380 return TCG_TARGET_HAS_nand_i64;
1381 case INDEX_op_nor_i64:
1382 return TCG_TARGET_HAS_nor_i64;
1383 case INDEX_op_clz_i64:
1384 return TCG_TARGET_HAS_clz_i64;
1385 case INDEX_op_ctz_i64:
1386 return TCG_TARGET_HAS_ctz_i64;
1387 case INDEX_op_ctpop_i64:
1388 return TCG_TARGET_HAS_ctpop_i64;
1389 case INDEX_op_add2_i64:
1390 return TCG_TARGET_HAS_add2_i64;
1391 case INDEX_op_sub2_i64:
1392 return TCG_TARGET_HAS_sub2_i64;
1393 case INDEX_op_mulu2_i64:
1394 return TCG_TARGET_HAS_mulu2_i64;
1395 case INDEX_op_muls2_i64:
1396 return TCG_TARGET_HAS_muls2_i64;
1397 case INDEX_op_muluh_i64:
1398 return TCG_TARGET_HAS_muluh_i64;
1399 case INDEX_op_mulsh_i64:
1400 return TCG_TARGET_HAS_mulsh_i64;
1401
1402 case INDEX_op_mov_vec:
1403 case INDEX_op_dup_vec:
1404 case INDEX_op_dupm_vec:
1405 case INDEX_op_ld_vec:
1406 case INDEX_op_st_vec:
1407 case INDEX_op_add_vec:
1408 case INDEX_op_sub_vec:
1409 case INDEX_op_and_vec:
1410 case INDEX_op_or_vec:
1411 case INDEX_op_xor_vec:
1412 case INDEX_op_cmp_vec:
1413 return have_vec;
1414 case INDEX_op_dup2_vec:
1415 return have_vec && TCG_TARGET_REG_BITS == 32;
1416 case INDEX_op_not_vec:
1417 return have_vec && TCG_TARGET_HAS_not_vec;
1418 case INDEX_op_neg_vec:
1419 return have_vec && TCG_TARGET_HAS_neg_vec;
1420 case INDEX_op_abs_vec:
1421 return have_vec && TCG_TARGET_HAS_abs_vec;
1422 case INDEX_op_andc_vec:
1423 return have_vec && TCG_TARGET_HAS_andc_vec;
1424 case INDEX_op_orc_vec:
1425 return have_vec && TCG_TARGET_HAS_orc_vec;
1426 case INDEX_op_nand_vec:
1427 return have_vec && TCG_TARGET_HAS_nand_vec;
1428 case INDEX_op_nor_vec:
1429 return have_vec && TCG_TARGET_HAS_nor_vec;
1430 case INDEX_op_eqv_vec:
1431 return have_vec && TCG_TARGET_HAS_eqv_vec;
1432 case INDEX_op_mul_vec:
1433 return have_vec && TCG_TARGET_HAS_mul_vec;
1434 case INDEX_op_shli_vec:
1435 case INDEX_op_shri_vec:
1436 case INDEX_op_sari_vec:
1437 return have_vec && TCG_TARGET_HAS_shi_vec;
1438 case INDEX_op_shls_vec:
1439 case INDEX_op_shrs_vec:
1440 case INDEX_op_sars_vec:
1441 return have_vec && TCG_TARGET_HAS_shs_vec;
1442 case INDEX_op_shlv_vec:
1443 case INDEX_op_shrv_vec:
1444 case INDEX_op_sarv_vec:
1445 return have_vec && TCG_TARGET_HAS_shv_vec;
1446 case INDEX_op_rotli_vec:
1447 return have_vec && TCG_TARGET_HAS_roti_vec;
1448 case INDEX_op_rotls_vec:
1449 return have_vec && TCG_TARGET_HAS_rots_vec;
1450 case INDEX_op_rotlv_vec:
1451 case INDEX_op_rotrv_vec:
1452 return have_vec && TCG_TARGET_HAS_rotv_vec;
1453 case INDEX_op_ssadd_vec:
1454 case INDEX_op_usadd_vec:
1455 case INDEX_op_sssub_vec:
1456 case INDEX_op_ussub_vec:
1457 return have_vec && TCG_TARGET_HAS_sat_vec;
1458 case INDEX_op_smin_vec:
1459 case INDEX_op_umin_vec:
1460 case INDEX_op_smax_vec:
1461 case INDEX_op_umax_vec:
1462 return have_vec && TCG_TARGET_HAS_minmax_vec;
1463 case INDEX_op_bitsel_vec:
1464 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1465 case INDEX_op_cmpsel_vec:
1466 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1467
1468 default:
1469 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1470 return true;
1471 }
1472 }
1473
1474 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1475    and endian swapping. Maybe it would be better to do the alignment
1476    and endian swapping in tcg_reg_alloc_call(). */
1477 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1478 {
1479 int i, real_args, nb_rets, pi;
1480 unsigned typemask;
1481 const TCGHelperInfo *info;
1482 TCGOp *op;
1483
1484 info = g_hash_table_lookup(helper_table, (gpointer)func);
1485 typemask = info->typemask;
1486
1487 #ifdef CONFIG_PLUGIN
1488 /* detect non-plugin helpers */
1489 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1490 tcg_ctx->plugin_insn->calls_helpers = true;
1491 }
1492 #endif
1493
1494 #if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1495 for (i = 0; i < nargs; ++i) {
1496 int argtype = extract32(typemask, (i + 1) * 3, 3);
1497 bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1498 bool is_signed = argtype & 1;
1499
1500 if (is_32bit) {
1501 TCGv_i64 temp = tcg_temp_new_i64();
1502 TCGv_i32 orig = temp_tcgv_i32(args[i]);
1503 if (is_signed) {
1504 tcg_gen_ext_i32_i64(temp, orig);
1505 } else {
1506 tcg_gen_extu_i32_i64(temp, orig);
1507 }
1508 args[i] = tcgv_i64_temp(temp);
1509 }
1510 }
1511 #endif /* TCG_TARGET_EXTEND_ARGS */
1512
1513 op = tcg_emit_op(INDEX_op_call);
1514
1515 pi = 0;
1516 if (ret != NULL) {
1517 if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1518 #if HOST_BIG_ENDIAN
1519 op->args[pi++] = temp_arg(ret + 1);
1520 op->args[pi++] = temp_arg(ret);
1521 #else
1522 op->args[pi++] = temp_arg(ret);
1523 op->args[pi++] = temp_arg(ret + 1);
1524 #endif
1525 nb_rets = 2;
1526 } else {
1527 op->args[pi++] = temp_arg(ret);
1528 nb_rets = 1;
1529 }
1530 } else {
1531 nb_rets = 0;
1532 }
1533 TCGOP_CALLO(op) = nb_rets;
1534
1535 real_args = 0;
1536 for (i = 0; i < nargs; i++) {
1537 int argtype = extract32(typemask, (i + 1) * 3, 3);
1538 bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1539 bool want_align = false;
1540
1541 #if defined(CONFIG_TCG_INTERPRETER)
1542 /*
1543 * Align all arguments, so that they land in predictable places
1544 * for passing off to ffi_call.
1545 */
1546 want_align = true;
1547 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1548 /* Some targets want aligned 64-bit args */
1549 want_align = is_64bit;
1550 #endif
1551
1552 if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1553 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1554 real_args++;
1555 }
1556
1557 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1558 op->args[pi++] = temp_arg(args[i] + HOST_BIG_ENDIAN);
1559 op->args[pi++] = temp_arg(args[i] + !HOST_BIG_ENDIAN);
1560 real_args += 2;
1561 continue;
1562 }
1563
1564 op->args[pi++] = temp_arg(args[i]);
1565 real_args++;
1566 }
1567 op->args[pi++] = (uintptr_t)func;
1568 op->args[pi++] = (uintptr_t)info;
1569 TCGOP_CALLI(op) = real_args;
1570
1571 /* Make sure the fields didn't overflow. */
1572 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1573 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1574
1575 #if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1576 for (i = 0; i < nargs; ++i) {
1577 int argtype = extract32(typemask, (i + 1) * 3, 3);
1578 bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1579
1580 if (is_32bit) {
1581 tcg_temp_free_internal(args[i]);
1582 }
1583 }
1584 #endif /* TCG_TARGET_EXTEND_ARGS */
1585 }
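/*
 * Editorial note: on 32-bit hosts a 64-bit argument occupies two
 * adjacent temps (allocated as a pair, low half at subindex 0), and the
 * code above orders them by host endianness:
 *
 *     op->args[pi++] = temp_arg(args[i] + HOST_BIG_ENDIAN);
 *     op->args[pi++] = temp_arg(args[i] + !HOST_BIG_ENDIAN);
 *
 * i.e. on a little-endian host the low half (args[i] + 0) is passed
 * first, on a big-endian host the high half (args[i] + 1) is, matching
 * where the calling convention expects the significant bits.
 */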
1586
1587 static void tcg_reg_alloc_start(TCGContext *s)
1588 {
1589 int i, n;
1590
1591 for (i = 0, n = s->nb_temps; i < n; i++) {
1592 TCGTemp *ts = &s->temps[i];
1593 TCGTempVal val = TEMP_VAL_MEM;
1594
1595 switch (ts->kind) {
1596 case TEMP_CONST:
1597 val = TEMP_VAL_CONST;
1598 break;
1599 case TEMP_FIXED:
1600 val = TEMP_VAL_REG;
1601 break;
1602 case TEMP_GLOBAL:
1603 break;
1604 case TEMP_NORMAL:
1605 case TEMP_EBB:
1606 val = TEMP_VAL_DEAD;
1607 /* fall through */
1608 case TEMP_LOCAL:
1609 ts->mem_allocated = 0;
1610 break;
1611 default:
1612 g_assert_not_reached();
1613 }
1614 ts->val_type = val;
1615 }
1616
1617 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1618 }
1619
1620 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1621 TCGTemp *ts)
1622 {
1623 int idx = temp_idx(ts);
1624
1625 switch (ts->kind) {
1626 case TEMP_FIXED:
1627 case TEMP_GLOBAL:
1628 pstrcpy(buf, buf_size, ts->name);
1629 break;
1630 case TEMP_LOCAL:
1631 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1632 break;
1633 case TEMP_EBB:
1634 snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
1635 break;
1636 case TEMP_NORMAL:
1637 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1638 break;
1639 case TEMP_CONST:
1640 switch (ts->type) {
1641 case TCG_TYPE_I32:
1642 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1643 break;
1644 #if TCG_TARGET_REG_BITS > 32
1645 case TCG_TYPE_I64:
1646 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1647 break;
1648 #endif
1649 case TCG_TYPE_V64:
1650 case TCG_TYPE_V128:
1651 case TCG_TYPE_V256:
1652 snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1653 64 << (ts->type - TCG_TYPE_V64), ts->val);
1654 break;
1655 default:
1656 g_assert_not_reached();
1657 }
1658 break;
1659 }
1660 return buf;
1661 }
1662
1663 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1664 int buf_size, TCGArg arg)
1665 {
1666 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1667 }
1668
1669 static const char * const cond_name[] =
1670 {
1671 [TCG_COND_NEVER] = "never",
1672 [TCG_COND_ALWAYS] = "always",
1673 [TCG_COND_EQ] = "eq",
1674 [TCG_COND_NE] = "ne",
1675 [TCG_COND_LT] = "lt",
1676 [TCG_COND_GE] = "ge",
1677 [TCG_COND_LE] = "le",
1678 [TCG_COND_GT] = "gt",
1679 [TCG_COND_LTU] = "ltu",
1680 [TCG_COND_GEU] = "geu",
1681 [TCG_COND_LEU] = "leu",
1682 [TCG_COND_GTU] = "gtu"
1683 };
1684
1685 static const char * const ldst_name[] =
1686 {
1687 [MO_UB] = "ub",
1688 [MO_SB] = "sb",
1689 [MO_LEUW] = "leuw",
1690 [MO_LESW] = "lesw",
1691 [MO_LEUL] = "leul",
1692 [MO_LESL] = "lesl",
1693 [MO_LEUQ] = "leq",
1694 [MO_BEUW] = "beuw",
1695 [MO_BESW] = "besw",
1696 [MO_BEUL] = "beul",
1697 [MO_BESL] = "besl",
1698 [MO_BEUQ] = "beq",
1699 };
1700
1701 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1702 #ifdef TARGET_ALIGNED_ONLY
1703 [MO_UNALN >> MO_ASHIFT] = "un+",
1704 [MO_ALIGN >> MO_ASHIFT] = "",
1705 #else
1706 [MO_UNALN >> MO_ASHIFT] = "",
1707 [MO_ALIGN >> MO_ASHIFT] = "al+",
1708 #endif
1709 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
1710 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
1711 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
1712 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1713 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1714 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1715 };
1716
1717 static const char bswap_flag_name[][6] = {
1718 [TCG_BSWAP_IZ] = "iz",
1719 [TCG_BSWAP_OZ] = "oz",
1720 [TCG_BSWAP_OS] = "os",
1721 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1722 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1723 };
1724
1725 static inline bool tcg_regset_single(TCGRegSet d)
1726 {
1727 return (d & (d - 1)) == 0;
1728 }
1729
1730 static inline TCGReg tcg_regset_first(TCGRegSet d)
1731 {
1732 if (TCG_TARGET_NB_REGS <= 32) {
1733 return ctz32(d);
1734 } else {
1735 return ctz64(d);
1736 }
1737 }
1738
1739 /* Return only the number of characters output -- no error return. */
1740 #define ne_fprintf(...) \
1741 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
1742
1743 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
1744 {
1745 char buf[128];
1746 TCGOp *op;
1747
1748 QTAILQ_FOREACH(op, &s->ops, link) {
1749 int i, k, nb_oargs, nb_iargs, nb_cargs;
1750 const TCGOpDef *def;
1751 TCGOpcode c;
1752 int col = 0;
1753
1754 c = op->opc;
1755 def = &tcg_op_defs[c];
1756
1757 if (c == INDEX_op_insn_start) {
1758 nb_oargs = 0;
1759 col += ne_fprintf(f, "\n ----");
1760
1761 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1762 target_ulong a;
1763 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1764 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1765 #else
1766 a = op->args[i];
1767 #endif
1768 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
1769 }
1770 } else if (c == INDEX_op_call) {
1771 const TCGHelperInfo *info = tcg_call_info(op);
1772 void *func = tcg_call_func(op);
1773
1774 /* variable number of arguments */
1775 nb_oargs = TCGOP_CALLO(op);
1776 nb_iargs = TCGOP_CALLI(op);
1777 nb_cargs = def->nb_cargs;
1778
1779 col += ne_fprintf(f, " %s ", def->name);
1780
1781 /*
1782 * Print the function name from TCGHelperInfo, if available.
1783 * Note that plugins have a template function for the info,
1784 * but the actual function pointer comes from the plugin.
1785 */
1786 if (func == info->func) {
1787 col += ne_fprintf(f, "%s", info->name);
1788 } else {
1789 col += ne_fprintf(f, "plugin(%p)", func);
1790 }
1791
1792 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
1793 for (i = 0; i < nb_oargs; i++) {
1794 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1795 op->args[i]));
1796 }
1797 for (i = 0; i < nb_iargs; i++) {
1798 TCGArg arg = op->args[nb_oargs + i];
1799 const char *t = "<dummy>";
1800 if (arg != TCG_CALL_DUMMY_ARG) {
1801 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1802 }
1803 col += ne_fprintf(f, ",%s", t);
1804 }
1805 } else {
1806 col += ne_fprintf(f, " %s ", def->name);
1807
1808 nb_oargs = def->nb_oargs;
1809 nb_iargs = def->nb_iargs;
1810 nb_cargs = def->nb_cargs;
1811
1812 if (def->flags & TCG_OPF_VECTOR) {
1813 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
1814 8 << TCGOP_VECE(op));
1815 }
1816
1817 k = 0;
1818 for (i = 0; i < nb_oargs; i++) {
1819 const char *sep = k ? "," : "";
1820 col += ne_fprintf(f, "%s%s", sep,
1821 tcg_get_arg_str(s, buf, sizeof(buf),
1822 op->args[k++]));
1823 }
1824 for (i = 0; i < nb_iargs; i++) {
1825 const char *sep = k ? "," : "";
1826 col += ne_fprintf(f, "%s%s", sep,
1827 tcg_get_arg_str(s, buf, sizeof(buf),
1828 op->args[k++]));
1829 }
1830 switch (c) {
1831 case INDEX_op_brcond_i32:
1832 case INDEX_op_setcond_i32:
1833 case INDEX_op_movcond_i32:
1834 case INDEX_op_brcond2_i32:
1835 case INDEX_op_setcond2_i32:
1836 case INDEX_op_brcond_i64:
1837 case INDEX_op_setcond_i64:
1838 case INDEX_op_movcond_i64:
1839 case INDEX_op_cmp_vec:
1840 case INDEX_op_cmpsel_vec:
1841 if (op->args[k] < ARRAY_SIZE(cond_name)
1842 && cond_name[op->args[k]]) {
1843 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
1844 } else {
1845 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
1846 }
1847 i = 1;
1848 break;
1849 case INDEX_op_qemu_ld_i32:
1850 case INDEX_op_qemu_st_i32:
1851 case INDEX_op_qemu_st8_i32:
1852 case INDEX_op_qemu_ld_i64:
1853 case INDEX_op_qemu_st_i64:
1854 {
1855 MemOpIdx oi = op->args[k++];
1856 MemOp op = get_memop(oi);
1857 unsigned ix = get_mmuidx(oi);
1858
1859 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1860 col += ne_fprintf(f, ",$0x%x,%u", op, ix);
1861 } else {
1862 const char *s_al, *s_op;
1863 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1864 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1865 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
1866 }
1867 i = 1;
1868 }
1869 break;
1870 case INDEX_op_bswap16_i32:
1871 case INDEX_op_bswap16_i64:
1872 case INDEX_op_bswap32_i32:
1873 case INDEX_op_bswap32_i64:
1874 case INDEX_op_bswap64_i64:
1875 {
1876 TCGArg flags = op->args[k];
1877 const char *name = NULL;
1878
1879 if (flags < ARRAY_SIZE(bswap_flag_name)) {
1880 name = bswap_flag_name[flags];
1881 }
1882 if (name) {
1883 col += ne_fprintf(f, ",%s", name);
1884 } else {
1885 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
1886 }
1887 i = k = 1;
1888 }
1889 break;
1890 default:
1891 i = 0;
1892 break;
1893 }
1894 switch (c) {
1895 case INDEX_op_set_label:
1896 case INDEX_op_br:
1897 case INDEX_op_brcond_i32:
1898 case INDEX_op_brcond_i64:
1899 case INDEX_op_brcond2_i32:
1900 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
1901 arg_label(op->args[k])->id);
1902 i++, k++;
1903 break;
1904 default:
1905 break;
1906 }
1907 for (; i < nb_cargs; i++, k++) {
1908 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
1909 op->args[k]);
1910 }
1911 }
1912
1913 if (have_prefs || op->life) {
1914 for (; col < 40; ++col) {
1915 putc(' ', f);
1916 }
1917 }
1918
1919 if (op->life) {
1920 unsigned life = op->life;
1921
1922 if (life & (SYNC_ARG * 3)) {
1923 ne_fprintf(f, " sync:");
1924 for (i = 0; i < 2; ++i) {
1925 if (life & (SYNC_ARG << i)) {
1926 ne_fprintf(f, " %d", i);
1927 }
1928 }
1929 }
1930 life /= DEAD_ARG;
1931 if (life) {
1932 ne_fprintf(f, " dead:");
1933 for (i = 0; life; ++i, life >>= 1) {
1934 if (life & 1) {
1935 ne_fprintf(f, " %d", i);
1936 }
1937 }
1938 }
1939 }
1940
1941 if (have_prefs) {
1942 for (i = 0; i < nb_oargs; ++i) {
1943 TCGRegSet set = op->output_pref[i];
1944
1945 if (i == 0) {
1946 ne_fprintf(f, " pref=");
1947 } else {
1948 ne_fprintf(f, ",");
1949 }
1950 if (set == 0) {
1951 ne_fprintf(f, "none");
1952 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
1953 ne_fprintf(f, "all");
1954 #ifdef CONFIG_DEBUG_TCG
1955 } else if (tcg_regset_single(set)) {
1956 TCGReg reg = tcg_regset_first(set);
1957 ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
1958 #endif
1959 } else if (TCG_TARGET_NB_REGS <= 32) {
1960 ne_fprintf(f, "0x%x", (uint32_t)set);
1961 } else {
1962 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
1963 }
1964 }
1965 }
1966
1967 putc('\n', f);
1968 }
1969 }
1970
1971 /* We give higher priority to constraints with fewer registers. */
1972 static int get_constraint_priority(const TCGOpDef *def, int k)
1973 {
1974 const TCGArgConstraint *arg_ct = &def->args_ct[k];
1975 int n = ctpop64(arg_ct->regs);
1976
1977 /*
1978 * Sort constraints of a single register first, which includes output
1979 * aliases (which must exactly match the input already allocated).
1980 */
1981 if (n == 1 || arg_ct->oalias) {
1982 return INT_MAX;
1983 }
1984
1985 /*
1986 * Sort register pairs next, first then second immediately after.
1987 * Arbitrarily sort multiple pairs by the index of the first reg;
1988 * there shouldn't be many pairs.
1989 */
1990 switch (arg_ct->pair) {
1991 case 1:
1992 case 3:
1993 return (k + 1) * 2;
1994 case 2:
1995 return (arg_ct->pair_index + 1) * 2 - 1;
1996 }
1997
1998 /* Finally, sort by decreasing register count. */
1999 assert(n > 1);
2000 return -n;
2001 }
2002
2003 /* sort from highest priority to lowest */
2004 static void sort_constraints(TCGOpDef *def, int start, int n)
2005 {
2006 int i, j;
2007 TCGArgConstraint *a = def->args_ct;
2008
2009 for (i = 0; i < n; i++) {
2010 a[start + i].sort_index = start + i;
2011 }
2012 if (n <= 1) {
2013 return;
2014 }
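/* A simple O(n^2) exchange sort on sort_index; the arg count is small. */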
2015 for (i = 0; i < n - 1; i++) {
2016 for (j = i + 1; j < n; j++) {
2017 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2018 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2019 if (p1 < p2) {
2020 int tmp = a[start + i].sort_index;
2021 a[start + i].sort_index = a[start + j].sort_index;
2022 a[start + j].sort_index = tmp;
2023 }
2024 }
2025 }
2026 }
2027
2028 static void process_op_defs(TCGContext *s)
2029 {
2030 TCGOpcode op;
2031
2032 for (op = 0; op < NB_OPS; op++) {
2033 TCGOpDef *def = &tcg_op_defs[op];
2034 const TCGTargetOpDef *tdefs;
2035 bool saw_alias_pair = false;
2036 int i, o, i2, o2, nb_args;
2037
2038 if (def->flags & TCG_OPF_NOT_PRESENT) {
2039 continue;
2040 }
2041
2042 nb_args = def->nb_iargs + def->nb_oargs;
2043 if (nb_args == 0) {
2044 continue;
2045 }
2046
2047 /*
2048 * Macro magic should make it impossible, but double-check that
2049 * the array index is in range. Since the signedness of an enum
2050 * is implementation-defined, force the result to unsigned.
2051 */
2052 unsigned con_set = tcg_target_op_def(op);
2053 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2054 tdefs = &constraint_sets[con_set];
2055
2056 for (i = 0; i < nb_args; i++) {
2057 const char *ct_str = tdefs->args_ct_str[i];
2058 bool input_p = i >= def->nb_oargs;
2059
2060 /* Incomplete TCGTargetOpDef entry. */
2061 tcg_debug_assert(ct_str != NULL);
2062
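/*
* Constraint letters: a digit aliases this input to that output's
* register, '&' requests a new (early-clobber) output register,
* 'p'/'m' place this argument in the register after/before the
* previous one, and 'i' accepts an immediate; the remaining letters
* come from tcg-target-con-str.h. As an illustration, a target
* might describe a two-address add as C_O1_I2(r, 0, ri).
*/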
2063 switch (*ct_str) {
2064 case '0' ... '9':
2065 o = *ct_str - '0';
2066 tcg_debug_assert(input_p);
2067 tcg_debug_assert(o < def->nb_oargs);
2068 tcg_debug_assert(def->args_ct[o].regs != 0);
2069 tcg_debug_assert(!def->args_ct[o].oalias);
2070 def->args_ct[i] = def->args_ct[o];
2071 /* The output sets oalias. */
2072 def->args_ct[o].oalias = 1;
2073 def->args_ct[o].alias_index = i;
2074 /* The input sets ialias. */
2075 def->args_ct[i].ialias = 1;
2076 def->args_ct[i].alias_index = o;
2077 if (def->args_ct[i].pair) {
2078 saw_alias_pair = true;
2079 }
2080 tcg_debug_assert(ct_str[1] == '\0');
2081 continue;
2082
2083 case '&':
2084 tcg_debug_assert(!input_p);
2085 def->args_ct[i].newreg = true;
2086 ct_str++;
2087 break;
2088
2089 case 'p': /* plus */
2090 /* Allocate to the register after the previous. */
2091 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2092 o = i - 1;
2093 tcg_debug_assert(!def->args_ct[o].pair);
2094 tcg_debug_assert(!def->args_ct[o].ct);
2095 def->args_ct[i] = (TCGArgConstraint){
2096 .pair = 2,
2097 .pair_index = o,
2098 .regs = def->args_ct[o].regs << 1,
2099 };
2100 def->args_ct[o].pair = 1;
2101 def->args_ct[o].pair_index = i;
2102 tcg_debug_assert(ct_str[1] == '\0');
2103 continue;
2104
2105 case 'm': /* minus */
2106 /* Allocate to the register before the previous. */
2107 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2108 o = i - 1;
2109 tcg_debug_assert(!def->args_ct[o].pair);
2110 tcg_debug_assert(!def->args_ct[o].ct);
2111 def->args_ct[i] = (TCGArgConstraint){
2112 .pair = 1,
2113 .pair_index = o,
2114 .regs = def->args_ct[o].regs >> 1,
2115 };
2116 def->args_ct[o].pair = 2;
2117 def->args_ct[o].pair_index = i;
2118 tcg_debug_assert(ct_str[1] == '\0');
2119 continue;
2120 }
2121
2122 do {
2123 switch (*ct_str) {
2124 case 'i':
2125 def->args_ct[i].ct |= TCG_CT_CONST;
2126 break;
2127
2128 /* Include all of the target-specific constraints. */
2129
2130 #undef CONST
2131 #define CONST(CASE, MASK) \
2132 case CASE: def->args_ct[i].ct |= MASK; break;
2133 #define REGS(CASE, MASK) \
2134 case CASE: def->args_ct[i].regs |= MASK; break;
2135
2136 #include "tcg-target-con-str.h"
2137
2138 #undef REGS
2139 #undef CONST
2140 default:
2141 case '0' ... '9':
2142 case '&':
2143 case 'p':
2144 case 'm':
2145 /* Typo in TCGTargetOpDef constraint. */
2146 g_assert_not_reached();
2147 }
2148 } while (*++ct_str != '\0');
2149 }
2150
2151 /* TCGTargetOpDef entry with too much information? */
2152 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2153
2154 /*
2155 * Fix up output pairs that are aliased with inputs.
2156 * When we created the alias, we copied pair from the output.
2157 * There are three cases:
2158 * (1a) Pairs of inputs alias pairs of outputs.
2159 * (1b) One input aliases the first of a pair of outputs.
2160 * (2) One input aliases the second of a pair of outputs.
2161 *
2162 * Case 1a is handled by making sure that the pair_index'es are
2163 * properly updated so that they appear the same as a pair of inputs.
2164 *
2165 * Case 1b is handled by setting the pair_index of the input to
2166 * itself, simply so it doesn't point to an unrelated argument.
2167 * Since we don't encounter the "second" during the input allocation
2168 * phase, nothing happens with the second half of the input pair.
2169 *
2170 * Case 2 is handled by setting the second input to pair=3, the
2171 * first output to pair=3, and the pair_index'es to match.
2172 */
2173 if (saw_alias_pair) {
2174 for (i = def->nb_oargs; i < nb_args; i++) {
2175 /*
2176 * Since [0-9pm] must be alone in the constraint string,
2177 * the only way they can both be set is if the pair comes
2178 * from the output alias.
2179 */
2180 if (!def->args_ct[i].ialias) {
2181 continue;
2182 }
2183 switch (def->args_ct[i].pair) {
2184 case 0:
2185 break;
2186 case 1:
2187 o = def->args_ct[i].alias_index;
2188 o2 = def->args_ct[o].pair_index;
2189 tcg_debug_assert(def->args_ct[o].pair == 1);
2190 tcg_debug_assert(def->args_ct[o2].pair == 2);
2191 if (def->args_ct[o2].oalias) {
2192 /* Case 1a */
2193 i2 = def->args_ct[o2].alias_index;
2194 tcg_debug_assert(def->args_ct[i2].pair == 2);
2195 def->args_ct[i2].pair_index = i;
2196 def->args_ct[i].pair_index = i2;
2197 } else {
2198 /* Case 1b */
2199 def->args_ct[i].pair_index = i;
2200 }
2201 break;
2202 case 2:
2203 o = def->args_ct[i].alias_index;
2204 o2 = def->args_ct[o].pair_index;
2205 tcg_debug_assert(def->args_ct[o].pair == 2);
2206 tcg_debug_assert(def->args_ct[o2].pair == 1);
2207 if (def->args_ct[o2].oalias) {
2208 /* Case 1a */
2209 i2 = def->args_ct[o2].alias_index;
2210 tcg_debug_assert(def->args_ct[i2].pair == 1);
2211 def->args_ct[i2].pair_index = i;
2212 def->args_ct[i].pair_index = i2;
2213 } else {
2214 /* Case 2 */
2215 def->args_ct[i].pair = 3;
2216 def->args_ct[o2].pair = 3;
2217 def->args_ct[i].pair_index = o2;
2218 def->args_ct[o2].pair_index = i;
2219 }
2220 break;
2221 default:
2222 g_assert_not_reached();
2223 }
2224 }
2225 }
2226
2227 /* sort the constraints (XXX: this is just a heuristic) */
2228 sort_constraints(def, 0, def->nb_oargs);
2229 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2230 }
2231 }
2232
2233 void tcg_op_remove(TCGContext *s, TCGOp *op)
2234 {
2235 TCGLabel *label;
2236
2237 switch (op->opc) {
2238 case INDEX_op_br:
2239 label = arg_label(op->args[0]);
2240 label->refs--;
2241 break;
2242 case INDEX_op_brcond_i32:
2243 case INDEX_op_brcond_i64:
2244 label = arg_label(op->args[3]);
2245 label->refs--;
2246 break;
2247 case INDEX_op_brcond2_i32:
2248 label = arg_label(op->args[5]);
2249 label->refs--;
2250 break;
2251 default:
2252 break;
2253 }
2254
2255 QTAILQ_REMOVE(&s->ops, op, link);
2256 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2257 s->nb_ops--;
2258
2259 #ifdef CONFIG_PROFILER
2260 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2261 #endif
2262 }
2263
2264 void tcg_remove_ops_after(TCGOp *op)
2265 {
2266 TCGContext *s = tcg_ctx;
2267
2268 while (true) {
2269 TCGOp *last = tcg_last_op();
2270 if (last == op) {
2271 return;
2272 }
2273 tcg_op_remove(s, last);
2274 }
2275 }
2276
2277 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2278 {
2279 TCGContext *s = tcg_ctx;
2280 TCGOp *op;
2281
2282 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2283 op = tcg_malloc(sizeof(TCGOp));
2284 } else {
2285 op = QTAILQ_FIRST(&s->free_ops);
2286 QTAILQ_REMOVE(&s->free_ops, op, link);
2287 }
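/* Zero all fields that precede the queue linkage. */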
2288 memset(op, 0, offsetof(TCGOp, link));
2289 op->opc = opc;
2290 s->nb_ops++;
2291
2292 return op;
2293 }
2294
2295 TCGOp *tcg_emit_op(TCGOpcode opc)
2296 {
2297 TCGOp *op = tcg_op_alloc(opc);
2298 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2299 return op;
2300 }
2301
2302 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2303 {
2304 TCGOp *new_op = tcg_op_alloc(opc);
2305 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2306 return new_op;
2307 }
2308
2309 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2310 {
2311 TCGOp *new_op = tcg_op_alloc(opc);
2312 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2313 return new_op;
2314 }
2315
2316 /* Reachability analysis: remove unreachable code. */
2317 static void reachable_code_pass(TCGContext *s)
2318 {
2319 TCGOp *op, *op_next;
2320 bool dead = false;
2321
2322 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2323 bool remove = dead;
2324 TCGLabel *label;
2325
2326 switch (op->opc) {
2327 case INDEX_op_set_label:
2328 label = arg_label(op->args[0]);
2329 if (label->refs == 0) {
2330 /*
2331 * While there is an occasional backward branch, virtually
2332 * all branches generated by the translators are forward.
2333 * Which means that generally we will have already removed
2334 * all references to this label before we encounter it, and
2335 * there is little to be gained by iterating.
2336 */
2337 remove = true;
2338 } else {
2339 /* Once we see a label, insns become live again. */
2340 dead = false;
2341 remove = false;
2342
2343 /*
2344 * Optimization can fold conditional branches to unconditional.
2345 * If we find a label with one reference which is preceded by
2346 * an unconditional branch to it, remove both. This needed to
2347 * wait until the dead code in between them was removed.
2348 */
2349 if (label->refs == 1) {
2350 TCGOp *op_prev = QTAILQ_PREV(op, link);
2351 if (op_prev->opc == INDEX_op_br &&
2352 label == arg_label(op_prev->args[0])) {
2353 tcg_op_remove(s, op_prev);
2354 remove = true;
2355 }
2356 }
2357 }
2358 break;
2359
2360 case INDEX_op_br:
2361 case INDEX_op_exit_tb:
2362 case INDEX_op_goto_ptr:
2363 /* Unconditional branches; everything following is dead. */
2364 dead = true;
2365 break;
2366
2367 case INDEX_op_call:
2368 /* Notice noreturn helper calls, which raise exceptions. */
2369 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2370 dead = true;
2371 }
2372 break;
2373
2374 case INDEX_op_insn_start:
2375 /* Never remove -- we need to keep these for unwind. */
2376 remove = false;
2377 break;
2378
2379 default:
2380 break;
2381 }
2382
2383 if (remove) {
2384 tcg_op_remove(s, op);
2385 }
2386 }
2387 }
2388
2389 #define TS_DEAD 1
2390 #define TS_MEM 2
2391
2392 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2393 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2394
2395 /* For liveness_pass_1, the register preferences for a given temp. */
2396 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2397 {
2398 return ts->state_ptr;
2399 }
2400
2401 /* For liveness_pass_1, reset the preferences for a given temp to the
2402 * maximal regset for its type.
2403 */
2404 static inline void la_reset_pref(TCGTemp *ts)
2405 {
2406 *la_temp_pref(ts)
2407 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2408 }
2409
2410 /* liveness analysis: end of function: all temps are dead, and globals
2411 should be in memory. */
2412 static void la_func_end(TCGContext *s, int ng, int nt)
2413 {
2414 int i;
2415
2416 for (i = 0; i < ng; ++i) {
2417 s->temps[i].state = TS_DEAD | TS_MEM;
2418 la_reset_pref(&s->temps[i]);
2419 }
2420 for (i = ng; i < nt; ++i) {
2421 s->temps[i].state = TS_DEAD;
2422 la_reset_pref(&s->temps[i]);
2423 }
2424 }
2425
2426 /* liveness analysis: end of basic block: all temps are dead, globals
2427 and local temps should be in memory. */
2428 static void la_bb_end(TCGContext *s, int ng, int nt)
2429 {
2430 int i;
2431
2432 for (i = 0; i < nt; ++i) {
2433 TCGTemp *ts = &s->temps[i];
2434 int state;
2435
2436 switch (ts->kind) {
2437 case TEMP_FIXED:
2438 case TEMP_GLOBAL:
2439 case TEMP_LOCAL:
2440 state = TS_DEAD | TS_MEM;
2441 break;
2442 case TEMP_NORMAL:
2443 case TEMP_EBB:
2444 case TEMP_CONST:
2445 state = TS_DEAD;
2446 break;
2447 default:
2448 g_assert_not_reached();
2449 }
2450 ts->state = state;
2451 la_reset_pref(ts);
2452 }
2453 }
2454
2455 /* liveness analysis: sync globals back to memory. */
2456 static void la_global_sync(TCGContext *s, int ng)
2457 {
2458 int i;
2459
2460 for (i = 0; i < ng; ++i) {
2461 int state = s->temps[i].state;
2462 s->temps[i].state = state | TS_MEM;
2463 if (state == TS_DEAD) {
2464 /* If the global was previously dead, reset prefs. */
2465 la_reset_pref(&s->temps[i]);
2466 }
2467 }
2468 }
2469
2470 /*
2471 * liveness analysis: conditional branch: all temps are dead unless
2472 * explicitly live-across-conditional-branch, globals and local temps
2473 * should be synced.
2474 */
2475 static void la_bb_sync(TCGContext *s, int ng, int nt)
2476 {
2477 la_global_sync(s, ng);
2478
2479 for (int i = ng; i < nt; ++i) {
2480 TCGTemp *ts = &s->temps[i];
2481 int state;
2482
2483 switch (ts->kind) {
2484 case TEMP_LOCAL:
2485 state = ts->state;
2486 ts->state = state | TS_MEM;
2487 if (state != TS_DEAD) {
2488 continue;
2489 }
2490 break;
2491 case TEMP_NORMAL:
2492 s->temps[i].state = TS_DEAD;
2493 break;
2494 case TEMP_EBB:
2495 case TEMP_CONST:
2496 continue;
2497 default:
2498 g_assert_not_reached();
2499 }
2500 la_reset_pref(&s->temps[i]);
2501 }
2502 }
2503
2504 /* liveness analysis: sync globals back to memory and kill. */
2505 static void la_global_kill(TCGContext *s, int ng)
2506 {
2507 int i;
2508
2509 for (i = 0; i < ng; i++) {
2510 s->temps[i].state = TS_DEAD | TS_MEM;
2511 la_reset_pref(&s->temps[i]);
2512 }
2513 }
2514
2515 /* liveness analysis: note live temps crossing calls. */
2516 static void la_cross_call(TCGContext *s, int nt)
2517 {
2518 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2519 int i;
2520
2521 for (i = 0; i < nt; i++) {
2522 TCGTemp *ts = &s->temps[i];
2523 if (!(ts->state & TS_DEAD)) {
2524 TCGRegSet *pset = la_temp_pref(ts);
2525 TCGRegSet set = *pset;
2526
2527 set &= mask;
2528 /* If the combination is not possible, restart. */
2529 if (set == 0) {
2530 set = tcg_target_available_regs[ts->type] & mask;
2531 }
2532 *pset = set;
2533 }
2534 }
2535 }
2536
2537 /* Liveness analysis: update the opc_arg_life array to tell whether a
2538 given input argument is dead. Instructions updating dead
2539 temporaries are removed. */
2540 static void liveness_pass_1(TCGContext *s)
2541 {
2542 int nb_globals = s->nb_globals;
2543 int nb_temps = s->nb_temps;
2544 TCGOp *op, *op_prev;
2545 TCGRegSet *prefs;
2546 int i;
2547
2548 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2549 for (i = 0; i < nb_temps; ++i) {
2550 s->temps[i].state_ptr = prefs + i;
2551 }
2552
2553 /* ??? Should be redundant with the exit_tb that ends the TB. */
2554 la_func_end(s, nb_globals, nb_temps);
2555
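/*
* Walk the ops in reverse: each temp stays TS_DEAD until a use as an
* input is seen, so when visiting an op we already know whether its
* outputs are needed later in the TB.
*/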
2556 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2557 int nb_iargs, nb_oargs;
2558 TCGOpcode opc_new, opc_new2;
2559 bool have_opc_new2;
2560 TCGLifeData arg_life = 0;
2561 TCGTemp *ts;
2562 TCGOpcode opc = op->opc;
2563 const TCGOpDef *def = &tcg_op_defs[opc];
2564
2565 switch (opc) {
2566 case INDEX_op_call:
2567 {
2568 int call_flags;
2569 int nb_call_regs;
2570
2571 nb_oargs = TCGOP_CALLO(op);
2572 nb_iargs = TCGOP_CALLI(op);
2573 call_flags = tcg_call_flags(op);
2574
2575 /* pure functions can be removed if their result is unused */
2576 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2577 for (i = 0; i < nb_oargs; i++) {
2578 ts = arg_temp(op->args[i]);
2579 if (ts->state != TS_DEAD) {
2580 goto do_not_remove_call;
2581 }
2582 }
2583 goto do_remove;
2584 }
2585 do_not_remove_call:
2586
2587 /* Output args are dead. */
2588 for (i = 0; i < nb_oargs; i++) {
2589 ts = arg_temp(op->args[i]);
2590 if (ts->state & TS_DEAD) {
2591 arg_life |= DEAD_ARG << i;
2592 }
2593 if (ts->state & TS_MEM) {
2594 arg_life |= SYNC_ARG << i;
2595 }
2596 ts->state = TS_DEAD;
2597 la_reset_pref(ts);
2598
2599 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2600 op->output_pref[i] = 0;
2601 }
2602
2603 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2604 TCG_CALL_NO_READ_GLOBALS))) {
2605 la_global_kill(s, nb_globals);
2606 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2607 la_global_sync(s, nb_globals);
2608 }
2609
2610 /* Record arguments that die in this helper. */
2611 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2612 ts = arg_temp(op->args[i]);
2613 if (ts && ts->state & TS_DEAD) {
2614 arg_life |= DEAD_ARG << i;
2615 }
2616 }
2617
2618 /* For all live registers, remove call-clobbered prefs. */
2619 la_cross_call(s, nb_temps);
2620
2621 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2622
2623 /* Input arguments are live for preceding opcodes. */
2624 for (i = 0; i < nb_iargs; i++) {
2625 ts = arg_temp(op->args[i + nb_oargs]);
2626 if (ts && ts->state & TS_DEAD) {
2627 /* For those arguments that die, and will be allocated
2628 * in registers, clear the register set for that arg,
2629 * to be filled in below. For args that will be on
2630 * the stack, reset to any available reg.
2631 */
2632 *la_temp_pref(ts)
2633 = (i < nb_call_regs ? 0 :
2634 tcg_target_available_regs[ts->type]);
2635 ts->state &= ~TS_DEAD;
2636 }
2637 }
2638
2639 /* For each input argument, add its input register to prefs.
2640 If a temp is used once, this produces a single set bit. */
2641 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2642 ts = arg_temp(op->args[i + nb_oargs]);
2643 if (ts) {
2644 tcg_regset_set_reg(*la_temp_pref(ts),
2645 tcg_target_call_iarg_regs[i]);
2646 }
2647 }
2648 }
2649 break;
2650 case INDEX_op_insn_start:
2651 break;
2652 case INDEX_op_discard:
2653 /* mark the temporary as dead */
2654 ts = arg_temp(op->args[0]);
2655 ts->state = TS_DEAD;
2656 la_reset_pref(ts);
2657 break;
2658
2659 case INDEX_op_add2_i32:
2660 opc_new = INDEX_op_add_i32;
2661 goto do_addsub2;
2662 case INDEX_op_sub2_i32:
2663 opc_new = INDEX_op_sub_i32;
2664 goto do_addsub2;
2665 case INDEX_op_add2_i64:
2666 opc_new = INDEX_op_add_i64;
2667 goto do_addsub2;
2668 case INDEX_op_sub2_i64:
2669 opc_new = INDEX_op_sub_i64;
2670 do_addsub2:
2671 nb_iargs = 4;
2672 nb_oargs = 2;
2673 /* Test if the high part of the operation is dead, but not
2674 the low part. The result can be optimized to a simple
2675 add or sub. This happens often for x86_64 guests when the
2676 CPU mode is set to 32-bit. */
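/* (The double-word ops take args rl, rh, al, ah, bl, bh.) */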
2677 if (arg_temp(op->args[1])->state == TS_DEAD) {
2678 if (arg_temp(op->args[0])->state == TS_DEAD) {
2679 goto do_remove;
2680 }
2681 /* Replace the opcode and adjust the args in place,
2682 leaving 3 unused args at the end. */
2683 op->opc = opc = opc_new;
2684 op->args[1] = op->args[2];
2685 op->args[2] = op->args[4];
2686 /* Fall through and mark the single-word operation live. */
2687 nb_iargs = 2;
2688 nb_oargs = 1;
2689 }
2690 goto do_not_remove;
2691
2692 case INDEX_op_mulu2_i32:
2693 opc_new = INDEX_op_mul_i32;
2694 opc_new2 = INDEX_op_muluh_i32;
2695 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2696 goto do_mul2;
2697 case INDEX_op_muls2_i32:
2698 opc_new = INDEX_op_mul_i32;
2699 opc_new2 = INDEX_op_mulsh_i32;
2700 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2701 goto do_mul2;
2702 case INDEX_op_mulu2_i64:
2703 opc_new = INDEX_op_mul_i64;
2704 opc_new2 = INDEX_op_muluh_i64;
2705 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2706 goto do_mul2;
2707 case INDEX_op_muls2_i64:
2708 opc_new = INDEX_op_mul_i64;
2709 opc_new2 = INDEX_op_mulsh_i64;
2710 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2711 goto do_mul2;
2712 do_mul2:
2713 nb_iargs = 2;
2714 nb_oargs = 2;
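/* The mul2 ops take args rl, rh, a, b. */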
2715 if (arg_temp(op->args[1])->state == TS_DEAD) {
2716 if (arg_temp(op->args[0])->state == TS_DEAD) {
2717 /* Both parts of the operation are dead. */
2718 goto do_remove;
2719 }
2720 /* The high part of the operation is dead; generate the low. */
2721 op->opc = opc = opc_new;
2722 op->args[1] = op->args[2];
2723 op->args[2] = op->args[3];
2724 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2725 /* The low part of the operation is dead; generate the high. */
2726 op->opc = opc = opc_new2;
2727 op->args[0] = op->args[1];
2728 op->args[1] = op->args[2];
2729 op->args[2] = op->args[3];
2730 } else {
2731 goto do_not_remove;
2732 }
2733 /* Mark the single-word operation live. */
2734 nb_oargs = 1;
2735 goto do_not_remove;
2736
2737 default:
2738 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2739 nb_iargs = def->nb_iargs;
2740 nb_oargs = def->nb_oargs;
2741
2742 /* Test if the operation can be removed because all
2743 its outputs are dead. We assume that nb_oargs == 0
2744 implies side effects. */
2745 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2746 for (i = 0; i < nb_oargs; i++) {
2747 if (arg_temp(op->args[i])->state != TS_DEAD) {
2748 goto do_not_remove;
2749 }
2750 }
2751 goto do_remove;
2752 }
2753 goto do_not_remove;
2754
2755 do_remove:
2756 tcg_op_remove(s, op);
2757 break;
2758
2759 do_not_remove:
2760 for (i = 0; i < nb_oargs; i++) {
2761 ts = arg_temp(op->args[i]);
2762
2763 /* Remember the preference of the uses that followed. */
2764 op->output_pref[i] = *la_temp_pref(ts);
2765
2766 /* Output args are dead. */
2767 if (ts->state & TS_DEAD) {
2768 arg_life |= DEAD_ARG << i;
2769 }
2770 if (ts->state & TS_MEM) {
2771 arg_life |= SYNC_ARG << i;
2772 }
2773 ts->state = TS_DEAD;
2774 la_reset_pref(ts);
2775 }
2776
2777 /* If end of basic block, update. */
2778 if (def->flags & TCG_OPF_BB_EXIT) {
2779 la_func_end(s, nb_globals, nb_temps);
2780 } else if (def->flags & TCG_OPF_COND_BRANCH) {
2781 la_bb_sync(s, nb_globals, nb_temps);
2782 } else if (def->flags & TCG_OPF_BB_END) {
2783 la_bb_end(s, nb_globals, nb_temps);
2784 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2785 la_global_sync(s, nb_globals);
2786 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2787 la_cross_call(s, nb_temps);
2788 }
2789 }
2790
2791 /* Record arguments that die in this opcode. */
2792 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2793 ts = arg_temp(op->args[i]);
2794 if (ts->state & TS_DEAD) {
2795 arg_life |= DEAD_ARG << i;
2796 }
2797 }
2798
2799 /* Input arguments are live for preceding opcodes. */
2800 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2801 ts = arg_temp(op->args[i]);
2802 if (ts->state & TS_DEAD) {
2803 /* For operands that were dead, initially allow
2804 all regs for the type. */
2805 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2806 ts->state &= ~TS_DEAD;
2807 }
2808 }
2809
2810 /* Incorporate constraints for this operand. */
2811 switch (opc) {
2812 case INDEX_op_mov_i32:
2813 case INDEX_op_mov_i64:
2814 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2815 have proper constraints. That said, special case
2816 moves to propagate preferences backward. */
2817 if (IS_DEAD_ARG(1)) {
2818 *la_temp_pref(arg_temp(op->args[0]))
2819 = *la_temp_pref(arg_temp(op->args[1]));
2820 }
2821 break;
2822
2823 default:
2824 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2825 const TCGArgConstraint *ct = &def->args_ct[i];
2826 TCGRegSet set, *pset;
2827
2828 ts = arg_temp(op->args[i]);
2829 pset = la_temp_pref(ts);
2830 set = *pset;
2831
2832 set &= ct->regs;
2833 if (ct->ialias) {
2834 set &= op->output_pref[ct->alias_index];
2835 }
2836 /* If the combination is not possible, restart. */
2837 if (set == 0) {
2838 set = ct->regs;
2839 }
2840 *pset = set;
2841 }
2842 break;
2843 }
2844 break;
2845 }
2846 op->life = arg_life;
2847 }
2848 }
2849
2850 /* Liveness analysis: Convert indirect regs to direct temporaries. */
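/* For each global marked indirect_reg, allocate a TEMP_EBB stand-in and
insert explicit ld/st ops around its uses, so that the canonical
memory slot remains the point of truth. */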
2851 static bool liveness_pass_2(TCGContext *s)
2852 {
2853 int nb_globals = s->nb_globals;
2854 int nb_temps, i;
2855 bool changes = false;
2856 TCGOp *op, *op_next;
2857
2858 /* Create a temporary for each indirect global. */
2859 for (i = 0; i < nb_globals; ++i) {
2860 TCGTemp *its = &s->temps[i];
2861 if (its->indirect_reg) {
2862 TCGTemp *dts = tcg_temp_alloc(s);
2863 dts->type = its->type;
2864 dts->base_type = its->base_type;
2865 dts->kind = TEMP_EBB;
2866 its->state_ptr = dts;
2867 } else {
2868 its->state_ptr = NULL;
2869 }
2870 /* All globals begin dead. */
2871 its->state = TS_DEAD;
2872 }
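/* Continue with the non-global temps; i carries over from the loop above. */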
2873 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2874 TCGTemp *its = &s->temps[i];
2875 its->state_ptr = NULL;
2876 its->state = TS_DEAD;
2877 }
2878
2879 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2880 TCGOpcode opc = op->opc;
2881 const TCGOpDef *def = &tcg_op_defs[opc];
2882 TCGLifeData arg_life = op->life;
2883 int nb_iargs, nb_oargs, call_flags;
2884 TCGTemp *arg_ts, *dir_ts;
2885
2886 if (opc == INDEX_op_call) {
2887 nb_oargs = TCGOP_CALLO(op);
2888 nb_iargs = TCGOP_CALLI(op);
2889 call_flags = tcg_call_flags(op);
2890 } else {
2891 nb_iargs = def->nb_iargs;
2892 nb_oargs = def->nb_oargs;
2893
2894 /* Set flags analogous to those required for calls. */
2895 if (def->flags & TCG_OPF_COND_BRANCH) {
2896 /* Like reading globals: sync_globals */
2897 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2898 } else if (def->flags & TCG_OPF_BB_END) {
2899 /* Like writing globals: save_globals */
2900 call_flags = 0;
2901 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2902 /* Like reading globals: sync_globals */
2903 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2904 } else {
2905 /* No effect on globals. */
2906 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2907 TCG_CALL_NO_WRITE_GLOBALS);
2908 }
2909 }
2910
2911 /* Make sure that input arguments are available. */
2912 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2913 arg_ts = arg_temp(op->args[i]);
2914 if (arg_ts) {
2915 dir_ts = arg_ts->state_ptr;
2916 if (dir_ts && arg_ts->state == TS_DEAD) {
2917 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2918 ? INDEX_op_ld_i32
2919 : INDEX_op_ld_i64);
2920 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2921
2922 lop->args[0] = temp_arg(dir_ts);
2923 lop->args[1] = temp_arg(arg_ts->mem_base);
2924 lop->args[2] = arg_ts->mem_offset;
2925
2926 /* Loaded, but synced with memory. */
2927 arg_ts->state = TS_MEM;
2928 }
2929 }
2930 }
2931
2932 /* Perform input replacement, and mark inputs that became dead.
2933 No action is required except keeping temp_state up to date
2934 so that we reload when needed. */
2935 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2936 arg_ts = arg_temp(op->args[i]);
2937 if (arg_ts) {
2938 dir_ts = arg_ts->state_ptr;
2939 if (dir_ts) {
2940 op->args[i] = temp_arg(dir_ts);
2941 changes = true;
2942 if (IS_DEAD_ARG(i)) {
2943 arg_ts->state = TS_DEAD;
2944 }
2945 }
2946 }
2947 }
2948
2949 /* Liveness analysis should ensure that the following are
2950 all correct, for call sites and basic block end points. */
2951 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2952 /* Nothing to do */
2953 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2954 for (i = 0; i < nb_globals; ++i) {
2955 /* Liveness should see that globals are synced back,
2956 that is, either TS_DEAD or TS_MEM. */
2957 arg_ts = &s->temps[i];
2958 tcg_debug_assert(arg_ts->state_ptr == 0
2959 || arg_ts->state != 0);
2960 }
2961 } else {
2962 for (i = 0; i < nb_globals; ++i) {
2963 /* Liveness should see that globals are saved back,
2964 that is, TS_DEAD, waiting to be reloaded. */
2965 arg_ts = &s->temps[i];
2966 tcg_debug_assert(arg_ts->state_ptr == 0
2967 || arg_ts->state == TS_DEAD);
2968 }
2969 }
2970
2971 /* Outputs become available. */
2972 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2973 arg_ts = arg_temp(op->args[0]);
2974 dir_ts = arg_ts->state_ptr;
2975 if (dir_ts) {
2976 op->args[0] = temp_arg(dir_ts);
2977 changes = true;
2978
2979 /* The output is now live and modified. */
2980 arg_ts->state = 0;
2981
2982 if (NEED_SYNC_ARG(0)) {
2983 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2984 ? INDEX_op_st_i32
2985 : INDEX_op_st_i64);
2986 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2987 TCGTemp *out_ts = dir_ts;
2988
2989 if (IS_DEAD_ARG(0)) {
2990 out_ts = arg_temp(op->args[1]);
2991 arg_ts->state = TS_DEAD;
2992 tcg_op_remove(s, op);
2993 } else {
2994 arg_ts->state = TS_MEM;
2995 }
2996
2997 sop->args[0] = temp_arg(out_ts);
2998 sop->args[1] = temp_arg(arg_ts->mem_base);
2999 sop->args[2] = arg_ts->mem_offset;
3000 } else {
3001 tcg_debug_assert(!IS_DEAD_ARG(0));
3002 }
3003 }
3004 } else {
3005 for (i = 0; i < nb_oargs; i++) {
3006 arg_ts = arg_temp(op->args[i]);
3007 dir_ts = arg_ts->state_ptr;
3008 if (!dir_ts) {
3009 continue;
3010 }
3011 op->args[i] = temp_arg(dir_ts);
3012 changes = true;
3013
3014 /* The output is now live and modified. */
3015 arg_ts->state = 0;
3016
3017 /* Sync outputs upon their last write. */
3018 if (NEED_SYNC_ARG(i)) {
3019 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3020 ? INDEX_op_st_i32
3021 : INDEX_op_st_i64);
3022 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3023
3024 sop->args[0] = temp_arg(dir_ts);
3025 sop->args[1] = temp_arg(arg_ts->mem_base);
3026 sop->args[2] = arg_ts->mem_offset;
3027
3028 arg_ts->state = TS_MEM;
3029 }
3030 /* Drop outputs that are dead. */
3031 if (IS_DEAD_ARG(i)) {
3032 arg_ts->state = TS_DEAD;
3033 }
3034 }
3035 }
3036 }
3037
3038 return changes;
3039 }
3040
3041 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3042 {
3043 intptr_t off, size, align;
3044
3045 switch (ts->type) {
3046 case TCG_TYPE_I32:
3047 size = align = 4;
3048 break;
3049 case TCG_TYPE_I64:
3050 case TCG_TYPE_V64:
3051 size = align = 8;
3052 break;
3053 case TCG_TYPE_V128:
3054 size = align = 16;
3055 break;
3056 case TCG_TYPE_V256:
3057 /* Note that we do not require aligned storage for V256. */
3058 size = 32, align = 16;
3059 break;
3060 default:
3061 g_assert_not_reached();
3062 }
3063
3064 /*
3065 * Assume the stack is sufficiently aligned.
3066 * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3067 * and do not require 16 byte vector alignment. This seems slightly
3068 * easier than fully parameterizing the above switch statement.
3069 */
3070 align = MIN(TCG_TARGET_STACK_ALIGN, align);
3071 off = ROUND_UP(s->current_frame_offset, align);
3072
3073 /* If we've exhausted the stack frame, restart with a smaller TB. */
3074 if (off + size > s->frame_end) {
3075 tcg_raise_tb_overflow(s);
3076 }
3077 s->current_frame_offset = off + size;
3078
3079 ts->mem_offset = off;
3080 #if defined(__sparc__)
3081 ts->mem_offset += TCG_TARGET_STACK_BIAS;
3082 #endif
3083 ts->mem_base = s->frame_temp;
3084 ts->mem_allocated = 1;
3085 }
3086
3087 /* Assign @reg to @ts, and update reg_to_temp[]. */
3088 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3089 {
3090 if (ts->val_type == TEMP_VAL_REG) {
3091 TCGReg old = ts->reg;
3092 tcg_debug_assert(s->reg_to_temp[old] == ts);
3093 if (old == reg) {
3094 return;
3095 }
3096 s->reg_to_temp[old] = NULL;
3097 }
3098 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3099 s->reg_to_temp[reg] = ts;
3100 ts->val_type = TEMP_VAL_REG;
3101 ts->reg = reg;
3102 }
3103
3104 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3105 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3106 {
3107 tcg_debug_assert(type != TEMP_VAL_REG);
3108 if (ts->val_type == TEMP_VAL_REG) {
3109 TCGReg reg = ts->reg;
3110 tcg_debug_assert(s->reg_to_temp[reg] == ts);
3111 s->reg_to_temp[reg] = NULL;
3112 }
3113 ts->val_type = type;
3114 }
3115
3116 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3117
3118 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3119 mark it free; otherwise mark it dead. */
3120 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3121 {
3122 TCGTempVal new_type;
3123
3124 switch (ts->kind) {
3125 case TEMP_FIXED:
3126 return;
3127 case TEMP_GLOBAL:
3128 case TEMP_LOCAL:
3129 new_type = TEMP_VAL_MEM;
3130 break;
3131 case TEMP_NORMAL:
3132 case TEMP_EBB:
3133 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3134 break;
3135 case TEMP_CONST:
3136 new_type = TEMP_VAL_CONST;
3137 break;
3138 default:
3139 g_assert_not_reached();
3140 }
3141 set_temp_val_nonreg(s, ts, new_type);
3142 }
3143
3144 /* Mark a temporary as dead. */
3145 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3146 {
3147 temp_free_or_dead(s, ts, 1);
3148 }
3149
3150 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3151 register needs to be allocated to store a constant. If 'free_or_dead'
3152 is non-zero, subsequently release the temporary; if it is positive, the
3153 temp is dead; if it is negative, the temp is free. */
3154 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3155 TCGRegSet preferred_regs, int free_or_dead)
3156 {
3157 if (!temp_readonly(ts) && !ts->mem_coherent) {
3158 if (!ts->mem_allocated) {
3159 temp_allocate_frame(s, ts);
3160 }
3161 switch (ts->val_type) {
3162 case TEMP_VAL_CONST:
3163 /* If we're going to free the temp immediately, then we won't
3164 require it later in a register, so attempt to store the
3165 constant to memory directly. */
3166 if (free_or_dead
3167 && tcg_out_sti(s, ts->type, ts->val,
3168 ts->mem_base->reg, ts->mem_offset)) {
3169 break;
3170 }
3171 temp_load(s, ts, tcg_target_available_regs[ts->type],
3172 allocated_regs, preferred_regs);
3173 /* fallthrough */
3174
3175 case TEMP_VAL_REG:
3176 tcg_out_st(s, ts->type, ts->reg,
3177 ts->mem_base->reg, ts->mem_offset);
3178 break;
3179
3180 case TEMP_VAL_MEM:
3181 break;
3182
3183 case TEMP_VAL_DEAD:
3184 default:
3185 tcg_abort();
3186 }
3187 ts->mem_coherent = 1;
3188 }
3189 if (free_or_dead) {
3190 temp_free_or_dead(s, ts, free_or_dead);
3191 }
3192 }
3193
3194 /* free register 'reg' by spilling the corresponding temporary if necessary */
3195 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3196 {
3197 TCGTemp *ts = s->reg_to_temp[reg];
3198 if (ts != NULL) {
3199 temp_sync(s, ts, allocated_regs, 0, -1);
3200 }
3201 }
3202
3203 /**
3204 * tcg_reg_alloc:
3205 * @required_regs: Set of registers in which we must allocate.
3206 * @allocated_regs: Set of registers which must be avoided.
3207 * @preferred_regs: Set of registers we should prefer.
3208 * @rev: True if we search the registers in "indirect" order.
3209 *
3210 * The allocated register must be in @required_regs & ~@allocated_regs,
3211 * but if we can put it in @preferred_regs we may save a move later.
3212 */
3213 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3214 TCGRegSet allocated_regs,
3215 TCGRegSet preferred_regs, bool rev)
3216 {
3217 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3218 TCGRegSet reg_ct[2];
3219 const int *order;
3220
3221 reg_ct[1] = required_regs & ~allocated_regs;
3222 tcg_debug_assert(reg_ct[1] != 0);
3223 reg_ct[0] = reg_ct[1] & preferred_regs;
3224
3225 /* Skip the preferred_regs option if it cannot be satisfied,
3226 or if the preference made no difference. */
3227 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3228
3229 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3230
3231 /* Try free registers, preferences first. */
3232 for (j = f; j < 2; j++) {
3233 TCGRegSet set = reg_ct[j];
3234
3235 if (tcg_regset_single(set)) {
3236 /* One register in the set. */
3237 TCGReg reg = tcg_regset_first(set);
3238 if (s->reg_to_temp[reg] == NULL) {
3239 return reg;
3240 }
3241 } else {
3242 for (i = 0; i < n; i++) {
3243 TCGReg reg = order[i];
3244 if (s->reg_to_temp[reg] == NULL &&
3245 tcg_regset_test_reg(set, reg)) {
3246 return reg;
3247 }
3248 }
3249 }
3250 }
3251
3252 /* We must spill something. */
3253 for (j = f; j < 2; j++) {
3254 TCGRegSet set = reg_ct[j];
3255
3256 if (tcg_regset_single(set)) {
3257 /* One register in the set. */
3258 TCGReg reg = tcg_regset_first(set);
3259 tcg_reg_free(s, reg, allocated_regs);
3260 return reg;
3261 } else {
3262 for (i = 0; i < n; i++) {
3263 TCGReg reg = order[i];
3264 if (tcg_regset_test_reg(set, reg)) {
3265 tcg_reg_free(s, reg, allocated_regs);
3266 return reg;
3267 }
3268 }
3269 }
3270 }
3271
3272 tcg_abort();
3273 }
3274
3275 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
3276 TCGRegSet allocated_regs,
3277 TCGRegSet preferred_regs, bool rev)
3278 {
3279 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3280 TCGRegSet reg_ct[2];
3281 const int *order;
3282
3283 /* Ensure that if I is not in allocated_regs, I+1 is not either. */
3284 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
3285 tcg_debug_assert(reg_ct[1] != 0);
3286 reg_ct[0] = reg_ct[1] & preferred_regs;
3287
3288 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3289
3290 /*
3291 * Skip the preferred_regs option if it cannot be satisfied,
3292 * or if the preference made no difference.
3293 */
3294 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3295
3296 /*
3297 * Minimize the number of flushes by looking for 2 free registers first,
3298 * then a single flush, then two flushes.
3299 */
3300 for (fmin = 2; fmin >= 0; fmin--) {
3301 for (j = k; j < 2; j++) {
3302 TCGRegSet set = reg_ct[j];
3303
3304 for (i = 0; i < n; i++) {
3305 TCGReg reg = order[i];
3306
3307 if (tcg_regset_test_reg(set, reg)) {
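/* f counts how many of the pair {reg, reg+1} are currently free. */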
3308 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
3309 if (f >= fmin) {
3310 tcg_reg_free(s, reg, allocated_regs);
3311 tcg_reg_free(s, reg + 1, allocated_regs);
3312 return reg;
3313 }
3314 }
3315 }
3316 }
3317 }
3318 tcg_abort();
3319 }
3320
3321 /* Make sure the temporary is in a register. If needed, allocate the register
3322 from DESIRED while avoiding ALLOCATED. */
3323 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3324 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3325 {
3326 TCGReg reg;
3327
3328 switch (ts->val_type) {
3329 case TEMP_VAL_REG:
3330 return;
3331 case TEMP_VAL_CONST:
3332 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3333 preferred_regs, ts->indirect_base);
3334 if (ts->type <= TCG_TYPE_I64) {
3335 tcg_out_movi(s, ts->type, reg, ts->val);
3336 } else {
3337 uint64_t val = ts->val;
3338 MemOp vece = MO_64;
3339
3340 /*
3341 * Find the minimal vector element that matches the constant.
3342 * The targets will, in general, have to do this search anyway,
3343 * so do it generically here.
3344 */
3345 if (val == dup_const(MO_8, val)) {
3346 vece = MO_8;
3347 } else if (val == dup_const(MO_16, val)) {
3348 vece = MO_16;
3349 } else if (val == dup_const(MO_32, val)) {
3350 vece = MO_32;
3351 }
3352
3353 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3354 }
3355 ts->mem_coherent = 0;
3356 break;
3357 case TEMP_VAL_MEM:
3358 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3359 preferred_regs, ts->indirect_base);
3360 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3361 ts->mem_coherent = 1;
3362 break;
3363 case TEMP_VAL_DEAD:
3364 default:
3365 tcg_abort();
3366 }
3367 set_temp_val_reg(s, ts, reg);
3368 }
3369
3370 /* Save a temporary to memory. 'allocated_regs' is used in case a
3371 temporary register needs to be allocated to store a constant. */
3372 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3373 {
3374 /* The liveness analysis already ensures that globals are back
3375 in memory. Keep a tcg_debug_assert for safety. */
3376 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3377 }
3378
3379 /* save globals to their canonical location and assume they can be
3380 modified by the following code. 'allocated_regs' is used in case a
3381 temporary register needs to be allocated to store a constant. */
3382 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3383 {
3384 int i, n;
3385
3386 for (i = 0, n = s->nb_globals; i < n; i++) {
3387 temp_save(s, &s->temps[i], allocated_regs);
3388 }
3389 }
3390
3391 /* sync globals to their canonical location and assume they can be
3392 read by the following code. 'allocated_regs' is used in case a
3393 temporary register needs to be allocated to store a constant. */
3394 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3395 {
3396 int i, n;
3397
3398 for (i = 0, n = s->nb_globals; i < n; i++) {
3399 TCGTemp *ts = &s->temps[i];
3400 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3401 || ts->kind == TEMP_FIXED
3402 || ts->mem_coherent);
3403 }
3404 }
3405
3406 /* at the end of a basic block, we assume all temporaries are dead and
3407 all globals are stored at their canonical location. */
3408 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3409 {
3410 int i;
3411
3412 for (i = s->nb_globals; i < s->nb_temps; i++) {
3413 TCGTemp *ts = &s->temps[i];
3414
3415 switch (ts->kind) {
3416 case TEMP_LOCAL:
3417 temp_save(s, ts, allocated_regs);
3418 break;
3419 case TEMP_NORMAL:
3420 case TEMP_EBB:
3421 /* The liveness analysis already ensures that temps are dead.
3422 Keep a tcg_debug_assert for safety. */
3423 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3424 break;
3425 case TEMP_CONST:
3426 /* Similarly, we should have freed any allocated register. */
3427 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3428 break;
3429 default:
3430 g_assert_not_reached();
3431 }
3432 }
3433
3434 save_globals(s, allocated_regs);
3435 }
3436
3437 /*
3438 * At a conditional branch, we assume all temporaries are dead unless
3439 * explicitly live-across-conditional-branch; all globals and local
3440 * temps are synced to their location.
3441 */
3442 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3443 {
3444 sync_globals(s, allocated_regs);
3445
3446 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3447 TCGTemp *ts = &s->temps[i];
3448 /*
3449 * The liveness analysis already ensures that temps are dead.
3450 * Keep tcg_debug_asserts for safety.
3451 */
3452 switch (ts->kind) {
3453 case TEMP_LOCAL:
3454 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3455 break;
3456 case TEMP_NORMAL:
3457 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3458 break;
3459 case TEMP_EBB:
3460 case TEMP_CONST:
3461 break;
3462 default:
3463 g_assert_not_reached();
3464 }
3465 }
3466 }
3467
3468 /*
3469 * Specialized code generation for INDEX_op_mov_* with a constant.
3470 */
3471 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3472 tcg_target_ulong val, TCGLifeData arg_life,
3473 TCGRegSet preferred_regs)
3474 {
3475 /* ENV should not be modified. */
3476 tcg_debug_assert(!temp_readonly(ots));
3477
3478 /* The movi is not explicitly generated here. */
3479 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3480 ots->val = val;
3481 ots->mem_coherent = 0;
3482 if (NEED_SYNC_ARG(0)) {
3483 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3484 } else if (IS_DEAD_ARG(0)) {
3485 temp_dead(s, ots);
3486 }
3487 }
3488
3489 /*
3490 * Specialized code generation for INDEX_op_mov_*.
3491 */
3492 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3493 {
3494 const TCGLifeData arg_life = op->life;
3495 TCGRegSet allocated_regs, preferred_regs;
3496 TCGTemp *ts, *ots;
3497 TCGType otype, itype;
3498 TCGReg oreg, ireg;
3499
3500 allocated_regs = s->reserved_regs;
3501 preferred_regs = op->output_pref[0];
3502 ots = arg_temp(op->args[0]);
3503 ts = arg_temp(op->args[1]);
3504
3505 /* ENV should not be modified. */
3506 tcg_debug_assert(!temp_readonly(ots));
3507
3508 /* Note that otype != itype for no-op truncation. */
3509 otype = ots->type;
3510 itype = ts->type;
3511
3512 if (ts->val_type == TEMP_VAL_CONST) {
3513 /* propagate constant or generate sti */
3514 tcg_target_ulong val = ts->val;
3515 if (IS_DEAD_ARG(1)) {
3516 temp_dead(s, ts);
3517 }
3518 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3519 return;
3520 }
3521
3522 /* If the source value is in memory we're going to be forced
3523 to have it in a register in order to perform the copy. Copy
3524 the SOURCE value into its own register first, that way we
3525 don't have to reload SOURCE the next time it is used. */
3526 if (ts->val_type == TEMP_VAL_MEM) {
3527 temp_load(s, ts, tcg_target_available_regs[itype],
3528 allocated_regs, preferred_regs);
3529 }
3530 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3531 ireg = ts->reg;
3532
3533 if (IS_DEAD_ARG(0)) {
3534 /* mov to a non-saved dead register makes no sense (even with
3535 liveness analysis disabled). */
3536 tcg_debug_assert(NEED_SYNC_ARG(0));
3537 if (!ots->mem_allocated) {
3538 temp_allocate_frame(s, ots);
3539 }
3540 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
3541 if (IS_DEAD_ARG(1)) {
3542 temp_dead(s, ts);
3543 }
3544 temp_dead(s, ots);
3545 return;
3546 }
3547
3548 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3549 /*
3550 * The mov can be suppressed. Kill input first, so that it
3551 * is unlinked from reg_to_temp, then set the output to the
3552 * reg that we saved from the input.
3553 */
3554 temp_dead(s, ts);
3555 oreg = ireg;
3556 } else {
3557 if (ots->val_type == TEMP_VAL_REG) {
3558 oreg = ots->reg;
3559 } else {
3560 /* Make sure to not spill the input register during allocation. */
3561 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3562 allocated_regs | ((TCGRegSet)1 << ireg),
3563 preferred_regs, ots->indirect_base);
3564 }
3565 if (!tcg_out_mov(s, otype, oreg, ireg)) {
3566 /*
3567 * Cross register class move not supported.
3568 * Store the source register into the destination slot
3569 * and leave the destination temp as TEMP_VAL_MEM.
3570 */
3571 assert(!temp_readonly(ots));
3572 if (!ts->mem_allocated) {
3573 temp_allocate_frame(s, ots);
3574 }
3575 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
3576 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
3577 ots->mem_coherent = 1;
3578 return;
3579 }
3580 }
3581 set_temp_val_reg(s, ots, oreg);
3582 ots->mem_coherent = 0;
3583
3584 if (NEED_SYNC_ARG(0)) {
3585 temp_sync(s, ots, allocated_regs, 0, 0);
3586 }
3587 }
3588
3589 /*
3590 * Specialized code generation for INDEX_op_dup_vec.
3591 */
3592 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3593 {
3594 const TCGLifeData arg_life = op->life;
3595 TCGRegSet dup_out_regs, dup_in_regs;
3596 TCGTemp *its, *ots;
3597 TCGType itype, vtype;
3598 intptr_t endian_fixup;
3599 unsigned vece;
3600 bool ok;
3601
3602 ots = arg_temp(op->args[0]);
3603 its = arg_temp(op->args[1]);
3604
3605 /* ENV should not be modified. */
3606 tcg_debug_assert(!temp_readonly(ots));
3607
3608 itype = its->type;
3609 vece = TCGOP_VECE(op);
3610 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3611
3612 if (its->val_type == TEMP_VAL_CONST) {
3613 /* Propagate constant via movi -> dupi. */
3614 tcg_target_ulong val = its->val;
3615 if (IS_DEAD_ARG(1)) {
3616 temp_dead(s, its);
3617 }
3618 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3619 return;
3620 }
3621
3622 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3623 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3624
3625 /* Allocate the output register now. */
3626 if (ots->val_type != TEMP_VAL_REG) {
3627 TCGRegSet allocated_regs = s->reserved_regs;
3628 TCGReg oreg;
3629
3630 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3631 /* Make sure to not spill the input register. */
3632 tcg_regset_set_reg(allocated_regs, its->reg);
3633 }
3634 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3635 op->output_pref[0], ots->indirect_base);
3636 set_temp_val_reg(s, ots, oreg);
3637 }
3638
3639 switch (its->val_type) {
3640 case TEMP_VAL_REG:
3641 /*
3642 * The dup constraints must be broad, covering all possible VECE.
3643 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3644 * to fail, indicating that extra moves are required for that case.
3645 */
3646 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3647 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3648 goto done;
3649 }
3650 /* Try again from memory or a vector input register. */
3651 }
3652 if (!its->mem_coherent) {
3653 /*
3654 * The input register is not synced, and so an extra store
3655 * would be required to use memory. Attempt an integer-vector
3656 * register move first. We do not have a TCGRegSet for this.
3657 */
3658 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3659 break;
3660 }
3661 /* Sync the temp back to its slot and load from there. */
3662 temp_sync(s, its, s->reserved_regs, 0, 0);
3663 }
3664 /* fall through */
3665
3666 case TEMP_VAL_MEM:
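/* On a big-endian host the least significant bytes of the scalar sit
at the high end of its slot, hence the (slot size - element size)
adjustment below. */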
3667 #if HOST_BIG_ENDIAN
3668 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3669 endian_fixup -= 1 << vece;
3670 #else
3671 endian_fixup = 0;
3672 #endif
3673 /* Attempt to dup directly from the input memory slot. */
3674 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3675 its->mem_offset + endian_fixup)) {
3676 goto done;
3677 }
3678 /* Load the input into the destination vector register. */
3679 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3680 break;
3681
3682 default:
3683 g_assert_not_reached();
3684 }
3685
3686 /* We now have a vector input register, so dup must succeed. */
3687 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3688 tcg_debug_assert(ok);
3689
3690 done:
3691 ots->mem_coherent = 0;
3692 if (IS_DEAD_ARG(1)) {
3693 temp_dead(s, its);
3694 }
3695 if (NEED_SYNC_ARG(0)) {
3696 temp_sync(s, ots, s->reserved_regs, 0, 0);
3697 }
3698 if (IS_DEAD_ARG(0)) {
3699 temp_dead(s, ots);
3700 }
3701 }
3702
3703 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3704 {
3705 const TCGLifeData arg_life = op->life;
3706 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3707 TCGRegSet i_allocated_regs;
3708 TCGRegSet o_allocated_regs;
3709 int i, k, nb_iargs, nb_oargs;
3710 TCGReg reg;
3711 TCGArg arg;
3712 const TCGArgConstraint *arg_ct;
3713 TCGTemp *ts;
3714 TCGArg new_args[TCG_MAX_OP_ARGS];
3715 int const_args[TCG_MAX_OP_ARGS];
3716
3717 nb_oargs = def->nb_oargs;
3718 nb_iargs = def->nb_iargs;
3719
3720 /* copy constants */
3721 memcpy(new_args + nb_oargs + nb_iargs,
3722 op->args + nb_oargs + nb_iargs,
3723 sizeof(TCGArg) * def->nb_cargs);
3724
3725 i_allocated_regs = s->reserved_regs;
3726 o_allocated_regs = s->reserved_regs;
3727
3728 /* satisfy input constraints */
3729 for (k = 0; k < nb_iargs; k++) {
3730 TCGRegSet i_preferred_regs, i_required_regs;
3731 bool allocate_new_reg, copyto_new_reg;
3732 TCGTemp *ts2;
3733 int i1, i2;
3734
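/* Visit inputs in constraint priority order via sort_index. */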
3735 i = def->args_ct[nb_oargs + k].sort_index;
3736 arg = op->args[i];
3737 arg_ct = &def->args_ct[i];
3738 ts = arg_temp(arg);
3739
3740 if (ts->val_type == TEMP_VAL_CONST
3741 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3742 /* constant is OK for instruction */
3743 const_args[i] = 1;
3744 new_args[i] = ts->val;
3745 continue;
3746 }
3747
3748 reg = ts->reg;
3749 i_preferred_regs = 0;
3750 i_required_regs = arg_ct->regs;
3751 allocate_new_reg = false;
3752 copyto_new_reg = false;
3753
3754 switch (arg_ct->pair) {
3755 case 0: /* not paired */
3756 if (arg_ct->ialias) {
3757 i_preferred_regs = op->output_pref[arg_ct->alias_index];
3758
3759 /*
3760 * If the input is readonly, then it cannot also be an
3761 * output and aliased to itself. If the input is not
3762 * dead after the instruction, we must allocate a new
3763 * register and move it.
3764 */
3765 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3766 allocate_new_reg = true;
3767 } else if (ts->val_type == TEMP_VAL_REG) {
3768 /*
3769 * Check if the current register has already been
3770 * allocated for another input.
3771 */
3772 allocate_new_reg =
3773 tcg_regset_test_reg(i_allocated_regs, reg);
3774 }
3775 }
3776 if (!allocate_new_reg) {
3777 temp_load(s, ts, i_required_regs, i_allocated_regs,
3778 i_preferred_regs);
3779 reg = ts->reg;
3780 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
3781 }
3782 if (allocate_new_reg) {
3783 /*
3784 * Allocate a new register matching the constraint
3785 * and move the temporary register into it.
3786 */
3787 temp_load(s, ts, tcg_target_available_regs[ts->type],
3788 i_allocated_regs, 0);
3789 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
3790 i_preferred_regs, ts->indirect_base);
3791 copyto_new_reg = true;
3792 }
3793 break;
3794
3795 case 1:
3796 /* First of an input pair; if i1 == i2, the second is an output. */
3797 i1 = i;
3798 i2 = arg_ct->pair_index;
3799 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
3800
3801 /*
3802 * It is easier to default to allocating a new pair
3803 * and to identify a few cases where it's not required.
3804 */
3805 if (arg_ct->ialias) {
3806 i_preferred_regs = op->output_pref[arg_ct->alias_index];
3807 if (IS_DEAD_ARG(i1) &&
3808 IS_DEAD_ARG(i2) &&
3809 !temp_readonly(ts) &&
3810 ts->val_type == TEMP_VAL_REG &&
3811 ts->reg < TCG_TARGET_NB_REGS - 1 &&
3812 tcg_regset_test_reg(i_required_regs, reg) &&
3813 !tcg_regset_test_reg(i_allocated_regs, reg) &&
3814 !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
3815 (ts2
3816 ? ts2->val_type == TEMP_VAL_REG &&
3817 ts2->reg == reg + 1 &&
3818 !temp_readonly(ts2)
3819 : s->reg_to_temp[reg + 1] == NULL)) {
3820 break;
3821 }
3822 } else {
3823 /* Without aliasing, the pair must also be an input. */
3824 tcg_debug_assert(ts2);
3825 if (ts->val_type == TEMP_VAL_REG &&
3826 ts2->val_type == TEMP_VAL_REG &&
3827 ts2->reg == reg + 1 &&
3828 tcg_regset_test_reg(i_required_regs, reg)) {
3829 break;
3830 }
3831 }
3832 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
3833 0, ts->indirect_base);
3834 goto do_pair;
3835
3836 case 2: /* pair second */
3837 reg = new_args[arg_ct->pair_index] + 1;
3838 goto do_pair;
3839
3840 case 3: /* ialias with second output, no first input */
3841 tcg_debug_assert(arg_ct->ialias);
3842 i_preferred_regs = op->output_pref[arg_ct->alias_index];
3843
3844 if (IS_DEAD_ARG(i) &&
3845 !temp_readonly(ts) &&
3846 ts->val_type == TEMP_VAL_REG &&
3847 reg > 0 &&
3848 s->reg_to_temp[reg - 1] == NULL &&
3849 tcg_regset_test_reg(i_required_regs, reg) &&
3850 !tcg_regset_test_reg(i_allocated_regs, reg) &&
3851 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
3852 tcg_regset_set_reg(i_allocated_regs, reg - 1);
3853 break;
3854 }
3855 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
3856 i_allocated_regs, 0,
3857 ts->indirect_base);
3858 tcg_regset_set_reg(i_allocated_regs, reg);
3859 reg += 1;
3860 goto do_pair;
3861
3862 do_pair:
3863 /*
3864 * If an aliased input is not dead after the instruction,
3865 * we must allocate a new register and move it.
3866 */
3867 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
3868 TCGRegSet t_allocated_regs = i_allocated_regs;
3869
3870 /*
3871 * Because of the alias, and the continued life, make sure
3872 * that the temp is somewhere *other* than the reg pair,
3873 * and we get a copy in reg.
3874 */
3875 tcg_regset_set_reg(t_allocated_regs, reg);
3876 tcg_regset_set_reg(t_allocated_regs, reg + 1);
3877 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
3878 /* If ts was already in reg, copy it somewhere else. */
3879 TCGReg nr;
3880 bool ok;
3881
3882 tcg_debug_assert(ts->kind != TEMP_FIXED);
3883 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
3884 t_allocated_regs, 0, ts->indirect_base);
3885 ok = tcg_out_mov(s, ts->type, nr, reg);
3886 tcg_debug_assert(ok);
3887
3888 set_temp_val_reg(s, ts, nr);
3889 } else {
3890 temp_load(s, ts, tcg_target_available_regs[ts->type],
3891 t_allocated_regs, 0);
3892 copyto_new_reg = true;
3893 }
3894 } else {
3895 /* Preferably allocate to reg, otherwise copy. */
3896 i_required_regs = (TCGRegSet)1 << reg;
3897 temp_load(s, ts, i_required_regs, i_allocated_regs,
3898 i_preferred_regs);
3899 copyto_new_reg = ts->reg != reg;
3900 }
3901 break;
3902
3903 default:
3904 g_assert_not_reached();
3905 }
3906
3907 if (copyto_new_reg) {
3908 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3909 /*
3910 * Cross register class move not supported. Sync the
3911 * temp back to its slot and load from there.
3912 */
3913 temp_sync(s, ts, i_allocated_regs, 0, 0);
3914 tcg_out_ld(s, ts->type, reg,
3915 ts->mem_base->reg, ts->mem_offset);
3916 }
3917 }
3918 new_args[i] = reg;
3919 const_args[i] = 0;
3920 tcg_regset_set_reg(i_allocated_regs, reg);
3921 }
3922
3923 /* mark dead temporaries and free the associated registers */
3924 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3925 if (IS_DEAD_ARG(i)) {
3926 temp_dead(s, arg_temp(op->args[i]));
3927 }
3928 }
3929
3930 if (def->flags & TCG_OPF_COND_BRANCH) {
3931 tcg_reg_alloc_cbranch(s, i_allocated_regs);
3932 } else if (def->flags & TCG_OPF_BB_END) {
3933 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3934 } else {
3935 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3936 /* XXX: permit a generic clobber register list? */
3937 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3938 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3939 tcg_reg_free(s, i, i_allocated_regs);
3940 }
3941 }
3942 }
3943 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3944 /* sync globals if the op has side effects and might trigger
3945 an exception. */
3946 sync_globals(s, i_allocated_regs);
3947 }
3948
3949 /* satisfy the output constraints */
3950 for (k = 0; k < nb_oargs; k++) {
3951 i = def->args_ct[k].sort_index;
3952 arg = op->args[i];
3953 arg_ct = &def->args_ct[i];
3954 ts = arg_temp(arg);
3955
3956 /* ENV should not be modified. */
3957 tcg_debug_assert(!temp_readonly(ts));
3958
3959 switch (arg_ct->pair) {
3960 case 0: /* not paired */
3961 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3962 reg = new_args[arg_ct->alias_index];
3963 } else if (arg_ct->newreg) {
3964 reg = tcg_reg_alloc(s, arg_ct->regs,
3965 i_allocated_regs | o_allocated_regs,
3966 op->output_pref[k], ts->indirect_base);
3967 } else {
3968 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3969 op->output_pref[k], ts->indirect_base);
3970 }
3971 break;
3972
3973 case 1: /* first of pair */
3974 tcg_debug_assert(!arg_ct->newreg);
3975 if (arg_ct->oalias) {
3976 reg = new_args[arg_ct->alias_index];
3977 break;
3978 }
3979 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
3980 op->output_pref[k], ts->indirect_base);
3981 break;
3982
3983 case 2: /* second of pair */
3984 tcg_debug_assert(!arg_ct->newreg);
3985 if (arg_ct->oalias) {
3986 reg = new_args[arg_ct->alias_index];
3987 } else {
3988 reg = new_args[arg_ct->pair_index] + 1;
3989 }
3990 break;
3991
3992 case 3: /* first of pair, aliasing with a second input */
3993 tcg_debug_assert(!arg_ct->newreg);
3994 reg = new_args[arg_ct->pair_index] - 1;
3995 break;
3996
3997 default:
3998 g_assert_not_reached();
3999 }
4000 tcg_regset_set_reg(o_allocated_regs, reg);
4001 set_temp_val_reg(s, ts, reg);
4002 ts->mem_coherent = 0;
4003 new_args[i] = reg;
4004 }
4005 }
4006
4007 /* emit instruction */
4008 if (def->flags & TCG_OPF_VECTOR) {
4009 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4010 new_args, const_args);
4011 } else {
4012 tcg_out_op(s, op->opc, new_args, const_args);
4013 }
4014
4015 /* Move the outputs into the correct registers, if needed. */
4016 for (i = 0; i < nb_oargs; i++) {
4017 ts = arg_temp(op->args[i]);
4018
4019 /* ENV should not be modified. */
4020 tcg_debug_assert(!temp_readonly(ts));
4021
4022 if (NEED_SYNC_ARG(i)) {
4023 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4024 } else if (IS_DEAD_ARG(i)) {
4025 temp_dead(s, ts);
4026 }
4027 }
4028 }
4029
4030 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4031 {
4032 const TCGLifeData arg_life = op->life;
4033 TCGTemp *ots, *itsl, *itsh;
4034 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4035
4036 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4037 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4038 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4039
4040 ots = arg_temp(op->args[0]);
4041 itsl = arg_temp(op->args[1]);
4042 itsh = arg_temp(op->args[2]);
4043
4044 /* ENV should not be modified. */
4045 tcg_debug_assert(!temp_readonly(ots));
4046
4047 /* Allocate the output register now. */
4048 if (ots->val_type != TEMP_VAL_REG) {
4049 TCGRegSet allocated_regs = s->reserved_regs;
4050 TCGRegSet dup_out_regs =
4051 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4052 TCGReg oreg;
4053
4054 /* Make sure not to spill the input registers. */
4055 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4056 tcg_regset_set_reg(allocated_regs, itsl->reg);
4057 }
4058 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4059 tcg_regset_set_reg(allocated_regs, itsh->reg);
4060 }
4061
4062 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4063 op->output_pref[0], ots->indirect_base);
4064 set_temp_val_reg(s, ots, oreg);
4065 }
4066
4067 /* Promote dup2 of immediates to dupi_vec. */
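/*
 * The two 32-bit halves are combined (low = itsl, high = itsh) and
 * then tested for the smallest replicated element size.  E.g. with
 * both halves 0x01010101 (illustrative values), the combined value
 * equals dup_const(MO_8, val) and the dup is emitted at MO_8.
 */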
4068 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4069 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4070 MemOp vece = MO_64;
4071
4072 if (val == dup_const(MO_8, val)) {
4073 vece = MO_8;
4074 } else if (val == dup_const(MO_16, val)) {
4075 vece = MO_16;
4076 } else if (val == dup_const(MO_32, val)) {
4077 vece = MO_32;
4078 }
4079
4080 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4081 goto done;
4082 }
4083
4084 /* If the two inputs form one 64-bit value, try dupm_vec. */
4085 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4086 temp_sync(s, itsl, s->reserved_regs, 0, 0);
4087 temp_sync(s, itsh, s->reserved_regs, 0, 0);
4088 #if HOST_BIG_ENDIAN
4089 TCGTemp *its = itsh;
4090 #else
4091 TCGTemp *its = itsl;
4092 #endif
4093 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4094 its->mem_base->reg, its->mem_offset)) {
4095 goto done;
4096 }
4097 }
4098
4099 /* Fall back to generic expansion. */
4100 return false;
4101
4102 done:
4103 ots->mem_coherent = 0;
4104 if (IS_DEAD_ARG(1)) {
4105 temp_dead(s, itsl);
4106 }
4107 if (IS_DEAD_ARG(2)) {
4108 temp_dead(s, itsh);
4109 }
4110 if (NEED_SYNC_ARG(0)) {
4111 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4112 } else if (IS_DEAD_ARG(0)) {
4113 temp_dead(s, ots);
4114 }
4115 return true;
4116 }
4117
4118 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4119 {
4120 const int nb_oargs = TCGOP_CALLO(op);
4121 const int nb_iargs = TCGOP_CALLI(op);
4122 const TCGLifeData arg_life = op->life;
4123 const TCGHelperInfo *info;
4124 int flags, nb_regs, i;
4125 TCGReg reg;
4126 TCGArg arg;
4127 TCGTemp *ts;
4128 intptr_t stack_offset;
4129 size_t call_stack_size;
4130 tcg_insn_unit *func_addr;
4131 int allocate_args;
4132 TCGRegSet allocated_regs;
4133
4134 func_addr = tcg_call_func(op);
4135 info = tcg_call_info(op);
4136 flags = info->flags;
4137
4138 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4139 if (nb_regs > nb_iargs) {
4140 nb_regs = nb_iargs;
4141 }
4142
4143 /* assign stack slots first */
4144 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4145 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4146 ~(TCG_TARGET_STACK_ALIGN - 1);
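/*
 * The usual round-up-to-alignment trick: e.g. with three 8-byte
 * stack words and TCG_TARGET_STACK_ALIGN == 16 (illustrative
 * values), (24 + 15) & ~15 reserves 32 bytes.
 */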
4147 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4148 if (allocate_args) {
4149 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4150 preallocate call stack */
4151 tcg_abort();
4152 }
4153
4154 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4155 for (i = nb_regs; i < nb_iargs; i++) {
4156 arg = op->args[nb_oargs + i];
4157 if (arg != TCG_CALL_DUMMY_ARG) {
4158 ts = arg_temp(arg);
4159 temp_load(s, ts, tcg_target_available_regs[ts->type],
4160 s->reserved_regs, 0);
4161 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4162 }
4163 stack_offset += sizeof(tcg_target_long);
4164 }
4165
4166 /* assign input registers */
4167 allocated_regs = s->reserved_regs;
4168 for (i = 0; i < nb_regs; i++) {
4169 arg = op->args[nb_oargs + i];
4170 if (arg != TCG_CALL_DUMMY_ARG) {
4171 ts = arg_temp(arg);
4172 reg = tcg_target_call_iarg_regs[i];
4173
4174 if (ts->val_type == TEMP_VAL_REG) {
4175 if (ts->reg != reg) {
4176 tcg_reg_free(s, reg, allocated_regs);
4177 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4178 /*
4179 * Cross register class move not supported. Sync the
4180 * temp back to its slot and load from there.
4181 */
4182 temp_sync(s, ts, allocated_regs, 0, 0);
4183 tcg_out_ld(s, ts->type, reg,
4184 ts->mem_base->reg, ts->mem_offset);
4185 }
4186 }
4187 } else {
4188 TCGRegSet arg_set = 0;
4189
4190 tcg_reg_free(s, reg, allocated_regs);
4191 tcg_regset_set_reg(arg_set, reg);
4192 temp_load(s, ts, arg_set, allocated_regs, 0);
4193 }
4194
4195 tcg_regset_set_reg(allocated_regs, reg);
4196 }
4197 }
4198
4199 /* mark dead temporaries and free the associated registers */
4200 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4201 if (IS_DEAD_ARG(i)) {
4202 temp_dead(s, arg_temp(op->args[i]));
4203 }
4204 }
4205
4206 /* clobber call registers */
4207 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4208 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4209 tcg_reg_free(s, i, allocated_regs);
4210 }
4211 }
4212
4213 /* Save globals if they might be written by the helper; sync them
4214 if they might only be read. */
4215 if (flags & TCG_CALL_NO_READ_GLOBALS) {
4216 /* Nothing to do */
4217 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4218 sync_globals(s, allocated_regs);
4219 } else {
4220 save_globals(s, allocated_regs);
4221 }
4222
4223 #ifdef CONFIG_TCG_INTERPRETER
4224 {
4225 gpointer hash = (gpointer)(uintptr_t)info->typemask;
4226 ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4227 assert(cif != NULL);
4228 tcg_out_call(s, func_addr, cif);
4229 }
4230 #else
4231 tcg_out_call(s, func_addr);
4232 #endif
4233
4234 /* assign output registers and emit moves if needed */
4235 for (i = 0; i < nb_oargs; i++) {
4236 arg = op->args[i];
4237 ts = arg_temp(arg);
4238
4239 /* ENV should not be modified. */
4240 tcg_debug_assert(!temp_readonly(ts));
4241
4242 reg = tcg_target_call_oarg_regs[i];
4243 set_temp_val_reg(s, ts, reg);
4244 ts->mem_coherent = 0;
4245 if (NEED_SYNC_ARG(i)) {
4246 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4247 } else if (IS_DEAD_ARG(i)) {
4248 temp_dead(s, ts);
4249 }
4250 }
4251 }
4252
4253 #ifdef CONFIG_PROFILER
4254
4255 /* avoid copy/paste errors */
4256 #define PROF_ADD(to, from, field) \
4257 do { \
4258 (to)->field += qatomic_read(&((from)->field)); \
4259 } while (0)
4260
4261 #define PROF_MAX(to, from, field) \
4262 do { \
4263 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4264 if (val__ > (to)->field) { \
4265 (to)->field = val__; \
4266 } \
4267 } while (0)
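/*
 * Usage sketch: PROF_ADD(prof, orig, tb_count) accumulates into
 * prof->tb_count; PROF_MAX keeps the maximum seen across contexts.
 */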
4268
4269 /* Pass in a zeroed @prof. */
4270 static inline
4271 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4272 {
4273 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4274 unsigned int i;
4275
4276 for (i = 0; i < n_ctxs; i++) {
4277 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4278 const TCGProfile *orig = &s->prof;
4279
4280 if (counters) {
4281 PROF_ADD(prof, orig, cpu_exec_time);
4282 PROF_ADD(prof, orig, tb_count1);
4283 PROF_ADD(prof, orig, tb_count);
4284 PROF_ADD(prof, orig, op_count);
4285 PROF_MAX(prof, orig, op_count_max);
4286 PROF_ADD(prof, orig, temp_count);
4287 PROF_MAX(prof, orig, temp_count_max);
4288 PROF_ADD(prof, orig, del_op_count);
4289 PROF_ADD(prof, orig, code_in_len);
4290 PROF_ADD(prof, orig, code_out_len);
4291 PROF_ADD(prof, orig, search_out_len);
4292 PROF_ADD(prof, orig, interm_time);
4293 PROF_ADD(prof, orig, code_time);
4294 PROF_ADD(prof, orig, la_time);
4295 PROF_ADD(prof, orig, opt_time);
4296 PROF_ADD(prof, orig, restore_count);
4297 PROF_ADD(prof, orig, restore_time);
4298 }
4299 if (table) {
4300 int i;
4301
4302 for (i = 0; i < NB_OPS; i++) {
4303 PROF_ADD(prof, orig, table_op_count[i]);
4304 }
4305 }
4306 }
4307 }
4308
4309 #undef PROF_ADD
4310 #undef PROF_MAX
4311
4312 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4313 {
4314 tcg_profile_snapshot(prof, true, false);
4315 }
4316
4317 static void tcg_profile_snapshot_table(TCGProfile *prof)
4318 {
4319 tcg_profile_snapshot(prof, false, true);
4320 }
4321
4322 void tcg_dump_op_count(GString *buf)
4323 {
4324 TCGProfile prof = {};
4325 int i;
4326
4327 tcg_profile_snapshot_table(&prof);
4328 for (i = 0; i < NB_OPS; i++) {
4329 g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4330 prof.table_op_count[i]);
4331 }
4332 }
4333
4334 int64_t tcg_cpu_exec_time(void)
4335 {
4336 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4337 unsigned int i;
4338 int64_t ret = 0;
4339
4340 for (i = 0; i < n_ctxs; i++) {
4341 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4342 const TCGProfile *prof = &s->prof;
4343
4344 ret += qatomic_read(&prof->cpu_exec_time);
4345 }
4346 return ret;
4347 }
4348 #else
4349 void tcg_dump_op_count(GString *buf)
4350 {
4351 g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4352 }
4353
4354 int64_t tcg_cpu_exec_time(void)
4355 {
4356 error_report("%s: TCG profiler not compiled", __func__);
4357 exit(EXIT_FAILURE);
4358 }
4359 #endif
4360
4361
4362 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
4363 {
4364 #ifdef CONFIG_PROFILER
4365 TCGProfile *prof = &s->prof;
4366 #endif
4367 int i, num_insns;
4368 TCGOp *op;
4369
4370 #ifdef CONFIG_PROFILER
4371 {
4372 int n = 0;
4373
4374 QTAILQ_FOREACH(op, &s->ops, link) {
4375 n++;
4376 }
4377 qatomic_set(&prof->op_count, prof->op_count + n);
4378 if (n > prof->op_count_max) {
4379 qatomic_set(&prof->op_count_max, n);
4380 }
4381
4382 n = s->nb_temps;
4383 qatomic_set(&prof->temp_count, prof->temp_count + n);
4384 if (n > prof->temp_count_max) {
4385 qatomic_set(&prof->temp_count_max, n);
4386 }
4387 }
4388 #endif
4389
4390 #ifdef DEBUG_DISAS
4391 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4392 && qemu_log_in_addr_range(pc_start))) {
4393 FILE *logfile = qemu_log_trylock();
4394 if (logfile) {
4395 fprintf(logfile, "OP:\n");
4396 tcg_dump_ops(s, logfile, false);
4397 fprintf(logfile, "\n");
4398 qemu_log_unlock(logfile);
4399 }
4400 }
4401 #endif
4402
4403 #ifdef CONFIG_DEBUG_TCG
4404 /* Ensure all labels referenced have been emitted. */
4405 {
4406 TCGLabel *l;
4407 bool error = false;
4408
4409 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4410 if (unlikely(!l->present) && l->refs) {
4411 qemu_log_mask(CPU_LOG_TB_OP,
4412 "$L%d referenced but not present.\n", l->id);
4413 error = true;
4414 }
4415 }
4416 assert(!error);
4417 }
4418 #endif
4419
4420 #ifdef CONFIG_PROFILER
4421 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4422 #endif
4423
4424 #ifdef USE_TCG_OPTIMIZATIONS
4425 tcg_optimize(s);
4426 #endif
4427
4428 #ifdef CONFIG_PROFILER
4429 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4430 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4431 #endif
4432
4433 reachable_code_pass(s);
4434 liveness_pass_1(s);
4435
4436 if (s->nb_indirects > 0) {
4437 #ifdef DEBUG_DISAS
4438 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4439 && qemu_log_in_addr_range(pc_start))) {
4440 FILE *logfile = qemu_log_trylock();
4441 if (logfile) {
4442 fprintf(logfile, "OP before indirect lowering:\n");
4443 tcg_dump_ops(s, logfile, false);
4444 fprintf(logfile, "\n");
4445 qemu_log_unlock(logfile);
4446 }
4447 }
4448 #endif
4449 /* Replace indirect temps with direct temps. */
4450 if (liveness_pass_2(s)) {
4451 /* If changes were made, re-run liveness. */
4452 liveness_pass_1(s);
4453 }
4454 }
4455
4456 #ifdef CONFIG_PROFILER
4457 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4458 #endif
4459
4460 #ifdef DEBUG_DISAS
4461 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4462 && qemu_log_in_addr_range(pc_start))) {
4463 FILE *logfile = qemu_log_trylock();
4464 if (logfile) {
4465 fprintf(logfile, "OP after optimization and liveness analysis:\n");
4466 tcg_dump_ops(s, logfile, true);
4467 fprintf(logfile, "\n");
4468 qemu_log_unlock(logfile);
4469 }
4470 }
4471 #endif
4472
4473 /* Initialize goto_tb jump offsets. */
4474 tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
4475 tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
4476 tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
4477 if (TCG_TARGET_HAS_direct_jump) {
4478 tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
4479 tcg_ctx->tb_jmp_target_addr = NULL;
4480 } else {
4481 tcg_ctx->tb_jmp_insn_offset = NULL;
4482 tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
4483 }
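/*
 * With direct jumps the backend patches the branch instruction in
 * place, so the insn offsets are recorded; otherwise the branch
 * loads its destination from tb_jmp_target_addr.
 */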
4484
4485 tcg_reg_alloc_start(s);
4486
4487 /*
4488 * Reset the buffer pointers when restarting after overflow.
4489 * TODO: Move this into translate-all.c with the rest of the
4490 * buffer management. Having only this done here is confusing.
4491 */
4492 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4493 s->code_ptr = s->code_buf;
4494
4495 #ifdef TCG_TARGET_NEED_LDST_LABELS
4496 QSIMPLEQ_INIT(&s->ldst_labels);
4497 #endif
4498 #ifdef TCG_TARGET_NEED_POOL_LABELS
4499 s->pool_labels = NULL;
4500 #endif
4501
4502 num_insns = -1;
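/*
 * num_insns is -1 until the first insn_start op; thereafter it
 * indexes gen_insn_data and gen_insn_end_off.
 */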
4503 QTAILQ_FOREACH(op, &s->ops, link) {
4504 TCGOpcode opc = op->opc;
4505
4506 #ifdef CONFIG_PROFILER
4507 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4508 #endif
4509
4510 switch (opc) {
4511 case INDEX_op_mov_i32:
4512 case INDEX_op_mov_i64:
4513 case INDEX_op_mov_vec:
4514 tcg_reg_alloc_mov(s, op);
4515 break;
4516 case INDEX_op_dup_vec:
4517 tcg_reg_alloc_dup(s, op);
4518 break;
4519 case INDEX_op_insn_start:
4520 if (num_insns >= 0) {
4521 size_t off = tcg_current_code_size(s);
4522 s->gen_insn_end_off[num_insns] = off;
4523 /* Assert that we do not overflow our stored offset. */
4524 assert(s->gen_insn_end_off[num_insns] == off);
4525 }
4526 num_insns++;
4527 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4528 target_ulong a;
4529 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4530 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4531 #else
4532 a = op->args[i];
4533 #endif
4534 s->gen_insn_data[num_insns][i] = a;
4535 }
4536 break;
4537 case INDEX_op_discard:
4538 temp_dead(s, arg_temp(op->args[0]));
4539 break;
4540 case INDEX_op_set_label:
4541 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4542 tcg_out_label(s, arg_label(op->args[0]));
4543 break;
4544 case INDEX_op_call:
4545 tcg_reg_alloc_call(s, op);
4546 break;
4547 case INDEX_op_dup2_vec:
4548 if (tcg_reg_alloc_dup2(s, op)) {
4549 break;
4550 }
4551 /* fall through */
4552 default:
4553 /* Sanity check that we've not introduced any unhandled opcodes. */
4554 tcg_debug_assert(tcg_op_supported(opc));
4555 /* Note: it would be faster to have specialized
4556 register allocator functions for some common
4557 argument patterns. */
4558 tcg_reg_alloc_op(s, op);
4559 break;
4560 }
4561 /* Test for (pending) buffer overflow. The assumption is that any
4562 one operation beginning below the high water mark cannot overrun
4563 the buffer completely. Thus we can test for overflow after
4564 generating code without having to check during generation. */
4565 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4566 return -1;
4567 }
4568 /* Test for TB overflow, as seen by gen_insn_end_off. */
4569 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4570 return -2;
4571 }
4572 }
4573 tcg_debug_assert(num_insns >= 0);
4574 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4575
4576 /* Generate TB finalization at the end of block */
4577 #ifdef TCG_TARGET_NEED_LDST_LABELS
4578 i = tcg_out_ldst_finalize(s);
4579 if (i < 0) {
4580 return i;
4581 }
4582 #endif
4583 #ifdef TCG_TARGET_NEED_POOL_LABELS
4584 i = tcg_out_pool_finalize(s);
4585 if (i < 0) {
4586 return i;
4587 }
4588 #endif
4589 if (!tcg_resolve_relocs(s)) {
4590 return -2;
4591 }
4592
4593 #ifndef CONFIG_TCG_INTERPRETER
4594 /* flush instruction cache */
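/*
 * With a split rw/rx mapping both aliases are needed: the data
 * cache is cleaned via the rw address and the instruction cache
 * invalidated via the rx address.
 */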
4595 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4596 (uintptr_t)s->code_buf,
4597 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4598 #endif
4599
4600 return tcg_current_code_size(s);
4601 }
4602
4603 #ifdef CONFIG_PROFILER
4604 void tcg_dump_info(GString *buf)
4605 {
4606 TCGProfile prof = {};
4607 const TCGProfile *s;
4608 int64_t tb_count;
4609 int64_t tb_div_count;
4610 int64_t tot;
4611
4612 tcg_profile_snapshot_counters(&prof);
4613 s = &prof;
4614 tb_count = s->tb_count;
4615 tb_div_count = tb_count ? tb_count : 1;
4616 tot = s->interm_time + s->code_time;
4617
4618 g_string_append_printf(buf, "JIT cycles %" PRId64
4619 " (%0.3f s at 2.4 GHz)\n",
4620 tot, tot / 2.4e9);
4621 g_string_append_printf(buf, "translated TBs %" PRId64
4622 " (aborted=%" PRId64 " %0.1f%%)\n",
4623 tb_count, s->tb_count1 - tb_count,
4624 (double)(s->tb_count1 - s->tb_count)
4625 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4626 g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n",
4627 (double)s->op_count / tb_div_count, s->op_count_max);
4628 g_string_append_printf(buf, "deleted ops/TB %0.2f\n",
4629 (double)s->del_op_count / tb_div_count);
4630 g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n",
4631 (double)s->temp_count / tb_div_count,
4632 s->temp_count_max);
4633 g_string_append_printf(buf, "avg host code/TB %0.1f\n",
4634 (double)s->code_out_len / tb_div_count);
4635 g_string_append_printf(buf, "avg search data/TB %0.1f\n",
4636 (double)s->search_out_len / tb_div_count);
4637
4638 g_string_append_printf(buf, "cycles/op %0.1f\n",
4639 s->op_count ? (double)tot / s->op_count : 0);
4640 g_string_append_printf(buf, "cycles/in byte %0.1f\n",
4641 s->code_in_len ? (double)tot / s->code_in_len : 0);
4642 g_string_append_printf(buf, "cycles/out byte %0.1f\n",
4643 s->code_out_len ? (double)tot / s->code_out_len : 0);
4644 g_string_append_printf(buf, "cycles/search byte %0.1f\n",
4645 s->search_out_len ?
4646 (double)tot / s->search_out_len : 0);
4647 if (tot == 0) {
4648 tot = 1;
4649 }
4650 g_string_append_printf(buf, " gen_interm time %0.1f%%\n",
4651 (double)s->interm_time / tot * 100.0);
4652 g_string_append_printf(buf, " gen_code time %0.1f%%\n",
4653 (double)s->code_time / tot * 100.0);
4654 g_string_append_printf(buf, "optim./code time %0.1f%%\n",
4655 (double)s->opt_time / (s->code_time ?
4656 s->code_time : 1)
4657 * 100.0);
4658 g_string_append_printf(buf, "liveness/code time %0.1f%%\n",
4659 (double)s->la_time / (s->code_time ?
4660 s->code_time : 1) * 100.0);
4661 g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n",
4662 s->restore_count);
4663 g_string_append_printf(buf, " avg cycles %0.1f\n",
4664 s->restore_count ?
4665 (double)s->restore_time / s->restore_count : 0);
4666 }
4667 #else
4668 void tcg_dump_info(GString *buf)
4669 {
4670 g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4671 }
4672 #endif
4673
4674 #ifdef ELF_HOST_MACHINE
4675 /* In order to use this feature, the backend needs to do three things:
4676
4677 (1) Define ELF_HOST_MACHINE, which both supplies the value to
4678 put into the ELF image and indicates support for the feature.
4679
4680 (2) Define tcg_register_jit. This should create a buffer containing
4681 the contents of a .debug_frame section that describes the post-
4682 prologue unwind info for the tcg machine.
4683
4684 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4685 */
4686
4687 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4688 typedef enum {
4689 JIT_NOACTION = 0,
4690 JIT_REGISTER_FN,
4691 JIT_UNREGISTER_FN
4692 } jit_actions_t;
4693
4694 struct jit_code_entry {
4695 struct jit_code_entry *next_entry;
4696 struct jit_code_entry *prev_entry;
4697 const void *symfile_addr;
4698 uint64_t symfile_size;
4699 };
4700
4701 struct jit_descriptor {
4702 uint32_t version;
4703 uint32_t action_flag;
4704 struct jit_code_entry *relevant_entry;
4705 struct jit_code_entry *first_entry;
4706 };
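/*
 * GDB sets a breakpoint in __jit_debug_register_code; on each hit it
 * reads the descriptor and, for JIT_REGISTER_FN, loads the in-memory
 * ELF image from relevant_entry->symfile_addr.
 */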
4707
4708 void __jit_debug_register_code(void) __attribute__((noinline));
4709 void __jit_debug_register_code(void)
4710 {
4711 asm("");
4712 }
4713
4714 /* Must statically initialize the version, because GDB may check
4715 the version before we can set it. */
4716 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4717
4718 /* End GDB interface. */
4719
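/*
 * Return the offset of @str within the NUL-separated string table
 * @strtab; the caller guarantees @str is present, so there is
 * deliberately no bounds check.
 */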
4720 static int find_string(const char *strtab, const char *str)
4721 {
4722 const char *p = strtab + 1;
4723
4724 while (1) {
4725 if (strcmp(p, str) == 0) {
4726 return p - strtab;
4727 }
4728 p += strlen(p) + 1;
4729 }
4730 }
4731
4732 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4733 const void *debug_frame,
4734 size_t debug_frame_size)
4735 {
4736 struct __attribute__((packed)) DebugInfo {
4737 uint32_t len;
4738 uint16_t version;
4739 uint32_t abbrev;
4740 uint8_t ptr_size;
4741 uint8_t cu_die;
4742 uint16_t cu_lang;
4743 uintptr_t cu_low_pc;
4744 uintptr_t cu_high_pc;
4745 uint8_t fn_die;
4746 char fn_name[16];
4747 uintptr_t fn_low_pc;
4748 uintptr_t fn_high_pc;
4749 uint8_t cu_eoc;
4750 };
4751
4752 struct ElfImage {
4753 ElfW(Ehdr) ehdr;
4754 ElfW(Phdr) phdr;
4755 ElfW(Shdr) shdr[7];
4756 ElfW(Sym) sym[2];
4757 struct DebugInfo di;
4758 uint8_t da[24];
4759 char str[80];
4760 };
4761
4762 struct ElfImage *img;
4763
4764 static const struct ElfImage img_template = {
4765 .ehdr = {
4766 .e_ident[EI_MAG0] = ELFMAG0,
4767 .e_ident[EI_MAG1] = ELFMAG1,
4768 .e_ident[EI_MAG2] = ELFMAG2,
4769 .e_ident[EI_MAG3] = ELFMAG3,
4770 .e_ident[EI_CLASS] = ELF_CLASS,
4771 .e_ident[EI_DATA] = ELF_DATA,
4772 .e_ident[EI_VERSION] = EV_CURRENT,
4773 .e_type = ET_EXEC,
4774 .e_machine = ELF_HOST_MACHINE,
4775 .e_version = EV_CURRENT,
4776 .e_phoff = offsetof(struct ElfImage, phdr),
4777 .e_shoff = offsetof(struct ElfImage, shdr),
4778 .e_ehsize = sizeof(ElfW(Ehdr)),
4779 .e_phentsize = sizeof(ElfW(Phdr)),
4780 .e_phnum = 1,
4781 .e_shentsize = sizeof(ElfW(Shdr)),
4782 .e_shnum = ARRAY_SIZE(img->shdr),
4783 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4784 #ifdef ELF_HOST_FLAGS
4785 .e_flags = ELF_HOST_FLAGS,
4786 #endif
4787 #ifdef ELF_OSABI
4788 .e_ident[EI_OSABI] = ELF_OSABI,
4789 #endif
4790 },
4791 .phdr = {
4792 .p_type = PT_LOAD,
4793 .p_flags = PF_X,
4794 },
4795 .shdr = {
4796 [0] = { .sh_type = SHT_NULL },
4797 /* Trick: The contents of code_gen_buffer are not present in
4798 this fake ELF file; that got allocated elsewhere. Therefore
4799 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4800 will not look for contents. We can record any address. */
4801 [1] = { /* .text */
4802 .sh_type = SHT_NOBITS,
4803 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4804 },
4805 [2] = { /* .debug_info */
4806 .sh_type = SHT_PROGBITS,
4807 .sh_offset = offsetof(struct ElfImage, di),
4808 .sh_size = sizeof(struct DebugInfo),
4809 },
4810 [3] = { /* .debug_abbrev */
4811 .sh_type = SHT_PROGBITS,
4812 .sh_offset = offsetof(struct ElfImage, da),
4813 .sh_size = sizeof(img->da),
4814 },
4815 [4] = { /* .debug_frame */
4816 .sh_type = SHT_PROGBITS,
4817 .sh_offset = sizeof(struct ElfImage),
4818 },
4819 [5] = { /* .symtab */
4820 .sh_type = SHT_SYMTAB,
4821 .sh_offset = offsetof(struct ElfImage, sym),
4822 .sh_size = sizeof(img->sym),
4823 .sh_info = 1,
4824 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4825 .sh_entsize = sizeof(ElfW(Sym)),
4826 },
4827 [6] = { /* .strtab */
4828 .sh_type = SHT_STRTAB,
4829 .sh_offset = offsetof(struct ElfImage, str),
4830 .sh_size = sizeof(img->str),
4831 }
4832 },
4833 .sym = {
4834 [1] = { /* code_gen_buffer */
4835 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4836 .st_shndx = 1,
4837 }
4838 },
4839 .di = {
4840 .len = sizeof(struct DebugInfo) - 4,
4841 .version = 2,
4842 .ptr_size = sizeof(void *),
4843 .cu_die = 1,
4844 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4845 .fn_die = 2,
4846 .fn_name = "code_gen_buffer"
4847 },
4848 .da = {
4849 1, /* abbrev number (the cu) */
4850 0x11, 1, /* DW_TAG_compile_unit, has children */
4851 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4852 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4853 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4854 0, 0, /* end of abbrev */
4855 2, /* abbrev number (the fn) */
4856 0x2e, 0, /* DW_TAG_subprogram, no children */
4857 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4858 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4859 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4860 0, 0, /* end of abbrev */
4861 0 /* no more abbrev */
4862 },
4863 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4864 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
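/* e.g. find_string(str, ".text") == 1, the offset past the initial NUL. */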
4865 };
4866
4867 /* We only need a single jit entry; statically allocate it. */
4868 static struct jit_code_entry one_entry;
4869
4870 uintptr_t buf = (uintptr_t)buf_ptr;
4871 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4872 DebugFrameHeader *dfh;
4873
4874 img = g_malloc(img_size);
4875 *img = img_template;
4876
4877 img->phdr.p_vaddr = buf;
4878 img->phdr.p_paddr = buf;
4879 img->phdr.p_memsz = buf_size;
4880
4881 img->shdr[1].sh_name = find_string(img->str, ".text");
4882 img->shdr[1].sh_addr = buf;
4883 img->shdr[1].sh_size = buf_size;
4884
4885 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4886 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4887
4888 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4889 img->shdr[4].sh_size = debug_frame_size;
4890
4891 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4892 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4893
4894 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4895 img->sym[1].st_value = buf;
4896 img->sym[1].st_size = buf_size;
4897
4898 img->di.cu_low_pc = buf;
4899 img->di.cu_high_pc = buf + buf_size;
4900 img->di.fn_low_pc = buf;
4901 img->di.fn_high_pc = buf + buf_size;
4902
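/*
 * The backend's .debug_frame template is copied directly after the
 * ElfImage; patch its FDE with the actual code buffer bounds.
 */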
4903 dfh = (DebugFrameHeader *)(img + 1);
4904 memcpy(dfh, debug_frame, debug_frame_size);
4905 dfh->fde.func_start = buf;
4906 dfh->fde.func_len = buf_size;
4907
4908 #ifdef DEBUG_JIT
4909 /* Enable this block to debug the ELF image file creation.
4910 One can inspect the result with readelf, objdump, or similar. */
4911 {
4912 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
4913 FILE *f = fopen(jit, "w+b");
4914 if (f) {
4915 if (fwrite(img, img_size, 1, f) != 1) {
4916 /* Avoid the unused return value warning for fwrite. */
4917 }
4918 fclose(f);
4919 }
4920 }
4921 #endif
4922
4923 one_entry.symfile_addr = img;
4924 one_entry.symfile_size = img_size;
4925
4926 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4927 __jit_debug_descriptor.relevant_entry = &one_entry;
4928 __jit_debug_descriptor.first_entry = &one_entry;
4929 __jit_debug_register_code();
4930 }
4931 #else
4932 /* No support for the feature. Provide the entry point expected by exec.c,
4933 and implement the internal function we declared earlier. */
4934
4935 static void tcg_register_jit_int(const void *buf, size_t size,
4936 const void *debug_frame,
4937 size_t debug_frame_size)
4938 {
4939 }
4940
4941 void tcg_register_jit(const void *buf, size_t buf_size)
4942 {
4943 }
4944 #endif /* ELF_HOST_MACHINE */
4945
4946 #if !TCG_TARGET_MAYBE_vec
4947 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4948 {
4949 g_assert_not_reached();
4950 }
4951 #endif