1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Define this to use liveness analysis (better code). */
26 #define USE_TCG_OPTIMIZATIONS
27
28 #include "qemu/osdep.h"
29
30 /* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
32
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39
40 /* Note: the long-term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions. */
43 #define NO_CPU_IO_DEFS
44 #include "cpu.h"
45
46 #include "exec/exec-all.h"
47
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
50 #endif
51
52 #include "tcg/tcg-op.h"
53
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS ELFCLASS32
56 #else
57 # define ELF_CLASS ELFCLASS64
58 #endif
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA ELFDATA2MSB
61 #else
62 # define ELF_DATA ELFDATA2LSB
63 #endif
64
65 #include "elf.h"
66 #include "exec/log.h"
67 #include "sysemu/sysemu.h"
68
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70 used here. */
71 static void tcg_target_init(TCGContext *s);
72 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
73 static void tcg_target_qemu_prologue(TCGContext *s);
74 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
75 intptr_t value, intptr_t addend);
76
77 /* The CIE and FDE header definitions will be common to all hosts. */
78 typedef struct {
79 uint32_t len __attribute__((aligned((sizeof(void *)))));
80 uint32_t id;
81 uint8_t version;
82 char augmentation[1];
83 uint8_t code_align;
84 uint8_t data_align;
85 uint8_t return_column;
86 } DebugFrameCIE;
87
88 typedef struct QEMU_PACKED {
89 uint32_t len __attribute__((aligned((sizeof(void *)))));
90 uint32_t cie_offset;
91 uintptr_t func_start;
92 uintptr_t func_len;
93 } DebugFrameFDEHeader;
94
95 typedef struct QEMU_PACKED {
96 DebugFrameCIE cie;
97 DebugFrameFDEHeader fde;
98 } DebugFrameHeader;
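/*
 * These mirror the layout of DWARF .debug_frame CIE/FDE records; the host
 * backends are expected to fill in a DebugFrameHeader describing their
 * prologue and hand it to tcg_register_jit_int() (declared below) so that
 * GDB's JIT interface can unwind through generated code.
 */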
99
100 static void tcg_register_jit_int(const void *buf, size_t size,
101 const void *debug_frame,
102 size_t debug_frame_size)
103 __attribute__((unused));
104
105 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
106 static const char *target_parse_constraint(TCGArgConstraint *ct,
107 const char *ct_str, TCGType type);
108 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
109 intptr_t arg2);
110 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
111 static void tcg_out_movi(TCGContext *s, TCGType type,
112 TCGReg ret, tcg_target_long arg);
113 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
114 const int *const_args);
115 #if TCG_TARGET_MAYBE_vec
116 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, TCGReg src);
118 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, TCGReg base, intptr_t offset);
120 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
121 TCGReg dst, int64_t arg);
122 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
123 unsigned vece, const TCGArg *args,
124 const int *const_args);
125 #else
126 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
127 TCGReg dst, TCGReg src)
128 {
129 g_assert_not_reached();
130 }
131 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
132 TCGReg dst, TCGReg base, intptr_t offset)
133 {
134 g_assert_not_reached();
135 }
136 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
137 TCGReg dst, int64_t arg)
138 {
139 g_assert_not_reached();
140 }
141 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
142 unsigned vece, const TCGArg *args,
143 const int *const_args)
144 {
145 g_assert_not_reached();
146 }
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149 intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151 TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
153 static int tcg_target_const_match(tcg_target_long val, TCGType type,
154 const TCGArgConstraint *arg_ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
158
159 #define TCG_HIGHWATER 1024
160
161 static TCGContext **tcg_ctxs;
162 static unsigned int n_tcg_ctxs;
163 TCGv_env cpu_env = 0;
164 const void *tcg_code_gen_epilogue;
165 uintptr_t tcg_splitwx_diff;
166
167 #ifndef CONFIG_TCG_INTERPRETER
168 tcg_prologue_fn *tcg_qemu_tb_exec;
169 #endif
170
171 struct tcg_region_tree {
172 QemuMutex lock;
173 GTree *tree;
174 /* padding to avoid false sharing is computed at run-time */
175 };
176
177 /*
178 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
179 * dynamically allocate from as demand dictates. Given appropriate region
180 * sizing, this minimizes flushes even when some TCG threads generate a lot
181 * more code than others.
182 */
183 struct tcg_region_state {
184 QemuMutex lock;
185
186 /* fields set at init time */
187 void *start;
188 void *start_aligned;
189 void *end;
190 size_t n;
191 size_t size; /* size of one region */
192 size_t stride; /* .size + guard size */
193
194 /* fields protected by the lock */
195 size_t current; /* current region index */
196 size_t agg_size_full; /* aggregate size of full regions */
197 };
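/*
 * Layout sketch (illustrative; tcg_region_init() below has the exact math):
 * code_gen_buffer is carved into .n regions of .stride bytes each, starting
 * at .start_aligned.  Within a region the first .size bytes are usable and
 * the trailing guard page is mprotect'ed to PROT_NONE.  Region 0 also owns
 * the unaligned bytes between .start and .start_aligned, and the last region
 * runs up to .end, which is itself followed by one final guard page.
 */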
198
199 static struct tcg_region_state region;
200 /*
201 * This is an array of struct tcg_region_tree's, with padding.
202 * We use void * to simplify the computation of region_trees[i]; each
203 * struct is found every tree_size bytes.
204 */
205 static void *region_trees;
206 static size_t tree_size;
207 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
208 static TCGRegSet tcg_target_call_clobber_regs;
209
210 #if TCG_TARGET_INSN_UNIT_SIZE == 1
211 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
212 {
213 *s->code_ptr++ = v;
214 }
215
216 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
217 uint8_t v)
218 {
219 *p = v;
220 }
221 #endif
222
223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
224 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
225 {
226 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
227 *s->code_ptr++ = v;
228 } else {
229 tcg_insn_unit *p = s->code_ptr;
230 memcpy(p, &v, sizeof(v));
231 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
232 }
233 }
234
235 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
236 uint16_t v)
237 {
238 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
239 *p = v;
240 } else {
241 memcpy(p, &v, sizeof(v));
242 }
243 }
244 #endif
245
246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
247 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
248 {
249 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
250 *s->code_ptr++ = v;
251 } else {
252 tcg_insn_unit *p = s->code_ptr;
253 memcpy(p, &v, sizeof(v));
254 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
255 }
256 }
257
258 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
259 uint32_t v)
260 {
261 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
262 *p = v;
263 } else {
264 memcpy(p, &v, sizeof(v));
265 }
266 }
267 #endif
268
269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
270 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
271 {
272 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
273 *s->code_ptr++ = v;
274 } else {
275 tcg_insn_unit *p = s->code_ptr;
276 memcpy(p, &v, sizeof(v));
277 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
278 }
279 }
280
281 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
282 uint64_t v)
283 {
284 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
285 *p = v;
286 } else {
287 memcpy(p, &v, sizeof(v));
288 }
289 }
290 #endif
291
292 /* label relocation processing */
293
294 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
295 TCGLabel *l, intptr_t addend)
296 {
297 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
298
299 r->type = type;
300 r->ptr = code_ptr;
301 r->addend = addend;
302 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
303 }
304
305 static void tcg_out_label(TCGContext *s, TCGLabel *l)
306 {
307 tcg_debug_assert(!l->has_value);
308 l->has_value = 1;
309 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
310 }
311
312 TCGLabel *gen_new_label(void)
313 {
314 TCGContext *s = tcg_ctx;
315 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
316
317 memset(l, 0, sizeof(TCGLabel));
318 l->id = s->nb_labels++;
319 QSIMPLEQ_INIT(&l->relocs);
320
321 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
322
323 return l;
324 }
325
326 static bool tcg_resolve_relocs(TCGContext *s)
327 {
328 TCGLabel *l;
329
330 QSIMPLEQ_FOREACH(l, &s->labels, next) {
331 TCGRelocation *r;
332 uintptr_t value = l->u.value;
333
334 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
335 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
336 return false;
337 }
338 }
339 }
340 return true;
341 }
342
343 static void set_jmp_reset_offset(TCGContext *s, int which)
344 {
345 /*
346 * We will check for overflow at the end of the opcode loop in
347 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
348 */
349 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
350 }
351
352 #include "tcg-target.c.inc"
353
354 /* compare a pointer @ptr and a tb_tc @s */
355 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
356 {
357 if (ptr >= s->ptr + s->size) {
358 return 1;
359 } else if (ptr < s->ptr) {
360 return -1;
361 }
362 return 0;
363 }
364
365 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
366 {
367 const struct tb_tc *a = ap;
368 const struct tb_tc *b = bp;
369
370 /*
371 * When both sizes are set, we know this isn't a lookup.
372 * This is the most likely case: every TB must be inserted; lookups
373 * are a lot less frequent.
374 */
375 if (likely(a->size && b->size)) {
376 if (a->ptr > b->ptr) {
377 return 1;
378 } else if (a->ptr < b->ptr) {
379 return -1;
380 }
381 /* a->ptr == b->ptr should happen only on deletions */
382 g_assert(a->size == b->size);
383 return 0;
384 }
385 /*
386      * A lookup key always has its .size field set to 0.
387 * From the glib sources we see that @ap is always the lookup key. However
388 * the docs provide no guarantee, so we just mark this case as likely.
389 */
390 if (likely(a->size == 0)) {
391 return ptr_cmp_tb_tc(a->ptr, b);
392 }
393 return ptr_cmp_tb_tc(b->ptr, a);
394 }
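/*
 * Example of the semantics above: an insertion compares two fully populated
 * keys by .ptr, while a lookup key {.ptr = host_pc, .size = 0} compares equal
 * to any stored entry whose [ptr, ptr + size) range contains host_pc.  That
 * containment test is what lets tcg_tb_lookup() map an arbitrary host PC back
 * to the TranslationBlock holding it.
 */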
395
396 static void tcg_region_trees_init(void)
397 {
398 size_t i;
399
400 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
401 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
402 for (i = 0; i < region.n; i++) {
403 struct tcg_region_tree *rt = region_trees + i * tree_size;
404
405 qemu_mutex_init(&rt->lock);
406 rt->tree = g_tree_new(tb_tc_cmp);
407 }
408 }
409
410 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
411 {
412 void *p = tcg_splitwx_to_rw(cp);
413 size_t region_idx;
414
415 if (p < region.start_aligned) {
416 region_idx = 0;
417 } else {
418 ptrdiff_t offset = p - region.start_aligned;
419
420 if (offset > region.stride * (region.n - 1)) {
421 region_idx = region.n - 1;
422 } else {
423 region_idx = offset / region.stride;
424 }
425 }
426 return region_trees + region_idx * tree_size;
427 }
428
429 void tcg_tb_insert(TranslationBlock *tb)
430 {
431 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
432
433 qemu_mutex_lock(&rt->lock);
434 g_tree_insert(rt->tree, &tb->tc, tb);
435 qemu_mutex_unlock(&rt->lock);
436 }
437
438 void tcg_tb_remove(TranslationBlock *tb)
439 {
440 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
441
442 qemu_mutex_lock(&rt->lock);
443 g_tree_remove(rt->tree, &tb->tc);
444 qemu_mutex_unlock(&rt->lock);
445 }
446
447 /*
448 * Find the TB 'tb' such that
449 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
450 * Return NULL if not found.
451 */
452 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
453 {
454 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
455 TranslationBlock *tb;
456 struct tb_tc s = { .ptr = (void *)tc_ptr };
457
458 qemu_mutex_lock(&rt->lock);
459 tb = g_tree_lookup(rt->tree, &s);
460 qemu_mutex_unlock(&rt->lock);
461 return tb;
462 }
463
464 static void tcg_region_tree_lock_all(void)
465 {
466 size_t i;
467
468 for (i = 0; i < region.n; i++) {
469 struct tcg_region_tree *rt = region_trees + i * tree_size;
470
471 qemu_mutex_lock(&rt->lock);
472 }
473 }
474
475 static void tcg_region_tree_unlock_all(void)
476 {
477 size_t i;
478
479 for (i = 0; i < region.n; i++) {
480 struct tcg_region_tree *rt = region_trees + i * tree_size;
481
482 qemu_mutex_unlock(&rt->lock);
483 }
484 }
485
486 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
487 {
488 size_t i;
489
490 tcg_region_tree_lock_all();
491 for (i = 0; i < region.n; i++) {
492 struct tcg_region_tree *rt = region_trees + i * tree_size;
493
494 g_tree_foreach(rt->tree, func, user_data);
495 }
496 tcg_region_tree_unlock_all();
497 }
498
499 size_t tcg_nb_tbs(void)
500 {
501 size_t nb_tbs = 0;
502 size_t i;
503
504 tcg_region_tree_lock_all();
505 for (i = 0; i < region.n; i++) {
506 struct tcg_region_tree *rt = region_trees + i * tree_size;
507
508 nb_tbs += g_tree_nnodes(rt->tree);
509 }
510 tcg_region_tree_unlock_all();
511 return nb_tbs;
512 }
513
514 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
515 {
516 TranslationBlock *tb = v;
517
518 tb_destroy(tb);
519 return FALSE;
520 }
521
522 static void tcg_region_tree_reset_all(void)
523 {
524 size_t i;
525
526 tcg_region_tree_lock_all();
527 for (i = 0; i < region.n; i++) {
528 struct tcg_region_tree *rt = region_trees + i * tree_size;
529
530 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
531 /* Increment the refcount first so that destroy acts as a reset */
532 g_tree_ref(rt->tree);
533 g_tree_destroy(rt->tree);
534 }
535 tcg_region_tree_unlock_all();
536 }
537
538 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
539 {
540 void *start, *end;
541
542 start = region.start_aligned + curr_region * region.stride;
543 end = start + region.size;
544
545 if (curr_region == 0) {
546 start = region.start;
547 }
548 if (curr_region == region.n - 1) {
549 end = region.end;
550 }
551
552 *pstart = start;
553 *pend = end;
554 }
555
556 static void tcg_region_assign(TCGContext *s, size_t curr_region)
557 {
558 void *start, *end;
559
560 tcg_region_bounds(curr_region, &start, &end);
561
562 s->code_gen_buffer = start;
563 s->code_gen_ptr = start;
564 s->code_gen_buffer_size = end - start;
565 s->code_gen_highwater = end - TCG_HIGHWATER;
566 }
567
568 static bool tcg_region_alloc__locked(TCGContext *s)
569 {
570 if (region.current == region.n) {
571 return true;
572 }
573 tcg_region_assign(s, region.current);
574 region.current++;
575 return false;
576 }
577
578 /*
579 * Request a new region once the one in use has filled up.
580 * Returns true on error.
581 */
582 static bool tcg_region_alloc(TCGContext *s)
583 {
584 bool err;
585 /* read the region size now; alloc__locked will overwrite it on success */
586 size_t size_full = s->code_gen_buffer_size;
587
588 qemu_mutex_lock(&region.lock);
589 err = tcg_region_alloc__locked(s);
590 if (!err) {
591 region.agg_size_full += size_full - TCG_HIGHWATER;
592 }
593 qemu_mutex_unlock(&region.lock);
594 return err;
595 }
596
597 /*
598 * Perform a context's first region allocation.
599 * This function does _not_ increment region.agg_size_full.
600 */
601 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
602 {
603 return tcg_region_alloc__locked(s);
604 }
605
606 /* Call from a safe-work context */
607 void tcg_region_reset_all(void)
608 {
609 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
610 unsigned int i;
611
612 qemu_mutex_lock(&region.lock);
613 region.current = 0;
614 region.agg_size_full = 0;
615
616 for (i = 0; i < n_ctxs; i++) {
617 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
618 bool err = tcg_region_initial_alloc__locked(s);
619
620 g_assert(!err);
621 }
622 qemu_mutex_unlock(&region.lock);
623
624 tcg_region_tree_reset_all();
625 }
626
627 #ifdef CONFIG_USER_ONLY
628 static size_t tcg_n_regions(void)
629 {
630 return 1;
631 }
632 #else
633 /*
634 * It is likely that some vCPUs will translate more code than others, so we
635 * first try to set more regions than max_cpus, with those regions being of
636 * reasonable size. If that's not possible we make do by evenly dividing
637 * the code_gen_buffer among the vCPUs.
638 */
639 static size_t tcg_n_regions(void)
640 {
641 size_t i;
642
643 /* Use a single region if all we have is one vCPU thread */
644 #if !defined(CONFIG_USER_ONLY)
645 MachineState *ms = MACHINE(qdev_get_machine());
646 unsigned int max_cpus = ms->smp.max_cpus;
647 #endif
648 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
649 return 1;
650 }
651
652 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
653 for (i = 8; i > 0; i--) {
654 size_t regions_per_thread = i;
655 size_t region_size;
656
657 region_size = tcg_init_ctx.code_gen_buffer_size;
658 region_size /= max_cpus * regions_per_thread;
659
660 if (region_size >= 2 * 1024u * 1024) {
661 return max_cpus * regions_per_thread;
662 }
663 }
664 /* If we can't, then just allocate one region per vCPU thread */
665 return max_cpus;
666 }
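/*
 * Worked example with illustrative numbers: for max_cpus == 8 and a 1 GiB
 * code_gen_buffer, the first iteration (i == 8) yields 64 regions of 16 MiB,
 * which passes the >= 2 MiB test, so 64 is returned.  With a 64 MiB buffer
 * the loop only succeeds at i == 4, giving 32 regions of 2 MiB each.
 */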
667 #endif
668
669 /*
670 * Initializes region partitioning.
671 *
672 * Called at init time from the parent thread (i.e. the one calling
673 * tcg_context_init), after the target's TCG globals have been set.
674 *
675 * Region partitioning works by splitting code_gen_buffer into separate regions,
676 * and then assigning regions to TCG threads so that the threads can translate
677 * code in parallel without synchronization.
678 *
679 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
680 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
681 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
682 * must have been parsed before calling this function, since it calls
683 * qemu_tcg_mttcg_enabled().
684 *
685 * In user-mode we use a single region. Having multiple regions in user-mode
686 * is not supported, because the number of vCPU threads (recall that each thread
687 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
688 * OS, and usually this number is huge (tens of thousands is not uncommon).
689 * Thus, given this large bound on the number of vCPU threads and the fact
690 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
691  * the availability of at least one region per vCPU thread.
692 *
693 * However, this user-mode limitation is unlikely to be a significant problem
694 * in practice. Multi-threaded guests share most if not all of their translated
695 * code, which makes parallel code generation less appealing than in softmmu.
696 */
697 void tcg_region_init(void)
698 {
699 void *buf = tcg_init_ctx.code_gen_buffer;
700 void *aligned;
701 size_t size = tcg_init_ctx.code_gen_buffer_size;
702 size_t page_size = qemu_real_host_page_size;
703 size_t region_size;
704 size_t n_regions;
705 size_t i;
706 uintptr_t splitwx_diff;
707
708 n_regions = tcg_n_regions();
709
710 /* The first region will be 'aligned - buf' bytes larger than the others */
711 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
712 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
713 /*
714 * Make region_size a multiple of page_size, using aligned as the start.
715 * As a result of this we might end up with a few extra pages at the end of
716 * the buffer; we will assign those to the last region.
717 */
718 region_size = (size - (aligned - buf)) / n_regions;
719 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
720
721 /* A region must have at least 2 pages; one code, one guard */
722 g_assert(region_size >= 2 * page_size);
723
724 /* init the region struct */
725 qemu_mutex_init(&region.lock);
726 region.n = n_regions;
727 region.size = region_size - page_size;
728 region.stride = region_size;
729 region.start = buf;
730 region.start_aligned = aligned;
731 /* page-align the end, since its last page will be a guard page */
732 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
733 /* account for that last guard page */
734 region.end -= page_size;
735
736 /* set guard pages */
737 splitwx_diff = tcg_splitwx_diff;
738 for (i = 0; i < region.n; i++) {
739 void *start, *end;
740 int rc;
741
742 tcg_region_bounds(i, &start, &end);
743 rc = qemu_mprotect_none(end, page_size);
744 g_assert(!rc);
745 if (splitwx_diff) {
746 rc = qemu_mprotect_none(end + splitwx_diff, page_size);
747 g_assert(!rc);
748 }
749 }
750
751 tcg_region_trees_init();
752
753 /* In user-mode we support only one ctx, so do the initial allocation now */
754 #ifdef CONFIG_USER_ONLY
755 {
756 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
757
758 g_assert(!err);
759 }
760 #endif
761 }
762
763 #ifdef CONFIG_DEBUG_TCG
764 const void *tcg_splitwx_to_rx(void *rw)
765 {
766 /* Pass NULL pointers unchanged. */
767 if (rw) {
768 g_assert(in_code_gen_buffer(rw));
769 rw += tcg_splitwx_diff;
770 }
771 return rw;
772 }
773
774 void *tcg_splitwx_to_rw(const void *rx)
775 {
776 /* Pass NULL pointers unchanged. */
777 if (rx) {
778 rx -= tcg_splitwx_diff;
779 /* Assert that we end with a pointer in the rw region. */
780 g_assert(in_code_gen_buffer(rx));
781 }
782 return (void *)rx;
783 }
784 #endif /* CONFIG_DEBUG_TCG */
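/*
 * Outside of CONFIG_DEBUG_TCG, tcg.h is expected to provide cheaper inline
 * versions of the two conversions above.  Either way the idea is the same:
 * with a split-wx mapping, the executable (rx) view of code_gen_buffer lives
 * tcg_splitwx_diff bytes away from the writable (rw) view, so converting a
 * pointer is a matter of adding or subtracting that constant.  Illustrative
 * addresses only: with the rw buffer at 0x7f0000000000 and a diff of
 * 0x40000000, the rx alias of a pointer p is p + 0x40000000.
 */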
785
786 static void alloc_tcg_plugin_context(TCGContext *s)
787 {
788 #ifdef CONFIG_PLUGIN
789 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
790 s->plugin_tb->insns =
791 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
792 #endif
793 }
794
795 /*
796 * All TCG threads except the parent (i.e. the one that called tcg_context_init
797 * and registered the target's TCG globals) must register with this function
798 * before initiating translation.
799 *
800 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
801 * of tcg_region_init() for the reasoning behind this.
802 *
803 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
804  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
805 * is not used anymore for translation once this function is called.
806 *
807 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
808  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
809 */
810 #ifdef CONFIG_USER_ONLY
811 void tcg_register_thread(void)
812 {
813 tcg_ctx = &tcg_init_ctx;
814 }
815 #else
816 void tcg_register_thread(void)
817 {
818 MachineState *ms = MACHINE(qdev_get_machine());
819 TCGContext *s = g_malloc(sizeof(*s));
820 unsigned int i, n;
821 bool err;
822
823 *s = tcg_init_ctx;
824
825 /* Relink mem_base. */
826 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
827 if (tcg_init_ctx.temps[i].mem_base) {
828 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
829 tcg_debug_assert(b >= 0 && b < n);
830 s->temps[i].mem_base = &s->temps[b];
831 }
832 }
833
834 /* Claim an entry in tcg_ctxs */
835 n = qatomic_fetch_inc(&n_tcg_ctxs);
836 g_assert(n < ms->smp.max_cpus);
837 qatomic_set(&tcg_ctxs[n], s);
838
839 if (n > 0) {
840 alloc_tcg_plugin_context(s);
841 }
842
843 tcg_ctx = s;
844 qemu_mutex_lock(&region.lock);
845 err = tcg_region_initial_alloc__locked(tcg_ctx);
846 g_assert(!err);
847 qemu_mutex_unlock(&region.lock);
848 }
849 #endif /* !CONFIG_USER_ONLY */
850
851 /*
852 * Returns the size (in bytes) of all translated code (i.e. from all regions)
853 * currently in the cache.
854 * See also: tcg_code_capacity()
855 * Do not confuse with tcg_current_code_size(); that one applies to a single
856 * TCG context.
857 */
858 size_t tcg_code_size(void)
859 {
860 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
861 unsigned int i;
862 size_t total;
863
864 qemu_mutex_lock(&region.lock);
865 total = region.agg_size_full;
866 for (i = 0; i < n_ctxs; i++) {
867 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
868 size_t size;
869
870 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
871 g_assert(size <= s->code_gen_buffer_size);
872 total += size;
873 }
874 qemu_mutex_unlock(&region.lock);
875 return total;
876 }
877
878 /*
879 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
880 * regions.
881 * See also: tcg_code_size()
882 */
883 size_t tcg_code_capacity(void)
884 {
885 size_t guard_size, capacity;
886
887 /* no need for synchronization; these variables are set at init time */
888 guard_size = region.stride - region.size;
889 capacity = region.end + guard_size - region.start;
890 capacity -= region.n * (guard_size + TCG_HIGHWATER);
891 return capacity;
892 }
893
894 size_t tcg_tb_phys_invalidate_count(void)
895 {
896 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
897 unsigned int i;
898 size_t total = 0;
899
900 for (i = 0; i < n_ctxs; i++) {
901 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
902
903 total += qatomic_read(&s->tb_phys_invalidate_count);
904 }
905 return total;
906 }
907
908 /* pool based memory allocation */
909 void *tcg_malloc_internal(TCGContext *s, int size)
910 {
911 TCGPool *p;
912 int pool_size;
913
914 if (size > TCG_POOL_CHUNK_SIZE) {
915 /* big malloc: insert a new pool (XXX: could optimize) */
916 p = g_malloc(sizeof(TCGPool) + size);
917 p->size = size;
918 p->next = s->pool_first_large;
919 s->pool_first_large = p;
920 return p->data;
921 } else {
922 p = s->pool_current;
923 if (!p) {
924 p = s->pool_first;
925 if (!p)
926 goto new_pool;
927 } else {
928 if (!p->next) {
929 new_pool:
930 pool_size = TCG_POOL_CHUNK_SIZE;
931 p = g_malloc(sizeof(TCGPool) + pool_size);
932 p->size = pool_size;
933 p->next = NULL;
934 if (s->pool_current)
935 s->pool_current->next = p;
936 else
937 s->pool_first = p;
938 } else {
939 p = p->next;
940 }
941 }
942 }
943 s->pool_current = p;
944 s->pool_cur = p->data + size;
945 s->pool_end = p->data + p->size;
946 return p->data;
947 }
948
949 void tcg_pool_reset(TCGContext *s)
950 {
951 TCGPool *p, *t;
952 for (p = s->pool_first_large; p; p = t) {
953 t = p->next;
954 g_free(p);
955 }
956 s->pool_first_large = NULL;
957 s->pool_cur = s->pool_end = NULL;
958 s->pool_current = NULL;
959 }
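/*
 * Usage note (an assumption about the fast path, which lives in tcg.h):
 * tcg_malloc() bump-allocates from pool_cur and only calls
 * tcg_malloc_internal() when the current chunk cannot satisfy the request.
 * Everything allocated from the pool is released wholesale by
 * tcg_pool_reset(), so pool memory is only valid for the current translation.
 */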
960
961 typedef struct TCGHelperInfo {
962 void *func;
963 const char *name;
964 unsigned flags;
965 unsigned sizemask;
966 } TCGHelperInfo;
967
968 #include "exec/helper-proto.h"
969
970 static const TCGHelperInfo all_helpers[] = {
971 #include "exec/helper-tcg.h"
972 };
973 static GHashTable *helper_table;
974
975 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
976 static void process_op_defs(TCGContext *s);
977 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
978 TCGReg reg, const char *name);
979
980 void tcg_context_init(TCGContext *s)
981 {
982 int op, total_args, n, i;
983 TCGOpDef *def;
984 TCGArgConstraint *args_ct;
985 TCGTemp *ts;
986
987 memset(s, 0, sizeof(*s));
988 s->nb_globals = 0;
989
990 /* Count total number of arguments and allocate the corresponding
991 space */
992 total_args = 0;
993 for(op = 0; op < NB_OPS; op++) {
994 def = &tcg_op_defs[op];
995 n = def->nb_iargs + def->nb_oargs;
996 total_args += n;
997 }
998
999 args_ct = g_new0(TCGArgConstraint, total_args);
1000
1001 for(op = 0; op < NB_OPS; op++) {
1002 def = &tcg_op_defs[op];
1003 def->args_ct = args_ct;
1004 n = def->nb_iargs + def->nb_oargs;
1005 args_ct += n;
1006 }
1007
1008 /* Register helpers. */
1009 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1010 helper_table = g_hash_table_new(NULL, NULL);
1011
1012 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1013 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1014 (gpointer)&all_helpers[i]);
1015 }
1016
1017 tcg_target_init(s);
1018 process_op_defs(s);
1019
1020 /* Reverse the order of the saved registers, assuming they're all at
1021 the start of tcg_target_reg_alloc_order. */
1022 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1023 int r = tcg_target_reg_alloc_order[n];
1024 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1025 break;
1026 }
1027 }
1028 for (i = 0; i < n; ++i) {
1029 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1030 }
1031 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1032 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1033 }
1034
1035 alloc_tcg_plugin_context(s);
1036
1037 tcg_ctx = s;
1038 /*
1039 * In user-mode we simply share the init context among threads, since we
1040  * use a single region. See the documentation of tcg_region_init() for the
1041 * reasoning behind this.
1042 * In softmmu we will have at most max_cpus TCG threads.
1043 */
1044 #ifdef CONFIG_USER_ONLY
1045 tcg_ctxs = &tcg_ctx;
1046 n_tcg_ctxs = 1;
1047 #else
1048 MachineState *ms = MACHINE(qdev_get_machine());
1049 unsigned int max_cpus = ms->smp.max_cpus;
1050 tcg_ctxs = g_new(TCGContext *, max_cpus);
1051 #endif
1052
1053 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1054 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1055 cpu_env = temp_tcgv_ptr(ts);
1056 }
1057
1058 /*
1059 * Allocate TBs right before their corresponding translated code, making
1060 * sure that TBs and code are on different cache lines.
1061 */
1062 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1063 {
1064 uintptr_t align = qemu_icache_linesize;
1065 TranslationBlock *tb;
1066 void *next;
1067
1068 retry:
1069 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1070 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1071
1072 if (unlikely(next > s->code_gen_highwater)) {
1073 if (tcg_region_alloc(s)) {
1074 return NULL;
1075 }
1076 goto retry;
1077 }
1078 qatomic_set(&s->code_gen_ptr, next);
1079 s->data_gen_ptr = NULL;
1080 return tb;
1081 }
1082
1083 void tcg_prologue_init(TCGContext *s)
1084 {
1085 size_t prologue_size, total_size;
1086 void *buf0, *buf1;
1087
1088 /* Put the prologue at the beginning of code_gen_buffer. */
1089 buf0 = s->code_gen_buffer;
1090 total_size = s->code_gen_buffer_size;
1091 s->code_ptr = buf0;
1092 s->code_buf = buf0;
1093 s->data_gen_ptr = NULL;
1094
1095 /*
1096 * The region trees are not yet configured, but tcg_splitwx_to_rx
1097 * needs the bounds for an assert.
1098 */
1099 region.start = buf0;
1100 region.end = buf0 + total_size;
1101
1102 #ifndef CONFIG_TCG_INTERPRETER
1103 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
1104 #endif
1105
1106 /* Compute a high-water mark, at which we voluntarily flush the buffer
1107 and start over. The size here is arbitrary, significantly larger
1108 than we expect the code generation for any one opcode to require. */
1109 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1110
1111 #ifdef TCG_TARGET_NEED_POOL_LABELS
1112 s->pool_labels = NULL;
1113 #endif
1114
1115 /* Generate the prologue. */
1116 tcg_target_qemu_prologue(s);
1117
1118 #ifdef TCG_TARGET_NEED_POOL_LABELS
1119 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1120 {
1121 int result = tcg_out_pool_finalize(s);
1122 tcg_debug_assert(result == 0);
1123 }
1124 #endif
1125
1126 buf1 = s->code_ptr;
1127 #ifndef CONFIG_TCG_INTERPRETER
1128 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
1129 tcg_ptr_byte_diff(buf1, buf0));
1130 #endif
1131
1132 /* Deduct the prologue from the buffer. */
1133 prologue_size = tcg_current_code_size(s);
1134 s->code_gen_ptr = buf1;
1135 s->code_gen_buffer = buf1;
1136 s->code_buf = buf1;
1137 total_size -= prologue_size;
1138 s->code_gen_buffer_size = total_size;
1139
1140 tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
1141
1142 #ifdef DEBUG_DISAS
1143 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1144 FILE *logfile = qemu_log_lock();
1145 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1146 if (s->data_gen_ptr) {
1147 size_t code_size = s->data_gen_ptr - buf0;
1148 size_t data_size = prologue_size - code_size;
1149 size_t i;
1150
1151 log_disas(buf0, code_size);
1152
1153 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1154 if (sizeof(tcg_target_ulong) == 8) {
1155 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1156 (uintptr_t)s->data_gen_ptr + i,
1157 *(uint64_t *)(s->data_gen_ptr + i));
1158 } else {
1159 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
1160 (uintptr_t)s->data_gen_ptr + i,
1161 *(uint32_t *)(s->data_gen_ptr + i));
1162 }
1163 }
1164 } else {
1165 log_disas(buf0, prologue_size);
1166 }
1167 qemu_log("\n");
1168 qemu_log_flush();
1169 qemu_log_unlock(logfile);
1170 }
1171 #endif
1172
1173 /* Assert that goto_ptr is implemented completely. */
1174 if (TCG_TARGET_HAS_goto_ptr) {
1175 tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1176 }
1177 }
1178
1179 void tcg_func_start(TCGContext *s)
1180 {
1181 tcg_pool_reset(s);
1182 s->nb_temps = s->nb_globals;
1183
1184 /* No temps have been previously allocated for size or locality. */
1185 memset(s->free_temps, 0, sizeof(s->free_temps));
1186
1187 s->nb_ops = 0;
1188 s->nb_labels = 0;
1189 s->current_frame_offset = s->frame_start;
1190
1191 #ifdef CONFIG_DEBUG_TCG
1192 s->goto_tb_issue_mask = 0;
1193 #endif
1194
1195 QTAILQ_INIT(&s->ops);
1196 QTAILQ_INIT(&s->free_ops);
1197 QSIMPLEQ_INIT(&s->labels);
1198 }
1199
1200 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1201 {
1202 int n = s->nb_temps++;
1203 tcg_debug_assert(n < TCG_MAX_TEMPS);
1204 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1205 }
1206
1207 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1208 {
1209 TCGTemp *ts;
1210
1211 tcg_debug_assert(s->nb_globals == s->nb_temps);
1212 s->nb_globals++;
1213 ts = tcg_temp_alloc(s);
1214 ts->temp_global = 1;
1215
1216 return ts;
1217 }
1218
1219 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1220 TCGReg reg, const char *name)
1221 {
1222 TCGTemp *ts;
1223
1224 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1225 tcg_abort();
1226 }
1227
1228 ts = tcg_global_alloc(s);
1229 ts->base_type = type;
1230 ts->type = type;
1231 ts->fixed_reg = 1;
1232 ts->reg = reg;
1233 ts->name = name;
1234 tcg_regset_set_reg(s->reserved_regs, reg);
1235
1236 return ts;
1237 }
1238
1239 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1240 {
1241 s->frame_start = start;
1242 s->frame_end = start + size;
1243 s->frame_temp
1244 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1245 }
1246
1247 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1248 intptr_t offset, const char *name)
1249 {
1250 TCGContext *s = tcg_ctx;
1251 TCGTemp *base_ts = tcgv_ptr_temp(base);
1252 TCGTemp *ts = tcg_global_alloc(s);
1253 int indirect_reg = 0, bigendian = 0;
1254 #ifdef HOST_WORDS_BIGENDIAN
1255 bigendian = 1;
1256 #endif
1257
1258 if (!base_ts->fixed_reg) {
1259 /* We do not support double-indirect registers. */
1260 tcg_debug_assert(!base_ts->indirect_reg);
1261 base_ts->indirect_base = 1;
1262 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1263 ? 2 : 1);
1264 indirect_reg = 1;
1265 }
1266
1267 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1268 TCGTemp *ts2 = tcg_global_alloc(s);
1269 char buf[64];
1270
1271 ts->base_type = TCG_TYPE_I64;
1272 ts->type = TCG_TYPE_I32;
1273 ts->indirect_reg = indirect_reg;
1274 ts->mem_allocated = 1;
1275 ts->mem_base = base_ts;
1276 ts->mem_offset = offset + bigendian * 4;
1277 pstrcpy(buf, sizeof(buf), name);
1278 pstrcat(buf, sizeof(buf), "_0");
1279 ts->name = strdup(buf);
1280
1281 tcg_debug_assert(ts2 == ts + 1);
1282 ts2->base_type = TCG_TYPE_I64;
1283 ts2->type = TCG_TYPE_I32;
1284 ts2->indirect_reg = indirect_reg;
1285 ts2->mem_allocated = 1;
1286 ts2->mem_base = base_ts;
1287 ts2->mem_offset = offset + (1 - bigendian) * 4;
1288 pstrcpy(buf, sizeof(buf), name);
1289 pstrcat(buf, sizeof(buf), "_1");
1290 ts2->name = strdup(buf);
1291 } else {
1292 ts->base_type = type;
1293 ts->type = type;
1294 ts->indirect_reg = indirect_reg;
1295 ts->mem_allocated = 1;
1296 ts->mem_base = base_ts;
1297 ts->mem_offset = offset;
1298 ts->name = name;
1299 }
1300 return ts;
1301 }
1302
1303 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1304 {
1305 TCGContext *s = tcg_ctx;
1306 TCGTemp *ts;
1307 int idx, k;
1308
1309 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1310 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1311 if (idx < TCG_MAX_TEMPS) {
1312 /* There is already an available temp with the right type. */
1313 clear_bit(idx, s->free_temps[k].l);
1314
1315 ts = &s->temps[idx];
1316 ts->temp_allocated = 1;
1317 tcg_debug_assert(ts->base_type == type);
1318 tcg_debug_assert(ts->temp_local == temp_local);
1319 } else {
1320 ts = tcg_temp_alloc(s);
1321 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1322 TCGTemp *ts2 = tcg_temp_alloc(s);
1323
1324 ts->base_type = type;
1325 ts->type = TCG_TYPE_I32;
1326 ts->temp_allocated = 1;
1327 ts->temp_local = temp_local;
1328
1329 tcg_debug_assert(ts2 == ts + 1);
1330 ts2->base_type = TCG_TYPE_I64;
1331 ts2->type = TCG_TYPE_I32;
1332 ts2->temp_allocated = 1;
1333 ts2->temp_local = temp_local;
1334 } else {
1335 ts->base_type = type;
1336 ts->type = type;
1337 ts->temp_allocated = 1;
1338 ts->temp_local = temp_local;
1339 }
1340 }
1341
1342 #if defined(CONFIG_DEBUG_TCG)
1343 s->temps_in_use++;
1344 #endif
1345 return ts;
1346 }
1347
1348 TCGv_vec tcg_temp_new_vec(TCGType type)
1349 {
1350 TCGTemp *t;
1351
1352 #ifdef CONFIG_DEBUG_TCG
1353 switch (type) {
1354 case TCG_TYPE_V64:
1355 assert(TCG_TARGET_HAS_v64);
1356 break;
1357 case TCG_TYPE_V128:
1358 assert(TCG_TARGET_HAS_v128);
1359 break;
1360 case TCG_TYPE_V256:
1361 assert(TCG_TARGET_HAS_v256);
1362 break;
1363 default:
1364 g_assert_not_reached();
1365 }
1366 #endif
1367
1368 t = tcg_temp_new_internal(type, 0);
1369 return temp_tcgv_vec(t);
1370 }
1371
1372 /* Create a new temp of the same type as an existing temp. */
1373 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1374 {
1375 TCGTemp *t = tcgv_vec_temp(match);
1376
1377 tcg_debug_assert(t->temp_allocated != 0);
1378
1379 t = tcg_temp_new_internal(t->base_type, 0);
1380 return temp_tcgv_vec(t);
1381 }
1382
1383 void tcg_temp_free_internal(TCGTemp *ts)
1384 {
1385 TCGContext *s = tcg_ctx;
1386 int k, idx;
1387
1388 #if defined(CONFIG_DEBUG_TCG)
1389 s->temps_in_use--;
1390 if (s->temps_in_use < 0) {
1391 fprintf(stderr, "More temporaries freed than allocated!\n");
1392 }
1393 #endif
1394
1395 tcg_debug_assert(ts->temp_global == 0);
1396 tcg_debug_assert(ts->temp_allocated != 0);
1397 ts->temp_allocated = 0;
1398
1399 idx = temp_idx(ts);
1400 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1401 set_bit(idx, s->free_temps[k].l);
1402 }
1403
1404 TCGv_i32 tcg_const_i32(int32_t val)
1405 {
1406 TCGv_i32 t0;
1407 t0 = tcg_temp_new_i32();
1408 tcg_gen_movi_i32(t0, val);
1409 return t0;
1410 }
1411
1412 TCGv_i64 tcg_const_i64(int64_t val)
1413 {
1414 TCGv_i64 t0;
1415 t0 = tcg_temp_new_i64();
1416 tcg_gen_movi_i64(t0, val);
1417 return t0;
1418 }
1419
1420 TCGv_i32 tcg_const_local_i32(int32_t val)
1421 {
1422 TCGv_i32 t0;
1423 t0 = tcg_temp_local_new_i32();
1424 tcg_gen_movi_i32(t0, val);
1425 return t0;
1426 }
1427
1428 TCGv_i64 tcg_const_local_i64(int64_t val)
1429 {
1430 TCGv_i64 t0;
1431 t0 = tcg_temp_local_new_i64();
1432 tcg_gen_movi_i64(t0, val);
1433 return t0;
1434 }
1435
1436 #if defined(CONFIG_DEBUG_TCG)
1437 void tcg_clear_temp_count(void)
1438 {
1439 TCGContext *s = tcg_ctx;
1440 s->temps_in_use = 0;
1441 }
1442
1443 int tcg_check_temp_count(void)
1444 {
1445 TCGContext *s = tcg_ctx;
1446 if (s->temps_in_use) {
1447 /* Clear the count so that we don't give another
1448 * warning immediately next time around.
1449 */
1450 s->temps_in_use = 0;
1451 return 1;
1452 }
1453 return 0;
1454 }
1455 #endif
1456
1457 /* Return true if OP may appear in the opcode stream.
1458 Test the runtime variable that controls each opcode. */
1459 bool tcg_op_supported(TCGOpcode op)
1460 {
1461 const bool have_vec
1462 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1463
1464 switch (op) {
1465 case INDEX_op_discard:
1466 case INDEX_op_set_label:
1467 case INDEX_op_call:
1468 case INDEX_op_br:
1469 case INDEX_op_mb:
1470 case INDEX_op_insn_start:
1471 case INDEX_op_exit_tb:
1472 case INDEX_op_goto_tb:
1473 case INDEX_op_qemu_ld_i32:
1474 case INDEX_op_qemu_st_i32:
1475 case INDEX_op_qemu_ld_i64:
1476 case INDEX_op_qemu_st_i64:
1477 return true;
1478
1479 case INDEX_op_qemu_st8_i32:
1480 return TCG_TARGET_HAS_qemu_st8_i32;
1481
1482 case INDEX_op_goto_ptr:
1483 return TCG_TARGET_HAS_goto_ptr;
1484
1485 case INDEX_op_mov_i32:
1486 case INDEX_op_movi_i32:
1487 case INDEX_op_setcond_i32:
1488 case INDEX_op_brcond_i32:
1489 case INDEX_op_ld8u_i32:
1490 case INDEX_op_ld8s_i32:
1491 case INDEX_op_ld16u_i32:
1492 case INDEX_op_ld16s_i32:
1493 case INDEX_op_ld_i32:
1494 case INDEX_op_st8_i32:
1495 case INDEX_op_st16_i32:
1496 case INDEX_op_st_i32:
1497 case INDEX_op_add_i32:
1498 case INDEX_op_sub_i32:
1499 case INDEX_op_mul_i32:
1500 case INDEX_op_and_i32:
1501 case INDEX_op_or_i32:
1502 case INDEX_op_xor_i32:
1503 case INDEX_op_shl_i32:
1504 case INDEX_op_shr_i32:
1505 case INDEX_op_sar_i32:
1506 return true;
1507
1508 case INDEX_op_movcond_i32:
1509 return TCG_TARGET_HAS_movcond_i32;
1510 case INDEX_op_div_i32:
1511 case INDEX_op_divu_i32:
1512 return TCG_TARGET_HAS_div_i32;
1513 case INDEX_op_rem_i32:
1514 case INDEX_op_remu_i32:
1515 return TCG_TARGET_HAS_rem_i32;
1516 case INDEX_op_div2_i32:
1517 case INDEX_op_divu2_i32:
1518 return TCG_TARGET_HAS_div2_i32;
1519 case INDEX_op_rotl_i32:
1520 case INDEX_op_rotr_i32:
1521 return TCG_TARGET_HAS_rot_i32;
1522 case INDEX_op_deposit_i32:
1523 return TCG_TARGET_HAS_deposit_i32;
1524 case INDEX_op_extract_i32:
1525 return TCG_TARGET_HAS_extract_i32;
1526 case INDEX_op_sextract_i32:
1527 return TCG_TARGET_HAS_sextract_i32;
1528 case INDEX_op_extract2_i32:
1529 return TCG_TARGET_HAS_extract2_i32;
1530 case INDEX_op_add2_i32:
1531 return TCG_TARGET_HAS_add2_i32;
1532 case INDEX_op_sub2_i32:
1533 return TCG_TARGET_HAS_sub2_i32;
1534 case INDEX_op_mulu2_i32:
1535 return TCG_TARGET_HAS_mulu2_i32;
1536 case INDEX_op_muls2_i32:
1537 return TCG_TARGET_HAS_muls2_i32;
1538 case INDEX_op_muluh_i32:
1539 return TCG_TARGET_HAS_muluh_i32;
1540 case INDEX_op_mulsh_i32:
1541 return TCG_TARGET_HAS_mulsh_i32;
1542 case INDEX_op_ext8s_i32:
1543 return TCG_TARGET_HAS_ext8s_i32;
1544 case INDEX_op_ext16s_i32:
1545 return TCG_TARGET_HAS_ext16s_i32;
1546 case INDEX_op_ext8u_i32:
1547 return TCG_TARGET_HAS_ext8u_i32;
1548 case INDEX_op_ext16u_i32:
1549 return TCG_TARGET_HAS_ext16u_i32;
1550 case INDEX_op_bswap16_i32:
1551 return TCG_TARGET_HAS_bswap16_i32;
1552 case INDEX_op_bswap32_i32:
1553 return TCG_TARGET_HAS_bswap32_i32;
1554 case INDEX_op_not_i32:
1555 return TCG_TARGET_HAS_not_i32;
1556 case INDEX_op_neg_i32:
1557 return TCG_TARGET_HAS_neg_i32;
1558 case INDEX_op_andc_i32:
1559 return TCG_TARGET_HAS_andc_i32;
1560 case INDEX_op_orc_i32:
1561 return TCG_TARGET_HAS_orc_i32;
1562 case INDEX_op_eqv_i32:
1563 return TCG_TARGET_HAS_eqv_i32;
1564 case INDEX_op_nand_i32:
1565 return TCG_TARGET_HAS_nand_i32;
1566 case INDEX_op_nor_i32:
1567 return TCG_TARGET_HAS_nor_i32;
1568 case INDEX_op_clz_i32:
1569 return TCG_TARGET_HAS_clz_i32;
1570 case INDEX_op_ctz_i32:
1571 return TCG_TARGET_HAS_ctz_i32;
1572 case INDEX_op_ctpop_i32:
1573 return TCG_TARGET_HAS_ctpop_i32;
1574
1575 case INDEX_op_brcond2_i32:
1576 case INDEX_op_setcond2_i32:
1577 return TCG_TARGET_REG_BITS == 32;
1578
1579 case INDEX_op_mov_i64:
1580 case INDEX_op_movi_i64:
1581 case INDEX_op_setcond_i64:
1582 case INDEX_op_brcond_i64:
1583 case INDEX_op_ld8u_i64:
1584 case INDEX_op_ld8s_i64:
1585 case INDEX_op_ld16u_i64:
1586 case INDEX_op_ld16s_i64:
1587 case INDEX_op_ld32u_i64:
1588 case INDEX_op_ld32s_i64:
1589 case INDEX_op_ld_i64:
1590 case INDEX_op_st8_i64:
1591 case INDEX_op_st16_i64:
1592 case INDEX_op_st32_i64:
1593 case INDEX_op_st_i64:
1594 case INDEX_op_add_i64:
1595 case INDEX_op_sub_i64:
1596 case INDEX_op_mul_i64:
1597 case INDEX_op_and_i64:
1598 case INDEX_op_or_i64:
1599 case INDEX_op_xor_i64:
1600 case INDEX_op_shl_i64:
1601 case INDEX_op_shr_i64:
1602 case INDEX_op_sar_i64:
1603 case INDEX_op_ext_i32_i64:
1604 case INDEX_op_extu_i32_i64:
1605 return TCG_TARGET_REG_BITS == 64;
1606
1607 case INDEX_op_movcond_i64:
1608 return TCG_TARGET_HAS_movcond_i64;
1609 case INDEX_op_div_i64:
1610 case INDEX_op_divu_i64:
1611 return TCG_TARGET_HAS_div_i64;
1612 case INDEX_op_rem_i64:
1613 case INDEX_op_remu_i64:
1614 return TCG_TARGET_HAS_rem_i64;
1615 case INDEX_op_div2_i64:
1616 case INDEX_op_divu2_i64:
1617 return TCG_TARGET_HAS_div2_i64;
1618 case INDEX_op_rotl_i64:
1619 case INDEX_op_rotr_i64:
1620 return TCG_TARGET_HAS_rot_i64;
1621 case INDEX_op_deposit_i64:
1622 return TCG_TARGET_HAS_deposit_i64;
1623 case INDEX_op_extract_i64:
1624 return TCG_TARGET_HAS_extract_i64;
1625 case INDEX_op_sextract_i64:
1626 return TCG_TARGET_HAS_sextract_i64;
1627 case INDEX_op_extract2_i64:
1628 return TCG_TARGET_HAS_extract2_i64;
1629 case INDEX_op_extrl_i64_i32:
1630 return TCG_TARGET_HAS_extrl_i64_i32;
1631 case INDEX_op_extrh_i64_i32:
1632 return TCG_TARGET_HAS_extrh_i64_i32;
1633 case INDEX_op_ext8s_i64:
1634 return TCG_TARGET_HAS_ext8s_i64;
1635 case INDEX_op_ext16s_i64:
1636 return TCG_TARGET_HAS_ext16s_i64;
1637 case INDEX_op_ext32s_i64:
1638 return TCG_TARGET_HAS_ext32s_i64;
1639 case INDEX_op_ext8u_i64:
1640 return TCG_TARGET_HAS_ext8u_i64;
1641 case INDEX_op_ext16u_i64:
1642 return TCG_TARGET_HAS_ext16u_i64;
1643 case INDEX_op_ext32u_i64:
1644 return TCG_TARGET_HAS_ext32u_i64;
1645 case INDEX_op_bswap16_i64:
1646 return TCG_TARGET_HAS_bswap16_i64;
1647 case INDEX_op_bswap32_i64:
1648 return TCG_TARGET_HAS_bswap32_i64;
1649 case INDEX_op_bswap64_i64:
1650 return TCG_TARGET_HAS_bswap64_i64;
1651 case INDEX_op_not_i64:
1652 return TCG_TARGET_HAS_not_i64;
1653 case INDEX_op_neg_i64:
1654 return TCG_TARGET_HAS_neg_i64;
1655 case INDEX_op_andc_i64:
1656 return TCG_TARGET_HAS_andc_i64;
1657 case INDEX_op_orc_i64:
1658 return TCG_TARGET_HAS_orc_i64;
1659 case INDEX_op_eqv_i64:
1660 return TCG_TARGET_HAS_eqv_i64;
1661 case INDEX_op_nand_i64:
1662 return TCG_TARGET_HAS_nand_i64;
1663 case INDEX_op_nor_i64:
1664 return TCG_TARGET_HAS_nor_i64;
1665 case INDEX_op_clz_i64:
1666 return TCG_TARGET_HAS_clz_i64;
1667 case INDEX_op_ctz_i64:
1668 return TCG_TARGET_HAS_ctz_i64;
1669 case INDEX_op_ctpop_i64:
1670 return TCG_TARGET_HAS_ctpop_i64;
1671 case INDEX_op_add2_i64:
1672 return TCG_TARGET_HAS_add2_i64;
1673 case INDEX_op_sub2_i64:
1674 return TCG_TARGET_HAS_sub2_i64;
1675 case INDEX_op_mulu2_i64:
1676 return TCG_TARGET_HAS_mulu2_i64;
1677 case INDEX_op_muls2_i64:
1678 return TCG_TARGET_HAS_muls2_i64;
1679 case INDEX_op_muluh_i64:
1680 return TCG_TARGET_HAS_muluh_i64;
1681 case INDEX_op_mulsh_i64:
1682 return TCG_TARGET_HAS_mulsh_i64;
1683
1684 case INDEX_op_mov_vec:
1685 case INDEX_op_dup_vec:
1686 case INDEX_op_dupi_vec:
1687 case INDEX_op_dupm_vec:
1688 case INDEX_op_ld_vec:
1689 case INDEX_op_st_vec:
1690 case INDEX_op_add_vec:
1691 case INDEX_op_sub_vec:
1692 case INDEX_op_and_vec:
1693 case INDEX_op_or_vec:
1694 case INDEX_op_xor_vec:
1695 case INDEX_op_cmp_vec:
1696 return have_vec;
1697 case INDEX_op_dup2_vec:
1698 return have_vec && TCG_TARGET_REG_BITS == 32;
1699 case INDEX_op_not_vec:
1700 return have_vec && TCG_TARGET_HAS_not_vec;
1701 case INDEX_op_neg_vec:
1702 return have_vec && TCG_TARGET_HAS_neg_vec;
1703 case INDEX_op_abs_vec:
1704 return have_vec && TCG_TARGET_HAS_abs_vec;
1705 case INDEX_op_andc_vec:
1706 return have_vec && TCG_TARGET_HAS_andc_vec;
1707 case INDEX_op_orc_vec:
1708 return have_vec && TCG_TARGET_HAS_orc_vec;
1709 case INDEX_op_mul_vec:
1710 return have_vec && TCG_TARGET_HAS_mul_vec;
1711 case INDEX_op_shli_vec:
1712 case INDEX_op_shri_vec:
1713 case INDEX_op_sari_vec:
1714 return have_vec && TCG_TARGET_HAS_shi_vec;
1715 case INDEX_op_shls_vec:
1716 case INDEX_op_shrs_vec:
1717 case INDEX_op_sars_vec:
1718 return have_vec && TCG_TARGET_HAS_shs_vec;
1719 case INDEX_op_shlv_vec:
1720 case INDEX_op_shrv_vec:
1721 case INDEX_op_sarv_vec:
1722 return have_vec && TCG_TARGET_HAS_shv_vec;
1723 case INDEX_op_rotli_vec:
1724 return have_vec && TCG_TARGET_HAS_roti_vec;
1725 case INDEX_op_rotls_vec:
1726 return have_vec && TCG_TARGET_HAS_rots_vec;
1727 case INDEX_op_rotlv_vec:
1728 case INDEX_op_rotrv_vec:
1729 return have_vec && TCG_TARGET_HAS_rotv_vec;
1730 case INDEX_op_ssadd_vec:
1731 case INDEX_op_usadd_vec:
1732 case INDEX_op_sssub_vec:
1733 case INDEX_op_ussub_vec:
1734 return have_vec && TCG_TARGET_HAS_sat_vec;
1735 case INDEX_op_smin_vec:
1736 case INDEX_op_umin_vec:
1737 case INDEX_op_smax_vec:
1738 case INDEX_op_umax_vec:
1739 return have_vec && TCG_TARGET_HAS_minmax_vec;
1740 case INDEX_op_bitsel_vec:
1741 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1742 case INDEX_op_cmpsel_vec:
1743 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1744
1745 default:
1746 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1747 return true;
1748 }
1749 }
1750
1751 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1752 and endian swap. Maybe it would be better to do the alignment
1753 and endian swap in tcg_reg_alloc_call(). */
1754 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1755 {
1756 int i, real_args, nb_rets, pi;
1757 unsigned sizemask, flags;
1758 TCGHelperInfo *info;
1759 TCGOp *op;
1760
1761 info = g_hash_table_lookup(helper_table, (gpointer)func);
1762 flags = info->flags;
1763 sizemask = info->sizemask;
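    /*
     * sizemask encoding, as consumed below (built by the helper declaration
     * headers): bit 0 is set when the helper returns a 64-bit value; for
     * argument i, bit (i+1)*2 marks a 64-bit argument and the bit above it
     * (2 << (i+1)*2) marks that argument as signed.
     */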
1764
1765 #ifdef CONFIG_PLUGIN
1766 /* detect non-plugin helpers */
1767 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1768 tcg_ctx->plugin_insn->calls_helpers = true;
1769 }
1770 #endif
1771
1772 #if defined(__sparc__) && !defined(__arch64__) \
1773 && !defined(CONFIG_TCG_INTERPRETER)
1774 /* We have 64-bit values in one register, but need to pass as two
1775 separate parameters. Split them. */
1776 int orig_sizemask = sizemask;
1777 int orig_nargs = nargs;
1778 TCGv_i64 retl, reth;
1779 TCGTemp *split_args[MAX_OPC_PARAM];
1780
1781 retl = NULL;
1782 reth = NULL;
1783 if (sizemask != 0) {
1784 for (i = real_args = 0; i < nargs; ++i) {
1785 int is_64bit = sizemask & (1 << (i+1)*2);
1786 if (is_64bit) {
1787 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1788 TCGv_i32 h = tcg_temp_new_i32();
1789 TCGv_i32 l = tcg_temp_new_i32();
1790 tcg_gen_extr_i64_i32(l, h, orig);
1791 split_args[real_args++] = tcgv_i32_temp(h);
1792 split_args[real_args++] = tcgv_i32_temp(l);
1793 } else {
1794 split_args[real_args++] = args[i];
1795 }
1796 }
1797 nargs = real_args;
1798 args = split_args;
1799 sizemask = 0;
1800 }
1801 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1802 for (i = 0; i < nargs; ++i) {
1803 int is_64bit = sizemask & (1 << (i+1)*2);
1804 int is_signed = sizemask & (2 << (i+1)*2);
1805 if (!is_64bit) {
1806 TCGv_i64 temp = tcg_temp_new_i64();
1807 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1808 if (is_signed) {
1809 tcg_gen_ext32s_i64(temp, orig);
1810 } else {
1811 tcg_gen_ext32u_i64(temp, orig);
1812 }
1813 args[i] = tcgv_i64_temp(temp);
1814 }
1815 }
1816 #endif /* TCG_TARGET_EXTEND_ARGS */
1817
1818 op = tcg_emit_op(INDEX_op_call);
1819
1820 pi = 0;
1821 if (ret != NULL) {
1822 #if defined(__sparc__) && !defined(__arch64__) \
1823 && !defined(CONFIG_TCG_INTERPRETER)
1824 if (orig_sizemask & 1) {
1825 /* The 32-bit ABI is going to return the 64-bit value in
1826 the %o0/%o1 register pair. Prepare for this by using
1827 two return temporaries, and reassemble below. */
1828 retl = tcg_temp_new_i64();
1829 reth = tcg_temp_new_i64();
1830 op->args[pi++] = tcgv_i64_arg(reth);
1831 op->args[pi++] = tcgv_i64_arg(retl);
1832 nb_rets = 2;
1833 } else {
1834 op->args[pi++] = temp_arg(ret);
1835 nb_rets = 1;
1836 }
1837 #else
1838 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1839 #ifdef HOST_WORDS_BIGENDIAN
1840 op->args[pi++] = temp_arg(ret + 1);
1841 op->args[pi++] = temp_arg(ret);
1842 #else
1843 op->args[pi++] = temp_arg(ret);
1844 op->args[pi++] = temp_arg(ret + 1);
1845 #endif
1846 nb_rets = 2;
1847 } else {
1848 op->args[pi++] = temp_arg(ret);
1849 nb_rets = 1;
1850 }
1851 #endif
1852 } else {
1853 nb_rets = 0;
1854 }
1855 TCGOP_CALLO(op) = nb_rets;
1856
1857 real_args = 0;
1858 for (i = 0; i < nargs; i++) {
1859 int is_64bit = sizemask & (1 << (i+1)*2);
1860 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1861 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1862             /* some targets want aligned 64-bit args */
1863 if (real_args & 1) {
1864 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1865 real_args++;
1866 }
1867 #endif
1868 /* If stack grows up, then we will be placing successive
1869 arguments at lower addresses, which means we need to
1870 reverse the order compared to how we would normally
1871 treat either big or little-endian. For those arguments
1872 that will wind up in registers, this still works for
1873 HPPA (the only current STACK_GROWSUP target) since the
1874 argument registers are *also* allocated in decreasing
1875 order. If another such target is added, this logic may
1876 have to get more complicated to differentiate between
1877 stack arguments and register arguments. */
1878 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1879 op->args[pi++] = temp_arg(args[i] + 1);
1880 op->args[pi++] = temp_arg(args[i]);
1881 #else
1882 op->args[pi++] = temp_arg(args[i]);
1883 op->args[pi++] = temp_arg(args[i] + 1);
1884 #endif
1885 real_args += 2;
1886 continue;
1887 }
1888
1889 op->args[pi++] = temp_arg(args[i]);
1890 real_args++;
1891 }
1892 op->args[pi++] = (uintptr_t)func;
1893 op->args[pi++] = flags;
1894 TCGOP_CALLI(op) = real_args;
1895
1896 /* Make sure the fields didn't overflow. */
1897 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1898 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1899
1900 #if defined(__sparc__) && !defined(__arch64__) \
1901 && !defined(CONFIG_TCG_INTERPRETER)
1902 /* Free all of the parts we allocated above. */
1903 for (i = real_args = 0; i < orig_nargs; ++i) {
1904 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1905 if (is_64bit) {
1906 tcg_temp_free_internal(args[real_args++]);
1907 tcg_temp_free_internal(args[real_args++]);
1908 } else {
1909 real_args++;
1910 }
1911 }
1912 if (orig_sizemask & 1) {
1913 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1914 Note that describing these as TCGv_i64 eliminates an unnecessary
1915 zero-extension that tcg_gen_concat_i32_i64 would create. */
1916 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1917 tcg_temp_free_i64(retl);
1918 tcg_temp_free_i64(reth);
1919 }
1920 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1921 for (i = 0; i < nargs; ++i) {
1922 int is_64bit = sizemask & (1 << (i+1)*2);
1923 if (!is_64bit) {
1924 tcg_temp_free_internal(args[i]);
1925 }
1926 }
1927 #endif /* TCG_TARGET_EXTEND_ARGS */
1928 }
1929
1930 static void tcg_reg_alloc_start(TCGContext *s)
1931 {
1932 int i, n;
1933 TCGTemp *ts;
1934
1935 for (i = 0, n = s->nb_globals; i < n; i++) {
1936 ts = &s->temps[i];
1937 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1938 }
1939 for (n = s->nb_temps; i < n; i++) {
1940 ts = &s->temps[i];
1941 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1942 ts->mem_allocated = 0;
1943 ts->fixed_reg = 0;
1944 }
1945
1946 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1947 }
1948
1949 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1950 TCGTemp *ts)
1951 {
1952 int idx = temp_idx(ts);
1953
1954 if (ts->temp_global) {
1955 pstrcpy(buf, buf_size, ts->name);
1956 } else if (ts->temp_local) {
1957 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1958 } else {
1959 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1960 }
1961 return buf;
1962 }
1963
1964 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1965 int buf_size, TCGArg arg)
1966 {
1967 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1968 }
1969
1970 /* Find helper name. */
1971 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1972 {
1973 const char *ret = NULL;
1974 if (helper_table) {
1975 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1976 if (info) {
1977 ret = info->name;
1978 }
1979 }
1980 return ret;
1981 }
1982
1983 static const char * const cond_name[] =
1984 {
1985 [TCG_COND_NEVER] = "never",
1986 [TCG_COND_ALWAYS] = "always",
1987 [TCG_COND_EQ] = "eq",
1988 [TCG_COND_NE] = "ne",
1989 [TCG_COND_LT] = "lt",
1990 [TCG_COND_GE] = "ge",
1991 [TCG_COND_LE] = "le",
1992 [TCG_COND_GT] = "gt",
1993 [TCG_COND_LTU] = "ltu",
1994 [TCG_COND_GEU] = "geu",
1995 [TCG_COND_LEU] = "leu",
1996 [TCG_COND_GTU] = "gtu"
1997 };
1998
1999 static const char * const ldst_name[] =
2000 {
2001 [MO_UB] = "ub",
2002 [MO_SB] = "sb",
2003 [MO_LEUW] = "leuw",
2004 [MO_LESW] = "lesw",
2005 [MO_LEUL] = "leul",
2006 [MO_LESL] = "lesl",
2007 [MO_LEQ] = "leq",
2008 [MO_BEUW] = "beuw",
2009 [MO_BESW] = "besw",
2010 [MO_BEUL] = "beul",
2011 [MO_BESL] = "besl",
2012 [MO_BEQ] = "beq",
2013 };
2014
2015 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2016 #ifdef TARGET_ALIGNED_ONLY
2017 [MO_UNALN >> MO_ASHIFT] = "un+",
2018 [MO_ALIGN >> MO_ASHIFT] = "",
2019 #else
2020 [MO_UNALN >> MO_ASHIFT] = "",
2021 [MO_ALIGN >> MO_ASHIFT] = "al+",
2022 #endif
2023 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
2024 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
2025 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
2026 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2027 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2028 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2029 };
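/*
 * Illustrative example, not part of the build (and assuming
 * TARGET_ALIGNED_ONLY is not defined): a little-endian 32-bit load that
 * requires 4-byte alignment, MO_LEUL | MO_ALIGN_4, is printed by
 * tcg_dump_ops() below by combining the two tables above:
 *
 *     s_al = alignment_name[MO_ALIGN_4 >> MO_ASHIFT];    // "al4+"
 *     s_op = ldst_name[memop & (MO_BSWAP | MO_SSIZE)];   // "leul"
 *     // dumped as ",al4+leul,<mmu index>"
 */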
2030
2031 static inline bool tcg_regset_single(TCGRegSet d)
2032 {
2033 return (d & (d - 1)) == 0;
2034 }
2035
2036 static inline TCGReg tcg_regset_first(TCGRegSet d)
2037 {
2038 if (TCG_TARGET_NB_REGS <= 32) {
2039 return ctz32(d);
2040 } else {
2041 return ctz64(d);
2042 }
2043 }
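/*
 * Quick sketch of the two helpers above (illustrative only): a set with
 * exactly one register is a power of two, and the first register is the
 * number of trailing zero bits.
 *
 *     TCGRegSet d = 1u << 4;               // only register 4 in the set
 *     assert(tcg_regset_single(d));        // (d & (d - 1)) == 0
 *     assert(tcg_regset_first(d) == 4);    // ctz32(d) == 4
 */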
2044
2045 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2046 {
2047 char buf[128];
2048 TCGOp *op;
2049
2050 QTAILQ_FOREACH(op, &s->ops, link) {
2051 int i, k, nb_oargs, nb_iargs, nb_cargs;
2052 const TCGOpDef *def;
2053 TCGOpcode c;
2054 int col = 0;
2055
2056 c = op->opc;
2057 def = &tcg_op_defs[c];
2058
2059 if (c == INDEX_op_insn_start) {
2060 nb_oargs = 0;
2061 col += qemu_log("\n ----");
2062
2063 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2064 target_ulong a;
2065 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2066 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2067 #else
2068 a = op->args[i];
2069 #endif
2070 col += qemu_log(" " TARGET_FMT_lx, a);
2071 }
2072 } else if (c == INDEX_op_call) {
2073 /* variable number of arguments */
2074 nb_oargs = TCGOP_CALLO(op);
2075 nb_iargs = TCGOP_CALLI(op);
2076 nb_cargs = def->nb_cargs;
2077
2078 /* function name, flags, out args */
2079 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2080 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2081 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2082 for (i = 0; i < nb_oargs; i++) {
2083 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2084 op->args[i]));
2085 }
2086 for (i = 0; i < nb_iargs; i++) {
2087 TCGArg arg = op->args[nb_oargs + i];
2088 const char *t = "<dummy>";
2089 if (arg != TCG_CALL_DUMMY_ARG) {
2090 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2091 }
2092 col += qemu_log(",%s", t);
2093 }
2094 } else {
2095 col += qemu_log(" %s ", def->name);
2096
2097 nb_oargs = def->nb_oargs;
2098 nb_iargs = def->nb_iargs;
2099 nb_cargs = def->nb_cargs;
2100
2101 if (def->flags & TCG_OPF_VECTOR) {
2102 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2103 8 << TCGOP_VECE(op));
2104 }
2105
2106 k = 0;
2107 for (i = 0; i < nb_oargs; i++) {
2108 if (k != 0) {
2109 col += qemu_log(",");
2110 }
2111 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2112 op->args[k++]));
2113 }
2114 for (i = 0; i < nb_iargs; i++) {
2115 if (k != 0) {
2116 col += qemu_log(",");
2117 }
2118 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2119 op->args[k++]));
2120 }
2121 switch (c) {
2122 case INDEX_op_brcond_i32:
2123 case INDEX_op_setcond_i32:
2124 case INDEX_op_movcond_i32:
2125 case INDEX_op_brcond2_i32:
2126 case INDEX_op_setcond2_i32:
2127 case INDEX_op_brcond_i64:
2128 case INDEX_op_setcond_i64:
2129 case INDEX_op_movcond_i64:
2130 case INDEX_op_cmp_vec:
2131 case INDEX_op_cmpsel_vec:
2132 if (op->args[k] < ARRAY_SIZE(cond_name)
2133 && cond_name[op->args[k]]) {
2134 col += qemu_log(",%s", cond_name[op->args[k++]]);
2135 } else {
2136 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2137 }
2138 i = 1;
2139 break;
2140 case INDEX_op_qemu_ld_i32:
2141 case INDEX_op_qemu_st_i32:
2142 case INDEX_op_qemu_st8_i32:
2143 case INDEX_op_qemu_ld_i64:
2144 case INDEX_op_qemu_st_i64:
2145 {
2146 TCGMemOpIdx oi = op->args[k++];
2147 MemOp op = get_memop(oi);
2148 unsigned ix = get_mmuidx(oi);
2149
2150 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2151 col += qemu_log(",$0x%x,%u", op, ix);
2152 } else {
2153 const char *s_al, *s_op;
2154 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2155 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2156 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2157 }
2158 i = 1;
2159 }
2160 break;
2161 default:
2162 i = 0;
2163 break;
2164 }
2165 switch (c) {
2166 case INDEX_op_set_label:
2167 case INDEX_op_br:
2168 case INDEX_op_brcond_i32:
2169 case INDEX_op_brcond_i64:
2170 case INDEX_op_brcond2_i32:
2171 col += qemu_log("%s$L%d", k ? "," : "",
2172 arg_label(op->args[k])->id);
2173 i++, k++;
2174 break;
2175 default:
2176 break;
2177 }
2178 for (; i < nb_cargs; i++, k++) {
2179 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2180 }
2181 }
2182
2183 if (have_prefs || op->life) {
2184
2185 QemuLogFile *logfile;
2186
2187 rcu_read_lock();
2188 logfile = qatomic_rcu_read(&qemu_logfile);
2189 if (logfile) {
2190 for (; col < 40; ++col) {
2191 putc(' ', logfile->fd);
2192 }
2193 }
2194 rcu_read_unlock();
2195 }
2196
2197 if (op->life) {
2198 unsigned life = op->life;
2199
2200 if (life & (SYNC_ARG * 3)) {
2201 qemu_log(" sync:");
2202 for (i = 0; i < 2; ++i) {
2203 if (life & (SYNC_ARG << i)) {
2204 qemu_log(" %d", i);
2205 }
2206 }
2207 }
2208 life /= DEAD_ARG;
2209 if (life) {
2210 qemu_log(" dead:");
2211 for (i = 0; life; ++i, life >>= 1) {
2212 if (life & 1) {
2213 qemu_log(" %d", i);
2214 }
2215 }
2216 }
2217 }
2218
2219 if (have_prefs) {
2220 for (i = 0; i < nb_oargs; ++i) {
2221 TCGRegSet set = op->output_pref[i];
2222
2223 if (i == 0) {
2224 qemu_log(" pref=");
2225 } else {
2226 qemu_log(",");
2227 }
2228 if (set == 0) {
2229 qemu_log("none");
2230 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2231 qemu_log("all");
2232 #ifdef CONFIG_DEBUG_TCG
2233 } else if (tcg_regset_single(set)) {
2234 TCGReg reg = tcg_regset_first(set);
2235 qemu_log("%s", tcg_target_reg_names[reg]);
2236 #endif
2237 } else if (TCG_TARGET_NB_REGS <= 32) {
2238 qemu_log("%#x", (uint32_t)set);
2239 } else {
2240 qemu_log("%#" PRIx64, (uint64_t)set);
2241 }
2242 }
2243 }
2244
2245 qemu_log("\n");
2246 }
2247 }
2248
2249 /* we give more priority to constraints with fewer registers */
2250 static int get_constraint_priority(const TCGOpDef *def, int k)
2251 {
2252 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2253 int n;
2254
2255 if (arg_ct->oalias) {
2256 /* an alias is equivalent to a single register */
2257 n = 1;
2258 } else {
2259 n = ctpop64(arg_ct->regs);
2260 }
2261 return TCG_TARGET_NB_REGS - n + 1;
2262 }
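/*
 * Worked example (illustrative numbers, for a host with
 * TCG_TARGET_NB_REGS == 16): a constraint accepting all 16 registers gets
 * priority 16 - 16 + 1 = 1, while an alias or a single-register constraint
 * gets 16 - 1 + 1 = 16, so sort_constraints() below places the most
 * restricted operands first.
 */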
2263
2264 /* sort from highest priority to lowest */
2265 static void sort_constraints(TCGOpDef *def, int start, int n)
2266 {
2267 int i, j;
2268 TCGArgConstraint *a = def->args_ct;
2269
2270 for (i = 0; i < n; i++) {
2271 a[start + i].sort_index = start + i;
2272 }
2273 if (n <= 1) {
2274 return;
2275 }
2276 for (i = 0; i < n - 1; i++) {
2277 for (j = i + 1; j < n; j++) {
2278 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2279 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2280 if (p1 < p2) {
2281 int tmp = a[start + i].sort_index;
2282 a[start + i].sort_index = a[start + j].sort_index;
2283 a[start + j].sort_index = tmp;
2284 }
2285 }
2286 }
2287 }
2288
2289 static void process_op_defs(TCGContext *s)
2290 {
2291 TCGOpcode op;
2292
2293 for (op = 0; op < NB_OPS; op++) {
2294 TCGOpDef *def = &tcg_op_defs[op];
2295 const TCGTargetOpDef *tdefs;
2296 TCGType type;
2297 int i, nb_args;
2298
2299 if (def->flags & TCG_OPF_NOT_PRESENT) {
2300 continue;
2301 }
2302
2303 nb_args = def->nb_iargs + def->nb_oargs;
2304 if (nb_args == 0) {
2305 continue;
2306 }
2307
2308 tdefs = tcg_target_op_def(op);
2309 /* Missing TCGTargetOpDef entry. */
2310 tcg_debug_assert(tdefs != NULL);
2311
2312 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2313 for (i = 0; i < nb_args; i++) {
2314 const char *ct_str = tdefs->args_ct_str[i];
2315 /* Incomplete TCGTargetOpDef entry. */
2316 tcg_debug_assert(ct_str != NULL);
2317
2318 while (*ct_str != '\0') {
2319 switch(*ct_str) {
2320 case '0' ... '9':
2321 {
2322 int oarg = *ct_str - '0';
2323 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2324 tcg_debug_assert(oarg < def->nb_oargs);
2325 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2326 def->args_ct[i] = def->args_ct[oarg];
2327 /* The output sets oalias. */
2328 def->args_ct[oarg].oalias = true;
2329 def->args_ct[oarg].alias_index = i;
2330 /* The input sets ialias. */
2331 def->args_ct[i].ialias = true;
2332 def->args_ct[i].alias_index = oarg;
2333 }
2334 ct_str++;
2335 break;
2336 case '&':
2337 def->args_ct[i].newreg = true;
2338 ct_str++;
2339 break;
2340 case 'i':
2341 def->args_ct[i].ct |= TCG_CT_CONST;
2342 ct_str++;
2343 break;
2344 default:
2345 ct_str = target_parse_constraint(&def->args_ct[i],
2346 ct_str, type);
2347 /* Typo in TCGTargetOpDef constraint. */
2348 tcg_debug_assert(ct_str != NULL);
2349 }
2350 }
2351 }
2352
2353 /* TCGTargetOpDef entry with too much information? */
2354 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2355
2356 /* sort the constraints (XXX: this is just a heuristic) */
2357 sort_constraints(def, 0, def->nb_oargs);
2358 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2359 }
2360 }
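/*
 * Example of the constraint strings parsed above (a hypothetical entry a
 * backend might return from tcg_target_op_def()): { "r", "r", "ri" } for a
 * three-operand op means the output and first input accept any register
 * while the second input also accepts an immediate ('i' sets TCG_CT_CONST);
 * a digit string such as "0" would instead alias that input to output
 * operand 0, setting the oalias/ialias links handled above.
 */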
2361
2362 void tcg_op_remove(TCGContext *s, TCGOp *op)
2363 {
2364 TCGLabel *label;
2365
2366 switch (op->opc) {
2367 case INDEX_op_br:
2368 label = arg_label(op->args[0]);
2369 label->refs--;
2370 break;
2371 case INDEX_op_brcond_i32:
2372 case INDEX_op_brcond_i64:
2373 label = arg_label(op->args[3]);
2374 label->refs--;
2375 break;
2376 case INDEX_op_brcond2_i32:
2377 label = arg_label(op->args[5]);
2378 label->refs--;
2379 break;
2380 default:
2381 break;
2382 }
2383
2384 QTAILQ_REMOVE(&s->ops, op, link);
2385 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2386 s->nb_ops--;
2387
2388 #ifdef CONFIG_PROFILER
2389 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2390 #endif
2391 }
2392
2393 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2394 {
2395 TCGContext *s = tcg_ctx;
2396 TCGOp *op;
2397
2398 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2399 op = tcg_malloc(sizeof(TCGOp));
2400 } else {
2401 op = QTAILQ_FIRST(&s->free_ops);
2402 QTAILQ_REMOVE(&s->free_ops, op, link);
2403 }
2404 memset(op, 0, offsetof(TCGOp, link));
2405 op->opc = opc;
2406 s->nb_ops++;
2407
2408 return op;
2409 }
2410
2411 TCGOp *tcg_emit_op(TCGOpcode opc)
2412 {
2413 TCGOp *op = tcg_op_alloc(opc);
2414 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2415 return op;
2416 }
2417
2418 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2419 {
2420 TCGOp *new_op = tcg_op_alloc(opc);
2421 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2422 return new_op;
2423 }
2424
2425 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2426 {
2427 TCGOp *new_op = tcg_op_alloc(opc);
2428 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2429 return new_op;
2430 }
2431
2432 /* Reachability analysis: remove unreachable code. */
2433 static void reachable_code_pass(TCGContext *s)
2434 {
2435 TCGOp *op, *op_next;
2436 bool dead = false;
2437
2438 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2439 bool remove = dead;
2440 TCGLabel *label;
2441 int call_flags;
2442
2443 switch (op->opc) {
2444 case INDEX_op_set_label:
2445 label = arg_label(op->args[0]);
2446 if (label->refs == 0) {
2447 /*
2448 * While there is an occasional backward branch, virtually
2449 * all branches generated by the translators are forward.
2450 * Which means that generally we will have already removed
2451 * all references to this label, and there is little to be
2452 * gained by iterating.
2453 */
2454 remove = true;
2455 } else {
2456 /* Once we see a label, insns become live again. */
2457 dead = false;
2458 remove = false;
2459
2460 /*
2461 * Optimization can fold conditional branches to unconditional.
2462 * If we find a label with one reference which is preceded by
2463 * an unconditional branch to it, remove both. This needed to
2464 * wait until the dead code in between them was removed.
2465 */
2466 if (label->refs == 1) {
2467 TCGOp *op_prev = QTAILQ_PREV(op, link);
2468 if (op_prev->opc == INDEX_op_br &&
2469 label == arg_label(op_prev->args[0])) {
2470 tcg_op_remove(s, op_prev);
2471 remove = true;
2472 }
2473 }
2474 }
2475 break;
2476
2477 case INDEX_op_br:
2478 case INDEX_op_exit_tb:
2479 case INDEX_op_goto_ptr:
2480 /* Unconditional branches; everything following is dead. */
2481 dead = true;
2482 break;
2483
2484 case INDEX_op_call:
2485 /* Notice noreturn helper calls, raising exceptions. */
2486 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2487 if (call_flags & TCG_CALL_NO_RETURN) {
2488 dead = true;
2489 }
2490 break;
2491
2492 case INDEX_op_insn_start:
2493 /* Never remove -- we need to keep these for unwind. */
2494 remove = false;
2495 break;
2496
2497 default:
2498 break;
2499 }
2500
2501 if (remove) {
2502 tcg_op_remove(s, op);
2503 }
2504 }
2505 }
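/*
 * Illustrative example of the pass above (hypothetical op stream): once
 * the optimizer has folded a conditional branch into an unconditional
 * "br $L1", everything up to the label is dead and is removed; if the
 * label is then left with a single reference, the branch and the label
 * are removed together:
 *
 *     br $L1
 *     mov_i32 tmp0, tmp1     <- dead, removed
 *     set_label $L1           <- refs == 1, preceded by br $L1: both removed
 */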
2506
2507 #define TS_DEAD 1
2508 #define TS_MEM 2
2509
2510 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2511 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2512
2513 /* For liveness_pass_1, the register preferences for a given temp. */
2514 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2515 {
2516 return ts->state_ptr;
2517 }
2518
2519 /* For liveness_pass_1, reset the preferences for a given temp to the
2520 * maximal regset for its type.
2521 */
2522 static inline void la_reset_pref(TCGTemp *ts)
2523 {
2524 *la_temp_pref(ts)
2525 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2526 }
2527
2528 /* liveness analysis: end of function: all temps are dead, and globals
2529 should be in memory. */
2530 static void la_func_end(TCGContext *s, int ng, int nt)
2531 {
2532 int i;
2533
2534 for (i = 0; i < ng; ++i) {
2535 s->temps[i].state = TS_DEAD | TS_MEM;
2536 la_reset_pref(&s->temps[i]);
2537 }
2538 for (i = ng; i < nt; ++i) {
2539 s->temps[i].state = TS_DEAD;
2540 la_reset_pref(&s->temps[i]);
2541 }
2542 }
2543
2544 /* liveness analysis: end of basic block: all temps are dead, globals
2545 and local temps should be in memory. */
2546 static void la_bb_end(TCGContext *s, int ng, int nt)
2547 {
2548 int i;
2549
2550 for (i = 0; i < ng; ++i) {
2551 s->temps[i].state = TS_DEAD | TS_MEM;
2552 la_reset_pref(&s->temps[i]);
2553 }
2554 for (i = ng; i < nt; ++i) {
2555 s->temps[i].state = (s->temps[i].temp_local
2556 ? TS_DEAD | TS_MEM
2557 : TS_DEAD);
2558 la_reset_pref(&s->temps[i]);
2559 }
2560 }
2561
2562 /* liveness analysis: sync globals back to memory. */
2563 static void la_global_sync(TCGContext *s, int ng)
2564 {
2565 int i;
2566
2567 for (i = 0; i < ng; ++i) {
2568 int state = s->temps[i].state;
2569 s->temps[i].state = state | TS_MEM;
2570 if (state == TS_DEAD) {
2571 /* If the global was previously dead, reset prefs. */
2572 la_reset_pref(&s->temps[i]);
2573 }
2574 }
2575 }
2576
2577 /*
2578 * liveness analysis: conditional branch: all temps are dead,
2579 * globals and local temps should be synced.
2580 */
2581 static void la_bb_sync(TCGContext *s, int ng, int nt)
2582 {
2583 la_global_sync(s, ng);
2584
2585 for (int i = ng; i < nt; ++i) {
2586 if (s->temps[i].temp_local) {
2587 int state = s->temps[i].state;
2588 s->temps[i].state = state | TS_MEM;
2589 if (state != TS_DEAD) {
2590 continue;
2591 }
2592 } else {
2593 s->temps[i].state = TS_DEAD;
2594 }
2595 la_reset_pref(&s->temps[i]);
2596 }
2597 }
2598
2599 /* liveness analysis: sync globals back to memory and kill. */
2600 static void la_global_kill(TCGContext *s, int ng)
2601 {
2602 int i;
2603
2604 for (i = 0; i < ng; i++) {
2605 s->temps[i].state = TS_DEAD | TS_MEM;
2606 la_reset_pref(&s->temps[i]);
2607 }
2608 }
2609
2610 /* liveness analysis: note live globals crossing calls. */
2611 static void la_cross_call(TCGContext *s, int nt)
2612 {
2613 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2614 int i;
2615
2616 for (i = 0; i < nt; i++) {
2617 TCGTemp *ts = &s->temps[i];
2618 if (!(ts->state & TS_DEAD)) {
2619 TCGRegSet *pset = la_temp_pref(ts);
2620 TCGRegSet set = *pset;
2621
2622 set &= mask;
2623 /* If the combination is not possible, restart. */
2624 if (set == 0) {
2625 set = tcg_target_available_regs[ts->type] & mask;
2626 }
2627 *pset = set;
2628 }
2629 }
2630 }
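/*
 * Example (hypothetical register sets): if a live temp currently prefers
 * { R0, R5 } and R0 is call-clobbered, its preference is narrowed to
 * { R5 }; if every preferred register were clobbered, the preference is
 * restarted from all call-saved registers usable for the temp's type.
 */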
2631
2632 /* Liveness analysis: update the opc_arg_life array to tell if a
2633 given input argument is dead. Instructions updating dead
2634 temporaries are removed. */
2635 static void liveness_pass_1(TCGContext *s)
2636 {
2637 int nb_globals = s->nb_globals;
2638 int nb_temps = s->nb_temps;
2639 TCGOp *op, *op_prev;
2640 TCGRegSet *prefs;
2641 int i;
2642
2643 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2644 for (i = 0; i < nb_temps; ++i) {
2645 s->temps[i].state_ptr = prefs + i;
2646 }
2647
2648 /* ??? Should be redundant with the exit_tb that ends the TB. */
2649 la_func_end(s, nb_globals, nb_temps);
2650
2651 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2652 int nb_iargs, nb_oargs;
2653 TCGOpcode opc_new, opc_new2;
2654 bool have_opc_new2;
2655 TCGLifeData arg_life = 0;
2656 TCGTemp *ts;
2657 TCGOpcode opc = op->opc;
2658 const TCGOpDef *def = &tcg_op_defs[opc];
2659
2660 switch (opc) {
2661 case INDEX_op_call:
2662 {
2663 int call_flags;
2664 int nb_call_regs;
2665
2666 nb_oargs = TCGOP_CALLO(op);
2667 nb_iargs = TCGOP_CALLI(op);
2668 call_flags = op->args[nb_oargs + nb_iargs + 1];
2669
2670 /* pure functions can be removed if their result is unused */
2671 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2672 for (i = 0; i < nb_oargs; i++) {
2673 ts = arg_temp(op->args[i]);
2674 if (ts->state != TS_DEAD) {
2675 goto do_not_remove_call;
2676 }
2677 }
2678 goto do_remove;
2679 }
2680 do_not_remove_call:
2681
2682 /* Output args are dead. */
2683 for (i = 0; i < nb_oargs; i++) {
2684 ts = arg_temp(op->args[i]);
2685 if (ts->state & TS_DEAD) {
2686 arg_life |= DEAD_ARG << i;
2687 }
2688 if (ts->state & TS_MEM) {
2689 arg_life |= SYNC_ARG << i;
2690 }
2691 ts->state = TS_DEAD;
2692 la_reset_pref(ts);
2693
2694 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2695 op->output_pref[i] = 0;
2696 }
2697
2698 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2699 TCG_CALL_NO_READ_GLOBALS))) {
2700 la_global_kill(s, nb_globals);
2701 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2702 la_global_sync(s, nb_globals);
2703 }
2704
2705 /* Record arguments that die in this helper. */
2706 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2707 ts = arg_temp(op->args[i]);
2708 if (ts && ts->state & TS_DEAD) {
2709 arg_life |= DEAD_ARG << i;
2710 }
2711 }
2712
2713 /* For all live registers, remove call-clobbered prefs. */
2714 la_cross_call(s, nb_temps);
2715
2716 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2717
2718 /* Input arguments are live for preceding opcodes. */
2719 for (i = 0; i < nb_iargs; i++) {
2720 ts = arg_temp(op->args[i + nb_oargs]);
2721 if (ts && ts->state & TS_DEAD) {
2722 /* For those arguments that die, and will be allocated
2723 * in registers, clear the register set for that arg,
2724 * to be filled in below. For args that will be on
2725 * the stack, reset to any available reg.
2726 */
2727 *la_temp_pref(ts)
2728 = (i < nb_call_regs ? 0 :
2729 tcg_target_available_regs[ts->type]);
2730 ts->state &= ~TS_DEAD;
2731 }
2732 }
2733
2734 /* For each input argument, add its input register to prefs.
2735 If a temp is used once, this produces a single set bit. */
2736 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2737 ts = arg_temp(op->args[i + nb_oargs]);
2738 if (ts) {
2739 tcg_regset_set_reg(*la_temp_pref(ts),
2740 tcg_target_call_iarg_regs[i]);
2741 }
2742 }
2743 }
2744 break;
2745 case INDEX_op_insn_start:
2746 break;
2747 case INDEX_op_discard:
2748 /* mark the temporary as dead */
2749 ts = arg_temp(op->args[0]);
2750 ts->state = TS_DEAD;
2751 la_reset_pref(ts);
2752 break;
2753
2754 case INDEX_op_add2_i32:
2755 opc_new = INDEX_op_add_i32;
2756 goto do_addsub2;
2757 case INDEX_op_sub2_i32:
2758 opc_new = INDEX_op_sub_i32;
2759 goto do_addsub2;
2760 case INDEX_op_add2_i64:
2761 opc_new = INDEX_op_add_i64;
2762 goto do_addsub2;
2763 case INDEX_op_sub2_i64:
2764 opc_new = INDEX_op_sub_i64;
2765 do_addsub2:
2766 nb_iargs = 4;
2767 nb_oargs = 2;
2768 /* Test if the high part of the operation is dead, but not
2769 the low part. The result can be optimized to a simple
2770 add or sub. This happens often for an x86_64 guest when
2771 the cpu is running in 32-bit mode. */
2772 if (arg_temp(op->args[1])->state == TS_DEAD) {
2773 if (arg_temp(op->args[0])->state == TS_DEAD) {
2774 goto do_remove;
2775 }
2776 /* Replace the opcode and adjust the args in place,
2777 leaving 3 unused args at the end. */
2778 op->opc = opc = opc_new;
2779 op->args[1] = op->args[2];
2780 op->args[2] = op->args[4];
2781 /* Fall through and mark the single-word operation live. */
2782 nb_iargs = 2;
2783 nb_oargs = 1;
2784 }
2785 goto do_not_remove;
2786
2787 case INDEX_op_mulu2_i32:
2788 opc_new = INDEX_op_mul_i32;
2789 opc_new2 = INDEX_op_muluh_i32;
2790 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2791 goto do_mul2;
2792 case INDEX_op_muls2_i32:
2793 opc_new = INDEX_op_mul_i32;
2794 opc_new2 = INDEX_op_mulsh_i32;
2795 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2796 goto do_mul2;
2797 case INDEX_op_mulu2_i64:
2798 opc_new = INDEX_op_mul_i64;
2799 opc_new2 = INDEX_op_muluh_i64;
2800 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2801 goto do_mul2;
2802 case INDEX_op_muls2_i64:
2803 opc_new = INDEX_op_mul_i64;
2804 opc_new2 = INDEX_op_mulsh_i64;
2805 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2806 goto do_mul2;
2807 do_mul2:
2808 nb_iargs = 2;
2809 nb_oargs = 2;
2810 if (arg_temp(op->args[1])->state == TS_DEAD) {
2811 if (arg_temp(op->args[0])->state == TS_DEAD) {
2812 /* Both parts of the operation are dead. */
2813 goto do_remove;
2814 }
2815 /* The high part of the operation is dead; generate the low. */
2816 op->opc = opc = opc_new;
2817 op->args[1] = op->args[2];
2818 op->args[2] = op->args[3];
2819 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2820 /* The low part of the operation is dead; generate the high. */
2821 op->opc = opc = opc_new2;
2822 op->args[0] = op->args[1];
2823 op->args[1] = op->args[2];
2824 op->args[2] = op->args[3];
2825 } else {
2826 goto do_not_remove;
2827 }
2828 /* Mark the single-word operation live. */
2829 nb_oargs = 1;
2830 goto do_not_remove;
2831
2832 default:
2833 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2834 nb_iargs = def->nb_iargs;
2835 nb_oargs = def->nb_oargs;
2836
2837 /* Test if the operation can be removed because all
2838 its outputs are dead. We assume that nb_oargs == 0
2839 implies side effects */
2840 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2841 for (i = 0; i < nb_oargs; i++) {
2842 if (arg_temp(op->args[i])->state != TS_DEAD) {
2843 goto do_not_remove;
2844 }
2845 }
2846 goto do_remove;
2847 }
2848 goto do_not_remove;
2849
2850 do_remove:
2851 tcg_op_remove(s, op);
2852 break;
2853
2854 do_not_remove:
2855 for (i = 0; i < nb_oargs; i++) {
2856 ts = arg_temp(op->args[i]);
2857
2858 /* Remember the preference of the uses that followed. */
2859 op->output_pref[i] = *la_temp_pref(ts);
2860
2861 /* Output args are dead. */
2862 if (ts->state & TS_DEAD) {
2863 arg_life |= DEAD_ARG << i;
2864 }
2865 if (ts->state & TS_MEM) {
2866 arg_life |= SYNC_ARG << i;
2867 }
2868 ts->state = TS_DEAD;
2869 la_reset_pref(ts);
2870 }
2871
2872 /* If end of basic block, update. */
2873 if (def->flags & TCG_OPF_BB_EXIT) {
2874 la_func_end(s, nb_globals, nb_temps);
2875 } else if (def->flags & TCG_OPF_COND_BRANCH) {
2876 la_bb_sync(s, nb_globals, nb_temps);
2877 } else if (def->flags & TCG_OPF_BB_END) {
2878 la_bb_end(s, nb_globals, nb_temps);
2879 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2880 la_global_sync(s, nb_globals);
2881 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2882 la_cross_call(s, nb_temps);
2883 }
2884 }
2885
2886 /* Record arguments that die in this opcode. */
2887 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2888 ts = arg_temp(op->args[i]);
2889 if (ts->state & TS_DEAD) {
2890 arg_life |= DEAD_ARG << i;
2891 }
2892 }
2893
2894 /* Input arguments are live for preceding opcodes. */
2895 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2896 ts = arg_temp(op->args[i]);
2897 if (ts->state & TS_DEAD) {
2898 /* For operands that were dead, initially allow
2899 all regs for the type. */
2900 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2901 ts->state &= ~TS_DEAD;
2902 }
2903 }
2904
2905 /* Incorporate constraints for this operand. */
2906 switch (opc) {
2907 case INDEX_op_mov_i32:
2908 case INDEX_op_mov_i64:
2909 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2910 have proper constraints. That said, special case
2911 moves to propagate preferences backward. */
2912 if (IS_DEAD_ARG(1)) {
2913 *la_temp_pref(arg_temp(op->args[0]))
2914 = *la_temp_pref(arg_temp(op->args[1]));
2915 }
2916 break;
2917
2918 default:
2919 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2920 const TCGArgConstraint *ct = &def->args_ct[i];
2921 TCGRegSet set, *pset;
2922
2923 ts = arg_temp(op->args[i]);
2924 pset = la_temp_pref(ts);
2925 set = *pset;
2926
2927 set &= ct->regs;
2928 if (ct->ialias) {
2929 set &= op->output_pref[ct->alias_index];
2930 }
2931 /* If the combination is not possible, restart. */
2932 if (set == 0) {
2933 set = ct->regs;
2934 }
2935 *pset = set;
2936 }
2937 break;
2938 }
2939 break;
2940 }
2941 op->life = arg_life;
2942 }
2943 }
2944
2945 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2946 static bool liveness_pass_2(TCGContext *s)
2947 {
2948 int nb_globals = s->nb_globals;
2949 int nb_temps, i;
2950 bool changes = false;
2951 TCGOp *op, *op_next;
2952
2953 /* Create a temporary for each indirect global. */
2954 for (i = 0; i < nb_globals; ++i) {
2955 TCGTemp *its = &s->temps[i];
2956 if (its->indirect_reg) {
2957 TCGTemp *dts = tcg_temp_alloc(s);
2958 dts->type = its->type;
2959 dts->base_type = its->base_type;
2960 its->state_ptr = dts;
2961 } else {
2962 its->state_ptr = NULL;
2963 }
2964 /* All globals begin dead. */
2965 its->state = TS_DEAD;
2966 }
2967 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2968 TCGTemp *its = &s->temps[i];
2969 its->state_ptr = NULL;
2970 its->state = TS_DEAD;
2971 }
2972
2973 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2974 TCGOpcode opc = op->opc;
2975 const TCGOpDef *def = &tcg_op_defs[opc];
2976 TCGLifeData arg_life = op->life;
2977 int nb_iargs, nb_oargs, call_flags;
2978 TCGTemp *arg_ts, *dir_ts;
2979
2980 if (opc == INDEX_op_call) {
2981 nb_oargs = TCGOP_CALLO(op);
2982 nb_iargs = TCGOP_CALLI(op);
2983 call_flags = op->args[nb_oargs + nb_iargs + 1];
2984 } else {
2985 nb_iargs = def->nb_iargs;
2986 nb_oargs = def->nb_oargs;
2987
2988 /* Set flags similar to how calls require. */
2989 if (def->flags & TCG_OPF_COND_BRANCH) {
2990 /* Like reading globals: sync_globals */
2991 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2992 } else if (def->flags & TCG_OPF_BB_END) {
2993 /* Like writing globals: save_globals */
2994 call_flags = 0;
2995 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2996 /* Like reading globals: sync_globals */
2997 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2998 } else {
2999 /* No effect on globals. */
3000 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3001 TCG_CALL_NO_WRITE_GLOBALS);
3002 }
3003 }
3004
3005 /* Make sure that input arguments are available. */
3006 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3007 arg_ts = arg_temp(op->args[i]);
3008 if (arg_ts) {
3009 dir_ts = arg_ts->state_ptr;
3010 if (dir_ts && arg_ts->state == TS_DEAD) {
3011 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3012 ? INDEX_op_ld_i32
3013 : INDEX_op_ld_i64);
3014 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3015
3016 lop->args[0] = temp_arg(dir_ts);
3017 lop->args[1] = temp_arg(arg_ts->mem_base);
3018 lop->args[2] = arg_ts->mem_offset;
3019
3020 /* Loaded, but synced with memory. */
3021 arg_ts->state = TS_MEM;
3022 }
3023 }
3024 }
3025
3026 /* Perform input replacement, and mark inputs that became dead.
3027 No action is required except keeping temp_state up to date
3028 so that we reload when needed. */
3029 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3030 arg_ts = arg_temp(op->args[i]);
3031 if (arg_ts) {
3032 dir_ts = arg_ts->state_ptr;
3033 if (dir_ts) {
3034 op->args[i] = temp_arg(dir_ts);
3035 changes = true;
3036 if (IS_DEAD_ARG(i)) {
3037 arg_ts->state = TS_DEAD;
3038 }
3039 }
3040 }
3041 }
3042
3043 /* Liveness analysis should ensure that the following are
3044 all correct, for call sites and basic block end points. */
3045 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3046 /* Nothing to do */
3047 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3048 for (i = 0; i < nb_globals; ++i) {
3049 /* Liveness should see that globals are synced back,
3050 that is, either TS_DEAD or TS_MEM. */
3051 arg_ts = &s->temps[i];
3052 tcg_debug_assert(arg_ts->state_ptr == 0
3053 || arg_ts->state != 0);
3054 }
3055 } else {
3056 for (i = 0; i < nb_globals; ++i) {
3057 /* Liveness should see that globals are saved back,
3058 that is, TS_DEAD, waiting to be reloaded. */
3059 arg_ts = &s->temps[i];
3060 tcg_debug_assert(arg_ts->state_ptr == 0
3061 || arg_ts->state == TS_DEAD);
3062 }
3063 }
3064
3065 /* Outputs become available. */
3066 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3067 arg_ts = arg_temp(op->args[0]);
3068 dir_ts = arg_ts->state_ptr;
3069 if (dir_ts) {
3070 op->args[0] = temp_arg(dir_ts);
3071 changes = true;
3072
3073 /* The output is now live and modified. */
3074 arg_ts->state = 0;
3075
3076 if (NEED_SYNC_ARG(0)) {
3077 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3078 ? INDEX_op_st_i32
3079 : INDEX_op_st_i64);
3080 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3081 TCGTemp *out_ts = dir_ts;
3082
3083 if (IS_DEAD_ARG(0)) {
3084 out_ts = arg_temp(op->args[1]);
3085 arg_ts->state = TS_DEAD;
3086 tcg_op_remove(s, op);
3087 } else {
3088 arg_ts->state = TS_MEM;
3089 }
3090
3091 sop->args[0] = temp_arg(out_ts);
3092 sop->args[1] = temp_arg(arg_ts->mem_base);
3093 sop->args[2] = arg_ts->mem_offset;
3094 } else {
3095 tcg_debug_assert(!IS_DEAD_ARG(0));
3096 }
3097 }
3098 } else {
3099 for (i = 0; i < nb_oargs; i++) {
3100 arg_ts = arg_temp(op->args[i]);
3101 dir_ts = arg_ts->state_ptr;
3102 if (!dir_ts) {
3103 continue;
3104 }
3105 op->args[i] = temp_arg(dir_ts);
3106 changes = true;
3107
3108 /* The output is now live and modified. */
3109 arg_ts->state = 0;
3110
3111 /* Sync outputs upon their last write. */
3112 if (NEED_SYNC_ARG(i)) {
3113 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3114 ? INDEX_op_st_i32
3115 : INDEX_op_st_i64);
3116 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3117
3118 sop->args[0] = temp_arg(dir_ts);
3119 sop->args[1] = temp_arg(arg_ts->mem_base);
3120 sop->args[2] = arg_ts->mem_offset;
3121
3122 arg_ts->state = TS_MEM;
3123 }
3124 /* Drop outputs that are dead. */
3125 if (IS_DEAD_ARG(i)) {
3126 arg_ts->state = TS_DEAD;
3127 }
3128 }
3129 }
3130 }
3131
3132 return changes;
3133 }
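/*
 * Sketch of the rewrite performed above (hypothetical indirect global G
 * with direct shadow temp D; the real base register and offset come from
 * the temp's mem_base and mem_offset):
 *
 *     add_i32 G, G, t1     becomes     ld_i32  D, env, $0x10
 *                                      add_i32 D, D, t1
 *                                      st_i32  D, env, $0x10
 *
 * The load is only inserted when G was not already live in D, and the
 * store only when liveness marked the output as needing a sync.
 */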
3134
3135 #ifdef CONFIG_DEBUG_TCG
3136 static void dump_regs(TCGContext *s)
3137 {
3138 TCGTemp *ts;
3139 int i;
3140 char buf[64];
3141
3142 for(i = 0; i < s->nb_temps; i++) {
3143 ts = &s->temps[i];
3144 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3145 switch(ts->val_type) {
3146 case TEMP_VAL_REG:
3147 printf("%s", tcg_target_reg_names[ts->reg]);
3148 break;
3149 case TEMP_VAL_MEM:
3150 printf("%d(%s)", (int)ts->mem_offset,
3151 tcg_target_reg_names[ts->mem_base->reg]);
3152 break;
3153 case TEMP_VAL_CONST:
3154 printf("$0x%" TCG_PRIlx, ts->val);
3155 break;
3156 case TEMP_VAL_DEAD:
3157 printf("D");
3158 break;
3159 default:
3160 printf("???");
3161 break;
3162 }
3163 printf("\n");
3164 }
3165
3166 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3167 if (s->reg_to_temp[i] != NULL) {
3168 printf("%s: %s\n",
3169 tcg_target_reg_names[i],
3170 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3171 }
3172 }
3173 }
3174
3175 static void check_regs(TCGContext *s)
3176 {
3177 int reg;
3178 int k;
3179 TCGTemp *ts;
3180 char buf[64];
3181
3182 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3183 ts = s->reg_to_temp[reg];
3184 if (ts != NULL) {
3185 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3186 printf("Inconsistency for register %s:\n",
3187 tcg_target_reg_names[reg]);
3188 goto fail;
3189 }
3190 }
3191 }
3192 for (k = 0; k < s->nb_temps; k++) {
3193 ts = &s->temps[k];
3194 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3195 && s->reg_to_temp[ts->reg] != ts) {
3196 printf("Inconsistency for temp %s:\n",
3197 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3198 fail:
3199 printf("reg state:\n");
3200 dump_regs(s);
3201 tcg_abort();
3202 }
3203 }
3204 }
3205 #endif
3206
3207 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3208 {
3209 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3210 /* Sparc64 stack is accessed with offset of 2047 */
3211 s->current_frame_offset = (s->current_frame_offset +
3212 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3213 ~(sizeof(tcg_target_long) - 1);
3214 #endif
3215 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3216 s->frame_end) {
3217 tcg_abort();
3218 }
3219 ts->mem_offset = s->current_frame_offset;
3220 ts->mem_base = s->frame_temp;
3221 ts->mem_allocated = 1;
3222 s->current_frame_offset += sizeof(tcg_target_long);
3223 }
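/*
 * Worked example of the rounding above (illustrative, 64-bit host): a
 * current_frame_offset of 44 becomes (44 + 8 - 1) & ~7 = 48 before the
 * 8-byte spill slot is assigned, so every slot stays naturally aligned.
 */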
3224
3225 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3226
3227 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3228 mark it free; otherwise mark it dead. */
3229 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3230 {
3231 if (ts->fixed_reg) {
3232 return;
3233 }
3234 if (ts->val_type == TEMP_VAL_REG) {
3235 s->reg_to_temp[ts->reg] = NULL;
3236 }
3237 ts->val_type = (free_or_dead < 0
3238 || ts->temp_local
3239 || ts->temp_global
3240 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3241 }
3242
3243 /* Mark a temporary as dead. */
3244 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3245 {
3246 temp_free_or_dead(s, ts, 1);
3247 }
3248
3249 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3250 register needs to be allocated to store a constant. If 'free_or_dead'
3251 is non-zero, subsequently release the temporary; if it is positive, the
3252 temp is dead; if it is negative, the temp is free. */
3253 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3254 TCGRegSet preferred_regs, int free_or_dead)
3255 {
3256 if (ts->fixed_reg) {
3257 return;
3258 }
3259 if (!ts->mem_coherent) {
3260 if (!ts->mem_allocated) {
3261 temp_allocate_frame(s, ts);
3262 }
3263 switch (ts->val_type) {
3264 case TEMP_VAL_CONST:
3265 /* If we're going to free the temp immediately, then we won't
3266 require it later in a register, so attempt to store the
3267 constant to memory directly. */
3268 if (free_or_dead
3269 && tcg_out_sti(s, ts->type, ts->val,
3270 ts->mem_base->reg, ts->mem_offset)) {
3271 break;
3272 }
3273 temp_load(s, ts, tcg_target_available_regs[ts->type],
3274 allocated_regs, preferred_regs);
3275 /* fallthrough */
3276
3277 case TEMP_VAL_REG:
3278 tcg_out_st(s, ts->type, ts->reg,
3279 ts->mem_base->reg, ts->mem_offset);
3280 break;
3281
3282 case TEMP_VAL_MEM:
3283 break;
3284
3285 case TEMP_VAL_DEAD:
3286 default:
3287 tcg_abort();
3288 }
3289 ts->mem_coherent = 1;
3290 }
3291 if (free_or_dead) {
3292 temp_free_or_dead(s, ts, free_or_dead);
3293 }
3294 }
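/*
 * Summary of the free_or_dead convention as used below (sketch):
 *
 *     temp_sync(s, ts, regs, 0,  0);   // write back only, keep the temp
 *     temp_sync(s, ts, regs, 0, +1);   // write back, then mark it dead
 *     temp_sync(s, ts, regs, 0, -1);   // write back, then mark it free
 *                                      // (the tcg_reg_free() case below)
 */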
3295
3296 /* free register 'reg' by spilling the corresponding temporary if necessary */
3297 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3298 {
3299 TCGTemp *ts = s->reg_to_temp[reg];
3300 if (ts != NULL) {
3301 temp_sync(s, ts, allocated_regs, 0, -1);
3302 }
3303 }
3304
3305 /**
3306 * tcg_reg_alloc:
3307 * @required_regs: Set of registers in which we must allocate.
3308 * @allocated_regs: Set of registers which must be avoided.
3309 * @preferred_regs: Set of registers we should prefer.
3310 * @rev: True if we search the registers in "indirect" order.
3311 *
3312 * The allocated register must be in @required_regs & ~@allocated_regs,
3313 * but if we can put it in @preferred_regs we may save a move later.
3314 */
3315 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3316 TCGRegSet allocated_regs,
3317 TCGRegSet preferred_regs, bool rev)
3318 {
3319 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3320 TCGRegSet reg_ct[2];
3321 const int *order;
3322
3323 reg_ct[1] = required_regs & ~allocated_regs;
3324 tcg_debug_assert(reg_ct[1] != 0);
3325 reg_ct[0] = reg_ct[1] & preferred_regs;
3326
3327 /* Skip the preferred_regs option if it cannot be satisfied,
3328 or if the preference made no difference. */
3329 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3330
3331 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3332
3333 /* Try free registers, preferences first. */
3334 for (j = f; j < 2; j++) {
3335 TCGRegSet set = reg_ct[j];
3336
3337 if (tcg_regset_single(set)) {
3338 /* One register in the set. */
3339 TCGReg reg = tcg_regset_first(set);
3340 if (s->reg_to_temp[reg] == NULL) {
3341 return reg;
3342 }
3343 } else {
3344 for (i = 0; i < n; i++) {
3345 TCGReg reg = order[i];
3346 if (s->reg_to_temp[reg] == NULL &&
3347 tcg_regset_test_reg(set, reg)) {
3348 return reg;
3349 }
3350 }
3351 }
3352 }
3353
3354 /* We must spill something. */
3355 for (j = f; j < 2; j++) {
3356 TCGRegSet set = reg_ct[j];
3357
3358 if (tcg_regset_single(set)) {
3359 /* One register in the set. */
3360 TCGReg reg = tcg_regset_first(set);
3361 tcg_reg_free(s, reg, allocated_regs);
3362 return reg;
3363 } else {
3364 for (i = 0; i < n; i++) {
3365 TCGReg reg = order[i];
3366 if (tcg_regset_test_reg(set, reg)) {
3367 tcg_reg_free(s, reg, allocated_regs);
3368 return reg;
3369 }
3370 }
3371 }
3372 }
3373
3374 tcg_abort();
3375 }
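/*
 * Usage sketch (hypothetical register sets): with required_regs = {R0..R7},
 * allocated_regs = {R0, R1} and preferred_regs = {R1, R5}, the search above
 * first tries the free preferred register R5; only when every register in
 * required_regs & ~allocated_regs is occupied does it spill one, again
 * preferring the preferred subset.
 */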
3376
3377 /* Make sure the temporary is in a register. If needed, allocate the register
3378 from DESIRED while avoiding ALLOCATED. */
3379 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3380 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3381 {
3382 TCGReg reg;
3383
3384 switch (ts->val_type) {
3385 case TEMP_VAL_REG:
3386 return;
3387 case TEMP_VAL_CONST:
3388 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3389 preferred_regs, ts->indirect_base);
3390 if (ts->type <= TCG_TYPE_I64) {
3391 tcg_out_movi(s, ts->type, reg, ts->val);
3392 } else {
3393 uint64_t val = ts->val;
3394 MemOp vece = MO_64;
3395
3396 /*
3397 * Find the minimal vector element that matches the constant.
3398 * The targets will, in general, have to do this search anyway,
3399 * so do it generically here.
3400 */
3401 if (TCG_TARGET_REG_BITS == 32) {
3402 val = dup_const(MO_32, val);
3403 vece = MO_32;
3404 }
3405 if (val == dup_const(MO_8, val)) {
3406 vece = MO_8;
3407 } else if (val == dup_const(MO_16, val)) {
3408 vece = MO_16;
3409 } else if (TCG_TARGET_REG_BITS == 64 &&
3410 val == dup_const(MO_32, val)) {
3411 vece = MO_32;
3412 }
3413
3414 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3415 }
3416 ts->mem_coherent = 0;
3417 break;
3418 case TEMP_VAL_MEM:
3419 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3420 preferred_regs, ts->indirect_base);
3421 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3422 ts->mem_coherent = 1;
3423 break;
3424 case TEMP_VAL_DEAD:
3425 default:
3426 tcg_abort();
3427 }
3428 ts->reg = reg;
3429 ts->val_type = TEMP_VAL_REG;
3430 s->reg_to_temp[reg] = ts;
3431 }
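/*
 * Example of the element-size search above (illustrative 64-bit constants):
 * 0x0101010101010101 equals dup_const(MO_8, 0x01) and is emitted as a byte
 * splat; 0x0001000100010001 fails the MO_8 test but matches MO_16; and
 * 0x123456789abcdef0 matches neither, so it keeps vece == MO_64.
 */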
3432
3433 /* Save a temporary to memory. 'allocated_regs' is used in case a
3434 temporary register needs to be allocated to store a constant. */
3435 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3436 {
3437 /* The liveness analysis already ensures that globals are back
3438 in memory. Keep a tcg_debug_assert for safety. */
3439 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3440 }
3441
3442 /* save globals to their canonical location and assume they can be
3443 modified by the following code. 'allocated_regs' is used in case a
3444 temporary register needs to be allocated to store a constant. */
3445 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3446 {
3447 int i, n;
3448
3449 for (i = 0, n = s->nb_globals; i < n; i++) {
3450 temp_save(s, &s->temps[i], allocated_regs);
3451 }
3452 }
3453
3454 /* sync globals to their canonical location and assume they can be
3455 read by the following code. 'allocated_regs' is used in case a
3456 temporary register needs to be allocated to store a constant. */
3457 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3458 {
3459 int i, n;
3460
3461 for (i = 0, n = s->nb_globals; i < n; i++) {
3462 TCGTemp *ts = &s->temps[i];
3463 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3464 || ts->fixed_reg
3465 || ts->mem_coherent);
3466 }
3467 }
3468
3469 /* at the end of a basic block, we assume all temporaries are dead and
3470 all globals are stored at their canonical location. */
3471 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3472 {
3473 int i;
3474
3475 for (i = s->nb_globals; i < s->nb_temps; i++) {
3476 TCGTemp *ts = &s->temps[i];
3477 if (ts->temp_local) {
3478 temp_save(s, ts, allocated_regs);
3479 } else {
3480 /* The liveness analysis already ensures that temps are dead.
3481 Keep a tcg_debug_assert for safety. */
3482 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3483 }
3484 }
3485
3486 save_globals(s, allocated_regs);
3487 }
3488
3489 /*
3490 * At a conditional branch, we assume all temporaries are dead and
3491 * all globals and local temps are synced to their location.
3492 */
3493 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3494 {
3495 sync_globals(s, allocated_regs);
3496
3497 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3498 TCGTemp *ts = &s->temps[i];
3499 /*
3500 * The liveness analysis already ensures that temps are dead.
3501 * Keep tcg_debug_asserts for safety.
3502 */
3503 if (ts->temp_local) {
3504 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3505 } else {
3506 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3507 }
3508 }
3509 }
3510
3511 /*
3512 * Specialized code generation for INDEX_op_movi_*.
3513 */
3514 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3515 tcg_target_ulong val, TCGLifeData arg_life,
3516 TCGRegSet preferred_regs)
3517 {
3518 /* ENV should not be modified. */
3519 tcg_debug_assert(!ots->fixed_reg);
3520
3521 /* The movi is not explicitly generated here. */
3522 if (ots->val_type == TEMP_VAL_REG) {
3523 s->reg_to_temp[ots->reg] = NULL;
3524 }
3525 ots->val_type = TEMP_VAL_CONST;
3526 ots->val = val;
3527 ots->mem_coherent = 0;
3528 if (NEED_SYNC_ARG(0)) {
3529 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3530 } else if (IS_DEAD_ARG(0)) {
3531 temp_dead(s, ots);
3532 }
3533 }
3534
3535 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3536 {
3537 TCGTemp *ots = arg_temp(op->args[0]);
3538 tcg_target_ulong val = op->args[1];
3539
3540 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3541 }
3542
3543 /*
3544 * Specialized code generation for INDEX_op_mov_*.
3545 */
3546 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3547 {
3548 const TCGLifeData arg_life = op->life;
3549 TCGRegSet allocated_regs, preferred_regs;
3550 TCGTemp *ts, *ots;
3551 TCGType otype, itype;
3552
3553 allocated_regs = s->reserved_regs;
3554 preferred_regs = op->output_pref[0];
3555 ots = arg_temp(op->args[0]);
3556 ts = arg_temp(op->args[1]);
3557
3558 /* ENV should not be modified. */
3559 tcg_debug_assert(!ots->fixed_reg);
3560
3561 /* Note that otype != itype for no-op truncation. */
3562 otype = ots->type;
3563 itype = ts->type;
3564
3565 if (ts->val_type == TEMP_VAL_CONST) {
3566 /* propagate constant or generate sti */
3567 tcg_target_ulong val = ts->val;
3568 if (IS_DEAD_ARG(1)) {
3569 temp_dead(s, ts);
3570 }
3571 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3572 return;
3573 }
3574
3575 /* If the source value is in memory we're going to be forced
3576 to have it in a register in order to perform the copy. Copy
3577 the SOURCE value into its own register first, so that we
3578 don't have to reload SOURCE the next time it is used. */
3579 if (ts->val_type == TEMP_VAL_MEM) {
3580 temp_load(s, ts, tcg_target_available_regs[itype],
3581 allocated_regs, preferred_regs);
3582 }
3583
3584 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3585 if (IS_DEAD_ARG(0)) {
3586 /* mov to a non-saved dead register makes no sense (even with
3587 liveness analysis disabled). */
3588 tcg_debug_assert(NEED_SYNC_ARG(0));
3589 if (!ots->mem_allocated) {
3590 temp_allocate_frame(s, ots);
3591 }
3592 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3593 if (IS_DEAD_ARG(1)) {
3594 temp_dead(s, ts);
3595 }
3596 temp_dead(s, ots);
3597 } else {
3598 if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3599 /* the mov can be suppressed */
3600 if (ots->val_type == TEMP_VAL_REG) {
3601 s->reg_to_temp[ots->reg] = NULL;
3602 }
3603 ots->reg = ts->reg;
3604 temp_dead(s, ts);
3605 } else {
3606 if (ots->val_type != TEMP_VAL_REG) {
3607 /* When allocating a new register, make sure to not spill the
3608 input one. */
3609 tcg_regset_set_reg(allocated_regs, ts->reg);
3610 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3611 allocated_regs, preferred_regs,
3612 ots->indirect_base);
3613 }
3614 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3615 /*
3616 * Cross register class move not supported.
3617 * Store the source register into the destination slot
3618 * and leave the destination temp as TEMP_VAL_MEM.
3619 */
3620 assert(!ots->fixed_reg);
3621 if (!ts->mem_allocated) {
3622 temp_allocate_frame(s, ots);
3623 }
3624 tcg_out_st(s, ts->type, ts->reg,
3625 ots->mem_base->reg, ots->mem_offset);
3626 ots->mem_coherent = 1;
3627 temp_free_or_dead(s, ots, -1);
3628 return;
3629 }
3630 }
3631 ots->val_type = TEMP_VAL_REG;
3632 ots->mem_coherent = 0;
3633 s->reg_to_temp[ots->reg] = ots;
3634 if (NEED_SYNC_ARG(0)) {
3635 temp_sync(s, ots, allocated_regs, 0, 0);
3636 }
3637 }
3638 }
3639
3640 /*
3641 * Specialized code generation for INDEX_op_dup_vec.
3642 */
3643 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3644 {
3645 const TCGLifeData arg_life = op->life;
3646 TCGRegSet dup_out_regs, dup_in_regs;
3647 TCGTemp *its, *ots;
3648 TCGType itype, vtype;
3649 intptr_t endian_fixup;
3650 unsigned vece;
3651 bool ok;
3652
3653 ots = arg_temp(op->args[0]);
3654 its = arg_temp(op->args[1]);
3655
3656 /* ENV should not be modified. */
3657 tcg_debug_assert(!ots->fixed_reg);
3658
3659 itype = its->type;
3660 vece = TCGOP_VECE(op);
3661 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3662
3663 if (its->val_type == TEMP_VAL_CONST) {
3664 /* Propagate constant via movi -> dupi. */
3665 tcg_target_ulong val = its->val;
3666 if (IS_DEAD_ARG(1)) {
3667 temp_dead(s, its);
3668 }
3669 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3670 return;
3671 }
3672
3673 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3674 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3675
3676 /* Allocate the output register now. */
3677 if (ots->val_type != TEMP_VAL_REG) {
3678 TCGRegSet allocated_regs = s->reserved_regs;
3679
3680 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3681 /* Make sure to not spill the input register. */
3682 tcg_regset_set_reg(allocated_regs, its->reg);
3683 }
3684 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3685 op->output_pref[0], ots->indirect_base);
3686 ots->val_type = TEMP_VAL_REG;
3687 ots->mem_coherent = 0;
3688 s->reg_to_temp[ots->reg] = ots;
3689 }
3690
3691 switch (its->val_type) {
3692 case TEMP_VAL_REG:
3693 /*
3694 * The dup constraints must be broad, covering all possible VECE.
3695 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3696 * to fail, indicating that extra moves are required for that case.
3697 */
3698 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3699 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3700 goto done;
3701 }
3702 /* Try again from memory or a vector input register. */
3703 }
3704 if (!its->mem_coherent) {
3705 /*
3706 * The input register is not synced, and so an extra store
3707 * would be required to use memory. Attempt an integer-vector
3708 * register move first. We do not have a TCGRegSet for this.
3709 */
3710 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3711 break;
3712 }
3713 /* Sync the temp back to its slot and load from there. */
3714 temp_sync(s, its, s->reserved_regs, 0, 0);
3715 }
3716 /* fall through */
3717
3718 case TEMP_VAL_MEM:
3719 #ifdef HOST_WORDS_BIGENDIAN
3720 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3721 endian_fixup -= 1 << vece;
3722 #else
3723 endian_fixup = 0;
3724 #endif
3725 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3726 its->mem_offset + endian_fixup)) {
3727 goto done;
3728 }
3729 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3730 break;
3731
3732 default:
3733 g_assert_not_reached();
3734 }
3735
3736 /* We now have a vector input register, so dup must succeed. */
3737 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3738 tcg_debug_assert(ok);
3739
3740 done:
3741 if (IS_DEAD_ARG(1)) {
3742 temp_dead(s, its);
3743 }
3744 if (NEED_SYNC_ARG(0)) {
3745 temp_sync(s, ots, s->reserved_regs, 0, 0);
3746 }
3747 if (IS_DEAD_ARG(0)) {
3748 temp_dead(s, ots);
3749 }
3750 }
3751
3752 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3753 {
3754 const TCGLifeData arg_life = op->life;
3755 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3756 TCGRegSet i_allocated_regs;
3757 TCGRegSet o_allocated_regs;
3758 int i, k, nb_iargs, nb_oargs;
3759 TCGReg reg;
3760 TCGArg arg;
3761 const TCGArgConstraint *arg_ct;
3762 TCGTemp *ts;
3763 TCGArg new_args[TCG_MAX_OP_ARGS];
3764 int const_args[TCG_MAX_OP_ARGS];
3765
3766 nb_oargs = def->nb_oargs;
3767 nb_iargs = def->nb_iargs;
3768
3769 /* copy constants */
3770 memcpy(new_args + nb_oargs + nb_iargs,
3771 op->args + nb_oargs + nb_iargs,
3772 sizeof(TCGArg) * def->nb_cargs);
3773
3774 i_allocated_regs = s->reserved_regs;
3775 o_allocated_regs = s->reserved_regs;
3776
3777 /* satisfy input constraints */
3778 for (k = 0; k < nb_iargs; k++) {
3779 TCGRegSet i_preferred_regs, o_preferred_regs;
3780
3781 i = def->args_ct[nb_oargs + k].sort_index;
3782 arg = op->args[i];
3783 arg_ct = &def->args_ct[i];
3784 ts = arg_temp(arg);
3785
3786 if (ts->val_type == TEMP_VAL_CONST
3787 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3788 /* constant is OK for instruction */
3789 const_args[i] = 1;
3790 new_args[i] = ts->val;
3791 continue;
3792 }
3793
3794 i_preferred_regs = o_preferred_regs = 0;
3795 if (arg_ct->ialias) {
3796 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3797 if (ts->fixed_reg) {
3798 /* if fixed register, we must allocate a new register
3799 if the alias is not the same register */
3800 if (arg != op->args[arg_ct->alias_index]) {
3801 goto allocate_in_reg;
3802 }
3803 } else {
3804 /* if the input is aliased to an output and if it is
3805 not dead after the instruction, we must allocate
3806 a new register and move it */
3807 if (!IS_DEAD_ARG(i)) {
3808 goto allocate_in_reg;
3809 }
3810
3811 /* check if the current register has already been allocated
3812 for another input aliased to an output */
3813 if (ts->val_type == TEMP_VAL_REG) {
3814 int k2, i2;
3815 reg = ts->reg;
3816 for (k2 = 0 ; k2 < k ; k2++) {
3817 i2 = def->args_ct[nb_oargs + k2].sort_index;
3818 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3819 goto allocate_in_reg;
3820 }
3821 }
3822 }
3823 i_preferred_regs = o_preferred_regs;
3824 }
3825 }
3826
3827 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3828 reg = ts->reg;
3829
3830 if (tcg_regset_test_reg(arg_ct->regs, reg)) {
3831 /* nothing to do : the constraint is satisfied */
3832 } else {
3833 allocate_in_reg:
3834 /* allocate a new register matching the constraint
3835 and move the temporary register into it */
3836 temp_load(s, ts, tcg_target_available_regs[ts->type],
3837 i_allocated_regs, 0);
3838 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3839 o_preferred_regs, ts->indirect_base);
3840 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3841 /*
3842 * Cross register class move not supported. Sync the
3843 * temp back to its slot and load from there.
3844 */
3845 temp_sync(s, ts, i_allocated_regs, 0, 0);
3846 tcg_out_ld(s, ts->type, reg,
3847 ts->mem_base->reg, ts->mem_offset);
3848 }
3849 }
3850 new_args[i] = reg;
3851 const_args[i] = 0;
3852 tcg_regset_set_reg(i_allocated_regs, reg);
3853 }
3854
3855 /* mark dead temporaries and free the associated registers */
3856 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3857 if (IS_DEAD_ARG(i)) {
3858 temp_dead(s, arg_temp(op->args[i]));
3859 }
3860 }
3861
3862 if (def->flags & TCG_OPF_COND_BRANCH) {
3863 tcg_reg_alloc_cbranch(s, i_allocated_regs);
3864 } else if (def->flags & TCG_OPF_BB_END) {
3865 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3866 } else {
3867 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3868 /* XXX: permit generic clobber register list ? */
3869 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3870 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3871 tcg_reg_free(s, i, i_allocated_regs);
3872 }
3873 }
3874 }
3875 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3876 /* sync globals if the op has side effects and might trigger
3877 an exception. */
3878 sync_globals(s, i_allocated_regs);
3879 }
3880
3881 /* satisfy the output constraints */
3882 for(k = 0; k < nb_oargs; k++) {
3883 i = def->args_ct[k].sort_index;
3884 arg = op->args[i];
3885 arg_ct = &def->args_ct[i];
3886 ts = arg_temp(arg);
3887
3888 /* ENV should not be modified. */
3889 tcg_debug_assert(!ts->fixed_reg);
3890
3891 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3892 reg = new_args[arg_ct->alias_index];
3893 } else if (arg_ct->newreg) {
3894 reg = tcg_reg_alloc(s, arg_ct->regs,
3895 i_allocated_regs | o_allocated_regs,
3896 op->output_pref[k], ts->indirect_base);
3897 } else {
3898 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3899 op->output_pref[k], ts->indirect_base);
3900 }
3901 tcg_regset_set_reg(o_allocated_regs, reg);
3902 if (ts->val_type == TEMP_VAL_REG) {
3903 s->reg_to_temp[ts->reg] = NULL;
3904 }
3905 ts->val_type = TEMP_VAL_REG;
3906 ts->reg = reg;
3907 /*
3908 * Temp value is modified, so the value kept in memory is
3909 * potentially not the same.
3910 */
3911 ts->mem_coherent = 0;
3912 s->reg_to_temp[reg] = ts;
3913 new_args[i] = reg;
3914 }
3915 }
3916
3917 /* emit instruction */
3918 if (def->flags & TCG_OPF_VECTOR) {
3919 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3920 new_args, const_args);
3921 } else {
3922 tcg_out_op(s, op->opc, new_args, const_args);
3923 }
3924
3925 /* move the outputs in the correct register if needed */
3926 for (i = 0; i < nb_oargs; i++) {
3927 ts = arg_temp(op->args[i]);
3928
3929 /* ENV should not be modified. */
3930 tcg_debug_assert(!ts->fixed_reg);
3931
3932 if (NEED_SYNC_ARG(i)) {
3933 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3934 } else if (IS_DEAD_ARG(i)) {
3935 temp_dead(s, ts);
3936 }
3937 }
3938 }
3939
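/*
 * Direction helper for laying out call arguments on the host stack:
 * offsets are negated on hosts whose stack grows toward higher addresses.
 */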
3940 #ifdef TCG_TARGET_STACK_GROWSUP
3941 #define STACK_DIR(x) (-(x))
3942 #else
3943 #define STACK_DIR(x) (x)
3944 #endif
3945
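/*
 * Register allocation for a helper call: spill the arguments that do not
 * fit in the host call-argument registers to the stack, load the remainder
 * into those registers, free all call-clobbered registers, save or sync
 * globals according to the call flags, emit the call, and finally bind the
 * call results to the output temporaries.
 */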
3946 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3947 {
3948 const int nb_oargs = TCGOP_CALLO(op);
3949 const int nb_iargs = TCGOP_CALLI(op);
3950 const TCGLifeData arg_life = op->life;
3951 int flags, nb_regs, i;
3952 TCGReg reg;
3953 TCGArg arg;
3954 TCGTemp *ts;
3955 intptr_t stack_offset;
3956 size_t call_stack_size;
3957 tcg_insn_unit *func_addr;
3958 int allocate_args;
3959 TCGRegSet allocated_regs;
3960
3961 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3962 flags = op->args[nb_oargs + nb_iargs + 1];
3963
3964 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3965 if (nb_regs > nb_iargs) {
3966 nb_regs = nb_iargs;
3967 }
3968
3969 /* assign stack slots first */
3970 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3971 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3972 ~(TCG_TARGET_STACK_ALIGN - 1);
3973 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3974 if (allocate_args) {
3975 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3976 preallocate call stack */
3977 tcg_abort();
3978 }
3979
3980 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3981 for (i = nb_regs; i < nb_iargs; i++) {
3982 arg = op->args[nb_oargs + i];
3983 #ifdef TCG_TARGET_STACK_GROWSUP
3984 stack_offset -= sizeof(tcg_target_long);
3985 #endif
3986 if (arg != TCG_CALL_DUMMY_ARG) {
3987 ts = arg_temp(arg);
3988 temp_load(s, ts, tcg_target_available_regs[ts->type],
3989 s->reserved_regs, 0);
3990 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3991 }
3992 #ifndef TCG_TARGET_STACK_GROWSUP
3993 stack_offset += sizeof(tcg_target_long);
3994 #endif
3995 }
3996
3997 /* assign input registers */
3998 allocated_regs = s->reserved_regs;
3999 for (i = 0; i < nb_regs; i++) {
4000 arg = op->args[nb_oargs + i];
4001 if (arg != TCG_CALL_DUMMY_ARG) {
4002 ts = arg_temp(arg);
4003 reg = tcg_target_call_iarg_regs[i];
4004
4005 if (ts->val_type == TEMP_VAL_REG) {
4006 if (ts->reg != reg) {
4007 tcg_reg_free(s, reg, allocated_regs);
4008 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4009 /*
4010 * Cross register class move not supported. Sync the
4011 * temp back to its slot and load from there.
4012 */
4013 temp_sync(s, ts, allocated_regs, 0, 0);
4014 tcg_out_ld(s, ts->type, reg,
4015 ts->mem_base->reg, ts->mem_offset);
4016 }
4017 }
4018 } else {
4019 TCGRegSet arg_set = 0;
4020
4021 tcg_reg_free(s, reg, allocated_regs);
4022 tcg_regset_set_reg(arg_set, reg);
4023 temp_load(s, ts, arg_set, allocated_regs, 0);
4024 }
4025
4026 tcg_regset_set_reg(allocated_regs, reg);
4027 }
4028 }
4029
4030 /* mark dead temporaries and free the associated registers */
4031 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4032 if (IS_DEAD_ARG(i)) {
4033 temp_dead(s, arg_temp(op->args[i]));
4034 }
4035 }
4036
4037 /* clobber call registers */
4038 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4039 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4040 tcg_reg_free(s, i, allocated_regs);
4041 }
4042 }
4043
4044 /* Save globals if they might be written by the helper; sync them if
4045 they might be read. */
4046 if (flags & TCG_CALL_NO_READ_GLOBALS) {
4047 /* Nothing to do */
4048 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4049 sync_globals(s, allocated_regs);
4050 } else {
4051 save_globals(s, allocated_regs);
4052 }
4053
4054 tcg_out_call(s, func_addr);
4055
4056 /* assign output registers and emit moves if needed */
4057 for (i = 0; i < nb_oargs; i++) {
4058 arg = op->args[i];
4059 ts = arg_temp(arg);
4060
4061 /* ENV should not be modified. */
4062 tcg_debug_assert(!ts->fixed_reg);
4063
4064 reg = tcg_target_call_oarg_regs[i];
4065 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4066 if (ts->val_type == TEMP_VAL_REG) {
4067 s->reg_to_temp[ts->reg] = NULL;
4068 }
4069 ts->val_type = TEMP_VAL_REG;
4070 ts->reg = reg;
4071 ts->mem_coherent = 0;
4072 s->reg_to_temp[reg] = ts;
4073 if (NEED_SYNC_ARG(i)) {
4074 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4075 } else if (IS_DEAD_ARG(i)) {
4076 temp_dead(s, ts);
4077 }
4078 }
4079 }
4080
4081 #ifdef CONFIG_PROFILER
4082
4083 /* avoid copy/paste errors */
4084 #define PROF_ADD(to, from, field) \
4085 do { \
4086 (to)->field += qatomic_read(&((from)->field)); \
4087 } while (0)
4088
4089 #define PROF_MAX(to, from, field) \
4090 do { \
4091 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4092 if (val__ > (to)->field) { \
4093 (to)->field = val__; \
4094 } \
4095 } while (0)
4096
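/*
 * Fold the profiling counters and/or per-opcode tables of every registered
 * TCGContext into a single snapshot, so that the dump helpers below report
 * totals across all translation contexts.
 */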
4097 /* Pass in a zeroed @prof */
4098 static inline
4099 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4100 {
4101 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4102 unsigned int i;
4103
4104 for (i = 0; i < n_ctxs; i++) {
4105 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4106 const TCGProfile *orig = &s->prof;
4107
4108 if (counters) {
4109 PROF_ADD(prof, orig, cpu_exec_time);
4110 PROF_ADD(prof, orig, tb_count1);
4111 PROF_ADD(prof, orig, tb_count);
4112 PROF_ADD(prof, orig, op_count);
4113 PROF_MAX(prof, orig, op_count_max);
4114 PROF_ADD(prof, orig, temp_count);
4115 PROF_MAX(prof, orig, temp_count_max);
4116 PROF_ADD(prof, orig, del_op_count);
4117 PROF_ADD(prof, orig, code_in_len);
4118 PROF_ADD(prof, orig, code_out_len);
4119 PROF_ADD(prof, orig, search_out_len);
4120 PROF_ADD(prof, orig, interm_time);
4121 PROF_ADD(prof, orig, code_time);
4122 PROF_ADD(prof, orig, la_time);
4123 PROF_ADD(prof, orig, opt_time);
4124 PROF_ADD(prof, orig, restore_count);
4125 PROF_ADD(prof, orig, restore_time);
4126 }
4127 if (table) {
4128 int i;
4129
4130 for (i = 0; i < NB_OPS; i++) {
4131 PROF_ADD(prof, orig, table_op_count[i]);
4132 }
4133 }
4134 }
4135 }
4136
4137 #undef PROF_ADD
4138 #undef PROF_MAX
4139
4140 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4141 {
4142 tcg_profile_snapshot(prof, true, false);
4143 }
4144
4145 static void tcg_profile_snapshot_table(TCGProfile *prof)
4146 {
4147 tcg_profile_snapshot(prof, false, true);
4148 }
4149
4150 void tcg_dump_op_count(void)
4151 {
4152 TCGProfile prof = {};
4153 int i;
4154
4155 tcg_profile_snapshot_table(&prof);
4156 for (i = 0; i < NB_OPS; i++) {
4157 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4158 prof.table_op_count[i]);
4159 }
4160 }
4161
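/* Total guest execution time accumulated across all translation contexts. */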
4162 int64_t tcg_cpu_exec_time(void)
4163 {
4164 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4165 unsigned int i;
4166 int64_t ret = 0;
4167
4168 for (i = 0; i < n_ctxs; i++) {
4169 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4170 const TCGProfile *prof = &s->prof;
4171
4172 ret += qatomic_read(&prof->cpu_exec_time);
4173 }
4174 return ret;
4175 }
4176 #else
4177 void tcg_dump_op_count(void)
4178 {
4179 qemu_printf("[TCG profiler not compiled]\n");
4180 }
4181
4182 int64_t tcg_cpu_exec_time(void)
4183 {
4184 error_report("%s: TCG profiler not compiled", __func__);
4185 exit(EXIT_FAILURE);
4186 }
4187 #endif
4188
4189
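/*
 * Translate the pending opcode stream in @s into host code for @tb:
 * run the optimizer and liveness passes, allocate registers and emit
 * each op, then finalize ldst/pool labels and resolve relocations.
 * Returns the size of the generated code, or a negative value on
 * (pending) code buffer or TB-size overflow so the caller can retry.
 */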
4190 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4191 {
4192 #ifdef CONFIG_PROFILER
4193 TCGProfile *prof = &s->prof;
4194 #endif
4195 int i, num_insns;
4196 TCGOp *op;
4197
4198 #ifdef CONFIG_PROFILER
4199 {
4200 int n = 0;
4201
4202 QTAILQ_FOREACH(op, &s->ops, link) {
4203 n++;
4204 }
4205 qatomic_set(&prof->op_count, prof->op_count + n);
4206 if (n > prof->op_count_max) {
4207 qatomic_set(&prof->op_count_max, n);
4208 }
4209
4210 n = s->nb_temps;
4211 qatomic_set(&prof->temp_count, prof->temp_count + n);
4212 if (n > prof->temp_count_max) {
4213 qatomic_set(&prof->temp_count_max, n);
4214 }
4215 }
4216 #endif
4217
4218 #ifdef DEBUG_DISAS
4219 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4220 && qemu_log_in_addr_range(tb->pc))) {
4221 FILE *logfile = qemu_log_lock();
4222 qemu_log("OP:\n");
4223 tcg_dump_ops(s, false);
4224 qemu_log("\n");
4225 qemu_log_unlock(logfile);
4226 }
4227 #endif
4228
4229 #ifdef CONFIG_DEBUG_TCG
4230 /* Ensure all labels referenced have been emitted. */
4231 {
4232 TCGLabel *l;
4233 bool error = false;
4234
4235 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4236 if (unlikely(!l->present) && l->refs) {
4237 qemu_log_mask(CPU_LOG_TB_OP,
4238 "$L%d referenced but not present.\n", l->id);
4239 error = true;
4240 }
4241 }
4242 assert(!error);
4243 }
4244 #endif
4245
4246 #ifdef CONFIG_PROFILER
4247 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4248 #endif
4249
4250 #ifdef USE_TCG_OPTIMIZATIONS
4251 tcg_optimize(s);
4252 #endif
4253
4254 #ifdef CONFIG_PROFILER
4255 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4256 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4257 #endif
4258
4259 reachable_code_pass(s);
4260 liveness_pass_1(s);
4261
4262 if (s->nb_indirects > 0) {
4263 #ifdef DEBUG_DISAS
4264 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4265 && qemu_log_in_addr_range(tb->pc))) {
4266 FILE *logfile = qemu_log_lock();
4267 qemu_log("OP before indirect lowering:\n");
4268 tcg_dump_ops(s, false);
4269 qemu_log("\n");
4270 qemu_log_unlock(logfile);
4271 }
4272 #endif
4273 /* Replace indirect temps with direct temps. */
4274 if (liveness_pass_2(s)) {
4275 /* If changes were made, re-run liveness. */
4276 liveness_pass_1(s);
4277 }
4278 }
4279
4280 #ifdef CONFIG_PROFILER
4281 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4282 #endif
4283
4284 #ifdef DEBUG_DISAS
4285 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4286 && qemu_log_in_addr_range(tb->pc))) {
4287 FILE *logfile = qemu_log_lock();
4288 qemu_log("OP after optimization and liveness analysis:\n");
4289 tcg_dump_ops(s, true);
4290 qemu_log("\n");
4291 qemu_log_unlock(logfile);
4292 }
4293 #endif
4294
4295 tcg_reg_alloc_start(s);
4296
4297 /*
4298 * Reset the buffer pointers when restarting after overflow.
4299 * TODO: Move this into translate-all.c with the rest of the
4300 * buffer management. Having only this done here is confusing.
4301 */
4302 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4303 s->code_ptr = s->code_buf;
4304
4305 #ifdef TCG_TARGET_NEED_LDST_LABELS
4306 QSIMPLEQ_INIT(&s->ldst_labels);
4307 #endif
4308 #ifdef TCG_TARGET_NEED_POOL_LABELS
4309 s->pool_labels = NULL;
4310 #endif
4311
4312 num_insns = -1;
4313 QTAILQ_FOREACH(op, &s->ops, link) {
4314 TCGOpcode opc = op->opc;
4315
4316 #ifdef CONFIG_PROFILER
4317 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4318 #endif
4319
4320 switch (opc) {
4321 case INDEX_op_mov_i32:
4322 case INDEX_op_mov_i64:
4323 case INDEX_op_mov_vec:
4324 tcg_reg_alloc_mov(s, op);
4325 break;
4326 case INDEX_op_movi_i32:
4327 case INDEX_op_movi_i64:
4328 case INDEX_op_dupi_vec:
4329 tcg_reg_alloc_movi(s, op);
4330 break;
4331 case INDEX_op_dup_vec:
4332 tcg_reg_alloc_dup(s, op);
4333 break;
4334 case INDEX_op_insn_start:
4335 if (num_insns >= 0) {
4336 size_t off = tcg_current_code_size(s);
4337 s->gen_insn_end_off[num_insns] = off;
4338 /* Assert that we do not overflow our stored offset. */
4339 assert(s->gen_insn_end_off[num_insns] == off);
4340 }
4341 num_insns++;
4342 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4343 target_ulong a;
4344 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4345 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4346 #else
4347 a = op->args[i];
4348 #endif
4349 s->gen_insn_data[num_insns][i] = a;
4350 }
4351 break;
4352 case INDEX_op_discard:
4353 temp_dead(s, arg_temp(op->args[0]));
4354 break;
4355 case INDEX_op_set_label:
4356 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4357 tcg_out_label(s, arg_label(op->args[0]));
4358 break;
4359 case INDEX_op_call:
4360 tcg_reg_alloc_call(s, op);
4361 break;
4362 default:
4363 /* Sanity check that we've not introduced any unhandled opcodes. */
4364 tcg_debug_assert(tcg_op_supported(opc));
4365 /* Note: it would be much faster to have specialized register
4366 allocator functions for some common argument
4367 patterns. */
4368 tcg_reg_alloc_op(s, op);
4369 break;
4370 }
4371 #ifdef CONFIG_DEBUG_TCG
4372 check_regs(s);
4373 #endif
4374 /* Test for (pending) buffer overflow. The assumption is that any
4375 one operation beginning below the high water mark cannot overrun
4376 the buffer completely. Thus we can test for overflow after
4377 generating code without having to check during generation. */
4378 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4379 return -1;
4380 }
4381 /* Test for TB overflow, as seen by gen_insn_end_off. */
4382 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4383 return -2;
4384 }
4385 }
4386 tcg_debug_assert(num_insns >= 0);
4387 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4388
4389 /* Generate TB finalization at the end of block */
4390 #ifdef TCG_TARGET_NEED_LDST_LABELS
4391 i = tcg_out_ldst_finalize(s);
4392 if (i < 0) {
4393 return i;
4394 }
4395 #endif
4396 #ifdef TCG_TARGET_NEED_POOL_LABELS
4397 i = tcg_out_pool_finalize(s);
4398 if (i < 0) {
4399 return i;
4400 }
4401 #endif
4402 if (!tcg_resolve_relocs(s)) {
4403 return -2;
4404 }
4405
4406 #ifndef CONFIG_TCG_INTERPRETER
4407 /* flush instruction cache */
4408 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4409 (uintptr_t)s->code_buf,
4410 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4411 #endif
4412
4413 return tcg_current_code_size(s);
4414 }
4415
4416 #ifdef CONFIG_PROFILER
4417 void tcg_dump_info(void)
4418 {
4419 TCGProfile prof = {};
4420 const TCGProfile *s;
4421 int64_t tb_count;
4422 int64_t tb_div_count;
4423 int64_t tot;
4424
4425 tcg_profile_snapshot_counters(&prof);
4426 s = &prof;
4427 tb_count = s->tb_count;
4428 tb_div_count = tb_count ? tb_count : 1;
4429 tot = s->interm_time + s->code_time;
4430
4431 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4432 tot, tot / 2.4e9);
4433 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4434 " %0.1f%%)\n",
4435 tb_count, s->tb_count1 - tb_count,
4436 (double)(s->tb_count1 - s->tb_count)
4437 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4438 qemu_printf("avg ops/TB %0.1f max=%d\n",
4439 (double)s->op_count / tb_div_count, s->op_count_max);
4440 qemu_printf("deleted ops/TB %0.2f\n",
4441 (double)s->del_op_count / tb_div_count);
4442 qemu_printf("avg temps/TB %0.2f max=%d\n",
4443 (double)s->temp_count / tb_div_count, s->temp_count_max);
4444 qemu_printf("avg host code/TB %0.1f\n",
4445 (double)s->code_out_len / tb_div_count);
4446 qemu_printf("avg search data/TB %0.1f\n",
4447 (double)s->search_out_len / tb_div_count);
4448
4449 qemu_printf("cycles/op %0.1f\n",
4450 s->op_count ? (double)tot / s->op_count : 0);
4451 qemu_printf("cycles/in byte %0.1f\n",
4452 s->code_in_len ? (double)tot / s->code_in_len : 0);
4453 qemu_printf("cycles/out byte %0.1f\n",
4454 s->code_out_len ? (double)tot / s->code_out_len : 0);
4455 qemu_printf("cycles/search byte %0.1f\n",
4456 s->search_out_len ? (double)tot / s->search_out_len : 0);
4457 if (tot == 0) {
4458 tot = 1;
4459 }
4460 qemu_printf(" gen_interm time %0.1f%%\n",
4461 (double)s->interm_time / tot * 100.0);
4462 qemu_printf(" gen_code time %0.1f%%\n",
4463 (double)s->code_time / tot * 100.0);
4464 qemu_printf("optim./code time %0.1f%%\n",
4465 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4466 * 100.0);
4467 qemu_printf("liveness/code time %0.1f%%\n",
4468 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4469 qemu_printf("cpu_restore count %" PRId64 "\n",
4470 s->restore_count);
4471 qemu_printf(" avg cycles %0.1f\n",
4472 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4473 }
4474 #else
4475 void tcg_dump_info(void)
4476 {
4477 qemu_printf("[TCG profiler not compiled]\n");
4478 }
4479 #endif
4480
4481 #ifdef ELF_HOST_MACHINE
4482 /* In order to use this feature, the backend needs to do three things:
4483
4484 (1) Define ELF_HOST_MACHINE to indicate both what value to
4485 put into the ELF image and to indicate support for the feature.
4486
4487 (2) Define tcg_register_jit. This should create a buffer containing
4488 the contents of a .debug_frame section that describes the post-
4489 prologue unwind info for the tcg machine.
4490
4491 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4492 */
4493
4494 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4495 typedef enum {
4496 JIT_NOACTION = 0,
4497 JIT_REGISTER_FN,
4498 JIT_UNREGISTER_FN
4499 } jit_actions_t;
4500
4501 struct jit_code_entry {
4502 struct jit_code_entry *next_entry;
4503 struct jit_code_entry *prev_entry;
4504 const void *symfile_addr;
4505 uint64_t symfile_size;
4506 };
4507
4508 struct jit_descriptor {
4509 uint32_t version;
4510 uint32_t action_flag;
4511 struct jit_code_entry *relevant_entry;
4512 struct jit_code_entry *first_entry;
4513 };
4514
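/*
 * GDB places a breakpoint in this deliberately empty, noinline function;
 * calling it after updating __jit_debug_descriptor is what notifies the
 * debugger that a new JIT symbol file is available.
 */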
4515 void __jit_debug_register_code(void) __attribute__((noinline));
4516 void __jit_debug_register_code(void)
4517 {
4518 asm("");
4519 }
4520
4521 /* Must statically initialize the version, because GDB may check
4522 the version before we can set it. */
4523 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4524
4525 /* End GDB interface. */
4526
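/*
 * Return the offset of @str within the string table @strtab.  The caller
 * guarantees that the string is present, so there is no terminating
 * bounds check.
 */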
4527 static int find_string(const char *strtab, const char *str)
4528 {
4529 const char *p = strtab + 1;
4530
4531 while (1) {
4532 if (strcmp(p, str) == 0) {
4533 return p - strtab;
4534 }
4535 p += strlen(p) + 1;
4536 }
4537 }
4538
4539 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4540 const void *debug_frame,
4541 size_t debug_frame_size)
4542 {
4543 struct __attribute__((packed)) DebugInfo {
4544 uint32_t len;
4545 uint16_t version;
4546 uint32_t abbrev;
4547 uint8_t ptr_size;
4548 uint8_t cu_die;
4549 uint16_t cu_lang;
4550 uintptr_t cu_low_pc;
4551 uintptr_t cu_high_pc;
4552 uint8_t fn_die;
4553 char fn_name[16];
4554 uintptr_t fn_low_pc;
4555 uintptr_t fn_high_pc;
4556 uint8_t cu_eoc;
4557 };
4558
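/*
 * The fake ELF image is a single contiguous struct so that each section
 * header can locate its contents with offsetof(); the backend-provided
 * .debug_frame data is appended immediately after the image.
 */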
4559 struct ElfImage {
4560 ElfW(Ehdr) ehdr;
4561 ElfW(Phdr) phdr;
4562 ElfW(Shdr) shdr[7];
4563 ElfW(Sym) sym[2];
4564 struct DebugInfo di;
4565 uint8_t da[24];
4566 char str[80];
4567 };
4568
4569 struct ElfImage *img;
4570
4571 static const struct ElfImage img_template = {
4572 .ehdr = {
4573 .e_ident[EI_MAG0] = ELFMAG0,
4574 .e_ident[EI_MAG1] = ELFMAG1,
4575 .e_ident[EI_MAG2] = ELFMAG2,
4576 .e_ident[EI_MAG3] = ELFMAG3,
4577 .e_ident[EI_CLASS] = ELF_CLASS,
4578 .e_ident[EI_DATA] = ELF_DATA,
4579 .e_ident[EI_VERSION] = EV_CURRENT,
4580 .e_type = ET_EXEC,
4581 .e_machine = ELF_HOST_MACHINE,
4582 .e_version = EV_CURRENT,
4583 .e_phoff = offsetof(struct ElfImage, phdr),
4584 .e_shoff = offsetof(struct ElfImage, shdr),
4585 .e_ehsize = sizeof(ElfW(Ehdr)),
4586 .e_phentsize = sizeof(ElfW(Phdr)),
4587 .e_phnum = 1,
4588 .e_shentsize = sizeof(ElfW(Shdr)),
4589 .e_shnum = ARRAY_SIZE(img->shdr),
4590 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4591 #ifdef ELF_HOST_FLAGS
4592 .e_flags = ELF_HOST_FLAGS,
4593 #endif
4594 #ifdef ELF_OSABI
4595 .e_ident[EI_OSABI] = ELF_OSABI,
4596 #endif
4597 },
4598 .phdr = {
4599 .p_type = PT_LOAD,
4600 .p_flags = PF_X,
4601 },
4602 .shdr = {
4603 [0] = { .sh_type = SHT_NULL },
4604 /* Trick: The contents of code_gen_buffer are not present in
4605 this fake ELF file; that got allocated elsewhere. Therefore
4606 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4607 will not look for contents. We can record any address. */
4608 [1] = { /* .text */
4609 .sh_type = SHT_NOBITS,
4610 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4611 },
4612 [2] = { /* .debug_info */
4613 .sh_type = SHT_PROGBITS,
4614 .sh_offset = offsetof(struct ElfImage, di),
4615 .sh_size = sizeof(struct DebugInfo),
4616 },
4617 [3] = { /* .debug_abbrev */
4618 .sh_type = SHT_PROGBITS,
4619 .sh_offset = offsetof(struct ElfImage, da),
4620 .sh_size = sizeof(img->da),
4621 },
4622 [4] = { /* .debug_frame */
4623 .sh_type = SHT_PROGBITS,
4624 .sh_offset = sizeof(struct ElfImage),
4625 },
4626 [5] = { /* .symtab */
4627 .sh_type = SHT_SYMTAB,
4628 .sh_offset = offsetof(struct ElfImage, sym),
4629 .sh_size = sizeof(img->sym),
4630 .sh_info = 1,
4631 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4632 .sh_entsize = sizeof(ElfW(Sym)),
4633 },
4634 [6] = { /* .strtab */
4635 .sh_type = SHT_STRTAB,
4636 .sh_offset = offsetof(struct ElfImage, str),
4637 .sh_size = sizeof(img->str),
4638 }
4639 },
4640 .sym = {
4641 [1] = { /* code_gen_buffer */
4642 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4643 .st_shndx = 1,
4644 }
4645 },
4646 .di = {
4647 .len = sizeof(struct DebugInfo) - 4,
4648 .version = 2,
4649 .ptr_size = sizeof(void *),
4650 .cu_die = 1,
4651 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4652 .fn_die = 2,
4653 .fn_name = "code_gen_buffer"
4654 },
4655 .da = {
4656 1, /* abbrev number (the cu) */
4657 0x11, 1, /* DW_TAG_compile_unit, has children */
4658 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4659 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4660 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4661 0, 0, /* end of abbrev */
4662 2, /* abbrev number (the fn) */
4663 0x2e, 0, /* DW_TAG_subprogram, no children */
4664 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4665 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4666 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4667 0, 0, /* end of abbrev */
4668 0 /* no more abbrev */
4669 },
4670 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4671 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4672 };
4673
4674 /* We only need a single jit entry; statically allocate it. */
4675 static struct jit_code_entry one_entry;
4676
4677 uintptr_t buf = (uintptr_t)buf_ptr;
4678 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4679 DebugFrameHeader *dfh;
4680
4681 img = g_malloc(img_size);
4682 *img = img_template;
4683
4684 img->phdr.p_vaddr = buf;
4685 img->phdr.p_paddr = buf;
4686 img->phdr.p_memsz = buf_size;
4687
4688 img->shdr[1].sh_name = find_string(img->str, ".text");
4689 img->shdr[1].sh_addr = buf;
4690 img->shdr[1].sh_size = buf_size;
4691
4692 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4693 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4694
4695 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4696 img->shdr[4].sh_size = debug_frame_size;
4697
4698 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4699 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4700
4701 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4702 img->sym[1].st_value = buf;
4703 img->sym[1].st_size = buf_size;
4704
4705 img->di.cu_low_pc = buf;
4706 img->di.cu_high_pc = buf + buf_size;
4707 img->di.fn_low_pc = buf;
4708 img->di.fn_high_pc = buf + buf_size;
4709
4710 dfh = (DebugFrameHeader *)(img + 1);
4711 memcpy(dfh, debug_frame, debug_frame_size);
4712 dfh->fde.func_start = buf;
4713 dfh->fde.func_len = buf_size;
4714
4715 #ifdef DEBUG_JIT
4716 /* Enable this block to be able to debug the ELF image file creation.
4717 One can use readelf, objdump, or other inspection utilities. */
4718 {
4719 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4720 if (f) {
4721 if (fwrite(img, img_size, 1, f) != 1) {
4722 /* Avoid stupid unused return value warning for fwrite. */
4723 }
4724 fclose(f);
4725 }
4726 }
4727 #endif
4728
4729 one_entry.symfile_addr = img;
4730 one_entry.symfile_size = img_size;
4731
4732 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4733 __jit_debug_descriptor.relevant_entry = &one_entry;
4734 __jit_debug_descriptor.first_entry = &one_entry;
4735 __jit_debug_register_code();
4736 }
4737 #else
4738 /* No support for the feature. Provide the entry point expected by exec.c,
4739 and implement the internal function we declared earlier. */
4740
4741 static void tcg_register_jit_int(const void *buf, size_t size,
4742 const void *debug_frame,
4743 size_t debug_frame_size)
4744 {
4745 }
4746
4747 void tcg_register_jit(const void *buf, size_t buf_size)
4748 {
4749 }
4750 #endif /* ELF_HOST_MACHINE */
4751
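/*
 * Without host vector support, generic vector expansion should never be
 * invoked; provide a stub that traps if it is.
 */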
4752 #if !TCG_TARGET_MAYBE_vec
4753 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4754 {
4755 g_assert_not_reached();
4756 }
4757 #endif