/*
 * Memory region management for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/madvise.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg-internal.h"


struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start_aligned;
    void *after_prologue;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */
    size_t total_size; /* size of entire buffer, >= n * stride */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
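/*
 * Illustrative layout sketch (added for clarity; see tcg_region_bounds()
 * and tcg_region_init() below for the authoritative computation):
 *
 *   start_aligned
 *   |<-prologue->|<------ region 0 ------>|G|<------ region 1 ------>|G| ...
 *                ^
 *                after_prologue
 *
 * Each region provides .size bytes of code space followed by one guard
 * page (G), so .stride = .size + guard; region i (for i > 0) starts at
 * start_aligned + i * .stride, and the last region also absorbs any pages
 * left over from rounding.
 */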

static struct tcg_region_state region;

/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;

bool in_code_gen_buffer(const void *p)
{
    /*
     * Much like it is valid to have a pointer to the byte past the
     * end of an array (so long as you don't dereference it), allow
     * a pointer to the byte past the end of the code gen buffer.
     */
    return (size_t)(p - region.start_aligned) <= region.total_size;
}

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have one of the two .size fields set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tb_destroy(gpointer value)
{
    TranslationBlock *tb = value;
    qemu_spin_destroy(&tb->jmp_lock);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert. The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
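/*
 * Usage sketch for tcg_tb_lookup() above (illustrative only; the real
 * callers live outside this file): given a host code address, for example
 * one taken from a signal handler or an unwinder, recover the TB whose
 * generated code contains it:
 *
 *     TranslationBlock *tb = tcg_tb_lookup(host_pc);
 *     if (tb) {
 *         ... host_pc falls within tb's generated code ...
 *     }
 */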

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.after_prologue;
    }
    /* The final region may have a few extra pages due to earlier rounding. */
    if (curr_region == region.n - 1) {
        end = region.start_aligned + region.total_size;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}
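/*
 * Caller sketch for tcg_region_alloc() above (illustrative only; the real
 * retry logic lives in tcg.c): when code generation runs past
 * s->code_gen_highwater, ask for a fresh region and retry, and only flush
 * the whole TB cache once every region is full:
 *
 *     if (unlikely(next_code_ptr > s->code_gen_highwater)) {
 *         if (tcg_region_alloc(s)) {
 *             ... all regions full: flush TBs and start over ...
 *         }
 *         ... otherwise retry in the newly assigned region ...
 *     }
 */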

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
    bool err = tcg_region_alloc__locked(s);
    g_assert(!err);
}

void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        tcg_region_initial_alloc__locked(s);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
{
#ifdef CONFIG_USER_ONLY
    return 1;
#else
    size_t n_regions;

    /*
     * It is likely that some vCPUs will translate more code than others,
     * so we first try to set more regions than max_cpus, with those regions
     * being of reasonable size. If that's not possible we make do by evenly
     * dividing the code_gen_buffer among the vCPUs.
     */
    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /*
     * Try to have more regions than max_cpus, with each region being >= 2 MB.
     * If we can't, then just allocate one region per vCPU thread.
     */
    n_regions = tb_size / (2 * MiB);
    if (n_regions <= max_cpus) {
        return max_cpus;
    }
    return MIN(n_regions, max_cpus * 8);
#endif
}
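/*
 * Worked examples for tcg_n_regions() above (illustrative): with
 * tb_size = 1 GiB and max_cpus = 8, tb_size / (2 MiB) = 512 > max_cpus,
 * so we return MIN(512, 8 * 8) = 64 regions. With tb_size = 32 MiB and
 * max_cpus = 16, 32 MiB / (2 MiB) = 16 <= max_cpus, so each of the 16
 * vCPU threads simply gets its own region.
 */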

/*
 * Minimum size of the code gen buffer. This number is randomly chosen,
 * but not so small that we can't have a fair number of TB's live.
 *
 * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
 * Unless otherwise indicated, this is constrained by the range of
 * direct branches on the host cpu, as used by the TCG implementation
 * of goto_tb.
 */
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)

#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator, don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE \
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *buf, *end;
    size_t size;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    /* page-align the beginning and end of the buffer */
    buf = static_code_gen_buffer;
    end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);

    size = end - buf;

    /* Honor a command-line option limiting the size of the buffer. */
    if (size > tb_size) {
        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *buf;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (buf == NULL) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        /* The function returns a protection mask or -1, not a bool. */
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
}
#else
static int alloc_code_gen_buffer_anon(size_t size, int prot,
                                      int flags, Error **errp)
{
    void *buf;

    buf = mmap(NULL, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;
    return prot;
}

#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"

/* Returns a protection mask on success, -1 on failure (not a bool). */
static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
    if (buf_rw == NULL) {
        goto fail;
    }

    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (buf_rx == MAP_FAILED) {
        goto fail_rx;
    }

    close(fd);
    region.start_aligned = buf_rw;
    region.total_size = size;
    tcg_splitwx_diff = buf_rx - buf_rw;

    return PROT_READ | PROT_WRITE;

 fail_rx:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 fail:
    if (buf_rx != MAP_FAILED) {
        munmap(buf_rx, size);
    }
    if (buf_rw) {
        munmap(buf_rw, size);
    }
    if (fd >= 0) {
        close(fd);
    }
    return -1;
}
#endif /* CONFIG_POSIX */

#ifdef CONFIG_DARWIN
#include <mach/mach.h>

extern kern_return_t mach_vm_remap(vm_map_t target_task,
                                   mach_vm_address_t *target_address,
                                   mach_vm_size_t size,
                                   mach_vm_offset_t mask,
                                   int flags,
                                   vm_map_t src_task,
                                   mach_vm_address_t src_address,
                                   boolean_t copy,
                                   vm_prot_t *cur_protection,
                                   vm_prot_t *max_protection,
                                   vm_inherit_t inheritance);

static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    kern_return_t ret;
    mach_vm_address_t buf_rw, buf_rx;
    vm_prot_t cur_prot, max_prot;

    /*
     * Map the read-write portion via normal anon memory.
     * alloc_code_gen_buffer_anon returns a protection mask, or -1 on error.
     */
    if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
        return -1;
    }

    buf_rw = (mach_vm_address_t)region.start_aligned;
    buf_rx = 0;
    ret = mach_vm_remap(mach_task_self(),
                        &buf_rx,
                        size,
                        0,
                        VM_FLAGS_ANYWHERE,
                        mach_task_self(),
                        buf_rw,
                        false,
                        &cur_prot,
                        &max_prot,
                        VM_INHERIT_NONE);
    if (ret != KERN_SUCCESS) {
        /* TODO: Convert "ret" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)buf_rw, size);
        return -1;
    }

    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)buf_rx, size);
        munmap((void *)buf_rw, size);
        return -1;
    }

    tcg_splitwx_diff = buf_rx - buf_rw;
    return PROT_READ | PROT_WRITE;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */

static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
    error_setg(errp, "jit split-wx not supported");
    return -1;
}

static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        prot = alloc_code_gen_buffer_splitwx(size, errp);
        if (prot >= 0) {
            return prot;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return -1;
        }
        error_free_or_abort(errp);
    }

    /*
     * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
     * rejects a permission change from RWX -> NONE when reserving the
     * guard pages later. We can go the other way with the same number
     * of syscalls, so always begin with PROT_NONE.
     */
    prot = PROT_NONE;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef CONFIG_DARWIN
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (!splitwx) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    const size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    int have_prot, need_prot;

    /* Size the buffer. */
    if (tb_size == 0) {
        size_t phys_mem = qemu_get_host_physmem();
        if (phys_mem == 0) {
            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
        } else {
            tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
        }
    }
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
    }

    have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
    assert(have_prot >= 0);

    /* Request large pages for the buffer and the splitwx. */
    qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
    if (tcg_splitwx_diff) {
        qemu_madvise(region.start_aligned + tcg_splitwx_diff,
                     region.total_size, QEMU_MADV_HUGEPAGE);
    }

    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region.n = tcg_n_regions(tb_size, max_cpus);
    region_size = tb_size / region.n;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);
    region.stride = region_size;

    /* Reserve space for guard pages. */
    region.size = region_size - page_size;
    region.total_size -= page_size;

    /*
     * The first region will be smaller than the others, via the prologue,
     * which has yet to be allocated. For now, the first region begins at
     * the page boundary.
     */
    region.after_prologue = region.start_aligned;

    /* init the region struct */
    qemu_mutex_init(&region.lock);

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur. Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     * Work with the page protections set up with the initial mapping.
     */
    need_prot = PAGE_READ | PAGE_WRITE;
#ifndef CONFIG_TCG_INTERPRETER
    if (tcg_splitwx_diff == 0) {
        need_prot |= PAGE_EXEC;
    }
#endif
    for (size_t i = 0, n = region.n; i < n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);
        if (have_prot != need_prot) {
            int rc;

            if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
                rc = qemu_mprotect_rwx(start, end - start);
            } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
                rc = qemu_mprotect_rw(start, end - start);
            } else {
                g_assert_not_reached();
            }
            if (rc) {
                error_setg_errno(&error_fatal, errno,
                                 "mprotect of jit buffer");
            }
        }
        if (have_prot != 0) {
            /* Guard pages are nice for bug detection but are not essential. */
            (void)qemu_mprotect_none(end, page_size);
        }
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}

void tcg_region_prologue_set(TCGContext *s)
{
    /* Deduct the prologue from the first region. */
    g_assert(region.start_aligned == s->code_gen_buffer);
    region.after_prologue = s->code_ptr;

    /* Recompute boundaries of the first region. */
    tcg_region_assign(s, 0);

    /* Register the balance of the buffer with gdb. */
    tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
                     region.start_aligned + region.total_size -
                     region.after_prologue);
}

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.total_size;
    capacity -= (region.n - 1) * guard_size;
    capacity -= region.n * TCG_HIGHWATER;

    return capacity;
}