#include "qemu/osdep.h"
#include "qemu/units.h"
+#include "qemu/madvise.h"
+#include "qemu/mprotect.h"
+#include "qemu/memalign.h"
+#include "qemu/cacheinfo.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
QemuMutex lock;
/* fields set at init time */
- void *start;
void *start_aligned;
- void *end;
+ void *after_prologue;
size_t n;
size_t size; /* size of one region */
size_t stride; /* .size + guard size */
+ size_t total_size; /* size of entire buffer, >= n * stride */
/* fields protected by the lock */
size_t current; /* current region index */
static void *region_trees;
static size_t tree_size;
+bool in_code_gen_buffer(const void *p)
+{
+ /*
+ * Much like it is valid to have a pointer to the byte past the
+ * end of an array (so long as you don't dereference it), allow
+ * a pointer to the byte past the end of the code gen buffer.
+ */
+ return (size_t)(p - region.start_aligned) <= region.total_size;
+}
+
+#ifdef CONFIG_DEBUG_TCG
+const void *tcg_splitwx_to_rx(void *rw)
+{
+ /* Pass NULL pointers unchanged. */
+ if (rw) {
+ g_assert(in_code_gen_buffer(rw));
+ rw += tcg_splitwx_diff;
+ }
+ return rw;
+}
+
+void *tcg_splitwx_to_rw(const void *rx)
+{
+ /* Pass NULL pointers unchanged. */
+ if (rx) {
+ rx -= tcg_splitwx_diff;
+ /* Assert that we end with a pointer in the rw region. */
+ g_assert(in_code_gen_buffer(rx));
+ }
+ return (void *)rx;
+}
+#endif /* CONFIG_DEBUG_TCG */
+
/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
return 0;
}
-static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
+static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
{
const struct tb_tc *a = ap;
const struct tb_tc *b = bp;
return ptr_cmp_tb_tc(b->ptr, a);
}
+static void tb_destroy(gpointer value)
+{
+ TranslationBlock *tb = value;
+ qemu_spin_destroy(&tb->jmp_lock);
+}
+
static void tcg_region_trees_init(void)
{
size_t i;
struct tcg_region_tree *rt = region_trees + i * tree_size;
qemu_mutex_init(&rt->lock);
- rt->tree = g_tree_new(tb_tc_cmp);
+ rt->tree = g_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
}
}
return nb_tbs;
}
-static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
-{
- TranslationBlock *tb = v;
-
- tb_destroy(tb);
- return FALSE;
-}
-
static void tcg_region_tree_reset_all(void)
{
size_t i;
for (i = 0; i < region.n; i++) {
struct tcg_region_tree *rt = region_trees + i * tree_size;
- g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
/* Increment the refcount first so that destroy acts as a reset */
g_tree_ref(rt->tree);
g_tree_destroy(rt->tree);
end = start + region.size;
if (curr_region == 0) {
- start = region.start;
+ start = region.after_prologue;
}
+ /* The final region may have a few extra pages due to earlier rounding. */
if (curr_region == region.n - 1) {
- end = region.end;
+ end = region.start_aligned + region.total_size;
}
*pstart = start;
tcg_region_tree_reset_all();
}
-static size_t tcg_n_regions(unsigned max_cpus)
+static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
{
#ifdef CONFIG_USER_ONLY
return 1;
#else
+ size_t n_regions;
+
/*
* It is likely that some vCPUs will translate more code than others,
* so we first try to set more regions than max_cpus, with those regions
* being of reasonable size. If that's not possible we make do by evenly
* dividing the code_gen_buffer among the vCPUs.
*/
- size_t i;
-
/* Use a single region if all we have is one vCPU thread */
if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
return 1;
}
- /* Try to have more regions than max_cpus, with each region being >= 2 MB */
- for (i = 8; i > 0; i--) {
- size_t regions_per_thread = i;
- size_t region_size;
-
- region_size = tcg_init_ctx.code_gen_buffer_size;
- region_size /= max_cpus * regions_per_thread;
-
- if (region_size >= 2 * 1024u * 1024) {
- return max_cpus * regions_per_thread;
- }
+ /*
+ * Try to have more regions than max_cpus, with each region being >= 2 MB.
+ * If we can't, then just allocate one region per vCPU thread.
+ */
+ n_regions = tb_size / (2 * MiB);
+ if (n_regions <= max_cpus) {
+ return max_cpus;
}
- /* If we can't, then just allocate one region per vCPU thread */
- return max_cpus;
+ return MIN(n_regions, max_cpus * 8);
#endif
}
(DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
-static size_t size_code_gen_buffer(size_t tb_size)
-{
- /* Size the buffer. */
- if (tb_size == 0) {
- size_t phys_mem = qemu_get_host_physmem();
- if (phys_mem == 0) {
- tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
- } else {
- tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8);
- }
- }
- if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
- tb_size = MIN_CODE_GEN_BUFFER_SIZE;
- }
- if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
- tb_size = MAX_CODE_GEN_BUFFER_SIZE;
- }
- return tb_size;
-}
-
-#ifdef __mips__
-/*
- * In order to use J and JAL within the code_gen_buffer, we require
- * that the buffer not cross a 256MB boundary.
- */
-static inline bool cross_256mb(void *addr, size_t size)
-{
- return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
-}
-
-/*
- * We weren't able to allocate a buffer without crossing that boundary,
- * so make do with the larger portion of the buffer that doesn't cross.
- * Returns the new base of the buffer, and adjusts code_gen_buffer_size.
- */
-static inline void *split_cross_256mb(void *buf1, size_t size1)
-{
- void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
- size_t size2 = buf1 + size1 - buf2;
-
- size1 = buf2 - buf1;
- if (size1 < size2) {
- size1 = size2;
- buf1 = buf2;
- }
-
- tcg_ctx->code_gen_buffer_size = size1;
- return buf1;
-}
-#endif
-
#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
__attribute__((aligned(CODE_GEN_ALIGN)));
-static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
+static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
void *buf, *end;
size_t size;
if (splitwx > 0) {
error_setg(errp, "jit split-wx not supported");
- return false;
+ return -1;
}
/* page-align the beginning and end of the buffer */
buf = static_code_gen_buffer;
end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
- buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
- end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
+ buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size());
+ end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size());
size = end - buf;
/* Honor a command-line option limiting the size of the buffer. */
if (size > tb_size) {
- size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
- }
- tcg_ctx->code_gen_buffer_size = size;
-
-#ifdef __mips__
- if (cross_256mb(buf, size)) {
- buf = split_cross_256mb(buf, size);
- size = tcg_ctx->code_gen_buffer_size;
+ size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size());
}
-#endif
- if (qemu_mprotect_rwx(buf, size)) {
- error_setg_errno(errp, errno, "mprotect of jit buffer");
- return false;
- }
- qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+ region.start_aligned = buf;
+ region.total_size = size;
- tcg_ctx->code_gen_buffer = buf;
- return true;
+ return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
-static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
+static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
void *buf;
if (splitwx > 0) {
error_setg(errp, "jit split-wx not supported");
- return false;
+ return -1;
}
buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
return false;
}
- tcg_ctx->code_gen_buffer = buf;
- tcg_ctx->code_gen_buffer_size = size;
- return true;
+ region.start_aligned = buf;
+ region.total_size = size;
+
+ return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
}
#else
-static bool alloc_code_gen_buffer_anon(size_t size, int prot,
- int flags, Error **errp)
+static int alloc_code_gen_buffer_anon(size_t size, int prot,
+ int flags, Error **errp)
{
void *buf;
if (buf == MAP_FAILED) {
error_setg_errno(errp, errno,
"allocate %zu bytes for jit buffer", size);
- return false;
- }
- tcg_ctx->code_gen_buffer_size = size;
-
-#ifdef __mips__
- if (cross_256mb(buf, size)) {
- /*
- * Try again, with the original still mapped, to avoid re-acquiring
- * the same 256mb crossing.
- */
- size_t size2;
- void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
- switch ((int)(buf2 != MAP_FAILED)) {
- case 1:
- if (!cross_256mb(buf2, size)) {
- /* Success! Use the new buffer. */
- munmap(buf, size);
- break;
- }
- /* Failure. Work with what we had. */
- munmap(buf2, size);
- /* fallthru */
- default:
- /* Split the original buffer. Free the smaller half. */
- buf2 = split_cross_256mb(buf, size);
- size2 = tcg_ctx->code_gen_buffer_size;
- if (buf == buf2) {
- munmap(buf + size2, size - size2);
- } else {
- munmap(buf, size - size2);
- }
- size = size2;
- break;
- }
- buf = buf2;
+ return -1;
}
-#endif
-
- /* Request large pages for the buffer. */
- qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
- tcg_ctx->code_gen_buffer = buf;
- return true;
+ region.start_aligned = buf;
+ region.total_size = size;
+ return prot;
}
#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"
-static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
+static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
void *buf_rw = NULL, *buf_rx = MAP_FAILED;
int fd = -1;
-#ifdef __mips__
- /* Find space for the RX mapping, vs the 256MiB regions. */
- if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
- MAP_PRIVATE | MAP_ANONYMOUS |
- MAP_NORESERVE, errp)) {
- return false;
- }
- /* The size of the mapping may have been adjusted. */
- size = tcg_ctx->code_gen_buffer_size;
- buf_rx = tcg_ctx->code_gen_buffer;
-#endif
-
buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
if (buf_rw == NULL) {
goto fail;
}
-#ifdef __mips__
- void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
- MAP_SHARED | MAP_FIXED, fd, 0);
- if (tmp != buf_rx) {
- goto fail_rx;
- }
-#else
buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
if (buf_rx == MAP_FAILED) {
goto fail_rx;
}
-#endif
close(fd);
- tcg_ctx->code_gen_buffer = buf_rw;
- tcg_ctx->code_gen_buffer_size = size;
+ region.start_aligned = buf_rw;
+ region.total_size = size;
tcg_splitwx_diff = buf_rx - buf_rw;
- /* Request large pages for the buffer and the splitwx. */
- qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE);
- qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE);
- return true;
+ return PROT_READ | PROT_WRITE;
fail_rx:
error_setg_errno(errp, errno, "failed to map shared memory for execute");
if (fd >= 0) {
close(fd);
}
- return false;
+ return -1;
}
#endif /* CONFIG_POSIX */
vm_prot_t *max_protection,
vm_inherit_t inheritance);
-static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
+static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
kern_return_t ret;
mach_vm_address_t buf_rw, buf_rx;
/* Map the read-write portion via normal anon memory. */
if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
- return false;
+ return -1;
}
- buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
+ buf_rw = (mach_vm_address_t)region.start_aligned;
buf_rx = 0;
ret = mach_vm_remap(mach_task_self(),
&buf_rx,
/* TODO: Convert "ret" to a human readable error message. */
error_setg(errp, "vm_remap for jit splitwx failed");
munmap((void *)buf_rw, size);
- return false;
+ return -1;
}
if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
error_setg_errno(errp, errno, "mprotect for jit splitwx");
munmap((void *)buf_rx, size);
munmap((void *)buf_rw, size);
- return false;
+ return -1;
}
tcg_splitwx_diff = buf_rx - buf_rw;
- return true;
+ return PROT_READ | PROT_WRITE;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */
-static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
+static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
# endif
#endif
error_setg(errp, "jit split-wx not supported");
- return false;
+ return -1;
}
-static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
+static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
ERRP_GUARD();
int prot, flags;
if (splitwx) {
- if (alloc_code_gen_buffer_splitwx(size, errp)) {
- return true;
+ prot = alloc_code_gen_buffer_splitwx(size, errp);
+ if (prot >= 0) {
+ return prot;
}
/*
* If splitwx force-on (1), fail;
* if splitwx default-on (-1), fall through to splitwx off.
*/
if (splitwx > 0) {
- return false;
+ return -1;
}
error_free_or_abort(errp);
}
- prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+ /*
+ * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
+ * rejects a permission change from RWX -> NONE when reserving the
+ * guard pages later. We can go the other way with the same number
+ * of syscalls, so always begin with PROT_NONE.
+ */
+ prot = PROT_NONE;
flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#ifdef CONFIG_TCG_INTERPRETER
- /* The tcg interpreter does not need execute permission. */
- prot = PROT_READ | PROT_WRITE;
-#elif defined(CONFIG_DARWIN)
+#ifdef CONFIG_DARWIN
/* Applicable to both iOS and macOS (Apple Silicon). */
if (!splitwx) {
flags |= MAP_JIT;
*/
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
- void *buf, *aligned;
- size_t size;
- size_t page_size;
+ const size_t page_size = qemu_real_host_page_size();
size_t region_size;
- size_t n_regions;
- size_t i;
- bool ok;
+ int have_prot, need_prot;
- ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
- splitwx, &error_fatal);
- assert(ok);
+ /* Size the buffer. */
+ if (tb_size == 0) {
+ size_t phys_mem = qemu_get_host_physmem();
+ if (phys_mem == 0) {
+ tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+ } else {
+ tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
+ tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
+ }
+ }
+ if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+ tb_size = MIN_CODE_GEN_BUFFER_SIZE;
+ }
+ if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+ tb_size = MAX_CODE_GEN_BUFFER_SIZE;
+ }
+
+ have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
+ assert(have_prot >= 0);
- buf = tcg_init_ctx.code_gen_buffer;
- size = tcg_init_ctx.code_gen_buffer_size;
- page_size = qemu_real_host_page_size;
- n_regions = tcg_n_regions(max_cpus);
+ /* Request large pages for the buffer and the splitwx. */
+ qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
+ if (tcg_splitwx_diff) {
+ qemu_madvise(region.start_aligned + tcg_splitwx_diff,
+ region.total_size, QEMU_MADV_HUGEPAGE);
+ }
- /* The first region will be 'aligned - buf' bytes larger than the others */
- aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
- g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
/*
* Make region_size a multiple of page_size, using aligned as the start.
* As a result of this we might end up with a few extra pages at the end of
* the buffer; we will assign those to the last region.
*/
- region_size = (size - (aligned - buf)) / n_regions;
+ region.n = tcg_n_regions(tb_size, max_cpus);
+ region_size = tb_size / region.n;
region_size = QEMU_ALIGN_DOWN(region_size, page_size);
/* A region must have at least 2 pages; one code, one guard */
g_assert(region_size >= 2 * page_size);
+ region.stride = region_size;
+
+ /* Reserve space for guard pages. */
+ region.size = region_size - page_size;
+ region.total_size -= page_size;
+
+ /*
+ * The first region will be smaller than the others, via the prologue,
+ * which has yet to be allocated. For now, the first region begins at
+ * the page boundary.
+ */
+ region.after_prologue = region.start_aligned;
/* init the region struct */
qemu_mutex_init(®ion.lock);
- region.n = n_regions;
- region.size = region_size - page_size;
- region.stride = region_size;
- region.start = buf;
- region.start_aligned = aligned;
- /* page-align the end, since its last page will be a guard page */
- region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
- /* account for that last guard page */
- region.end -= page_size;
/*
* Set guard pages in the rw buffer, as that's the one into which
* buffer overruns could occur. Do not set guard pages in the rx
* buffer -- let that one use hugepages throughout.
+ * Work with the page protections set up with the initial mapping.
*/
- for (i = 0; i < region.n; i++) {
+ need_prot = PAGE_READ | PAGE_WRITE;
+#ifndef CONFIG_TCG_INTERPRETER
+ if (tcg_splitwx_diff == 0) {
+ need_prot |= PAGE_EXEC;
+ }
+#endif
+ for (size_t i = 0, n = region.n; i < n; i++) {
void *start, *end;
tcg_region_bounds(i, &start, &end);
+ if (have_prot != need_prot) {
+ int rc;
- /*
- * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
- * rejects a permission change from RWX -> NONE. Guard pages are
- * nice for bug detection but are not essential; ignore any failure.
- */
- (void)qemu_mprotect_none(end, page_size);
+ if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
+ rc = qemu_mprotect_rwx(start, end - start);
+ } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
+ rc = qemu_mprotect_rw(start, end - start);
+ } else {
+ g_assert_not_reached();
+ }
+ if (rc) {
+ error_setg_errno(&error_fatal, errno,
+ "mprotect of jit buffer");
+ }
+ }
+ if (have_prot != 0) {
+ /* Guard pages are nice for bug detection but are not essential. */
+ (void)qemu_mprotect_none(end, page_size);
+ }
}
tcg_region_trees_init();
void tcg_region_prologue_set(TCGContext *s)
{
/* Deduct the prologue from the first region. */
- g_assert(region.start == s->code_gen_buffer);
- region.start = s->code_ptr;
+ g_assert(region.start_aligned == s->code_gen_buffer);
+ region.after_prologue = s->code_ptr;
/* Recompute boundaries of the first region. */
tcg_region_assign(s, 0);
/* Register the balance of the buffer with gdb. */
- tcg_register_jit(tcg_splitwx_to_rx(region.start),
- region.end - region.start);
+ tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
+ region.start_aligned + region.total_size -
+ region.after_prologue);
}
/*
/* no need for synchronization; these variables are set at init time */
guard_size = region.stride - region.size;
- capacity = region.end + guard_size - region.start;
- capacity -= region.n * (guard_size + TCG_HIGHWATER);
- return capacity;
-}
+ capacity = region.total_size;
+ capacity -= (region.n - 1) * guard_size;
+ capacity -= region.n * TCG_HIGHWATER;
-size_t tcg_tb_phys_invalidate_count(void)
-{
- unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
- unsigned int i;
- size_t total = 0;
-
- for (i = 0; i < n_ctxs; i++) {
- const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
-
- total += qatomic_read(&s->tb_phys_invalidate_count);
- }
- return total;
+ return capacity;
}