tcg/ppc: Remove TARGET_LONG_BITS, TCG_TYPE_TL

[mirror_qemu.git] / tcg / region.c
diff --git a/tcg/region.c b/tcg/region.c

index 162b4d6486e74bf3ce1439f64857eb31215ba1a7..bef4c4756f8009e129442e1e6c04334b0e79d526 100644 (file)
--- a/tcg/region.c
+++ b/tcg/region.c
@@ -24,18 +24,20 @@
  
  #include "qemu/osdep.h"
  #include "qemu/units.h"
+#include "qemu/madvise.h"
+#include "qemu/mprotect.h"
+#include "qemu/memalign.h"
+#include "qemu/cacheinfo.h"
+#include "qemu/qtree.h"
  #include "qapi/error.h"
  #include "exec/exec-all.h"
  #include "tcg/tcg.h"
-#if !defined(CONFIG_USER_ONLY)
-#include "hw/boards.h"
-#endif
  #include "tcg-internal.h"
  
  
  struct tcg_region_tree {
      QemuMutex lock;
-    GTree *tree;
+    QTree *tree;
      /* padding to avoid false sharing is computed at run-time */
  };
  
@@ -49,12 +51,12 @@ struct tcg_region_state {
      QemuMutex lock;
  
      /* fields set at init time */
-    void *start;
      void *start_aligned;
-    void *end;
+    void *after_prologue;
      size_t n;
      size_t size; /* size of one region */
      size_t stride; /* .size + guard size */
+    size_t total_size; /* size of entire buffer, >= n * stride */
  
      /* fields protected by the lock */
      size_t current; /* current region index */
@@ -71,6 +73,39 @@ static struct tcg_region_state region;
  static void *region_trees;
  static size_t tree_size;
  
+bool in_code_gen_buffer(const void *p)
+{
+    /*
+     * Unsigned compare: addresses below the buffer wrap to a huge offset.
+     * As with an array, the one-past-the-end address is also accepted.
+     */
+    const size_t offset = (size_t)(p - region.start_aligned);
+    return offset <= region.total_size;
+}
+
+#ifdef CONFIG_DEBUG_TCG
+/* Map a writable JIT address to its executable alias; NULL stays NULL. */
+const void *tcg_splitwx_to_rx(void *rw)
+{
+    if (rw == NULL) {
+        return NULL;
+    }
+    g_assert(in_code_gen_buffer(rw));
+    return rw + tcg_splitwx_diff;
+}
+
+/* Map an executable JIT address to its writable alias; NULL stays NULL. */
+void *tcg_splitwx_to_rw(const void *rx)
+{
+    if (rx == NULL) {
+        return NULL;
+    }
+    rx -= tcg_splitwx_diff;
+    g_assert(in_code_gen_buffer(rx));   /* must land in the rw buffer */
+    return (void *)rx;
+}
+#endif /* CONFIG_DEBUG_TCG */
+
  /* compare a pointer @ptr and a tb_tc @s */
  static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
  {
@@ -82,7 +117,7 @@ static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
      return 0;
  }
  
-static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
+static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
  {
      const struct tb_tc *a = ap;
      const struct tb_tc *b = bp;
@@ -113,6 +148,12 @@ static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
      return ptr_cmp_tb_tc(b->ptr, a);
  }
  
+/* QTree value-destroy hook: release the jmp_lock spinlock of a TB. */
+static void tb_destroy(gpointer value)
+{
+    qemu_spin_destroy(&((TranslationBlock *)value)->jmp_lock);
+}
+
  static void tcg_region_trees_init(void)
  {
      size_t i;
@@ -123,7 +164,7 @@ static void tcg_region_trees_init(void)
          struct tcg_region_tree *rt = region_trees + i * tree_size;
  
          qemu_mutex_init(&rt->lock);
-        rt->tree = g_tree_new(tb_tc_cmp);
+        rt->tree = q_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
      }
  }
  
@@ -162,7 +203,7 @@ void tcg_tb_insert(TranslationBlock *tb)
  
      g_assert(rt != NULL);
      qemu_mutex_lock(&rt->lock);
-    g_tree_insert(rt->tree, &tb->tc, tb);
+    q_tree_insert(rt->tree, &tb->tc, tb);
      qemu_mutex_unlock(&rt->lock);
  }
  
@@ -172,7 +213,7 @@ void tcg_tb_remove(TranslationBlock *tb)
  
      g_assert(rt != NULL);
      qemu_mutex_lock(&rt->lock);
-    g_tree_remove(rt->tree, &tb->tc);
+    q_tree_remove(rt->tree, &tb->tc);
      qemu_mutex_unlock(&rt->lock);
  }
  
@@ -192,7 +233,7 @@ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
      }
  
      qemu_mutex_lock(&rt->lock);
-    tb = g_tree_lookup(rt->tree, &s);
+    tb = q_tree_lookup(rt->tree, &s);
      qemu_mutex_unlock(&rt->lock);
      return tb;
  }
@@ -227,7 +268,7 @@ void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
      for (i = 0; i < region.n; i++) {
          struct tcg_region_tree *rt = region_trees + i * tree_size;
  
-        g_tree_foreach(rt->tree, func, user_data);
+        q_tree_foreach(rt->tree, func, user_data);
      }
      tcg_region_tree_unlock_all();
  }
@@ -241,20 +282,12 @@ size_t tcg_nb_tbs(void)
      for (i = 0; i < region.n; i++) {
          struct tcg_region_tree *rt = region_trees + i * tree_size;
  
-        nb_tbs += g_tree_nnodes(rt->tree);
+        nb_tbs += q_tree_nnodes(rt->tree);
      }
      tcg_region_tree_unlock_all();
      return nb_tbs;
  }
  
-static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
-{
-    TranslationBlock *tb = v;
-
-    tb_destroy(tb);
-    return FALSE;
-}
-
  static void tcg_region_tree_reset_all(void)
  {
      size_t i;
@@ -263,10 +296,9 @@ static void tcg_region_tree_reset_all(void)
      for (i = 0; i < region.n; i++) {
          struct tcg_region_tree *rt = region_trees + i * tree_size;
  
-        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
          /* Increment the refcount first so that destroy acts as a reset */
-        g_tree_ref(rt->tree);
-        g_tree_destroy(rt->tree);
+        q_tree_ref(rt->tree);
+        q_tree_destroy(rt->tree);
      }
      tcg_region_tree_unlock_all();
  }
@@ -279,10 +311,11 @@ static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
      end = start + region.size;
  
      if (curr_region == 0) {
-        start = region.start;
+        start = region.after_prologue;
      }
+    /* The final region may have a few extra pages due to earlier rounding. */
      if (curr_region == region.n - 1) {
-        end = region.end;
+        end = region.start_aligned + region.total_size;
      }
  
      *pstart = start;
@@ -350,7 +383,7 @@ void tcg_region_initial_alloc(TCGContext *s)
  /* Call from a safe-work context */
  void tcg_region_reset_all(void)
  {
-    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
+    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
      unsigned int i;
  
      qemu_mutex_lock(&region.lock);
@@ -366,82 +399,47 @@ void tcg_region_reset_all(void)
      tcg_region_tree_reset_all();
  }
  
-#ifdef CONFIG_USER_ONLY
-static size_t tcg_n_regions(void)
+static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
  {
+#ifdef CONFIG_USER_ONLY
      return 1;
-}
  #else
-/*
- * It is likely that some vCPUs will translate more code than others, so we
- * first try to set more regions than max_cpus, with those regions being of
- * reasonable size. If that's not possible we make do by evenly dividing
- * the code_gen_buffer among the vCPUs.
- */
-static size_t tcg_n_regions(void)
-{
-    size_t i;
+    size_t n_regions;
  
+    /*
+     * It is likely that some vCPUs will translate more code than others,
+     * so we first try to set more regions than max_cpus, with those regions
+     * being of reasonable size. If that's not possible we make do by evenly
+     * dividing the code_gen_buffer among the vCPUs.
+     */
      /* Use a single region if all we have is one vCPU thread */
-#if !defined(CONFIG_USER_ONLY)
-    MachineState *ms = MACHINE(qdev_get_machine());
-    unsigned int max_cpus = ms->smp.max_cpus;
-#endif
      if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
          return 1;
      }
  
-    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
-    for (i = 8; i > 0; i--) {
-        size_t regions_per_thread = i;
-        size_t region_size;
-
-        region_size = tcg_init_ctx.code_gen_buffer_size;
-        region_size /= max_cpus * regions_per_thread;
-
-        if (region_size >= 2 * 1024u * 1024) {
-            return max_cpus * regions_per_thread;
-        }
+    /*
+     * Try to have more regions than max_cpus, with each region being >= 2 MB.
+     * If we can't, then just allocate one region per vCPU thread.
+     */
+    n_regions = tb_size / (2 * MiB);
+    if (n_regions <= max_cpus) {
+        return max_cpus;
      }
-    /* If we can't, then just allocate one region per vCPU thread */
-    return max_cpus;
-}
+    return MIN(n_regions, max_cpus * 8);
  #endif
+}
  
  /*
   * Minimum size of the code gen buffer.  This number is randomly chosen,
   * but not so small that we can't have a fair number of TB's live.
+ *
+ * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
+ * Unless otherwise indicated, this is constrained by the range of
+ * direct branches on the host cpu, as used by the TCG implementation
+ * of goto_tb.
   */
  #define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)
  
-/*
- * Maximum size of the code gen buffer we'd like to use.  Unless otherwise
- * indicated, this is constrained by the range of direct branches on the
- * host cpu, as used by the TCG implementation of goto_tb.
- */
-#if defined(__x86_64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
-#elif defined(__sparc__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
-#elif defined(__powerpc64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
-#elif defined(__powerpc__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (32 * MiB)
-#elif defined(__aarch64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
-#elif defined(__s390x__)
-  /* We have a +- 4GB range on the branches; leave some slop.  */
-# define MAX_CODE_GEN_BUFFER_SIZE  (3 * GiB)
-#elif defined(__mips__)
-  /*
-   * We have a 256MB branch region, but leave room to make sure the
-   * main executable is also within that region.
-   */
-# define MAX_CODE_GEN_BUFFER_SIZE  (128 * MiB)
-#else
-# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
-#endif
-
  #if TCG_TARGET_REG_BITS == 32
  #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
  #ifdef CONFIG_USER_ONLY
@@ -474,109 +472,46 @@ static size_t tcg_n_regions(void)
    (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
     ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
  
-static size_t size_code_gen_buffer(size_t tb_size)
-{
-    /* Size the buffer.  */
-    if (tb_size == 0) {
-        size_t phys_mem = qemu_get_host_physmem();
-        if (phys_mem == 0) {
-            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-        } else {
-            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8);
-        }
-    }
-    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
-        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
-    }
-    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
-        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
-    }
-    return tb_size;
-}
-
-#ifdef __mips__
-/*
- * In order to use J and JAL within the code_gen_buffer, we require
- * that the buffer not cross a 256MB boundary.
- */
-static inline bool cross_256mb(void *addr, size_t size)
-{
-    return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
-}
-
-/*
- * We weren't able to allocate a buffer without crossing that boundary,
- * so make do with the larger portion of the buffer that doesn't cross.
- * Returns the new base of the buffer, and adjusts code_gen_buffer_size.
- */
-static inline void *split_cross_256mb(void *buf1, size_t size1)
-{
-    void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
-    size_t size2 = buf1 + size1 - buf2;
-
-    size1 = buf2 - buf1;
-    if (size1 < size2) {
-        size1 = size2;
-        buf1 = buf2;
-    }
-
-    tcg_ctx->code_gen_buffer_size = size1;
-    return buf1;
-}
-#endif
-
  #ifdef USE_STATIC_CODE_GEN_BUFFER
  static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
      __attribute__((aligned(CODE_GEN_ALIGN)));
  
-static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
+static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
  {
      void *buf, *end;
      size_t size;
  
      if (splitwx > 0) {
          error_setg(errp, "jit split-wx not supported");
-        return false;
+        return -1;
      }
  
      /* page-align the beginning and end of the buffer */
      buf = static_code_gen_buffer;
      end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
-    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
-    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
+    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size());
+    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size());
  
      size = end - buf;
  
      /* Honor a command-line option limiting the size of the buffer.  */
      if (size > tb_size) {
-        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
-    }
-    tcg_ctx->code_gen_buffer_size = size;
-
-#ifdef __mips__
-    if (cross_256mb(buf, size)) {
-        buf = split_cross_256mb(buf, size);
-        size = tcg_ctx->code_gen_buffer_size;
+        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size());
      }
-#endif
  
-    if (qemu_mprotect_rwx(buf, size)) {
-        error_setg_errno(errp, errno, "mprotect of jit buffer");
-        return false;
-    }
-    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+    region.start_aligned = buf;
+    region.total_size = size;
  
-    tcg_ctx->code_gen_buffer = buf;
-    return true;
+    return PROT_READ | PROT_WRITE;
  }
  #elif defined(_WIN32)
-static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
+static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
  {
      void *buf;
  
      if (splitwx > 0) {
          error_setg(errp, "jit split-wx not supported");
-        return false;
+        return -1;
      }
  
      buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
@@ -587,13 +522,14 @@ static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
          return false;
      }
  
-    tcg_ctx->code_gen_buffer = buf;
-    tcg_ctx->code_gen_buffer_size = size;
-    return true;
+    region.start_aligned = buf;
+    region.total_size = size;
+
+    return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
  }
  #else
-static bool alloc_code_gen_buffer_anon(size_t size, int prot,
-                                       int flags, Error **errp)
+static int alloc_code_gen_buffer_anon(size_t size, int prot,
+                                      int flags, Error **errp)
  {
      void *buf;
  
@@ -601,99 +537,39 @@ static bool alloc_code_gen_buffer_anon(size_t size, int prot,
      if (buf == MAP_FAILED) {
          error_setg_errno(errp, errno,
                           "allocate %zu bytes for jit buffer", size);
-        return false;
+        return -1;
      }
-    tcg_ctx->code_gen_buffer_size = size;
  
-#ifdef __mips__
-    if (cross_256mb(buf, size)) {
-        /*
-         * Try again, with the original still mapped, to avoid re-acquiring
-         * the same 256mb crossing.
-         */
-        size_t size2;
-        void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
-        switch ((int)(buf2 != MAP_FAILED)) {
-        case 1:
-            if (!cross_256mb(buf2, size)) {
-                /* Success!  Use the new buffer.  */
-                munmap(buf, size);
-                break;
-            }
-            /* Failure.  Work with what we had.  */
-            munmap(buf2, size);
-            /* fallthru */
-        default:
-            /* Split the original buffer.  Free the smaller half.  */
-            buf2 = split_cross_256mb(buf, size);
-            size2 = tcg_ctx->code_gen_buffer_size;
-            if (buf == buf2) {
-                munmap(buf + size2, size - size2);
-            } else {
-                munmap(buf, size - size2);
-            }
-            size = size2;
-            break;
-        }
-        buf = buf2;
-    }
-#endif
-
-    /* Request large pages for the buffer.  */
-    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
-
-    tcg_ctx->code_gen_buffer = buf;
-    return true;
+    region.start_aligned = buf;
+    region.total_size = size;
+    return prot;
  }
  
  #ifndef CONFIG_TCG_INTERPRETER
  #ifdef CONFIG_POSIX
  #include "qemu/memfd.h"
  
-static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
+static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
  {
      void *buf_rw = NULL, *buf_rx = MAP_FAILED;
      int fd = -1;
  
-#ifdef __mips__
-    /* Find space for the RX mapping, vs the 256MiB regions. */
-    if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
-                                    MAP_PRIVATE | MAP_ANONYMOUS |
-                                    MAP_NORESERVE, errp)) {
-        return false;
-    }
-    /* The size of the mapping may have been adjusted. */
-    size = tcg_ctx->code_gen_buffer_size;
-    buf_rx = tcg_ctx->code_gen_buffer;
-#endif
-
      buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
      if (buf_rw == NULL) {
          goto fail;
      }
  
-#ifdef __mips__
-    void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
-                     MAP_SHARED | MAP_FIXED, fd, 0);
-    if (tmp != buf_rx) {
-        goto fail_rx;
-    }
-#else
      buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
      if (buf_rx == MAP_FAILED) {
          goto fail_rx;
      }
-#endif
  
      close(fd);
-    tcg_ctx->code_gen_buffer = buf_rw;
-    tcg_ctx->code_gen_buffer_size = size;
+    region.start_aligned = buf_rw;
+    region.total_size = size;
      tcg_splitwx_diff = buf_rx - buf_rw;
  
-    /* Request large pages for the buffer and the splitwx.  */
-    qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE);
-    qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE);
-    return true;
+    return PROT_READ | PROT_WRITE;
  
   fail_rx:
      error_setg_errno(errp, errno, "failed to map shared memory for execute");
@@ -707,7 +583,7 @@ static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
      if (fd >= 0) {
          close(fd);
      }
-    return false;
+    return -1;
  }
  #endif /* CONFIG_POSIX */
  
@@ -726,7 +602,7 @@ extern kern_return_t mach_vm_remap(vm_map_t target_task,
                                     vm_prot_t *max_protection,
                                     vm_inherit_t inheritance);
  
-static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
+static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
  {
      kern_return_t ret;
      mach_vm_address_t buf_rw, buf_rx;
@@ -735,10 +611,10 @@ static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
      /* Map the read-write portion via normal anon memory. */
      if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                      MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
-        return false;
+        return -1;
      }
  
-    buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
+    buf_rw = (mach_vm_address_t)region.start_aligned;
      buf_rx = 0;
      ret = mach_vm_remap(mach_task_self(),
                          &buf_rx,
@@ -755,23 +631,23 @@ static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
          /* TODO: Convert "ret" to a human readable error message. */
          error_setg(errp, "vm_remap for jit splitwx failed");
          munmap((void *)buf_rw, size);
-        return false;
+        return -1;
      }
  
      if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
          error_setg_errno(errp, errno, "mprotect for jit splitwx");
          munmap((void *)buf_rx, size);
          munmap((void *)buf_rw, size);
-        return false;
+        return -1;
      }
  
      tcg_splitwx_diff = buf_rx - buf_rw;
-    return true;
+    return PROT_READ | PROT_WRITE;
  }
  #endif /* CONFIG_DARWIN */
  #endif /* CONFIG_TCG_INTERPRETER */
  
-static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
+static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
  {
  #ifndef CONFIG_TCG_INTERPRETER
  # ifdef CONFIG_DARWIN
@@ -782,34 +658,38 @@ static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
  # endif
  #endif
      error_setg(errp, "jit split-wx not supported");
-    return false;
+    return -1;
  }
  
-static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
+static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
  {
      ERRP_GUARD();
      int prot, flags;
  
      if (splitwx) {
-        if (alloc_code_gen_buffer_splitwx(size, errp)) {
-            return true;
+        prot = alloc_code_gen_buffer_splitwx(size, errp);
+        if (prot >= 0) {
+            return prot;
          }
          /*
           * If splitwx force-on (1), fail;
           * if splitwx default-on (-1), fall through to splitwx off.
           */
          if (splitwx > 0) {
-            return false;
+            return -1;
          }
          error_free_or_abort(errp);
      }
  
-    prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+    /*
+     * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
+     * rejects a permission change from RWX -> NONE when reserving the
+     * guard pages later.  We can go the other way with the same number
+     * of syscalls, so always begin with PROT_NONE.
+     */
+    prot = PROT_NONE;
      flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#ifdef CONFIG_TCG_INTERPRETER
-    /* The tcg interpreter does not need execute permission. */
-    prot = PROT_READ | PROT_WRITE;
-#elif defined(CONFIG_DARWIN)
+#ifdef CONFIG_DARWIN
      /* Applicable to both iOS and macOS (Apple Silicon). */
      if (!splitwx) {
          flags |= MAP_JIT;
@@ -848,67 +728,101 @@ static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
   * in practice. Multi-threaded guests share most if not all of their translated
   * code, which makes parallel code generation less appealing than in softmmu.
   */
-void tcg_region_init(size_t tb_size, int splitwx)
+void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
  {
-    void *buf, *aligned;
-    size_t size;
-    size_t page_size;
+    const size_t page_size = qemu_real_host_page_size();
      size_t region_size;
-    size_t n_regions;
-    size_t i;
-    bool ok;
+    int have_prot, need_prot;
+
+    /* Size the buffer.  */
+    if (tb_size == 0) {
+        size_t phys_mem = qemu_get_host_physmem();
+        if (phys_mem == 0) {
+            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+        } else {
+            tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
+            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
+        }
+    }
+    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
+    }
+    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
+    }
  
-    ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
-                               splitwx, &error_fatal);
-    assert(ok);
+    have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
+    assert(have_prot >= 0);
  
-    buf = tcg_init_ctx.code_gen_buffer;
-    size = tcg_init_ctx.code_gen_buffer_size;
-    page_size = qemu_real_host_page_size;
-    n_regions = tcg_n_regions();
+    /* Request large pages for the buffer and the splitwx.  */
+    qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
+    if (tcg_splitwx_diff) {
+        qemu_madvise(region.start_aligned + tcg_splitwx_diff,
+                     region.total_size, QEMU_MADV_HUGEPAGE);
+    }
  
-    /* The first region will be 'aligned - buf' bytes larger than the others */
-    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
-    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
      /*
       * Make region_size a multiple of page_size, using aligned as the start.
       * As a result of this we might end up with a few extra pages at the end of
       * the buffer; we will assign those to the last region.
       */
-    region_size = (size - (aligned - buf)) / n_regions;
+    region.n = tcg_n_regions(tb_size, max_cpus);
+    region_size = tb_size / region.n;
      region_size = QEMU_ALIGN_DOWN(region_size, page_size);
  
      /* A region must have at least 2 pages; one code, one guard */
      g_assert(region_size >= 2 * page_size);
+    region.stride = region_size;
+
+    /* Reserve space for guard pages. */
+    region.size = region_size - page_size;
+    region.total_size -= page_size;
+
+    /*
+     * The first region will be smaller than the others, via the prologue,
+     * which has yet to be allocated.  For now, the first region begins at
+     * the page boundary.
+     */
+    region.after_prologue = region.start_aligned;
  
      /* init the region struct */
      qemu_mutex_init(&region.lock);
-    region.n = n_regions;
-    region.size = region_size - page_size;
-    region.stride = region_size;
-    region.start = buf;
-    region.start_aligned = aligned;
-    /* page-align the end, since its last page will be a guard page */
-    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
-    /* account for that last guard page */
-    region.end -= page_size;
  
      /*
       * Set guard pages in the rw buffer, as that's the one into which
       * buffer overruns could occur.  Do not set guard pages in the rx
       * buffer -- let that one use hugepages throughout.
+     * Work with the page protections set up with the initial mapping.
       */
-    for (i = 0; i < region.n; i++) {
+    need_prot = PAGE_READ | PAGE_WRITE;
+#ifndef CONFIG_TCG_INTERPRETER
+    if (tcg_splitwx_diff == 0) {
+        need_prot |= PAGE_EXEC;
+    }
+#endif
+    for (size_t i = 0, n = region.n; i < n; i++) {
          void *start, *end;
  
          tcg_region_bounds(i, &start, &end);
+        if (have_prot != need_prot) {
+            int rc;
  
-        /*
-         * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
-         * rejects a permission change from RWX -> NONE.  Guard pages are
-         * nice for bug detection but are not essential; ignore any failure.
-         */
-        (void)qemu_mprotect_none(end, page_size);
+            if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
+                rc = qemu_mprotect_rwx(start, end - start);
+            } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
+                rc = qemu_mprotect_rw(start, end - start);
+            } else {
+                g_assert_not_reached();
+            }
+            if (rc) {
+                error_setg_errno(&error_fatal, errno,
+                                 "mprotect of jit buffer");
+            }
+        }
+        if (have_prot != 0) {
+            /* Guard pages are nice for bug detection but are not essential. */
+            (void)qemu_mprotect_none(end, page_size);
+        }
      }
  
      tcg_region_trees_init();
@@ -924,15 +838,16 @@ void tcg_region_init(size_t tb_size, int splitwx)
  void tcg_region_prologue_set(TCGContext *s)
  {
      /* Deduct the prologue from the first region.  */
-    g_assert(region.start == s->code_gen_buffer);
-    region.start = s->code_ptr;
+    g_assert(region.start_aligned == s->code_gen_buffer);
+    region.after_prologue = s->code_ptr;
  
      /* Recompute boundaries of the first region. */
      tcg_region_assign(s, 0);
  
      /* Register the balance of the buffer with gdb. */
-    tcg_register_jit(tcg_splitwx_to_rx(region.start),
-                     region.end - region.start);
+    tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
+                     region.start_aligned + region.total_size -
+                     region.after_prologue);
  }
  
  /*
@@ -944,7 +859,7 @@ void tcg_region_prologue_set(TCGContext *s)
   */
  size_t tcg_code_size(void)
  {
-    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
+    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
      unsigned int i;
      size_t total;
  
@@ -973,21 +888,9 @@ size_t tcg_code_capacity(void)
  
      /* no need for synchronization; these variables are set at init time */
      guard_size = region.stride - region.size;
-    capacity = region.end + guard_size - region.start;
-    capacity -= region.n * (guard_size + TCG_HIGHWATER);
-    return capacity;
-}
-
-size_t tcg_tb_phys_invalidate_count(void)
-{
-    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
-    unsigned int i;
-    size_t total = 0;
-
-    for (i = 0; i < n_ctxs; i++) {
-        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+    capacity = region.total_size;
+    capacity -= (region.n - 1) * guard_size;
+    capacity -= region.n * TCG_HIGHWATER;
  
-        total += qatomic_read(&s->tb_phys_invalidate_count);
-    }
-    return total;
+    return capacity;
  }