oslib-posix: initialize backend memory objects in parallel

author Mark Kanda <mark.kanda@oracle.com>

Wed, 31 Jan 2024 16:53:27 +0000 (10:53 -0600)

committer David Hildenbrand <david@redhat.com>

Tue, 6 Feb 2024 07:15:22 +0000 (08:15 +0100)
author Mark Kanda <mark.kanda@oracle.com>
Wed, 31 Jan 2024 16:53:27 +0000 (10:53 -0600)
committer David Hildenbrand <david@redhat.com>
Tue, 6 Feb 2024 07:15:22 +0000 (08:15 +0100)
diff --git a/backends/hostmem.c b/backends/hostmem.c

index 987f6f591e6d972f872d5dcafacfb4376eb8505f..81a72ce40b78fffdd05ed81c314b408288ce0ae3 100644 (file)
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -20,6 +20,7 @@
  #include "qom/object_interfaces.h"
  #include "qemu/mmap-alloc.h"
  #include "qemu/madvise.h"
+#include "hw/qdev-core.h"
  
  #ifdef CONFIG_NUMA
  #include <numaif.h>
@@ -237,7 +238,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
          uint64_t sz = memory_region_size(&backend->mr);
  
          if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
-                               backend->prealloc_context, errp)) {
+                               backend->prealloc_context, false, errp)) {
              return;
          }
          backend->prealloc = true;
@@ -323,6 +324,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
      HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
      void *ptr;
      uint64_t sz;
+    bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
  
      if (!bc->alloc) {
          return;
@@ -402,7 +404,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
      if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
                                                  ptr, sz,
                                                  backend->prealloc_threads,
-                                                backend->prealloc_context, errp)) {
+                                                backend->prealloc_context,
+                                                async, errp)) {
          return;
      }
  }
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c

index 99ab989852b0a60fcab9f2a25509bf3d21096d4d..ffd119ebacb766773322137a82bb64e1bb7e7b14 100644 (file)
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
          int fd = memory_region_get_fd(&vmem->memdev->mr);
          Error *local_err = NULL;
  
-        if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+        if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
              static bool warned;
  
              /*
@@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg,
      int fd = memory_region_get_fd(&vmem->memdev->mr);
      Error *local_err = NULL;
  
-    if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+    if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
          error_report_err(local_err);
          return -ENOMEM;
      }
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h

index d47536eadb11b76289ac0d4b7e185f91f93a4279..9228e96c87e9af0fe9e6162e011e7813d977ebf4 100644 (file)
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -1083,6 +1083,11 @@ typedef enum MachineInitPhase {
       */
      PHASE_ACCEL_CREATED,
  
+    /*
+     * Late backend objects have been created and initialized.
+     */
+    PHASE_LATE_BACKENDS_CREATED,
+
      /*
       * machine_class->init has been called, thus creating any embedded
       * devices and validating machine properties.  Devices created at
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h

index c9692cc31425e64e2d1a09f7eabea21988fd4eb3..7d359dabc46a0990e6f70f319377f350515e4040 100644 (file)
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext;
   * @area: start address of the are to preallocate
   * @sz: the size of the area to preallocate
   * @max_threads: maximum number of threads to use
+ * @tc: prealloc context threads pointer, NULL if not in use
+ * @async: request asynchronous preallocation, requires @tc
   * @errp: returns an error if this function fails
   *
   * Preallocate memory (populate/prefault page tables writable) for the virtual
@@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext;
   * each page in the area was faulted in writable at least once, for example,
   * after allocating file blocks for mapped files.
   *
+ * When setting @async, allocation might be performed asynchronously.
+ * qemu_finish_async_prealloc_mem() must be called to finish any asynchronous
+ * preallocation.
+ *
   * Return: true on success, else false setting @errp with error.
   */
  bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
-                       ThreadContext *tc, Error **errp);
+                       ThreadContext *tc, bool async, Error **errp);
+
+/**
+ * qemu_finish_async_prealloc_mem:
+ * @errp: returns an error if this function fails
+ *
+ * Finish all outstanding asynchronous memory preallocation.
+ *
+ * Return: true on success, else false setting @errp with error.
+ */
+bool qemu_finish_async_prealloc_mem(Error **errp);
  
  /**
   * qemu_get_pid_name:
diff --git a/system/vl.c b/system/vl.c

index bb959cbc440b83131ef8f9bb10c96289014b1387..2a0bd08ff19217e5f1c1c393275bc6c3bb8bf251 100644 (file)
--- a/system/vl.c
+++ b/system/vl.c
@@ -2013,6 +2013,14 @@ static void qemu_create_late_backends(void)
  
      object_option_foreach_add(object_create_late);
  
+    /*
+     * Wait for any outstanding memory prealloc from created memory
+     * backends to complete.
+     */
+    if (!qemu_finish_async_prealloc_mem(&error_fatal)) {
+        exit(1);
+    }
+
      if (tpm_init() < 0) {
          exit(1);
      }
@@ -3699,6 +3707,7 @@ void qemu_init(int argc, char **argv)
       * over memory-backend-file objects).
       */
      qemu_create_late_backends();
+    phase_advance(PHASE_LATE_BACKENDS_CREATED);
  
      /*
       * Note: creates a QOM object, must run only after global and
diff --git a/util/oslib-posix.c b/util/oslib-posix.c

index 7c297003b9ff27dce09524aaf55ffc451db8469b..3c379f96c26d6b55fe7114f791ece2afa2765272 100644 (file)
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -42,6 +42,7 @@
  #include "qemu/cutils.h"
  #include "qemu/units.h"
  #include "qemu/thread-context.h"
+#include "qemu/main-loop.h"
  
  #ifdef CONFIG_LINUX
  #include <sys/syscall.h>
@@ -63,11 +64,15 @@
  
  struct MemsetThread;
  
+static QLIST_HEAD(, MemsetContext) memset_contexts =
+    QLIST_HEAD_INITIALIZER(memset_contexts);
+
  typedef struct MemsetContext {
      bool all_threads_created;
      bool any_thread_failed;
      struct MemsetThread *threads;
      int num_threads;
+    QLIST_ENTRY(MemsetContext) next;
  } MemsetContext;
  
  struct MemsetThread {
@@ -412,19 +417,44 @@ static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
      return ret;
  }
  
+static int wait_and_free_mem_prealloc_context(MemsetContext *context)
+{
+    int i, ret = 0, tmp;
+
+    for (i = 0; i < context->num_threads; i++) {
+        tmp = (uintptr_t)qemu_thread_join(&context->threads[i].pgthread);
+
+        if (tmp) {
+            ret = tmp;
+        }
+    }
+    g_free(context->threads);
+    g_free(context);
+    return ret;
+}
+
  static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
-                           int max_threads, ThreadContext *tc,
+                           int max_threads, ThreadContext *tc, bool async,
                             bool use_madv_populate_write)
  {
      static gsize initialized = 0;
-    MemsetContext context = {
-        .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
-    };
+    MemsetContext *context = g_malloc0(sizeof(MemsetContext));
      size_t numpages_per_thread, leftover;
      void *(*touch_fn)(void *);
-    int ret = 0, i = 0;
+    int ret, i = 0;
      char *addr = area;
  
+    /*
+     * Asynchronous preallocation is only allowed when using MADV_POPULATE_WRITE
+     * and prealloc context for thread placement.
+     */
+    if (!use_madv_populate_write || !tc) {
+        async = false;
+    }
+
+    context->num_threads =
+        get_memset_num_threads(hpagesize, numpages, max_threads);
+
      if (g_once_init_enter(&initialized)) {
          qemu_mutex_init(&page_mutex);
          qemu_cond_init(&page_cond);
@@ -432,8 +462,11 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
      }
  
      if (use_madv_populate_write) {
-        /* Avoid creating a single thread for MADV_POPULATE_WRITE */
-        if (context.num_threads == 1) {
+        /*
+         * Avoid creating a single thread for MADV_POPULATE_WRITE when
+         * preallocating synchronously.
+         */
+        if (context->num_threads == 1 && !async) {
              if (qemu_madvise(area, hpagesize * numpages,
                               QEMU_MADV_POPULATE_WRITE)) {
                  return -errno;
@@ -445,50 +478,86 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
          touch_fn = do_touch_pages;
      }
  
-    context.threads = g_new0(MemsetThread, context.num_threads);
-    numpages_per_thread = numpages / context.num_threads;
-    leftover = numpages % context.num_threads;
-    for (i = 0; i < context.num_threads; i++) {
-        context.threads[i].addr = addr;
-        context.threads[i].numpages = numpages_per_thread + (i < leftover);
-        context.threads[i].hpagesize = hpagesize;
-        context.threads[i].context = &context;
+    context->threads = g_new0(MemsetThread, context->num_threads);
+    numpages_per_thread = numpages / context->num_threads;
+    leftover = numpages % context->num_threads;
+    for (i = 0; i < context->num_threads; i++) {
+        context->threads[i].addr = addr;
+        context->threads[i].numpages = numpages_per_thread + (i < leftover);
+        context->threads[i].hpagesize = hpagesize;
+        context->threads[i].context = context;
          if (tc) {
-            thread_context_create_thread(tc, &context.threads[i].pgthread,
+            thread_context_create_thread(tc, &context->threads[i].pgthread,
                                           "touch_pages",
-                                         touch_fn, &context.threads[i],
+                                         touch_fn, &context->threads[i],
                                           QEMU_THREAD_JOINABLE);
          } else {
-            qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
-                               touch_fn, &context.threads[i],
+            qemu_thread_create(&context->threads[i].pgthread, "touch_pages",
+                               touch_fn, &context->threads[i],
                                 QEMU_THREAD_JOINABLE);
          }
-        addr += context.threads[i].numpages * hpagesize;
+        addr += context->threads[i].numpages * hpagesize;
+    }
+
+    if (async) {
+        /*
+         * async requests currently require the BQL. Add it to the list and kick
+         * preallocation off during qemu_finish_async_prealloc_mem().
+         */
+        assert(bql_locked());
+        QLIST_INSERT_HEAD(&memset_contexts, context, next);
+        return 0;
      }
  
      if (!use_madv_populate_write) {
-        sigbus_memset_context = &context;
+        sigbus_memset_context = context;
      }
  
      qemu_mutex_lock(&page_mutex);
-    context.all_threads_created = true;
+    context->all_threads_created = true;
      qemu_cond_broadcast(&page_cond);
      qemu_mutex_unlock(&page_mutex);
  
-    for (i = 0; i < context.num_threads; i++) {
-        int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread);
+    ret = wait_and_free_mem_prealloc_context(context);
  
+    if (!use_madv_populate_write) {
+        sigbus_memset_context = NULL;
+    }
+    return ret;
+}
+
+bool qemu_finish_async_prealloc_mem(Error **errp)
+{
+    int ret = 0, tmp;
+    MemsetContext *context, *next_context;
+
+    /* Waiting for preallocation requires the BQL. */
+    assert(bql_locked());
+    if (QLIST_EMPTY(&memset_contexts)) {
+        return true;
+    }
+
+    qemu_mutex_lock(&page_mutex);
+    QLIST_FOREACH(context, &memset_contexts, next) {
+        context->all_threads_created = true;
+    }
+    qemu_cond_broadcast(&page_cond);
+    qemu_mutex_unlock(&page_mutex);
+
+    QLIST_FOREACH_SAFE(context, &memset_contexts, next, next_context) {
+        QLIST_REMOVE(context, next);
+        tmp = wait_and_free_mem_prealloc_context(context);
          if (tmp) {
              ret = tmp;
          }
      }
  
-    if (!use_madv_populate_write) {
-        sigbus_memset_context = NULL;
+    if (ret) {
+        error_setg_errno(errp, -ret,
+                         "qemu_prealloc_mem: preallocating memory failed");
+        return false;
      }
-    g_free(context.threads);
-
-    return ret;
+    return true;
  }
  
  static bool madv_populate_write_possible(char *area, size_t pagesize)
@@ -498,7 +567,7 @@ static bool madv_populate_write_possible(char *area, size_t pagesize)
  }
  
  bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
-                       ThreadContext *tc, Error **errp)
+                       ThreadContext *tc, bool async, Error **errp)
  {
      static gsize initialized;
      int ret;
@@ -540,7 +609,7 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
      }
  
      /* touch pages simultaneously */
-    ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
+    ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, async,
                            use_madv_populate_write);
      if (ret) {
          error_setg_errno(errp, -ret,
diff --git a/util/oslib-win32.c b/util/oslib-win32.c

index c4a5f05a49ad9c6fc3f5f39314ef20eb4a75b970..b623830d624f28b9cd2876f57684e9e5b69b8da7 100644 (file)
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -265,7 +265,7 @@ int getpagesize(void)
  }
  
  bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
-                       ThreadContext *tc, Error **errp)
+                       ThreadContext *tc, bool async, Error **errp)
  {
      int i;
      size_t pagesize = qemu_real_host_page_size();
@@ -278,6 +278,12 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
      return true;
  }
  
+bool qemu_finish_async_prealloc_mem(Error **errp)
+{
+    /* async prealloc not supported, there is nothing to finish */
+    return true;
+}
+
  char *qemu_get_pid_name(pid_t pid)
  {
      /* XXX Implement me */
author	Mark Kanda <mark.kanda@oracle.com>
	Wed, 31 Jan 2024 16:53:27 +0000 (10:53 -0600)
committer	David Hildenbrand <david@redhat.com>
	Tue, 6 Feb 2024 07:15:22 +0000 (08:15 +0100)
backends/hostmem.c		patch \| blob \| blame \| history
hw/virtio/virtio-mem.c		patch \| blob \| blame \| history
include/hw/qdev-core.h		patch \| blob \| blame \| history
include/qemu/osdep.h		patch \| blob \| blame \| history
system/vl.c		patch \| blob \| blame \| history
util/oslib-posix.c		patch \| blob \| blame \| history
util/oslib-win32.c		patch \| blob \| blame \| history