#include "qemu/osdep.h"
#include "trace.h"
-#include "qemu-common.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
-#include "qemu/coroutine.h"
#include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
+#include "qemu/cutils.h"
#include "block/aio.h"
enum {
- POOL_BATCH_SIZE = 64,
+ COROUTINE_POOL_BATCH_MAX_SIZE = 128,
};
-/** Free list to speed up creation */
-static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int release_pool_size;
-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
-static __thread unsigned int alloc_pool_size;
-static __thread Notifier coroutine_pool_cleanup_notifier;
+/*
+ * Coroutine creation and deletion are expensive, so a pool of unused coroutines
+ * is kept as a cache. When the pool has coroutines available, they are
+ * recycled instead of creating new ones from scratch. Coroutines are added to
+ * the pool upon termination.
+ *
+ * The pool is global but each thread maintains a small local pool to avoid
+ * global pool contention. Threads fetch and return batches of coroutines from
+ * the global pool to maintain their local pool. The local pool holds up to two
+ * batches whereas the maximum size of the global pool is controlled by the
+ * qemu_coroutine_inc_pool_size() API.
+ *
+ * .-----------------------------------.
+ * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
+ * `-----------------------------------'
+ *
+ * .-------------------.
+ * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
+ * `-------------------'
+ */
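+
+/*
+ * Fast-path summary of the code below: qemu_coroutine_create() first tries
+ * coroutine_pool_get_local(); on a miss, coroutine_pool_refill_local() moves
+ * one batch from the global pool into the local pool. Terminated coroutines
+ * come back through coroutine_pool_put(), which evicts a full batch to the
+ * global pool once the local pool reaches its two-batch capacity.
+ */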
+typedef struct CoroutinePoolBatch {
+ /* Batches are kept in a list */
+ QSLIST_ENTRY(CoroutinePoolBatch) next;
+
+ /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */
+ QSLIST_HEAD(, Coroutine) list;
+ unsigned int size;
+} CoroutinePoolBatch;
+
+typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool;
+
+/* Host operating system limit on number of pooled coroutines */
+static unsigned int global_pool_hard_max_size;
+static QemuMutex global_pool_lock; /* protects the following variables */
+static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool);
+static unsigned int global_pool_size;
+static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE;
+
+QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool);
+QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier);
+
+static CoroutinePoolBatch *coroutine_pool_batch_new(void)
+{
+ CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1);
+
+ QSLIST_INIT(&batch->list);
+ batch->size = 0;
+ return batch;
+}
+
-static void coroutine_pool_cleanup(Notifier *n, void *value)
+static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch)
{
Coroutine *co;
Coroutine *tmp;
- QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+ QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) {
+ QSLIST_REMOVE_HEAD(&batch->list, pool_next);
qemu_coroutine_delete(co);
}
+ g_free(batch);
}
-Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
+static void local_pool_cleanup(Notifier *n, void *value)
{
- Coroutine *co = NULL;
+ CoroutinePool *local_pool = get_ptr_local_pool();
+ CoroutinePoolBatch *batch;
+ CoroutinePoolBatch *tmp;
+
+ QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) {
+ QSLIST_REMOVE_HEAD(local_pool, next);
+ coroutine_pool_batch_delete(batch);
+ }
+}
+
+/* Ensure the atexit notifier is registered */
+static void local_pool_cleanup_init_once(void)
+{
+ Notifier *notifier = get_ptr_local_pool_cleanup_notifier();
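+    /*
+     * The notifier lives in thread-local storage, so this unlocked check is
+     * safe: notifier->notify doubles as the "already registered" flag and
+     * only the current thread ever accesses it.
+     */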
+ if (!notifier->notify) {
+ notifier->notify = local_pool_cleanup;
+ qemu_thread_atexit_add(notifier);
+ }
+}
- if (CONFIG_COROUTINE_POOL) {
- co = QSLIST_FIRST(&alloc_pool);
- if (!co) {
- if (release_pool_size > POOL_BATCH_SIZE) {
- /* Slow path; a good place to register the destructor, too. */
- if (!coroutine_pool_cleanup_notifier.notify) {
- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
- }
-
- /* This is not exact; there could be a little skew between
- * release_pool_size and the actual size of release_pool. But
- * it is just a heuristic, it does not need to be perfect.
- */
- alloc_pool_size = atomic_xchg(&release_pool_size, 0);
- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
- co = QSLIST_FIRST(&alloc_pool);
- }
+/* Helper to get the next unused coroutine from the local pool */
+static Coroutine *coroutine_pool_get_local(void)
+{
+ CoroutinePool *local_pool = get_ptr_local_pool();
+ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
+ Coroutine *co;
+
+ if (unlikely(!batch)) {
+ return NULL;
+ }
+
+ co = QSLIST_FIRST(&batch->list);
+ QSLIST_REMOVE_HEAD(&batch->list, pool_next);
+ batch->size--;
+
+ if (batch->size == 0) {
+ QSLIST_REMOVE_HEAD(local_pool, next);
+ coroutine_pool_batch_delete(batch);
+ }
+ return co;
+}
+
+/* Get the next batch from the global pool */
+static void coroutine_pool_refill_local(void)
+{
+ CoroutinePool *local_pool = get_ptr_local_pool();
+ CoroutinePoolBatch *batch;
+
+ WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
+ batch = QSLIST_FIRST(&global_pool);
+
+ if (batch) {
+ QSLIST_REMOVE_HEAD(&global_pool, next);
+ global_pool_size -= batch->size;
}
- if (co) {
- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
- alloc_pool_size--;
+ }
+
+ if (batch) {
+ QSLIST_INSERT_HEAD(local_pool, batch, next);
+ local_pool_cleanup_init_once();
+ }
+}
+
+/* Add a batch of coroutines to the global pool */
+static void coroutine_pool_put_global(CoroutinePoolBatch *batch)
+{
+ WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
+ unsigned int max = MIN(global_pool_max_size,
+ global_pool_hard_max_size);
+
+ if (global_pool_size < max) {
+ QSLIST_INSERT_HEAD(&global_pool, batch, next);
+
+ /* Overshooting the max pool size is allowed */
+ global_pool_size += batch->size;
+ return;
}
}
+ /* The global pool was full, so throw away this batch */
+ coroutine_pool_batch_delete(batch);
+}
+
+/* Get the next unused coroutine from the pool or return NULL */
+static Coroutine *coroutine_pool_get(void)
+{
+ Coroutine *co;
+
+ co = coroutine_pool_get_local();
+ if (!co) {
+ coroutine_pool_refill_local();
+ co = coroutine_pool_get_local();
+ }
+ return co;
+}
+
+static void coroutine_pool_put(Coroutine *co)
+{
+ CoroutinePool *local_pool = get_ptr_local_pool();
+ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
+
+ if (unlikely(!batch)) {
+ batch = coroutine_pool_batch_new();
+ QSLIST_INSERT_HEAD(local_pool, batch, next);
+ local_pool_cleanup_init_once();
+ }
+
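+    /*
+     * The head batch fills first. Once it is full, evict it to the global
+     * pool if a second batch already exists (the local pool is then at its
+     * two-batch capacity) and start a fresh head batch.
+     */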
+ if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) {
+ CoroutinePoolBatch *next = QSLIST_NEXT(batch, next);
+
+ /* Is the local pool full? */
+ if (next) {
+ QSLIST_REMOVE_HEAD(local_pool, next);
+ coroutine_pool_put_global(batch);
+ }
+
+ batch = coroutine_pool_batch_new();
+ QSLIST_INSERT_HEAD(local_pool, batch, next);
+ }
+
+ QSLIST_INSERT_HEAD(&batch->list, co, pool_next);
+ batch->size++;
+}
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
+{
+ Coroutine *co = NULL;
+
+ if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
+ co = coroutine_pool_get();
+ }
+
if (!co) {
co = qemu_coroutine_new();
}
    co->entry = entry;
    co->entry_arg = opaque;
    QSIMPLEQ_INIT(&co->co_queue_wakeup);
    return co;
}

static void coroutine_delete(Coroutine *co)
{
co->caller = NULL;
- if (CONFIG_COROUTINE_POOL) {
- if (release_pool_size < POOL_BATCH_SIZE * 2) {
- QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
- atomic_inc(&release_pool_size);
- return;
- }
- if (alloc_pool_size < POOL_BATCH_SIZE) {
- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
- alloc_pool_size++;
- return;
- }
+ if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
+ coroutine_pool_put(co);
+ } else {
+ qemu_coroutine_delete(co);
}
-
- qemu_coroutine_delete(co);
}
-void qemu_coroutine_enter(Coroutine *co)
+void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
{
- Coroutine *self = qemu_coroutine_self();
- CoroutineAction ret;
+ QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending);
+ Coroutine *from = qemu_coroutine_self();
- trace_qemu_coroutine_enter(self, co, co->entry_arg);
+ QSIMPLEQ_INSERT_TAIL(&pending, co, co_queue_next);
- if (co->caller) {
- fprintf(stderr, "Co-routine re-entered recursively\n");
- abort();
- }
+ /* Run co and any queued coroutines */
+ while (!QSIMPLEQ_EMPTY(&pending)) {
+ Coroutine *to = QSIMPLEQ_FIRST(&pending);
+ CoroutineAction ret;
- co->caller = self;
- co->ctx = qemu_get_current_aio_context();
+ /*
+ * Read to before to->scheduled; pairs with qatomic_cmpxchg in
+ * qemu_co_sleep(), aio_co_schedule() etc.
+ */
+ smp_read_barrier_depends();
- /* Store co->ctx before anything that stores co. Matches
- * barrier in aio_co_wake and qemu_co_mutex_wake.
- */
- smp_wmb();
+ const char *scheduled = qatomic_read(&to->scheduled);
- ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);
+ QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next);
- qemu_co_queue_run_restart(co);
+ trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg);
- switch (ret) {
- case COROUTINE_YIELD:
- return;
- case COROUTINE_TERMINATE:
- assert(!co->locks_held);
- trace_qemu_coroutine_terminate(co);
- coroutine_delete(co);
- return;
- default:
- abort();
+ /* if the Coroutine has already been scheduled, entering it again will
+ * cause us to enter it twice, potentially even after the coroutine has
+ * been deleted */
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
+
+ if (to->caller) {
+ fprintf(stderr, "Co-routine re-entered recursively\n");
+ abort();
+ }
+
+ to->caller = from;
+ to->ctx = ctx;
+
+ /* Store to->ctx before anything that stores to. Matches
+ * barrier in aio_co_wake and qemu_co_mutex_wake.
+ */
+ smp_wmb();
+
+ ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER);
+
+ /* Queued coroutines are run depth-first; previously pending coroutines
+ * run after those queued more recently.
+ */
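+        /*
+         * Illustrative trace (not part of the original comment): if A wakes B
+         * and then C, and B wakes D, the coroutines run in the order
+         * A, B, D, C.
+         */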
+ QSIMPLEQ_PREPEND(&pending, &to->co_queue_wakeup);
+
+ switch (ret) {
+ case COROUTINE_YIELD:
+ break;
+ case COROUTINE_TERMINATE:
+ assert(!to->locks_held);
+ trace_qemu_coroutine_terminate(to);
+ coroutine_delete(to);
+ break;
+ default:
+ abort();
+ }
}
}
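
+/*
+ * Typical usage, as an illustrative sketch (my_coroutine_fn and opaque are
+ * hypothetical, not defined in this file):
+ *
+ *     Coroutine *co = qemu_coroutine_create(my_coroutine_fn, opaque);
+ *     qemu_coroutine_enter(co);
+ */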
+void qemu_coroutine_enter(Coroutine *co)
+{
+ qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co);
+}
+
void qemu_coroutine_enter_if_inactive(Coroutine *co)
{
if (!qemu_coroutine_entered(co)) {
        qemu_coroutine_enter(co);
    }
}

bool qemu_coroutine_entered(Coroutine *co)
{
return co->caller;
}
+
+AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
+{
+ return co->ctx;
+}
+
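+/*
+ * Illustrative usage (hypothetical caller, not part of this patch): a device
+ * that keeps many requests in flight can reserve pool space up front, e.g.
+ *
+ *     qemu_coroutine_inc_pool_size(num_queues * queue_size);
+ *
+ * and drop the reservation with qemu_coroutine_dec_pool_size() when the
+ * device is torn down.
+ */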
+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
+{
+ QEMU_LOCK_GUARD(&global_pool_lock);
+ global_pool_max_size += additional_pool_size;
+}
+
+void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
+{
+ QEMU_LOCK_GUARD(&global_pool_lock);
+ global_pool_max_size -= removing_pool_size;
+}
+
+static unsigned int get_global_pool_hard_max_size(void)
+{
+#ifdef __linux__
+ g_autofree char *contents = NULL;
+ int max_map_count;
+
+ /*
+ * Linux processes can have up to max_map_count virtual memory areas
+     * (VMAs). mmap(2), mprotect(2), etc. fail with ENOMEM beyond this limit. We
+ * must limit the coroutine pool to a safe size to avoid running out of
+ * VMAs.
+ */
+ if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL,
+ NULL) &&
+ qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) {
+ /*
+ * This is an upper bound that avoids exceeding max_map_count. Leave a
+ * fixed amount for non-coroutine users like library dependencies,
+ * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the
+ * remaining amount.
+ */
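+        /*
+         * Worked example, assuming the common Linux default
+         * vm.max_map_count = 65530: (65530 - 5000) / 2 = 30265 pooled
+         * coroutines at most.
+         */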
+ if (max_map_count > 5000) {
+ return (max_map_count - 5000) / 2;
+ } else {
+ /* Disable the global pool but threads still have local pools */
+ return 0;
+ }
+ }
+#endif
+
+ return UINT_MAX;
+}
+
+static void __attribute__((constructor)) qemu_coroutine_init(void)
+{
+ qemu_mutex_init(&global_pool_lock);
+ global_pool_hard_max_size = get_global_pool_hard_max_size();
+}