throttle: use enum ThrottleDirection instead of bool is_write

[mirror_qemu.git] / block / block-backend.c
diff --git a/block/block-backend.c b/block/block-backend.c

index 2ae768f24d68b2fc4df15a4cf8447d6652b1c90d..4009ed5feda580c8a99304dcf35fa11d025458a8 100644 (file)
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -81,8 +81,9 @@ struct BlockBackend {
      QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
  
      int quiesce_counter; /* atomic: written under BQL, read by other threads */
+    QemuMutex queued_requests_lock; /* protects queued_requests */
      CoQueue queued_requests;
-    bool disable_request_queuing;
+    bool disable_request_queuing; /* atomic */
  
      VMChangeStateEntry *vmsh;
      bool force_allow_inactivate;
@@ -368,6 +369,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
  
      block_acct_init(&blk->stats);
  
+    qemu_mutex_init(&blk->queued_requests_lock);
      qemu_co_queue_init(&blk->queued_requests);
      notifier_list_init(&blk->remove_bs_notifiers);
      notifier_list_init(&blk->insert_bs_notifiers);
@@ -387,6 +389,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
   * Both sets of permissions can be changed later using blk_set_perm().
   *
   * Return the new BlockBackend on success, null on failure.
+ *
+ * Callers must hold the AioContext lock of @bs.
   */
  BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
                                uint64_t shared_perm, Error **errp)
@@ -404,11 +408,15 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
  
  /*
   * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
- * The new BlockBackend is in the main AioContext.
+ * By default, the new BlockBackend is in the main AioContext, but if the
+ * parameters connect it with any existing node in a different AioContext, it
+ * may end up there instead.
   *
   * Just as with bdrv_open(), after having called this function the reference to
   * @options belongs to the block layer (even on failure).
   *
+ * Called without holding an AioContext lock.
+ *
   * TODO: Remove @filename and @flags; it should be possible to specify a whole
   * BDS tree just by specifying the @options QDict (or @reference,
   * alternatively). At the time of adding this function, this is not possible,
@@ -420,6 +428,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
  {
      BlockBackend *blk;
      BlockDriverState *bs;
+    AioContext *ctx;
      uint64_t perm = 0;
      uint64_t shared = BLK_PERM_ALL;
  
@@ -449,16 +458,24 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
          shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
      }
  
-    blk = blk_new(qemu_get_aio_context(), perm, shared);
+    aio_context_acquire(qemu_get_aio_context());
      bs = bdrv_open(filename, reference, options, flags, errp);
+    aio_context_release(qemu_get_aio_context());
      if (!bs) {
-        blk_unref(blk);
          return NULL;
      }
  
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
-                                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                                       perm, shared, blk, errp);
+    /* bdrv_open() could have moved bs to a different AioContext */
+    ctx = bdrv_get_aio_context(bs);
+    blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
+    blk->perm = perm;
+    blk->shared_perm = shared;
+
+    aio_context_acquire(ctx);
+    blk_insert_bs(blk, bs, errp);
+    bdrv_unref(bs);
+    aio_context_release(ctx);
+
      if (!blk->root) {
          blk_unref(blk);
          return NULL;
@@ -485,6 +502,8 @@ static void blk_delete(BlockBackend *blk)
      assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
      assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
      assert(QLIST_EMPTY(&blk->aio_notifiers));
+    assert(qemu_co_queue_empty(&blk->queued_requests));
+    qemu_mutex_destroy(&blk->queued_requests_lock);
      QTAILQ_REMOVE(&block_backends, blk, link);
      drive_info_del(blk->legacy_dinfo);
      block_acct_cleanup(&blk->stats);
@@ -897,6 +916,8 @@ void blk_remove_bs(BlockBackend *blk)
  
  /*
   * Associates a new BlockDriverState with @blk.
+ *
+ * Callers must hold the AioContext lock of @bs.
   */
  int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
  {
@@ -1232,7 +1253,7 @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
  void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
  {
      IO_CODE();
-    blk->disable_request_queuing = disable;
+    qatomic_set(&blk->disable_request_queuing, disable);
  }
  
  static int coroutine_fn GRAPH_RDLOCK
@@ -1266,15 +1287,30 @@ blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
      return 0;
  }
  
+/* Are we currently in a drained section? */
+bool blk_in_drain(BlockBackend *blk)
+{
+    GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */
+    return qatomic_read(&blk->quiesce_counter);
+}
+
  /* To be called between exactly one pair of blk_inc/dec_in_flight() */
  static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
  {
      assert(blk->in_flight > 0);
  
-    if (qatomic_read(&blk->quiesce_counter) && !blk->disable_request_queuing) {
+    if (qatomic_read(&blk->quiesce_counter) &&
+        !qatomic_read(&blk->disable_request_queuing)) {
+        /*
+         * Take lock before decrementing in flight counter so main loop thread
+         * waits for us to enqueue ourselves before it can leave the drained
+         * section.
+         */
+        qemu_mutex_lock(&blk->queued_requests_lock);
          blk_dec_in_flight(blk);
-        qemu_co_queue_wait(&blk->queued_requests, NULL);
+        qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
          blk_inc_in_flight(blk);
+        qemu_mutex_unlock(&blk->queued_requests_lock);
      }
  }
  
@@ -1833,6 +1869,204 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
      return ret;
  }
  
+static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
+                                   (unsigned int*)(uintptr_t)acb->bytes,
+                                   rwco->iobuf);
+    blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
+                                unsigned int *nr_zones,
+                                BlockZoneDescriptor  *zones,
+                                BlockCompletionFunc *cb, void *opaque)
+{
+    BlkAioEmAIOCB *acb;
+    Coroutine *co;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+    acb->rwco = (BlkRwCo) {
+        .blk    = blk,
+        .offset = offset,
+        .iobuf  = zones,
+        .ret    = NOT_DONE,
+    };
+    acb->bytes = (int64_t)(uintptr_t)nr_zones,
+    acb->has_returned = false;
+
+    co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
+    aio_co_enter(blk_get_aio_context(blk), co);
+
+    acb->has_returned = true;
+    if (acb->rwco.ret != NOT_DONE) {
+        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+                                         blk_aio_complete_bh, acb);
+    }
+
+    return &acb->common;
+}
+
+static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_co_zone_mgmt(rwco->blk,
+                                 (BlockZoneOp)(uintptr_t)rwco->iobuf,
+                                 rwco->offset, acb->bytes);
+    blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+                              int64_t offset, int64_t len,
+                              BlockCompletionFunc *cb, void *opaque) {
+    BlkAioEmAIOCB *acb;
+    Coroutine *co;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+    acb->rwco = (BlkRwCo) {
+        .blk    = blk,
+        .offset = offset,
+        .iobuf  = (void *)(uintptr_t)op,
+        .ret    = NOT_DONE,
+    };
+    acb->bytes = len;
+    acb->has_returned = false;
+
+    co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
+    aio_co_enter(blk_get_aio_context(blk), co);
+
+    acb->has_returned = true;
+    if (acb->rwco.ret != NOT_DONE) {
+        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+                                         blk_aio_complete_bh, acb);
+    }
+
+    return &acb->common;
+}
+
+static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes,
+                                   rwco->iobuf, rwco->flags);
+    blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
+                                QEMUIOVector *qiov, BdrvRequestFlags flags,
+                                BlockCompletionFunc *cb, void *opaque) {
+    BlkAioEmAIOCB *acb;
+    Coroutine *co;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+    acb->rwco = (BlkRwCo) {
+        .blk    = blk,
+        .ret    = NOT_DONE,
+        .flags  = flags,
+        .iobuf  = qiov,
+    };
+    acb->bytes = (int64_t)(uintptr_t)offset;
+    acb->has_returned = false;
+
+    co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
+    aio_co_enter(blk_get_aio_context(blk), co);
+    acb->has_returned = true;
+    if (acb->rwco.ret != NOT_DONE) {
+        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+                                         blk_aio_complete_bh, acb);
+    }
+
+    return &acb->common;
+}
+
+/*
+ * Send a zone_report command.
+ * offset is a byte offset from the start of the device. No alignment
+ * required for offset.
+ * nr_zones represents IN maximum and OUT actual.
+ */
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+                                    unsigned int *nr_zones,
+                                    BlockZoneDescriptor *zones)
+{
+    int ret;
+    IO_CODE();
+
+    blk_inc_in_flight(blk); /* increase before waiting */
+    blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
+    if (!blk_is_available(blk)) {
+        blk_dec_in_flight(blk);
+        return -ENOMEDIUM;
+    }
+    ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
+    blk_dec_in_flight(blk);
+    return ret;
+}
+
+/*
+ * Send a zone_management command.
+ * op is the zone operation;
+ * offset is the byte offset from the start of the zoned device;
+ * len is the maximum number of bytes the command should operate on. It
+ * should be aligned with the device zone size.
+ */
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+        int64_t offset, int64_t len)
+{
+    int ret;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
+
+    ret = blk_check_byte_request(blk, offset, len);
+    if (ret < 0) {
+        blk_dec_in_flight(blk);
+        return ret;
+    }
+
+    ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
+    blk_dec_in_flight(blk);
+    return ret;
+}
+
+/*
+ * Send a zone_append command.
+ */
+int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
+        QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    int ret;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
+    if (!blk_is_available(blk)) {
+        blk_dec_in_flight(blk);
+        return -ENOMEDIUM;
+    }
+
+    ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
+    blk_dec_in_flight(blk);
+    return ret;
+}
+
  void blk_drain(BlockBackend *blk)
  {
      BlockDriverState *bs = blk_bs(blk);
@@ -1862,14 +2096,8 @@ void blk_drain_all(void)
      bdrv_drain_all_begin();
  
      while ((blk = blk_all_next(blk)) != NULL) {
-        AioContext *ctx = blk_get_aio_context(blk);
-
-        aio_context_acquire(ctx);
-
          /* We may have -ENOMEDIUM completions in flight */
-        AIO_WAIT_WHILE(ctx, qatomic_read(&blk->in_flight) > 0);
-
-        aio_context_release(ctx);
+        AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0);
      }
  
      bdrv_drain_all_end();
@@ -2018,7 +2246,15 @@ void blk_activate(BlockBackend *blk, Error **errp)
          return;
      }
  
-    bdrv_activate(bs, errp);
+    /*
+     * Migration code can call this function in coroutine context, so leave
+     * coroutine context if necessary.
+     */
+    if (qemu_in_coroutine()) {
+        bdrv_co_activate(bs, errp);
+    } else {
+        bdrv_activate(bs, errp);
+    }
  }
  
  bool coroutine_fn blk_co_is_inserted(BlockBackend *blk)
@@ -2182,9 +2418,14 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
  
  AioContext *blk_get_aio_context(BlockBackend *blk)
  {
-    BlockDriverState *bs = blk_bs(blk);
+    BlockDriverState *bs;
      IO_CODE();
  
+    if (!blk) {
+        return qemu_get_aio_context();
+    }
+
+    bs = blk_bs(blk);
      if (bs) {
          AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
          assert(ctx == blk->ctx);
@@ -2199,52 +2440,31 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
      return blk_get_aio_context(blk_acb->blk);
  }
  
-static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
-                                  bool update_root_node, Error **errp)
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                        Error **errp)
  {
+    bool old_allow_change;
      BlockDriverState *bs = blk_bs(blk);
-    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
      int ret;
  
-    if (bs) {
-        bdrv_ref(bs);
-
-        if (update_root_node) {
-            /*
-             * update_root_node MUST be false for blk_root_set_aio_ctx_commit(),
-             * as we are already in the commit function of a transaction.
-             */
-            ret = bdrv_try_change_aio_context(bs, new_context, blk->root, errp);
-            if (ret < 0) {
-                bdrv_unref(bs);
-                return ret;
-            }
-        }
-        /*
-         * Make blk->ctx consistent with the root node before we invoke any
-         * other operations like drain that might inquire blk->ctx
-         */
-        blk->ctx = new_context;
-        if (tgm->throttle_state) {
-            bdrv_drained_begin(bs);
-            throttle_group_detach_aio_context(tgm);
-            throttle_group_attach_aio_context(tgm, new_context);
-            bdrv_drained_end(bs);
-        }
+    GLOBAL_STATE_CODE();
  
-        bdrv_unref(bs);
-    } else {
+    if (!bs) {
          blk->ctx = new_context;
+        return 0;
      }
  
-    return 0;
-}
+    bdrv_ref(bs);
  
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
-                        Error **errp)
-{
-    GLOBAL_STATE_CODE();
-    return blk_do_set_aio_context(blk, new_context, true, errp);
+    old_allow_change = blk->allow_aio_context_change;
+    blk->allow_aio_context_change = true;
+
+    ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);
+
+    blk->allow_aio_context_change = old_allow_change;
+
+    bdrv_unref(bs);
+    return ret;
  }
  
  typedef struct BdrvStateBlkRootContext {
@@ -2256,8 +2476,14 @@ static void blk_root_set_aio_ctx_commit(void *opaque)
  {
      BdrvStateBlkRootContext *s = opaque;
      BlockBackend *blk = s->blk;
+    AioContext *new_context = s->new_ctx;
+    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
  
-    blk_do_set_aio_context(blk, s->new_ctx, false, &error_abort);
+    blk->ctx = new_context;
+    if (tgm->throttle_state) {
+        throttle_group_detach_aio_context(tgm);
+        throttle_group_attach_aio_context(tgm, new_context);
+    }
  }
  
  static TransactionActionDrv set_blk_root_context = {
@@ -2356,28 +2582,6 @@ void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
      notifier_list_add(&blk->insert_bs_notifiers, notify);
  }
  
-void coroutine_fn blk_co_io_plug(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    IO_CODE();
-    GRAPH_RDLOCK_GUARD();
-
-    if (bs) {
-        bdrv_co_io_plug(bs);
-    }
-}
-
-void coroutine_fn blk_co_io_unplug(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    IO_CODE();
-    GRAPH_RDLOCK_GUARD();
-
-    if (bs) {
-        bdrv_co_io_unplug(bs);
-    }
-}
-
  BlockAcctStats *blk_get_stats(BlockBackend *blk)
  {
      IO_CODE();
@@ -2633,9 +2837,12 @@ static void blk_root_drained_end(BdrvChild *child)
          if (blk->dev_ops && blk->dev_ops->drained_end) {
              blk->dev_ops->drained_end(blk->dev_opaque);
          }
-        while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+        qemu_mutex_lock(&blk->queued_requests_lock);
+        while (qemu_co_enter_next(&blk->queued_requests,
+                                  &blk->queued_requests_lock)) {
              /* Resume all queued requests */
          }
+        qemu_mutex_unlock(&blk->queued_requests_lock);
      }
  }