#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
+#include "block/block_int-io.h"
+#include "block/dirty-bitmap.h"
+#include "block/reqlist.h"
#include "sysemu/block-backend.h"
#include "qemu/units.h"
+#include "qemu/co-shared-resource.h"
#include "qemu/coroutine.h"
+#include "qemu/ratelimit.h"
#include "block/aio_task.h"
+#include "qemu/error-report.h"
+#include "qemu/memalign.h"
#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
#define BLOCK_COPY_MAX_MEM (128 * MiB)
#define BLOCK_COPY_MAX_WORKERS 64
#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
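+/*
+ * Default (and minimum) block-copy cluster size: 64 KiB. Used by
+ * block_copy_calculate_cluster_size() as the fallback when the target
+ * reports no cluster size, and as the lower bound otherwise.
+ */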
+#define BLOCK_COPY_CLUSTER_SIZE_DEFAULT (1 << 16)
typedef enum {
COPY_READ_WRITE_CLUSTER,
QLIST_ENTRY(BlockCopyCallState) list;
/*
- * Fields that report information about return values and erros.
+ * Fields that report information about return values and errors.
* Protected by lock in BlockCopyState.
*/
bool error_is_read;
*/
BlockCopyState *s;
BlockCopyCallState *call_state;
- int64_t offset;
/*
* @method can also be set again in the while loop of
* block_copy_dirty_clusters(), but it is never accessed concurrently
BlockCopyMethod method;
/*
- * Fields whose state changes throughout the execution
- * Protected by lock in BlockCopyState.
- */
- CoQueue wait_queue; /* coroutines blocked on this task */
- /*
- * Only protect the case of parallel read while updating @bytes
- * value in block_copy_task_shrink().
+ * Generally, req is protected by lock in BlockCopyState. Still, req.offset
+ * is only set on task creation, so it may be read concurrently after
+ * creation. req.bytes is changed at most once, and we only need to protect
+ * the case of a parallel read while updating @bytes value in
+ * block_copy_task_shrink().
*/
- int64_t bytes;
- QLIST_ENTRY(BlockCopyTask) list;
+ BlockReq req;
} BlockCopyTask;
static int64_t task_end(BlockCopyTask *task)
{
- return task->offset + task->bytes;
+ return task->req.offset + task->req.bytes;
}
typedef struct BlockCopyState {
CoMutex lock;
int64_t in_flight_bytes;
BlockCopyMethod method;
- QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
+ BlockReqList reqs;
QLIST_HEAD(, BlockCopyCallState) calls;
/*
* skip_unallocated:
RateLimit rate_limit;
} BlockCopyState;
-/* Called with lock held */
-static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
- int64_t offset, int64_t bytes)
-{
- BlockCopyTask *t;
-
- QLIST_FOREACH(t, &s->tasks, list) {
- if (offset + bytes > t->offset && offset < t->offset + t->bytes) {
- return t;
- }
- }
-
- return NULL;
-}
-
-/*
- * If there are no intersecting tasks return false. Otherwise, wait for the
- * first found intersecting tasks to finish and return true.
- *
- * Called with lock held. May temporary release the lock.
- * Return value of 0 proves that lock was NOT released.
- */
-static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
- int64_t bytes)
-{
- BlockCopyTask *task = find_conflicting_task(s, offset, bytes);
-
- if (!task) {
- return false;
- }
-
- qemu_co_queue_wait(&task->wait_queue, &s->lock);
-
- return true;
-}
-
/* Called with lock held */
static int64_t block_copy_chunk_size(BlockCopyState *s)
{
bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);
/* region is dirty, so no existent tasks possible in it */
- assert(!find_conflicting_task(s, offset, bytes));
+ assert(!reqlist_find_conflict(&s->reqs, offset, bytes));
bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
s->in_flight_bytes += bytes;
.task.func = block_copy_task_entry,
.s = s,
.call_state = call_state,
- .offset = offset,
- .bytes = bytes,
.method = s->method,
};
- qemu_co_queue_init(&task->wait_queue);
- QLIST_INSERT_HEAD(&s->tasks, task, list);
+ reqlist_init_req(&s->reqs, &task->req, offset, bytes);
return task;
}
int64_t new_bytes)
{
QEMU_LOCK_GUARD(&task->s->lock);
- if (new_bytes == task->bytes) {
+ if (new_bytes == task->req.bytes) {
return;
}
- assert(new_bytes > 0 && new_bytes < task->bytes);
+ assert(new_bytes > 0 && new_bytes < task->req.bytes);
- task->s->in_flight_bytes -= task->bytes - new_bytes;
+ task->s->in_flight_bytes -= task->req.bytes - new_bytes;
bdrv_set_dirty_bitmap(task->s->copy_bitmap,
- task->offset + new_bytes, task->bytes - new_bytes);
+ task->req.offset + new_bytes,
+ task->req.bytes - new_bytes);
- task->bytes = new_bytes;
- qemu_co_queue_restart_all(&task->wait_queue);
+ reqlist_shrink_req(&task->req, new_bytes);
}
static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
{
QEMU_LOCK_GUARD(&task->s->lock);
- task->s->in_flight_bytes -= task->bytes;
+ task->s->in_flight_bytes -= task->req.bytes;
if (ret < 0) {
- bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
+ bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->req.offset,
+ task->req.bytes);
}
- QLIST_REMOVE(task, list);
- progress_set_remaining(task->s->progress,
- bdrv_get_dirty_count(task->s->copy_bitmap) +
- task->s->in_flight_bytes);
- qemu_co_queue_restart_all(&task->wait_queue);
+ if (task->s->progress) {
+ progress_set_remaining(task->s->progress,
+ bdrv_get_dirty_count(task->s->copy_bitmap) +
+ task->s->in_flight_bytes);
+ }
+ reqlist_remove_req(&task->req);
}
void block_copy_state_free(BlockCopyState *s)
target->bs->bl.max_transfer));
}
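+/*
+ * Select the copy method and write flags for the whole copy process,
+ * preserving the BDRV_REQ_SERIALISING flag chosen in block_copy_state_new():
+ * buffered cluster-sized copying when compressing or when max_transfer is
+ * too small, otherwise copy_range or plain read/write per @use_copy_range.
+ */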
+void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+ bool compress)
+{
+ /* Keep BDRV_REQ_SERIALISING as set (or not set) by block_copy_state_new() */
+ s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) |
+ (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+
+ if (s->max_transfer < s->cluster_size) {
+ /*
+ * copy_range does not respect max_transfer. We don't want to bother
+ * with requests smaller than the block-copy cluster size, so fall back to
+ * buffered copying (read and write respect max_transfer on their own).
+ */
+ s->method = COPY_READ_WRITE_CLUSTER;
+ } else if (compress) {
+ /* Compression supports only cluster-size writes and no copy-range. */
+ s->method = COPY_READ_WRITE_CLUSTER;
+ } else {
+ /*
+ * If copy_range is enabled, start with COPY_RANGE_SMALL, until the first
+ * successful copy_range (see block_copy_do_copy).
+ */
+ s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
+ }
+}
+
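+/*
+ * Return the cluster size to use for the copy: the larger of
+ * BLOCK_COPY_CLUSTER_SIZE_DEFAULT (64 KiB) and the target's own cluster
+ * size (e.g. a 2 MiB target cluster size yields 2 MiB, a 4 KiB one yields
+ * the 64 KiB default). If the target reports no cluster size, fall back to
+ * the default, warning when there is also no backing file to rely on; fail
+ * with a negative errno only when querying the target fails outright and
+ * there is no backing file.
+ */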
+static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ Error **errp)
+{
+ int ret;
+ BlockDriverInfo bdi;
+ bool target_does_cow = bdrv_backing_chain_next(target);
+
+ /*
+ * If there is no backing file on the target, we cannot rely on COW if our
+ * backup cluster size is smaller than the target cluster size. Even for
+ * targets with a backing file, try to avoid COW if possible.
+ */
+ ret = bdrv_get_info(target, &bdi);
+ if (ret == -ENOTSUP && !target_does_cow) {
+ /* Cluster size is not defined */
+ warn_report("The target block device doesn't provide "
+ "information about the block size and it doesn't have a "
+ "backing file. The default block size of %u bytes is "
+ "used. If the actual block size of the target exceeds "
+ "this default, the backup may be unusable",
+ BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+ return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+ } else if (ret < 0 && !target_does_cow) {
+ error_setg_errno(errp, -ret,
+ "Couldn't determine the cluster size of the target image, "
+ "which has no backing file");
+ error_append_hint(errp,
+ "Aborting, since this may create an unusable destination image\n");
+ return ret;
+ } else if (ret < 0 && target_does_cow) {
+ /* Not fatal; just trudge on ahead. */
+ return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+ }
+
+ return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+}
+
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- int64_t cluster_size, bool use_copy_range,
- BdrvRequestFlags write_flags, Error **errp)
+ const BdrvDirtyBitmap *bitmap,
+ Error **errp)
{
+ ERRP_GUARD();
BlockCopyState *s;
+ int64_t cluster_size;
BdrvDirtyBitmap *copy_bitmap;
+ bool is_fleecing;
+
+ cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
+ if (cluster_size < 0) {
+ return NULL;
+ }
copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
errp);
return NULL;
}
bdrv_disable_dirty_bitmap(copy_bitmap);
+ if (bitmap) {
+ if (!bdrv_merge_dirty_bitmap(copy_bitmap, bitmap, NULL, errp)) {
+ error_prepend(errp, "Failed to merge bitmap '%s' to internal "
+ "copy-bitmap: ", bdrv_dirty_bitmap_name(bitmap));
+ bdrv_release_dirty_bitmap(copy_bitmap);
+ return NULL;
+ }
+ } else {
+ bdrv_set_dirty_bitmap(copy_bitmap, 0,
+ bdrv_dirty_bitmap_size(copy_bitmap));
+ }
+
+ /*
+ * If the source is in the backing chain of the target, assume that the
+ * target is going to be used for "image fleecing", i.e. it should
+ * represent a kind of snapshot of the source at the backup-start point in
+ * time, and that the target is going to be read by somebody (for example,
+ * used as an NBD export) during the backup job.
+ *
+ * In this case, we need to add the BDRV_REQ_SERIALISING write flag to
+ * avoid intersection of backup writes and third-party reads from the
+ * target; otherwise, when reading from the target, we may occasionally
+ * read data already updated by the guest.
+ *
+ * For more information see commit f8d59dfb40bb and test
+ * tests/qemu-iotests/222.
+ */
+ is_fleecing = bdrv_chain_contains(target->bs, source->bs);
s = g_new(BlockCopyState, 1);
*s = (BlockCopyState) {
.copy_bitmap = copy_bitmap,
.cluster_size = cluster_size,
.len = bdrv_dirty_bitmap_size(copy_bitmap),
- .write_flags = write_flags,
+ .write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0),
.mem = shres_create(BLOCK_COPY_MAX_MEM),
.max_transfer = QEMU_ALIGN_DOWN(
block_copy_max_transfer(source, target),
cluster_size),
};
- if (s->max_transfer < cluster_size) {
- /*
- * copy_range does not respect max_transfer. We don't want to bother
- * with requests smaller than block-copy cluster size, so fallback to
- * buffered copying (read and write respect max_transfer on their
- * behalf).
- */
- s->method = COPY_READ_WRITE_CLUSTER;
- } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
- /* Compression supports only cluster-size writes and no copy-range. */
- s->method = COPY_READ_WRITE_CLUSTER;
- } else {
- /*
- * If copy range enabled, start with COPY_RANGE_SMALL, until first
- * successful copy_range (look at block_copy_do_copy).
- */
- s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
- }
+ block_copy_set_copy_opts(s, false, false);
ratelimit_init(&s->rate_limit);
qemu_co_mutex_init(&s->lock);
- QLIST_INIT(&s->tasks);
+ QLIST_INIT(&s->reqs);
QLIST_INIT(&s->calls);
return s;
aio_task_pool_wait_slot(pool);
if (aio_task_pool_status(pool) < 0) {
- co_put_to_shres(task->s->mem, task->bytes);
+ co_put_to_shres(task->s->mem, task->req.bytes);
block_copy_task_end(task, -ECANCELED);
g_free(task);
return -ECANCELED;
* Do copy of cluster-aligned chunk. Requested region is allowed to exceed
* s->len only to cover last cluster when s->len is not aligned to clusters.
*
- * No sync here: nor bitmap neighter intersecting requests handling, only copy.
+ * No sync here: neither bitmap updates nor intersecting-request handling,
+ * only copy.
*
* @method is an in-out argument, so that copy_range can be either extended to
* a full-size buffer or disabled if the copy_range attempt fails. The output
* value of @method should be used for subsequent tasks.
* Returns 0 on success.
*/
-static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
- int64_t offset, int64_t bytes,
- BlockCopyMethod *method,
- bool *error_is_read)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
+ BlockCopyMethod *method, bool *error_is_read)
{
int ret;
int64_t nbytes = MIN(offset + bytes, s->len) - offset;
BlockCopyMethod method = t->method;
int ret;
- ret = block_copy_do_copy(s, t->offset, t->bytes, &method, &error_is_read);
+ WITH_GRAPH_RDLOCK_GUARD() {
+ ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
+ &error_is_read);
+ }
WITH_QEMU_LOCK_GUARD(&s->lock) {
if (s->method == t->method) {
t->call_state->ret = ret;
t->call_state->error_is_read = error_is_read;
}
- } else {
- progress_work_done(s->progress, t->bytes);
+ } else if (s->progress) {
+ progress_work_done(s->progress, t->req.bytes);
}
}
- co_put_to_shres(s->mem, t->bytes);
+ co_put_to_shres(s->mem, t->req.bytes);
block_copy_task_end(t, ret);
return ret;
}
-static int block_copy_block_status(BlockCopyState *s, int64_t offset,
- int64_t bytes, int64_t *pnum)
+static coroutine_fn GRAPH_RDLOCK
+int block_copy_block_status(BlockCopyState *s, int64_t offset, int64_t bytes,
+ int64_t *pnum)
{
int64_t num;
BlockDriverState *base;
base = NULL;
}
- ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
- NULL, NULL);
+ ret = bdrv_co_block_status_above(s->source->bs, base, offset, bytes, &num,
+ NULL, NULL);
if (ret < 0 || num < s->cluster_size) {
/*
* On error or if failed to obtain large enough chunk just fallback to
* Check if the cluster starting at offset is allocated or not.
* return via pnum the number of contiguous clusters sharing this allocation.
*/
-static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
- int64_t *pnum)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
+ int64_t *pnum)
{
BlockDriverState *bs = s->source->bs;
int64_t count, total_count = 0;
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
while (true) {
- ret = bdrv_is_allocated(bs, offset, bytes, &count);
+ /* protected in backup_run() */
+ ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
if (ret < 0) {
return ret;
}
}
}
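+/*
+ * Reset the copy_bitmap for the given range and update the remaining-work
+ * estimate accordingly. Takes s->lock internally, so must be called
+ * without it held.
+ */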
+void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes)
+{
+ QEMU_LOCK_GUARD(&s->lock);
+
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
+ if (s->progress) {
+ progress_set_remaining(s->progress,
+ bdrv_get_dirty_count(s->copy_bitmap) +
+ s->in_flight_bytes);
+ }
+}
+
/*
* Reset bits in copy_bitmap starting at offset if they represent unallocated
* data in the image. May reset subsequent contiguous bits.
* @return 0 when the cluster at @offset was unallocated,
* 1 otherwise, and -ret on error.
*/
-int64_t block_copy_reset_unallocated(BlockCopyState *s,
- int64_t offset, int64_t *count)
+int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
+ int64_t offset,
+ int64_t *count)
{
int ret;
int64_t clusters, bytes;
bytes = clusters * s->cluster_size;
if (!ret) {
- qemu_co_mutex_lock(&s->lock);
- bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
- progress_set_remaining(s->progress,
- bdrv_get_dirty_count(s->copy_bitmap) +
- s->in_flight_bytes);
- qemu_co_mutex_unlock(&s->lock);
+ block_copy_reset(s, offset, bytes);
}
*count = bytes;
* Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
* clusters found and -errno on failure.
*/
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
block_copy_dirty_clusters(BlockCopyCallState *call_state)
{
BlockCopyState *s = call_state->s;
trace_block_copy_skip_range(s, offset, bytes);
break;
}
- if (task->offset > offset) {
- trace_block_copy_skip_range(s, offset, task->offset - offset);
+ if (task->req.offset > offset) {
+ trace_block_copy_skip_range(s, offset, task->req.offset - offset);
}
found_dirty = true;
- ret = block_copy_block_status(s, task->offset, task->bytes,
+ ret = block_copy_block_status(s, task->req.offset, task->req.bytes,
&status_bytes);
assert(ret >= 0); /* never fail */
- if (status_bytes < task->bytes) {
+ if (status_bytes < task->req.bytes) {
block_copy_task_shrink(task, status_bytes);
}
if (qatomic_read(&s->skip_unallocated) &&
!(ret & BDRV_BLOCK_ALLOCATED)) {
block_copy_task_end(task, 0);
- trace_block_copy_skip_range(s, task->offset, task->bytes);
+ trace_block_copy_skip_range(s, task->req.offset, task->req.bytes);
offset = task_end(task);
bytes = end - offset;
g_free(task);
}
}
- ratelimit_calculate_delay(&s->rate_limit, task->bytes);
+ ratelimit_calculate_delay(&s->rate_limit, task->req.bytes);
- trace_block_copy_process(s, task->offset);
+ trace_block_copy_process(s, task->req.offset);
- co_get_from_shres(s->mem, task->bytes);
+ co_get_from_shres(s->mem, task->req.bytes);
offset = task_end(task);
bytes = end - offset;
* it means that some I/O operation failed in context of _this_ block_copy call,
* not some parallel operation.
*/
-static int coroutine_fn block_copy_common(BlockCopyCallState *call_state)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_common(BlockCopyCallState *call_state)
{
int ret;
BlockCopyState *s = call_state->s;
* Check that there is no task we still need to
* wait to complete
*/
- ret = block_copy_wait_one(s, call_state->offset,
- call_state->bytes);
+ ret = reqlist_wait_one(&s->reqs, call_state->offset,
+ call_state->bytes, &s->lock);
if (ret == 0) {
/*
* No pending tasks, but check again the bitmap in this
* between this and the critical section in
* block_copy_dirty_clusters().
*
- * block_copy_wait_one return value 0 also means that it
+ * reqlist_wait_one return value 0 also means that it
* didn't release the lock. So, we are still in the same
* critical section, not interrupted by any concurrent
* access to state.
return ret;
}
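+/* Coroutine entry: hold the graph read lock across the whole copy */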
+static void coroutine_fn block_copy_async_co_entry(void *opaque)
+{
+ GRAPH_RDLOCK_GUARD();
+ block_copy_common(opaque);
+}
+
int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
- bool ignore_ratelimit)
+ bool ignore_ratelimit, uint64_t timeout_ns,
+ BlockCopyAsyncCallbackFunc cb,
+ void *cb_opaque)
{
- BlockCopyCallState call_state = {
+ int ret;
+ BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
+
+ *call_state = (BlockCopyCallState) {
.s = s,
.offset = start,
.bytes = bytes,
.ignore_ratelimit = ignore_ratelimit,
.max_workers = BLOCK_COPY_MAX_WORKERS,
+ .cb = cb,
+ .cb_opaque = cb_opaque,
};
- return block_copy_common(&call_state);
-}
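+ /*
+ * Run block_copy_common() in a coroutine, bounded by @timeout_ns. On
+ * timeout, qemu_co_timeout() returns -ETIMEDOUT; the copy is then
+ * cancelled and @call_state is freed by the still-running coroutine via
+ * the g_free clean-up callback.
+ */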
+ ret = qemu_co_timeout(block_copy_async_co_entry, call_state, timeout_ns,
+ g_free);
+ if (ret < 0) {
+ assert(ret == -ETIMEDOUT);
+ block_copy_call_cancel(call_state);
+ /* call_state will be freed by the still-running coroutine. */
+ return ret;
+ }
-static void coroutine_fn block_copy_async_co_entry(void *opaque)
-{
- block_copy_common(opaque);
+ ret = call_state->ret;
+ g_free(call_state);
+
+ return ret;
}
BlockCopyCallState *block_copy_async(BlockCopyState *s,
return s->copy_bitmap;
}
+int64_t block_copy_cluster_size(BlockCopyState *s)
+{
+ return s->cluster_size;
+}
+
void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
{
qatomic_set(&s->skip_unallocated, skip);