/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"
22 static void coroutine_fn
block_copy_wait_inflight_reqs(BlockCopyState
*s
,
26 BlockCopyInFlightReq
*req
;
31 QLIST_FOREACH(req
, &s
->inflight_reqs
, list
) {
32 if (end
> req
->start_byte
&& start
< req
->end_byte
) {
33 qemu_co_queue_wait(&req
->wait_queue
, NULL
);
41 static void block_copy_inflight_req_begin(BlockCopyState
*s
,
42 BlockCopyInFlightReq
*req
,
43 int64_t start
, int64_t end
)
45 req
->start_byte
= start
;
47 qemu_co_queue_init(&req
->wait_queue
);
48 QLIST_INSERT_HEAD(&s
->inflight_reqs
, req
, list
);
51 static void coroutine_fn
block_copy_inflight_req_end(BlockCopyInFlightReq
*req
)
53 QLIST_REMOVE(req
, list
);
54 qemu_co_queue_restart_all(&req
->wait_queue
);
57 void block_copy_state_free(BlockCopyState
*s
)
63 bdrv_release_dirty_bitmap(s
->source
->bs
, s
->copy_bitmap
);
67 BlockCopyState
*block_copy_state_new(BdrvChild
*source
, BdrvChild
*target
,
69 BdrvRequestFlags write_flags
, Error
**errp
)
72 BdrvDirtyBitmap
*copy_bitmap
;
73 uint32_t max_transfer
=
74 MIN_NON_ZERO(INT_MAX
, MIN_NON_ZERO(source
->bs
->bl
.max_transfer
,
75 target
->bs
->bl
.max_transfer
));
77 copy_bitmap
= bdrv_create_dirty_bitmap(source
->bs
, cluster_size
, NULL
,
82 bdrv_disable_dirty_bitmap(copy_bitmap
);
84 s
= g_new(BlockCopyState
, 1);
85 *s
= (BlockCopyState
) {
88 .copy_bitmap
= copy_bitmap
,
89 .cluster_size
= cluster_size
,
90 .len
= bdrv_dirty_bitmap_size(copy_bitmap
),
91 .write_flags
= write_flags
,
94 s
->copy_range_size
= QEMU_ALIGN_DOWN(max_transfer
, cluster_size
),
96 * Set use_copy_range, consider the following:
97 * 1. Compression is not supported for copy_range.
98 * 2. copy_range does not respect max_transfer (it's a TODO), so we factor
99 * that in here. If max_transfer is smaller than the job->cluster_size,
100 * we do not use copy_range (in that case it's zero after aligning down
104 !(write_flags
& BDRV_REQ_WRITE_COMPRESSED
) && s
->copy_range_size
> 0;
106 QLIST_INIT(&s
->inflight_reqs
);
111 void block_copy_set_callbacks(
113 ProgressBytesCallbackFunc progress_bytes_callback
,
114 ProgressResetCallbackFunc progress_reset_callback
,
115 void *progress_opaque
)
117 s
->progress_bytes_callback
= progress_bytes_callback
;
118 s
->progress_reset_callback
= progress_reset_callback
;
119 s
->progress_opaque
= progress_opaque
;
123 * Copy range to target with a bounce buffer and return the bytes copied. If
124 * error occurred, return a negative error number
126 static int coroutine_fn
block_copy_with_bounce_buffer(BlockCopyState
*s
,
130 void **bounce_buffer
)
135 assert(QEMU_IS_ALIGNED(start
, s
->cluster_size
));
136 bdrv_reset_dirty_bitmap(s
->copy_bitmap
, start
, s
->cluster_size
);
137 nbytes
= MIN(s
->cluster_size
, s
->len
- start
);
138 if (!*bounce_buffer
) {
139 *bounce_buffer
= qemu_blockalign(s
->source
->bs
, s
->cluster_size
);
142 ret
= bdrv_co_pread(s
->source
, start
, nbytes
, *bounce_buffer
, 0);
144 trace_block_copy_with_bounce_buffer_read_fail(s
, start
, ret
);
146 *error_is_read
= true;
151 ret
= bdrv_co_pwrite(s
->target
, start
, nbytes
, *bounce_buffer
,
154 trace_block_copy_with_bounce_buffer_write_fail(s
, start
, ret
);
156 *error_is_read
= false;
163 bdrv_set_dirty_bitmap(s
->copy_bitmap
, start
, s
->cluster_size
);
169 * Copy range to target and return the bytes copied. If error occurred, return a
170 * negative error number.
172 static int coroutine_fn
block_copy_with_offload(BlockCopyState
*s
,
180 assert(QEMU_IS_ALIGNED(s
->copy_range_size
, s
->cluster_size
));
181 assert(QEMU_IS_ALIGNED(start
, s
->cluster_size
));
182 nbytes
= MIN(s
->copy_range_size
, MIN(end
, s
->len
) - start
);
183 nr_clusters
= DIV_ROUND_UP(nbytes
, s
->cluster_size
);
184 bdrv_reset_dirty_bitmap(s
->copy_bitmap
, start
,
185 s
->cluster_size
* nr_clusters
);
186 ret
= bdrv_co_copy_range(s
->source
, start
, s
->target
, start
, nbytes
,
189 trace_block_copy_with_offload_fail(s
, start
, ret
);
190 bdrv_set_dirty_bitmap(s
->copy_bitmap
, start
,
191 s
->cluster_size
* nr_clusters
);
199 * Check if the cluster starting at offset is allocated or not.
200 * return via pnum the number of contiguous clusters sharing this allocation.
202 static int block_copy_is_cluster_allocated(BlockCopyState
*s
, int64_t offset
,
205 BlockDriverState
*bs
= s
->source
->bs
;
206 int64_t count
, total_count
= 0;
207 int64_t bytes
= s
->len
- offset
;
210 assert(QEMU_IS_ALIGNED(offset
, s
->cluster_size
));
213 ret
= bdrv_is_allocated(bs
, offset
, bytes
, &count
);
218 total_count
+= count
;
220 if (ret
|| count
== 0) {
222 * ret: partial segment(s) are considered allocated.
223 * otherwise: unallocated tail is treated as an entire segment.
225 *pnum
= DIV_ROUND_UP(total_count
, s
->cluster_size
);
229 /* Unallocated segment(s) with uncertain following segment(s) */
230 if (total_count
>= s
->cluster_size
) {
231 *pnum
= total_count
/ s
->cluster_size
;
241 * Reset bits in copy_bitmap starting at offset if they represent unallocated
242 * data in the image. May reset subsequent contiguous bits.
243 * @return 0 when the cluster at @offset was unallocated,
244 * 1 otherwise, and -ret on error.
246 int64_t block_copy_reset_unallocated(BlockCopyState
*s
,
247 int64_t offset
, int64_t *count
)
250 int64_t clusters
, bytes
;
252 ret
= block_copy_is_cluster_allocated(s
, offset
, &clusters
);
257 bytes
= clusters
* s
->cluster_size
;
260 bdrv_reset_dirty_bitmap(s
->copy_bitmap
, offset
, bytes
);
261 s
->progress_reset_callback(s
->progress_opaque
);
268 int coroutine_fn
block_copy(BlockCopyState
*s
,
269 int64_t start
, uint64_t bytes
,
273 int64_t end
= bytes
+ start
; /* bytes */
274 void *bounce_buffer
= NULL
;
275 int64_t status_bytes
;
276 BlockCopyInFlightReq req
;
279 * block_copy() user is responsible for keeping source and target in same
282 assert(bdrv_get_aio_context(s
->source
->bs
) ==
283 bdrv_get_aio_context(s
->target
->bs
));
285 assert(QEMU_IS_ALIGNED(start
, s
->cluster_size
));
286 assert(QEMU_IS_ALIGNED(end
, s
->cluster_size
));
288 block_copy_wait_inflight_reqs(s
, start
, bytes
);
289 block_copy_inflight_req_begin(s
, &req
, start
, end
);
291 while (start
< end
) {
294 if (!bdrv_dirty_bitmap_get(s
->copy_bitmap
, start
)) {
295 trace_block_copy_skip(s
, start
);
296 start
+= s
->cluster_size
;
297 continue; /* already copied */
300 dirty_end
= bdrv_dirty_bitmap_next_zero(s
->copy_bitmap
, start
,
306 if (s
->skip_unallocated
) {
307 ret
= block_copy_reset_unallocated(s
, start
, &status_bytes
);
309 trace_block_copy_skip_range(s
, start
, status_bytes
);
310 start
+= status_bytes
;
313 /* Clamp to known allocated region */
314 dirty_end
= MIN(dirty_end
, start
+ status_bytes
);
317 trace_block_copy_process(s
, start
);
319 if (s
->use_copy_range
) {
320 ret
= block_copy_with_offload(s
, start
, dirty_end
);
322 s
->use_copy_range
= false;
325 if (!s
->use_copy_range
) {
326 ret
= block_copy_with_bounce_buffer(s
, start
, dirty_end
,
327 error_is_read
, &bounce_buffer
);
334 s
->progress_bytes_callback(ret
, s
->progress_opaque
);
339 qemu_vfree(bounce_buffer
);
342 block_copy_inflight_req_end(&req
);