block/block-copy.c (mirror_qemu.git), at commit "block/block-copy: rename start to offset in interfaces"
1 /*
2 * block_copy API
3 *
4 * Copyright (C) 2013 Proxmox Server Solutions
5 * Copyright (c) 2019 Virtuozzo International GmbH.
6 *
7 * Authors:
8 * Dietmar Maurer (dietmar@proxmox.com)
9 * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12 * See the COPYING file in the top-level directory.
13 */
14
15 #include "qemu/osdep.h"
16
17 #include "trace.h"
18 #include "qapi/error.h"
19 #include "block/block-copy.h"
20 #include "sysemu/block-backend.h"
21 #include "qemu/units.h"
22
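/*
 * Tunables used below: the chunk size aimed for once copy_range is known to
 * work, the chunk (bounce buffer) size for read+write copying, and the total
 * amount of data that concurrent block-copy requests may have in flight.
 */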
23 #define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
24 #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
25 #define BLOCK_COPY_MAX_MEM (128 * MiB)
26
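/* Return an in-flight request that overlaps [offset, offset + bytes), or NULL. */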
27 static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
28 int64_t offset,
29 int64_t bytes)
30 {
31 BlockCopyInFlightReq *req;
32
33 QLIST_FOREACH(req, &s->inflight_reqs, list) {
34 if (offset + bytes > req->offset && offset < req->offset + req->bytes) {
35 return req;
36 }
37 }
38
39 return NULL;
40 }
41
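/* Coroutine: yield until no in-flight request intersects [offset, offset + bytes). */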
42 static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
43 int64_t offset,
44 int64_t bytes)
45 {
46 BlockCopyInFlightReq *req;
47
48 while ((req = find_conflicting_inflight_req(s, offset, bytes))) {
49 qemu_co_queue_wait(&req->wait_queue, NULL);
50 }
51 }
52
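/*
 * Register @req as an in-flight request covering [offset, offset + bytes).
 * Conflicting requests are expected to have been waited for beforehand
 * (block_copy() does so via block_copy_wait_inflight_reqs()).
 */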
53 static void block_copy_inflight_req_begin(BlockCopyState *s,
54 BlockCopyInFlightReq *req,
55 int64_t offset, int64_t bytes)
56 {
57 req->offset = offset;
58 req->bytes = bytes;
59 qemu_co_queue_init(&req->wait_queue);
60 QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
61 }
62
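/* Drop @req from the in-flight list and wake every coroutine waiting on it. */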
63 static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
64 {
65 QLIST_REMOVE(req, list);
66 qemu_co_queue_restart_all(&req->wait_queue);
67 }
68
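/* Free @s together with its copy bitmap and shared-memory resource; NULL-safe. */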
69 void block_copy_state_free(BlockCopyState *s)
70 {
71 if (!s) {
72 return;
73 }
74
75 bdrv_release_dirty_bitmap(s->copy_bitmap);
76 shres_destroy(s->mem);
77 g_free(s);
78 }
79
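/*
 * Largest request size that both source and target can handle in one
 * operation, or INT_MAX if neither advertises a max_transfer limit.
 */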
80 static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
81 {
82 return MIN_NON_ZERO(INT_MAX,
83 MIN_NON_ZERO(source->bs->bl.max_transfer,
84 target->bs->bl.max_transfer));
85 }
86
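/*
 * Create a BlockCopyState for copying from @source to @target. A dirty bitmap
 * with @cluster_size granularity tracks what still needs copying; it is
 * created disabled, so it does not record new guest writes by itself.
 * copy_size and use_copy_range are derived from @write_flags and the
 * max_transfer limits of the two nodes (see the comments below).
 *
 * Rough usage sketch with illustrative variable names (not taken from this
 * file; real users such as the backup job do additional setup, and
 * block_copy() must run in coroutine context with cluster-aligned
 * offset/bytes):
 *
 *     s = block_copy_state_new(source, target, cluster_size, write_flags, errp);
 *     if (s) {
 *         block_copy_set_progress_meter(s, pm);
 *         ret = block_copy(s, offset, bytes, &error_is_read);
 *         block_copy_state_free(s);
 *     }
 */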
87 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
88 int64_t cluster_size,
89 BdrvRequestFlags write_flags, Error **errp)
90 {
91 BlockCopyState *s;
92 BdrvDirtyBitmap *copy_bitmap;
93
94 copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
95 errp);
96 if (!copy_bitmap) {
97 return NULL;
98 }
99 bdrv_disable_dirty_bitmap(copy_bitmap);
100
101 s = g_new(BlockCopyState, 1);
102 *s = (BlockCopyState) {
103 .source = source,
104 .target = target,
105 .copy_bitmap = copy_bitmap,
106 .cluster_size = cluster_size,
107 .len = bdrv_dirty_bitmap_size(copy_bitmap),
108 .write_flags = write_flags,
109 .mem = shres_create(BLOCK_COPY_MAX_MEM),
110 };
111
112 if (block_copy_max_transfer(source, target) < cluster_size) {
113 /*
114 * copy_range does not respect max_transfer. We don't want to bother
115 * with requests smaller than the block-copy cluster size, so fall back
116 * to buffered copying (read and write respect max_transfer on their
117 * own).
118 */
119 s->use_copy_range = false;
120 s->copy_size = cluster_size;
121 } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
122 /* Compression supports only cluster-size writes and no copy-range. */
123 s->use_copy_range = false;
124 s->copy_size = cluster_size;
125 } else {
126 /*
127 * We enable copy-range but keep a small copy_size until the first
128 * successful copy_range (see block_copy_do_copy).
129 */
130 s->use_copy_range = true;
131 s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
132 }
133
134 QLIST_INIT(&s->inflight_reqs);
135
136 return s;
137 }
138
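/*
 * The callback registered here is invoked with the byte count of every
 * successfully copied chunk (see block_copy() below).
 */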
139 void block_copy_set_progress_callback(
140 BlockCopyState *s,
141 ProgressBytesCallbackFunc progress_bytes_callback,
142 void *progress_opaque)
143 {
144 s->progress_bytes_callback = progress_bytes_callback;
145 s->progress_opaque = progress_opaque;
146 }
147
148 void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
149 {
150 s->progress = pm;
151 }
152
153 /*
154 * block_copy_do_copy
155 *
156 * Copy a cluster-aligned chunk. The requested region may exceed s->len only
157 * to cover the last cluster when s->len is not aligned to the cluster size.
158 *
159 * No sync here: neither bitmap updates nor intersecting-request handling, only the copy itself.
160 *
161 * Returns 0 on success.
162 */
163 static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
164 int64_t offset, int64_t bytes,
165 bool zeroes, bool *error_is_read)
166 {
167 int ret;
168 int64_t nbytes = MIN(offset + bytes, s->len) - offset;
169 void *bounce_buffer = NULL;
170
171 assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
172 assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
173 assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
174 assert(offset < s->len);
175 assert(offset + bytes <= s->len ||
176 offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
177 assert(nbytes < INT_MAX);
178
179 if (zeroes) {
180 ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
181 ~BDRV_REQ_WRITE_COMPRESSED);
182 if (ret < 0) {
183 trace_block_copy_write_zeroes_fail(s, offset, ret);
184 if (error_is_read) {
185 *error_is_read = false;
186 }
187 }
188 return ret;
189 }
190
191 if (s->use_copy_range) {
192 ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
193 0, s->write_flags);
194 if (ret < 0) {
195 trace_block_copy_copy_range_fail(s, offset, ret);
196 s->use_copy_range = false;
197 s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
198 /* Fall back to read+write with an allocated buffer */
199 } else {
200 if (s->use_copy_range) {
201 /*
202 * Successful copy-range. Now increase copy_size. copy_range
203 * does not respect max_transfer (it's a TODO), so we factor
204 * that in here.
205 *
206 * Note: we double-check s->use_copy_range for the case when a
207 * parallel block-copy request unset it during the preceding
208 * bdrv_co_copy_range call.
209 */
210 s->copy_size =
211 MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
212 QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
213 s->target),
214 s->cluster_size));
215 }
216 goto out;
217 }
218 }
219
220 /*
221 * If the copy_range request above failed, we may proceed with a buffered
222 * request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will
223 * be properly limited, so this is not a big concern. Moreover, the most
224 * likely case (copy_range is unsupported for the configuration, so the very
225 * first copy_range request fails) is handled by setting a large copy_size
226 * only after the first successful copy_range.
227 */
228
229 bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
230
231 ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
232 if (ret < 0) {
233 trace_block_copy_read_fail(s, offset, ret);
234 if (error_is_read) {
235 *error_is_read = true;
236 }
237 goto out;
238 }
239
240 ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
241 s->write_flags);
242 if (ret < 0) {
243 trace_block_copy_write_fail(s, offset, ret);
244 if (error_is_read) {
245 *error_is_read = false;
246 }
247 goto out;
248 }
249
250 out:
251 qemu_vfree(bounce_buffer);
252
253 return ret;
254 }
255
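/*
 * bdrv_block_status_above() wrapper that reports cluster-aligned results.
 * With s->skip_unallocated set, only the chain above the backing file is
 * queried, so that clusters unallocated in the top image can be skipped.
 * On error, or when less than one cluster of status is obtained, pretend the
 * next cluster is allocated data so that it simply gets copied.
 */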
256 static int block_copy_block_status(BlockCopyState *s, int64_t offset,
257 int64_t bytes, int64_t *pnum)
258 {
259 int64_t num;
260 BlockDriverState *base;
261 int ret;
262
263 if (s->skip_unallocated && s->source->bs->backing) {
264 base = s->source->bs->backing->bs;
265 } else {
266 base = NULL;
267 }
268
269 ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
270 NULL, NULL);
271 if (ret < 0 || num < s->cluster_size) {
272 /*
273 * On error, or if we failed to obtain a large enough chunk, just fall
274 * back to copying one cluster.
275 */
276 num = s->cluster_size;
277 ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
278 } else if (offset + num == s->len) {
279 num = QEMU_ALIGN_UP(num, s->cluster_size);
280 } else {
281 num = QEMU_ALIGN_DOWN(num, s->cluster_size);
282 }
283
284 *pnum = num;
285 return ret;
286 }
287
288 /*
289 * Check if the cluster starting at offset is allocated or not.
290 * Return via pnum the number of contiguous clusters sharing this allocation.
291 */
292 static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
293 int64_t *pnum)
294 {
295 BlockDriverState *bs = s->source->bs;
296 int64_t count, total_count = 0;
297 int64_t bytes = s->len - offset;
298 int ret;
299
300 assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
301
302 while (true) {
303 ret = bdrv_is_allocated(bs, offset, bytes, &count);
304 if (ret < 0) {
305 return ret;
306 }
307
308 total_count += count;
309
310 if (ret || count == 0) {
311 /*
312 * ret: partial segment(s) are considered allocated.
313 * otherwise: unallocated tail is treated as an entire segment.
314 */
315 *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
316 return ret;
317 }
318
319 /* Unallocated segment(s) with uncertain following segment(s) */
320 if (total_count >= s->cluster_size) {
321 *pnum = total_count / s->cluster_size;
322 return 0;
323 }
324
325 offset += count;
326 bytes -= count;
327 }
328 }
329
330 /*
331 * Reset bits in copy_bitmap starting at offset if they represent unallocated
332 * data in the image. May reset subsequent contiguous bits.
333 * @return 0 when the cluster at @offset was unallocated,
334 * 1 otherwise, and -ret on error.
335 */
336 int64_t block_copy_reset_unallocated(BlockCopyState *s,
337 int64_t offset, int64_t *count)
338 {
339 int ret;
340 int64_t clusters, bytes;
341
342 ret = block_copy_is_cluster_allocated(s, offset, &clusters);
343 if (ret < 0) {
344 return ret;
345 }
346
347 bytes = clusters * s->cluster_size;
348
349 if (!ret) {
350 bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
351 progress_set_remaining(s->progress,
352 bdrv_get_dirty_count(s->copy_bitmap) +
353 s->in_flight_bytes);
354 }
355
356 *count = bytes;
357 return ret;
358 }
359
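/*
 * Copy the still-dirty parts of [offset, offset + bytes) from source to
 * target. @offset and @bytes must be cluster-aligned. The range is serialized
 * against other block_copy() calls via the in-flight request list; copy_bitmap
 * bits are cleared before copying a chunk and set again if it fails.
 * Returns a negative errno on the first failed chunk, a non-negative value
 * otherwise. Must be called from coroutine context.
 */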
360 int coroutine_fn block_copy(BlockCopyState *s,
361 int64_t offset, int64_t bytes,
362 bool *error_is_read)
363 {
364 int ret = 0;
365 BlockCopyInFlightReq req;
366
367 /*
368 * The block_copy() caller is responsible for keeping source and target in
369 * the same AioContext.
370 */
371 assert(bdrv_get_aio_context(s->source->bs) ==
372 bdrv_get_aio_context(s->target->bs));
373
374 assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
375 assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
376
377 block_copy_wait_inflight_reqs(s, offset, bytes);
378 block_copy_inflight_req_begin(s, &req, offset, bytes);
379
380 while (bytes) {
381 int64_t next_zero, cur_bytes, status_bytes;
382
383 if (!bdrv_dirty_bitmap_get(s->copy_bitmap, offset)) {
384 trace_block_copy_skip(s, offset);
385 offset += s->cluster_size;
386 bytes -= s->cluster_size;
387 continue; /* already copied */
388 }
389
390 cur_bytes = MIN(bytes, s->copy_size);
391
392 next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, offset,
393 cur_bytes);
394 if (next_zero >= 0) {
395 assert(next_zero > offset); /* offset is dirty */
396 assert(next_zero < offset + cur_bytes); /* no need to do MIN() */
397 cur_bytes = next_zero - offset;
398 }
399
400 ret = block_copy_block_status(s, offset, cur_bytes, &status_bytes);
401 if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
402 bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, status_bytes);
403 progress_set_remaining(s->progress,
404 bdrv_get_dirty_count(s->copy_bitmap) +
405 s->in_flight_bytes);
406 trace_block_copy_skip_range(s, offset, status_bytes);
407 offset += status_bytes;
408 bytes -= status_bytes;
409 continue;
410 }
411
412 cur_bytes = MIN(cur_bytes, status_bytes);
413
414 trace_block_copy_process(s, offset);
415
416 bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
417 s->in_flight_bytes += cur_bytes;
418
419 co_get_from_shres(s->mem, cur_bytes);
420 ret = block_copy_do_copy(s, offset, cur_bytes, ret & BDRV_BLOCK_ZERO,
421 error_is_read);
422 co_put_to_shres(s->mem, cur_bytes);
423 s->in_flight_bytes -= cur_bytes;
424 if (ret < 0) {
425 bdrv_set_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
426 break;
427 }
428
429 progress_work_done(s->progress, cur_bytes);
430 s->progress_bytes_callback(cur_bytes, s->progress_opaque);
431 offset += cur_bytes;
432 bytes -= cur_bytes;
433 }
434
435 block_copy_inflight_req_end(&req);
436
437 return ret;
438 }