/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"

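/*
 * Block until no in-flight request overlaps the byte range [start, end).
 * The list is re-scanned after every wakeup, since a new overlapping request
 * may have been started while we slept.
 */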
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
                                                       int64_t start,
                                                       int64_t end)
{
    BlockCopyInFlightReq *req;
    bool waited;

    do {
        waited = false;
        QLIST_FOREACH(req, &s->inflight_reqs, list) {
            if (end > req->start_byte && start < req->end_byte) {
                qemu_co_queue_wait(&req->wait_queue, NULL);
                waited = true;
                break;
            }
        }
    } while (waited);
}

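/*
 * Register a request as in-flight so that concurrent block_copy() calls on an
 * overlapping range wait for it; block_copy_inflight_req_end() removes it
 * again and wakes all waiters.
 */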
static void block_copy_inflight_req_begin(BlockCopyState *s,
                                          BlockCopyInFlightReq *req,
                                          int64_t start, int64_t end)
{
    req->start_byte = start;
    req->end_byte = end;
    qemu_co_queue_init(&req->wait_queue);
    QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
}

static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

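/* Release the copy bitmap and both block backends; a NULL state is allowed. */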
void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    bdrv_release_dirty_bitmap(blk_bs(s->source), s->copy_bitmap);
    blk_unref(s->source);
    blk_unref(s->target);
    g_free(s);
}

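/*
 * Create a BlockCopyState for copying from @source to @target at @cluster_size
 * granularity: attach dedicated block backends to both nodes, create a
 * (disabled) dirty bitmap covering the whole source and decide whether
 * copy_range offloading can be used.
 */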
BlockCopyState *block_copy_state_new(BlockDriverState *source,
                                     BlockDriverState *target,
                                     int64_t cluster_size,
                                     BdrvRequestFlags write_flags, Error **errp)
{
    BlockCopyState *s;
    int ret;
    uint64_t no_resize = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
                         BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD;
    BdrvDirtyBitmap *copy_bitmap;

    copy_bitmap = bdrv_create_dirty_bitmap(source, cluster_size, NULL, errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = blk_new(bdrv_get_aio_context(source),
                          BLK_PERM_CONSISTENT_READ, no_resize),
        .target = blk_new(bdrv_get_aio_context(target),
                          BLK_PERM_WRITE, no_resize),
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = write_flags,
    };

    s->copy_range_size = QEMU_ALIGN_DOWN(MIN(blk_get_max_transfer(s->source),
                                             blk_get_max_transfer(s->target)),
                                         s->cluster_size);
    /*
     * Set use_copy_range, consider the following:
     * 1. Compression is not supported for copy_range.
     * 2. copy_range does not respect max_transfer (it's a TODO), so we factor
     *    that in here. If max_transfer is smaller than the job->cluster_size,
     *    we do not use copy_range (in that case it's zero after aligning down
     *    above).
     */
    s->use_copy_range =
        !(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0;

    QLIST_INIT(&s->inflight_reqs);

    /*
     * We just allow aio context change on our block backends. The block_copy()
     * user (currently only backup) is responsible for keeping source and
     * target in the same aio context.
     */
    blk_set_disable_request_queuing(s->source, true);
    blk_set_allow_aio_context_change(s->source, true);
    blk_set_disable_request_queuing(s->target, true);
    blk_set_allow_aio_context_change(s->target, true);

    ret = blk_insert_bs(s->source, source, errp);
    if (ret < 0) {
        goto fail;
    }

    ret = blk_insert_bs(s->target, target, errp);
    if (ret < 0) {
        goto fail;
    }

    return s;

fail:
    block_copy_state_free(s);

    return NULL;
}

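/*
 * Install the progress callbacks that block_copy() and
 * block_copy_reset_unallocated() invoke on behalf of the caller.
 */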
void block_copy_set_callbacks(
        BlockCopyState *s,
        ProgressBytesCallbackFunc progress_bytes_callback,
        ProgressResetCallbackFunc progress_reset_callback,
        void *progress_opaque)
{
    s->progress_bytes_callback = progress_bytes_callback;
    s->progress_reset_callback = progress_reset_callback;
    s->progress_opaque = progress_opaque;
}

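/*
 * Rough usage sketch (illustrative only; the real caller is currently the
 * backup job, which additionally installs write notifiers, handles errors and
 * drives job progress). src_bs, tgt_bs, cluster_size, aligned_len, bytes_cb,
 * reset_cb and opaque are caller-provided placeholders, and aligned_len must
 * be a multiple of cluster_size:
 *
 *     BlockCopyState *s = block_copy_state_new(src_bs, tgt_bs, cluster_size,
 *                                              0, errp);
 *     if (s) {
 *         bool error_is_read;
 *
 *         block_copy_set_callbacks(s, bytes_cb, reset_cb, opaque);
 *         ret = block_copy(s, 0, aligned_len, &error_is_read, false);
 *         block_copy_state_free(s);
 *     }
 */
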
/*
 * Copy range to target with a bounce buffer and return the bytes copied. If
 * an error occurred, return a negative error number.
 */
static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
                                                      int64_t start,
                                                      int64_t end,
                                                      bool is_write_notifier,
                                                      bool *error_is_read,
                                                      void **bounce_buffer)
{
    int ret;
    int nbytes;
    int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    nbytes = MIN(s->cluster_size, s->len - start);
    if (!*bounce_buffer) {
        *bounce_buffer = blk_blockalign(s->source, s->cluster_size);
    }

    ret = blk_co_pread(s->source, start, nbytes, *bounce_buffer, read_flags);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = true;
        }
        goto fail;
    }

    ret = blk_co_pwrite(s->target, start, nbytes, *bounce_buffer,
                        s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = false;
        }
        goto fail;
    }

    return nbytes;
fail:
    bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    return ret;
}

/*
 * Copy range to target and return the bytes copied. If an error occurred,
 * return a negative error number.
 */
static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
                                                int64_t start,
                                                int64_t end,
                                                bool is_write_notifier)
{
    int ret;
    int nr_clusters;
    int nbytes;
    int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;

    assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
    nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
                            s->cluster_size * nr_clusters);
    ret = blk_co_copy_range(s->source, start, s->target, start, nbytes,
                            read_flags, s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_offload_fail(s, start, ret);
        bdrv_set_dirty_bitmap(s->copy_bitmap, start,
                              s->cluster_size * nr_clusters);
        return ret;
    }

    return nbytes;
}

/*
 * Check if the cluster starting at offset is allocated or not.
 * Return via pnum the number of contiguous clusters sharing this allocation.
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = blk_bs(s->source);
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

/*
 * Reset bits in copy_bitmap starting at offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and -ret on error.
 */
int64_t block_copy_reset_unallocated(BlockCopyState *s,
                                     int64_t offset, int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
        s->progress_reset_callback(s->progress_opaque);
    }

    *count = bytes;
    return ret;
}

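/*
 * Copy the dirty clusters within [start, start + bytes) from source to target.
 * The range must be cluster-aligned; overlapping block_copy() calls are
 * serialized via the in-flight request list. copy_range offloading is tried
 * first, with the bounce-buffer path used as fallback (and exclusively once
 * copy_range has failed).
 */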
int coroutine_fn block_copy(BlockCopyState *s,
                            int64_t start, uint64_t bytes,
                            bool *error_is_read,
                            bool is_write_notifier)
{
    int ret = 0;
    int64_t end = bytes + start; /* bytes */
    void *bounce_buffer = NULL;
    int64_t status_bytes;
    BlockCopyInFlightReq req;

    /*
     * The block_copy() user is responsible for keeping source and target in
     * the same aio context.
     */
    assert(blk_get_aio_context(s->source) == blk_get_aio_context(s->target));

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));

    block_copy_wait_inflight_reqs(s, start, end);
    block_copy_inflight_req_begin(s, &req, start, end);

    while (start < end) {
        int64_t dirty_end;

        if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
            trace_block_copy_skip(s, start);
            start += s->cluster_size;
            continue; /* already copied */
        }

        dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
                                                (end - start));
        if (dirty_end < 0) {
            dirty_end = end;
        }

        if (s->skip_unallocated) {
            ret = block_copy_reset_unallocated(s, start, &status_bytes);
            if (ret == 0) {
                trace_block_copy_skip_range(s, start, status_bytes);
                start += status_bytes;
                continue;
            }
            /* Clamp to known allocated region */
            dirty_end = MIN(dirty_end, start + status_bytes);
        }

        trace_block_copy_process(s, start);

        if (s->use_copy_range) {
            ret = block_copy_with_offload(s, start, dirty_end,
                                          is_write_notifier);
            if (ret < 0) {
                s->use_copy_range = false;
            }
        }
        if (!s->use_copy_range) {
            ret = block_copy_with_bounce_buffer(s, start, dirty_end,
                                                is_write_notifier,
                                                error_is_read, &bounce_buffer);
        }
        if (ret < 0) {
            break;
        }

        start += ret;
        s->progress_bytes_callback(ret, s->progress_opaque);
        ret = 0;
    }

    if (bounce_buffer) {
        qemu_vfree(bounce_buffer);
    }

    block_copy_inflight_req_end(&req);

    return ret;
}