]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Live block commit | |
3 | * | |
4 | * Copyright Red Hat, Inc. 2012 | |
5 | * | |
6 | * Authors: | |
7 | * Jeff Cody <jcody@redhat.com> | |
8 | * Based on stream.c by Stefan Hajnoczi | |
9 | * | |
10 | * This work is licensed under the terms of the GNU LGPL, version 2 or later. | |
11 | * See the COPYING.LIB file in the top-level directory. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "qemu/osdep.h" | |
16 | #include "qemu/cutils.h" | |
17 | #include "trace.h" | |
18 | #include "block/block_int.h" | |
19 | #include "block/blockjob_int.h" | |
20 | #include "qapi/error.h" | |
21 | #include "qapi/qmp/qerror.h" | |
22 | #include "qemu/ratelimit.h" | |
23 | #include "sysemu/block-backend.h" | |
24 | ||
enum {
    /*
     * Size of data buffer for populating the image file. This should be large
     * enough to process multiple clusters in a single call, so that populating
     * contiguous regions of the image is efficient.
     *
     * It is also the granularity of one commit_run() iteration: allocation
     * queries and copies are done in chunks of at most this many bytes.
     */
    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
};
33 | ||
/* State of one live commit block job */
typedef struct CommitBlockJob {
    BlockJob common;
    /* Filter node inserted above top by commit_start() so that consistent
     * read can be blocked on the chain below it */
    BlockDriverState *commit_top_bs;
    BlockBackend *top;          /* source: top end of the range to commit */
    BlockBackend *base;         /* target the data is written into */
    BlockDriverState *base_bs;  /* node behind the 'base' BlockBackend */
    BlockdevOnError on_error;   /* policy applied on I/O errors in commit_run() */
    bool base_read_only;        /* base was r/o before the job; restore in clean */
    bool chain_frozen;          /* chain [commit_top_bs, base_bs] is frozen */
    /* Backing file string handed to bdrv_drop_intermediate() on completion */
    char *backing_file_str;
} CommitBlockJob;
45 | ||
46 | static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, | |
47 | int64_t offset, uint64_t bytes, | |
48 | void *buf) | |
49 | { | |
50 | int ret = 0; | |
51 | ||
52 | assert(bytes < SIZE_MAX); | |
53 | ||
54 | ret = blk_co_pread(bs, offset, bytes, buf, 0); | |
55 | if (ret < 0) { | |
56 | return ret; | |
57 | } | |
58 | ||
59 | ret = blk_co_pwrite(base, offset, bytes, buf, 0); | |
60 | if (ret < 0) { | |
61 | return ret; | |
62 | } | |
63 | ||
64 | return 0; | |
65 | } | |
66 | ||
67 | static int commit_prepare(Job *job) | |
68 | { | |
69 | CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); | |
70 | ||
71 | bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs); | |
72 | s->chain_frozen = false; | |
73 | ||
74 | /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before | |
75 | * the normal backing chain can be restored. */ | |
76 | blk_unref(s->base); | |
77 | s->base = NULL; | |
78 | ||
79 | /* FIXME: bdrv_drop_intermediate treats total failures and partial failures | |
80 | * identically. Further work is needed to disambiguate these cases. */ | |
81 | return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs, | |
82 | s->backing_file_str); | |
83 | } | |
84 | ||
/*
 * .abort callback of the job driver: undoes the graph setup done by
 * commit_start() when the job fails or is cancelled.  The statement order
 * below is significant (references must be taken before the job's BdrvChild
 * list is torn down, and the filter node is removed last).
 */
static void commit_abort(Job *job)
{
    CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
    BlockDriverState *top_bs = blk_bs(s->top);

    /* chain_frozen is false if commit_prepare() already thawed the chain */
    if (s->chain_frozen) {
        bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
    }

    /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
    bdrv_ref(top_bs);
    bdrv_ref(s->commit_top_bs);

    /* May already be NULL if commit_prepare() ran before the failure */
    if (s->base) {
        blk_unref(s->base);
    }

    /* free the blockers on the intermediate nodes so that bdrv_replace_nodes
     * can succeed */
    block_job_remove_all_bdrv(&s->common);

    /* If bdrv_drop_intermediate() failed (or was not invoked), remove the
     * commit filter driver from the backing chain now. Do this as the final
     * step so that the 'consistent read' permission can be granted.
     *
     * XXX Can (or should) we somehow keep 'consistent read' blocked even
     * after the failed/cancelled commit job is gone? If we already wrote
     * something to base, the intermediate images aren't valid any more. */
    bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs),
                      &error_abort);

    bdrv_unref(s->commit_top_bs);
    bdrv_unref(top_bs);
}
119 | ||
120 | static void commit_clean(Job *job) | |
121 | { | |
122 | CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); | |
123 | ||
124 | /* restore base open flags here if appropriate (e.g., change the base back | |
125 | * to r/o). These reopens do not need to be atomic, since we won't abort | |
126 | * even on failure here */ | |
127 | if (s->base_read_only) { | |
128 | bdrv_reopen_set_read_only(s->base_bs, true, NULL); | |
129 | } | |
130 | ||
131 | g_free(s->backing_file_str); | |
132 | blk_unref(s->top); | |
133 | } | |
134 | ||
135 | static int coroutine_fn commit_run(Job *job, Error **errp) | |
136 | { | |
137 | CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); | |
138 | int64_t offset; | |
139 | uint64_t delay_ns = 0; | |
140 | int ret = 0; | |
141 | int64_t n = 0; /* bytes */ | |
142 | void *buf = NULL; | |
143 | int bytes_written = 0; | |
144 | int64_t len, base_len; | |
145 | ||
146 | ret = len = blk_getlength(s->top); | |
147 | if (len < 0) { | |
148 | goto out; | |
149 | } | |
150 | job_progress_set_remaining(&s->common.job, len); | |
151 | ||
152 | ret = base_len = blk_getlength(s->base); | |
153 | if (base_len < 0) { | |
154 | goto out; | |
155 | } | |
156 | ||
157 | if (base_len < len) { | |
158 | ret = blk_truncate(s->base, len, PREALLOC_MODE_OFF, NULL); | |
159 | if (ret) { | |
160 | goto out; | |
161 | } | |
162 | } | |
163 | ||
164 | buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); | |
165 | ||
166 | for (offset = 0; offset < len; offset += n) { | |
167 | bool copy; | |
168 | ||
169 | /* Note that even when no rate limit is applied we need to yield | |
170 | * with no pending I/O here so that bdrv_drain_all() returns. | |
171 | */ | |
172 | job_sleep_ns(&s->common.job, delay_ns); | |
173 | if (job_is_cancelled(&s->common.job)) { | |
174 | break; | |
175 | } | |
176 | /* Copy if allocated above the base */ | |
177 | ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), false, | |
178 | offset, COMMIT_BUFFER_SIZE, &n); | |
179 | copy = (ret == 1); | |
180 | trace_commit_one_iteration(s, offset, n, ret); | |
181 | if (copy) { | |
182 | ret = commit_populate(s->top, s->base, offset, n, buf); | |
183 | bytes_written += n; | |
184 | } | |
185 | if (ret < 0) { | |
186 | BlockErrorAction action = | |
187 | block_job_error_action(&s->common, false, s->on_error, -ret); | |
188 | if (action == BLOCK_ERROR_ACTION_REPORT) { | |
189 | goto out; | |
190 | } else { | |
191 | n = 0; | |
192 | continue; | |
193 | } | |
194 | } | |
195 | /* Publish progress */ | |
196 | job_progress_update(&s->common.job, n); | |
197 | ||
198 | if (copy) { | |
199 | delay_ns = block_job_ratelimit_get_delay(&s->common, n); | |
200 | } else { | |
201 | delay_ns = 0; | |
202 | } | |
203 | } | |
204 | ||
205 | ret = 0; | |
206 | ||
207 | out: | |
208 | qemu_vfree(buf); | |
209 | ||
210 | return ret; | |
211 | } | |
212 | ||
/*
 * Job driver for the live commit job.  .run performs the data copy;
 * .prepare, .abort and .clean implement the success, failure and common
 * cleanup parts of job completion (see the functions above).
 */
static const BlockJobDriver commit_job_driver = {
    .job_driver = {
        .instance_size = sizeof(CommitBlockJob),
        .job_type = JOB_TYPE_COMMIT,
        .free = block_job_free,
        .user_resume = block_job_user_resume,
        .drain = block_job_drain,
        .run = commit_run,
        .prepare = commit_prepare,
        .abort = commit_abort,
        .clean = commit_clean
    },
};
226 | ||
227 | static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, | |
228 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) | |
229 | { | |
230 | return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); | |
231 | } | |
232 | ||
233 | static void bdrv_commit_top_refresh_filename(BlockDriverState *bs) | |
234 | { | |
235 | pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), | |
236 | bs->backing->bs->filename); | |
237 | } | |
238 | ||
239 | static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, | |
240 | const BdrvChildRole *role, | |
241 | BlockReopenQueue *reopen_queue, | |
242 | uint64_t perm, uint64_t shared, | |
243 | uint64_t *nperm, uint64_t *nshared) | |
244 | { | |
245 | *nperm = 0; | |
246 | *nshared = BLK_PERM_ALL; | |
247 | } | |
248 | ||
/* Dummy node that provides consistent read to its users without requiring it
 * from its backing file and that allows writes on the backing file chain.
 *
 * It is inserted directly above 'top' by commit_start()/bdrv_commit() and
 * removed again when the commit finishes or fails. */
static BlockDriver bdrv_commit_top = {
    .format_name = "commit_top",
    .bdrv_co_preadv = bdrv_commit_top_preadv,
    .bdrv_co_block_status = bdrv_co_block_status_from_backing,
    .bdrv_refresh_filename = bdrv_commit_top_refresh_filename,
    .bdrv_child_perm = bdrv_commit_top_child_perm,
};
258 | ||
/*
 * commit_start:
 * @job_id: job identifier, or NULL (forwarded to block_job_create())
 * @bs: active node of the device the job is attached to
 * @base: node the committed data is written into
 * @top: top end of the range to commit; must be in @base's overlay chain
 *       and must not be @bs itself
 * @creation_flags: job creation flags, forwarded to block_job_create()
 * @speed: rate limit, forwarded to block_job_create()
 * @on_error: error policy applied by commit_run()
 * @backing_file_str: backing file string passed to bdrv_drop_intermediate()
 *                    on successful completion
 * @filter_node_name: node name for the commit_top filter; NULL makes the
 *                    filter node implicit
 * @errp: error object
 *
 * Creates and starts a background commit job.  On failure, sets @errp and
 * rolls back all setup (permissions, filter node, base r/w reopen).
 */
void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top,
                  int creation_flags, int64_t speed,
                  BlockdevOnError on_error, const char *backing_file_str,
                  const char *filter_node_name, Error **errp)
{
    CommitBlockJob *s;
    BlockDriverState *iter;
    BlockDriverState *commit_top_bs = NULL;
    Error *local_err = NULL;
    int ret;

    assert(top != bs);
    if (top == base) {
        error_setg(errp, "Invalid files for merge: top and base are the same");
        return;
    }

    s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
                         speed, creation_flags, NULL, NULL, errp);
    if (!s) {
        return;
    }

    /* convert base to r/w, if necessary */
    s->base_read_only = bdrv_is_read_only(base);
    if (s->base_read_only) {
        if (bdrv_reopen_set_read_only(base, false, errp) != 0) {
            goto fail;
        }
    }

    /* Insert commit_top block node above top, so we can block consistent read
     * on the backing chain below it */
    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
                                         errp);
    if (commit_top_bs == NULL) {
        goto fail;
    }
    if (!filter_node_name) {
        commit_top_bs->implicit = true;
    }
    commit_top_bs->total_sectors = top->total_sectors;

    bdrv_append(commit_top_bs, top, &local_err);
    if (local_err) {
        /* bdrv_append() failure already dropped commit_top_bs; clear the
         * pointer so the fail path does not try to replace it */
        commit_top_bs = NULL;
        error_propagate(errp, local_err);
        goto fail;
    }

    s->commit_top_bs = commit_top_bs;

    /* Block all nodes between top and base, because they will
     * disappear from the chain after this operation. */
    assert(bdrv_chain_contains(top, base));
    for (iter = top; iter != base; iter = backing_bs(iter)) {
        /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
         * at s->base (if writes are blocked for a node, they are also blocked
         * for its backing file). The other options would be a second filter
         * driver above s->base. */
        ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
                                 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
                                 errp);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Keep the chain immutable while the job runs; thawed again in
     * commit_prepare()/commit_abort() */
    if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
        goto fail;
    }
    s->chain_frozen = true;

    ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
    if (ret < 0) {
        goto fail;
    }

    s->base = blk_new(s->common.job.aio_context,
                      BLK_PERM_CONSISTENT_READ
                      | BLK_PERM_WRITE
                      | BLK_PERM_RESIZE,
                      BLK_PERM_CONSISTENT_READ
                      | BLK_PERM_GRAPH_MOD
                      | BLK_PERM_WRITE_UNCHANGED);
    ret = blk_insert_bs(s->base, base, errp);
    if (ret < 0) {
        goto fail;
    }
    s->base_bs = base;

    /* Required permissions are already taken with block_job_add_bdrv() */
    s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL);
    ret = blk_insert_bs(s->top, top, errp);
    if (ret < 0) {
        goto fail;
    }

    s->backing_file_str = g_strdup(backing_file_str);
    s->on_error = on_error;

    trace_commit_start(bs, base, top, s);
    job_start(&s->common.job);
    return;

fail:
    if (s->chain_frozen) {
        bdrv_unfreeze_backing_chain(commit_top_bs, base);
    }
    if (s->base) {
        blk_unref(s->base);
    }
    if (s->top) {
        blk_unref(s->top);
    }
    if (s->base_read_only) {
        bdrv_reopen_set_read_only(base, true, NULL);
    }
    job_early_fail(&s->common.job);
    /* commit_top_bs has to be replaced after deleting the block job,
     * otherwise this would fail because of lack of permissions. */
    if (commit_top_bs) {
        bdrv_replace_node(commit_top_bs, top, &error_abort);
    }
}
385 | ||
386 | ||
/* Chunk size for the synchronous bdrv_commit() copy loop */
#define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)

/* commit COW file into the raw image
 *
 * Synchronously copies all data allocated in @bs down into its backing
 * file, growing the backing file first if necessary, then empties @bs if
 * the driver supports it.  The backing file is temporarily reopened
 * read-write if it was read-only, and a commit_top filter is temporarily
 * inserted above it so that writes to it are permitted.
 *
 * Returns 0 on success, a negative errno on failure.
 */
int bdrv_commit(BlockDriverState *bs)
{
    BlockBackend *src, *backing;
    BlockDriverState *backing_file_bs = NULL;
    BlockDriverState *commit_top_bs = NULL;
    BlockDriver *drv = bs->drv;
    AioContext *ctx;
    int64_t offset, length, backing_length;
    int ro;
    int64_t n;
    int ret = 0;
    uint8_t *buf = NULL;
    Error *local_err = NULL;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

    ro = bs->backing->bs->read_only;

    if (ro) {
        if (bdrv_reopen_set_read_only(bs->backing->bs, false, NULL)) {
            return -EACCES;
        }
    }

    ctx = bdrv_get_aio_context(bs);
    src = blk_new(ctx, BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
    backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);

    ret = blk_insert_bs(src, bs, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto ro_cleanup;
    }

    /* Insert commit_top block node above backing, so we can write to it */
    backing_file_bs = backing_bs(bs);

    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
                                         &local_err);
    if (commit_top_bs == NULL) {
        error_report_err(local_err);
        goto ro_cleanup;
    }

    bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
    bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);

    ret = blk_insert_bs(backing, backing_file_bs, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto ro_cleanup;
    }

    length = blk_getlength(src);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = blk_getlength(backing);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible. If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            goto ro_cleanup;
        }
    }

    /* blk_try_blockalign() for src will choose an alignment that works for
     * backing as well, so no need to compare the alignment manually. */
    buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* Copy every chunk that is allocated in bs down into the backing file */
    for (offset = 0; offset < length; offset += n) {
        ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            ret = blk_pread(src, offset, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = blk_pwrite(backing, offset, buf, n, 0);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    /* Drop the now-committed data from bs, if the driver can */
    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        blk_flush(src);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    blk_flush(backing);

    ret = 0;
ro_cleanup:
    qemu_vfree(buf);

    blk_unref(backing);
    /* Restore the original backing chain, removing the commit_top filter */
    if (backing_file_bs) {
        bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
    }
    bdrv_unref(commit_top_bs);
    blk_unref(src);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen_set_read_only(bs->backing->bs, true, NULL);
    }

    return ret;
}