]> git.proxmox.com Git - mirror_qemu.git/blob - block/replication.c
replication: Properly attach children
[mirror_qemu.git] / block / replication.c
1 /*
2 * Replication Block filter
3 *
4 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
5 * Copyright (c) 2016 Intel Corporation
6 * Copyright (c) 2016 FUJITSU LIMITED
7 *
8 * Author:
9 * Wen Congyang <wency@cn.fujitsu.com>
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12 * See the COPYING file in the top-level directory.
13 */
14
15 #include "qemu/osdep.h"
16 #include "qemu/module.h"
17 #include "qemu/option.h"
18 #include "block/nbd.h"
19 #include "block/blockjob.h"
20 #include "block/block_int.h"
21 #include "block/block_backup.h"
22 #include "sysemu/block-backend.h"
23 #include "qapi/error.h"
24 #include "qapi/qmp/qdict.h"
25 #include "block/replication.h"
26
/* Lifecycle stages of a replication node (see BDRVReplicationState.stage). */
typedef enum {
    /* block replication is not started */
    BLOCK_REPLICATION_NONE,
    /* block replication is running */
    BLOCK_REPLICATION_RUNNING,
    /* failover is running in background */
    BLOCK_REPLICATION_FAILOVER,
    /* failover failed */
    BLOCK_REPLICATION_FAILOVER_FAILED,
    /* block replication is done */
    BLOCK_REPLICATION_DONE,
} ReplicationStage;
34
35 typedef struct BDRVReplicationState {
36 ReplicationMode mode;
37 ReplicationStage stage;
38 BlockJob *commit_job;
39 BdrvChild *hidden_disk;
40 BdrvChild *secondary_disk;
41 BlockJob *backup_job;
42 char *top_id;
43 ReplicationState *rs;
44 Error *blocker;
45 bool orig_hidden_read_only;
46 bool orig_secondary_read_only;
47 int error;
48 } BDRVReplicationState;
49
50 static void replication_start(ReplicationState *rs, ReplicationMode mode,
51 Error **errp);
52 static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
53 static void replication_get_error(ReplicationState *rs, Error **errp);
54 static void replication_stop(ReplicationState *rs, bool failover,
55 Error **errp);
56
57 #define REPLICATION_MODE "mode"
58 #define REPLICATION_TOP_ID "top-id"
59 static QemuOptsList replication_runtime_opts = {
60 .name = "replication",
61 .head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head),
62 .desc = {
63 {
64 .name = REPLICATION_MODE,
65 .type = QEMU_OPT_STRING,
66 },
67 {
68 .name = REPLICATION_TOP_ID,
69 .type = QEMU_OPT_STRING,
70 },
71 { /* end of list */ }
72 },
73 };
74
75 static ReplicationOps replication_ops = {
76 .start = replication_start,
77 .checkpoint = replication_do_checkpoint,
78 .get_error = replication_get_error,
79 .stop = replication_stop,
80 };
81
82 static int replication_open(BlockDriverState *bs, QDict *options,
83 int flags, Error **errp)
84 {
85 int ret;
86 BDRVReplicationState *s = bs->opaque;
87 QemuOpts *opts = NULL;
88 const char *mode;
89 const char *top_id;
90
91 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
92 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
93 false, errp);
94 if (!bs->file) {
95 return -EINVAL;
96 }
97
98 ret = -EINVAL;
99 opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
100 if (!qemu_opts_absorb_qdict(opts, options, errp)) {
101 goto fail;
102 }
103
104 mode = qemu_opt_get(opts, REPLICATION_MODE);
105 if (!mode) {
106 error_setg(errp, "Missing the option mode");
107 goto fail;
108 }
109
110 if (!strcmp(mode, "primary")) {
111 s->mode = REPLICATION_MODE_PRIMARY;
112 top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
113 if (top_id) {
114 error_setg(errp,
115 "The primary side does not support option top-id");
116 goto fail;
117 }
118 } else if (!strcmp(mode, "secondary")) {
119 s->mode = REPLICATION_MODE_SECONDARY;
120 top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
121 s->top_id = g_strdup(top_id);
122 if (!s->top_id) {
123 error_setg(errp, "Missing the option top-id");
124 goto fail;
125 }
126 } else {
127 error_setg(errp,
128 "The option mode's value should be primary or secondary");
129 goto fail;
130 }
131
132 s->rs = replication_new(bs, &replication_ops);
133
134 ret = 0;
135
136 fail:
137 qemu_opts_del(opts);
138 return ret;
139 }
140
141 static void replication_close(BlockDriverState *bs)
142 {
143 BDRVReplicationState *s = bs->opaque;
144 Job *commit_job;
145
146 if (s->stage == BLOCK_REPLICATION_RUNNING) {
147 replication_stop(s->rs, false, NULL);
148 }
149 if (s->stage == BLOCK_REPLICATION_FAILOVER) {
150 commit_job = &s->commit_job->job;
151 assert(commit_job->aio_context == qemu_get_current_aio_context());
152 job_cancel_sync(commit_job);
153 }
154
155 if (s->mode == REPLICATION_MODE_SECONDARY) {
156 g_free(s->top_id);
157 }
158
159 replication_remove(s->rs);
160 }
161
162 static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
163 BdrvChildRole role,
164 BlockReopenQueue *reopen_queue,
165 uint64_t perm, uint64_t shared,
166 uint64_t *nperm, uint64_t *nshared)
167 {
168 if (role & BDRV_CHILD_PRIMARY) {
169 *nperm = BLK_PERM_CONSISTENT_READ;
170 } else {
171 *nperm = 0;
172 }
173
174 if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
175 *nperm |= BLK_PERM_WRITE;
176 }
177 *nshared = BLK_PERM_CONSISTENT_READ
178 | BLK_PERM_WRITE
179 | BLK_PERM_WRITE_UNCHANGED;
180 return;
181 }
182
183 static int64_t replication_getlength(BlockDriverState *bs)
184 {
185 return bdrv_getlength(bs->file->bs);
186 }
187
188 static int replication_get_io_status(BDRVReplicationState *s)
189 {
190 switch (s->stage) {
191 case BLOCK_REPLICATION_NONE:
192 return -EIO;
193 case BLOCK_REPLICATION_RUNNING:
194 return 0;
195 case BLOCK_REPLICATION_FAILOVER:
196 return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0;
197 case BLOCK_REPLICATION_FAILOVER_FAILED:
198 return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 1;
199 case BLOCK_REPLICATION_DONE:
200 /*
201 * active commit job completes, and active disk and secondary_disk
202 * is swapped, so we can operate bs->file directly
203 */
204 return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0;
205 default:
206 abort();
207 }
208 }
209
210 static int replication_return_value(BDRVReplicationState *s, int ret)
211 {
212 if (s->mode == REPLICATION_MODE_SECONDARY) {
213 return ret;
214 }
215
216 if (ret < 0) {
217 s->error = ret;
218 ret = 0;
219 }
220
221 return ret;
222 }
223
224 static coroutine_fn int replication_co_readv(BlockDriverState *bs,
225 int64_t sector_num,
226 int remaining_sectors,
227 QEMUIOVector *qiov)
228 {
229 BDRVReplicationState *s = bs->opaque;
230 int ret;
231
232 if (s->mode == REPLICATION_MODE_PRIMARY) {
233 /* We only use it to forward primary write requests */
234 return -EIO;
235 }
236
237 ret = replication_get_io_status(s);
238 if (ret < 0) {
239 return ret;
240 }
241
242 ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE,
243 remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
244
245 return replication_return_value(s, ret);
246 }
247
248 static coroutine_fn int replication_co_writev(BlockDriverState *bs,
249 int64_t sector_num,
250 int remaining_sectors,
251 QEMUIOVector *qiov,
252 int flags)
253 {
254 BDRVReplicationState *s = bs->opaque;
255 QEMUIOVector hd_qiov;
256 uint64_t bytes_done = 0;
257 BdrvChild *top = bs->file;
258 BdrvChild *base = s->secondary_disk;
259 BdrvChild *target;
260 int ret;
261 int64_t n;
262
263 assert(!flags);
264 ret = replication_get_io_status(s);
265 if (ret < 0) {
266 goto out;
267 }
268
269 if (ret == 0) {
270 ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE,
271 remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
272 return replication_return_value(s, ret);
273 }
274
275 /*
276 * Failover failed, only write to active disk if the sectors
277 * have already been allocated in active disk/hidden disk.
278 */
279 qemu_iovec_init(&hd_qiov, qiov->niov);
280 while (remaining_sectors > 0) {
281 int64_t count;
282
283 ret = bdrv_is_allocated_above(top->bs, base->bs, false,
284 sector_num * BDRV_SECTOR_SIZE,
285 remaining_sectors * BDRV_SECTOR_SIZE,
286 &count);
287 if (ret < 0) {
288 goto out1;
289 }
290
291 assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
292 n = count >> BDRV_SECTOR_BITS;
293 qemu_iovec_reset(&hd_qiov);
294 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count);
295
296 target = ret ? top : base;
297 ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE,
298 n * BDRV_SECTOR_SIZE, &hd_qiov, 0);
299 if (ret < 0) {
300 goto out1;
301 }
302
303 remaining_sectors -= n;
304 sector_num += n;
305 bytes_done += count;
306 }
307
308 out1:
309 qemu_iovec_destroy(&hd_qiov);
310 out:
311 return ret;
312 }
313
314 static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
315 {
316 BDRVReplicationState *s = bs->opaque;
317 BdrvChild *active_disk = bs->file;
318 Error *local_err = NULL;
319 int ret;
320
321 if (!s->backup_job) {
322 error_setg(errp, "Backup job was cancelled unexpectedly");
323 return;
324 }
325
326 backup_do_checkpoint(s->backup_job, &local_err);
327 if (local_err) {
328 error_propagate(errp, local_err);
329 return;
330 }
331
332 if (!active_disk->bs->drv) {
333 error_setg(errp, "Active disk %s is ejected",
334 active_disk->bs->node_name);
335 return;
336 }
337
338 ret = bdrv_make_empty(active_disk, errp);
339 if (ret < 0) {
340 return;
341 }
342
343 if (!s->hidden_disk->bs->drv) {
344 error_setg(errp, "Hidden disk %s is ejected",
345 s->hidden_disk->bs->node_name);
346 return;
347 }
348
349 BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
350 BLK_PERM_WRITE, BLK_PERM_ALL);
351 blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
352 if (local_err) {
353 error_propagate(errp, local_err);
354 blk_unref(blk);
355 return;
356 }
357
358 ret = blk_make_empty(blk, errp);
359 blk_unref(blk);
360 if (ret < 0) {
361 return;
362 }
363 }
364
365 /* This function is supposed to be called twice:
366 * first with writable = true, then with writable = false.
367 * The first call puts s->hidden_disk and s->secondary_disk in
368 * r/w mode, and the second puts them back in their original state.
369 */
370 static void reopen_backing_file(BlockDriverState *bs, bool writable,
371 Error **errp)
372 {
373 BDRVReplicationState *s = bs->opaque;
374 BdrvChild *hidden_disk, *secondary_disk;
375 BlockReopenQueue *reopen_queue = NULL;
376
377 /*
378 * s->hidden_disk and s->secondary_disk may not be set yet, as they will
379 * only be set after the children are writable.
380 */
381 hidden_disk = bs->file->bs->backing;
382 secondary_disk = hidden_disk->bs->backing;
383
384 if (writable) {
385 s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs);
386 s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
387 }
388
389 bdrv_subtree_drained_begin(hidden_disk->bs);
390 bdrv_subtree_drained_begin(secondary_disk->bs);
391
392 if (s->orig_hidden_read_only) {
393 QDict *opts = qdict_new();
394 qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
395 reopen_queue = bdrv_reopen_queue(reopen_queue, hidden_disk->bs,
396 opts, true);
397 }
398
399 if (s->orig_secondary_read_only) {
400 QDict *opts = qdict_new();
401 qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
402 reopen_queue = bdrv_reopen_queue(reopen_queue, secondary_disk->bs,
403 opts, true);
404 }
405
406 if (reopen_queue) {
407 AioContext *ctx = bdrv_get_aio_context(bs);
408 if (ctx != qemu_get_aio_context()) {
409 aio_context_release(ctx);
410 }
411 bdrv_reopen_multiple(reopen_queue, errp);
412 if (ctx != qemu_get_aio_context()) {
413 aio_context_acquire(ctx);
414 }
415 }
416
417 bdrv_subtree_drained_end(hidden_disk->bs);
418 bdrv_subtree_drained_end(secondary_disk->bs);
419 }
420
421 static void backup_job_cleanup(BlockDriverState *bs)
422 {
423 BDRVReplicationState *s = bs->opaque;
424 BlockDriverState *top_bs;
425
426 s->backup_job = NULL;
427
428 top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
429 if (!top_bs) {
430 return;
431 }
432 bdrv_op_unblock_all(top_bs, s->blocker);
433 error_free(s->blocker);
434 reopen_backing_file(bs, false, NULL);
435 }
436
437 static void backup_job_completed(void *opaque, int ret)
438 {
439 BlockDriverState *bs = opaque;
440 BDRVReplicationState *s = bs->opaque;
441
442 if (s->stage != BLOCK_REPLICATION_FAILOVER) {
443 /* The backup job is cancelled unexpectedly */
444 s->error = -EIO;
445 }
446
447 backup_job_cleanup(bs);
448 }
449
450 static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
451 {
452 BdrvChild *child;
453
454 /* The bs itself is the top_bs */
455 if (top_bs == bs) {
456 return true;
457 }
458
459 /* Iterate over top_bs's children */
460 QLIST_FOREACH(child, &top_bs->children, next) {
461 if (child->bs == bs || check_top_bs(child->bs, bs)) {
462 return true;
463 }
464 }
465
466 return false;
467 }
468
469 static void replication_start(ReplicationState *rs, ReplicationMode mode,
470 Error **errp)
471 {
472 BlockDriverState *bs = rs->opaque;
473 BDRVReplicationState *s;
474 BlockDriverState *top_bs;
475 BdrvChild *active_disk, *hidden_disk, *secondary_disk;
476 int64_t active_length, hidden_length, disk_length;
477 AioContext *aio_context;
478 Error *local_err = NULL;
479 BackupPerf perf = { .use_copy_range = true, .max_workers = 1 };
480
481 aio_context = bdrv_get_aio_context(bs);
482 aio_context_acquire(aio_context);
483 s = bs->opaque;
484
485 if (s->stage == BLOCK_REPLICATION_DONE ||
486 s->stage == BLOCK_REPLICATION_FAILOVER) {
487 /*
488 * This case happens when a secondary is promoted to primary.
489 * Ignore the request because the secondary side of replication
490 * doesn't have to do anything anymore.
491 */
492 aio_context_release(aio_context);
493 return;
494 }
495
496 if (s->stage != BLOCK_REPLICATION_NONE) {
497 error_setg(errp, "Block replication is running or done");
498 aio_context_release(aio_context);
499 return;
500 }
501
502 if (s->mode != mode) {
503 error_setg(errp, "The parameter mode's value is invalid, needs %d,"
504 " but got %d", s->mode, mode);
505 aio_context_release(aio_context);
506 return;
507 }
508
509 switch (s->mode) {
510 case REPLICATION_MODE_PRIMARY:
511 break;
512 case REPLICATION_MODE_SECONDARY:
513 active_disk = bs->file;
514 if (!active_disk || !active_disk->bs || !active_disk->bs->backing) {
515 error_setg(errp, "Active disk doesn't have backing file");
516 aio_context_release(aio_context);
517 return;
518 }
519
520 hidden_disk = active_disk->bs->backing;
521 if (!hidden_disk->bs || !hidden_disk->bs->backing) {
522 error_setg(errp, "Hidden disk doesn't have backing file");
523 aio_context_release(aio_context);
524 return;
525 }
526
527 secondary_disk = hidden_disk->bs->backing;
528 if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) {
529 error_setg(errp, "The secondary disk doesn't have block backend");
530 aio_context_release(aio_context);
531 return;
532 }
533
534 /* verify the length */
535 active_length = bdrv_getlength(active_disk->bs);
536 hidden_length = bdrv_getlength(hidden_disk->bs);
537 disk_length = bdrv_getlength(secondary_disk->bs);
538 if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
539 active_length != hidden_length || hidden_length != disk_length) {
540 error_setg(errp, "Active disk, hidden disk, secondary disk's length"
541 " are not the same");
542 aio_context_release(aio_context);
543 return;
544 }
545
546 /* Must be true, or the bdrv_getlength() calls would have failed */
547 assert(active_disk->bs->drv && hidden_disk->bs->drv);
548
549 if (!active_disk->bs->drv->bdrv_make_empty ||
550 !hidden_disk->bs->drv->bdrv_make_empty) {
551 error_setg(errp,
552 "Active disk or hidden disk doesn't support make_empty");
553 aio_context_release(aio_context);
554 return;
555 }
556
557 /* reopen the backing file in r/w mode */
558 reopen_backing_file(bs, true, &local_err);
559 if (local_err) {
560 error_propagate(errp, local_err);
561 aio_context_release(aio_context);
562 return;
563 }
564
565 bdrv_ref(hidden_disk->bs);
566 s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
567 &child_of_bds, BDRV_CHILD_DATA,
568 &local_err);
569 if (local_err) {
570 error_propagate(errp, local_err);
571 aio_context_release(aio_context);
572 return;
573 }
574
575 bdrv_ref(secondary_disk->bs);
576 s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs,
577 "secondary disk", &child_of_bds,
578 BDRV_CHILD_DATA, &local_err);
579 if (local_err) {
580 error_propagate(errp, local_err);
581 aio_context_release(aio_context);
582 return;
583 }
584
585 /* start backup job now */
586 error_setg(&s->blocker,
587 "Block device is in use by internal backup job");
588
589 top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
590 if (!top_bs || !bdrv_is_root_node(top_bs) ||
591 !check_top_bs(top_bs, bs)) {
592 error_setg(errp, "No top_bs or it is invalid");
593 reopen_backing_file(bs, false, NULL);
594 aio_context_release(aio_context);
595 return;
596 }
597 bdrv_op_block_all(top_bs, s->blocker);
598 bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
599
600 s->backup_job = backup_job_create(
601 NULL, s->secondary_disk->bs, s->hidden_disk->bs,
602 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
603 &perf,
604 BLOCKDEV_ON_ERROR_REPORT,
605 BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
606 backup_job_completed, bs, NULL, &local_err);
607 if (local_err) {
608 error_propagate(errp, local_err);
609 backup_job_cleanup(bs);
610 aio_context_release(aio_context);
611 return;
612 }
613 job_start(&s->backup_job->job);
614 break;
615 default:
616 aio_context_release(aio_context);
617 abort();
618 }
619
620 s->stage = BLOCK_REPLICATION_RUNNING;
621
622 if (s->mode == REPLICATION_MODE_SECONDARY) {
623 secondary_do_checkpoint(bs, errp);
624 }
625
626 s->error = 0;
627 aio_context_release(aio_context);
628 }
629
630 static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
631 {
632 BlockDriverState *bs = rs->opaque;
633 BDRVReplicationState *s;
634 AioContext *aio_context;
635
636 aio_context = bdrv_get_aio_context(bs);
637 aio_context_acquire(aio_context);
638 s = bs->opaque;
639
640 if (s->stage == BLOCK_REPLICATION_DONE ||
641 s->stage == BLOCK_REPLICATION_FAILOVER) {
642 /*
643 * This case happens when a secondary was promoted to primary.
644 * Ignore the request because the secondary side of replication
645 * doesn't have to do anything anymore.
646 */
647 aio_context_release(aio_context);
648 return;
649 }
650
651 if (s->mode == REPLICATION_MODE_SECONDARY) {
652 secondary_do_checkpoint(bs, errp);
653 }
654 aio_context_release(aio_context);
655 }
656
657 static void replication_get_error(ReplicationState *rs, Error **errp)
658 {
659 BlockDriverState *bs = rs->opaque;
660 BDRVReplicationState *s;
661 AioContext *aio_context;
662
663 aio_context = bdrv_get_aio_context(bs);
664 aio_context_acquire(aio_context);
665 s = bs->opaque;
666
667 if (s->stage == BLOCK_REPLICATION_NONE) {
668 error_setg(errp, "Block replication is not running");
669 aio_context_release(aio_context);
670 return;
671 }
672
673 if (s->error) {
674 error_setg(errp, "I/O error occurred");
675 aio_context_release(aio_context);
676 return;
677 }
678 aio_context_release(aio_context);
679 }
680
681 static void replication_done(void *opaque, int ret)
682 {
683 BlockDriverState *bs = opaque;
684 BDRVReplicationState *s = bs->opaque;
685
686 if (ret == 0) {
687 s->stage = BLOCK_REPLICATION_DONE;
688
689 bdrv_unref_child(bs, s->secondary_disk);
690 s->secondary_disk = NULL;
691 bdrv_unref_child(bs, s->hidden_disk);
692 s->hidden_disk = NULL;
693 s->error = 0;
694 } else {
695 s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
696 s->error = -EIO;
697 }
698 }
699
700 static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
701 {
702 BlockDriverState *bs = rs->opaque;
703 BDRVReplicationState *s;
704 AioContext *aio_context;
705
706 aio_context = bdrv_get_aio_context(bs);
707 aio_context_acquire(aio_context);
708 s = bs->opaque;
709
710 if (s->stage == BLOCK_REPLICATION_DONE ||
711 s->stage == BLOCK_REPLICATION_FAILOVER) {
712 /*
713 * This case happens when a secondary was promoted to primary.
714 * Ignore the request because the secondary side of replication
715 * doesn't have to do anything anymore.
716 */
717 aio_context_release(aio_context);
718 return;
719 }
720
721 if (s->stage != BLOCK_REPLICATION_RUNNING) {
722 error_setg(errp, "Block replication is not running");
723 aio_context_release(aio_context);
724 return;
725 }
726
727 switch (s->mode) {
728 case REPLICATION_MODE_PRIMARY:
729 s->stage = BLOCK_REPLICATION_DONE;
730 s->error = 0;
731 break;
732 case REPLICATION_MODE_SECONDARY:
733 /*
734 * This BDS will be closed, and the job should be completed
735 * before the BDS is closed, because we will access hidden
736 * disk, secondary disk in backup_job_completed().
737 */
738 if (s->backup_job) {
739 job_cancel_sync(&s->backup_job->job);
740 }
741
742 if (!failover) {
743 secondary_do_checkpoint(bs, errp);
744 s->stage = BLOCK_REPLICATION_DONE;
745 aio_context_release(aio_context);
746 return;
747 }
748
749 s->stage = BLOCK_REPLICATION_FAILOVER;
750 s->commit_job = commit_active_start(
751 NULL, bs->file->bs, s->secondary_disk->bs,
752 JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
753 NULL, replication_done, bs, true, errp);
754 break;
755 default:
756 aio_context_release(aio_context);
757 abort();
758 }
759 aio_context_release(aio_context);
760 }
761
762 static const char *const replication_strong_runtime_opts[] = {
763 REPLICATION_MODE,
764 REPLICATION_TOP_ID,
765
766 NULL
767 };
768
769 static BlockDriver bdrv_replication = {
770 .format_name = "replication",
771 .instance_size = sizeof(BDRVReplicationState),
772
773 .bdrv_open = replication_open,
774 .bdrv_close = replication_close,
775 .bdrv_child_perm = replication_child_perm,
776
777 .bdrv_getlength = replication_getlength,
778 .bdrv_co_readv = replication_co_readv,
779 .bdrv_co_writev = replication_co_writev,
780
781 .is_filter = true,
782
783 .has_variable_length = true,
784 .strong_runtime_opts = replication_strong_runtime_opts,
785 };
786
787 static void bdrv_replication_init(void)
788 {
789 bdrv_register(&bdrv_replication);
790 }
791
792 block_init(bdrv_replication_init);