]> git.proxmox.com Git - pve-qemu-kvm.git/blame - debian/patches/0002-add-basic-backup-support-to-block-driver.patch
update backup patches
[pve-qemu-kvm.git] / debian / patches / 0002-add-basic-backup-support-to-block-driver.patch
CommitLineData
309874bd 1From 577b000e947d817cf4e0189615c0d0257cb20259 Mon Sep 17 00:00:00 2001
5ad5891c
DM
2From: Dietmar Maurer <dietmar@proxmox.com>
3Date: Tue, 13 Nov 2012 10:03:52 +0100
309874bd 4Subject: [PATCH v3 2/7] add basic backup support to block driver
5ad5891c 5
309874bd
DM
6Function backup_job_create() creates a block job to back up a block device.
7The coroutine is started with backup_job_start().
5ad5891c
DM
8
9We call backup_do_cow() for each write during backup. That function
10reads the original data and passes it to backup_dump_cb().
11
12The tracked_request infrastructure is used to serialize access.
13
14Currently the backup cluster size is hardcoded to 65536 bytes.
15
16Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
17---
18 Makefile.objs | 1 +
309874bd
DM
19 backup.c | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
20 backup.h | 32 ++++++
5ad5891c
DM
21 block.c | 71 ++++++++++++-
22 block.h | 2 +
23 blockjob.h | 10 ++
309874bd 24 6 files changed, 418 insertions(+), 6 deletions(-)
5ad5891c
DM
25 create mode 100644 backup.c
26 create mode 100644 backup.h
27
28diff --git a/Makefile.objs b/Makefile.objs
29index 3c7abca..cb46be5 100644
30--- a/Makefile.objs
31+++ b/Makefile.objs
32@@ -48,6 +48,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
33 block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o
34 block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o
35 block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o
36+block-obj-y += backup.o
37 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
38 block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
39 block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o
40diff --git a/backup.c b/backup.c
41new file mode 100644
309874bd 42index 0000000..2c13e21
5ad5891c
DM
43--- /dev/null
44+++ b/backup.c
309874bd 45@@ -0,0 +1,308 @@
5ad5891c
DM
46+/*
47+ * QEMU backup
48+ *
49+ * Copyright (C) 2012 Proxmox Server Solutions
50+ *
51+ * Authors:
52+ * Dietmar Maurer (dietmar@proxmox.com)
53+ *
54+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
55+ * See the COPYING file in the top-level directory.
56+ *
57+ */
58+
59+#include <stdio.h>
60+#include <errno.h>
61+#include <unistd.h>
62+
63+#include "block.h"
64+#include "block_int.h"
65+#include "blockjob.h"
66+#include "backup.h"
67+
68+#define DEBUG_BACKUP 0
69+
70+#define DPRINTF(fmt, ...) \
71+ do { if (DEBUG_BACKUP) { printf("backup: " fmt, ## __VA_ARGS__); } } \
72+ while (0)
73+
74+
75+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
76+
77+typedef struct BackupBlockJob {
78+ BlockJob common;
79+ unsigned long *bitmap;
80+ int bitmap_size;
81+ BackupDumpFunc *backup_dump_cb;
82+ BlockDriverCompletionFunc *backup_complete_cb;
83+ void *opaque;
84+} BackupBlockJob;
85+
309874bd 86+static int backup_get_bitmap(BackupBlockJob *job, int64_t cluster_num)
5ad5891c 87+{
5ad5891c
DM
88+ assert(job);
89+ assert(job->bitmap);
90+
91+ unsigned long val, idx, bit;
92+
93+ idx = cluster_num / BITS_PER_LONG;
94+
95+ assert(job->bitmap_size > idx);
96+
97+ bit = cluster_num % BITS_PER_LONG;
98+ val = job->bitmap[idx];
99+
100+ return !!(val & (1UL << bit));
101+}
102+
309874bd 103+static void backup_set_bitmap(BackupBlockJob *job, int64_t cluster_num,
5ad5891c
DM
104+ int dirty)
105+{
5ad5891c
DM
106+ assert(job);
107+ assert(job->bitmap);
108+
109+ unsigned long val, idx, bit;
110+
111+ idx = cluster_num / BITS_PER_LONG;
112+
113+ assert(job->bitmap_size > idx);
114+
115+ bit = cluster_num % BITS_PER_LONG;
116+ val = job->bitmap[idx];
117+ if (dirty) {
118+ if (!(val & (1UL << bit))) {
119+ val |= 1UL << bit;
120+ }
121+ } else {
122+ if (val & (1UL << bit)) {
123+ val &= ~(1UL << bit);
124+ }
125+ }
126+ job->bitmap[idx] = val;
127+}
128+
129+static int backup_in_progress_count;
130+
131+static int coroutine_fn backup_do_cow(BlockDriverState *bs,
132+ int64_t sector_num, int nb_sectors)
133+{
134+ assert(bs);
135+ BackupBlockJob *job = (BackupBlockJob *)bs->job;
136+ assert(job);
137+
138+ BlockDriver *drv = bs->drv;
139+ struct iovec iov;
140+ QEMUIOVector bounce_qiov;
141+ void *bounce_buffer = NULL;
142+ int ret = 0;
143+
144+ backup_in_progress_count++;
145+
146+ int64_t start, end;
147+
148+ start = sector_num / BACKUP_BLOCKS_PER_CLUSTER;
149+ end = (sector_num + nb_sectors + BACKUP_BLOCKS_PER_CLUSTER - 1) /
150+ BACKUP_BLOCKS_PER_CLUSTER;
151+
152+ DPRINTF("brdv_co_backup_cow enter %s C%zd %zd %d\n",
153+ bdrv_get_device_name(bs), start, sector_num, nb_sectors);
154+
155+ for (; start < end; start++) {
309874bd 156+ if (backup_get_bitmap(job, start)) {
5ad5891c
DM
157+ DPRINTF("brdv_co_backup_cow skip C%zd\n", start);
158+ continue; /* already copied */
159+ }
160+
161+ /* immediately set bitmap (avoid coroutine race) */
309874bd 162+ backup_set_bitmap(job, start, 1);
5ad5891c
DM
163+
164+ DPRINTF("brdv_co_backup_cow C%zd\n", start);
165+
166+ if (!bounce_buffer) {
167+ iov.iov_len = BACKUP_CLUSTER_SIZE;
168+ iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
169+ qemu_iovec_init_external(&bounce_qiov, &iov, 1);
170+ }
171+
172+ ret = drv->bdrv_co_readv(bs, start * BACKUP_BLOCKS_PER_CLUSTER,
173+ BACKUP_BLOCKS_PER_CLUSTER,
174+ &bounce_qiov);
175+ if (ret < 0) {
176+ DPRINTF("brdv_co_backup_cow bdrv_read C%zd failed\n", start);
177+ goto out;
178+ }
179+
180+ ret = job->backup_dump_cb(job->opaque, bs, start, bounce_buffer);
181+ if (ret < 0) {
182+ DPRINTF("brdv_co_backup_cow dump_cluster_cb C%zd failed\n", start);
183+ goto out;
184+ }
185+
186+ DPRINTF("brdv_co_backup_cow done C%zd\n", start);
187+ }
188+
189+out:
190+ if (bounce_buffer) {
191+ qemu_vfree(bounce_buffer);
192+ }
193+
194+ backup_in_progress_count--;
195+
196+ return ret;
197+}
198+
199+static int coroutine_fn backup_before_read(BlockDriverState *bs,
200+ int64_t sector_num,
201+ int nb_sectors, QEMUIOVector *qiov)
202+{
203+ return backup_do_cow(bs, sector_num, nb_sectors);
204+}
205+
206+static int coroutine_fn backup_before_write(BlockDriverState *bs,
207+ int64_t sector_num,
208+ int nb_sectors, QEMUIOVector *qiov)
209+{
210+ return backup_do_cow(bs, sector_num, nb_sectors);
211+}
212+
213+
214+static BlockJobType backup_job_type = {
215+ .instance_size = sizeof(BackupBlockJob),
216+ .before_read = backup_before_read,
217+ .before_write = backup_before_write,
218+ .job_type = "backup",
219+};
220+
221+static void coroutine_fn backup_run(void *opaque)
222+{
223+ BackupBlockJob *job = opaque;
224+ BlockDriverState *bs = job->common.bs;
225+ assert(bs);
226+
227+ int64_t start, end;
228+
229+ start = 0;
230+ end = (bs->total_sectors + BACKUP_BLOCKS_PER_CLUSTER - 1) /
231+ BACKUP_BLOCKS_PER_CLUSTER;
232+
233+ DPRINTF("backup_run start %s %zd %zd\n", bdrv_get_device_name(bs),
234+ start, end);
235+
236+ int ret = 0;
237+
238+ for (; start < end; start++) {
239+ if (block_job_is_cancelled(&job->common)) {
240+ ret = -1;
241+ break;
242+ }
243+
309874bd 244+ if (backup_get_bitmap(job, start)) {
5ad5891c
DM
245+ continue; /* already copied */
246+ }
247+
248+ /* we need to yield so that qemu_aio_flush() returns.
249+ * (without, VM does not reboot)
250+ * todo: can we avoid that?
309874bd 251+ * Note: use 1000 instead of 0 (0 prioritizes this task too much)
5ad5891c 252+ */
309874bd 253+ block_job_sleep_ns(&job->common, rt_clock, 1000);
5ad5891c
DM
254+ if (block_job_is_cancelled(&job->common)) {
255+ ret = -1;
256+ break;
257+ }
258+ DPRINTF("backup_run loop C%zd\n", start);
259+
260+ /**
261+ * This triggers a cluster copy
262+ * Note: avoid direct call to backup_do_cow(), because
263+ * this does not call tracked_request_begin()
264+ */
265+ ret = bdrv_co_backup(bs, start*BACKUP_BLOCKS_PER_CLUSTER, 1);
266+ if (ret < 0) {
267+ break;
268+ }
269+ /* Publish progress */
270+ job->common.offset += BACKUP_CLUSTER_SIZE;
271+ }
272+
273+ while (backup_in_progress_count > 0) {
274+ DPRINTF("backup_run backup_in_progress_count != 0 (%d)",
275+ backup_in_progress_count);
276+ co_sleep_ns(rt_clock, 10000);
277+ }
278+
279+ DPRINTF("backup_run complete %d\n", ret);
280+ block_job_completed(&job->common, ret);
281+}
282+
283+static void backup_job_cleanup_cb(void *opaque, int ret)
284+{
285+ BlockDriverState *bs = opaque;
286+ assert(bs);
287+ BackupBlockJob *job = (BackupBlockJob *)bs->job;
288+ assert(job);
289+
290+ DPRINTF("backup_job_cleanup_cb start %d\n", ret);
291+
292+ job->backup_complete_cb(job->opaque, ret);
293+
294+ DPRINTF("backup_job_cleanup_cb end\n");
295+
296+ g_free(job->bitmap);
297+}
298+
309874bd
DM
299+void
300+backup_job_start(BlockDriverState *bs)
301+{
302+ assert(bs);
303+ assert(bs->job);
304+ assert(bs->job->co == NULL);
305+
306+ bs->job->co = qemu_coroutine_create(backup_run);
307+ qemu_coroutine_enter(bs->job->co, bs->job);
308+}
309+
5ad5891c 310+int
309874bd
DM
311+backup_job_create(BlockDriverState *bs, BackupDumpFunc *backup_dump_cb,
312+ BlockDriverCompletionFunc *backup_complete_cb,
313+ void *opaque)
5ad5891c
DM
314+{
315+ assert(bs);
316+ assert(backup_dump_cb);
317+ assert(backup_complete_cb);
318+
319+ if (bs->job) {
320+ DPRINTF("bdrv_backup_init failed - running job on %s\n",
321+ bdrv_get_device_name(bs));
322+ return -1;
323+ }
324+
325+ int64_t bitmap_size;
326+ const char *devname = bdrv_get_device_name(bs);
327+
328+ if (!devname || !devname[0]) {
329+ return -1;
330+ }
331+
332+ DPRINTF("bdrv_backup_init %s\n", bdrv_get_device_name(bs));
333+
334+ Error *errp;
335+ BackupBlockJob *job = block_job_create(&backup_job_type, bs, 0,
336+ backup_job_cleanup_cb, bs, &errp);
337+
338+ job->common.cluster_size = BACKUP_CLUSTER_SIZE;
339+
340+ bitmap_size = bs->total_sectors +
341+ BACKUP_BLOCKS_PER_CLUSTER * BITS_PER_LONG - 1;
342+ bitmap_size /= BACKUP_BLOCKS_PER_CLUSTER * BITS_PER_LONG;
343+
344+ job->backup_dump_cb = backup_dump_cb;
345+ job->backup_complete_cb = backup_complete_cb;
346+ job->opaque = opaque;
347+ job->bitmap_size = bitmap_size;
348+ job->bitmap = g_new0(unsigned long, bitmap_size);
349+
350+ job->common.len = bs->total_sectors*BDRV_SECTOR_SIZE;
309874bd 351+
5ad5891c
DM
352+ return 0;
353+}
354diff --git a/backup.h b/backup.h
355new file mode 100644
309874bd 356index 0000000..87b9942
5ad5891c
DM
357--- /dev/null
358+++ b/backup.h
309874bd 359@@ -0,0 +1,32 @@
5ad5891c
DM
360+/*
361+ * QEMU backup related definitions
362+ *
363+ * Copyright (C) Proxmox Server Solutions
364+ *
365+ * Authors:
366+ * Dietmar Maurer (dietmar@proxmox.com)
367+ *
368+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
369+ * See the COPYING file in the top-level directory.
370+ *
371+ */
372+
373+#ifndef QEMU_BACKUP_H
374+#define QEMU_BACKUP_H
375+
376+#include <uuid/uuid.h>
377+
378+#define BACKUP_CLUSTER_BITS 16
379+#define BACKUP_CLUSTER_SIZE (1<<BACKUP_CLUSTER_BITS)
380+#define BACKUP_BLOCKS_PER_CLUSTER (BACKUP_CLUSTER_SIZE/BDRV_SECTOR_SIZE)
381+
382+typedef int BackupDumpFunc(void *opaque, BlockDriverState *bs,
383+ int64_t cluster_num, unsigned char *buf);
384+
309874bd
DM
385+void backup_job_start(BlockDriverState *bs);
386+
387+int backup_job_create(BlockDriverState *bs, BackupDumpFunc *backup_dump_cb,
388+ BlockDriverCompletionFunc *backup_complete_cb,
389+ void *opaque);
5ad5891c
DM
390+
391+#endif /* QEMU_BACKUP_H */
392diff --git a/block.c b/block.c
309874bd 393index c05875f..4de7fbd 100644
5ad5891c
DM
394--- a/block.c
395+++ b/block.c
396@@ -54,6 +54,7 @@
397 typedef enum {
398 BDRV_REQ_COPY_ON_READ = 0x1,
399 BDRV_REQ_ZERO_WRITE = 0x2,
400+ BDRV_REQ_BACKUP_ONLY = 0x4,
401 } BdrvRequestFlags;
402
403 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
404@@ -1542,7 +1543,7 @@ int bdrv_commit(BlockDriverState *bs)
405
406 if (!drv)
407 return -ENOMEDIUM;
408-
409+
410 if (!bs->backing_hd) {
411 return -ENOTSUP;
412 }
413@@ -1679,6 +1680,22 @@ static void round_to_clusters(BlockDriverState *bs,
414 }
415 }
416
417+/**
418+ * Round a region to job cluster boundaries
419+ */
420+static void round_to_job_clusters(BlockDriverState *bs,
421+ int64_t sector_num, int nb_sectors,
422+ int job_cluster_size,
423+ int64_t *cluster_sector_num,
424+ int *cluster_nb_sectors)
425+{
426+ int64_t c = job_cluster_size/BDRV_SECTOR_SIZE;
427+
428+ *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
429+ *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
430+ nb_sectors, c);
431+}
432+
433 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
434 int64_t sector_num, int nb_sectors) {
435 /* aaaa bbbb */
436@@ -1693,7 +1710,9 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
437 }
438
439 static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
440- int64_t sector_num, int nb_sectors)
441+ int64_t sector_num,
442+ int nb_sectors,
443+ int job_cluster_size)
444 {
445 BdrvTrackedRequest *req;
446 int64_t cluster_sector_num;
447@@ -1709,6 +1728,11 @@ static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
448 round_to_clusters(bs, sector_num, nb_sectors,
449 &cluster_sector_num, &cluster_nb_sectors);
450
451+ if (job_cluster_size) {
452+ round_to_job_clusters(bs, sector_num, nb_sectors, job_cluster_size,
453+ &cluster_sector_num, &cluster_nb_sectors);
454+ }
455+
456 do {
457 retry = false;
458 QLIST_FOREACH(req, &bs->tracked_requests, list) {
459@@ -2278,12 +2302,24 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
460 bs->copy_on_read_in_flight++;
461 }
462
463- if (bs->copy_on_read_in_flight) {
464- wait_for_overlapping_requests(bs, sector_num, nb_sectors);
465+ int job_cluster_size = bs->job && bs->job->cluster_size ?
466+ bs->job->cluster_size : 0;
467+
468+ if (bs->copy_on_read_in_flight || job_cluster_size) {
469+ wait_for_overlapping_requests(bs, sector_num, nb_sectors,
470+ job_cluster_size);
471 }
472
473 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
474
475+ if (bs->job && bs->job->job_type->before_read) {
476+ ret = bs->job->job_type->before_read(bs, sector_num, nb_sectors, qiov);
309874bd 477+ if ((ret < 0) || (flags & BDRV_REQ_BACKUP_ONLY)) {
5ad5891c
DM
478+ /* Note: We do not return any data to the caller */
479+ goto out;
480+ }
481+ }
482+
483 if (flags & BDRV_REQ_COPY_ON_READ) {
484 int pnum;
485
486@@ -2327,6 +2363,17 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
487 BDRV_REQ_COPY_ON_READ);
488 }
489
490+int coroutine_fn bdrv_co_backup(BlockDriverState *bs,
491+ int64_t sector_num, int nb_sectors)
492+{
493+ if (!bs->job) {
494+ return -ENOTSUP;
495+ }
496+
497+ return bdrv_co_do_readv(bs, sector_num, nb_sectors, NULL,
498+ BDRV_REQ_BACKUP_ONLY);
499+}
500+
501 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
502 int64_t sector_num, int nb_sectors)
503 {
504@@ -2384,12 +2431,23 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
505 bdrv_io_limits_intercept(bs, true, nb_sectors);
506 }
507
508- if (bs->copy_on_read_in_flight) {
509- wait_for_overlapping_requests(bs, sector_num, nb_sectors);
510+ int job_cluster_size = bs->job && bs->job->cluster_size ?
511+ bs->job->cluster_size : 0;
512+
513+ if (bs->copy_on_read_in_flight || job_cluster_size) {
514+ wait_for_overlapping_requests(bs, sector_num, nb_sectors,
515+ job_cluster_size);
516 }
517
518 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
519
520+ if (bs->job && bs->job->job_type->before_write) {
521+ ret = bs->job->job_type->before_write(bs, sector_num, nb_sectors, qiov);
522+ if (ret < 0) {
523+ goto out;
524+ }
525+ }
526+
527 if (flags & BDRV_REQ_ZERO_WRITE) {
528 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
529 } else {
530@@ -2408,6 +2466,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
531 bs->wr_highest_sector = sector_num + nb_sectors - 1;
532 }
533
534+out:
535 tracked_request_end(&req);
536
537 return ret;
538diff --git a/block.h b/block.h
539index 722c620..94e5903 100644
540--- a/block.h
541+++ b/block.h
542@@ -172,6 +172,8 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
543 int nb_sectors, QEMUIOVector *qiov);
544 int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
545 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
546+int coroutine_fn bdrv_co_backup(BlockDriverState *bs,
547+ int64_t sector_num, int nb_sectors);
548 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
549 int nb_sectors, QEMUIOVector *qiov);
550 /*
551diff --git a/blockjob.h b/blockjob.h
552index 3792b73..6621173 100644
553--- a/blockjob.h
554+++ b/blockjob.h
555@@ -50,6 +50,13 @@ typedef struct BlockJobType {
556 * manually.
557 */
558 void (*complete)(BlockJob *job, Error **errp);
559+
560+ /** tracked requests */
561+ int coroutine_fn (*before_read)(BlockDriverState *bs, int64_t sector_num,
562+ int nb_sectors, QEMUIOVector *qiov);
563+ int coroutine_fn (*before_write)(BlockDriverState *bs, int64_t sector_num,
564+ int nb_sectors, QEMUIOVector *qiov);
565+
566 } BlockJobType;
567
568 /**
569@@ -103,6 +110,9 @@ struct BlockJob {
570 /** Speed that was set with @block_job_set_speed. */
571 int64_t speed;
572
573+ /** tracked requests */
574+ int cluster_size;
575+
576 /** The completion function that will be called when the job completes. */
577 BlockDriverCompletionFunc *cb;
578
579--
5801.7.2.5
581