From 309874bde80a85c604796eef90b343d62a97a4ae Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Wed, 16 Jan 2013 13:31:17 +0100 Subject: [PATCH] update backup patches bump version to 1.3-12 --- Makefile | 2 +- debian/changelog | 2 +- ...001-RFC-Efficient-VM-backup-for-qemu.patch | 6 +- ...basic-backup-support-to-block-driver.patch | 71 +-- ...-add-backup-related-monitor-commands.patch | 93 ++-- ...004-introduce-new-vma-archive-format.patch | 124 ++--- ...0005-add-regression-tests-for-backup.patch | 22 +- .../0006-add-vm-state-to-backups.patch | 44 +- ...0007-use-extra-thread-for-vma-writer.patch | 451 ++++++++++++++++++ debian/patches/series | 1 + 10 files changed, 662 insertions(+), 154 deletions(-) create mode 100644 debian/patches/0007-use-extra-thread-for-vma-writer.patch diff --git a/Makefile b/Makefile index e1f2d8f..94e4cd8 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ RELEASE=2.3 # also update debian/changelog KVMVER=1.3 -KVMPKGREL=11 +KVMPKGREL=12 KVMPACKAGE=pve-qemu-kvm KVMDIR=qemu-kvm diff --git a/debian/changelog b/debian/changelog index c146870..9e412b4 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -pve-qemu-kvm (1.3-11) unstable; urgency=low +pve-qemu-kvm (1.3-12) unstable; urgency=low * update vma patches diff --git a/debian/patches/0001-RFC-Efficient-VM-backup-for-qemu.patch b/debian/patches/0001-RFC-Efficient-VM-backup-for-qemu.patch index e56c700..1ef9376 100644 --- a/debian/patches/0001-RFC-Efficient-VM-backup-for-qemu.patch +++ b/debian/patches/0001-RFC-Efficient-VM-backup-for-qemu.patch @@ -1,7 +1,7 @@ -From 793525f2a3b92fdc0ce27e48c3421171b87c367c Mon Sep 17 00:00:00 2001 +From f4a34368cdc254ea7602c5913c50506f61e7652e Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Tue, 13 Nov 2012 09:24:50 +0100 -Subject: [PATCH v3 1/6] RFC: Efficient VM backup for qemu +Subject: [PATCH v3 1/7] RFC: Efficient VM backup for qemu This series provides a way to efficiently backup VMs. 
@@ -26,6 +26,8 @@ Changes since v2: * BackupDriver: remove cancel_cb * use enum for BackupFormat * vma: use bdrv_open instead of bdrv_file_open +* vma: use extra writer thread +* backup one drive after another (try to avoid high load) Signed-off-by: Dietmar Maurer --- diff --git a/debian/patches/0002-add-basic-backup-support-to-block-driver.patch b/debian/patches/0002-add-basic-backup-support-to-block-driver.patch index f3a2bf9..eaa169e 100644 --- a/debian/patches/0002-add-basic-backup-support-to-block-driver.patch +++ b/debian/patches/0002-add-basic-backup-support-to-block-driver.patch @@ -1,9 +1,10 @@ -From 11bf5a3156abfd98d13cc5f03cd5f57e6dac06f3 Mon Sep 17 00:00:00 2001 +From 577b000e947d817cf4e0189615c0d0257cb20259 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Tue, 13 Nov 2012 10:03:52 +0100 -Subject: [PATCH v3 2/6] add basic backup support to block driver +Subject: [PATCH v3 2/7] add basic backup support to block driver -Function backup_job_start() creates a block job to backup a block device. +Function backup_job_create() creates a block job to backup a block device. +The coroutine is started with backup_job_start(). We call backup_do_cow() for each write during backup. That function reads the original data and pass it to backup_dump_cb(). @@ -15,12 +16,12 @@ Currently backup cluster size is hardcoded to 65536 bytes. 
Signed-off-by: Dietmar Maurer --- Makefile.objs | 1 + - backup.c | 302 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - backup.h | 30 ++++++ + backup.c | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + backup.h | 32 ++++++ block.c | 71 ++++++++++++- block.h | 2 + blockjob.h | 10 ++ - 6 files changed, 410 insertions(+), 6 deletions(-) + 6 files changed, 418 insertions(+), 6 deletions(-) create mode 100644 backup.c create mode 100644 backup.h @@ -38,10 +39,10 @@ index 3c7abca..cb46be5 100644 block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o diff --git a/backup.c b/backup.c new file mode 100644 -index 0000000..6a44974 +index 0000000..2c13e21 --- /dev/null +++ b/backup.c -@@ -0,0 +1,302 @@ +@@ -0,0 +1,308 @@ +/* + * QEMU backup + * @@ -82,10 +83,8 @@ index 0000000..6a44974 + void *opaque; +} BackupBlockJob; + -+static int backup_get_bitmap(BlockDriverState *bs, int64_t cluster_num) ++static int backup_get_bitmap(BackupBlockJob *job, int64_t cluster_num) +{ -+ assert(bs); -+ BackupBlockJob *job = (BackupBlockJob *)bs->job; + assert(job); + assert(job->bitmap); + @@ -101,11 +100,9 @@ index 0000000..6a44974 + return !!(val & (1UL << bit)); +} + -+static void backup_set_bitmap(BlockDriverState *bs, int64_t cluster_num, ++static void backup_set_bitmap(BackupBlockJob *job, int64_t cluster_num, + int dirty) +{ -+ assert(bs); -+ BackupBlockJob *job = (BackupBlockJob *)bs->job; + assert(job); + assert(job->bitmap); + @@ -156,13 +153,13 @@ index 0000000..6a44974 + bdrv_get_device_name(bs), start, sector_num, nb_sectors); + + for (; start < end; start++) { -+ if (backup_get_bitmap(bs, start)) { ++ if (backup_get_bitmap(job, start)) { + DPRINTF("brdv_co_backup_cow skip C%zd\n", start); + continue; /* already copied */ + } + + /* immediately set bitmap (avoid coroutine race) */ -+ backup_set_bitmap(bs, start, 1); ++ backup_set_bitmap(job, start, 1); + + DPRINTF("brdv_co_backup_cow C%zd\n", start); + @@ -244,15 +241,16 @@ index 
0000000..6a44974 + break; + } + -+ if (backup_get_bitmap(bs, start)) { ++ if (backup_get_bitmap(job, start)) { + continue; /* already copied */ + } + + /* we need to yield so that qemu_aio_flush() returns. + * (without, VM does not reboot) + * todo: can we avoid that? ++ * Note: use 1000 instead of 0 (0 priorize this task too much) + */ -+ co_sleep_ns(rt_clock, 0); ++ block_job_sleep_ns(&job->common, rt_clock, 1000); + if (block_job_is_cancelled(&job->common)) { + ret = -1; + break; @@ -298,10 +296,21 @@ index 0000000..6a44974 + g_free(job->bitmap); +} + ++void ++backup_job_start(BlockDriverState *bs) ++{ ++ assert(bs); ++ assert(bs->job); ++ assert(bs->job->co == NULL); ++ ++ bs->job->co = qemu_coroutine_create(backup_run); ++ qemu_coroutine_enter(bs->job->co, bs->job); ++} ++ +int -+backup_job_start(BlockDriverState *bs, BackupDumpFunc *backup_dump_cb, -+ BlockDriverCompletionFunc *backup_complete_cb, -+ void *opaque) ++backup_job_create(BlockDriverState *bs, BackupDumpFunc *backup_dump_cb, ++ BlockDriverCompletionFunc *backup_complete_cb, ++ void *opaque) +{ + assert(bs); + assert(backup_dump_cb); @@ -339,17 +348,15 @@ index 0000000..6a44974 + job->bitmap = g_new0(unsigned long, bitmap_size); + + job->common.len = bs->total_sectors*BDRV_SECTOR_SIZE; -+ job->common.co = qemu_coroutine_create(backup_run); -+ qemu_coroutine_enter(job->common.co, job); -+ ++ + return 0; +} diff --git a/backup.h b/backup.h new file mode 100644 -index 0000000..e1f0290 +index 0000000..87b9942 --- /dev/null +++ b/backup.h -@@ -0,0 +1,30 @@ +@@ -0,0 +1,32 @@ +/* + * QEMU backup related definitions + * @@ -375,13 +382,15 @@ index 0000000..e1f0290 +typedef int BackupDumpFunc(void *opaque, BlockDriverState *bs, + int64_t cluster_num, unsigned char *buf); + -+int backup_job_start(BlockDriverState *bs, BackupDumpFunc *backup_dump_cb, -+ BlockDriverCompletionFunc *backup_complete_cb, -+ void *opaque); ++void backup_job_start(BlockDriverState *bs); ++ ++int backup_job_create(BlockDriverState 
*bs, BackupDumpFunc *backup_dump_cb, ++ BlockDriverCompletionFunc *backup_complete_cb, ++ void *opaque); + +#endif /* QEMU_BACKUP_H */ diff --git a/block.c b/block.c -index c05875f..2f7c2eb 100644 +index c05875f..4de7fbd 100644 --- a/block.c +++ b/block.c @@ -54,6 +54,7 @@ @@ -465,7 +474,7 @@ index c05875f..2f7c2eb 100644 + if (bs->job && bs->job->job_type->before_read) { + ret = bs->job->job_type->before_read(bs, sector_num, nb_sectors, qiov); -+ if (flags & BDRV_REQ_BACKUP_ONLY) { ++ if ((ret < 0) || (flags & BDRV_REQ_BACKUP_ONLY)) { + /* Note: We do not return any data to the caller */ + goto out; + } diff --git a/debian/patches/0003-add-backup-related-monitor-commands.patch b/debian/patches/0003-add-backup-related-monitor-commands.patch index 79f5e32..6c57ee9 100644 --- a/debian/patches/0003-add-backup-related-monitor-commands.patch +++ b/debian/patches/0003-add-backup-related-monitor-commands.patch @@ -1,7 +1,7 @@ -From d97630338d62186229192d0724fae489fc500acc Mon Sep 17 00:00:00 2001 +From 7bab4498b955ced91ad838fb711e013d9b7ed81f Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Tue, 13 Nov 2012 11:27:56 +0100 -Subject: [PATCH v3 3/6] add backup related monitor commands +Subject: [PATCH v3 3/7] add backup related monitor commands We use a generic BackupDriver struct to encapsulate all archive format related function. @@ -13,22 +13,22 @@ could move the whole archive format related code out of qemu. 
Signed-off-by: Dietmar Maurer --- backup.h | 13 ++ - blockdev.c | 375 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + blockdev.c | 396 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ hmp-commands.hx | 31 +++++ hmp.c | 63 +++++++++ hmp.h | 3 + monitor.c | 7 + qapi-schema.json | 91 +++++++++++++ qmp-commands.hx | 27 ++++ - 8 files changed, 610 insertions(+), 0 deletions(-) + 8 files changed, 631 insertions(+), 0 deletions(-) diff --git a/backup.h b/backup.h -index e1f0290..ae4aa8c 100644 +index 87b9942..c6e5d3c 100644 --- a/backup.h +++ b/backup.h -@@ -27,4 +27,17 @@ int backup_job_start(BlockDriverState *bs, BackupDumpFunc *backup_dump_cb, - BlockDriverCompletionFunc *backup_complete_cb, - void *opaque); +@@ -29,4 +29,17 @@ int backup_job_create(BlockDriverState *bs, BackupDumpFunc *backup_dump_cb, + BlockDriverCompletionFunc *backup_complete_cb, + void *opaque); +typedef struct BackupDriver { + const char *format; @@ -45,7 +45,7 @@ index e1f0290..ae4aa8c 100644 + #endif /* QEMU_BACKUP_H */ diff --git a/blockdev.c b/blockdev.c -index e73fd6e..08ac7ad 100644 +index e73fd6e..68a8c55 100644 --- a/blockdev.c +++ b/blockdev.c @@ -20,6 +20,7 @@ @@ -56,12 +56,14 @@ index e73fd6e..08ac7ad 100644 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives); -@@ -1321,6 +1322,380 @@ void qmp_drive_mirror(const char *device, const char *target, +@@ -1321,6 +1322,401 @@ void qmp_drive_mirror(const char *device, const char *target, drive_get_ref(drive_get_by_blockdev(bs)); } +/* Backup related function */ + ++static void backup_run_next_job(void); ++ +static struct GenericBackupState { + Error *error; + uuid_t uuid; @@ -79,7 +81,6 @@ index e73fd6e..08ac7ad 100644 + +typedef struct BackupCB { + BlockDriverState *bs; -+ bool job_started; + uint8_t dev_id; + size_t size; + size_t transferred; @@ -121,14 +122,17 @@ index e73fd6e..08ac7ad 100644 + error_propagate(&backup_state.error, local_err); + backup_state.writer = NULL; + -+ GList *l = 
backup_state.bcb_list; -+ while (l) { -+ g_free(l->data); -+ l = g_list_next(l); -+ } -+ g_list_free(backup_state.bcb_list); -+ backup_state.bcb_list = NULL; + } ++ ++ GList *l = backup_state.bcb_list; ++ while (l) { ++ BackupCB *bcb = l->data; ++ drive_put_ref_bh_schedule(drive_get_by_blockdev(bcb->bs)); ++ g_free(l->data); ++ l = g_list_next(l); ++ } ++ g_list_free(backup_state.bcb_list); ++ backup_state.bcb_list = NULL; +} + +static void backup_complete_cb(void *opaque, int ret) @@ -148,6 +152,8 @@ index e73fd6e..08ac7ad 100644 + + if (g_list_length(backup_state.bcb_list) == 0) { + backup_cleanup(); ++ } else { ++ backup_run_next_job(); + } + + g_free(bcb); @@ -161,7 +167,7 @@ index e73fd6e..08ac7ad 100644 + while (l) { + BackupCB *bcb = l->data; + l = g_list_next(l); -+ if (bcb->bs->job && bcb->job_started) { ++ if (bcb->bs->job) { + block_job_cancel(bcb->bs->job); + job_count++; + } @@ -181,6 +187,15 @@ index e73fd6e..08ac7ad 100644 + backup_cancel(); +} + ++static void backup_run_next_job(void) ++{ ++ GList *l = backup_state.bcb_list; ++ if (l) { ++ BackupCB *bcb = l->data; ++ backup_job_start(bcb->bs); ++ } ++} ++ +static void backup_start_jobs(void) +{ + /* start all jobs (one for each device) */ @@ -189,20 +204,15 @@ index e73fd6e..08ac7ad 100644 + BackupCB *bcb = l->data; + l = g_list_next(l); + -+ if (backup_job_start(bcb->bs, backup_dump_cb, backup_complete_cb, -+ bcb) == 0) { -+ bcb->job_started = true; -+ /* Grab a reference so hotplug does not delete the -+ * BlockDriverState from underneath us. 
-+ */ -+ drive_get_ref(drive_get_by_blockdev(bcb->bs)); -+ } else { -+ if (!backup_state.error) { -+ error_setg(&backup_state.error, "backup_job_start failed"); -+ } ++ if (backup_job_create(bcb->bs, backup_dump_cb, backup_complete_cb, ++ bcb) != 0) { + backup_cancel(); -+ return; -+ } ++ break; ++ } ++ } ++ ++ if (!l) { /* no errors */ ++ backup_run_next_job(); + } +} + @@ -286,7 +296,8 @@ index e73fd6e..08ac7ad 100644 + + uuid_generate(uuid); + -+ writer = driver->open_cb(backupfile, uuid, speed, &local_err); ++ writer = driver->open_cb(backupfile, uuid, has_speed ? speed : 0, ++ &local_err); + if (!writer) { + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); @@ -361,6 +372,16 @@ index e73fd6e..08ac7ad 100644 + backup_state.transferred = 0; + backup_state.zero_bytes = 0; + ++ /* Grab a reference so hotplug does not delete the ++ * BlockDriverState from underneath us. ++ */ ++ l = bcblist; ++ while (l) { ++ BackupCB *bcb = l->data; ++ l = g_list_next(l); ++ drive_get_ref(drive_get_by_blockdev(bcb->bs)); ++ } ++ + backup_start_jobs(); + + return g_strdup(backup_state.uuid_str); @@ -487,7 +508,7 @@ index 010b8c9..57be357 100644 show current migration capabilities @item info migrate_cache_size diff --git a/hmp.c b/hmp.c -index 180ba2b..77076fa 100644 +index 180ba2b..27fd421 100644 --- a/hmp.c +++ b/hmp.c @@ -130,6 +130,38 @@ void hmp_info_mice(Monitor *mon) @@ -554,8 +575,8 @@ index 180ba2b..77076fa 100644 + + Error *errp = NULL; + -+ qmp_backup(backupfile, true, BACKUP_FORMAT_VMA, false, NULL, !!devlist, -+ devlist, qdict_haskey(qdict, "speed"), speed, &errp); ++ qmp_backup(backupfile, true, BACKUP_FORMAT_VMA, false, NULL, !!devlist, ++ devlist, qdict_haskey(qdict, "speed"), speed, &errp); + + if (error_is_set(&errp)) { + monitor_printf(mon, "%s\n", error_get_pretty(errp)); diff --git a/debian/patches/0004-introduce-new-vma-archive-format.patch b/debian/patches/0004-introduce-new-vma-archive-format.patch index 2c24f26..29fa10e 100644 --- 
a/debian/patches/0004-introduce-new-vma-archive-format.patch +++ b/debian/patches/0004-introduce-new-vma-archive-format.patch @@ -1,7 +1,7 @@ -From 5fbe9dc9bb921a1ee4814028d42c4fc46b04e172 Mon Sep 17 00:00:00 2001 +From 77a6564c9985c37990c3e90f558f2ef48d6d5f15 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Tue, 13 Nov 2012 11:11:38 +0100 -Subject: [PATCH v3 4/6] introduce new vma archive format +Subject: [PATCH v3 4/7] introduce new vma archive format This is a very simple archive format, see docs/specs/vma_spec.txt @@ -11,11 +11,11 @@ Signed-off-by: Dietmar Maurer Makefile.objs | 2 +- blockdev.c | 6 +- docs/specs/vma_spec.txt | 24 ++ - vma-reader.c | 801 +++++++++++++++++++++++++++++++++++++++++ - vma-writer.c | 920 +++++++++++++++++++++++++++++++++++++++++++++++ - vma.c | 558 ++++++++++++++++++++++++++++ + vma-reader.c | 801 ++++++++++++++++++++++++++++++++++++++++ + vma-writer.c | 931 +++++++++++++++++++++++++++++++++++++++++++++++ + vma.c | 561 ++++++++++++++++++++++++++++ vma.h | 146 ++++++++ - 8 files changed, 2456 insertions(+), 4 deletions(-) + 8 files changed, 2470 insertions(+), 4 deletions(-) create mode 100644 docs/specs/vma_spec.txt create mode 100644 vma-reader.c create mode 100644 vma-writer.c @@ -57,7 +57,7 @@ index cb46be5..b5732e2 100644 block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o diff --git a/blockdev.c b/blockdev.c -index 08ac7ad..e16091f 100644 +index 68a8c55..80cb04d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -21,6 +21,7 @@ @@ -68,7 +68,7 @@ index 08ac7ad..e16091f 100644 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives); -@@ -1483,10 +1484,11 @@ char *qmp_backup(const char *backupfile, bool has_format, BackupFormat format, +@@ -1493,10 +1494,11 @@ char *qmp_backup(const char *backupfile, bool has_format, BackupFormat format, /* Todo: try to auto-detect format based on file name */ format = has_format ? 
format : BACKUP_FORMAT_VMA; @@ -114,7 +114,7 @@ index 0000000..052c629 + diff --git a/vma-reader.c b/vma-reader.c new file mode 100644 -index 0000000..b6a550b +index 0000000..2217a94 --- /dev/null +++ b/vma-reader.c @@ -0,0 +1,801 @@ @@ -649,7 +649,7 @@ index 0000000..b6a550b + return 0; +} +static int restore_extent(VmaReader *vmar, unsigned char *buf, -+ int extent_size, int vmstate_fd, ++ int extent_size, int vmstate_fd, + bool verbose, Error **errp) +{ + assert(vmar); @@ -703,7 +703,7 @@ index 0000000..b6a550b + time_t duration = time(NULL) - vmar->start_time; + int percent = (vmar->clusters_read*100)/vmar->cluster_count; + if (percent != vmar->clusters_read_per) { -+ printf("progress %d%% (read %zd bytes, duration %zd sec)\n", ++ printf("progress %d%% (read %zd bytes, duration %zd sec)\n", + percent, vmar->clusters_read*VMA_CLUSTER_SIZE, + duration); + fflush(stdout); @@ -802,7 +802,7 @@ index 0000000..b6a550b +} + +int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose, -+ Error **errp) ++ Error **errp) +{ + assert(vmar); + assert(vmar->head_data); @@ -868,7 +868,7 @@ index 0000000..b6a550b + return -1; + } + -+ if (restore_extent(vmar, buf, extent_size, vmstate_fd, verbose, ++ if (restore_extent(vmar, buf, extent_size, vmstate_fd, verbose, + errp) < 0) { + return -1; + } @@ -921,10 +921,10 @@ index 0000000..b6a550b + diff --git a/vma-writer.c b/vma-writer.c new file mode 100644 -index 0000000..c1e1afe +index 0000000..688af4b --- /dev/null +++ b/vma-writer.c -@@ -0,0 +1,920 @@ +@@ -0,0 +1,931 @@ +/* + * VMA: Virtual Machine Archive + * @@ -996,7 +996,6 @@ index 0000000..c1e1afe + CoMutex flush_lock; + Coroutine *co_writer; + RateLimit limit; -+ uint64_t delay_ns; + + /* drive informations */ + VmaStreamInfo stream_info[256]; @@ -1314,22 +1313,22 @@ index 0000000..c1e1afe + } else { + struct stat st; + int oflags; -+ const char *tmp_id_str; ++ const char *tmp_id_str; + + if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) { + oflags = 
O_NONBLOCK|O_WRONLY; -+ vmaw->fd = qemu_open(filename, oflags, 0644); ++ vmaw->fd = qemu_open(filename, oflags, 0644); + } else if (strstart(filename, "/dev/fdset/", &tmp_id_str)) { + oflags = O_NONBLOCK|O_WRONLY; + vmaw->fd = qemu_open(filename, oflags, 0644); -+ } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) { ++ } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) { + vmaw->fd = monitor_get_fd(cur_mon, tmp_id_str, errp); + if (vmaw->fd < 0) { + goto err; + } -+ } else { ++ } else { + oflags = O_NONBLOCK|O_WRONLY|O_CREAT|O_EXCL; -+ vmaw->fd = qemu_open(filename, oflags, 0644); ++ vmaw->fd = qemu_open(filename, oflags, 0644); + } + + if (vmaw->fd < 0) { @@ -1579,24 +1578,31 @@ index 0000000..c1e1afe + return vmaw->status; +} + -+static int vma_writer_get_buffer(VmaWriter *vmaw) ++static int vma_writer_get_buffer(VmaWriter *vmaw, size_t bytes) +{ ++ int ret = 0; + -+ /* wait until buffer is available */ -+ while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) { -+ int res = 0; ++ qemu_co_mutex_lock(&vmaw->flush_lock); + -+ qemu_co_mutex_lock(&vmaw->flush_lock); -+ res = vma_writer_flush(vmaw); -+ qemu_co_mutex_unlock(&vmaw->flush_lock); ++ /* rate limit */ ++ uint64_t delay_ns = ratelimit_calculate_delay(&vmaw->limit, bytes); ++ if (delay_ns) { ++ DPRINTF("DELAY %zd\n", delay_ns); ++ co_sleep_ns(rt_clock, delay_ns); ++ } + -+ if (res < 0) { ++ /* wait until buffer is available */ ++ while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) { ++ ret = vma_writer_flush(vmaw); ++ if (ret < 0) { + vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed"); -+ return -1; ++ break; + } + } + -+ return 0; ++ qemu_co_mutex_unlock(&vmaw->flush_lock); ++ ++ return ret; +} + + @@ -1643,34 +1649,39 @@ index 0000000..c1e1afe + return -1; + } + ++ /* detect block containing zeroes */ ++ int i; ++ int bit = 1; ++ uint16_t mask = 0; ++ size_t real_size = 0; ++ for (i = 0; i < 16; i++) { ++ unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE); ++ if 
(!buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) { ++ mask |= bit; ++ real_size += VMA_BLOCK_SIZE; ++ } ++ bit = bit << 1; ++ } ++ + /* wait until buffer is available */ -+ if (vma_writer_get_buffer(vmaw) < 0) { ++ if (vma_writer_get_buffer(vmaw, real_size) < 0) { + vma_writer_set_error(vmaw, "vma_writer_write: " + "vma_writer_get_buffer failed"); + return -1; + } + -+ DPRINTF("VMA WRITE %zd\n", cluster_num); ++ DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num); + -+ int i; -+ int bit = 1; -+ uint16_t mask = 0; ++ bit = 1; + for (i = 0; i < 16; i++) { + unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE); -+ if (buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) { ++ if (mask & bit) { ++ memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock, VMA_BLOCK_SIZE); ++ vmaw->outbuf_pos += VMA_BLOCK_SIZE; ++ } else { + DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i); + vmaw->stream_info[dev_id].zero_bytes += VMA_BLOCK_SIZE; + *zero_bytes += VMA_BLOCK_SIZE; -+ } else { -+ mask |= bit; -+ memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock, VMA_BLOCK_SIZE); -+ vmaw->outbuf_pos += VMA_BLOCK_SIZE; -+ -+ vmaw->delay_ns = ratelimit_calculate_delay(&vmaw->limit, -+ VMA_BLOCK_SIZE); -+ if (vmaw->delay_ns) { -+ co_sleep_ns(rt_clock, vmaw->delay_ns); -+ } + } + + bit = bit << 1; @@ -1679,7 +1690,7 @@ index 0000000..c1e1afe + uint64_t block_info = ((uint64_t)mask) << (32+16); + block_info |= ((uint64_t)dev_id) << 32; + block_info |= (cluster_num & 0xffffffff); -+ vmaw->outbuf_block_info[vmaw->outbuf_count] = block_info; ++ vmaw->outbuf_block_info[vmaw->outbuf_count] = block_info; + + DPRINTF("VMA WRITE MASK %zd %zx\n", cluster_num, block_info); + @@ -1847,10 +1858,10 @@ index 0000000..c1e1afe + diff --git a/vma.c b/vma.c new file mode 100644 -index 0000000..d50a312 +index 0000000..9b47b92 --- /dev/null +++ b/vma.c -@@ -0,0 +1,558 @@ +@@ -0,0 +1,561 @@ +/* + * VMA: Virtual Machine Archive + * @@ -2015,7 +2026,7 @@ index 0000000..d50a312 + case 'r': + readmap = optarg; + break; -+ case 'v': ++ case 
'v': + verbose = 1; + break; + default: @@ -2316,9 +2327,12 @@ index 0000000..d50a312 + bcb->vmaw = vmaw; + bcb->dev_id = dev_id; + -+ if (backup_job_start(bs, backup_dump_cb, backup_complete_cb, bcb) < 0) { ++ if (backup_job_create(bs, backup_dump_cb, backup_complete_cb, ++ bcb) < 0) { + unlink(archivename); + g_error("backup_job_start failed"); ++ } else { ++ backup_job_start(bs); + } + } + @@ -2411,7 +2425,7 @@ index 0000000..d50a312 +} diff --git a/vma.h b/vma.h new file mode 100644 -index 0000000..f30a2bc +index 0000000..689e639 --- /dev/null +++ b/vma.h @@ -0,0 +1,146 @@ @@ -2557,8 +2571,8 @@ index 0000000..f30a2bc +int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, + BlockDriverState *bs, bool write_zeroes, + Error **errp); -+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose, -+ Error **errp); ++int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose, ++ Error **errp); + +#endif /* BACKUP_VMA_H */ -- diff --git a/debian/patches/0005-add-regression-tests-for-backup.patch b/debian/patches/0005-add-regression-tests-for-backup.patch index 0e52839..7f9545a 100644 --- a/debian/patches/0005-add-regression-tests-for-backup.patch +++ b/debian/patches/0005-add-regression-tests-for-backup.patch @@ -1,15 +1,17 @@ -From 6a409b2401390d672b7f2a105d8446475fa5bee2 Mon Sep 17 00:00:00 2001 +From 37ec0e0badcbf3cc6d3959f1867b8a1d6db3be13 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Wed, 14 Nov 2012 09:57:04 +0100 -Subject: [PATCH v3 5/6] add regression tests for backup +Subject: [PATCH v3 5/7] add regression tests for backup Simple regression tests using vma-reader and vma-writer. +Note: the call to g_thread_init() solves problems with g_slice_alloc() - without that call we get arbitrary crashes. 
+ Signed-off-by: Dietmar Maurer --- tests/Makefile | 11 +- - tests/backup-test.c | 511 +++++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 520 insertions(+), 2 deletions(-) + tests/backup-test.c | 515 +++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 524 insertions(+), 2 deletions(-) create mode 100644 tests/backup-test.c diff --git a/tests/Makefile b/tests/Makefile @@ -52,10 +54,10 @@ index b60f0fb..cffbd22 100644 -include $(wildcard tests/*.d) diff --git a/tests/backup-test.c b/tests/backup-test.c new file mode 100644 -index 0000000..b14e9a0 +index 0000000..813590e --- /dev/null +++ b/tests/backup-test.c -@@ -0,0 +1,511 @@ +@@ -0,0 +1,515 @@ +/* + * QEMU backup test suit + * @@ -430,8 +432,10 @@ index 0000000..b14e9a0 + bcb.vmaw = vmaw; + bcb.dev_id = vma_writer_register_stream(vmaw, bdrv_get_device_name(bs), + bdrv_getlength(bs)); -+ if (backup_job_start(bs, backup_dump_cb, backup_complete_cb, &bcb) < 0) { -+ g_error("backup_job_start failed"); ++ if (backup_job_create(bs, backup_dump_cb, backup_complete_cb, &bcb) < 0) { ++ g_error("backup_job_create failed"); ++ } else { ++ backup_job_start(bs); + } + + request_term = false; @@ -511,6 +515,8 @@ index 0000000..b14e9a0 +{ + int c; + ++ g_thread_init(NULL); ++ + for (;;) { + c = getopt(argc, argv, "hdl"); + if (c == -1) { diff --git a/debian/patches/0006-add-vm-state-to-backups.patch b/debian/patches/0006-add-vm-state-to-backups.patch index 9bf11a4..5842e1e 100644 --- a/debian/patches/0006-add-vm-state-to-backups.patch +++ b/debian/patches/0006-add-vm-state-to-backups.patch @@ -1,18 +1,18 @@ -From ea2827e070e10b515be9b05628e372d281fcf868 Mon Sep 17 00:00:00 2001 +From 9c8e193692a2f96116793ddbe3fa89650985f272 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Thu, 29 Nov 2012 10:46:49 +0100 -Subject: [PATCH v3 6/6] add vm state to backups +Subject: [PATCH v3 6/7] add vm state to backups Signed-off-by: Dietmar Maurer --- blockdev.c | 196 
+++++++++++++++++++++++++++++++++++++++++++++++++++++- - hmp.c | 5 +- + hmp.c | 3 +- qapi-schema.json | 6 +- - 3 files changed, 201 insertions(+), 6 deletions(-) + 3 files changed, 200 insertions(+), 5 deletions(-) diff --git a/blockdev.c b/blockdev.c -index e16091f..a5e2058 100644 +index 80cb04d..96d7a3b 100644 --- a/blockdev.c +++ b/blockdev.c @@ -22,6 +22,8 @@ @@ -24,7 +24,7 @@ index e16091f..a5e2058 100644 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives); -@@ -1338,6 +1340,10 @@ static struct GenericBackupState { +@@ -1340,6 +1342,10 @@ static struct GenericBackupState { size_t total; size_t transferred; size_t zero_bytes; @@ -35,7 +35,7 @@ index e16091f..a5e2058 100644 } backup_state; typedef struct BackupCB { -@@ -1469,10 +1475,170 @@ static void backup_start_jobs(void) +@@ -1479,10 +1485,170 @@ static void backup_start_jobs(void) } } @@ -167,7 +167,7 @@ index e16091f..a5e2058 100644 + err = g_strdup("backup_start_savevm: complete_cb failed"); + goto abort; + } -+ backup_start_jobs(); ++ backup_run_next_job(); + goto out; + } + } @@ -207,7 +207,7 @@ index e16091f..a5e2058 100644 { BlockDriverState *bs; Error *local_err = NULL; -@@ -1481,6 +1647,8 @@ char *qmp_backup(const char *backupfile, bool has_format, BackupFormat format, +@@ -1491,6 +1657,8 @@ char *qmp_backup(const char *backupfile, bool has_format, BackupFormat format, gchar **devs = NULL; GList *bcblist = NULL; @@ -216,7 +216,7 @@ index e16091f..a5e2058 100644 /* Todo: try to auto-detect format based on file name */ format = has_format ? 
format : BACKUP_FORMAT_VMA; -@@ -1561,6 +1729,22 @@ char *qmp_backup(const char *backupfile, bool has_format, BackupFormat format, +@@ -1572,6 +1740,22 @@ char *qmp_backup(const char *backupfile, bool has_format, BackupFormat format, size_t total = 0; /* register all devices for vma writer */ @@ -239,7 +239,7 @@ index e16091f..a5e2058 100644 l = bcblist; while (l) { BackupCB *bcb = l->data; -@@ -1624,8 +1808,16 @@ char *qmp_backup(const char *backupfile, bool has_format, BackupFormat format, +@@ -1635,6 +1819,9 @@ char *qmp_backup(const char *backupfile, bool has_format, BackupFormat format, backup_state.total = total; backup_state.transferred = 0; backup_state.zero_bytes = 0; @@ -247,32 +247,36 @@ index e16091f..a5e2058 100644 + backup_state.buf_cluster_num = 0; + backup_state.vmstate_dev_id = vmstate_dev_id; + /* Grab a reference so hotplug does not delete the + * BlockDriverState from underneath us. +@@ -1646,7 +1833,12 @@ char *qmp_backup(const char *backupfile, bool has_format, BackupFormat format, + drive_get_ref(drive_get_by_blockdev(bcb->bs)); + } + - backup_start_jobs(); + if (save_state) { + Coroutine *co = qemu_coroutine_create(backup_start_savevm); + qemu_coroutine_enter(co, NULL); + } else { -+ backup_start_jobs(); ++ backup_start_jobs(); + } return g_strdup(backup_state.uuid_str); diff --git a/hmp.c b/hmp.c -index 77076fa..61ba5c3 100644 +index 27fd421..56e1cdc 100644 --- a/hmp.c +++ b/hmp.c -@@ -1031,8 +1031,9 @@ void hmp_backup(Monitor *mon, const QDict *qdict) +@@ -1031,7 +1031,8 @@ void hmp_backup(Monitor *mon, const QDict *qdict) Error *errp = NULL; - qmp_backup(backupfile, true, BACKUP_FORMAT_VMA, false, NULL, !!devlist, -- devlist, qdict_haskey(qdict, "speed"), speed, &errp); -- -+ devlist, qdict_haskey(qdict, "speed"), speed, false, false, + qmp_backup(backupfile, true, BACKUP_FORMAT_VMA, false, NULL, !!devlist, +- devlist, qdict_haskey(qdict, "speed"), speed, &errp); ++ devlist, qdict_haskey(qdict, "speed"), speed, false, false, + &errp); -+ 
+ if (error_is_set(&errp)) { monitor_printf(mon, "%s\n", error_get_pretty(errp)); - error_free(errp); diff --git a/qapi-schema.json b/qapi-schema.json index 2d3699b..3f4889e 100644 --- a/qapi-schema.json diff --git a/debian/patches/0007-use-extra-thread-for-vma-writer.patch b/debian/patches/0007-use-extra-thread-for-vma-writer.patch new file mode 100644 index 0000000..f7836c1 --- /dev/null +++ b/debian/patches/0007-use-extra-thread-for-vma-writer.patch @@ -0,0 +1,451 @@ +From 9cf799746fb3b21362fd62574cd76bfa14a24070 Mon Sep 17 00:00:00 2001 +From: Dietmar Maurer +Date: Mon, 14 Jan 2013 08:05:40 +0100 +Subject: [PATCH v3 7/7] use extra thread for vma writer + +The previous AIO approach has problem with bdrv_drain_all(), because writer +coroutines are not considered there. Those coroutines are not restarted, so +bdrv_drain_all() can fail (tracked_requests list not empty). + +We now use a thread, so we could also add compression here. + +Signed-off-by: Dietmar Maurer +--- + vma-writer.c | 296 +++++++++++++++++++++++++++++++++++----------------------- + 1 files changed, 180 insertions(+), 116 deletions(-) + +diff --git a/vma-writer.c b/vma-writer.c +index 688af4b..e18591e 100644 +--- a/vma-writer.c ++++ b/vma-writer.c +@@ -38,13 +38,20 @@ + + #define WRITE_BUFFERS 5 + +-typedef struct VmaAIOCB VmaAIOCB; +-struct VmaAIOCB { +- VmaWriter *vmaw; ++typedef struct WriteBuffer { + unsigned char buffer[VMA_MAX_EXTENT_SIZE]; + size_t bytes; +- Coroutine *co; +-}; ++} WriteBuffer; ++ ++typedef struct WriterThread { ++ int fd; ++ int error; ++ bool cancel; ++ GThread *thread; ++ GMutex *mutex; ++ GCond *change_cond; ++ WriteBuffer wbuf[WRITE_BUFFERS]; ++} WriterThread; + + struct VmaWriter { + int fd; +@@ -61,8 +68,7 @@ struct VmaWriter { + int outbuf_count; /* in VMA_BLOCKS */ + uint64_t outbuf_block_info[VMA_BLOCKS_PER_EXTENT]; + +- VmaAIOCB aiocbs[WRITE_BUFFERS]; +- CoQueue wqueue; ++ WriterThread wt; + + GChecksum *md5csum; + CoMutex writer_lock; +@@ -88,6 +94,80 @@ struct 
VmaWriter { + uint32_t config_count; + }; + ++static gpointer vma_writer_thread(gpointer data) ++{ ++ WriterThread *wt = (WriterThread *)data; ++ ++ while (1) { ++ WriteBuffer *b = NULL; ++ ++ g_mutex_lock(wt->mutex); ++ int i; ++ for (i = 0; i < WRITE_BUFFERS; i++) { ++ if (wt->wbuf[i].bytes) { ++ b = &wt->wbuf[i]; ++ break; ++ } ++ } ++ g_mutex_unlock(wt->mutex); ++ ++ if (b) { ++ size_t done = 0; ++ while (done < b->bytes) { ++ int ret = write(wt->fd, b->buffer + done, b->bytes - done); ++ if (ret > 0) { ++ done += ret; ++ } else if (ret < 0) { ++ if (!(errno == EAGAIN || errno == EWOULDBLOCK)) { ++ g_mutex_lock(wt->mutex); ++ wt->error = errno; ++ g_mutex_unlock(wt->mutex); ++ break; ++ } ++ } else if (ret == 0) { ++ /* should not happen - simply try again */ ++ } ++ } ++ g_mutex_lock(wt->mutex); ++ b->bytes = 0; ++ g_mutex_unlock(wt->mutex); ++ } ++ ++ if (wt->error) { ++ DPRINTF("WRITER THREAD ERROR %d - exit thread\n", wt->error); ++ g_thread_exit(NULL); ++ } ++ ++ g_mutex_lock(wt->mutex); ++ bool cancel = wt->cancel; ++ if (!b && !cancel) { ++ g_cond_wait(wt->change_cond, wt->mutex); ++ } ++ g_mutex_unlock(wt->mutex); ++ ++ if (cancel) { ++ DPRINTF("END WRITER THREAD\n"); ++ g_thread_exit(NULL); ++ } ++ } ++ ++ return NULL; ++} ++ ++static void vma_stop_writer_thread(VmaWriter *vmaw) ++{ ++ assert(vmaw); ++ ++ if (vmaw->wt.thread) { ++ g_mutex_lock(vmaw->wt.mutex); ++ vmaw->wt.cancel = true; ++ g_cond_signal(vmaw->wt.change_cond); ++ g_mutex_unlock(vmaw->wt.mutex); ++ g_thread_join(vmaw->wt.thread); ++ vmaw->wt.thread = NULL; ++ } ++} ++ + void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...) 
+ { + va_list ap; +@@ -215,111 +295,47 @@ int vma_writer_register_stream(VmaWriter *vmaw, const char *devname, + return n; + } + +-static void vma_co_continue_write(void *opaque) +-{ +- VmaWriter *vmaw = opaque; +- +- qemu_aio_set_fd_handler(vmaw->fd, NULL, NULL, NULL, NULL); +- +- DPRINTF("vma_co_continue_write\n"); +- qemu_coroutine_enter(vmaw->co_writer, NULL); +-} +- +-static ssize_t coroutine_fn +-vma_co_write(VmaWriter *vmaw, const void *buf, size_t bytes) +-{ +- size_t done = 0; +- ssize_t ret; +- +- /* atomic writes (we cannot interleave writes) */ +- qemu_co_mutex_lock(&vmaw->writer_lock); +- +- DPRINTF("vma_co_write enter %zd\n", bytes); +- +- while (done < bytes) { +- ret = write(vmaw->fd, buf + done, bytes - done); +- if (ret > 0) { +- done += ret; +- DPRINTF("vma_co_write written %zd %zd\n", done, ret); +- } else if (ret < 0) { +- if (errno == EAGAIN || errno == EWOULDBLOCK) { +- DPRINTF("vma_co_write yield %zd\n", done); +- +- vmaw->co_writer = qemu_coroutine_self(); +- qemu_aio_set_fd_handler(vmaw->fd, NULL, vma_co_continue_write, +- NULL, vmaw); +- +- qemu_coroutine_yield(); +- DPRINTF("vma_co_write restart %zd\n", done); +- } else { +- vma_writer_set_error(vmaw, "vma_co_write write error - %s", +- strerror(errno)); +- done = -1; /* always return failure for partial writes */ +- break; +- } +- } else if (ret == 0) { +- /* should not happen - simply try again */ +- } +- } +- +- qemu_co_mutex_unlock(&vmaw->writer_lock); +- +- DPRINTF("vma_co_write leave %zd\n", done); +- return done; +-} +- +-static void coroutine_fn vma_co_writer_task(void *opaque) +-{ +- VmaAIOCB *cb = opaque; +- +- DPRINTF("vma_co_writer_task start\n"); +- +- int64_t done = vma_co_write(cb->vmaw, cb->buffer, cb->bytes); +- DPRINTF("vma_co_writer_task write done %zd\n", done); +- +- if (done != cb->bytes) { +- DPRINTF("vma_co_writer_task failed write %zd %zd", cb->bytes, done); +- vma_writer_set_error(cb->vmaw, "vma_co_writer_task failed write %zd", +- done); +- } +- +- cb->bytes = 
0; +- +- qemu_co_queue_next(&cb->vmaw->wqueue); +- +- DPRINTF("vma_co_writer_task end\n"); +-} +- + static void coroutine_fn vma_queue_flush(VmaWriter *vmaw) + { + DPRINTF("vma_queue_flush enter\n"); + + assert(vmaw); + ++ int error; ++ + while (1) { + int i; +- VmaAIOCB *cb = NULL; ++ WriteBuffer *b = NULL; ++ g_mutex_lock(vmaw->wt.mutex); ++ ++ error = vmaw->wt.error; ++ + for (i = 0; i < WRITE_BUFFERS; i++) { +- if (vmaw->aiocbs[i].bytes) { +- cb = &vmaw->aiocbs[i]; +- DPRINTF("FOUND USED AIO BUFFER %d %zd\n", i, +- vmaw->aiocbs[i].bytes); ++ if (vmaw->wt.wbuf[i].bytes) { ++ b = &vmaw->wt.wbuf[i]; ++ DPRINTF("FOUND USED WRITE BUFFER %d %zd\n", i, ++ vmaw->wt.wbuf[i].bytes); + break; + } + } +- if (!cb) { ++ g_mutex_unlock(vmaw->wt.mutex); ++ ++ if (!b || error) { + break; + } +- qemu_co_queue_wait(&vmaw->wqueue); ++ uint64_t delay_ns = 10000; ++ DPRINTF("WAIT FOR BUFFER FLUSH %zd\n", delay_ns); ++ co_sleep_ns(rt_clock, delay_ns); ++ } ++ ++ if (error) { ++ vma_writer_set_error(vmaw, "vma_queue_flush write error - %s", ++ strerror(error)); + } + + DPRINTF("vma_queue_flush leave\n"); + } + +-/** +- * NOTE: pipe buffer size in only 4096 bytes on linux (see 'ulimit -a') +- * So we need to create a coroutione to allow 'parallel' execution. 
+- */ + static ssize_t coroutine_fn + vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes) + { +@@ -329,29 +345,46 @@ vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes) + assert(buf); + assert(bytes <= VMA_MAX_EXTENT_SIZE); + +- VmaAIOCB *cb = NULL; +- while (!cb) { ++ int error = 0; ++ ++ /* wait for a free output buffer */ ++ g_mutex_lock(vmaw->wt.mutex); ++ WriteBuffer *b = NULL; ++ while (!b) { ++ error = vmaw->wt.error; ++ if (error) { ++ g_mutex_unlock(vmaw->wt.mutex); ++ vma_writer_set_error(vmaw, "vma_queue_write error - %s", ++ strerror(error)); ++ return -1; ++ } ++ + int i; + for (i = 0; i < WRITE_BUFFERS; i++) { +- if (!vmaw->aiocbs[i].bytes) { +- cb = &vmaw->aiocbs[i]; ++ if (!vmaw->wt.wbuf[i].bytes) { ++ b = &vmaw->wt.wbuf[i]; + break; + } + } +- if (!cb) { +- qemu_co_queue_wait(&vmaw->wqueue); ++ if (!b) { ++ uint64_t delay_ns = 10000; ++ DPRINTF("WAIT FOR BUFFER %zd\n", delay_ns); ++ g_mutex_unlock(vmaw->wt.mutex); ++ co_sleep_ns(rt_clock, delay_ns); ++ g_mutex_lock(vmaw->wt.mutex); + } + } + +- memcpy(cb->buffer, buf, bytes); +- cb->bytes = bytes; +- cb->vmaw = vmaw; ++ /* copy data to output buffer */ ++ memcpy(b->buffer, buf, bytes); ++ b->bytes = bytes; ++ ++ /* signal writer thread that we have new data */ ++ g_cond_signal(vmaw->wt.change_cond); + +- DPRINTF("vma_queue_write start %zd\n", bytes); +- cb->co = qemu_coroutine_create(vma_co_writer_task); +- qemu_coroutine_enter(cb->co, cb); ++ g_mutex_unlock(vmaw->wt.mutex); + +- DPRINTF("vma_queue_write leave\n"); ++ DPRINTF("vma_queue_write queued %zd\n", bytes); + + return bytes; + } +@@ -389,10 +422,10 @@ VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, int64_t speed, + const char *tmp_id_str; + + if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) { +- oflags = O_NONBLOCK|O_WRONLY; ++ oflags = O_WRONLY; + vmaw->fd = qemu_open(filename, oflags, 0644); + } else if (strstart(filename, "/dev/fdset/", &tmp_id_str)) { +- oflags = O_NONBLOCK|O_WRONLY; ++ 
oflags = O_WRONLY; + vmaw->fd = qemu_open(filename, oflags, 0644); + } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) { + vmaw->fd = monitor_get_fd(cur_mon, tmp_id_str, errp); +@@ -400,7 +433,7 @@ VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, int64_t speed, + goto err; + } + } else { +- oflags = O_NONBLOCK|O_WRONLY|O_CREAT|O_EXCL; ++ oflags = O_WRONLY|O_CREAT|O_EXCL; + vmaw->fd = qemu_open(filename, oflags, 0644); + } + +@@ -418,7 +451,6 @@ VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, int64_t speed, + + qemu_co_mutex_init(&vmaw->writer_lock); + qemu_co_mutex_init(&vmaw->flush_lock); +- qemu_co_queue_init(&vmaw->wqueue); + + uuid_copy(vmaw->uuid, uuid); + +@@ -428,6 +460,15 @@ VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, int64_t speed, + + ratelimit_set_speed(&vmaw->limit, speed, 100000000ULL /* 0.1 sec */); + ++ vmaw->wt.mutex = g_mutex_new(); ++ vmaw->wt.change_cond = g_cond_new(); ++ vmaw->wt.fd = vmaw->fd; ++ vmaw->wt.thread = g_thread_create(vma_writer_thread, &vmaw->wt, true, NULL); ++ if (vmaw->wt.thread == NULL) { ++ error_setg(errp, "can't allocate writer thread\n"); ++ goto err; ++ } ++ + return vmaw; + + err: +@@ -442,6 +483,14 @@ err: + g_checksum_free(vmaw->md5csum); + } + ++ if (vmaw->wt.mutex) { ++ g_mutex_free(vmaw->wt.mutex); ++ } ++ ++ if (vmaw->wt.change_cond) { ++ g_cond_free(vmaw->wt.change_cond); ++ } ++ + g_free(vmaw); + } + +@@ -688,6 +737,16 @@ vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num, + + *zero_bytes = 0; + ++ g_mutex_lock(vmaw->wt.mutex); ++ int error = vmaw->wt.error; ++ g_mutex_unlock(vmaw->wt.mutex); ++ ++ if (error) { ++ vma_writer_set_error(vmaw, "vma_writer_get_buffer write error - %s", ++ strerror(error)); ++ return -1; ++ } ++ + if (vmaw->status < 0) { + return vmaw->status; + } +@@ -801,11 +860,7 @@ int vma_writer_close(VmaWriter *vmaw, Error **errp) + + vma_queue_flush(vmaw); + +- /* this should not happen - just to be sure */ +- while 
(!qemu_co_queue_empty(&vmaw->wqueue)) { +- DPRINTF("vma_writer_close wait\n"); +- co_sleep_ns(rt_clock, 1000000); +- } ++ vma_stop_writer_thread(vmaw); + + if (vmaw->cmd) { + if (pclose(vmaw->cmd) < 0) { +@@ -851,8 +906,9 @@ void vma_writer_destroy(VmaWriter *vmaw) + { + assert(vmaw); + +- int i; ++ vma_stop_writer_thread(vmaw); + ++ int i; + for (i = 0; i <= 255; i++) { + if (vmaw->stream_info[i].devname) { + g_free(vmaw->stream_info[i].devname); +@@ -863,6 +919,14 @@ void vma_writer_destroy(VmaWriter *vmaw) + g_checksum_free(vmaw->md5csum); + } + ++ if (vmaw->wt.mutex) { ++ g_mutex_free(vmaw->wt.mutex); ++ } ++ ++ if (vmaw->wt.change_cond) { ++ g_cond_free(vmaw->wt.change_cond); ++ } ++ + g_free(vmaw); + } + +-- +1.7.2.5 + diff --git a/debian/patches/series b/debian/patches/series index 87b2068..18243d8 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -18,6 +18,7 @@ enable-kvm-by-default.patch 0004-introduce-new-vma-archive-format.patch 0005-add-regression-tests-for-backup.patch 0006-add-vm-state-to-backups.patch +0007-use-extra-thread-for-vma-writer.patch virtio-balloon-drop-old-stats-code.patch virtio-balloon-re-enable-balloon-stats.patch virtio-balloon-document-stats.patch -- 2.39.2