]> git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0022-PVE-Deprecated-adding-old-vma-files.patch
update qemu submodule to v4.0.0
[pve-qemu.git] / debian / patches / pve / 0022-PVE-Deprecated-adding-old-vma-files.patch
CommitLineData
23102ed6 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
67af0fa4
WB
2From: Wolfgang Bumiller <w.bumiller@proxmox.com>
3Date: Mon, 7 Aug 2017 08:51:16 +0200
53e83913 4Subject: [PATCH] PVE: [Deprecated] adding old vma files
95259824 5
53e83913 6TODO: Move to using a libvma block backend
95259824 7---
67af0fa4
WB
8 Makefile | 3 +-
9 Makefile.objs | 1 +
53e83913 10 block/backup.c | 107 ++++--
67af0fa4 11 block/replication.c | 1 +
53e83913 12 blockdev.c | 208 +++++++----
67af0fa4 13 include/block/block_int.h | 4 +
53e83913 14 job.c | 3 +-
67af0fa4
WB
15 vma-reader.c | 857 ++++++++++++++++++++++++++++++++++++++++++++++
16 vma-writer.c | 771 +++++++++++++++++++++++++++++++++++++++++
6838f038
WB
17 vma.c | 756 ++++++++++++++++++++++++++++++++++++++++
18 vma.h | 150 ++++++++
53e83913 19 11 files changed, 2754 insertions(+), 107 deletions(-)
95259824
WB
20 create mode 100644 vma-reader.c
21 create mode 100644 vma-writer.c
22 create mode 100644 vma.c
23 create mode 100644 vma.h
24
25diff --git a/Makefile b/Makefile
53e83913 26index 2da686be33..5a0aad2004 100644
95259824
WB
27--- a/Makefile
28+++ b/Makefile
53e83913 29@@ -436,7 +436,7 @@ dummy := $(call unnest-vars,, \
6838f038 30
95259824 31 include $(SRC_PATH)/tests/Makefile.include
95259824
WB
32
33-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules
34+all: $(DOCS) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all modules
35
36 qemu-version.h: FORCE
37 $(call quiet-command, \
53e83913 38@@ -537,6 +537,7 @@ qemu-img.o: qemu-img-cmds.h
a544966d
WB
39 qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
40 qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
41 qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
42+vma$(EXESUF): vma.o vma-reader.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
95259824 43
a544966d 44 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
95259824
WB
45
46diff --git a/Makefile.objs b/Makefile.objs
53e83913 47index a836ee87d7..92c7886dee 100644
95259824
WB
48--- a/Makefile.objs
49+++ b/Makefile.objs
53e83913 50@@ -70,6 +70,7 @@ block-obj-y += block.o blockjob.o job.o
6838f038 51 block-obj-y += block/ scsi/
95259824 52 block-obj-y += qemu-io-cmds.o
a544966d 53 block-obj-$(CONFIG_REPLICATION) += replication.o
95259824
WB
54+block-obj-y += vma-writer.o
55
56 block-obj-m = block/
57
67af0fa4 58diff --git a/block/backup.c b/block/backup.c
0775f12b 59index 3aaa75892a..2410cca257 100644
67af0fa4
WB
60--- a/block/backup.c
61+++ b/block/backup.c
53e83913
WB
62@@ -34,6 +34,7 @@ typedef struct BackupBlockJob {
63 /* bitmap for sync=incremental */
67af0fa4
WB
64 BdrvDirtyBitmap *sync_bitmap;
65 MirrorSyncMode sync_mode;
67af0fa4
WB
66+ BackupDumpFunc *dump_cb;
67 BlockdevOnError on_source_error;
68 BlockdevOnError on_target_error;
69 CoRwlock flush_rwlock;
53e83913
WB
70@@ -126,12 +127,20 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
71 }
67af0fa4 72
53e83913
WB
73 if (qemu_iovec_is_zero(&qiov)) {
74- ret = blk_co_pwrite_zeroes(job->target, start,
75- qiov.size, write_flags | BDRV_REQ_MAY_UNMAP);
76+ if (job->dump_cb) {
77+ ret = job->dump_cb(job->common.job.opaque, job->target, start, qiov.size, NULL);
78+ } else {
79+ ret = blk_co_pwrite_zeroes(job->target, start,
80+ qiov.size, write_flags | BDRV_REQ_MAY_UNMAP);
81+ }
82 } else {
83- ret = blk_co_pwritev(job->target, start,
84- qiov.size, &qiov, write_flags |
85- (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0));
86+ if (job->dump_cb) {
87+ ret = job->dump_cb(job->common.job.opaque, job->target, start, qiov.size, *bounce_buffer);
88+ } else {
89+ ret = blk_co_pwritev(job->target, start,
90+ qiov.size, &qiov, write_flags |
91+ (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0));
92+ }
93 }
94 if (ret < 0) {
95 trace_backup_do_cow_write_fail(job, start, ret);
96@@ -209,7 +218,11 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
97 trace_backup_do_cow_process(job, start);
98
99 if (job->use_copy_range) {
100- ret = backup_cow_with_offload(job, start, end, is_write_notifier);
67af0fa4 101+ if (job->dump_cb) {
53e83913
WB
102+ ret = - 1;
103+ } else {
104+ ret = backup_cow_with_offload(job, start, end, is_write_notifier);
67af0fa4 105+ }
53e83913
WB
106 if (ret < 0) {
107 job->use_copy_range = false;
108 }
109@@ -293,7 +306,9 @@ static void backup_abort(Job *job)
110 static void backup_clean(Job *job)
67af0fa4 111 {
53e83913 112 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
6838f038 113- assert(s->target);
507c2194 114+ if (!s->target) {
67af0fa4 115+ return;
507c2194 116+ }
67af0fa4
WB
117 blk_unref(s->target);
118 s->target = NULL;
6838f038 119 }
53e83913 120@@ -302,7 +317,9 @@ static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
02709230
FG
121 {
122 BackupBlockJob *s = container_of(job, BackupBlockJob, common);
123
124- blk_set_aio_context(s->target, aio_context);
507c2194 125+ if (s->target) {
02709230 126+ blk_set_aio_context(s->target, aio_context);
507c2194 127+ }
02709230
FG
128 }
129
130 void backup_do_checkpoint(BlockJob *job, Error **errp)
53e83913 131@@ -374,9 +391,11 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job,
67af0fa4
WB
132 if (read) {
133 return block_job_error_action(&job->common, job->on_source_error,
134 true, error);
135- } else {
136+ } else if (job->target) {
137 return block_job_error_action(&job->common, job->on_target_error,
138 false, error);
139+ } else {
140+ return BLOCK_ERROR_ACTION_REPORT;
141 }
142 }
143
53e83913 144@@ -612,6 +631,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
145 BlockdevOnError on_source_error,
146 BlockdevOnError on_target_error,
147 int creation_flags,
148+ BackupDumpFunc *dump_cb,
149 BlockCompletionFunc *cb, void *opaque,
150 int pause_count,
53e83913
WB
151 JobTxn *txn, Error **errp)
152@@ -622,7 +642,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
153 int ret;
154
155 assert(bs);
156- assert(target);
157+ assert(target || dump_cb);
158
159 if (bs == target) {
160 error_setg(errp, "Source and target cannot be the same");
53e83913 161@@ -635,13 +655,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
162 return NULL;
163 }
164
165- if (!bdrv_is_inserted(target)) {
166+ if (target && !bdrv_is_inserted(target)) {
167 error_setg(errp, "Device is not inserted: %s",
168 bdrv_get_device_name(target));
169 return NULL;
170 }
171
172- if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
173+ if (target && compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
174 error_setg(errp, "Compression is not supported for this drive %s",
175 bdrv_get_device_name(target));
176 return NULL;
53e83913 177@@ -651,7 +671,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
178 return NULL;
179 }
180
181- if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
182+ if (target && bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
183 return NULL;
184 }
185
53e83913 186@@ -691,15 +711,18 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
187 goto error;
188 }
189
190- /* The target must match the source in size, so no resize here either */
191- job->target = blk_new(BLK_PERM_WRITE,
192- BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
193- BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
194- ret = blk_insert_bs(job->target, target, errp);
195- if (ret < 0) {
196- goto error;
197+ if (target) {
198+ /* The target must match the source in size, so no resize here either */
199+ job->target = blk_new(BLK_PERM_WRITE,
200+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
201+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
202+ ret = blk_insert_bs(job->target, target, errp);
203+ if (ret < 0) {
204+ goto error;
205+ }
206 }
207
208+ job->dump_cb = dump_cb;
209 job->on_source_error = on_source_error;
210 job->on_target_error = on_target_error;
211 job->sync_mode = sync_mode;
53e83913
WB
212@@ -710,6 +733,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
213 /* Detect image-fleecing (and similar) schemes */
214 job->serialize_target_writes = bdrv_chain_contains(target, bs);
215
216+ if (!target) {
217+ goto use_default_cluster_size;
218+ }
219 /* If there is no backing file on the target, we cannot rely on COW if our
220 * backup cluster size is smaller than the target cluster size. Even for
221 * targets with a backing file, try to avoid COW if possible. */
222@@ -734,18 +760,35 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
223 /* Not fatal; just trudge on ahead. */
224 job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
67af0fa4
WB
225 } else {
226- job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
53e83913
WB
227- }
228- job->use_copy_range = true;
229- job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
230- blk_get_max_transfer(job->target));
231- job->copy_range_size = MAX(job->cluster_size,
232- QEMU_ALIGN_UP(job->copy_range_size,
233- job->cluster_size));
234-
235- /* Required permissions are already taken with target's blk_new() */
236- block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
237- &error_abort);
238+ use_default_cluster_size:
67af0fa4
WB
239+ ret = bdrv_get_info(bs, &bdi);
240+ if (ret < 0) {
241+ job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
242+ } else {
243+ /* round down to nearest BACKUP_CLUSTER_SIZE_DEFAULT */
244+ job->cluster_size = (bdi.cluster_size / BACKUP_CLUSTER_SIZE_DEFAULT) * BACKUP_CLUSTER_SIZE_DEFAULT;
245+ if (job->cluster_size == 0) {
246+ /* but we can't go below it */
247+ job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
248+ }
249+ }
53e83913
WB
250+ }
251+ if (target) {
252+ job->use_copy_range = true;
253+ job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
254+ blk_get_max_transfer(job->target));
255+ job->copy_range_size = MAX(job->cluster_size,
256+ QEMU_ALIGN_UP(job->copy_range_size,
257+ job->cluster_size));
258+ } else {
259+ job->use_copy_range = false;
260+ }
261+
67af0fa4
WB
262+ if (target) {
263+ /* Required permissions are already taken with target's blk_new() */
264+ block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
265+ &error_abort);
67af0fa4 266+ }
53e83913 267 job->len = len;
0775f12b 268 job->common.job.pause_count += pause_count;
53e83913 269
67af0fa4 270diff --git a/block/replication.c b/block/replication.c
53e83913 271index 84e07cc4d4..04fa448a5b 100644
67af0fa4
WB
272--- a/block/replication.c
273+++ b/block/replication.c
53e83913 274@@ -571,6 +571,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
67af0fa4
WB
275 0, MIRROR_SYNC_MODE_NONE, NULL, false,
276 BLOCKDEV_ON_ERROR_REPORT,
53e83913 277 BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
67af0fa4
WB
278+ NULL,
279 backup_job_completed, bs, 0, NULL, &local_err);
280 if (local_err) {
281 error_propagate(errp, local_err);
282diff --git a/blockdev.c b/blockdev.c
0775f12b 283index 4f18d3c3d7..86508066cc 100644
67af0fa4
WB
284--- a/blockdev.c
285+++ b/blockdev.c
286@@ -31,7 +31,6 @@
287 */
288
289 #include "qemu/osdep.h"
290-#include "qemu/uuid.h"
291 #include "sysemu/block-backend.h"
292 #include "sysemu/blockdev.h"
293 #include "hw/block/block.h"
53e83913 294@@ -63,6 +62,7 @@
67af0fa4
WB
295 #include "qemu/cutils.h"
296 #include "qemu/help_option.h"
297 #include "qemu/throttle-options.h"
298+#include "vma.h"
299
300 static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
301 QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
53e83913 302@@ -3228,15 +3228,14 @@ out:
67af0fa4
WB
303 static struct PVEBackupState {
304 Error *error;
305 bool cancel;
306- QemuUUID uuid;
307+ uuid_t uuid;
308 char uuid_str[37];
309 int64_t speed;
310 time_t start_time;
311 time_t end_time;
312 char *backup_file;
313- Object *vmaobj;
314+ VmaWriter *vmaw;
315 GList *di_list;
316- size_t next_job;
317 size_t total;
318 size_t transferred;
319 size_t zero_bytes;
53e83913 320@@ -3255,6 +3254,71 @@ typedef struct PVEBackupDevInfo {
67af0fa4
WB
321
322 static void pvebackup_run_next_job(void);
323
324+static int pvebackup_dump_cb(void *opaque, BlockBackend *target,
6838f038
WB
325+ uint64_t start, uint64_t bytes,
326+ const void *pbuf)
67af0fa4 327+{
6838f038
WB
328+ const uint64_t size = bytes;
329+ const unsigned char *buf = pbuf;
67af0fa4
WB
330+ PVEBackupDevInfo *di = opaque;
331+
67af0fa4
WB
332+ if (backup_state.cancel) {
333+ return size; // return success
334+ }
335+
6838f038
WB
336+ uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
337+ if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
67af0fa4
WB
338+ if (!backup_state.error) {
339+ error_setg(&backup_state.error,
340+ "got unaligned write inside backup dump "
6838f038 341+ "callback (sector %ld)", start);
67af0fa4
WB
342+ }
343+ return -1; // not aligned to cluster size
344+ }
345+
67af0fa4
WB
346+ int ret = -1;
347+
348+ if (backup_state.vmaw) {
349+ size_t zero_bytes = 0;
6838f038 350+ uint64_t remaining = size;
2ab9b48e 351+ while (remaining > 0) {
67af0fa4
WB
352+ ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num,
353+ buf, &zero_bytes);
67af0fa4
WB
354+ ++cluster_num;
355+ if (buf) {
356+ buf += VMA_CLUSTER_SIZE;
357+ }
358+ if (ret < 0) {
359+ if (!backup_state.error) {
360+ vma_writer_error_propagate(backup_state.vmaw, &backup_state.error);
361+ }
362+ if (di->bs && di->bs->job) {
53e83913 363+ job_cancel(&di->bs->job->job, true);
67af0fa4 364+ }
2ab9b48e 365+ break;
67af0fa4
WB
366+ } else {
367+ backup_state.zero_bytes += zero_bytes;
2ab9b48e
WB
368+ if (remaining >= VMA_CLUSTER_SIZE) {
369+ backup_state.transferred += VMA_CLUSTER_SIZE;
6838f038 370+ remaining -= VMA_CLUSTER_SIZE;
2ab9b48e
WB
371+ } else {
372+ backup_state.transferred += remaining;
6838f038 373+ remaining = 0;
2ab9b48e 374+ }
67af0fa4
WB
375+ }
376+ }
377+ } else {
378+ if (!buf) {
379+ backup_state.zero_bytes += size;
380+ }
381+ backup_state.transferred += size;
382+ }
383+
384+ // Note: always return success, because we want that writes succeed anyways.
385+
386+ return size;
387+}
388+
389 static void pvebackup_cleanup(void)
390 {
6838f038 391 qemu_mutex_lock(&backup_state.backup_mutex);
53e83913 392@@ -3266,9 +3330,11 @@ static void pvebackup_cleanup(void)
6838f038 393
67af0fa4
WB
394 backup_state.end_time = time(NULL);
395
396- if (backup_state.vmaobj) {
397- object_unparent(backup_state.vmaobj);
398- backup_state.vmaobj = NULL;
399+ if (backup_state.vmaw) {
400+ Error *local_err = NULL;
401+ vma_writer_close(backup_state.vmaw, &local_err);
402+ error_propagate(&backup_state.error, local_err);
403+ backup_state.vmaw = NULL;
404 }
405
6838f038 406 g_list_free(backup_state.di_list);
53e83913 407@@ -3276,6 +3342,13 @@ static void pvebackup_cleanup(void)
6838f038 408 qemu_mutex_unlock(&backup_state.backup_mutex);
67af0fa4
WB
409 }
410
411+static void coroutine_fn backup_close_vma_stream(void *opaque)
412+{
413+ PVEBackupDevInfo *di = opaque;
414+
415+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
416+}
417+
418 static void pvebackup_complete_cb(void *opaque, int ret)
419 {
6838f038 420 // This always runs in the main loop
53e83913 421@@ -3292,9 +3365,9 @@ static void pvebackup_complete_cb(void *opaque, int ret)
67af0fa4
WB
422 di->bs = NULL;
423 di->target = NULL;
424
425- if (backup_state.vmaobj) {
426- object_unparent(backup_state.vmaobj);
427- backup_state.vmaobj = NULL;
428+ if (backup_state.vmaw) {
429+ Coroutine *co = qemu_coroutine_create(backup_close_vma_stream, di);
430+ qemu_coroutine_enter(co);
431 }
432
6838f038 433 // remove self from job queue
53e83913 434@@ -3322,14 +3395,9 @@ static void pvebackup_cancel(void *opaque)
67af0fa4
WB
435 error_setg(&backup_state.error, "backup cancelled");
436 }
437
438- if (backup_state.vmaobj) {
439- Error *err;
440+ if (backup_state.vmaw) {
441 /* make sure vma writer does not block anymore */
442- if (!object_set_props(backup_state.vmaobj, &err, "blocked", "yes", NULL)) {
443- if (err) {
444- error_report_err(err);
445- }
446- }
447+ vma_writer_set_error(backup_state.vmaw, "backup cancelled");
448 }
449
450 GList *l = backup_state.di_list;
53e83913 451@@ -3360,18 +3428,14 @@ void qmp_backup_cancel(Error **errp)
67af0fa4
WB
452 Coroutine *co = qemu_coroutine_create(pvebackup_cancel, NULL);
453 qemu_coroutine_enter(co);
454
455- while (backup_state.vmaobj) {
456- /* FIXME: Find something better for this */
457+ while (backup_state.vmaw) {
458+ /* vma writer use main aio context */
459 aio_poll(qemu_get_aio_context(), true);
460 }
461 }
462
463-void vma_object_add_config_file(Object *obj, const char *name,
464- const char *contents, size_t len,
465- Error **errp);
466 static int config_to_vma(const char *file, BackupFormat format,
467- Object *vmaobj,
468- const char *backup_dir,
6838f038
WB
469+ const char *backup_dir, VmaWriter *vmaw,
470 Error **errp)
67af0fa4 471 {
6838f038 472 char *cdata = NULL;
53e83913 473@@ -3385,7 +3449,12 @@ static int config_to_vma(const char *file, BackupFormat format,
6838f038 474 char *basename = g_path_get_basename(file);
67af0fa4 475
6838f038
WB
476 if (format == BACKUP_FORMAT_VMA) {
477- vma_object_add_config_file(vmaobj, basename, cdata, clen, errp);
478+ if (vma_writer_add_config(vmaw, basename, cdata, clen) != 0) {
479+ error_setg(errp, "unable to add %s config data to vma archive", file);
480+ g_free(cdata);
481+ g_free(basename);
482+ return 1;
483+ }
484 } else if (format == BACKUP_FORMAT_DIR) {
485 char config_path[PATH_MAX];
486 snprintf(config_path, PATH_MAX, "%s/%s", backup_dir, basename);
53e83913
WB
487@@ -3402,28 +3471,30 @@ static int config_to_vma(const char *file, BackupFormat format,
488 return 0;
67af0fa4
WB
489 }
490
53e83913 491+bool job_should_pause(Job *job);
67af0fa4
WB
492 static void pvebackup_run_next_job(void)
493 {
6838f038
WB
494 qemu_mutex_lock(&backup_state.backup_mutex);
495
67af0fa4
WB
496- GList *next = g_list_nth(backup_state.di_list, backup_state.next_job);
497- while (next) {
498- PVEBackupDevInfo *di = (PVEBackupDevInfo *)next->data;
499- backup_state.next_job++;
500+ GList *l = backup_state.di_list;
501+ while (l) {
502+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
503+ l = g_list_next(l);
504 if (!di->completed && di->bs && di->bs->job) {
505 BlockJob *job = di->bs->job;
6838f038
WB
506 AioContext *aio_context = blk_get_aio_context(job->blk);
507 aio_context_acquire(aio_context);
508 qemu_mutex_unlock(&backup_state.backup_mutex);
509- if (backup_state.error || backup_state.cancel) {
53e83913 510- job_cancel_sync(job);
67af0fa4 511- } else {
53e83913
WB
512- job_resume(job);
513+ if (job_should_pause(&job->job)) {
6838f038 514+ if (backup_state.error || backup_state.cancel) {
53e83913 515+ job_cancel_sync(&job->job);
67af0fa4 516+ } else {
53e83913 517+ job_resume(&job->job);
67af0fa4
WB
518+ }
519 }
6838f038 520 aio_context_release(aio_context);
67af0fa4
WB
521 return;
522 }
523- next = g_list_next(next);
524 }
6838f038 525 qemu_mutex_unlock(&backup_state.backup_mutex);
67af0fa4 526
53e83913 527@@ -3434,7 +3505,7 @@ static void pvebackup_run_next_job(void)
67af0fa4
WB
528 UuidInfo *qmp_backup(const char *backup_file, bool has_format,
529 BackupFormat format,
530 bool has_config_file, const char *config_file,
531- bool has_firewall_file, const char *firewall_file,
532+ bool has_firewall_file, const char *firewall_file,
533 bool has_devlist, const char *devlist,
534 bool has_speed, int64_t speed, Error **errp)
535 {
53e83913 536@@ -3442,7 +3513,8 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
537 BlockDriverState *bs = NULL;
538 const char *backup_dir = NULL;
539 Error *local_err = NULL;
540- QemuUUID uuid;
541+ uuid_t uuid;
542+ VmaWriter *vmaw = NULL;
543 gchar **devs = NULL;
544 GList *di_list = NULL;
545 GList *l;
53e83913 546@@ -3454,7 +3526,7 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
6838f038
WB
547 backup_state.backup_mutex_initialized = true;
548 }
67af0fa4
WB
549
550- if (backup_state.di_list || backup_state.vmaobj) {
551+ if (backup_state.di_list) {
552 error_set(errp, ERROR_CLASS_GENERIC_ERROR,
553 "previous backup not finished");
554 return NULL;
53e83913 555@@ -3529,40 +3601,28 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
556 total += size;
557 }
558
559- qemu_uuid_generate(&uuid);
560+ uuid_generate(uuid);
561
562 if (format == BACKUP_FORMAT_VMA) {
563- char uuidstr[UUID_FMT_LEN+1];
564- qemu_uuid_unparse(&uuid, uuidstr);
565- uuidstr[UUID_FMT_LEN] = 0;
566- backup_state.vmaobj =
567- object_new_with_props("vma", object_get_objects_root(),
568- "vma-backup-obj", &local_err,
569- "filename", backup_file,
570- "uuid", uuidstr,
571- NULL);
572- if (!backup_state.vmaobj) {
573+ vmaw = vma_writer_create(backup_file, uuid, &local_err);
574+ if (!vmaw) {
575 if (local_err) {
576 error_propagate(errp, local_err);
577 }
578 goto err;
579 }
580
581+ /* register all devices for vma writer */
582 l = di_list;
583 while (l) {
584- QDict *options = qdict_new();
585-
586 PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
587 l = g_list_next(l);
588
589 const char *devname = bdrv_get_device_name(di->bs);
590- snprintf(di->targetfile, PATH_MAX, "vma-backup-obj/%s.raw", devname);
591-
592- qdict_put(options, "driver", qstring_from_str("vma-drive"));
593- qdict_put(options, "size", qint_from_int(di->size));
594- di->target = bdrv_open(di->targetfile, NULL, options, BDRV_O_RDWR, &local_err);
595- if (!di->target) {
596- error_propagate(errp, local_err);
597+ di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
598+ if (di->dev_id <= 0) {
599+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
600+ "register_stream failed");
601 goto err;
602 }
603 }
53e83913 604@@ -3603,14 +3663,14 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
605
606 /* add configuration file to archive */
607 if (has_config_file) {
6838f038
WB
608- if(config_to_vma(config_file, format, backup_state.vmaobj, backup_dir, errp) != 0) {
609+ if (config_to_vma(config_file, format, backup_dir, vmaw, errp) != 0) {
610 goto err;
611 }
67af0fa4
WB
612 }
613
614 /* add firewall file to archive */
615 if (has_firewall_file) {
6838f038
WB
616- if(config_to_vma(firewall_file, format, backup_state.vmaobj, backup_dir, errp) != 0) {
617+ if (config_to_vma(firewall_file, format, backup_dir, vmaw, errp) != 0) {
618 goto err;
619 }
67af0fa4 620 }
53e83913 621@@ -3633,12 +3693,13 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
622 }
623 backup_state.backup_file = g_strdup(backup_file);
624
625- memcpy(&backup_state.uuid, &uuid, sizeof(uuid));
626- qemu_uuid_unparse(&uuid, backup_state.uuid_str);
627+ backup_state.vmaw = vmaw;
628+
629+ uuid_copy(backup_state.uuid, uuid);
630+ uuid_unparse_lower(uuid, backup_state.uuid_str);
631
6838f038 632 qemu_mutex_lock(&backup_state.backup_mutex);
67af0fa4
WB
633 backup_state.di_list = di_list;
634- backup_state.next_job = 0;
635
636 backup_state.total = total;
637 backup_state.transferred = 0;
53e83913 638@@ -3649,21 +3710,21 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
639 while (l) {
640 PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
641 l = g_list_next(l);
642-
643 job = backup_job_create(NULL, di->bs, di->target, speed, MIRROR_SYNC_MODE_FULL, NULL,
644 false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
53e83913 645 JOB_DEFAULT,
67af0fa4
WB
646- pvebackup_complete_cb, di, 2, NULL, &local_err);
647- if (di->target) {
648- bdrv_unref(di->target);
649- di->target = NULL;
650- }
651+ pvebackup_dump_cb, pvebackup_complete_cb, di,
0775f12b 652+ 1, NULL, &local_err);
67af0fa4
WB
653 if (!job || local_err != NULL) {
654 error_setg(&backup_state.error, "backup_job_create failed");
655 pvebackup_cancel(NULL);
53e83913
WB
656 } else {
657 job_start(&job->job);
67af0fa4 658 }
53e83913
WB
659+ if (di->target) {
660+ bdrv_unref(di->target);
661+ di->target = NULL;
662+ }
67af0fa4
WB
663 }
664
6838f038 665 qemu_mutex_unlock(&backup_state.backup_mutex);
53e83913 666@@ -3699,9 +3760,10 @@ err:
67af0fa4
WB
667 g_strfreev(devs);
668 }
669
670- if (backup_state.vmaobj) {
671- object_unparent(backup_state.vmaobj);
672- backup_state.vmaobj = NULL;
673+ if (vmaw) {
674+ Error *err = NULL;
675+ vma_writer_close(vmaw, &err);
676+ unlink(backup_file);
677 }
678
679 if (backup_dir) {
53e83913 680@@ -4104,7 +4166,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
67af0fa4
WB
681 job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
682 backup->sync, bmap, backup->compress,
683 backup->on_source_error, backup->on_target_error,
53e83913
WB
684- job_flags, NULL, NULL, 0, txn, &local_err);
685+ job_flags, NULL, NULL, NULL, 0, txn, &local_err);
67af0fa4
WB
686 bdrv_unref(target_bs);
687 if (local_err != NULL) {
688 error_propagate(errp, local_err);
53e83913 689@@ -4196,7 +4258,7 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn,
67af0fa4
WB
690 job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
691 backup->sync, NULL, backup->compress,
692 backup->on_source_error, backup->on_target_error,
53e83913
WB
693- job_flags, NULL, NULL, 0, txn, &local_err);
694+ job_flags, NULL, NULL, NULL, 0, txn, &local_err);
67af0fa4
WB
695 if (local_err != NULL) {
696 error_propagate(errp, local_err);
697 }
67af0fa4 698diff --git a/include/block/block_int.h b/include/block/block_int.h
53e83913 699index 0b2516c3cf..ecd6243440 100644
67af0fa4
WB
700--- a/include/block/block_int.h
701+++ b/include/block/block_int.h
53e83913 702@@ -59,6 +59,9 @@
67af0fa4
WB
703
704 #define BLOCK_PROBE_BUF_SIZE 512
705
706+typedef int BackupDumpFunc(void *opaque, BlockBackend *be,
6838f038 707+ uint64_t offset, uint64_t bytes, const void *buf);
67af0fa4
WB
708+
709 enum BdrvTrackedRequestType {
710 BDRV_TRACKED_READ,
711 BDRV_TRACKED_WRITE,
53e83913 712@@ -1082,6 +1085,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
713 BlockdevOnError on_source_error,
714 BlockdevOnError on_target_error,
715 int creation_flags,
716+ BackupDumpFunc *dump_cb,
717 BlockCompletionFunc *cb, void *opaque,
718 int pause_count,
53e83913
WB
719 JobTxn *txn, Error **errp);
720diff --git a/job.c b/job.c
0775f12b 721index 950924ebad..b4eaf57e64 100644
53e83913
WB
722--- a/job.c
723+++ b/job.c
0775f12b 724@@ -248,7 +248,8 @@ static bool job_started(Job *job)
53e83913
WB
725 return job->co;
726 }
727
728-static bool job_should_pause(Job *job)
729+bool job_should_pause(Job *job);
730+bool job_should_pause(Job *job)
731 {
732 return job->pause_count > 0;
733 }
95259824
WB
734diff --git a/vma-reader.c b/vma-reader.c
735new file mode 100644
9b05d1d4 736index 0000000000..2b1d1cdab3
95259824
WB
737--- /dev/null
738+++ b/vma-reader.c
67af0fa4 739@@ -0,0 +1,857 @@
95259824
WB
740+/*
741+ * VMA: Virtual Machine Archive
742+ *
743+ * Copyright (C) 2012 Proxmox Server Solutions
744+ *
745+ * Authors:
746+ * Dietmar Maurer (dietmar@proxmox.com)
747+ *
748+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
749+ * See the COPYING file in the top-level directory.
750+ *
751+ */
752+
753+#include "qemu/osdep.h"
754+#include <glib.h>
755+#include <uuid/uuid.h>
756+
757+#include "qemu-common.h"
758+#include "qemu/timer.h"
759+#include "qemu/ratelimit.h"
760+#include "vma.h"
761+#include "block/block.h"
762+#include "sysemu/block-backend.h"
763+
764+static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
765+
766+typedef struct VmaRestoreState {
67af0fa4 767+ BlockBackend *target;
95259824
WB
768+ bool write_zeroes;
769+ unsigned long *bitmap;
770+ int bitmap_size;
771+} VmaRestoreState;
772+
773+struct VmaReader {
774+ int fd;
775+ GChecksum *md5csum;
776+ GHashTable *blob_hash;
777+ unsigned char *head_data;
778+ VmaDeviceInfo devinfo[256];
779+ VmaRestoreState rstate[256];
780+ GList *cdata_list;
781+ guint8 vmstate_stream;
782+ uint32_t vmstate_clusters;
783+ /* to show restore percentage if run with -v */
784+ time_t start_time;
785+ int64_t cluster_count;
786+ int64_t clusters_read;
67af0fa4
WB
787+ int64_t zero_cluster_data;
788+ int64_t partial_zero_cluster_data;
95259824
WB
789+ int clusters_read_per;
790+};
791+
792+static guint
793+g_int32_hash(gconstpointer v)
794+{
795+ return *(const uint32_t *)v;
796+}
797+
798+static gboolean
799+g_int32_equal(gconstpointer v1, gconstpointer v2)
800+{
801+ return *((const uint32_t *)v1) == *((const uint32_t *)v2);
802+}
803+
804+static int vma_reader_get_bitmap(VmaRestoreState *rstate, int64_t cluster_num)
805+{
806+ assert(rstate);
807+ assert(rstate->bitmap);
808+
809+ unsigned long val, idx, bit;
810+
811+ idx = cluster_num / BITS_PER_LONG;
812+
813+ assert(rstate->bitmap_size > idx);
814+
815+ bit = cluster_num % BITS_PER_LONG;
816+ val = rstate->bitmap[idx];
817+
818+ return !!(val & (1UL << bit));
819+}
820+
821+static void vma_reader_set_bitmap(VmaRestoreState *rstate, int64_t cluster_num,
822+ int dirty)
823+{
824+ assert(rstate);
825+ assert(rstate->bitmap);
826+
827+ unsigned long val, idx, bit;
828+
829+ idx = cluster_num / BITS_PER_LONG;
830+
831+ assert(rstate->bitmap_size > idx);
832+
833+ bit = cluster_num % BITS_PER_LONG;
834+ val = rstate->bitmap[idx];
835+ if (dirty) {
836+ if (!(val & (1UL << bit))) {
837+ val |= 1UL << bit;
838+ }
839+ } else {
840+ if (val & (1UL << bit)) {
841+ val &= ~(1UL << bit);
842+ }
843+ }
844+ rstate->bitmap[idx] = val;
845+}
846+
847+typedef struct VmaBlob {
848+ uint32_t start;
849+ uint32_t len;
850+ void *data;
851+} VmaBlob;
852+
853+static const VmaBlob *get_header_blob(VmaReader *vmar, uint32_t pos)
854+{
855+ assert(vmar);
856+ assert(vmar->blob_hash);
857+
858+ return g_hash_table_lookup(vmar->blob_hash, &pos);
859+}
860+
861+static const char *get_header_str(VmaReader *vmar, uint32_t pos)
862+{
863+ const VmaBlob *blob = get_header_blob(vmar, pos);
864+ if (!blob) {
865+ return NULL;
866+ }
867+ const char *res = (char *)blob->data;
868+ if (res[blob->len-1] != '\0') {
869+ return NULL;
870+ }
871+ return res;
872+}
873+
874+static ssize_t
875+safe_read(int fd, unsigned char *buf, size_t count)
876+{
877+ ssize_t n;
878+
879+ do {
880+ n = read(fd, buf, count);
881+ } while (n < 0 && errno == EINTR);
882+
883+ return n;
884+}
885+
886+static ssize_t
887+full_read(int fd, unsigned char *buf, size_t len)
888+{
889+ ssize_t n;
890+ size_t total;
891+
892+ total = 0;
893+
894+ while (len > 0) {
895+ n = safe_read(fd, buf, len);
896+
897+ if (n == 0) {
898+ return total;
899+ }
900+
901+ if (n <= 0) {
902+ break;
903+ }
904+
905+ buf += n;
906+ total += n;
907+ len -= n;
908+ }
909+
910+ if (len) {
911+ return -1;
912+ }
913+
914+ return total;
915+}
916+
917+void vma_reader_destroy(VmaReader *vmar)
918+{
919+ assert(vmar);
920+
921+ if (vmar->fd >= 0) {
922+ close(vmar->fd);
923+ }
924+
925+ if (vmar->cdata_list) {
926+ g_list_free(vmar->cdata_list);
927+ }
928+
929+ int i;
930+ for (i = 1; i < 256; i++) {
931+ if (vmar->rstate[i].bitmap) {
932+ g_free(vmar->rstate[i].bitmap);
933+ }
934+ }
935+
936+ if (vmar->md5csum) {
937+ g_checksum_free(vmar->md5csum);
938+ }
939+
940+ if (vmar->blob_hash) {
941+ g_hash_table_destroy(vmar->blob_hash);
942+ }
943+
944+ if (vmar->head_data) {
945+ g_free(vmar->head_data);
946+ }
947+
948+ g_free(vmar);
949+
950+};
951+
952+static int vma_reader_read_head(VmaReader *vmar, Error **errp)
953+{
954+ assert(vmar);
955+ assert(errp);
956+ assert(*errp == NULL);
957+
958+ unsigned char md5sum[16];
959+ int i;
960+ int ret = 0;
961+
962+ vmar->head_data = g_malloc(sizeof(VmaHeader));
963+
964+ if (full_read(vmar->fd, vmar->head_data, sizeof(VmaHeader)) !=
965+ sizeof(VmaHeader)) {
966+ error_setg(errp, "can't read vma header - %s",
967+ errno ? g_strerror(errno) : "got EOF");
968+ return -1;
969+ }
970+
971+ VmaHeader *h = (VmaHeader *)vmar->head_data;
972+
973+ if (h->magic != VMA_MAGIC) {
974+ error_setg(errp, "not a vma file - wrong magic number");
975+ return -1;
976+ }
977+
978+ uint32_t header_size = GUINT32_FROM_BE(h->header_size);
979+ int need = header_size - sizeof(VmaHeader);
980+ if (need <= 0) {
981+ error_setg(errp, "wrong vma header size %d", header_size);
982+ return -1;
983+ }
984+
985+ vmar->head_data = g_realloc(vmar->head_data, header_size);
986+ h = (VmaHeader *)vmar->head_data;
987+
988+ if (full_read(vmar->fd, vmar->head_data + sizeof(VmaHeader), need) !=
989+ need) {
990+ error_setg(errp, "can't read vma header data - %s",
991+ errno ? g_strerror(errno) : "got EOF");
992+ return -1;
993+ }
994+
995+ memcpy(md5sum, h->md5sum, 16);
996+ memset(h->md5sum, 0, 16);
997+
998+ g_checksum_reset(vmar->md5csum);
999+ g_checksum_update(vmar->md5csum, vmar->head_data, header_size);
1000+ gsize csize = 16;
1001+ g_checksum_get_digest(vmar->md5csum, (guint8 *)(h->md5sum), &csize);
1002+
1003+ if (memcmp(md5sum, h->md5sum, 16) != 0) {
1004+ error_setg(errp, "wrong vma header chechsum");
1005+ return -1;
1006+ }
1007+
1008+ /* we can modify header data after checksum verify */
1009+ h->header_size = header_size;
1010+
1011+ h->version = GUINT32_FROM_BE(h->version);
1012+ if (h->version != 1) {
1013+ error_setg(errp, "wrong vma version %d", h->version);
1014+ return -1;
1015+ }
1016+
1017+ h->ctime = GUINT64_FROM_BE(h->ctime);
1018+ h->blob_buffer_offset = GUINT32_FROM_BE(h->blob_buffer_offset);
1019+ h->blob_buffer_size = GUINT32_FROM_BE(h->blob_buffer_size);
1020+
1021+ uint32_t bstart = h->blob_buffer_offset + 1;
1022+ uint32_t bend = h->blob_buffer_offset + h->blob_buffer_size;
1023+
1024+ if (bstart <= sizeof(VmaHeader)) {
1025+ error_setg(errp, "wrong vma blob buffer offset %d",
1026+ h->blob_buffer_offset);
1027+ return -1;
1028+ }
1029+
1030+ if (bend > header_size) {
1031+ error_setg(errp, "wrong vma blob buffer size %d/%d",
1032+ h->blob_buffer_offset, h->blob_buffer_size);
1033+ return -1;
1034+ }
1035+
1036+ while ((bstart + 2) <= bend) {
1037+ uint32_t size = vmar->head_data[bstart] +
1038+ (vmar->head_data[bstart+1] << 8);
1039+ if ((bstart + size + 2) <= bend) {
1040+ VmaBlob *blob = g_new0(VmaBlob, 1);
1041+ blob->start = bstart - h->blob_buffer_offset;
1042+ blob->len = size;
1043+ blob->data = vmar->head_data + bstart + 2;
1044+ g_hash_table_insert(vmar->blob_hash, &blob->start, blob);
1045+ }
1046+ bstart += size + 2;
1047+ }
1048+
1049+
1050+ int count = 0;
1051+ for (i = 1; i < 256; i++) {
1052+ VmaDeviceInfoHeader *dih = &h->dev_info[i];
1053+ uint32_t devname_ptr = GUINT32_FROM_BE(dih->devname_ptr);
1054+ uint64_t size = GUINT64_FROM_BE(dih->size);
1055+ const char *devname = get_header_str(vmar, devname_ptr);
1056+
1057+ if (size && devname) {
1058+ count++;
1059+ vmar->devinfo[i].size = size;
1060+ vmar->devinfo[i].devname = devname;
1061+
1062+ if (strcmp(devname, "vmstate") == 0) {
1063+ vmar->vmstate_stream = i;
1064+ }
1065+ }
1066+ }
1067+
95259824
WB
1068+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
1069+ uint32_t name_ptr = GUINT32_FROM_BE(h->config_names[i]);
1070+ uint32_t data_ptr = GUINT32_FROM_BE(h->config_data[i]);
1071+
1072+ if (!(name_ptr && data_ptr)) {
1073+ continue;
1074+ }
1075+ const char *name = get_header_str(vmar, name_ptr);
1076+ const VmaBlob *blob = get_header_blob(vmar, data_ptr);
1077+
1078+ if (!(name && blob)) {
1079+ error_setg(errp, "vma contains invalid data pointers");
1080+ return -1;
1081+ }
1082+
1083+ VmaConfigData *cdata = g_new0(VmaConfigData, 1);
1084+ cdata->name = name;
1085+ cdata->data = blob->data;
1086+ cdata->len = blob->len;
1087+
1088+ vmar->cdata_list = g_list_append(vmar->cdata_list, cdata);
1089+ }
1090+
1091+ return ret;
1092+};
1093+
1094+VmaReader *vma_reader_create(const char *filename, Error **errp)
1095+{
1096+ assert(filename);
1097+ assert(errp);
1098+
1099+ VmaReader *vmar = g_new0(VmaReader, 1);
1100+
1101+ if (strcmp(filename, "-") == 0) {
1102+ vmar->fd = dup(0);
1103+ } else {
1104+ vmar->fd = open(filename, O_RDONLY);
1105+ }
1106+
1107+ if (vmar->fd < 0) {
1108+ error_setg(errp, "can't open file %s - %s\n", filename,
1109+ g_strerror(errno));
1110+ goto err;
1111+ }
1112+
1113+ vmar->md5csum = g_checksum_new(G_CHECKSUM_MD5);
1114+ if (!vmar->md5csum) {
1115+ error_setg(errp, "can't allocate cmsum\n");
1116+ goto err;
1117+ }
1118+
1119+ vmar->blob_hash = g_hash_table_new_full(g_int32_hash, g_int32_equal,
1120+ NULL, g_free);
1121+
1122+ if (vma_reader_read_head(vmar, errp) < 0) {
1123+ goto err;
1124+ }
1125+
1126+ return vmar;
1127+
1128+err:
1129+ if (vmar) {
1130+ vma_reader_destroy(vmar);
1131+ }
1132+
1133+ return NULL;
1134+}
1135+
1136+VmaHeader *vma_reader_get_header(VmaReader *vmar)
1137+{
1138+ assert(vmar);
1139+ assert(vmar->head_data);
1140+
1141+ return (VmaHeader *)(vmar->head_data);
1142+}
1143+
1144+GList *vma_reader_get_config_data(VmaReader *vmar)
1145+{
1146+ assert(vmar);
1147+ assert(vmar->head_data);
1148+
1149+ return vmar->cdata_list;
1150+}
1151+
1152+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id)
1153+{
1154+ assert(vmar);
1155+ assert(dev_id);
1156+
1157+ if (vmar->devinfo[dev_id].size && vmar->devinfo[dev_id].devname) {
1158+ return &vmar->devinfo[dev_id];
1159+ }
1160+
1161+ return NULL;
1162+}
1163+
67af0fa4
WB
1164+static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
1165+ BlockBackend *target, bool write_zeroes)
1166+{
1167+ assert(vmar);
1168+ assert(dev_id);
1169+
1170+ vmar->rstate[dev_id].target = target;
1171+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
1172+
1173+ int64_t size = vmar->devinfo[dev_id].size;
1174+
1175+ int64_t bitmap_size = (size/BDRV_SECTOR_SIZE) +
1176+ (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG - 1;
1177+ bitmap_size /= (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG;
1178+
1179+ vmar->rstate[dev_id].bitmap_size = bitmap_size;
1180+ vmar->rstate[dev_id].bitmap = g_new0(unsigned long, bitmap_size);
1181+
1182+ vmar->cluster_count += size/VMA_CLUSTER_SIZE;
1183+}
1184+
1185+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
95259824
WB
1186+ bool write_zeroes, Error **errp)
1187+{
1188+ assert(vmar);
67af0fa4 1189+ assert(target != NULL);
95259824 1190+ assert(dev_id);
67af0fa4 1191+ assert(vmar->rstate[dev_id].target == NULL);
95259824 1192+
67af0fa4 1193+ int64_t size = blk_getlength(target);
95259824
WB
1194+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
1195+
1196+ /* storage types can have different size restrictions, so it
1197+ * is not always possible to create an image with exact size.
1198+ * So we tolerate a size difference up to 4MB.
1199+ */
1200+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
1201+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
1202+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
1203+ size, vmar->devinfo[dev_id].size);
1204+ return -1;
1205+ }
1206+
67af0fa4 1207+ allocate_rstate(vmar, dev_id, target, write_zeroes);
95259824
WB
1208+
1209+ return 0;
1210+}
1211+
/*
 * write(2) wrapper that transparently restarts the call when it is
 * interrupted by a signal (EINTR).
 *
 * Returns whatever write() returned on the first attempt that was not
 * interrupted.
 */
static ssize_t safe_write(int fd, void *buf, size_t count)
{
    for (;;) {
        ssize_t put = write(fd, buf, count);

        if (put >= 0 || errno != EINTR) {
            return put;
        }
    }
}
1222+
/*
 * Write all @len bytes of @buf to @fd, continuing after partial writes.
 *
 * Returns @len on success and -1 on failure.  The return type is signed
 * so that the error value really is negative; with the previous size_t
 * return type "-1" was a huge positive number that only looked negative
 * after the caller truncated it to int.
 */
static ssize_t full_write(int fd, void *buf, size_t len)
{
    unsigned char *pos = buf;   /* byte pointer: no arithmetic on void * */
    size_t total = 0;

    while (len > 0) {
        ssize_t n = safe_write(fd, pos, len);

        if (n < 0) {
            return -1;
        }
        if (n == 0) {
            /* no progress is possible; fail instead of looping forever */
            return -1;
        }

        pos += n;
        total += n;
        len -= n;
    }

    return total;
}
1247+
1248+static int restore_write_data(VmaReader *vmar, guint8 dev_id,
67af0fa4 1249+ BlockBackend *target, int vmstate_fd,
95259824
WB
1250+ unsigned char *buf, int64_t sector_num,
1251+ int nb_sectors, Error **errp)
1252+{
1253+ assert(vmar);
1254+
1255+ if (dev_id == vmar->vmstate_stream) {
1256+ if (vmstate_fd >= 0) {
1257+ int len = nb_sectors * BDRV_SECTOR_SIZE;
1258+ int res = full_write(vmstate_fd, buf, len);
1259+ if (res < 0) {
1260+ error_setg(errp, "write vmstate failed %d", res);
1261+ return -1;
1262+ }
1263+ }
1264+ } else {
67af0fa4 1265+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, buf, nb_sectors * BDRV_SECTOR_SIZE, 0);
95259824 1266+ if (res < 0) {
67af0fa4
WB
1267+ error_setg(errp, "blk_pwrite to %s failed (%d)",
1268+ bdrv_get_device_name(blk_bs(target)), res);
95259824
WB
1269+ return -1;
1270+ }
1271+ }
1272+ return 0;
1273+}
67af0fa4 1274+
95259824
WB
1275+static int restore_extent(VmaReader *vmar, unsigned char *buf,
1276+ int extent_size, int vmstate_fd,
67af0fa4 1277+ bool verbose, bool verify, Error **errp)
95259824
WB
1278+{
1279+ assert(vmar);
1280+ assert(buf);
1281+
1282+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
1283+ int start = VMA_EXTENT_HEADER_SIZE;
1284+ int i;
1285+
1286+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1287+ uint64_t block_info = GUINT64_FROM_BE(ehead->blockinfo[i]);
1288+ uint64_t cluster_num = block_info & 0xffffffff;
1289+ uint8_t dev_id = (block_info >> 32) & 0xff;
1290+ uint16_t mask = block_info >> (32+16);
1291+ int64_t max_sector;
1292+
1293+ if (!dev_id) {
1294+ continue;
1295+ }
1296+
1297+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
67af0fa4 1298+ BlockBackend *target = NULL;
95259824
WB
1299+
1300+ if (dev_id != vmar->vmstate_stream) {
67af0fa4
WB
1301+ target = rstate->target;
1302+ if (!verify && !target) {
95259824
WB
1303+ error_setg(errp, "got wrong dev id %d", dev_id);
1304+ return -1;
1305+ }
1306+
1307+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
1308+ error_setg(errp, "found duplicated cluster %zd for stream %s",
1309+ cluster_num, vmar->devinfo[dev_id].devname);
1310+ return -1;
1311+ }
1312+ vma_reader_set_bitmap(rstate, cluster_num, 1);
1313+
1314+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
1315+ } else {
1316+ max_sector = G_MAXINT64;
1317+ if (cluster_num != vmar->vmstate_clusters) {
1318+ error_setg(errp, "found out of order vmstate data");
1319+ return -1;
1320+ }
1321+ vmar->vmstate_clusters++;
1322+ }
1323+
1324+ vmar->clusters_read++;
1325+
1326+ if (verbose) {
1327+ time_t duration = time(NULL) - vmar->start_time;
1328+ int percent = (vmar->clusters_read*100)/vmar->cluster_count;
1329+ if (percent != vmar->clusters_read_per) {
1330+ printf("progress %d%% (read %zd bytes, duration %zd sec)\n",
1331+ percent, vmar->clusters_read*VMA_CLUSTER_SIZE,
1332+ duration);
1333+ fflush(stdout);
1334+ vmar->clusters_read_per = percent;
1335+ }
1336+ }
1337+
1338+ /* try to write whole clusters to speedup restore */
1339+ if (mask == 0xffff) {
1340+ if ((start + VMA_CLUSTER_SIZE) > extent_size) {
1341+ error_setg(errp, "short vma extent - too many blocks");
1342+ return -1;
1343+ }
1344+ int64_t sector_num = (cluster_num * VMA_CLUSTER_SIZE) /
1345+ BDRV_SECTOR_SIZE;
1346+ int64_t end_sector = sector_num +
1347+ VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE;
1348+
1349+ if (end_sector > max_sector) {
1350+ end_sector = max_sector;
1351+ }
1352+
1353+ if (end_sector <= sector_num) {
9b05d1d4 1354+ error_setg(errp, "got wrong block address - write beyond end");
95259824
WB
1355+ return -1;
1356+ }
1357+
67af0fa4
WB
1358+ if (!verify) {
1359+ int nb_sectors = end_sector - sector_num;
1360+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
1361+ buf + start, sector_num, nb_sectors,
1362+ errp) < 0) {
1363+ return -1;
1364+ }
95259824
WB
1365+ }
1366+
1367+ start += VMA_CLUSTER_SIZE;
1368+ } else {
1369+ int j;
1370+ int bit = 1;
1371+
1372+ for (j = 0; j < 16; j++) {
1373+ int64_t sector_num = (cluster_num*VMA_CLUSTER_SIZE +
1374+ j*VMA_BLOCK_SIZE)/BDRV_SECTOR_SIZE;
1375+
1376+ int64_t end_sector = sector_num +
1377+ VMA_BLOCK_SIZE/BDRV_SECTOR_SIZE;
1378+ if (end_sector > max_sector) {
1379+ end_sector = max_sector;
1380+ }
1381+
1382+ if (mask & bit) {
1383+ if ((start + VMA_BLOCK_SIZE) > extent_size) {
1384+ error_setg(errp, "short vma extent - too many blocks");
1385+ return -1;
1386+ }
1387+
1388+ if (end_sector <= sector_num) {
1389+ error_setg(errp, "got wrong block address - "
9b05d1d4 1390+ "write beyond end");
95259824
WB
1391+ return -1;
1392+ }
1393+
67af0fa4
WB
1394+ if (!verify) {
1395+ int nb_sectors = end_sector - sector_num;
1396+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
1397+ buf + start, sector_num,
1398+ nb_sectors, errp) < 0) {
1399+ return -1;
1400+ }
95259824
WB
1401+ }
1402+
1403+ start += VMA_BLOCK_SIZE;
1404+
1405+ } else {
1406+
67af0fa4
WB
1407+
1408+ if (end_sector > sector_num) {
95259824
WB
1409+ /* Todo: use bdrv_co_write_zeroes (but that need to
1410+ * be run inside coroutine?)
1411+ */
1412+ int nb_sectors = end_sector - sector_num;
67af0fa4
WB
1413+ int zero_size = BDRV_SECTOR_SIZE*nb_sectors;
1414+ vmar->zero_cluster_data += zero_size;
1415+ if (mask != 0) {
1416+ vmar->partial_zero_cluster_data += zero_size;
1417+ }
1418+
1419+ if (rstate->write_zeroes && !verify) {
1420+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
1421+ zero_vma_block, sector_num,
1422+ nb_sectors, errp) < 0) {
1423+ return -1;
1424+ }
95259824
WB
1425+ }
1426+ }
1427+ }
1428+
1429+ bit = bit << 1;
1430+ }
1431+ }
1432+ }
1433+
1434+ if (start != extent_size) {
1435+ error_setg(errp, "vma extent error - missing blocks");
1436+ return -1;
1437+ }
1438+
1439+ return 0;
1440+}
1441+
67af0fa4
WB
1442+static int vma_reader_restore_full(VmaReader *vmar, int vmstate_fd,
1443+ bool verbose, bool verify,
1444+ Error **errp)
95259824
WB
1445+{
1446+ assert(vmar);
1447+ assert(vmar->head_data);
1448+
1449+ int ret = 0;
1450+ unsigned char buf[VMA_MAX_EXTENT_SIZE];
1451+ int buf_pos = 0;
1452+ unsigned char md5sum[16];
1453+ VmaHeader *h = (VmaHeader *)vmar->head_data;
1454+
1455+ vmar->start_time = time(NULL);
1456+
1457+ while (1) {
1458+ int bytes = full_read(vmar->fd, buf + buf_pos, sizeof(buf) - buf_pos);
1459+ if (bytes < 0) {
1460+ error_setg(errp, "read failed - %s", g_strerror(errno));
1461+ return -1;
1462+ }
1463+
1464+ buf_pos += bytes;
1465+
1466+ if (!buf_pos) {
1467+ break; /* EOF */
1468+ }
1469+
1470+ if (buf_pos < VMA_EXTENT_HEADER_SIZE) {
1471+ error_setg(errp, "read short extent (%d bytes)", buf_pos);
1472+ return -1;
1473+ }
1474+
1475+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
1476+
1477+ /* extract md5sum */
1478+ memcpy(md5sum, ehead->md5sum, sizeof(ehead->md5sum));
1479+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
1480+
1481+ g_checksum_reset(vmar->md5csum);
1482+ g_checksum_update(vmar->md5csum, buf, VMA_EXTENT_HEADER_SIZE);
1483+ gsize csize = 16;
1484+ g_checksum_get_digest(vmar->md5csum, ehead->md5sum, &csize);
1485+
1486+ if (memcmp(md5sum, ehead->md5sum, 16) != 0) {
1487+ error_setg(errp, "wrong vma extent header chechsum");
1488+ return -1;
1489+ }
1490+
1491+ if (memcmp(h->uuid, ehead->uuid, sizeof(ehead->uuid)) != 0) {
1492+ error_setg(errp, "wrong vma extent uuid");
1493+ return -1;
1494+ }
1495+
1496+ if (ehead->magic != VMA_EXTENT_MAGIC || ehead->reserved1 != 0) {
1497+ error_setg(errp, "wrong vma extent header magic");
1498+ return -1;
1499+ }
1500+
1501+ int block_count = GUINT16_FROM_BE(ehead->block_count);
1502+ int extent_size = VMA_EXTENT_HEADER_SIZE + block_count*VMA_BLOCK_SIZE;
1503+
1504+ if (buf_pos < extent_size) {
1505+ error_setg(errp, "short vma extent (%d < %d)", buf_pos,
1506+ extent_size);
1507+ return -1;
1508+ }
1509+
1510+ if (restore_extent(vmar, buf, extent_size, vmstate_fd, verbose,
67af0fa4 1511+ verify, errp) < 0) {
95259824
WB
1512+ return -1;
1513+ }
1514+
1515+ if (buf_pos > extent_size) {
1516+ memmove(buf, buf + extent_size, buf_pos - extent_size);
1517+ buf_pos = buf_pos - extent_size;
1518+ } else {
1519+ buf_pos = 0;
1520+ }
1521+ }
1522+
1523+ bdrv_drain_all();
1524+
1525+ int i;
1526+ for (i = 1; i < 256; i++) {
1527+ VmaRestoreState *rstate = &vmar->rstate[i];
67af0fa4 1528+ if (!rstate->target) {
95259824
WB
1529+ continue;
1530+ }
1531+
67af0fa4
WB
1532+ if (blk_flush(rstate->target) < 0) {
1533+ error_setg(errp, "vma blk_flush %s failed",
95259824
WB
1534+ vmar->devinfo[i].devname);
1535+ return -1;
1536+ }
1537+
1538+ if (vmar->devinfo[i].size &&
1539+ (strcmp(vmar->devinfo[i].devname, "vmstate") != 0)) {
1540+ assert(rstate->bitmap);
1541+
1542+ int64_t cluster_num, end;
1543+
1544+ end = (vmar->devinfo[i].size + VMA_CLUSTER_SIZE - 1) /
1545+ VMA_CLUSTER_SIZE;
1546+
1547+ for (cluster_num = 0; cluster_num < end; cluster_num++) {
1548+ if (!vma_reader_get_bitmap(rstate, cluster_num)) {
1549+ error_setg(errp, "detected missing cluster %zd "
1550+ "for stream %s", cluster_num,
1551+ vmar->devinfo[i].devname);
1552+ return -1;
1553+ }
1554+ }
1555+ }
1556+ }
1557+
67af0fa4
WB
1558+ if (verbose) {
1559+ if (vmar->clusters_read) {
1560+ printf("total bytes read %zd, sparse bytes %zd (%.3g%%)\n",
1561+ vmar->clusters_read*VMA_CLUSTER_SIZE,
1562+ vmar->zero_cluster_data,
1563+ (double)(100.0*vmar->zero_cluster_data)/
1564+ (vmar->clusters_read*VMA_CLUSTER_SIZE));
1565+
1566+ int64_t datasize = vmar->clusters_read*VMA_CLUSTER_SIZE-vmar->zero_cluster_data;
1567+ if (datasize) { // this does not make sense for empty files
1568+ printf("space reduction due to 4K zero blocks %.3g%%\n",
1569+ (double)(100.0*vmar->partial_zero_cluster_data) / datasize);
1570+ }
1571+ } else {
1572+ printf("vma archive contains no image data\n");
1573+ }
1574+ }
95259824
WB
1575+ return ret;
1576+}
1577+
67af0fa4
WB
1578+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
1579+ Error **errp)
1580+{
1581+ return vma_reader_restore_full(vmar, vmstate_fd, verbose, false, errp);
1582+}
1583+
1584+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp)
1585+{
1586+ guint8 dev_id;
1587+
1588+ for (dev_id = 1; dev_id < 255; dev_id++) {
1589+ if (vma_reader_get_device_info(vmar, dev_id)) {
1590+ allocate_rstate(vmar, dev_id, NULL, false);
1591+ }
1592+ }
1593+
1594+ return vma_reader_restore_full(vmar, -1, verbose, true, errp);
1595+}
1596+
95259824
WB
1597diff --git a/vma-writer.c b/vma-writer.c
1598new file mode 100644
6838f038 1599index 0000000000..fd9567634d
95259824
WB
1600--- /dev/null
1601+++ b/vma-writer.c
67af0fa4 1602@@ -0,0 +1,771 @@
95259824
WB
1603+/*
1604+ * VMA: Virtual Machine Archive
1605+ *
1606+ * Copyright (C) 2012 Proxmox Server Solutions
1607+ *
1608+ * Authors:
1609+ * Dietmar Maurer (dietmar@proxmox.com)
1610+ *
1611+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
1612+ * See the COPYING file in the top-level directory.
1613+ *
1614+ */
1615+
1616+#include "qemu/osdep.h"
1617+#include <glib.h>
1618+#include <uuid/uuid.h>
1619+
1620+#include "vma.h"
1621+#include "block/block.h"
1622+#include "monitor/monitor.h"
1623+#include "qemu/main-loop.h"
1624+#include "qemu/coroutine.h"
1625+#include "qemu/cutils.h"
1626+
1627+#define DEBUG_VMA 0
1628+
1629+#define DPRINTF(fmt, ...)\
1630+ do { if (DEBUG_VMA) { printf("vma: " fmt, ## __VA_ARGS__); } } while (0)
1631+
1632+#define WRITE_BUFFERS 5
67af0fa4
WB
1633+#define HEADER_CLUSTERS 8
1634+#define HEADERBUF_SIZE (VMA_CLUSTER_SIZE*HEADER_CLUSTERS)
95259824
WB
1635+
1636+struct VmaWriter {
1637+ int fd;
1638+ FILE *cmd;
1639+ int status;
1640+ char errmsg[8192];
1641+ uuid_t uuid;
1642+ bool header_written;
1643+ bool closed;
1644+
1645+ /* we always write extents */
67af0fa4 1646+ unsigned char *outbuf;
95259824
WB
1647+ int outbuf_pos; /* in bytes */
1648+ int outbuf_count; /* in VMA_BLOCKS */
1649+ uint64_t outbuf_block_info[VMA_BLOCKS_PER_EXTENT];
1650+
67af0fa4 1651+ unsigned char *headerbuf;
95259824
WB
1652+
1653+ GChecksum *md5csum;
95259824
WB
1654+ CoMutex flush_lock;
1655+ Coroutine *co_writer;
1656+
1657+ /* drive informations */
1658+ VmaStreamInfo stream_info[256];
1659+ guint stream_count;
1660+
1661+ guint8 vmstate_stream;
1662+ uint32_t vmstate_clusters;
1663+
1664+ /* header blob table */
1665+ char *header_blob_table;
1666+ uint32_t header_blob_table_size;
1667+ uint32_t header_blob_table_pos;
1668+
1669+ /* store for config blobs */
1670+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
1671+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
1672+ uint32_t config_count;
1673+};
1674+
1675+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...)
1676+{
1677+ va_list ap;
1678+
1679+ if (vmaw->status < 0) {
1680+ return;
1681+ }
1682+
1683+ vmaw->status = -1;
1684+
1685+ va_start(ap, fmt);
1686+ g_vsnprintf(vmaw->errmsg, sizeof(vmaw->errmsg), fmt, ap);
1687+ va_end(ap);
1688+
1689+ DPRINTF("vma_writer_set_error: %s\n", vmaw->errmsg);
1690+}
1691+
1692+static uint32_t allocate_header_blob(VmaWriter *vmaw, const char *data,
1693+ size_t len)
1694+{
1695+ if (len > 65535) {
1696+ return 0;
1697+ }
1698+
1699+ if (!vmaw->header_blob_table ||
1700+ (vmaw->header_blob_table_size <
1701+ (vmaw->header_blob_table_pos + len + 2))) {
1702+ int newsize = vmaw->header_blob_table_size + ((len + 2 + 511)/512)*512;
1703+
1704+ vmaw->header_blob_table = g_realloc(vmaw->header_blob_table, newsize);
1705+ memset(vmaw->header_blob_table + vmaw->header_blob_table_size,
1706+ 0, newsize - vmaw->header_blob_table_size);
1707+ vmaw->header_blob_table_size = newsize;
1708+ }
1709+
1710+ uint32_t cpos = vmaw->header_blob_table_pos;
1711+ vmaw->header_blob_table[cpos] = len & 255;
1712+ vmaw->header_blob_table[cpos+1] = (len >> 8) & 255;
1713+ memcpy(vmaw->header_blob_table + cpos + 2, data, len);
1714+ vmaw->header_blob_table_pos += len + 2;
1715+ return cpos;
1716+}
1717+
1718+static uint32_t allocate_header_string(VmaWriter *vmaw, const char *str)
1719+{
1720+ assert(vmaw);
1721+
1722+ size_t len = strlen(str) + 1;
1723+
1724+ return allocate_header_blob(vmaw, str, len);
1725+}
1726+
1727+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
1728+ gsize len)
1729+{
1730+ assert(vmaw);
1731+ assert(!vmaw->header_written);
1732+ assert(vmaw->config_count < VMA_MAX_CONFIGS);
1733+ assert(name);
1734+ assert(data);
95259824
WB
1735+
1736+ gchar *basename = g_path_get_basename(name);
1737+ uint32_t name_ptr = allocate_header_string(vmaw, basename);
1738+ g_free(basename);
1739+
1740+ if (!name_ptr) {
1741+ return -1;
1742+ }
1743+
1744+ uint32_t data_ptr = allocate_header_blob(vmaw, data, len);
1745+ if (!data_ptr) {
1746+ return -1;
1747+ }
1748+
1749+ vmaw->config_names[vmaw->config_count] = name_ptr;
1750+ vmaw->config_data[vmaw->config_count] = data_ptr;
1751+
1752+ vmaw->config_count++;
1753+
1754+ return 0;
1755+}
1756+
1757+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
1758+ size_t size)
1759+{
1760+ assert(vmaw);
1761+ assert(devname);
1762+ assert(!vmaw->status);
1763+
1764+ if (vmaw->header_written) {
1765+ vma_writer_set_error(vmaw, "vma_writer_register_stream: header "
1766+ "already written");
1767+ return -1;
1768+ }
1769+
1770+ guint n = vmaw->stream_count + 1;
1771+
1772+ /* we can have dev_ids form 1 to 255 (0 reserved)
1773+ * 255(-1) reseverd for safety
1774+ */
1775+ if (n > 254) {
1776+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1777+ "too many drives");
1778+ return -1;
1779+ }
1780+
1781+ if (size <= 0) {
1782+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1783+ "got strange size %zd", size);
1784+ return -1;
1785+ }
1786+
1787+ DPRINTF("vma_writer_register_stream %s %zu %d\n", devname, size, n);
1788+
1789+ vmaw->stream_info[n].devname = g_strdup(devname);
1790+ vmaw->stream_info[n].size = size;
1791+
1792+ vmaw->stream_info[n].cluster_count = (size + VMA_CLUSTER_SIZE - 1) /
1793+ VMA_CLUSTER_SIZE;
1794+
1795+ vmaw->stream_count = n;
1796+
1797+ if (strcmp(devname, "vmstate") == 0) {
1798+ vmaw->vmstate_stream = n;
1799+ }
1800+
1801+ return n;
1802+}
1803+
1804+static void vma_co_continue_write(void *opaque)
1805+{
1806+ VmaWriter *vmaw = opaque;
1807+
1808+ DPRINTF("vma_co_continue_write\n");
1809+ qemu_coroutine_enter(vmaw->co_writer);
1810+}
1811+
1812+static ssize_t coroutine_fn
67af0fa4 1813+vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes)
95259824 1814+{
67af0fa4 1815+ DPRINTF("vma_queue_write enter %zd\n", bytes);
95259824 1816+
67af0fa4
WB
1817+ assert(vmaw);
1818+ assert(buf);
1819+ assert(bytes <= VMA_MAX_EXTENT_SIZE);
95259824 1820+
67af0fa4
WB
1821+ size_t done = 0;
1822+ ssize_t ret;
95259824
WB
1823+
1824+ assert(vmaw->co_writer == NULL);
1825+
1826+ vmaw->co_writer = qemu_coroutine_self();
1827+
95259824 1828+ while (done < bytes) {
67af0fa4
WB
1829+ aio_set_fd_handler(qemu_get_aio_context(), vmaw->fd, false, NULL, vma_co_continue_write, NULL, vmaw);
1830+ qemu_coroutine_yield();
1831+ aio_set_fd_handler(qemu_get_aio_context(), vmaw->fd, false, NULL, NULL, NULL, NULL);
1832+ if (vmaw->status < 0) {
1833+ DPRINTF("vma_queue_write detected canceled backup\n");
1834+ done = -1;
1835+ break;
1836+ }
95259824
WB
1837+ ret = write(vmaw->fd, buf + done, bytes - done);
1838+ if (ret > 0) {
1839+ done += ret;
67af0fa4 1840+ DPRINTF("vma_queue_write written %zd %zd\n", done, ret);
95259824
WB
1841+ } else if (ret < 0) {
1842+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
67af0fa4
WB
1843+ /* try again */
1844+ } else {
1845+ vma_writer_set_error(vmaw, "vma_queue_write: write error - %s",
95259824
WB
1846+ g_strerror(errno));
1847+ done = -1; /* always return failure for partial writes */
1848+ break;
1849+ }
1850+ } else if (ret == 0) {
1851+ /* should not happen - simply try again */
1852+ }
1853+ }
1854+
95259824
WB
1855+ vmaw->co_writer = NULL;
1856+
67af0fa4 1857+ return (done == bytes) ? bytes : -1;
95259824
WB
1858+}
1859+
1860+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
1861+{
1862+ const char *p;
1863+
1864+ assert(sizeof(VmaHeader) == (4096 + 8192));
1865+ assert(G_STRUCT_OFFSET(VmaHeader, config_names) == 2044);
1866+ assert(G_STRUCT_OFFSET(VmaHeader, config_data) == 3068);
1867+ assert(G_STRUCT_OFFSET(VmaHeader, dev_info) == 4096);
1868+ assert(sizeof(VmaExtentHeader) == 512);
1869+
1870+ VmaWriter *vmaw = g_new0(VmaWriter, 1);
1871+ vmaw->fd = -1;
1872+
1873+ vmaw->md5csum = g_checksum_new(G_CHECKSUM_MD5);
1874+ if (!vmaw->md5csum) {
1875+ error_setg(errp, "can't allocate cmsum\n");
1876+ goto err;
1877+ }
1878+
1879+ if (strstart(filename, "exec:", &p)) {
1880+ vmaw->cmd = popen(p, "w");
1881+ if (vmaw->cmd == NULL) {
1882+ error_setg(errp, "can't popen command '%s' - %s\n", p,
1883+ g_strerror(errno));
1884+ goto err;
1885+ }
1886+ vmaw->fd = fileno(vmaw->cmd);
1887+
67af0fa4 1888+ /* try to use O_NONBLOCK */
95259824 1889+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
95259824
WB
1890+
1891+ } else {
1892+ struct stat st;
1893+ int oflags;
1894+ const char *tmp_id_str;
1895+
1896+ if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) {
67af0fa4 1897+ oflags = O_NONBLOCK|O_WRONLY;
95259824
WB
1898+ vmaw->fd = qemu_open(filename, oflags, 0644);
1899+ } else if (strstart(filename, "/dev/fdset/", &tmp_id_str)) {
67af0fa4 1900+ oflags = O_NONBLOCK|O_WRONLY;
95259824
WB
1901+ vmaw->fd = qemu_open(filename, oflags, 0644);
1902+ } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) {
1903+ vmaw->fd = monitor_get_fd(cur_mon, tmp_id_str, errp);
1904+ if (vmaw->fd < 0) {
1905+ goto err;
1906+ }
67af0fa4 1907+ /* try to use O_NONBLOCK */
95259824 1908+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
95259824
WB
1909+ } else {
1910+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_CREAT|O_EXCL;
1911+ vmaw->fd = qemu_open(filename, oflags, 0644);
1912+ }
1913+
1914+ if (vmaw->fd < 0) {
1915+ error_setg(errp, "can't open file %s - %s\n", filename,
1916+ g_strerror(errno));
1917+ goto err;
1918+ }
1919+ }
1920+
1921+ /* we use O_DIRECT, so we need to align IO buffers */
67af0fa4
WB
1922+
1923+ vmaw->outbuf = qemu_memalign(512, VMA_MAX_EXTENT_SIZE);
1924+ vmaw->headerbuf = qemu_memalign(512, HEADERBUF_SIZE);
95259824
WB
1925+
1926+ vmaw->outbuf_count = 0;
1927+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1928+
1929+ vmaw->header_blob_table_pos = 1; /* start at pos 1 */
1930+
95259824 1931+ qemu_co_mutex_init(&vmaw->flush_lock);
95259824
WB
1932+
1933+ uuid_copy(vmaw->uuid, uuid);
1934+
1935+ return vmaw;
1936+
1937+err:
1938+ if (vmaw) {
1939+ if (vmaw->cmd) {
1940+ pclose(vmaw->cmd);
1941+ } else if (vmaw->fd >= 0) {
1942+ close(vmaw->fd);
1943+ }
1944+
1945+ if (vmaw->md5csum) {
1946+ g_checksum_free(vmaw->md5csum);
1947+ }
1948+
1949+ g_free(vmaw);
1950+ }
1951+
1952+ return NULL;
1953+}
1954+
1955+static int coroutine_fn vma_write_header(VmaWriter *vmaw)
1956+{
1957+ assert(vmaw);
67af0fa4 1958+ unsigned char *buf = vmaw->headerbuf;
95259824
WB
1959+ VmaHeader *head = (VmaHeader *)buf;
1960+
1961+ int i;
1962+
1963+ DPRINTF("VMA WRITE HEADER\n");
1964+
1965+ if (vmaw->status < 0) {
1966+ return vmaw->status;
1967+ }
1968+
67af0fa4 1969+ memset(buf, 0, HEADERBUF_SIZE);
95259824
WB
1970+
1971+ head->magic = VMA_MAGIC;
1972+ head->version = GUINT32_TO_BE(1); /* v1 */
1973+ memcpy(head->uuid, vmaw->uuid, 16);
1974+
1975+ time_t ctime = time(NULL);
1976+ head->ctime = GUINT64_TO_BE(ctime);
1977+
95259824
WB
1978+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
1979+ head->config_names[i] = GUINT32_TO_BE(vmaw->config_names[i]);
1980+ head->config_data[i] = GUINT32_TO_BE(vmaw->config_data[i]);
1981+ }
1982+
1983+ /* 32 bytes per device (12 used currently) = 8192 bytes max */
1984+ for (i = 1; i <= 254; i++) {
1985+ VmaStreamInfo *si = &vmaw->stream_info[i];
1986+ if (si->size) {
1987+ assert(si->devname);
1988+ uint32_t devname_ptr = allocate_header_string(vmaw, si->devname);
1989+ if (!devname_ptr) {
1990+ return -1;
1991+ }
1992+ head->dev_info[i].devname_ptr = GUINT32_TO_BE(devname_ptr);
1993+ head->dev_info[i].size = GUINT64_TO_BE(si->size);
1994+ }
1995+ }
1996+
1997+ uint32_t header_size = sizeof(VmaHeader) + vmaw->header_blob_table_size;
1998+ head->header_size = GUINT32_TO_BE(header_size);
1999+
67af0fa4 2000+ if (header_size > HEADERBUF_SIZE) {
95259824
WB
2001+ return -1; /* just to be sure */
2002+ }
2003+
2004+ uint32_t blob_buffer_offset = sizeof(VmaHeader);
2005+ memcpy(buf + blob_buffer_offset, vmaw->header_blob_table,
2006+ vmaw->header_blob_table_size);
2007+ head->blob_buffer_offset = GUINT32_TO_BE(blob_buffer_offset);
2008+ head->blob_buffer_size = GUINT32_TO_BE(vmaw->header_blob_table_pos);
2009+
2010+ g_checksum_reset(vmaw->md5csum);
2011+ g_checksum_update(vmaw->md5csum, (const guchar *)buf, header_size);
2012+ gsize csize = 16;
2013+ g_checksum_get_digest(vmaw->md5csum, (guint8 *)(head->md5sum), &csize);
2014+
2015+ return vma_queue_write(vmaw, buf, header_size);
2016+}
2017+
2018+static int coroutine_fn vma_writer_flush(VmaWriter *vmaw)
2019+{
2020+ assert(vmaw);
2021+
2022+ int ret;
2023+ int i;
2024+
2025+ if (vmaw->status < 0) {
2026+ return vmaw->status;
2027+ }
2028+
2029+ if (!vmaw->header_written) {
2030+ vmaw->header_written = true;
2031+ ret = vma_write_header(vmaw);
2032+ if (ret < 0) {
2033+ vma_writer_set_error(vmaw, "vma_writer_flush: write header failed");
2034+ return ret;
2035+ }
2036+ }
2037+
2038+ DPRINTF("VMA WRITE FLUSH %d %d\n", vmaw->outbuf_count, vmaw->outbuf_pos);
2039+
2040+
2041+ VmaExtentHeader *ehead = (VmaExtentHeader *)vmaw->outbuf;
2042+
2043+ ehead->magic = VMA_EXTENT_MAGIC;
2044+ ehead->reserved1 = 0;
2045+
2046+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
2047+ ehead->blockinfo[i] = GUINT64_TO_BE(vmaw->outbuf_block_info[i]);
2048+ }
2049+
2050+ guint16 block_count = (vmaw->outbuf_pos - VMA_EXTENT_HEADER_SIZE) /
2051+ VMA_BLOCK_SIZE;
2052+
2053+ ehead->block_count = GUINT16_TO_BE(block_count);
2054+
2055+ memcpy(ehead->uuid, vmaw->uuid, sizeof(ehead->uuid));
2056+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
2057+
2058+ g_checksum_reset(vmaw->md5csum);
2059+ g_checksum_update(vmaw->md5csum, vmaw->outbuf, VMA_EXTENT_HEADER_SIZE);
2060+ gsize csize = 16;
2061+ g_checksum_get_digest(vmaw->md5csum, ehead->md5sum, &csize);
2062+
2063+ int bytes = vmaw->outbuf_pos;
2064+ ret = vma_queue_write(vmaw, vmaw->outbuf, bytes);
2065+ if (ret != bytes) {
2066+ vma_writer_set_error(vmaw, "vma_writer_flush: failed write");
2067+ }
2068+
2069+ vmaw->outbuf_count = 0;
2070+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
2071+
2072+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
2073+ vmaw->outbuf_block_info[i] = 0;
2074+ }
2075+
2076+ return vmaw->status;
2077+}
2078+
2079+static int vma_count_open_streams(VmaWriter *vmaw)
2080+{
2081+ g_assert(vmaw != NULL);
2082+
2083+ int i;
2084+ int open_drives = 0;
2085+ for (i = 0; i <= 255; i++) {
2086+ if (vmaw->stream_info[i].size && !vmaw->stream_info[i].finished) {
2087+ open_drives++;
2088+ }
2089+ }
2090+
2091+ return open_drives;
2092+}
2093+
67af0fa4
WB
2094+
2095+/**
2096+ * You need to call this if the vma archive does not contain
2097+ * any data stream.
2098+ */
2099+int coroutine_fn
2100+vma_writer_flush_output(VmaWriter *vmaw)
2101+{
2102+ qemu_co_mutex_lock(&vmaw->flush_lock);
2103+ int ret = vma_writer_flush(vmaw);
2104+ qemu_co_mutex_unlock(&vmaw->flush_lock);
2105+ if (ret < 0) {
2106+ vma_writer_set_error(vmaw, "vma_writer_flush_header failed");
2107+ }
2108+ return ret;
2109+}
2110+
95259824
WB
2111+/**
2112+ * all jobs should call this when there is no more data
2113+ * Returns: number of remaining stream (0 ==> finished)
2114+ */
2115+int coroutine_fn
2116+vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id)
2117+{
2118+ g_assert(vmaw != NULL);
2119+
2120+ DPRINTF("vma_writer_set_status %d\n", dev_id);
2121+ if (!vmaw->stream_info[dev_id].size) {
2122+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
2123+ "no such stream %d", dev_id);
2124+ return -1;
2125+ }
2126+ if (vmaw->stream_info[dev_id].finished) {
2127+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
2128+ "stream already closed %d", dev_id);
2129+ return -1;
2130+ }
2131+
2132+ vmaw->stream_info[dev_id].finished = true;
2133+
2134+ int open_drives = vma_count_open_streams(vmaw);
2135+
2136+ if (open_drives <= 0) {
2137+ DPRINTF("vma_writer_set_status all drives completed\n");
67af0fa4 2138+ vma_writer_flush_output(vmaw);
95259824
WB
2139+ }
2140+
2141+ return open_drives;
2142+}
2143+
2144+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status)
2145+{
2146+ int i;
2147+
2148+ g_assert(vmaw != NULL);
2149+
2150+ if (status) {
2151+ status->status = vmaw->status;
2152+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
2153+ for (i = 0; i <= 255; i++) {
2154+ status->stream_info[i] = vmaw->stream_info[i];
2155+ }
2156+
2157+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
2158+ }
2159+
2160+ status->closed = vmaw->closed;
2161+
2162+ return vmaw->status;
2163+}
2164+
2165+static int vma_writer_get_buffer(VmaWriter *vmaw)
2166+{
2167+ int ret = 0;
2168+
2169+ qemu_co_mutex_lock(&vmaw->flush_lock);
2170+
2171+ /* wait until buffer is available */
2172+ while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) {
2173+ ret = vma_writer_flush(vmaw);
2174+ if (ret < 0) {
2175+ vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed");
2176+ break;
2177+ }
2178+ }
2179+
2180+ qemu_co_mutex_unlock(&vmaw->flush_lock);
2181+
2182+ return ret;
2183+}
2184+
2185+
2186+int64_t coroutine_fn
2187+vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
6838f038 2188+ const unsigned char *buf, size_t *zero_bytes)
95259824
WB
2189+{
2190+ g_assert(vmaw != NULL);
2191+ g_assert(zero_bytes != NULL);
2192+
2193+ *zero_bytes = 0;
2194+
2195+ if (vmaw->status < 0) {
2196+ return vmaw->status;
2197+ }
2198+
2199+ if (!dev_id || !vmaw->stream_info[dev_id].size) {
2200+ vma_writer_set_error(vmaw, "vma_writer_write: "
2201+ "no such stream %d", dev_id);
2202+ return -1;
2203+ }
2204+
2205+ if (vmaw->stream_info[dev_id].finished) {
2206+ vma_writer_set_error(vmaw, "vma_writer_write: "
2207+ "stream already closed %d", dev_id);
2208+ return -1;
2209+ }
2210+
2211+
2212+ if (cluster_num >= (((uint64_t)1)<<32)) {
2213+ vma_writer_set_error(vmaw, "vma_writer_write: "
2214+ "cluster number out of range");
2215+ return -1;
2216+ }
2217+
2218+ if (dev_id == vmaw->vmstate_stream) {
2219+ if (cluster_num != vmaw->vmstate_clusters) {
2220+ vma_writer_set_error(vmaw, "vma_writer_write: "
2221+ "non sequential vmstate write");
2222+ }
2223+ vmaw->vmstate_clusters++;
2224+ } else if (cluster_num >= vmaw->stream_info[dev_id].cluster_count) {
2225+ vma_writer_set_error(vmaw, "vma_writer_write: cluster number too big");
2226+ return -1;
2227+ }
2228+
2229+ /* wait until buffer is available */
2230+ if (vma_writer_get_buffer(vmaw) < 0) {
2231+ vma_writer_set_error(vmaw, "vma_writer_write: "
2232+ "vma_writer_get_buffer failed");
2233+ return -1;
2234+ }
2235+
2236+ DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num);
2237+
2238+ uint16_t mask = 0;
2239+
2240+ if (buf) {
2241+ int i;
2242+ int bit = 1;
2243+ for (i = 0; i < 16; i++) {
6838f038 2244+ const unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
95259824
WB
2245+ if (!buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) {
2246+ mask |= bit;
2247+ memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock,
2248+ VMA_BLOCK_SIZE);
2249+ vmaw->outbuf_pos += VMA_BLOCK_SIZE;
2250+ } else {
2251+ DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
2252+ vmaw->stream_info[dev_id].zero_bytes += VMA_BLOCK_SIZE;
2253+ *zero_bytes += VMA_BLOCK_SIZE;
2254+ }
2255+
2256+ bit = bit << 1;
2257+ }
2258+ } else {
2259+ DPRINTF("VMA WRITE %zd ZERO CLUSTER\n", cluster_num);
2260+ vmaw->stream_info[dev_id].zero_bytes += VMA_CLUSTER_SIZE;
2261+ *zero_bytes += VMA_CLUSTER_SIZE;
2262+ }
2263+
2264+ uint64_t block_info = ((uint64_t)mask) << (32+16);
2265+ block_info |= ((uint64_t)dev_id) << 32;
2266+ block_info |= (cluster_num & 0xffffffff);
2267+ vmaw->outbuf_block_info[vmaw->outbuf_count] = block_info;
2268+
2269+ DPRINTF("VMA WRITE MASK %zd %zx\n", cluster_num, block_info);
2270+
2271+ vmaw->outbuf_count++;
2272+
2273+ /** NOTE: We allways write whole clusters, but we correctly set
2274+ * transferred bytes. So transferred == size when when everything
2275+ * went OK.
2276+ */
2277+ size_t transferred = VMA_CLUSTER_SIZE;
2278+
2279+ if (dev_id != vmaw->vmstate_stream) {
2280+ uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
2281+ if (last > vmaw->stream_info[dev_id].size) {
2282+ uint64_t diff = last - vmaw->stream_info[dev_id].size;
2283+ if (diff >= VMA_CLUSTER_SIZE) {
2284+ vma_writer_set_error(vmaw, "vma_writer_write: "
2285+ "read after last cluster");
2286+ return -1;
2287+ }
2288+ transferred -= diff;
2289+ }
2290+ }
2291+
2292+ vmaw->stream_info[dev_id].transferred += transferred;
2293+
2294+ return transferred;
2295+}
2296+
67af0fa4
WB
2297+void vma_writer_error_propagate(VmaWriter *vmaw, Error **errp)
2298+{
2299+ if (vmaw->status < 0 && *errp == NULL) {
2300+ error_setg(errp, "%s", vmaw->errmsg);
2301+ }
2302+}
2303+
95259824
WB
2304+int vma_writer_close(VmaWriter *vmaw, Error **errp)
2305+{
2306+ g_assert(vmaw != NULL);
2307+
2308+ int i;
2309+
67af0fa4
WB
2310+ while (vmaw->co_writer) {
2311+ aio_poll(qemu_get_aio_context(), true);
95259824
WB
2312+ }
2313+
67af0fa4
WB
2314+ assert(vmaw->co_writer == NULL);
2315+
95259824
WB
2316+ if (vmaw->cmd) {
2317+ if (pclose(vmaw->cmd) < 0) {
2318+ vma_writer_set_error(vmaw, "vma_writer_close: "
2319+ "pclose failed - %s", g_strerror(errno));
2320+ }
2321+ } else {
2322+ if (close(vmaw->fd) < 0) {
2323+ vma_writer_set_error(vmaw, "vma_writer_close: "
2324+ "close failed - %s", g_strerror(errno));
2325+ }
2326+ }
2327+
2328+ for (i = 0; i <= 255; i++) {
2329+ VmaStreamInfo *si = &vmaw->stream_info[i];
2330+ if (si->size) {
2331+ if (!si->finished) {
2332+ vma_writer_set_error(vmaw, "vma_writer_close: "
2333+ "detected open stream '%s'", si->devname);
2334+ } else if ((si->transferred != si->size) &&
2335+ (i != vmaw->vmstate_stream)) {
2336+ vma_writer_set_error(vmaw, "vma_writer_close: "
2337+ "incomplete stream '%s' (%zd != %zd)",
2338+ si->devname, si->transferred, si->size);
2339+ }
2340+ }
2341+ }
2342+
2343+ for (i = 0; i <= 255; i++) {
2344+ vmaw->stream_info[i].finished = 1; /* mark as closed */
2345+ }
2346+
2347+ vmaw->closed = 1;
2348+
2349+ if (vmaw->status < 0 && *errp == NULL) {
2350+ error_setg(errp, "%s", vmaw->errmsg);
2351+ }
2352+
2353+ return vmaw->status;
2354+}
2355+
2356+void vma_writer_destroy(VmaWriter *vmaw)
2357+{
2358+ assert(vmaw);
2359+
2360+ int i;
2361+
2362+ for (i = 0; i <= 255; i++) {
2363+ if (vmaw->stream_info[i].devname) {
2364+ g_free(vmaw->stream_info[i].devname);
2365+ }
2366+ }
2367+
2368+ if (vmaw->md5csum) {
2369+ g_checksum_free(vmaw->md5csum);
2370+ }
2371+
95259824
WB
2372+ g_free(vmaw);
2373+}
2374diff --git a/vma.c b/vma.c
2375new file mode 100644
6838f038 2376index 0000000000..1b59fd1555
95259824
WB
2377--- /dev/null
2378+++ b/vma.c
6838f038 2379@@ -0,0 +1,756 @@
95259824
WB
2380+/*
2381+ * VMA: Virtual Machine Archive
2382+ *
2383+ * Copyright (C) 2012-2013 Proxmox Server Solutions
2384+ *
2385+ * Authors:
2386+ * Dietmar Maurer (dietmar@proxmox.com)
2387+ *
2388+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
2389+ * See the COPYING file in the top-level directory.
2390+ *
2391+ */
2392+
2393+#include "qemu/osdep.h"
2394+#include <glib.h>
2395+
2396+#include "vma.h"
2397+#include "qemu-common.h"
2398+#include "qemu/error-report.h"
2399+#include "qemu/main-loop.h"
a544966d 2400+#include "qapi/qmp/qstring.h"
67af0fa4 2401+#include "sysemu/block-backend.h"
95259824
WB
2402+
/* Print the command line synopsis to stdout and terminate with status 1. */
static void help(void)
{
    static const char usage[] =
        "usage: vma command [command options]\n"
        "\n"
        "vma list <filename>\n"
        "vma config <filename> [-c config]\n"
        "vma create <filename> [-c config] pathname ...\n"
        "vma extract <filename> [-r <fifo>] <targetdir>\n"
        "vma verify <filename> [-v]\n";

    fputs(usage, stdout);
    exit(1);
}
2418+
/*
 * Split an optional "<devname>=" prefix off @path.
 *
 * With a '=' in @path, *@devname receives a newly allocated copy of the
 * text before the first '=' and the text after it is returned.  Without
 * one, @path is returned unchanged and *@devname is set to "disk<index>"
 * (newly allocated) when @index >= 0, or to NULL otherwise.
 */
static const char *extract_devname(const char *path, char **devname, int index)
{
    const char *eq;

    assert(path);

    eq = strchr(path, '=');
    if (eq) {
        *devname = g_strndup(path, eq - path);
        return eq + 1;
    }

    *devname = (index >= 0) ? g_strdup_printf("disk%d", index) : NULL;

    return path;
}
2438+
2439+static void print_content(VmaReader *vmar)
2440+{
2441+ assert(vmar);
2442+
2443+ VmaHeader *head = vma_reader_get_header(vmar);
2444+
2445+ GList *l = vma_reader_get_config_data(vmar);
2446+ while (l && l->data) {
2447+ VmaConfigData *cdata = (VmaConfigData *)l->data;
2448+ l = g_list_next(l);
2449+ printf("CFG: size: %d name: %s\n", cdata->len, cdata->name);
2450+ }
2451+
2452+ int i;
2453+ VmaDeviceInfo *di;
2454+ for (i = 1; i < 255; i++) {
2455+ di = vma_reader_get_device_info(vmar, i);
2456+ if (di) {
2457+ if (strcmp(di->devname, "vmstate") == 0) {
2458+ printf("VMSTATE: dev_id=%d memory: %zd\n", i, di->size);
2459+ } else {
2460+ printf("DEV: dev_id=%d size: %zd devname: %s\n",
2461+ i, di->size, di->devname);
2462+ }
2463+ }
2464+ }
2465+ /* ctime is the last entry we print */
2466+ printf("CTIME: %s", ctime(&head->ctime));
2467+ fflush(stdout);
2468+}
2469+
2470+static int list_content(int argc, char **argv)
2471+{
2472+ int c, ret = 0;
2473+ const char *filename;
2474+
2475+ for (;;) {
2476+ c = getopt(argc, argv, "h");
2477+ if (c == -1) {
2478+ break;
2479+ }
2480+ switch (c) {
2481+ case '?':
2482+ case 'h':
2483+ help();
2484+ break;
2485+ default:
2486+ g_assert_not_reached();
2487+ }
2488+ }
2489+
2490+ /* Get the filename */
2491+ if ((optind + 1) != argc) {
2492+ help();
2493+ }
2494+ filename = argv[optind++];
2495+
2496+ Error *errp = NULL;
2497+ VmaReader *vmar = vma_reader_create(filename, &errp);
2498+
2499+ if (!vmar) {
2500+ g_error("%s", error_get_pretty(errp));
2501+ }
2502+
2503+ print_content(vmar);
2504+
2505+ vma_reader_destroy(vmar);
2506+
2507+ return ret;
2508+}
2509+
/* One entry of the restore map read from the fifo given with "-r". */
typedef struct RestoreMap {
    char *devname;      /* device name inside the archive (hash table key) */
    char *path;         /* target the device is restored to */
    char *format;       /* explicit block driver name, NULL if not given */
    bool write_zero;    /* taken from the "0:" / "1:" prefix of the entry */
} RestoreMap;
2516+
2517+static int extract_content(int argc, char **argv)
2518+{
2519+ int c, ret = 0;
2520+ int verbose = 0;
2521+ const char *filename;
2522+ const char *dirname;
2523+ const char *readmap = NULL;
2524+
2525+ for (;;) {
2526+ c = getopt(argc, argv, "hvr:");
2527+ if (c == -1) {
2528+ break;
2529+ }
2530+ switch (c) {
2531+ case '?':
2532+ case 'h':
2533+ help();
2534+ break;
2535+ case 'r':
2536+ readmap = optarg;
2537+ break;
2538+ case 'v':
2539+ verbose = 1;
2540+ break;
2541+ default:
2542+ help();
2543+ }
2544+ }
2545+
2546+ /* Get the filename */
2547+ if ((optind + 2) != argc) {
2548+ help();
2549+ }
2550+ filename = argv[optind++];
2551+ dirname = argv[optind++];
2552+
2553+ Error *errp = NULL;
2554+ VmaReader *vmar = vma_reader_create(filename, &errp);
2555+
2556+ if (!vmar) {
2557+ g_error("%s", error_get_pretty(errp));
2558+ }
2559+
2560+ if (mkdir(dirname, 0777) < 0) {
2561+ g_error("unable to create target directory %s - %s",
2562+ dirname, g_strerror(errno));
2563+ }
2564+
2565+ GList *l = vma_reader_get_config_data(vmar);
2566+ while (l && l->data) {
2567+ VmaConfigData *cdata = (VmaConfigData *)l->data;
2568+ l = g_list_next(l);
2569+ char *cfgfn = g_strdup_printf("%s/%s", dirname, cdata->name);
2570+ GError *err = NULL;
2571+ if (!g_file_set_contents(cfgfn, (gchar *)cdata->data, cdata->len,
2572+ &err)) {
2573+ g_error("unable to write file: %s", err->message);
2574+ }
2575+ }
2576+
2577+ GHashTable *devmap = g_hash_table_new(g_str_hash, g_str_equal);
2578+
2579+ if (readmap) {
2580+ print_content(vmar);
2581+
2582+ FILE *map = fopen(readmap, "r");
2583+ if (!map) {
2584+ g_error("unable to open fifo %s - %s", readmap, g_strerror(errno));
2585+ }
2586+
2587+ while (1) {
2588+ char inbuf[8192];
2589+ char *line = fgets(inbuf, sizeof(inbuf), map);
2590+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
2591+ break;
2592+ }
2593+ int len = strlen(line);
2594+ if (line[len - 1] == '\n') {
2595+ line[len - 1] = '\0';
2596+ if (len == 1) {
2597+ break;
2598+ }
2599+ }
2600+
67af0fa4
WB
2601+ char *format = NULL;
2602+ if (strncmp(line, "format=", sizeof("format=")-1) == 0) {
2603+ format = line + sizeof("format=")-1;
2604+ char *colon = strchr(format, ':');
2605+ if (!colon) {
2606+ g_error("read map failed - found only a format ('%s')", inbuf);
2607+ }
2608+ format = g_strndup(format, colon - format);
2609+ line = colon+1;
2610+ }
2611+
95259824
WB
2612+ const char *path;
2613+ bool write_zero;
2614+ if (line[0] == '0' && line[1] == ':') {
67af0fa4 2615+ path = line + 2;
95259824
WB
2616+ write_zero = false;
2617+ } else if (line[0] == '1' && line[1] == ':') {
67af0fa4 2618+ path = line + 2;
95259824
WB
2619+ write_zero = true;
2620+ } else {
2621+ g_error("read map failed - parse error ('%s')", inbuf);
2622+ }
2623+
2624+ char *devname = NULL;
2625+ path = extract_devname(path, &devname, -1);
2626+ if (!devname) {
2627+ g_error("read map failed - no dev name specified ('%s')",
2628+ inbuf);
2629+ }
2630+
2631+ RestoreMap *map = g_new0(RestoreMap, 1);
2632+ map->devname = g_strdup(devname);
2633+ map->path = g_strdup(path);
67af0fa4 2634+ map->format = format;
95259824
WB
2635+ map->write_zero = write_zero;
2636+
2637+ g_hash_table_insert(devmap, map->devname, map);
2638+
2639+ };
2640+ }
2641+
2642+ int i;
2643+ int vmstate_fd = -1;
2644+ guint8 vmstate_stream = 0;
2645+
67af0fa4
WB
2646+ BlockBackend *blk = NULL;
2647+
95259824
WB
2648+ for (i = 1; i < 255; i++) {
2649+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2650+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
2651+ vmstate_stream = i;
2652+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
2653+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
2654+ if (vmstate_fd < 0) {
2655+ g_error("create vmstate file '%s' failed - %s", statefn,
2656+ g_strerror(errno));
2657+ }
2658+ g_free(statefn);
2659+ } else if (di) {
2660+ char *devfn = NULL;
67af0fa4
WB
2661+ const char *format = NULL;
2662+ int flags = BDRV_O_RDWR | BDRV_O_NO_FLUSH;
95259824
WB
2663+ bool write_zero = true;
2664+
2665+ if (readmap) {
2666+ RestoreMap *map;
2667+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
2668+ if (map == NULL) {
2669+ g_error("no device name mapping for %s", di->devname);
2670+ }
2671+ devfn = map->path;
67af0fa4 2672+ format = map->format;
95259824
WB
2673+ write_zero = map->write_zero;
2674+ } else {
2675+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2676+ dirname, di->devname);
2677+ printf("DEVINFO %s %zd\n", devfn, di->size);
2678+
2679+ bdrv_img_create(devfn, "raw", NULL, NULL, NULL, di->size,
6838f038 2680+ flags, true, &errp);
95259824
WB
2681+ if (errp) {
2682+ g_error("can't create file %s: %s", devfn,
2683+ error_get_pretty(errp));
2684+ }
2685+
2686+ /* Note: we created an empty file above, so there is no
2687+ * need to write zeroes (so we generate a sparse file)
2688+ */
2689+ write_zero = false;
2690+ }
2691+
67af0fa4
WB
2692+ size_t devlen = strlen(devfn);
2693+ QDict *options = NULL;
2694+ if (format) {
2695+ /* explicit format from commandline */
2696+ options = qdict_new();
2697+ qdict_put(options, "driver", qstring_from_str(format));
2698+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
2699+ strncmp(devfn, "/dev/", 5) == 0)
2700+ {
2701+ /* This part is now deprecated for PVE as well (just as qemu
2702+ * deprecated not specifying an explicit raw format, too.
2703+ */
2704+ /* explicit raw format */
2705+ options = qdict_new();
2706+ qdict_put(options, "driver", qstring_from_str("raw"));
2707+ }
2708+
2709+
2710+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
95259824
WB
2711+ g_error("can't open file %s - %s", devfn,
2712+ error_get_pretty(errp));
2713+ }
67af0fa4
WB
2714+
2715+ if (vma_reader_register_bs(vmar, i, blk, write_zero, &errp) < 0) {
95259824
WB
2716+ g_error("%s", error_get_pretty(errp));
2717+ }
2718+
2719+ if (!readmap) {
2720+ g_free(devfn);
2721+ }
2722+ }
2723+ }
2724+
2725+ if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
2726+ g_error("restore failed - %s", error_get_pretty(errp));
2727+ }
2728+
2729+ if (!readmap) {
2730+ for (i = 1; i < 255; i++) {
2731+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2732+ if (di && (i != vmstate_stream)) {
2733+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2734+ dirname, di->devname);
2735+ char *fn = g_strdup_printf("%s/disk-%s.raw",
2736+ dirname, di->devname);
2737+ if (rename(tmpfn, fn) != 0) {
2738+ g_error("rename %s to %s failed - %s",
2739+ tmpfn, fn, g_strerror(errno));
2740+ }
2741+ }
2742+ }
2743+ }
2744+
2745+ vma_reader_destroy(vmar);
2746+
67af0fa4
WB
2747+ blk_unref(blk);
2748+
2749+ bdrv_close_all();
2750+
2751+ return ret;
2752+}
2753+
2754+static int verify_content(int argc, char **argv)
2755+{
2756+ int c, ret = 0;
2757+ int verbose = 0;
2758+ const char *filename;
2759+
2760+ for (;;) {
2761+ c = getopt(argc, argv, "hv");
2762+ if (c == -1) {
2763+ break;
2764+ }
2765+ switch (c) {
2766+ case '?':
2767+ case 'h':
2768+ help();
2769+ break;
2770+ case 'v':
2771+ verbose = 1;
2772+ break;
2773+ default:
2774+ help();
2775+ }
2776+ }
2777+
2778+ /* Get the filename */
2779+ if ((optind + 1) != argc) {
2780+ help();
2781+ }
2782+ filename = argv[optind++];
2783+
2784+ Error *errp = NULL;
2785+ VmaReader *vmar = vma_reader_create(filename, &errp);
2786+
2787+ if (!vmar) {
2788+ g_error("%s", error_get_pretty(errp));
2789+ }
2790+
2791+ if (verbose) {
2792+ print_content(vmar);
2793+ }
2794+
2795+ if (vma_reader_verify(vmar, verbose, &errp) < 0) {
2796+ g_error("verify failed - %s", error_get_pretty(errp));
2797+ }
2798+
2799+ vma_reader_destroy(vmar);
2800+
95259824
WB
2801+ bdrv_close_all();
2802+
2803+ return ret;
2804+}
2805+
2806+typedef struct BackupJob {
67af0fa4 2807+ BlockBackend *target;
95259824
WB
2808+ int64_t len;
2809+ VmaWriter *vmaw;
2810+ uint8_t dev_id;
2811+} BackupJob;
2812+
2813+#define BACKUP_SECTORS_PER_CLUSTER (VMA_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
2814+
67af0fa4
WB
2815+static void coroutine_fn backup_run_empty(void *opaque)
2816+{
2817+ VmaWriter *vmaw = (VmaWriter *)opaque;
2818+
2819+ vma_writer_flush_output(vmaw);
2820+
2821+ Error *err = NULL;
2822+ if (vma_writer_close(vmaw, &err) != 0) {
2823+ g_warning("vma_writer_close failed %s", error_get_pretty(err));
2824+ }
2825+}
2826+
95259824
WB
2827+static void coroutine_fn backup_run(void *opaque)
2828+{
2829+ BackupJob *job = (BackupJob *)opaque;
2830+ struct iovec iov;
2831+ QEMUIOVector qiov;
2832+
2833+ int64_t start, end;
2834+ int ret = 0;
2835+
67af0fa4 2836+ unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
95259824
WB
2837+
2838+ start = 0;
2839+ end = DIV_ROUND_UP(job->len / BDRV_SECTOR_SIZE,
2840+ BACKUP_SECTORS_PER_CLUSTER);
2841+
2842+ for (; start < end; start++) {
2843+ iov.iov_base = buf;
2844+ iov.iov_len = VMA_CLUSTER_SIZE;
2845+ qemu_iovec_init_external(&qiov, &iov, 1);
2846+
67af0fa4
WB
2847+ ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
2848+ VMA_CLUSTER_SIZE, &qiov, 0);
95259824
WB
2849+ if (ret < 0) {
2850+ vma_writer_set_error(job->vmaw, "read error", -1);
2851+ goto out;
2852+ }
2853+
2854+ size_t zb = 0;
2855+ if (vma_writer_write(job->vmaw, job->dev_id, start, buf, &zb) < 0) {
2856+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed", -1);
2857+ goto out;
2858+ }
2859+ }
2860+
2861+
2862+out:
2863+ if (vma_writer_close_stream(job->vmaw, job->dev_id) <= 0) {
2864+ Error *err = NULL;
2865+ if (vma_writer_close(job->vmaw, &err) != 0) {
2866+ g_warning("vma_writer_close failed %s", error_get_pretty(err));
2867+ }
2868+ }
2869+}
2870+
2871+static int create_archive(int argc, char **argv)
2872+{
2873+ int i, c;
2874+ int verbose = 0;
2875+ const char *archivename;
2876+ GList *config_files = NULL;
2877+
2878+ for (;;) {
2879+ c = getopt(argc, argv, "hvc:");
2880+ if (c == -1) {
2881+ break;
2882+ }
2883+ switch (c) {
2884+ case '?':
2885+ case 'h':
2886+ help();
2887+ break;
2888+ case 'c':
2889+ config_files = g_list_append(config_files, optarg);
2890+ break;
2891+ case 'v':
2892+ verbose = 1;
2893+ break;
2894+ default:
2895+ g_assert_not_reached();
2896+ }
2897+ }
2898+
2899+
67af0fa4
WB
2900+ /* make sure we an archive name */
2901+ if ((optind + 1) > argc) {
95259824
WB
2902+ help();
2903+ }
2904+
2905+ archivename = argv[optind++];
2906+
2907+ uuid_t uuid;
2908+ uuid_generate(uuid);
2909+
2910+ Error *local_err = NULL;
2911+ VmaWriter *vmaw = vma_writer_create(archivename, uuid, &local_err);
2912+
2913+ if (vmaw == NULL) {
2914+ g_error("%s", error_get_pretty(local_err));
2915+ }
2916+
2917+ GList *l = config_files;
2918+ while (l && l->data) {
2919+ char *name = l->data;
2920+ char *cdata = NULL;
2921+ gsize clen = 0;
2922+ GError *err = NULL;
2923+ if (!g_file_get_contents(name, &cdata, &clen, &err)) {
2924+ unlink(archivename);
2925+ g_error("Unable to read file: %s", err->message);
2926+ }
2927+
2928+ if (vma_writer_add_config(vmaw, name, cdata, clen) != 0) {
2929+ unlink(archivename);
2930+ g_error("Unable to append config data %s (len = %zd)",
2931+ name, clen);
2932+ }
2933+ l = g_list_next(l);
2934+ }
2935+
67af0fa4 2936+ int devcount = 0;
95259824
WB
2937+ while (optind < argc) {
2938+ const char *path = argv[optind++];
2939+ char *devname = NULL;
67af0fa4 2940+ path = extract_devname(path, &devname, devcount++);
95259824
WB
2941+
2942+ Error *errp = NULL;
67af0fa4 2943+ BlockBackend *target;
95259824 2944+
67af0fa4
WB
2945+ target = blk_new_open(path, NULL, NULL, 0, &errp);
2946+ if (!target) {
95259824
WB
2947+ unlink(archivename);
2948+ g_error("bdrv_open '%s' failed - %s", path, error_get_pretty(errp));
2949+ }
67af0fa4 2950+ int64_t size = blk_getlength(target);
95259824
WB
2951+ int dev_id = vma_writer_register_stream(vmaw, devname, size);
2952+ if (dev_id <= 0) {
2953+ unlink(archivename);
2954+ g_error("vma_writer_register_stream '%s' failed", devname);
2955+ }
2956+
2957+ BackupJob *job = g_new0(BackupJob, 1);
2958+ job->len = size;
67af0fa4 2959+ job->target = target;
95259824
WB
2960+ job->vmaw = vmaw;
2961+ job->dev_id = dev_id;
2962+
2963+ Coroutine *co = qemu_coroutine_create(backup_run, job);
2964+ qemu_coroutine_enter(co);
2965+ }
2966+
2967+ VmaStatus vmastat;
2968+ int percent = 0;
2969+ int last_percent = -1;
2970+
67af0fa4
WB
2971+ if (devcount) {
2972+ while (1) {
2973+ main_loop_wait(false);
2974+ vma_writer_get_status(vmaw, &vmastat);
95259824 2975+
67af0fa4 2976+ if (verbose) {
95259824 2977+
67af0fa4
WB
2978+ uint64_t total = 0;
2979+ uint64_t transferred = 0;
2980+ uint64_t zero_bytes = 0;
95259824 2981+
67af0fa4
WB
2982+ int i;
2983+ for (i = 0; i < 256; i++) {
2984+ if (vmastat.stream_info[i].size) {
2985+ total += vmastat.stream_info[i].size;
2986+ transferred += vmastat.stream_info[i].transferred;
2987+ zero_bytes += vmastat.stream_info[i].zero_bytes;
2988+ }
95259824 2989+ }
67af0fa4
WB
2990+ percent = (transferred*100)/total;
2991+ if (percent != last_percent) {
2992+ fprintf(stderr, "progress %d%% %zd/%zd %zd\n", percent,
2993+ transferred, total, zero_bytes);
2994+ fflush(stderr);
95259824 2995+
67af0fa4
WB
2996+ last_percent = percent;
2997+ }
95259824 2998+ }
95259824 2999+
67af0fa4
WB
3000+ if (vmastat.closed) {
3001+ break;
3002+ }
95259824
WB
3003+ }
3004+ } else {
3005+ Coroutine *co = qemu_coroutine_create(backup_run_empty, vmaw);
3006+ qemu_coroutine_enter(co);
3007+ while (1) {
3008+ main_loop_wait(false);
3009+ vma_writer_get_status(vmaw, &vmastat);
3010+ if (vmastat.closed) {
3011+ break;
3012+ }
3013+ }
3014+ }
3015+
3016+ bdrv_drain_all();
3017+
3018+ vma_writer_get_status(vmaw, &vmastat);
3019+
3020+ if (verbose) {
3021+ for (i = 0; i < 256; i++) {
3022+ VmaStreamInfo *si = &vmastat.stream_info[i];
3023+ if (si->size) {
3024+ fprintf(stderr, "image %s: size=%zd zeros=%zd saved=%zd\n",
3025+ si->devname, si->size, si->zero_bytes,
3026+ si->size - si->zero_bytes);
3027+ }
3028+ }
3029+ }
3030+
3031+ if (vmastat.status < 0) {
3032+ unlink(archivename);
3033+ g_error("creating vma archive failed");
3034+ }
3035+
3036+ return 0;
3037+}
3038+
67af0fa4
WB
3039+static int dump_config(int argc, char **argv)
3040+{
3041+ int c, ret = 0;
3042+ const char *filename;
3043+ const char *config_name = "qemu-server.conf";
3044+
3045+ for (;;) {
3046+ c = getopt(argc, argv, "hc:");
3047+ if (c == -1) {
3048+ break;
3049+ }
3050+ switch (c) {
3051+ case '?':
3052+ case 'h':
3053+ help();
3054+ break;
3055+ case 'c':
3056+ config_name = optarg;
3057+ break;
3058+ default:
3059+ help();
3060+ }
3061+ }
3062+
3063+ /* Get the filename */
3064+ if ((optind + 1) != argc) {
3065+ help();
3066+ }
3067+ filename = argv[optind++];
3068+
3069+ Error *errp = NULL;
3070+ VmaReader *vmar = vma_reader_create(filename, &errp);
3071+
3072+ if (!vmar) {
3073+ g_error("%s", error_get_pretty(errp));
3074+ }
3075+
3076+ int found = 0;
3077+ GList *l = vma_reader_get_config_data(vmar);
3078+ while (l && l->data) {
3079+ VmaConfigData *cdata = (VmaConfigData *)l->data;
3080+ l = g_list_next(l);
3081+ if (strcmp(cdata->name, config_name) == 0) {
3082+ found = 1;
3083+ fwrite(cdata->data, cdata->len, 1, stdout);
3084+ break;
3085+ }
3086+ }
3087+
3088+ vma_reader_destroy(vmar);
3089+
3090+ bdrv_close_all();
3091+
3092+ if (!found) {
3093+ fprintf(stderr, "unable to find configuration data '%s'\n", config_name);
3094+ return -1;
3095+ }
3096+
3097+ return ret;
3098+}
3099+
95259824
WB
3100+int main(int argc, char **argv)
3101+{
3102+ const char *cmdname;
3103+ Error *main_loop_err = NULL;
3104+
3105+ error_set_progname(argv[0]);
3106+
3107+ if (qemu_init_main_loop(&main_loop_err)) {
3108+ g_error("%s", error_get_pretty(main_loop_err));
3109+ }
3110+
3111+ bdrv_init();
3112+
3113+ if (argc < 2) {
3114+ help();
3115+ }
3116+
3117+ cmdname = argv[1];
3118+ argc--; argv++;
3119+
3120+
3121+ if (!strcmp(cmdname, "list")) {
3122+ return list_content(argc, argv);
3123+ } else if (!strcmp(cmdname, "create")) {
3124+ return create_archive(argc, argv);
3125+ } else if (!strcmp(cmdname, "extract")) {
3126+ return extract_content(argc, argv);
67af0fa4
WB
3127+ } else if (!strcmp(cmdname, "verify")) {
3128+ return verify_content(argc, argv);
3129+ } else if (!strcmp(cmdname, "config")) {
3130+ return dump_config(argc, argv);
95259824
WB
3131+ }
3132+
3133+ help();
3134+ return 0;
3135+}
3136diff --git a/vma.h b/vma.h
3137new file mode 100644
6838f038 3138index 0000000000..c895c97f6d
95259824
WB
3139--- /dev/null
3140+++ b/vma.h
6838f038 3141@@ -0,0 +1,150 @@
95259824
WB
3142+/*
3143+ * VMA: Virtual Machine Archive
3144+ *
3145+ * Copyright (C) Proxmox Server Solutions
3146+ *
3147+ * Authors:
3148+ * Dietmar Maurer (dietmar@proxmox.com)
3149+ *
3150+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
3151+ * See the COPYING file in the top-level directory.
3152+ *
3153+ */
3154+
3155+#ifndef BACKUP_VMA_H
3156+#define BACKUP_VMA_H
3157+
3158+#include <uuid/uuid.h>
3159+#include "qapi/error.h"
3160+#include "block/block.h"
3161+
3162+#define VMA_BLOCK_BITS 12
3163+#define VMA_BLOCK_SIZE (1<<VMA_BLOCK_BITS)
3164+#define VMA_CLUSTER_BITS (VMA_BLOCK_BITS+4)
3165+#define VMA_CLUSTER_SIZE (1<<VMA_CLUSTER_BITS)
3166+
3167+#if VMA_CLUSTER_SIZE != 65536
3168+#error unexpected cluster size
3169+#endif
3170+
3171+#define VMA_EXTENT_HEADER_SIZE 512
3172+#define VMA_BLOCKS_PER_EXTENT 59
3173+#define VMA_MAX_CONFIGS 256
3174+
3175+#define VMA_MAX_EXTENT_SIZE \
3176+ (VMA_EXTENT_HEADER_SIZE+VMA_CLUSTER_SIZE*VMA_BLOCKS_PER_EXTENT)
3177+#if VMA_MAX_EXTENT_SIZE != 3867136
3178+#error unexpected VMA_EXTENT_SIZE
3179+#endif
3180+
3181+/* File Format Definitions */
3182+
3183+#define VMA_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|0x00))
3184+#define VMA_EXTENT_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|'E'))
3185+
/*
 * Per-device entry of the archive header (32 bytes).
 * The writer stores devname_ptr and size in big-endian byte order.
 */
typedef struct VmaDeviceInfoHeader {
    uint32_t devname_ptr; /* offset into blob_buffer table */
    uint32_t reserved0;
    uint64_t size; /* device size in bytes */
    uint64_t reserved1;
    uint64_t reserved2;
} VmaDeviceInfoHeader;
3193+
3194+typedef struct VmaHeader {
3195+ uint32_t magic;
3196+ uint32_t version;
3197+ unsigned char uuid[16];
3198+ int64_t ctime;
3199+ unsigned char md5sum[16];
3200+
3201+ uint32_t blob_buffer_offset;
3202+ uint32_t blob_buffer_size;
3203+ uint32_t header_size;
3204+
3205+ unsigned char reserved[1984];
3206+
3207+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
3208+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
3209+
3210+ uint32_t reserved1;
3211+
3212+ VmaDeviceInfoHeader dev_info[256];
3213+} VmaHeader;
3214+
3215+typedef struct VmaExtentHeader {
3216+ uint32_t magic;
3217+ uint16_t reserved1;
3218+ uint16_t block_count;
3219+ unsigned char uuid[16];
3220+ unsigned char md5sum[16];
3221+ uint64_t blockinfo[VMA_BLOCKS_PER_EXTENT];
3222+} VmaExtentHeader;
3223+
3224+/* functions/definitions to read/write vma files */
3225+
3226+typedef struct VmaReader VmaReader;
3227+
3228+typedef struct VmaWriter VmaWriter;
3229+
/* One config blob of an archive, as handed out by the reader. */
typedef struct VmaConfigData {
    const char *name;   /* name of the config file */
    const void *data;   /* blob content */
    uint32_t len;       /* length of data in bytes */
} VmaConfigData;
3235+
/* Bookkeeping the writer keeps for one stream (device). */
typedef struct VmaStreamInfo {
    uint64_t size;          /* stream size in bytes; 0 means slot unused */
    uint64_t cluster_count; /* upper bound for accepted cluster numbers */
    uint64_t transferred;   /* bytes accounted as transferred so far */
    uint64_t zero_bytes;    /* bytes skipped because they were zero */
    int finished;           /* non-zero once the stream was closed */
    char *devname;          /* device name, freed on writer destruction */
} VmaStreamInfo;
3244+
3245+typedef struct VmaStatus {
3246+ int status;
3247+ bool closed;
3248+ char errmsg[8192];
3249+ char uuid_str[37];
3250+ VmaStreamInfo stream_info[256];
3251+} VmaStatus;
3252+
/* Description of one device of an archive, as handed out by the reader. */
typedef struct VmaDeviceInfo {
    uint64_t size; /* device size in bytes */
    const char *devname; /* device name; "vmstate" marks the VM state */
} VmaDeviceInfo;
3257+
3258+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp);
3259+int vma_writer_close(VmaWriter *vmaw, Error **errp);
67af0fa4 3260+void vma_writer_error_propagate(VmaWriter *vmaw, Error **errp);
95259824
WB
3261+void vma_writer_destroy(VmaWriter *vmaw);
3262+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
3263+ size_t len);
3264+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
3265+ size_t size);
3266+
3267+int64_t coroutine_fn vma_writer_write(VmaWriter *vmaw, uint8_t dev_id,
6838f038
WB
3268+ int64_t cluster_num,
3269+ const unsigned char *buf,
95259824
WB
3270+ size_t *zero_bytes);
3271+
3272+int coroutine_fn vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id);
67af0fa4 3273+int coroutine_fn vma_writer_flush_output(VmaWriter *vmaw);
95259824
WB
3274+
3275+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
3276+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
3277+
3278+
3279+VmaReader *vma_reader_create(const char *filename, Error **errp);
3280+void vma_reader_destroy(VmaReader *vmar);
3281+VmaHeader *vma_reader_get_header(VmaReader *vmar);
3282+GList *vma_reader_get_config_data(VmaReader *vmar);
3283+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
3284+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
67af0fa4 3285+ BlockBackend *target, bool write_zeroes,
95259824
WB
3286+ Error **errp);
3287+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
3288+ Error **errp);
67af0fa4 3289+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);
95259824
WB
3290+
3291+#endif /* BACKUP_VMA_H */
3292--
45169293 32932.11.0
95259824 3294