]> git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0027-adding-old-vma-files.patch
bump version to 2.11.1-5
[pve-qemu.git] / debian / patches / pve / 0027-adding-old-vma-files.patch
CommitLineData
23102ed6 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
67af0fa4
WB
2From: Wolfgang Bumiller <w.bumiller@proxmox.com>
3Date: Mon, 7 Aug 2017 08:51:16 +0200
23102ed6 4Subject: [PATCH] adding old vma files
95259824 5
95259824 6---
67af0fa4
WB
7 Makefile | 3 +-
8 Makefile.objs | 1 +
6838f038 9 block/backup.c | 130 ++++---
67af0fa4 10 block/replication.c | 1 +
6838f038
WB
11 blockdev.c | 207 +++++++----
12 blockjob.c | 3 +-
67af0fa4
WB
13 include/block/block_int.h | 4 +
14 vma-reader.c | 857 ++++++++++++++++++++++++++++++++++++++++++++++
15 vma-writer.c | 771 +++++++++++++++++++++++++++++++++++++++++
6838f038
WB
16 vma.c | 756 ++++++++++++++++++++++++++++++++++++++++
17 vma.h | 150 ++++++++
18 11 files changed, 2760 insertions(+), 123 deletions(-)
95259824
WB
19 create mode 100644 vma-reader.c
20 create mode 100644 vma-writer.c
21 create mode 100644 vma.c
22 create mode 100644 vma.h
23
24diff --git a/Makefile b/Makefile
6838f038 25index ab0354c153..ad28227b6c 100644
95259824
WB
26--- a/Makefile
27+++ b/Makefile
6838f038
WB
28@@ -340,7 +340,7 @@ dummy := $(call unnest-vars,, \
29
95259824 30 include $(SRC_PATH)/tests/Makefile.include
95259824
WB
31
32-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules
33+all: $(DOCS) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all modules
34
35 qemu-version.h: FORCE
36 $(call quiet-command, \
6838f038 37@@ -439,6 +439,7 @@ qemu-img.o: qemu-img-cmds.h
a544966d
WB
38 qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
39 qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
40 qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
41+vma$(EXESUF): vma.o vma-reader.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
95259824 42
a544966d 43 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
95259824
WB
44
45diff --git a/Makefile.objs b/Makefile.objs
6838f038 46index 686247b556..34e62547d8 100644
95259824
WB
47--- a/Makefile.objs
48+++ b/Makefile.objs
a544966d 49@@ -14,6 +14,7 @@ block-obj-y += block.o blockjob.o
6838f038 50 block-obj-y += block/ scsi/
95259824 51 block-obj-y += qemu-io-cmds.o
a544966d 52 block-obj-$(CONFIG_REPLICATION) += replication.o
95259824
WB
53+block-obj-y += vma-writer.o
54
55 block-obj-m = block/
56
67af0fa4 57diff --git a/block/backup.c b/block/backup.c
6838f038 58index 8c2967a8cb..0870acdae7 100644
67af0fa4
WB
59--- a/block/backup.c
60+++ b/block/backup.c
61@@ -36,6 +36,7 @@ typedef struct BackupBlockJob {
62 BdrvDirtyBitmap *sync_bitmap;
63 MirrorSyncMode sync_mode;
64 RateLimit limit;
65+ BackupDumpFunc *dump_cb;
66 BlockdevOnError on_source_error;
67 BlockdevOnError on_target_error;
68 CoRwlock flush_rwlock;
6838f038 69@@ -135,13 +136,24 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
67af0fa4
WB
70 goto out;
71 }
72
6838f038 73+
67af0fa4 74 if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
6838f038 75- ret = blk_co_pwrite_zeroes(job->target, start,
67af0fa4
WB
76- bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
77+ if (job->dump_cb) {
6838f038 78+ ret = job->dump_cb(job->common.opaque, job->target, start, bounce_qiov.size, NULL);
67af0fa4
WB
79+ }
80+ if (job->target) {
6838f038 81+ ret = blk_co_pwrite_zeroes(job->target, start,
67af0fa4
WB
82+ bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
83+ }
84 } else {
6838f038 85- ret = blk_co_pwritev(job->target, start,
67af0fa4
WB
86- bounce_qiov.size, &bounce_qiov,
87- job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
88+ if (job->dump_cb) {
6838f038 89+ ret = job->dump_cb(job->common.opaque, job->target, start, bounce_qiov.size, bounce_buffer);
67af0fa4
WB
90+ }
91+ if (job->target) {
6838f038 92+ ret = blk_co_pwritev(job->target, start,
67af0fa4
WB
93+ bounce_qiov.size, &bounce_qiov,
94+ job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
95+ }
96 }
97 if (ret < 0) {
98 trace_backup_do_cow_write_fail(job, start, ret);
6838f038 99@@ -234,7 +246,9 @@ static void backup_abort(BlockJob *job)
67af0fa4
WB
100 static void backup_clean(BlockJob *job)
101 {
102 BackupBlockJob *s = container_of(job, BackupBlockJob, common);
6838f038 103- assert(s->target);
507c2194 104+ if (!s->target) {
67af0fa4 105+ return;
507c2194 106+ }
67af0fa4
WB
107 blk_unref(s->target);
108 s->target = NULL;
6838f038
WB
109 }
110@@ -243,7 +257,9 @@ static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
02709230
FG
111 {
112 BackupBlockJob *s = container_of(job, BackupBlockJob, common);
113
114- blk_set_aio_context(s->target, aio_context);
507c2194 115+ if (s->target) {
02709230 116+ blk_set_aio_context(s->target, aio_context);
507c2194 117+ }
02709230
FG
118 }
119
120 void backup_do_checkpoint(BlockJob *job, Error **errp)
6838f038 121@@ -315,9 +331,11 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job,
67af0fa4
WB
122 if (read) {
123 return block_job_error_action(&job->common, job->on_source_error,
124 true, error);
125- } else {
126+ } else if (job->target) {
127 return block_job_error_action(&job->common, job->on_target_error,
128 false, error);
129+ } else {
130+ return BLOCK_ERROR_ACTION_REPORT;
131 }
132 }
133
6838f038 134@@ -538,6 +556,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
135 BlockdevOnError on_source_error,
136 BlockdevOnError on_target_error,
137 int creation_flags,
138+ BackupDumpFunc *dump_cb,
139 BlockCompletionFunc *cb, void *opaque,
140 int pause_count,
141 BlockJobTxn *txn, Error **errp)
6838f038 142@@ -548,7 +567,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
143 int ret;
144
145 assert(bs);
146- assert(target);
147+ assert(target || dump_cb);
148
149 if (bs == target) {
150 error_setg(errp, "Source and target cannot be the same");
6838f038 151@@ -561,13 +580,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
152 return NULL;
153 }
154
155- if (!bdrv_is_inserted(target)) {
156+ if (target && !bdrv_is_inserted(target)) {
157 error_setg(errp, "Device is not inserted: %s",
158 bdrv_get_device_name(target));
159 return NULL;
160 }
161
162- if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
163+ if (target && compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
164 error_setg(errp, "Compression is not supported for this drive %s",
165 bdrv_get_device_name(target));
166 return NULL;
6838f038 167@@ -577,7 +596,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
168 return NULL;
169 }
170
171- if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
172+ if (target && bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
173 return NULL;
174 }
175
6838f038 176@@ -617,15 +636,18 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
177 goto error;
178 }
179
180- /* The target must match the source in size, so no resize here either */
181- job->target = blk_new(BLK_PERM_WRITE,
182- BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
183- BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
184- ret = blk_insert_bs(job->target, target, errp);
185- if (ret < 0) {
186- goto error;
187+ if (target) {
188+ /* The target must match the source in size, so no resize here either */
189+ job->target = blk_new(BLK_PERM_WRITE,
190+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
191+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
192+ ret = blk_insert_bs(job->target, target, errp);
193+ if (ret < 0) {
194+ goto error;
195+ }
196 }
197
198+ job->dump_cb = dump_cb;
199 job->on_source_error = on_source_error;
200 job->on_target_error = on_target_error;
201 job->sync_mode = sync_mode;
6838f038 202@@ -633,36 +655,52 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
203 sync_bitmap : NULL;
204 job->compress = compress;
205
206- /* If there is no backing file on the target, we cannot rely on COW if our
207- * backup cluster size is smaller than the target cluster size. Even for
208- * targets with a backing file, try to avoid COW if possible. */
209- ret = bdrv_get_info(target, &bdi);
210- if (ret == -ENOTSUP && !target->backing) {
211- /* Cluster size is not defined */
6838f038
WB
212- warn_report("The target block device doesn't provide "
213- "information about the block size and it doesn't have a "
214- "backing file. The default block size of %u bytes is "
215- "used. If the actual block size of the target exceeds "
216- "this default, the backup may be unusable",
217- BACKUP_CLUSTER_SIZE_DEFAULT);
67af0fa4
WB
218- job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
219- } else if (ret < 0 && !target->backing) {
220- error_setg_errno(errp, -ret,
221- "Couldn't determine the cluster size of the target image, "
222- "which has no backing file");
223- error_append_hint(errp,
224- "Aborting, since this may create an unusable destination image\n");
225- goto error;
226- } else if (ret < 0 && target->backing) {
227- /* Not fatal; just trudge on ahead. */
228- job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
229+ if (target) {
230+ /* If there is no backing file on the target, we cannot rely on COW if our
231+ * backup cluster size is smaller than the target cluster size. Even for
232+ * targets with a backing file, try to avoid COW if possible. */
233+ ret = bdrv_get_info(target, &bdi);
234+ if (ret == -ENOTSUP && !target->backing) {
235+ /* Cluster size is not defined */
6838f038
WB
236+ warn_report("The target block device doesn't provide "
237+ "information about the block size and it doesn't have a "
238+ "backing file. The default block size of %u bytes is "
239+ "used. If the actual block size of the target exceeds "
240+ "this default, the backup may be unusable",
241+ BACKUP_CLUSTER_SIZE_DEFAULT);
67af0fa4
WB
242+ job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
243+ } else if (ret < 0 && !target->backing) {
244+ error_setg_errno(errp, -ret,
245+ "Couldn't determine the cluster size of the target image, "
246+ "which has no backing file");
247+ error_append_hint(errp,
248+ "Aborting, since this may create an unusable destination image\n");
249+ goto error;
250+ } else if (ret < 0 && target->backing) {
251+ /* Not fatal; just trudge on ahead. */
252+ job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
253+ } else {
254+ job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
255+ }
256 } else {
257- job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
258+ ret = bdrv_get_info(bs, &bdi);
259+ if (ret < 0) {
260+ job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
261+ } else {
262+ /* round down to nearest BACKUP_CLUSTER_SIZE_DEFAULT */
263+ job->cluster_size = (bdi.cluster_size / BACKUP_CLUSTER_SIZE_DEFAULT) * BACKUP_CLUSTER_SIZE_DEFAULT;
264+ if (job->cluster_size == 0) {
265+ /* but we can't go below it */
266+ job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
267+ }
268+ }
269 }
270
271- /* Required permissions are already taken with target's blk_new() */
272- block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
273- &error_abort);
274+ if (target) {
275+ /* Required permissions are already taken with target's blk_new() */
276+ block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
277+ &error_abort);
67af0fa4
WB
278+ }
279 job->common.len = len;
6838f038 280 job->common.pause_count = pause_count;
67af0fa4 281 block_job_txn_add_job(txn, &job->common);
67af0fa4 282diff --git a/block/replication.c b/block/replication.c
6838f038 283index 1b08b242eb..3d101ce6e6 100644
67af0fa4
WB
284--- a/block/replication.c
285+++ b/block/replication.c
6838f038 286@@ -561,6 +561,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
67af0fa4
WB
287 0, MIRROR_SYNC_MODE_NONE, NULL, false,
288 BLOCKDEV_ON_ERROR_REPORT,
289 BLOCKDEV_ON_ERROR_REPORT, BLOCK_JOB_INTERNAL,
290+ NULL,
291 backup_job_completed, bs, 0, NULL, &local_err);
292 if (local_err) {
293 error_propagate(errp, local_err);
294diff --git a/blockdev.c b/blockdev.c
6838f038 295index 3ffd064c48..4b6091afc6 100644
67af0fa4
WB
296--- a/blockdev.c
297+++ b/blockdev.c
298@@ -31,7 +31,6 @@
299 */
300
301 #include "qemu/osdep.h"
302-#include "qemu/uuid.h"
303 #include "sysemu/block-backend.h"
304 #include "sysemu/blockdev.h"
305 #include "hw/block/block.h"
306@@ -55,6 +54,7 @@
307 #include "qemu/cutils.h"
308 #include "qemu/help_option.h"
309 #include "qemu/throttle-options.h"
310+#include "vma.h"
311
312 static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
313 QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
6838f038 314@@ -2970,15 +2970,14 @@ out:
67af0fa4
WB
315 static struct PVEBackupState {
316 Error *error;
317 bool cancel;
318- QemuUUID uuid;
319+ uuid_t uuid;
320 char uuid_str[37];
321 int64_t speed;
322 time_t start_time;
323 time_t end_time;
324 char *backup_file;
325- Object *vmaobj;
326+ VmaWriter *vmaw;
327 GList *di_list;
328- size_t next_job;
329 size_t total;
330 size_t transferred;
331 size_t zero_bytes;
6838f038 332@@ -2997,6 +2996,71 @@ typedef struct PVEBackupDevInfo {
67af0fa4
WB
333
334 static void pvebackup_run_next_job(void);
335
336+static int pvebackup_dump_cb(void *opaque, BlockBackend *target,
6838f038
WB
337+ uint64_t start, uint64_t bytes,
338+ const void *pbuf)
67af0fa4 339+{
6838f038
WB
340+ const uint64_t size = bytes;
341+ const unsigned char *buf = pbuf;
67af0fa4
WB
342+ PVEBackupDevInfo *di = opaque;
343+
67af0fa4
WB
344+ if (backup_state.cancel) {
345+ return size; // return success
346+ }
347+
6838f038
WB
348+ uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
349+ if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
67af0fa4
WB
350+ if (!backup_state.error) {
351+ error_setg(&backup_state.error,
352+ "got unaligned write inside backup dump "
6838f038 353+ "callback (sector %ld)", start);
67af0fa4
WB
354+ }
355+ return -1; // not aligned to cluster size
356+ }
357+
67af0fa4
WB
358+ int ret = -1;
359+
360+ if (backup_state.vmaw) {
361+ size_t zero_bytes = 0;
6838f038 362+ uint64_t remaining = size;
2ab9b48e 363+ while (remaining > 0) {
67af0fa4
WB
364+ ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num,
365+ buf, &zero_bytes);
67af0fa4
WB
366+ ++cluster_num;
367+ if (buf) {
368+ buf += VMA_CLUSTER_SIZE;
369+ }
370+ if (ret < 0) {
371+ if (!backup_state.error) {
372+ vma_writer_error_propagate(backup_state.vmaw, &backup_state.error);
373+ }
374+ if (di->bs && di->bs->job) {
375+ block_job_cancel(di->bs->job);
376+ }
2ab9b48e 377+ break;
67af0fa4
WB
378+ } else {
379+ backup_state.zero_bytes += zero_bytes;
2ab9b48e
WB
380+ if (remaining >= VMA_CLUSTER_SIZE) {
381+ backup_state.transferred += VMA_CLUSTER_SIZE;
6838f038 382+ remaining -= VMA_CLUSTER_SIZE;
2ab9b48e
WB
383+ } else {
384+ backup_state.transferred += remaining;
6838f038 385+ remaining = 0;
2ab9b48e 386+ }
67af0fa4
WB
387+ }
388+ }
389+ } else {
390+ if (!buf) {
391+ backup_state.zero_bytes += size;
392+ }
393+ backup_state.transferred += size;
394+ }
395+
396+ // Note: always return success, because we want that writes succeed anyways.
397+
398+ return size;
399+}
400+
401 static void pvebackup_cleanup(void)
402 {
6838f038
WB
403 qemu_mutex_lock(&backup_state.backup_mutex);
404@@ -3008,9 +3072,11 @@ static void pvebackup_cleanup(void)
405
67af0fa4
WB
406 backup_state.end_time = time(NULL);
407
408- if (backup_state.vmaobj) {
409- object_unparent(backup_state.vmaobj);
410- backup_state.vmaobj = NULL;
411+ if (backup_state.vmaw) {
412+ Error *local_err = NULL;
413+ vma_writer_close(backup_state.vmaw, &local_err);
414+ error_propagate(&backup_state.error, local_err);
415+ backup_state.vmaw = NULL;
416 }
417
6838f038
WB
418 g_list_free(backup_state.di_list);
419@@ -3018,6 +3084,13 @@ static void pvebackup_cleanup(void)
420 qemu_mutex_unlock(&backup_state.backup_mutex);
67af0fa4
WB
421 }
422
423+static void coroutine_fn backup_close_vma_stream(void *opaque)
424+{
425+ PVEBackupDevInfo *di = opaque;
426+
427+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
428+}
429+
430 static void pvebackup_complete_cb(void *opaque, int ret)
431 {
6838f038
WB
432 // This always runs in the main loop
433@@ -3034,9 +3107,9 @@ static void pvebackup_complete_cb(void *opaque, int ret)
67af0fa4
WB
434 di->bs = NULL;
435 di->target = NULL;
436
437- if (backup_state.vmaobj) {
438- object_unparent(backup_state.vmaobj);
439- backup_state.vmaobj = NULL;
440+ if (backup_state.vmaw) {
441+ Coroutine *co = qemu_coroutine_create(backup_close_vma_stream, di);
442+ qemu_coroutine_enter(co);
443 }
444
6838f038
WB
445 // remove self from job queue
446@@ -3064,14 +3137,9 @@ static void pvebackup_cancel(void *opaque)
67af0fa4
WB
447 error_setg(&backup_state.error, "backup cancelled");
448 }
449
450- if (backup_state.vmaobj) {
451- Error *err;
452+ if (backup_state.vmaw) {
453 /* make sure vma writer does not block anymore */
454- if (!object_set_props(backup_state.vmaobj, &err, "blocked", "yes", NULL)) {
455- if (err) {
456- error_report_err(err);
457- }
458- }
459+ vma_writer_set_error(backup_state.vmaw, "backup cancelled");
460 }
461
462 GList *l = backup_state.di_list;
6838f038 463@@ -3102,18 +3170,14 @@ void qmp_backup_cancel(Error **errp)
67af0fa4
WB
464 Coroutine *co = qemu_coroutine_create(pvebackup_cancel, NULL);
465 qemu_coroutine_enter(co);
466
467- while (backup_state.vmaobj) {
468- /* FIXME: Find something better for this */
469+ while (backup_state.vmaw) {
470+ /* vma writer use main aio context */
471 aio_poll(qemu_get_aio_context(), true);
472 }
473 }
474
475-void vma_object_add_config_file(Object *obj, const char *name,
476- const char *contents, size_t len,
477- Error **errp);
478 static int config_to_vma(const char *file, BackupFormat format,
479- Object *vmaobj,
480- const char *backup_dir,
6838f038
WB
481+ const char *backup_dir, VmaWriter *vmaw,
482 Error **errp)
67af0fa4 483 {
6838f038
WB
484 char *cdata = NULL;
485@@ -3127,7 +3191,12 @@ static int config_to_vma(const char *file, BackupFormat format,
486 char *basename = g_path_get_basename(file);
67af0fa4 487
6838f038
WB
488 if (format == BACKUP_FORMAT_VMA) {
489- vma_object_add_config_file(vmaobj, basename, cdata, clen, errp);
490+ if (vma_writer_add_config(vmaw, basename, cdata, clen) != 0) {
491+ error_setg(errp, "unable to add %s config data to vma archive", file);
492+ g_free(cdata);
493+ g_free(basename);
494+ return 1;
495+ }
496 } else if (format == BACKUP_FORMAT_DIR) {
497 char config_path[PATH_MAX];
498 snprintf(config_path, PATH_MAX, "%s/%s", backup_dir, basename);
499@@ -3145,28 +3214,30 @@ static int config_to_vma(const char *file, BackupFormat format,
67af0fa4
WB
500 }
501
6838f038 502 void block_job_resume(BlockJob *job);
67af0fa4
WB
503+bool block_job_should_pause(BlockJob *job);
504 static void pvebackup_run_next_job(void)
505 {
6838f038
WB
506 qemu_mutex_lock(&backup_state.backup_mutex);
507
67af0fa4
WB
508- GList *next = g_list_nth(backup_state.di_list, backup_state.next_job);
509- while (next) {
510- PVEBackupDevInfo *di = (PVEBackupDevInfo *)next->data;
511- backup_state.next_job++;
512+ GList *l = backup_state.di_list;
513+ while (l) {
514+ PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
515+ l = g_list_next(l);
516 if (!di->completed && di->bs && di->bs->job) {
517 BlockJob *job = di->bs->job;
6838f038
WB
518 AioContext *aio_context = blk_get_aio_context(job->blk);
519 aio_context_acquire(aio_context);
520 qemu_mutex_unlock(&backup_state.backup_mutex);
521- if (backup_state.error || backup_state.cancel) {
522- block_job_cancel_sync(job);
67af0fa4
WB
523- } else {
524- block_job_resume(job);
525+ if (block_job_should_pause(job)) {
6838f038
WB
526+ if (backup_state.error || backup_state.cancel) {
527+ block_job_cancel_sync(job);
67af0fa4
WB
528+ } else {
529+ block_job_resume(job);
530+ }
531 }
6838f038 532 aio_context_release(aio_context);
67af0fa4
WB
533 return;
534 }
535- next = g_list_next(next);
536 }
6838f038 537 qemu_mutex_unlock(&backup_state.backup_mutex);
67af0fa4 538
6838f038 539@@ -3177,7 +3248,7 @@ static void pvebackup_run_next_job(void)
67af0fa4
WB
540 UuidInfo *qmp_backup(const char *backup_file, bool has_format,
541 BackupFormat format,
542 bool has_config_file, const char *config_file,
543- bool has_firewall_file, const char *firewall_file,
544+ bool has_firewall_file, const char *firewall_file,
545 bool has_devlist, const char *devlist,
546 bool has_speed, int64_t speed, Error **errp)
547 {
6838f038 548@@ -3185,7 +3256,8 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
549 BlockDriverState *bs = NULL;
550 const char *backup_dir = NULL;
551 Error *local_err = NULL;
552- QemuUUID uuid;
553+ uuid_t uuid;
554+ VmaWriter *vmaw = NULL;
555 gchar **devs = NULL;
556 GList *di_list = NULL;
557 GList *l;
6838f038
WB
558@@ -3197,7 +3269,7 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
559 backup_state.backup_mutex_initialized = true;
560 }
67af0fa4
WB
561
562- if (backup_state.di_list || backup_state.vmaobj) {
563+ if (backup_state.di_list) {
564 error_set(errp, ERROR_CLASS_GENERIC_ERROR,
565 "previous backup not finished");
566 return NULL;
6838f038 567@@ -3272,40 +3344,28 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
568 total += size;
569 }
570
571- qemu_uuid_generate(&uuid);
572+ uuid_generate(uuid);
573
574 if (format == BACKUP_FORMAT_VMA) {
575- char uuidstr[UUID_FMT_LEN+1];
576- qemu_uuid_unparse(&uuid, uuidstr);
577- uuidstr[UUID_FMT_LEN] = 0;
578- backup_state.vmaobj =
579- object_new_with_props("vma", object_get_objects_root(),
580- "vma-backup-obj", &local_err,
581- "filename", backup_file,
582- "uuid", uuidstr,
583- NULL);
584- if (!backup_state.vmaobj) {
585+ vmaw = vma_writer_create(backup_file, uuid, &local_err);
586+ if (!vmaw) {
587 if (local_err) {
588 error_propagate(errp, local_err);
589 }
590 goto err;
591 }
592
593+ /* register all devices for vma writer */
594 l = di_list;
595 while (l) {
596- QDict *options = qdict_new();
597-
598 PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
599 l = g_list_next(l);
600
601 const char *devname = bdrv_get_device_name(di->bs);
602- snprintf(di->targetfile, PATH_MAX, "vma-backup-obj/%s.raw", devname);
603-
604- qdict_put(options, "driver", qstring_from_str("vma-drive"));
605- qdict_put(options, "size", qint_from_int(di->size));
606- di->target = bdrv_open(di->targetfile, NULL, options, BDRV_O_RDWR, &local_err);
607- if (!di->target) {
608- error_propagate(errp, local_err);
609+ di->dev_id = vma_writer_register_stream(vmaw, devname, di->size);
610+ if (di->dev_id <= 0) {
611+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
612+ "register_stream failed");
613 goto err;
614 }
615 }
6838f038 616@@ -3346,14 +3406,14 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
617
618 /* add configuration file to archive */
619 if (has_config_file) {
6838f038
WB
620- if(config_to_vma(config_file, format, backup_state.vmaobj, backup_dir, errp) != 0) {
621+ if (config_to_vma(config_file, format, backup_dir, vmaw, errp) != 0) {
622 goto err;
623 }
67af0fa4
WB
624 }
625
626 /* add firewall file to archive */
627 if (has_firewall_file) {
6838f038
WB
628- if(config_to_vma(firewall_file, format, backup_state.vmaobj, backup_dir, errp) != 0) {
629+ if (config_to_vma(firewall_file, format, backup_dir, vmaw, errp) != 0) {
630 goto err;
631 }
67af0fa4 632 }
6838f038 633@@ -3376,12 +3436,13 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
634 }
635 backup_state.backup_file = g_strdup(backup_file);
636
637- memcpy(&backup_state.uuid, &uuid, sizeof(uuid));
638- qemu_uuid_unparse(&uuid, backup_state.uuid_str);
639+ backup_state.vmaw = vmaw;
640+
641+ uuid_copy(backup_state.uuid, uuid);
642+ uuid_unparse_lower(uuid, backup_state.uuid_str);
643
6838f038 644 qemu_mutex_lock(&backup_state.backup_mutex);
67af0fa4
WB
645 backup_state.di_list = di_list;
646- backup_state.next_job = 0;
647
648 backup_state.total = total;
649 backup_state.transferred = 0;
6838f038 650@@ -3392,21 +3453,16 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
67af0fa4
WB
651 while (l) {
652 PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
653 l = g_list_next(l);
654-
655 job = backup_job_create(NULL, di->bs, di->target, speed, MIRROR_SYNC_MODE_FULL, NULL,
656 false, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
657 BLOCK_JOB_DEFAULT,
658- pvebackup_complete_cb, di, 2, NULL, &local_err);
659- if (di->target) {
660- bdrv_unref(di->target);
661- di->target = NULL;
662- }
663+ pvebackup_dump_cb, pvebackup_complete_cb, di,
664+ 2, NULL, &local_err);
665 if (!job || local_err != NULL) {
666 error_setg(&backup_state.error, "backup_job_create failed");
667 pvebackup_cancel(NULL);
668- } else {
669- block_job_start(job);
670 }
671+ block_job_start(job);
672 }
673
6838f038
WB
674 qemu_mutex_unlock(&backup_state.backup_mutex);
675@@ -3442,9 +3498,10 @@ err:
67af0fa4
WB
676 g_strfreev(devs);
677 }
678
679- if (backup_state.vmaobj) {
680- object_unparent(backup_state.vmaobj);
681- backup_state.vmaobj = NULL;
682+ if (vmaw) {
683+ Error *err = NULL;
684+ vma_writer_close(vmaw, &err);
685+ unlink(backup_file);
686 }
687
688 if (backup_dir) {
6838f038 689@@ -3829,7 +3886,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
67af0fa4
WB
690 job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
691 backup->sync, bmap, backup->compress,
692 backup->on_source_error, backup->on_target_error,
693- BLOCK_JOB_DEFAULT, NULL, NULL, 0, txn, &local_err);
694+ BLOCK_JOB_DEFAULT, NULL, NULL, NULL, 0, txn, &local_err);
695 bdrv_unref(target_bs);
696 if (local_err != NULL) {
697 error_propagate(errp, local_err);
6838f038 698@@ -3908,7 +3965,7 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
67af0fa4
WB
699 job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
700 backup->sync, NULL, backup->compress,
701 backup->on_source_error, backup->on_target_error,
702- BLOCK_JOB_DEFAULT, NULL, NULL, 0, txn, &local_err);
703+ BLOCK_JOB_DEFAULT, NULL, NULL, NULL, 0, txn, &local_err);
704 if (local_err != NULL) {
705 error_propagate(errp, local_err);
706 }
707diff --git a/blockjob.c b/blockjob.c
6838f038 708index 2de9f8f4dd..1df33bd194 100644
67af0fa4
WB
709--- a/blockjob.c
710+++ b/blockjob.c
6838f038
WB
711@@ -757,7 +757,8 @@ void block_job_completed(BlockJob *job, int ret)
712 }
67af0fa4
WB
713 }
714
715-static bool block_job_should_pause(BlockJob *job)
716+bool block_job_should_pause(BlockJob *job);
717+bool block_job_should_pause(BlockJob *job)
718 {
719 return job->pause_count > 0;
720 }
67af0fa4 721diff --git a/include/block/block_int.h b/include/block/block_int.h
6838f038 722index 1dbbdafd31..2ed3e41437 100644
67af0fa4
WB
723--- a/include/block/block_int.h
724+++ b/include/block/block_int.h
6838f038 725@@ -60,6 +60,9 @@
67af0fa4
WB
726
727 #define BLOCK_PROBE_BUF_SIZE 512
728
729+typedef int BackupDumpFunc(void *opaque, BlockBackend *be,
6838f038 730+ uint64_t offset, uint64_t bytes, const void *buf);
67af0fa4
WB
731+
732 enum BdrvTrackedRequestType {
733 BDRV_TRACKED_READ,
734 BDRV_TRACKED_WRITE,
6838f038 735@@ -984,6 +987,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
67af0fa4
WB
736 BlockdevOnError on_source_error,
737 BlockdevOnError on_target_error,
738 int creation_flags,
739+ BackupDumpFunc *dump_cb,
740 BlockCompletionFunc *cb, void *opaque,
741 int pause_count,
742 BlockJobTxn *txn, Error **errp);
95259824
WB
743diff --git a/vma-reader.c b/vma-reader.c
744new file mode 100644
67af0fa4 745index 0000000000..2000889bd3
95259824
WB
746--- /dev/null
747+++ b/vma-reader.c
67af0fa4 748@@ -0,0 +1,857 @@
95259824
WB
749+/*
750+ * VMA: Virtual Machine Archive
751+ *
752+ * Copyright (C) 2012 Proxmox Server Solutions
753+ *
754+ * Authors:
755+ * Dietmar Maurer (dietmar@proxmox.com)
756+ *
757+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
758+ * See the COPYING file in the top-level directory.
759+ *
760+ */
761+
762+#include "qemu/osdep.h"
763+#include <glib.h>
764+#include <uuid/uuid.h>
765+
766+#include "qemu-common.h"
767+#include "qemu/timer.h"
768+#include "qemu/ratelimit.h"
769+#include "vma.h"
770+#include "block/block.h"
771+#include "sysemu/block-backend.h"
772+
773+static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
774+
775+typedef struct VmaRestoreState {
67af0fa4 776+ BlockBackend *target;
95259824
WB
777+ bool write_zeroes;
778+ unsigned long *bitmap;
779+ int bitmap_size;
780+} VmaRestoreState;
781+
782+struct VmaReader {
783+ int fd;
784+ GChecksum *md5csum;
785+ GHashTable *blob_hash;
786+ unsigned char *head_data;
787+ VmaDeviceInfo devinfo[256];
788+ VmaRestoreState rstate[256];
789+ GList *cdata_list;
790+ guint8 vmstate_stream;
791+ uint32_t vmstate_clusters;
792+ /* to show restore percentage if run with -v */
793+ time_t start_time;
794+ int64_t cluster_count;
795+ int64_t clusters_read;
67af0fa4
WB
796+ int64_t zero_cluster_data;
797+ int64_t partial_zero_cluster_data;
95259824
WB
798+ int clusters_read_per;
799+};
800+
801+static guint
802+g_int32_hash(gconstpointer v)
803+{
804+ return *(const uint32_t *)v;
805+}
806+
807+static gboolean
808+g_int32_equal(gconstpointer v1, gconstpointer v2)
809+{
810+ return *((const uint32_t *)v1) == *((const uint32_t *)v2);
811+}
812+
813+static int vma_reader_get_bitmap(VmaRestoreState *rstate, int64_t cluster_num)
814+{
815+ assert(rstate);
816+ assert(rstate->bitmap);
817+
818+ unsigned long val, idx, bit;
819+
820+ idx = cluster_num / BITS_PER_LONG;
821+
822+ assert(rstate->bitmap_size > idx);
823+
824+ bit = cluster_num % BITS_PER_LONG;
825+ val = rstate->bitmap[idx];
826+
827+ return !!(val & (1UL << bit));
828+}
829+
830+static void vma_reader_set_bitmap(VmaRestoreState *rstate, int64_t cluster_num,
831+ int dirty)
832+{
833+ assert(rstate);
834+ assert(rstate->bitmap);
835+
836+ unsigned long val, idx, bit;
837+
838+ idx = cluster_num / BITS_PER_LONG;
839+
840+ assert(rstate->bitmap_size > idx);
841+
842+ bit = cluster_num % BITS_PER_LONG;
843+ val = rstate->bitmap[idx];
844+ if (dirty) {
845+ if (!(val & (1UL << bit))) {
846+ val |= 1UL << bit;
847+ }
848+ } else {
849+ if (val & (1UL << bit)) {
850+ val &= ~(1UL << bit);
851+ }
852+ }
853+ rstate->bitmap[idx] = val;
854+}
855+
856+typedef struct VmaBlob {
857+ uint32_t start;
858+ uint32_t len;
859+ void *data;
860+} VmaBlob;
861+
862+static const VmaBlob *get_header_blob(VmaReader *vmar, uint32_t pos)
863+{
864+ assert(vmar);
865+ assert(vmar->blob_hash);
866+
867+ return g_hash_table_lookup(vmar->blob_hash, &pos);
868+}
869+
870+static const char *get_header_str(VmaReader *vmar, uint32_t pos)
871+{
872+ const VmaBlob *blob = get_header_blob(vmar, pos);
873+ if (!blob) {
874+ return NULL;
875+ }
876+ const char *res = (char *)blob->data;
877+ if (res[blob->len-1] != '\0') {
878+ return NULL;
879+ }
880+ return res;
881+}
882+
883+static ssize_t
884+safe_read(int fd, unsigned char *buf, size_t count)
885+{
886+ ssize_t n;
887+
888+ do {
889+ n = read(fd, buf, count);
890+ } while (n < 0 && errno == EINTR);
891+
892+ return n;
893+}
894+
895+static ssize_t
896+full_read(int fd, unsigned char *buf, size_t len)
897+{
898+ ssize_t n;
899+ size_t total;
900+
901+ total = 0;
902+
903+ while (len > 0) {
904+ n = safe_read(fd, buf, len);
905+
906+ if (n == 0) {
907+ return total;
908+ }
909+
910+ if (n <= 0) {
911+ break;
912+ }
913+
914+ buf += n;
915+ total += n;
916+ len -= n;
917+ }
918+
919+ if (len) {
920+ return -1;
921+ }
922+
923+ return total;
924+}
925+
926+void vma_reader_destroy(VmaReader *vmar)
927+{
928+ assert(vmar);
929+
930+ if (vmar->fd >= 0) {
931+ close(vmar->fd);
932+ }
933+
934+ if (vmar->cdata_list) {
935+ g_list_free(vmar->cdata_list);
936+ }
937+
938+ int i;
939+ for (i = 1; i < 256; i++) {
940+ if (vmar->rstate[i].bitmap) {
941+ g_free(vmar->rstate[i].bitmap);
942+ }
943+ }
944+
945+ if (vmar->md5csum) {
946+ g_checksum_free(vmar->md5csum);
947+ }
948+
949+ if (vmar->blob_hash) {
950+ g_hash_table_destroy(vmar->blob_hash);
951+ }
952+
953+ if (vmar->head_data) {
954+ g_free(vmar->head_data);
955+ }
956+
957+ g_free(vmar);
958+
959+};
960+
961+static int vma_reader_read_head(VmaReader *vmar, Error **errp)
962+{
963+ assert(vmar);
964+ assert(errp);
965+ assert(*errp == NULL);
966+
967+ unsigned char md5sum[16];
968+ int i;
969+ int ret = 0;
970+
971+ vmar->head_data = g_malloc(sizeof(VmaHeader));
972+
973+ if (full_read(vmar->fd, vmar->head_data, sizeof(VmaHeader)) !=
974+ sizeof(VmaHeader)) {
975+ error_setg(errp, "can't read vma header - %s",
976+ errno ? g_strerror(errno) : "got EOF");
977+ return -1;
978+ }
979+
980+ VmaHeader *h = (VmaHeader *)vmar->head_data;
981+
982+ if (h->magic != VMA_MAGIC) {
983+ error_setg(errp, "not a vma file - wrong magic number");
984+ return -1;
985+ }
986+
987+ uint32_t header_size = GUINT32_FROM_BE(h->header_size);
988+ int need = header_size - sizeof(VmaHeader);
989+ if (need <= 0) {
990+ error_setg(errp, "wrong vma header size %d", header_size);
991+ return -1;
992+ }
993+
994+ vmar->head_data = g_realloc(vmar->head_data, header_size);
995+ h = (VmaHeader *)vmar->head_data;
996+
997+ if (full_read(vmar->fd, vmar->head_data + sizeof(VmaHeader), need) !=
998+ need) {
999+ error_setg(errp, "can't read vma header data - %s",
1000+ errno ? g_strerror(errno) : "got EOF");
1001+ return -1;
1002+ }
1003+
1004+ memcpy(md5sum, h->md5sum, 16);
1005+ memset(h->md5sum, 0, 16);
1006+
1007+ g_checksum_reset(vmar->md5csum);
1008+ g_checksum_update(vmar->md5csum, vmar->head_data, header_size);
1009+ gsize csize = 16;
1010+ g_checksum_get_digest(vmar->md5csum, (guint8 *)(h->md5sum), &csize);
1011+
1012+ if (memcmp(md5sum, h->md5sum, 16) != 0) {
1013+ error_setg(errp, "wrong vma header chechsum");
1014+ return -1;
1015+ }
1016+
1017+ /* we can modify header data after checksum verify */
1018+ h->header_size = header_size;
1019+
1020+ h->version = GUINT32_FROM_BE(h->version);
1021+ if (h->version != 1) {
1022+ error_setg(errp, "wrong vma version %d", h->version);
1023+ return -1;
1024+ }
1025+
1026+ h->ctime = GUINT64_FROM_BE(h->ctime);
1027+ h->blob_buffer_offset = GUINT32_FROM_BE(h->blob_buffer_offset);
1028+ h->blob_buffer_size = GUINT32_FROM_BE(h->blob_buffer_size);
1029+
1030+ uint32_t bstart = h->blob_buffer_offset + 1;
1031+ uint32_t bend = h->blob_buffer_offset + h->blob_buffer_size;
1032+
1033+ if (bstart <= sizeof(VmaHeader)) {
1034+ error_setg(errp, "wrong vma blob buffer offset %d",
1035+ h->blob_buffer_offset);
1036+ return -1;
1037+ }
1038+
1039+ if (bend > header_size) {
1040+ error_setg(errp, "wrong vma blob buffer size %d/%d",
1041+ h->blob_buffer_offset, h->blob_buffer_size);
1042+ return -1;
1043+ }
1044+
1045+ while ((bstart + 2) <= bend) {
1046+ uint32_t size = vmar->head_data[bstart] +
1047+ (vmar->head_data[bstart+1] << 8);
1048+ if ((bstart + size + 2) <= bend) {
1049+ VmaBlob *blob = g_new0(VmaBlob, 1);
1050+ blob->start = bstart - h->blob_buffer_offset;
1051+ blob->len = size;
1052+ blob->data = vmar->head_data + bstart + 2;
1053+ g_hash_table_insert(vmar->blob_hash, &blob->start, blob);
1054+ }
1055+ bstart += size + 2;
1056+ }
1057+
1058+
1059+ int count = 0;
1060+ for (i = 1; i < 256; i++) {
1061+ VmaDeviceInfoHeader *dih = &h->dev_info[i];
1062+ uint32_t devname_ptr = GUINT32_FROM_BE(dih->devname_ptr);
1063+ uint64_t size = GUINT64_FROM_BE(dih->size);
1064+ const char *devname = get_header_str(vmar, devname_ptr);
1065+
1066+ if (size && devname) {
1067+ count++;
1068+ vmar->devinfo[i].size = size;
1069+ vmar->devinfo[i].devname = devname;
1070+
1071+ if (strcmp(devname, "vmstate") == 0) {
1072+ vmar->vmstate_stream = i;
1073+ }
1074+ }
1075+ }
1076+
95259824
WB
1077+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
1078+ uint32_t name_ptr = GUINT32_FROM_BE(h->config_names[i]);
1079+ uint32_t data_ptr = GUINT32_FROM_BE(h->config_data[i]);
1080+
1081+ if (!(name_ptr && data_ptr)) {
1082+ continue;
1083+ }
1084+ const char *name = get_header_str(vmar, name_ptr);
1085+ const VmaBlob *blob = get_header_blob(vmar, data_ptr);
1086+
1087+ if (!(name && blob)) {
1088+ error_setg(errp, "vma contains invalid data pointers");
1089+ return -1;
1090+ }
1091+
1092+ VmaConfigData *cdata = g_new0(VmaConfigData, 1);
1093+ cdata->name = name;
1094+ cdata->data = blob->data;
1095+ cdata->len = blob->len;
1096+
1097+ vmar->cdata_list = g_list_append(vmar->cdata_list, cdata);
1098+ }
1099+
1100+ return ret;
1101+};
1102+
1103+VmaReader *vma_reader_create(const char *filename, Error **errp)
1104+{
1105+ assert(filename);
1106+ assert(errp);
1107+
1108+ VmaReader *vmar = g_new0(VmaReader, 1);
1109+
1110+ if (strcmp(filename, "-") == 0) {
1111+ vmar->fd = dup(0);
1112+ } else {
1113+ vmar->fd = open(filename, O_RDONLY);
1114+ }
1115+
1116+ if (vmar->fd < 0) {
1117+ error_setg(errp, "can't open file %s - %s\n", filename,
1118+ g_strerror(errno));
1119+ goto err;
1120+ }
1121+
1122+ vmar->md5csum = g_checksum_new(G_CHECKSUM_MD5);
1123+ if (!vmar->md5csum) {
1124+ error_setg(errp, "can't allocate cmsum\n");
1125+ goto err;
1126+ }
1127+
1128+ vmar->blob_hash = g_hash_table_new_full(g_int32_hash, g_int32_equal,
1129+ NULL, g_free);
1130+
1131+ if (vma_reader_read_head(vmar, errp) < 0) {
1132+ goto err;
1133+ }
1134+
1135+ return vmar;
1136+
1137+err:
1138+ if (vmar) {
1139+ vma_reader_destroy(vmar);
1140+ }
1141+
1142+ return NULL;
1143+}
1144+
1145+VmaHeader *vma_reader_get_header(VmaReader *vmar)
1146+{
1147+ assert(vmar);
1148+ assert(vmar->head_data);
1149+
1150+ return (VmaHeader *)(vmar->head_data);
1151+}
1152+
1153+GList *vma_reader_get_config_data(VmaReader *vmar)
1154+{
1155+ assert(vmar);
1156+ assert(vmar->head_data);
1157+
1158+ return vmar->cdata_list;
1159+}
1160+
1161+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id)
1162+{
1163+ assert(vmar);
1164+ assert(dev_id);
1165+
1166+ if (vmar->devinfo[dev_id].size && vmar->devinfo[dev_id].devname) {
1167+ return &vmar->devinfo[dev_id];
1168+ }
1169+
1170+ return NULL;
1171+}
1172+
67af0fa4
WB
1173+static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
1174+ BlockBackend *target, bool write_zeroes)
1175+{
1176+ assert(vmar);
1177+ assert(dev_id);
1178+
1179+ vmar->rstate[dev_id].target = target;
1180+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
1181+
1182+ int64_t size = vmar->devinfo[dev_id].size;
1183+
1184+ int64_t bitmap_size = (size/BDRV_SECTOR_SIZE) +
1185+ (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG - 1;
1186+ bitmap_size /= (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG;
1187+
1188+ vmar->rstate[dev_id].bitmap_size = bitmap_size;
1189+ vmar->rstate[dev_id].bitmap = g_new0(unsigned long, bitmap_size);
1190+
1191+ vmar->cluster_count += size/VMA_CLUSTER_SIZE;
1192+}
1193+
1194+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
95259824
WB
1195+ bool write_zeroes, Error **errp)
1196+{
1197+ assert(vmar);
67af0fa4 1198+ assert(target != NULL);
95259824 1199+ assert(dev_id);
67af0fa4 1200+ assert(vmar->rstate[dev_id].target == NULL);
95259824 1201+
67af0fa4 1202+ int64_t size = blk_getlength(target);
95259824
WB
1203+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
1204+
1205+ /* storage types can have different size restrictions, so it
1206+ * is not always possible to create an image with exact size.
1207+ * So we tolerate a size difference up to 4MB.
1208+ */
1209+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
1210+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
1211+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
1212+ size, vmar->devinfo[dev_id].size);
1213+ return -1;
1214+ }
1215+
67af0fa4 1216+ allocate_rstate(vmar, dev_id, target, write_zeroes);
95259824
WB
1217+
1218+ return 0;
1219+}
1220+
/*
 * write(2) wrapper that transparently restarts the call when it is
 * interrupted by a signal (EINTR).  Returns whatever write() returns.
 */
static ssize_t safe_write(int fd, void *buf, size_t count)
{
    for (;;) {
        ssize_t put = write(fd, buf, count);

        if (put >= 0 || errno != EINTR) {
            return put;
        }
    }
}
1231+
/*
 * Write exactly @len bytes, looping over short writes.
 *
 * Returns @len on success or a negative value on a write error.  The
 * return type is signed (like full_read()) so that the error value is not
 * squeezed through an unsigned type on its way to the caller.
 */
static ssize_t full_write(int fd, void *buf, size_t len)
{
    unsigned char *src = buf; /* byte pointer: no arithmetic on void * */
    size_t total = 0;

    while (total < len) {
        ssize_t n = safe_write(fd, src + total, len - total);
        if (n < 0) {
            return n;
        }
        total += n;
    }

    return total;
}
1256+
1257+static int restore_write_data(VmaReader *vmar, guint8 dev_id,
67af0fa4 1258+ BlockBackend *target, int vmstate_fd,
95259824
WB
1259+ unsigned char *buf, int64_t sector_num,
1260+ int nb_sectors, Error **errp)
1261+{
1262+ assert(vmar);
1263+
1264+ if (dev_id == vmar->vmstate_stream) {
1265+ if (vmstate_fd >= 0) {
1266+ int len = nb_sectors * BDRV_SECTOR_SIZE;
1267+ int res = full_write(vmstate_fd, buf, len);
1268+ if (res < 0) {
1269+ error_setg(errp, "write vmstate failed %d", res);
1270+ return -1;
1271+ }
1272+ }
1273+ } else {
67af0fa4 1274+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, buf, nb_sectors * BDRV_SECTOR_SIZE, 0);
95259824 1275+ if (res < 0) {
67af0fa4
WB
1276+ error_setg(errp, "blk_pwrite to %s failed (%d)",
1277+ bdrv_get_device_name(blk_bs(target)), res);
95259824
WB
1278+ return -1;
1279+ }
1280+ }
1281+ return 0;
1282+}
67af0fa4 1283+
95259824
WB
1284+static int restore_extent(VmaReader *vmar, unsigned char *buf,
1285+ int extent_size, int vmstate_fd,
67af0fa4 1286+ bool verbose, bool verify, Error **errp)
95259824
WB
1287+{
1288+ assert(vmar);
1289+ assert(buf);
1290+
1291+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
1292+ int start = VMA_EXTENT_HEADER_SIZE;
1293+ int i;
1294+
1295+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1296+ uint64_t block_info = GUINT64_FROM_BE(ehead->blockinfo[i]);
1297+ uint64_t cluster_num = block_info & 0xffffffff;
1298+ uint8_t dev_id = (block_info >> 32) & 0xff;
1299+ uint16_t mask = block_info >> (32+16);
1300+ int64_t max_sector;
1301+
1302+ if (!dev_id) {
1303+ continue;
1304+ }
1305+
1306+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
67af0fa4 1307+ BlockBackend *target = NULL;
95259824
WB
1308+
1309+ if (dev_id != vmar->vmstate_stream) {
67af0fa4
WB
1310+ target = rstate->target;
1311+ if (!verify && !target) {
95259824
WB
1312+ error_setg(errp, "got wrong dev id %d", dev_id);
1313+ return -1;
1314+ }
1315+
1316+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
1317+ error_setg(errp, "found duplicated cluster %zd for stream %s",
1318+ cluster_num, vmar->devinfo[dev_id].devname);
1319+ return -1;
1320+ }
1321+ vma_reader_set_bitmap(rstate, cluster_num, 1);
1322+
1323+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
1324+ } else {
1325+ max_sector = G_MAXINT64;
1326+ if (cluster_num != vmar->vmstate_clusters) {
1327+ error_setg(errp, "found out of order vmstate data");
1328+ return -1;
1329+ }
1330+ vmar->vmstate_clusters++;
1331+ }
1332+
1333+ vmar->clusters_read++;
1334+
1335+ if (verbose) {
1336+ time_t duration = time(NULL) - vmar->start_time;
1337+ int percent = (vmar->clusters_read*100)/vmar->cluster_count;
1338+ if (percent != vmar->clusters_read_per) {
1339+ printf("progress %d%% (read %zd bytes, duration %zd sec)\n",
1340+ percent, vmar->clusters_read*VMA_CLUSTER_SIZE,
1341+ duration);
1342+ fflush(stdout);
1343+ vmar->clusters_read_per = percent;
1344+ }
1345+ }
1346+
1347+ /* try to write whole clusters to speedup restore */
1348+ if (mask == 0xffff) {
1349+ if ((start + VMA_CLUSTER_SIZE) > extent_size) {
1350+ error_setg(errp, "short vma extent - too many blocks");
1351+ return -1;
1352+ }
1353+ int64_t sector_num = (cluster_num * VMA_CLUSTER_SIZE) /
1354+ BDRV_SECTOR_SIZE;
1355+ int64_t end_sector = sector_num +
1356+ VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE;
1357+
1358+ if (end_sector > max_sector) {
1359+ end_sector = max_sector;
1360+ }
1361+
1362+ if (end_sector <= sector_num) {
1363+ error_setg(errp, "got wrong block address - write bejond end");
1364+ return -1;
1365+ }
1366+
67af0fa4
WB
1367+ if (!verify) {
1368+ int nb_sectors = end_sector - sector_num;
1369+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
1370+ buf + start, sector_num, nb_sectors,
1371+ errp) < 0) {
1372+ return -1;
1373+ }
95259824
WB
1374+ }
1375+
1376+ start += VMA_CLUSTER_SIZE;
1377+ } else {
1378+ int j;
1379+ int bit = 1;
1380+
1381+ for (j = 0; j < 16; j++) {
1382+ int64_t sector_num = (cluster_num*VMA_CLUSTER_SIZE +
1383+ j*VMA_BLOCK_SIZE)/BDRV_SECTOR_SIZE;
1384+
1385+ int64_t end_sector = sector_num +
1386+ VMA_BLOCK_SIZE/BDRV_SECTOR_SIZE;
1387+ if (end_sector > max_sector) {
1388+ end_sector = max_sector;
1389+ }
1390+
1391+ if (mask & bit) {
1392+ if ((start + VMA_BLOCK_SIZE) > extent_size) {
1393+ error_setg(errp, "short vma extent - too many blocks");
1394+ return -1;
1395+ }
1396+
1397+ if (end_sector <= sector_num) {
1398+ error_setg(errp, "got wrong block address - "
1399+ "write bejond end");
1400+ return -1;
1401+ }
1402+
67af0fa4
WB
1403+ if (!verify) {
1404+ int nb_sectors = end_sector - sector_num;
1405+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
1406+ buf + start, sector_num,
1407+ nb_sectors, errp) < 0) {
1408+ return -1;
1409+ }
95259824
WB
1410+ }
1411+
1412+ start += VMA_BLOCK_SIZE;
1413+
1414+ } else {
1415+
67af0fa4
WB
1416+
1417+ if (end_sector > sector_num) {
95259824
WB
1418+ /* Todo: use bdrv_co_write_zeroes (but that need to
1419+ * be run inside coroutine?)
1420+ */
1421+ int nb_sectors = end_sector - sector_num;
67af0fa4
WB
1422+ int zero_size = BDRV_SECTOR_SIZE*nb_sectors;
1423+ vmar->zero_cluster_data += zero_size;
1424+ if (mask != 0) {
1425+ vmar->partial_zero_cluster_data += zero_size;
1426+ }
1427+
1428+ if (rstate->write_zeroes && !verify) {
1429+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
1430+ zero_vma_block, sector_num,
1431+ nb_sectors, errp) < 0) {
1432+ return -1;
1433+ }
95259824
WB
1434+ }
1435+ }
1436+ }
1437+
1438+ bit = bit << 1;
1439+ }
1440+ }
1441+ }
1442+
1443+ if (start != extent_size) {
1444+ error_setg(errp, "vma extent error - missing blocks");
1445+ return -1;
1446+ }
1447+
1448+ return 0;
1449+}
1450+
67af0fa4
WB
1451+static int vma_reader_restore_full(VmaReader *vmar, int vmstate_fd,
1452+ bool verbose, bool verify,
1453+ Error **errp)
95259824
WB
1454+{
1455+ assert(vmar);
1456+ assert(vmar->head_data);
1457+
1458+ int ret = 0;
1459+ unsigned char buf[VMA_MAX_EXTENT_SIZE];
1460+ int buf_pos = 0;
1461+ unsigned char md5sum[16];
1462+ VmaHeader *h = (VmaHeader *)vmar->head_data;
1463+
1464+ vmar->start_time = time(NULL);
1465+
1466+ while (1) {
1467+ int bytes = full_read(vmar->fd, buf + buf_pos, sizeof(buf) - buf_pos);
1468+ if (bytes < 0) {
1469+ error_setg(errp, "read failed - %s", g_strerror(errno));
1470+ return -1;
1471+ }
1472+
1473+ buf_pos += bytes;
1474+
1475+ if (!buf_pos) {
1476+ break; /* EOF */
1477+ }
1478+
1479+ if (buf_pos < VMA_EXTENT_HEADER_SIZE) {
1480+ error_setg(errp, "read short extent (%d bytes)", buf_pos);
1481+ return -1;
1482+ }
1483+
1484+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
1485+
1486+ /* extract md5sum */
1487+ memcpy(md5sum, ehead->md5sum, sizeof(ehead->md5sum));
1488+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
1489+
1490+ g_checksum_reset(vmar->md5csum);
1491+ g_checksum_update(vmar->md5csum, buf, VMA_EXTENT_HEADER_SIZE);
1492+ gsize csize = 16;
1493+ g_checksum_get_digest(vmar->md5csum, ehead->md5sum, &csize);
1494+
1495+ if (memcmp(md5sum, ehead->md5sum, 16) != 0) {
1496+ error_setg(errp, "wrong vma extent header chechsum");
1497+ return -1;
1498+ }
1499+
1500+ if (memcmp(h->uuid, ehead->uuid, sizeof(ehead->uuid)) != 0) {
1501+ error_setg(errp, "wrong vma extent uuid");
1502+ return -1;
1503+ }
1504+
1505+ if (ehead->magic != VMA_EXTENT_MAGIC || ehead->reserved1 != 0) {
1506+ error_setg(errp, "wrong vma extent header magic");
1507+ return -1;
1508+ }
1509+
1510+ int block_count = GUINT16_FROM_BE(ehead->block_count);
1511+ int extent_size = VMA_EXTENT_HEADER_SIZE + block_count*VMA_BLOCK_SIZE;
1512+
1513+ if (buf_pos < extent_size) {
1514+ error_setg(errp, "short vma extent (%d < %d)", buf_pos,
1515+ extent_size);
1516+ return -1;
1517+ }
1518+
1519+ if (restore_extent(vmar, buf, extent_size, vmstate_fd, verbose,
67af0fa4 1520+ verify, errp) < 0) {
95259824
WB
1521+ return -1;
1522+ }
1523+
1524+ if (buf_pos > extent_size) {
1525+ memmove(buf, buf + extent_size, buf_pos - extent_size);
1526+ buf_pos = buf_pos - extent_size;
1527+ } else {
1528+ buf_pos = 0;
1529+ }
1530+ }
1531+
1532+ bdrv_drain_all();
1533+
1534+ int i;
1535+ for (i = 1; i < 256; i++) {
1536+ VmaRestoreState *rstate = &vmar->rstate[i];
67af0fa4 1537+ if (!rstate->target) {
95259824
WB
1538+ continue;
1539+ }
1540+
67af0fa4
WB
1541+ if (blk_flush(rstate->target) < 0) {
1542+ error_setg(errp, "vma blk_flush %s failed",
95259824
WB
1543+ vmar->devinfo[i].devname);
1544+ return -1;
1545+ }
1546+
1547+ if (vmar->devinfo[i].size &&
1548+ (strcmp(vmar->devinfo[i].devname, "vmstate") != 0)) {
1549+ assert(rstate->bitmap);
1550+
1551+ int64_t cluster_num, end;
1552+
1553+ end = (vmar->devinfo[i].size + VMA_CLUSTER_SIZE - 1) /
1554+ VMA_CLUSTER_SIZE;
1555+
1556+ for (cluster_num = 0; cluster_num < end; cluster_num++) {
1557+ if (!vma_reader_get_bitmap(rstate, cluster_num)) {
1558+ error_setg(errp, "detected missing cluster %zd "
1559+ "for stream %s", cluster_num,
1560+ vmar->devinfo[i].devname);
1561+ return -1;
1562+ }
1563+ }
1564+ }
1565+ }
1566+
67af0fa4
WB
1567+ if (verbose) {
1568+ if (vmar->clusters_read) {
1569+ printf("total bytes read %zd, sparse bytes %zd (%.3g%%)\n",
1570+ vmar->clusters_read*VMA_CLUSTER_SIZE,
1571+ vmar->zero_cluster_data,
1572+ (double)(100.0*vmar->zero_cluster_data)/
1573+ (vmar->clusters_read*VMA_CLUSTER_SIZE));
1574+
1575+ int64_t datasize = vmar->clusters_read*VMA_CLUSTER_SIZE-vmar->zero_cluster_data;
1576+ if (datasize) { // this does not make sense for empty files
1577+ printf("space reduction due to 4K zero blocks %.3g%%\n",
1578+ (double)(100.0*vmar->partial_zero_cluster_data) / datasize);
1579+ }
1580+ } else {
1581+ printf("vma archive contains no image data\n");
1582+ }
1583+ }
95259824
WB
1584+ return ret;
1585+}
1586+
67af0fa4
WB
1587+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
1588+ Error **errp)
1589+{
1590+ return vma_reader_restore_full(vmar, vmstate_fd, verbose, false, errp);
1591+}
1592+
1593+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp)
1594+{
1595+ guint8 dev_id;
1596+
1597+ for (dev_id = 1; dev_id < 255; dev_id++) {
1598+ if (vma_reader_get_device_info(vmar, dev_id)) {
1599+ allocate_rstate(vmar, dev_id, NULL, false);
1600+ }
1601+ }
1602+
1603+ return vma_reader_restore_full(vmar, -1, verbose, true, errp);
1604+}
1605+
95259824
WB
1606diff --git a/vma-writer.c b/vma-writer.c
1607new file mode 100644
6838f038 1608index 0000000000..fd9567634d
95259824
WB
1609--- /dev/null
1610+++ b/vma-writer.c
67af0fa4 1611@@ -0,0 +1,771 @@
95259824
WB
1612+/*
1613+ * VMA: Virtual Machine Archive
1614+ *
1615+ * Copyright (C) 2012 Proxmox Server Solutions
1616+ *
1617+ * Authors:
1618+ * Dietmar Maurer (dietmar@proxmox.com)
1619+ *
1620+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
1621+ * See the COPYING file in the top-level directory.
1622+ *
1623+ */
1624+
1625+#include "qemu/osdep.h"
1626+#include <glib.h>
1627+#include <uuid/uuid.h>
1628+
1629+#include "vma.h"
1630+#include "block/block.h"
1631+#include "monitor/monitor.h"
1632+#include "qemu/main-loop.h"
1633+#include "qemu/coroutine.h"
1634+#include "qemu/cutils.h"
1635+
/* set to 1 to enable the DPRINTF() debug output below */
#define DEBUG_VMA 0

/* debug printf, prefixed with "vma: "; prints only when DEBUG_VMA is set */
#define DPRINTF(fmt, ...)\
    do { if (DEBUG_VMA) { printf("vma: " fmt, ## __VA_ARGS__); } } while (0)

/* NOTE(review): WRITE_BUFFERS looks unused in this part of the file */
#define WRITE_BUFFERS 5
/* the header buffer holds HEADER_CLUSTERS clusters */
#define HEADER_CLUSTERS 8
#define HEADERBUF_SIZE (VMA_CLUSTER_SIZE*HEADER_CLUSTERS)
95259824
WB
1644+
/* State of one archive being written. */
struct VmaWriter {
    int fd;              /* output file descriptor */
    FILE *cmd;           /* popen() stream when writing to "exec:", else NULL */
    int status;          /* negative once an error was recorded */
    char errmsg[8192];   /* message of the first recorded error */
    uuid_t uuid;         /* archive uuid, copied into header and extents */
    bool header_written;
    bool closed;

    /* we always write extents */
    unsigned char *outbuf;
    int outbuf_pos; /* in bytes */
    int outbuf_count; /* in VMA_BLOCKS */
    uint64_t outbuf_block_info[VMA_BLOCKS_PER_EXTENT];

    unsigned char *headerbuf; /* HEADERBUF_SIZE bytes, used to build the header */

    GChecksum *md5csum;
    CoMutex flush_lock;
    Coroutine *co_writer; /* coroutine currently inside vma_queue_write() */

    /* drive information */
    VmaStreamInfo stream_info[256];
    guint stream_count;

    guint8 vmstate_stream; /* stream id of the "vmstate" stream, if registered */
    uint32_t vmstate_clusters;

    /* header blob table */
    char *header_blob_table;
    uint32_t header_blob_table_size; /* allocated bytes */
    uint32_t header_blob_table_pos;  /* next free offset */

    /* store for config blobs */
    uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
    uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
    uint32_t config_count;
};
1683+
1684+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...)
1685+{
1686+ va_list ap;
1687+
1688+ if (vmaw->status < 0) {
1689+ return;
1690+ }
1691+
1692+ vmaw->status = -1;
1693+
1694+ va_start(ap, fmt);
1695+ g_vsnprintf(vmaw->errmsg, sizeof(vmaw->errmsg), fmt, ap);
1696+ va_end(ap);
1697+
1698+ DPRINTF("vma_writer_set_error: %s\n", vmaw->errmsg);
1699+}
1700+
1701+static uint32_t allocate_header_blob(VmaWriter *vmaw, const char *data,
1702+ size_t len)
1703+{
1704+ if (len > 65535) {
1705+ return 0;
1706+ }
1707+
1708+ if (!vmaw->header_blob_table ||
1709+ (vmaw->header_blob_table_size <
1710+ (vmaw->header_blob_table_pos + len + 2))) {
1711+ int newsize = vmaw->header_blob_table_size + ((len + 2 + 511)/512)*512;
1712+
1713+ vmaw->header_blob_table = g_realloc(vmaw->header_blob_table, newsize);
1714+ memset(vmaw->header_blob_table + vmaw->header_blob_table_size,
1715+ 0, newsize - vmaw->header_blob_table_size);
1716+ vmaw->header_blob_table_size = newsize;
1717+ }
1718+
1719+ uint32_t cpos = vmaw->header_blob_table_pos;
1720+ vmaw->header_blob_table[cpos] = len & 255;
1721+ vmaw->header_blob_table[cpos+1] = (len >> 8) & 255;
1722+ memcpy(vmaw->header_blob_table + cpos + 2, data, len);
1723+ vmaw->header_blob_table_pos += len + 2;
1724+ return cpos;
1725+}
1726+
1727+static uint32_t allocate_header_string(VmaWriter *vmaw, const char *str)
1728+{
1729+ assert(vmaw);
1730+
1731+ size_t len = strlen(str) + 1;
1732+
1733+ return allocate_header_blob(vmaw, str, len);
1734+}
1735+
1736+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
1737+ gsize len)
1738+{
1739+ assert(vmaw);
1740+ assert(!vmaw->header_written);
1741+ assert(vmaw->config_count < VMA_MAX_CONFIGS);
1742+ assert(name);
1743+ assert(data);
95259824
WB
1744+
1745+ gchar *basename = g_path_get_basename(name);
1746+ uint32_t name_ptr = allocate_header_string(vmaw, basename);
1747+ g_free(basename);
1748+
1749+ if (!name_ptr) {
1750+ return -1;
1751+ }
1752+
1753+ uint32_t data_ptr = allocate_header_blob(vmaw, data, len);
1754+ if (!data_ptr) {
1755+ return -1;
1756+ }
1757+
1758+ vmaw->config_names[vmaw->config_count] = name_ptr;
1759+ vmaw->config_data[vmaw->config_count] = data_ptr;
1760+
1761+ vmaw->config_count++;
1762+
1763+ return 0;
1764+}
1765+
1766+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
1767+ size_t size)
1768+{
1769+ assert(vmaw);
1770+ assert(devname);
1771+ assert(!vmaw->status);
1772+
1773+ if (vmaw->header_written) {
1774+ vma_writer_set_error(vmaw, "vma_writer_register_stream: header "
1775+ "already written");
1776+ return -1;
1777+ }
1778+
1779+ guint n = vmaw->stream_count + 1;
1780+
1781+ /* we can have dev_ids form 1 to 255 (0 reserved)
1782+ * 255(-1) reseverd for safety
1783+ */
1784+ if (n > 254) {
1785+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1786+ "too many drives");
1787+ return -1;
1788+ }
1789+
1790+ if (size <= 0) {
1791+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1792+ "got strange size %zd", size);
1793+ return -1;
1794+ }
1795+
1796+ DPRINTF("vma_writer_register_stream %s %zu %d\n", devname, size, n);
1797+
1798+ vmaw->stream_info[n].devname = g_strdup(devname);
1799+ vmaw->stream_info[n].size = size;
1800+
1801+ vmaw->stream_info[n].cluster_count = (size + VMA_CLUSTER_SIZE - 1) /
1802+ VMA_CLUSTER_SIZE;
1803+
1804+ vmaw->stream_count = n;
1805+
1806+ if (strcmp(devname, "vmstate") == 0) {
1807+ vmaw->vmstate_stream = n;
1808+ }
1809+
1810+ return n;
1811+}
1812+
1813+static void vma_co_continue_write(void *opaque)
1814+{
1815+ VmaWriter *vmaw = opaque;
1816+
1817+ DPRINTF("vma_co_continue_write\n");
1818+ qemu_coroutine_enter(vmaw->co_writer);
1819+}
1820+
1821+static ssize_t coroutine_fn
67af0fa4 1822+vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes)
95259824 1823+{
67af0fa4 1824+ DPRINTF("vma_queue_write enter %zd\n", bytes);
95259824 1825+
67af0fa4
WB
1826+ assert(vmaw);
1827+ assert(buf);
1828+ assert(bytes <= VMA_MAX_EXTENT_SIZE);
95259824 1829+
67af0fa4
WB
1830+ size_t done = 0;
1831+ ssize_t ret;
95259824
WB
1832+
1833+ assert(vmaw->co_writer == NULL);
1834+
1835+ vmaw->co_writer = qemu_coroutine_self();
1836+
95259824 1837+ while (done < bytes) {
67af0fa4
WB
1838+ aio_set_fd_handler(qemu_get_aio_context(), vmaw->fd, false, NULL, vma_co_continue_write, NULL, vmaw);
1839+ qemu_coroutine_yield();
1840+ aio_set_fd_handler(qemu_get_aio_context(), vmaw->fd, false, NULL, NULL, NULL, NULL);
1841+ if (vmaw->status < 0) {
1842+ DPRINTF("vma_queue_write detected canceled backup\n");
1843+ done = -1;
1844+ break;
1845+ }
95259824
WB
1846+ ret = write(vmaw->fd, buf + done, bytes - done);
1847+ if (ret > 0) {
1848+ done += ret;
67af0fa4 1849+ DPRINTF("vma_queue_write written %zd %zd\n", done, ret);
95259824
WB
1850+ } else if (ret < 0) {
1851+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
67af0fa4
WB
1852+ /* try again */
1853+ } else {
1854+ vma_writer_set_error(vmaw, "vma_queue_write: write error - %s",
95259824
WB
1855+ g_strerror(errno));
1856+ done = -1; /* always return failure for partial writes */
1857+ break;
1858+ }
1859+ } else if (ret == 0) {
1860+ /* should not happen - simply try again */
1861+ }
1862+ }
1863+
95259824
WB
1864+ vmaw->co_writer = NULL;
1865+
67af0fa4 1866+ return (done == bytes) ? bytes : -1;
95259824
WB
1867+}
1868+
1869+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
1870+{
1871+ const char *p;
1872+
1873+ assert(sizeof(VmaHeader) == (4096 + 8192));
1874+ assert(G_STRUCT_OFFSET(VmaHeader, config_names) == 2044);
1875+ assert(G_STRUCT_OFFSET(VmaHeader, config_data) == 3068);
1876+ assert(G_STRUCT_OFFSET(VmaHeader, dev_info) == 4096);
1877+ assert(sizeof(VmaExtentHeader) == 512);
1878+
1879+ VmaWriter *vmaw = g_new0(VmaWriter, 1);
1880+ vmaw->fd = -1;
1881+
1882+ vmaw->md5csum = g_checksum_new(G_CHECKSUM_MD5);
1883+ if (!vmaw->md5csum) {
1884+ error_setg(errp, "can't allocate cmsum\n");
1885+ goto err;
1886+ }
1887+
1888+ if (strstart(filename, "exec:", &p)) {
1889+ vmaw->cmd = popen(p, "w");
1890+ if (vmaw->cmd == NULL) {
1891+ error_setg(errp, "can't popen command '%s' - %s\n", p,
1892+ g_strerror(errno));
1893+ goto err;
1894+ }
1895+ vmaw->fd = fileno(vmaw->cmd);
1896+
67af0fa4 1897+ /* try to use O_NONBLOCK */
95259824 1898+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
95259824
WB
1899+
1900+ } else {
1901+ struct stat st;
1902+ int oflags;
1903+ const char *tmp_id_str;
1904+
1905+ if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) {
67af0fa4 1906+ oflags = O_NONBLOCK|O_WRONLY;
95259824
WB
1907+ vmaw->fd = qemu_open(filename, oflags, 0644);
1908+ } else if (strstart(filename, "/dev/fdset/", &tmp_id_str)) {
67af0fa4 1909+ oflags = O_NONBLOCK|O_WRONLY;
95259824
WB
1910+ vmaw->fd = qemu_open(filename, oflags, 0644);
1911+ } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) {
1912+ vmaw->fd = monitor_get_fd(cur_mon, tmp_id_str, errp);
1913+ if (vmaw->fd < 0) {
1914+ goto err;
1915+ }
67af0fa4 1916+ /* try to use O_NONBLOCK */
95259824 1917+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
95259824
WB
1918+ } else {
1919+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_CREAT|O_EXCL;
1920+ vmaw->fd = qemu_open(filename, oflags, 0644);
1921+ }
1922+
1923+ if (vmaw->fd < 0) {
1924+ error_setg(errp, "can't open file %s - %s\n", filename,
1925+ g_strerror(errno));
1926+ goto err;
1927+ }
1928+ }
1929+
1930+ /* we use O_DIRECT, so we need to align IO buffers */
67af0fa4
WB
1931+
1932+ vmaw->outbuf = qemu_memalign(512, VMA_MAX_EXTENT_SIZE);
1933+ vmaw->headerbuf = qemu_memalign(512, HEADERBUF_SIZE);
95259824
WB
1934+
1935+ vmaw->outbuf_count = 0;
1936+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1937+
1938+ vmaw->header_blob_table_pos = 1; /* start at pos 1 */
1939+
95259824 1940+ qemu_co_mutex_init(&vmaw->flush_lock);
95259824
WB
1941+
1942+ uuid_copy(vmaw->uuid, uuid);
1943+
1944+ return vmaw;
1945+
1946+err:
1947+ if (vmaw) {
1948+ if (vmaw->cmd) {
1949+ pclose(vmaw->cmd);
1950+ } else if (vmaw->fd >= 0) {
1951+ close(vmaw->fd);
1952+ }
1953+
1954+ if (vmaw->md5csum) {
1955+ g_checksum_free(vmaw->md5csum);
1956+ }
1957+
1958+ g_free(vmaw);
1959+ }
1960+
1961+ return NULL;
1962+}
1963+
1964+static int coroutine_fn vma_write_header(VmaWriter *vmaw)
1965+{
1966+ assert(vmaw);
67af0fa4 1967+ unsigned char *buf = vmaw->headerbuf;
95259824
WB
1968+ VmaHeader *head = (VmaHeader *)buf;
1969+
1970+ int i;
1971+
1972+ DPRINTF("VMA WRITE HEADER\n");
1973+
1974+ if (vmaw->status < 0) {
1975+ return vmaw->status;
1976+ }
1977+
67af0fa4 1978+ memset(buf, 0, HEADERBUF_SIZE);
95259824
WB
1979+
1980+ head->magic = VMA_MAGIC;
1981+ head->version = GUINT32_TO_BE(1); /* v1 */
1982+ memcpy(head->uuid, vmaw->uuid, 16);
1983+
1984+ time_t ctime = time(NULL);
1985+ head->ctime = GUINT64_TO_BE(ctime);
1986+
95259824
WB
1987+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
1988+ head->config_names[i] = GUINT32_TO_BE(vmaw->config_names[i]);
1989+ head->config_data[i] = GUINT32_TO_BE(vmaw->config_data[i]);
1990+ }
1991+
1992+ /* 32 bytes per device (12 used currently) = 8192 bytes max */
1993+ for (i = 1; i <= 254; i++) {
1994+ VmaStreamInfo *si = &vmaw->stream_info[i];
1995+ if (si->size) {
1996+ assert(si->devname);
1997+ uint32_t devname_ptr = allocate_header_string(vmaw, si->devname);
1998+ if (!devname_ptr) {
1999+ return -1;
2000+ }
2001+ head->dev_info[i].devname_ptr = GUINT32_TO_BE(devname_ptr);
2002+ head->dev_info[i].size = GUINT64_TO_BE(si->size);
2003+ }
2004+ }
2005+
2006+ uint32_t header_size = sizeof(VmaHeader) + vmaw->header_blob_table_size;
2007+ head->header_size = GUINT32_TO_BE(header_size);
2008+
67af0fa4 2009+ if (header_size > HEADERBUF_SIZE) {
95259824
WB
2010+ return -1; /* just to be sure */
2011+ }
2012+
2013+ uint32_t blob_buffer_offset = sizeof(VmaHeader);
2014+ memcpy(buf + blob_buffer_offset, vmaw->header_blob_table,
2015+ vmaw->header_blob_table_size);
2016+ head->blob_buffer_offset = GUINT32_TO_BE(blob_buffer_offset);
2017+ head->blob_buffer_size = GUINT32_TO_BE(vmaw->header_blob_table_pos);
2018+
2019+ g_checksum_reset(vmaw->md5csum);
2020+ g_checksum_update(vmaw->md5csum, (const guchar *)buf, header_size);
2021+ gsize csize = 16;
2022+ g_checksum_get_digest(vmaw->md5csum, (guint8 *)(head->md5sum), &csize);
2023+
2024+ return vma_queue_write(vmaw, buf, header_size);
2025+}
2026+
2027+static int coroutine_fn vma_writer_flush(VmaWriter *vmaw)
2028+{
2029+ assert(vmaw);
2030+
2031+ int ret;
2032+ int i;
2033+
2034+ if (vmaw->status < 0) {
2035+ return vmaw->status;
2036+ }
2037+
2038+ if (!vmaw->header_written) {
2039+ vmaw->header_written = true;
2040+ ret = vma_write_header(vmaw);
2041+ if (ret < 0) {
2042+ vma_writer_set_error(vmaw, "vma_writer_flush: write header failed");
2043+ return ret;
2044+ }
2045+ }
2046+
2047+ DPRINTF("VMA WRITE FLUSH %d %d\n", vmaw->outbuf_count, vmaw->outbuf_pos);
2048+
2049+
2050+ VmaExtentHeader *ehead = (VmaExtentHeader *)vmaw->outbuf;
2051+
2052+ ehead->magic = VMA_EXTENT_MAGIC;
2053+ ehead->reserved1 = 0;
2054+
2055+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
2056+ ehead->blockinfo[i] = GUINT64_TO_BE(vmaw->outbuf_block_info[i]);
2057+ }
2058+
2059+ guint16 block_count = (vmaw->outbuf_pos - VMA_EXTENT_HEADER_SIZE) /
2060+ VMA_BLOCK_SIZE;
2061+
2062+ ehead->block_count = GUINT16_TO_BE(block_count);
2063+
2064+ memcpy(ehead->uuid, vmaw->uuid, sizeof(ehead->uuid));
2065+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
2066+
2067+ g_checksum_reset(vmaw->md5csum);
2068+ g_checksum_update(vmaw->md5csum, vmaw->outbuf, VMA_EXTENT_HEADER_SIZE);
2069+ gsize csize = 16;
2070+ g_checksum_get_digest(vmaw->md5csum, ehead->md5sum, &csize);
2071+
2072+ int bytes = vmaw->outbuf_pos;
2073+ ret = vma_queue_write(vmaw, vmaw->outbuf, bytes);
2074+ if (ret != bytes) {
2075+ vma_writer_set_error(vmaw, "vma_writer_flush: failed write");
2076+ }
2077+
2078+ vmaw->outbuf_count = 0;
2079+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
2080+
2081+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
2082+ vmaw->outbuf_block_info[i] = 0;
2083+ }
2084+
2085+ return vmaw->status;
2086+}
2087+
2088+static int vma_count_open_streams(VmaWriter *vmaw)
2089+{
2090+ g_assert(vmaw != NULL);
2091+
2092+ int i;
2093+ int open_drives = 0;
2094+ for (i = 0; i <= 255; i++) {
2095+ if (vmaw->stream_info[i].size && !vmaw->stream_info[i].finished) {
2096+ open_drives++;
2097+ }
2098+ }
2099+
2100+ return open_drives;
2101+}
2102+
67af0fa4
WB
2103+
2104+/**
2105+ * You need to call this if the vma archive does not contain
2106+ * any data stream.
2107+ */
2108+int coroutine_fn
2109+vma_writer_flush_output(VmaWriter *vmaw)
2110+{
2111+ qemu_co_mutex_lock(&vmaw->flush_lock);
2112+ int ret = vma_writer_flush(vmaw);
2113+ qemu_co_mutex_unlock(&vmaw->flush_lock);
2114+ if (ret < 0) {
2115+ vma_writer_set_error(vmaw, "vma_writer_flush_header failed");
2116+ }
2117+ return ret;
2118+}
2119+
95259824
WB
2120+/**
2121+ * all jobs should call this when there is no more data
2122+ * Returns: number of remaining stream (0 ==> finished)
2123+ */
2124+int coroutine_fn
2125+vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id)
2126+{
2127+ g_assert(vmaw != NULL);
2128+
2129+ DPRINTF("vma_writer_set_status %d\n", dev_id);
2130+ if (!vmaw->stream_info[dev_id].size) {
2131+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
2132+ "no such stream %d", dev_id);
2133+ return -1;
2134+ }
2135+ if (vmaw->stream_info[dev_id].finished) {
2136+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
2137+ "stream already closed %d", dev_id);
2138+ return -1;
2139+ }
2140+
2141+ vmaw->stream_info[dev_id].finished = true;
2142+
2143+ int open_drives = vma_count_open_streams(vmaw);
2144+
2145+ if (open_drives <= 0) {
2146+ DPRINTF("vma_writer_set_status all drives completed\n");
67af0fa4 2147+ vma_writer_flush_output(vmaw);
95259824
WB
2148+ }
2149+
2150+ return open_drives;
2151+}
2152+
2153+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status)
2154+{
2155+ int i;
2156+
2157+ g_assert(vmaw != NULL);
2158+
2159+ if (status) {
2160+ status->status = vmaw->status;
2161+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
2162+ for (i = 0; i <= 255; i++) {
2163+ status->stream_info[i] = vmaw->stream_info[i];
2164+ }
2165+
2166+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
2167+ }
2168+
2169+ status->closed = vmaw->closed;
2170+
2171+ return vmaw->status;
2172+}
2173+
2174+static int vma_writer_get_buffer(VmaWriter *vmaw)
2175+{
2176+ int ret = 0;
2177+
2178+ qemu_co_mutex_lock(&vmaw->flush_lock);
2179+
2180+ /* wait until buffer is available */
2181+ while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) {
2182+ ret = vma_writer_flush(vmaw);
2183+ if (ret < 0) {
2184+ vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed");
2185+ break;
2186+ }
2187+ }
2188+
2189+ qemu_co_mutex_unlock(&vmaw->flush_lock);
2190+
2191+ return ret;
2192+}
2193+
2194+
2195+int64_t coroutine_fn
2196+vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
6838f038 2197+ const unsigned char *buf, size_t *zero_bytes)
95259824
WB
2198+{
2199+ g_assert(vmaw != NULL);
2200+ g_assert(zero_bytes != NULL);
2201+
2202+ *zero_bytes = 0;
2203+
2204+ if (vmaw->status < 0) {
2205+ return vmaw->status;
2206+ }
2207+
2208+ if (!dev_id || !vmaw->stream_info[dev_id].size) {
2209+ vma_writer_set_error(vmaw, "vma_writer_write: "
2210+ "no such stream %d", dev_id);
2211+ return -1;
2212+ }
2213+
2214+ if (vmaw->stream_info[dev_id].finished) {
2215+ vma_writer_set_error(vmaw, "vma_writer_write: "
2216+ "stream already closed %d", dev_id);
2217+ return -1;
2218+ }
2219+
2220+
2221+ if (cluster_num >= (((uint64_t)1)<<32)) {
2222+ vma_writer_set_error(vmaw, "vma_writer_write: "
2223+ "cluster number out of range");
2224+ return -1;
2225+ }
2226+
2227+ if (dev_id == vmaw->vmstate_stream) {
2228+ if (cluster_num != vmaw->vmstate_clusters) {
2229+ vma_writer_set_error(vmaw, "vma_writer_write: "
2230+ "non sequential vmstate write");
2231+ }
2232+ vmaw->vmstate_clusters++;
2233+ } else if (cluster_num >= vmaw->stream_info[dev_id].cluster_count) {
2234+ vma_writer_set_error(vmaw, "vma_writer_write: cluster number too big");
2235+ return -1;
2236+ }
2237+
2238+ /* wait until buffer is available */
2239+ if (vma_writer_get_buffer(vmaw) < 0) {
2240+ vma_writer_set_error(vmaw, "vma_writer_write: "
2241+ "vma_writer_get_buffer failed");
2242+ return -1;
2243+ }
2244+
2245+ DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num);
2246+
2247+ uint16_t mask = 0;
2248+
2249+ if (buf) {
2250+ int i;
2251+ int bit = 1;
2252+ for (i = 0; i < 16; i++) {
6838f038 2253+ const unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
95259824
WB
2254+ if (!buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) {
2255+ mask |= bit;
2256+ memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock,
2257+ VMA_BLOCK_SIZE);
2258+ vmaw->outbuf_pos += VMA_BLOCK_SIZE;
2259+ } else {
2260+ DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
2261+ vmaw->stream_info[dev_id].zero_bytes += VMA_BLOCK_SIZE;
2262+ *zero_bytes += VMA_BLOCK_SIZE;
2263+ }
2264+
2265+ bit = bit << 1;
2266+ }
2267+ } else {
2268+ DPRINTF("VMA WRITE %zd ZERO CLUSTER\n", cluster_num);
2269+ vmaw->stream_info[dev_id].zero_bytes += VMA_CLUSTER_SIZE;
2270+ *zero_bytes += VMA_CLUSTER_SIZE;
2271+ }
2272+
2273+ uint64_t block_info = ((uint64_t)mask) << (32+16);
2274+ block_info |= ((uint64_t)dev_id) << 32;
2275+ block_info |= (cluster_num & 0xffffffff);
2276+ vmaw->outbuf_block_info[vmaw->outbuf_count] = block_info;
2277+
2278+ DPRINTF("VMA WRITE MASK %zd %zx\n", cluster_num, block_info);
2279+
2280+ vmaw->outbuf_count++;
2281+
2282+ /** NOTE: We allways write whole clusters, but we correctly set
2283+ * transferred bytes. So transferred == size when when everything
2284+ * went OK.
2285+ */
2286+ size_t transferred = VMA_CLUSTER_SIZE;
2287+
2288+ if (dev_id != vmaw->vmstate_stream) {
2289+ uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
2290+ if (last > vmaw->stream_info[dev_id].size) {
2291+ uint64_t diff = last - vmaw->stream_info[dev_id].size;
2292+ if (diff >= VMA_CLUSTER_SIZE) {
2293+ vma_writer_set_error(vmaw, "vma_writer_write: "
2294+ "read after last cluster");
2295+ return -1;
2296+ }
2297+ transferred -= diff;
2298+ }
2299+ }
2300+
2301+ vmaw->stream_info[dev_id].transferred += transferred;
2302+
2303+ return transferred;
2304+}
2305+
67af0fa4
WB
2306+void vma_writer_error_propagate(VmaWriter *vmaw, Error **errp)
2307+{
2308+ if (vmaw->status < 0 && *errp == NULL) {
2309+ error_setg(errp, "%s", vmaw->errmsg);
2310+ }
2311+}
2312+
95259824
WB
2313+int vma_writer_close(VmaWriter *vmaw, Error **errp)
2314+{
2315+ g_assert(vmaw != NULL);
2316+
2317+ int i;
2318+
67af0fa4
WB
2319+ while (vmaw->co_writer) {
2320+ aio_poll(qemu_get_aio_context(), true);
95259824
WB
2321+ }
2322+
67af0fa4
WB
2323+ assert(vmaw->co_writer == NULL);
2324+
95259824
WB
2325+ if (vmaw->cmd) {
2326+ if (pclose(vmaw->cmd) < 0) {
2327+ vma_writer_set_error(vmaw, "vma_writer_close: "
2328+ "pclose failed - %s", g_strerror(errno));
2329+ }
2330+ } else {
2331+ if (close(vmaw->fd) < 0) {
2332+ vma_writer_set_error(vmaw, "vma_writer_close: "
2333+ "close failed - %s", g_strerror(errno));
2334+ }
2335+ }
2336+
2337+ for (i = 0; i <= 255; i++) {
2338+ VmaStreamInfo *si = &vmaw->stream_info[i];
2339+ if (si->size) {
2340+ if (!si->finished) {
2341+ vma_writer_set_error(vmaw, "vma_writer_close: "
2342+ "detected open stream '%s'", si->devname);
2343+ } else if ((si->transferred != si->size) &&
2344+ (i != vmaw->vmstate_stream)) {
2345+ vma_writer_set_error(vmaw, "vma_writer_close: "
2346+ "incomplete stream '%s' (%zd != %zd)",
2347+ si->devname, si->transferred, si->size);
2348+ }
2349+ }
2350+ }
2351+
2352+ for (i = 0; i <= 255; i++) {
2353+ vmaw->stream_info[i].finished = 1; /* mark as closed */
2354+ }
2355+
2356+ vmaw->closed = 1;
2357+
2358+ if (vmaw->status < 0 && *errp == NULL) {
2359+ error_setg(errp, "%s", vmaw->errmsg);
2360+ }
2361+
2362+ return vmaw->status;
2363+}
2364+
2365+void vma_writer_destroy(VmaWriter *vmaw)
2366+{
2367+ assert(vmaw);
2368+
2369+ int i;
2370+
2371+ for (i = 0; i <= 255; i++) {
2372+ if (vmaw->stream_info[i].devname) {
2373+ g_free(vmaw->stream_info[i].devname);
2374+ }
2375+ }
2376+
2377+ if (vmaw->md5csum) {
2378+ g_checksum_free(vmaw->md5csum);
2379+ }
2380+
95259824
WB
2381+ g_free(vmaw);
2382+}
2383diff --git a/vma.c b/vma.c
2384new file mode 100644
6838f038 2385index 0000000000..1b59fd1555
95259824
WB
2386--- /dev/null
2387+++ b/vma.c
6838f038 2388@@ -0,0 +1,756 @@
95259824
WB
2389+/*
2390+ * VMA: Virtual Machine Archive
2391+ *
2392+ * Copyright (C) 2012-2013 Proxmox Server Solutions
2393+ *
2394+ * Authors:
2395+ * Dietmar Maurer (dietmar@proxmox.com)
2396+ *
2397+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
2398+ * See the COPYING file in the top-level directory.
2399+ *
2400+ */
2401+
2402+#include "qemu/osdep.h"
2403+#include <glib.h>
2404+
2405+#include "vma.h"
2406+#include "qemu-common.h"
2407+#include "qemu/error-report.h"
2408+#include "qemu/main-loop.h"
a544966d 2409+#include "qapi/qmp/qstring.h"
67af0fa4 2410+#include "sysemu/block-backend.h"
95259824
WB
2411+
/*
 * Print the usage summary to stdout and terminate the process with
 * exit status 1. This function does not return.
 */
static void help(void)
{
    /* keep in sync with the getopt() option strings of the sub-commands */
    const char *help_msg =
        "usage: vma command [command options]\n"
        "\n"
        "vma list <filename>\n"
        "vma config <filename> [-c config]\n"
        "vma create <filename> [-v] [-c config] pathname ...\n"
        "vma extract <filename> [-v] [-r <fifo>] <targetdir>\n"
        "vma verify <filename> [-v]\n"
        ;

    printf("%s", help_msg);
    exit(1);
}
2427+
/*
 * Split an optional "devname=" prefix off a path argument.
 *
 * @path:    "devname=path" or just "path" (must not be NULL)
 * @devname: receives a newly allocated device name: the text before the
 *           first '=', or "disk<index>" when there is no '=' and
 *           @index >= 0, or NULL when there is no '=' and @index < 0
 * @index:   number used to generate a default device name
 *
 * Returns a pointer into @path at the start of the actual path.
 */
static const char *extract_devname(const char *path, char **devname, int index)
{
    assert(path);

    const char *eq = strchr(path, '=');

    if (eq != NULL) {
        *devname = g_strndup(path, eq - path);
        return eq + 1;
    }

    *devname = (index >= 0) ? g_strdup_printf("disk%d", index) : NULL;

    return path;
}
2447+
2448+static void print_content(VmaReader *vmar)
2449+{
2450+ assert(vmar);
2451+
2452+ VmaHeader *head = vma_reader_get_header(vmar);
2453+
2454+ GList *l = vma_reader_get_config_data(vmar);
2455+ while (l && l->data) {
2456+ VmaConfigData *cdata = (VmaConfigData *)l->data;
2457+ l = g_list_next(l);
2458+ printf("CFG: size: %d name: %s\n", cdata->len, cdata->name);
2459+ }
2460+
2461+ int i;
2462+ VmaDeviceInfo *di;
2463+ for (i = 1; i < 255; i++) {
2464+ di = vma_reader_get_device_info(vmar, i);
2465+ if (di) {
2466+ if (strcmp(di->devname, "vmstate") == 0) {
2467+ printf("VMSTATE: dev_id=%d memory: %zd\n", i, di->size);
2468+ } else {
2469+ printf("DEV: dev_id=%d size: %zd devname: %s\n",
2470+ i, di->size, di->devname);
2471+ }
2472+ }
2473+ }
2474+ /* ctime is the last entry we print */
2475+ printf("CTIME: %s", ctime(&head->ctime));
2476+ fflush(stdout);
2477+}
2478+
2479+static int list_content(int argc, char **argv)
2480+{
2481+ int c, ret = 0;
2482+ const char *filename;
2483+
2484+ for (;;) {
2485+ c = getopt(argc, argv, "h");
2486+ if (c == -1) {
2487+ break;
2488+ }
2489+ switch (c) {
2490+ case '?':
2491+ case 'h':
2492+ help();
2493+ break;
2494+ default:
2495+ g_assert_not_reached();
2496+ }
2497+ }
2498+
2499+ /* Get the filename */
2500+ if ((optind + 1) != argc) {
2501+ help();
2502+ }
2503+ filename = argv[optind++];
2504+
2505+ Error *errp = NULL;
2506+ VmaReader *vmar = vma_reader_create(filename, &errp);
2507+
2508+ if (!vmar) {
2509+ g_error("%s", error_get_pretty(errp));
2510+ }
2511+
2512+ print_content(vmar);
2513+
2514+ vma_reader_destroy(vmar);
2515+
2516+ return ret;
2517+}
2518+
/* One entry of the restore map read from the fifo given with "-r". */
typedef struct RestoreMap {
    char *devname;   /* device name inside the archive (hash table key) */
    char *path;      /* target the device gets restored to */
    char *format;    /* explicit block driver name, NULL if none was given */
    bool write_zero; /* map line started with "1:" (true) or "0:" (false) */
} RestoreMap;
2525+
2526+static int extract_content(int argc, char **argv)
2527+{
2528+ int c, ret = 0;
2529+ int verbose = 0;
2530+ const char *filename;
2531+ const char *dirname;
2532+ const char *readmap = NULL;
2533+
2534+ for (;;) {
2535+ c = getopt(argc, argv, "hvr:");
2536+ if (c == -1) {
2537+ break;
2538+ }
2539+ switch (c) {
2540+ case '?':
2541+ case 'h':
2542+ help();
2543+ break;
2544+ case 'r':
2545+ readmap = optarg;
2546+ break;
2547+ case 'v':
2548+ verbose = 1;
2549+ break;
2550+ default:
2551+ help();
2552+ }
2553+ }
2554+
2555+ /* Get the filename */
2556+ if ((optind + 2) != argc) {
2557+ help();
2558+ }
2559+ filename = argv[optind++];
2560+ dirname = argv[optind++];
2561+
2562+ Error *errp = NULL;
2563+ VmaReader *vmar = vma_reader_create(filename, &errp);
2564+
2565+ if (!vmar) {
2566+ g_error("%s", error_get_pretty(errp));
2567+ }
2568+
2569+ if (mkdir(dirname, 0777) < 0) {
2570+ g_error("unable to create target directory %s - %s",
2571+ dirname, g_strerror(errno));
2572+ }
2573+
2574+ GList *l = vma_reader_get_config_data(vmar);
2575+ while (l && l->data) {
2576+ VmaConfigData *cdata = (VmaConfigData *)l->data;
2577+ l = g_list_next(l);
2578+ char *cfgfn = g_strdup_printf("%s/%s", dirname, cdata->name);
2579+ GError *err = NULL;
2580+ if (!g_file_set_contents(cfgfn, (gchar *)cdata->data, cdata->len,
2581+ &err)) {
2582+ g_error("unable to write file: %s", err->message);
2583+ }
2584+ }
2585+
2586+ GHashTable *devmap = g_hash_table_new(g_str_hash, g_str_equal);
2587+
2588+ if (readmap) {
2589+ print_content(vmar);
2590+
2591+ FILE *map = fopen(readmap, "r");
2592+ if (!map) {
2593+ g_error("unable to open fifo %s - %s", readmap, g_strerror(errno));
2594+ }
2595+
2596+ while (1) {
2597+ char inbuf[8192];
2598+ char *line = fgets(inbuf, sizeof(inbuf), map);
2599+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
2600+ break;
2601+ }
2602+ int len = strlen(line);
2603+ if (line[len - 1] == '\n') {
2604+ line[len - 1] = '\0';
2605+ if (len == 1) {
2606+ break;
2607+ }
2608+ }
2609+
67af0fa4
WB
2610+ char *format = NULL;
2611+ if (strncmp(line, "format=", sizeof("format=")-1) == 0) {
2612+ format = line + sizeof("format=")-1;
2613+ char *colon = strchr(format, ':');
2614+ if (!colon) {
2615+ g_error("read map failed - found only a format ('%s')", inbuf);
2616+ }
2617+ format = g_strndup(format, colon - format);
2618+ line = colon+1;
2619+ }
2620+
95259824
WB
2621+ const char *path;
2622+ bool write_zero;
2623+ if (line[0] == '0' && line[1] == ':') {
67af0fa4 2624+ path = line + 2;
95259824
WB
2625+ write_zero = false;
2626+ } else if (line[0] == '1' && line[1] == ':') {
67af0fa4 2627+ path = line + 2;
95259824
WB
2628+ write_zero = true;
2629+ } else {
2630+ g_error("read map failed - parse error ('%s')", inbuf);
2631+ }
2632+
2633+ char *devname = NULL;
2634+ path = extract_devname(path, &devname, -1);
2635+ if (!devname) {
2636+ g_error("read map failed - no dev name specified ('%s')",
2637+ inbuf);
2638+ }
2639+
2640+ RestoreMap *map = g_new0(RestoreMap, 1);
2641+ map->devname = g_strdup(devname);
2642+ map->path = g_strdup(path);
67af0fa4 2643+ map->format = format;
95259824
WB
2644+ map->write_zero = write_zero;
2645+
2646+ g_hash_table_insert(devmap, map->devname, map);
2647+
2648+ };
2649+ }
2650+
2651+ int i;
2652+ int vmstate_fd = -1;
2653+ guint8 vmstate_stream = 0;
2654+
67af0fa4
WB
2655+ BlockBackend *blk = NULL;
2656+
95259824
WB
2657+ for (i = 1; i < 255; i++) {
2658+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2659+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
2660+ vmstate_stream = i;
2661+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
2662+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
2663+ if (vmstate_fd < 0) {
2664+ g_error("create vmstate file '%s' failed - %s", statefn,
2665+ g_strerror(errno));
2666+ }
2667+ g_free(statefn);
2668+ } else if (di) {
2669+ char *devfn = NULL;
67af0fa4
WB
2670+ const char *format = NULL;
2671+ int flags = BDRV_O_RDWR | BDRV_O_NO_FLUSH;
95259824
WB
2672+ bool write_zero = true;
2673+
2674+ if (readmap) {
2675+ RestoreMap *map;
2676+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
2677+ if (map == NULL) {
2678+ g_error("no device name mapping for %s", di->devname);
2679+ }
2680+ devfn = map->path;
67af0fa4 2681+ format = map->format;
95259824
WB
2682+ write_zero = map->write_zero;
2683+ } else {
2684+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2685+ dirname, di->devname);
2686+ printf("DEVINFO %s %zd\n", devfn, di->size);
2687+
2688+ bdrv_img_create(devfn, "raw", NULL, NULL, NULL, di->size,
6838f038 2689+ flags, true, &errp);
95259824
WB
2690+ if (errp) {
2691+ g_error("can't create file %s: %s", devfn,
2692+ error_get_pretty(errp));
2693+ }
2694+
2695+ /* Note: we created an empty file above, so there is no
2696+ * need to write zeroes (so we generate a sparse file)
2697+ */
2698+ write_zero = false;
2699+ }
2700+
67af0fa4
WB
2701+ size_t devlen = strlen(devfn);
2702+ QDict *options = NULL;
2703+ if (format) {
2704+ /* explicit format from commandline */
2705+ options = qdict_new();
2706+ qdict_put(options, "driver", qstring_from_str(format));
2707+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
2708+ strncmp(devfn, "/dev/", 5) == 0)
2709+ {
2710+ /* This part is now deprecated for PVE as well (just as qemu
2711+ * deprecated not specifying an explicit raw format, too.
2712+ */
2713+ /* explicit raw format */
2714+ options = qdict_new();
2715+ qdict_put(options, "driver", qstring_from_str("raw"));
2716+ }
2717+
2718+
2719+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
95259824
WB
2720+ g_error("can't open file %s - %s", devfn,
2721+ error_get_pretty(errp));
2722+ }
67af0fa4
WB
2723+
2724+ if (vma_reader_register_bs(vmar, i, blk, write_zero, &errp) < 0) {
95259824
WB
2725+ g_error("%s", error_get_pretty(errp));
2726+ }
2727+
2728+ if (!readmap) {
2729+ g_free(devfn);
2730+ }
2731+ }
2732+ }
2733+
2734+ if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
2735+ g_error("restore failed - %s", error_get_pretty(errp));
2736+ }
2737+
2738+ if (!readmap) {
2739+ for (i = 1; i < 255; i++) {
2740+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2741+ if (di && (i != vmstate_stream)) {
2742+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2743+ dirname, di->devname);
2744+ char *fn = g_strdup_printf("%s/disk-%s.raw",
2745+ dirname, di->devname);
2746+ if (rename(tmpfn, fn) != 0) {
2747+ g_error("rename %s to %s failed - %s",
2748+ tmpfn, fn, g_strerror(errno));
2749+ }
2750+ }
2751+ }
2752+ }
2753+
2754+ vma_reader_destroy(vmar);
2755+
67af0fa4
WB
2756+ blk_unref(blk);
2757+
2758+ bdrv_close_all();
2759+
2760+ return ret;
2761+}
2762+
2763+static int verify_content(int argc, char **argv)
2764+{
2765+ int c, ret = 0;
2766+ int verbose = 0;
2767+ const char *filename;
2768+
2769+ for (;;) {
2770+ c = getopt(argc, argv, "hv");
2771+ if (c == -1) {
2772+ break;
2773+ }
2774+ switch (c) {
2775+ case '?':
2776+ case 'h':
2777+ help();
2778+ break;
2779+ case 'v':
2780+ verbose = 1;
2781+ break;
2782+ default:
2783+ help();
2784+ }
2785+ }
2786+
2787+ /* Get the filename */
2788+ if ((optind + 1) != argc) {
2789+ help();
2790+ }
2791+ filename = argv[optind++];
2792+
2793+ Error *errp = NULL;
2794+ VmaReader *vmar = vma_reader_create(filename, &errp);
2795+
2796+ if (!vmar) {
2797+ g_error("%s", error_get_pretty(errp));
2798+ }
2799+
2800+ if (verbose) {
2801+ print_content(vmar);
2802+ }
2803+
2804+ if (vma_reader_verify(vmar, verbose, &errp) < 0) {
2805+ g_error("verify failed - %s", error_get_pretty(errp));
2806+ }
2807+
2808+ vma_reader_destroy(vmar);
2809+
95259824
WB
2810+ bdrv_close_all();
2811+
2812+ return ret;
2813+}
2814+
2815+typedef struct BackupJob {
67af0fa4 2816+ BlockBackend *target;
95259824
WB
2817+ int64_t len;
2818+ VmaWriter *vmaw;
2819+ uint8_t dev_id;
2820+} BackupJob;
2821+
2822+#define BACKUP_SECTORS_PER_CLUSTER (VMA_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
2823+
67af0fa4
WB
2824+static void coroutine_fn backup_run_empty(void *opaque)
2825+{
2826+ VmaWriter *vmaw = (VmaWriter *)opaque;
2827+
2828+ vma_writer_flush_output(vmaw);
2829+
2830+ Error *err = NULL;
2831+ if (vma_writer_close(vmaw, &err) != 0) {
2832+ g_warning("vma_writer_close failed %s", error_get_pretty(err));
2833+ }
2834+}
2835+
95259824
WB
2836+static void coroutine_fn backup_run(void *opaque)
2837+{
2838+ BackupJob *job = (BackupJob *)opaque;
2839+ struct iovec iov;
2840+ QEMUIOVector qiov;
2841+
2842+ int64_t start, end;
2843+ int ret = 0;
2844+
67af0fa4 2845+ unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
95259824
WB
2846+
2847+ start = 0;
2848+ end = DIV_ROUND_UP(job->len / BDRV_SECTOR_SIZE,
2849+ BACKUP_SECTORS_PER_CLUSTER);
2850+
2851+ for (; start < end; start++) {
2852+ iov.iov_base = buf;
2853+ iov.iov_len = VMA_CLUSTER_SIZE;
2854+ qemu_iovec_init_external(&qiov, &iov, 1);
2855+
67af0fa4
WB
2856+ ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
2857+ VMA_CLUSTER_SIZE, &qiov, 0);
95259824
WB
2858+ if (ret < 0) {
2859+ vma_writer_set_error(job->vmaw, "read error", -1);
2860+ goto out;
2861+ }
2862+
2863+ size_t zb = 0;
2864+ if (vma_writer_write(job->vmaw, job->dev_id, start, buf, &zb) < 0) {
2865+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed", -1);
2866+ goto out;
2867+ }
2868+ }
2869+
2870+
2871+out:
2872+ if (vma_writer_close_stream(job->vmaw, job->dev_id) <= 0) {
2873+ Error *err = NULL;
2874+ if (vma_writer_close(job->vmaw, &err) != 0) {
2875+ g_warning("vma_writer_close failed %s", error_get_pretty(err));
2876+ }
2877+ }
2878+}
2879+
2880+static int create_archive(int argc, char **argv)
2881+{
2882+ int i, c;
2883+ int verbose = 0;
2884+ const char *archivename;
2885+ GList *config_files = NULL;
2886+
2887+ for (;;) {
2888+ c = getopt(argc, argv, "hvc:");
2889+ if (c == -1) {
2890+ break;
2891+ }
2892+ switch (c) {
2893+ case '?':
2894+ case 'h':
2895+ help();
2896+ break;
2897+ case 'c':
2898+ config_files = g_list_append(config_files, optarg);
2899+ break;
2900+ case 'v':
2901+ verbose = 1;
2902+ break;
2903+ default:
2904+ g_assert_not_reached();
2905+ }
2906+ }
2907+
2908+
67af0fa4
WB
2909+ /* make sure we an archive name */
2910+ if ((optind + 1) > argc) {
95259824
WB
2911+ help();
2912+ }
2913+
2914+ archivename = argv[optind++];
2915+
2916+ uuid_t uuid;
2917+ uuid_generate(uuid);
2918+
2919+ Error *local_err = NULL;
2920+ VmaWriter *vmaw = vma_writer_create(archivename, uuid, &local_err);
2921+
2922+ if (vmaw == NULL) {
2923+ g_error("%s", error_get_pretty(local_err));
2924+ }
2925+
2926+ GList *l = config_files;
2927+ while (l && l->data) {
2928+ char *name = l->data;
2929+ char *cdata = NULL;
2930+ gsize clen = 0;
2931+ GError *err = NULL;
2932+ if (!g_file_get_contents(name, &cdata, &clen, &err)) {
2933+ unlink(archivename);
2934+ g_error("Unable to read file: %s", err->message);
2935+ }
2936+
2937+ if (vma_writer_add_config(vmaw, name, cdata, clen) != 0) {
2938+ unlink(archivename);
2939+ g_error("Unable to append config data %s (len = %zd)",
2940+ name, clen);
2941+ }
2942+ l = g_list_next(l);
2943+ }
2944+
67af0fa4 2945+ int devcount = 0;
95259824
WB
2946+ while (optind < argc) {
2947+ const char *path = argv[optind++];
2948+ char *devname = NULL;
67af0fa4 2949+ path = extract_devname(path, &devname, devcount++);
95259824
WB
2950+
2951+ Error *errp = NULL;
67af0fa4 2952+ BlockBackend *target;
95259824 2953+
67af0fa4
WB
2954+ target = blk_new_open(path, NULL, NULL, 0, &errp);
2955+ if (!target) {
95259824
WB
2956+ unlink(archivename);
2957+ g_error("bdrv_open '%s' failed - %s", path, error_get_pretty(errp));
2958+ }
67af0fa4 2959+ int64_t size = blk_getlength(target);
95259824
WB
2960+ int dev_id = vma_writer_register_stream(vmaw, devname, size);
2961+ if (dev_id <= 0) {
2962+ unlink(archivename);
2963+ g_error("vma_writer_register_stream '%s' failed", devname);
2964+ }
2965+
2966+ BackupJob *job = g_new0(BackupJob, 1);
2967+ job->len = size;
67af0fa4 2968+ job->target = target;
95259824
WB
2969+ job->vmaw = vmaw;
2970+ job->dev_id = dev_id;
2971+
2972+ Coroutine *co = qemu_coroutine_create(backup_run, job);
2973+ qemu_coroutine_enter(co);
2974+ }
2975+
2976+ VmaStatus vmastat;
2977+ int percent = 0;
2978+ int last_percent = -1;
2979+
67af0fa4
WB
2980+ if (devcount) {
2981+ while (1) {
2982+ main_loop_wait(false);
2983+ vma_writer_get_status(vmaw, &vmastat);
95259824 2984+
67af0fa4 2985+ if (verbose) {
95259824 2986+
67af0fa4
WB
2987+ uint64_t total = 0;
2988+ uint64_t transferred = 0;
2989+ uint64_t zero_bytes = 0;
95259824 2990+
67af0fa4
WB
2991+ int i;
2992+ for (i = 0; i < 256; i++) {
2993+ if (vmastat.stream_info[i].size) {
2994+ total += vmastat.stream_info[i].size;
2995+ transferred += vmastat.stream_info[i].transferred;
2996+ zero_bytes += vmastat.stream_info[i].zero_bytes;
2997+ }
95259824 2998+ }
67af0fa4
WB
2999+ percent = (transferred*100)/total;
3000+ if (percent != last_percent) {
3001+ fprintf(stderr, "progress %d%% %zd/%zd %zd\n", percent,
3002+ transferred, total, zero_bytes);
3003+ fflush(stderr);
95259824 3004+
67af0fa4
WB
3005+ last_percent = percent;
3006+ }
95259824 3007+ }
95259824 3008+
67af0fa4
WB
3009+ if (vmastat.closed) {
3010+ break;
3011+ }
95259824
WB
3012+ }
3013+ } else {
3014+ Coroutine *co = qemu_coroutine_create(backup_run_empty, vmaw);
3015+ qemu_coroutine_enter(co);
3016+ while (1) {
3017+ main_loop_wait(false);
3018+ vma_writer_get_status(vmaw, &vmastat);
3019+ if (vmastat.closed) {
3020+ break;
3021+ }
3022+ }
3023+ }
3024+
3025+ bdrv_drain_all();
3026+
3027+ vma_writer_get_status(vmaw, &vmastat);
3028+
3029+ if (verbose) {
3030+ for (i = 0; i < 256; i++) {
3031+ VmaStreamInfo *si = &vmastat.stream_info[i];
3032+ if (si->size) {
3033+ fprintf(stderr, "image %s: size=%zd zeros=%zd saved=%zd\n",
3034+ si->devname, si->size, si->zero_bytes,
3035+ si->size - si->zero_bytes);
3036+ }
3037+ }
3038+ }
3039+
3040+ if (vmastat.status < 0) {
3041+ unlink(archivename);
3042+ g_error("creating vma archive failed");
3043+ }
3044+
3045+ return 0;
3046+}
3047+
67af0fa4
WB
3048+static int dump_config(int argc, char **argv)
3049+{
3050+ int c, ret = 0;
3051+ const char *filename;
3052+ const char *config_name = "qemu-server.conf";
3053+
3054+ for (;;) {
3055+ c = getopt(argc, argv, "hc:");
3056+ if (c == -1) {
3057+ break;
3058+ }
3059+ switch (c) {
3060+ case '?':
3061+ case 'h':
3062+ help();
3063+ break;
3064+ case 'c':
3065+ config_name = optarg;
3066+ break;
3067+ default:
3068+ help();
3069+ }
3070+ }
3071+
3072+ /* Get the filename */
3073+ if ((optind + 1) != argc) {
3074+ help();
3075+ }
3076+ filename = argv[optind++];
3077+
3078+ Error *errp = NULL;
3079+ VmaReader *vmar = vma_reader_create(filename, &errp);
3080+
3081+ if (!vmar) {
3082+ g_error("%s", error_get_pretty(errp));
3083+ }
3084+
3085+ int found = 0;
3086+ GList *l = vma_reader_get_config_data(vmar);
3087+ while (l && l->data) {
3088+ VmaConfigData *cdata = (VmaConfigData *)l->data;
3089+ l = g_list_next(l);
3090+ if (strcmp(cdata->name, config_name) == 0) {
3091+ found = 1;
3092+ fwrite(cdata->data, cdata->len, 1, stdout);
3093+ break;
3094+ }
3095+ }
3096+
3097+ vma_reader_destroy(vmar);
3098+
3099+ bdrv_close_all();
3100+
3101+ if (!found) {
3102+ fprintf(stderr, "unable to find configuration data '%s'\n", config_name);
3103+ return -1;
3104+ }
3105+
3106+ return ret;
3107+}
3108+
95259824
WB
3109+int main(int argc, char **argv)
3110+{
3111+ const char *cmdname;
3112+ Error *main_loop_err = NULL;
3113+
3114+ error_set_progname(argv[0]);
3115+
3116+ if (qemu_init_main_loop(&main_loop_err)) {
3117+ g_error("%s", error_get_pretty(main_loop_err));
3118+ }
3119+
3120+ bdrv_init();
3121+
3122+ if (argc < 2) {
3123+ help();
3124+ }
3125+
3126+ cmdname = argv[1];
3127+ argc--; argv++;
3128+
3129+
3130+ if (!strcmp(cmdname, "list")) {
3131+ return list_content(argc, argv);
3132+ } else if (!strcmp(cmdname, "create")) {
3133+ return create_archive(argc, argv);
3134+ } else if (!strcmp(cmdname, "extract")) {
3135+ return extract_content(argc, argv);
67af0fa4
WB
3136+ } else if (!strcmp(cmdname, "verify")) {
3137+ return verify_content(argc, argv);
3138+ } else if (!strcmp(cmdname, "config")) {
3139+ return dump_config(argc, argv);
95259824
WB
3140+ }
3141+
3142+ help();
3143+ return 0;
3144+}
3145diff --git a/vma.h b/vma.h
3146new file mode 100644
6838f038 3147index 0000000000..c895c97f6d
95259824
WB
3148--- /dev/null
3149+++ b/vma.h
6838f038 3150@@ -0,0 +1,150 @@
95259824
WB
3151+/*
3152+ * VMA: Virtual Machine Archive
3153+ *
3154+ * Copyright (C) Proxmox Server Solutions
3155+ *
3156+ * Authors:
3157+ * Dietmar Maurer (dietmar@proxmox.com)
3158+ *
3159+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
3160+ * See the COPYING file in the top-level directory.
3161+ *
3162+ */
3163+
3164+#ifndef BACKUP_VMA_H
3165+#define BACKUP_VMA_H
3166+
3167+#include <uuid/uuid.h>
3168+#include "qapi/error.h"
3169+#include "block/block.h"
3170+
/* A block is the unit of zero detection; a cluster consists of 16 blocks. */
#define VMA_BLOCK_BITS 12
#define VMA_BLOCK_SIZE (1 << VMA_BLOCK_BITS)
#define VMA_CLUSTER_BITS (VMA_BLOCK_BITS + 4)
#define VMA_CLUSTER_SIZE (1 << VMA_CLUSTER_BITS)

#if VMA_CLUSTER_SIZE != 65536
#error unexpected cluster size
#endif

/* An extent is a 512 byte header followed by the data of up to 59 clusters. */
#define VMA_EXTENT_HEADER_SIZE 512
#define VMA_BLOCKS_PER_EXTENT 59
#define VMA_MAX_CONFIGS 256

#define VMA_MAX_EXTENT_SIZE \
    (VMA_EXTENT_HEADER_SIZE + VMA_CLUSTER_SIZE * VMA_BLOCKS_PER_EXTENT)
#if VMA_MAX_EXTENT_SIZE != 3867136
#error unexpected VMA_EXTENT_SIZE
#endif

/* File Format Definitions */

/* magic numbers: the ASCII bytes "VMA\0" and "VMAE" in file order */
#define VMA_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|0x00))
#define VMA_EXTENT_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|'E'))
3194+
/* Per-device entry of the archive header (32 bytes, 12 of them in use). */
typedef struct VmaDeviceInfoHeader {
    uint32_t devname_ptr; /* offset into blob_buffer table */
    uint32_t reserved0;
    uint64_t size;        /* device size in bytes */
    uint64_t reserved1;
    uint64_t reserved2;
} VmaDeviceInfoHeader;
3202+
3203+typedef struct VmaHeader {
3204+ uint32_t magic;
3205+ uint32_t version;
3206+ unsigned char uuid[16];
3207+ int64_t ctime;
3208+ unsigned char md5sum[16];
3209+
3210+ uint32_t blob_buffer_offset;
3211+ uint32_t blob_buffer_size;
3212+ uint32_t header_size;
3213+
3214+ unsigned char reserved[1984];
3215+
3216+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
3217+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
3218+
3219+ uint32_t reserved1;
3220+
3221+ VmaDeviceInfoHeader dev_info[256];
3222+} VmaHeader;
3223+
3224+typedef struct VmaExtentHeader {
3225+ uint32_t magic;
3226+ uint16_t reserved1;
3227+ uint16_t block_count;
3228+ unsigned char uuid[16];
3229+ unsigned char md5sum[16];
3230+ uint64_t blockinfo[VMA_BLOCKS_PER_EXTENT];
3231+} VmaExtentHeader;
3232+
/* functions/definitions to read/write vma files */

/* Opaque handles; the structures are defined by the reader/writer code. */
typedef struct VmaReader VmaReader;
typedef struct VmaWriter VmaWriter;
3238+
3239+typedef struct VmaConfigData {
3240+ const char *name;
3241+ const void *data;
3242+ uint32_t len;
3243+} VmaConfigData;
3244+
3245+typedef struct VmaStreamInfo {
3246+ uint64_t size;
3247+ uint64_t cluster_count;
3248+ uint64_t transferred;
3249+ uint64_t zero_bytes;
3250+ int finished;
3251+ char *devname;
3252+} VmaStreamInfo;
3253+
3254+typedef struct VmaStatus {
3255+ int status;
3256+ bool closed;
3257+ char errmsg[8192];
3258+ char uuid_str[37];
3259+ VmaStreamInfo stream_info[256];
3260+} VmaStatus;
3261+
3262+typedef struct VmaDeviceInfo {
3263+ uint64_t size; /* device size in bytes */
3264+ const char *devname;
3265+} VmaDeviceInfo;
3266+
3267+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp);
3268+int vma_writer_close(VmaWriter *vmaw, Error **errp);
67af0fa4 3269+void vma_writer_error_propagate(VmaWriter *vmaw, Error **errp);
95259824
WB
3270+void vma_writer_destroy(VmaWriter *vmaw);
3271+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
3272+ size_t len);
3273+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
3274+ size_t size);
3275+
3276+int64_t coroutine_fn vma_writer_write(VmaWriter *vmaw, uint8_t dev_id,
6838f038
WB
3277+ int64_t cluster_num,
3278+ const unsigned char *buf,
95259824
WB
3279+ size_t *zero_bytes);
3280+
3281+int coroutine_fn vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id);
67af0fa4 3282+int coroutine_fn vma_writer_flush_output(VmaWriter *vmaw);
95259824
WB
3283+
3284+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
3285+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
3286+
3287+
3288+VmaReader *vma_reader_create(const char *filename, Error **errp);
3289+void vma_reader_destroy(VmaReader *vmar);
3290+VmaHeader *vma_reader_get_header(VmaReader *vmar);
3291+GList *vma_reader_get_config_data(VmaReader *vmar);
3292+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
3293+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
67af0fa4 3294+ BlockBackend *target, bool write_zeroes,
95259824
WB
3295+ Error **errp);
3296+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
3297+ Error **errp);
67af0fa4 3298+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);
95259824
WB
3299+
3300+#endif /* BACKUP_VMA_H */
3301--
45169293 33022.11.0
95259824 3303