]> git.proxmox.com Git - pve-qemu-kvm.git/blame - debian/patches/0004-introduce-new-vma-archive-format.patch
update to qemu 1.3 final
[pve-qemu-kvm.git] / debian / patches / 0004-introduce-new-vma-archive-format.patch
CommitLineData
efa8e5de 1From 12a179d66c3ead14c821dbc570f2448c8c8b355f Mon Sep 17 00:00:00 2001
5ad5891c
DM
2From: Dietmar Maurer <dietmar@proxmox.com>
3Date: Tue, 13 Nov 2012 11:11:38 +0100
4Subject: [PATCH v3 4/6] introduce new vma archive format
5
6This is a very simple archive format, see docs/specs/vma_spec.txt
7
8Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
9---
10 Makefile | 3 +-
11 Makefile.objs | 2 +-
12 blockdev.c | 6 +-
13 docs/specs/vma_spec.txt | 24 ++
14 vma-reader.c | 772 ++++++++++++++++++++++++++++++++++++++++
efa8e5de
DM
15 vma-writer.c | 907 +++++++++++++++++++++++++++++++++++++++++++++++
16 vma.c | 550 ++++++++++++++++++++++++++++
5ad5891c 17 vma.h | 145 ++++++++
efa8e5de 18 8 files changed, 2405 insertions(+), 4 deletions(-)
5ad5891c
DM
19 create mode 100644 docs/specs/vma_spec.txt
20 create mode 100644 vma-reader.c
21 create mode 100644 vma-writer.c
22 create mode 100644 vma.c
23 create mode 100644 vma.h
24
25diff --git a/Makefile b/Makefile
26index 9ecbcbb..30a9268 100644
27--- a/Makefile
28+++ b/Makefile
29@@ -100,7 +100,7 @@ defconfig:
30
31 -include config-all-devices.mak
32
33-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
34+all: $(DOCS) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all
35
36 config-host.h: config-host.h-timestamp
37 config-host.h-timestamp: config-host.mak
38@@ -194,6 +194,7 @@ tools-obj-$(CONFIG_POSIX) += compatfd.o
39 qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) libqemustub.a
40 qemu-nbd$(EXESUF): qemu-nbd.o $(tools-obj-y) $(block-obj-y) libqemustub.a
41 qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y) libqemustub.a
42+vma$(EXESUF): vma.o vma-writer.o vma-reader.o $(tools-obj-y) $(block-obj-y) libqemustub.a
43
44 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
45
46diff --git a/Makefile.objs b/Makefile.objs
47index cb46be5..b5732e2 100644
48--- a/Makefile.objs
49+++ b/Makefile.objs
50@@ -48,7 +48,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
51 block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o
52 block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o
53 block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o
54-block-obj-y += backup.o
55+block-obj-y += vma-writer.o backup.o
56 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
57 block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
58 block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o
59diff --git a/blockdev.c b/blockdev.c
efa8e5de 60index 9e85ffe..139e350 100644
5ad5891c
DM
61--- a/blockdev.c
62+++ b/blockdev.c
63@@ -21,6 +21,7 @@
64 #include "trace.h"
65 #include "arch_init.h"
66 #include "backup.h"
67+#include "vma.h"
68
69 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
70
71@@ -1483,10 +1484,11 @@ char *qmp_backup(const char *backupfile, bool has_format, const char *format,
72 /* Todo: try to auto-detect format based on file name */
73 format = has_format ? format : "vma";
74
75- /* fixme: find driver for specifued format */
76 const BackupDriver *driver = NULL;
77
78- if (!driver) {
79+ if (strcmp(format, "vma") == 0) {
80+ driver = &backup_vma_driver;
81+ } else {
82 error_set(errp, ERROR_CLASS_GENERIC_ERROR,
83 "no backup driver for format '%s'", format);
84 return NULL;
85diff --git a/docs/specs/vma_spec.txt b/docs/specs/vma_spec.txt
86new file mode 100644
87index 0000000..052c629
88--- /dev/null
89+++ b/docs/specs/vma_spec.txt
90@@ -0,0 +1,24 @@
91+=Virtual Machine Archive format (VMA)=
92+
93+This format contains a header which includes the VM configuration as
94+binary blobs, and a list of devices (dev_id, name).
95+
96+The actual VM image data is stored inside extents. An extent contains
97+up to 64 clusters, and starts with a 512 byte header containing
98+additional information for those clusters.
99+
100+We use a cluster size of 65536, and use 8 bytes for each
101+cluster in the header to store the following information:
102+
103+* 1 byte dev_id (to identify the drive)
104+* 2 bytes zero indicator (mark zero regions (16x4096))
105+* 4 bytes cluster number
106+* 1 byte not used (reserved)
107+
108+We only store non-zero blocks (such block is 4096 bytes).
109+
110+Each archive is marked with a uuid. The archive header and all
111+extent headers include that uuid and a MD5 checksum (over header
112+data).
113+
114+
115diff --git a/vma-reader.c b/vma-reader.c
116new file mode 100644
117index 0000000..154c96b
118--- /dev/null
119+++ b/vma-reader.c
120@@ -0,0 +1,772 @@
121+/*
122+ * VMA: Virtual Machine Archive
123+ *
124+ * Copyright (C) 2012 Proxmox Server Solutions
125+ *
126+ * Authors:
127+ * Dietmar Maurer (dietmar@proxmox.com)
128+ *
129+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
130+ * See the COPYING file in the top-level directory.
131+ *
132+ */
133+
134+#include <stdio.h>
135+#include <errno.h>
136+#include <unistd.h>
137+#include <stdio.h>
138+#include <string.h>
139+#include <sys/types.h>
140+#include <sys/stat.h>
141+#include <fcntl.h>
142+#include <glib.h>
143+#include <uuid/uuid.h>
144+
145+#include "qemu-common.h"
146+#include "qemu_socket.h"
147+#include "qemu-coroutine.h"
148+#include "qemu-aio.h"
149+#include "qemu/ratelimit.h"
150+#include "vma.h"
151+#include "block.h"
152+
153+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
154+
155+static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
156+
157+typedef struct VmaRestoreState {
158+ BlockDriverState *bs;
159+ bool write_zeroes;
160+ unsigned long *bitmap;
161+ int bitmap_size;
162+} VmaRestoreState;
163+
164+struct VmaReader {
165+ int fd;
166+ GChecksum *md5csum;
167+ GHashTable *blob_hash;
168+ unsigned char *head_data;
169+ VmaDeviceInfo devinfo[256];
170+ VmaRestoreState rstate[256];
171+ GList *cdata_list;
172+ guint8 vmstate_stream;
173+ uint32_t vmstate_clusters;
174+};
175+
176+static guint
177+g_int32_hash(gconstpointer v)
178+{
179+ return *(const uint32_t *)v;
180+}
181+
182+static gboolean
183+g_int32_equal(gconstpointer v1, gconstpointer v2)
184+{
185+ return *((const uint32_t *)v1) == *((const uint32_t *)v2);
186+}
187+
188+static int vma_reader_get_bitmap(VmaRestoreState *rstate, int64_t cluster_num)
189+{
190+ assert(rstate);
191+ assert(rstate->bitmap);
192+
193+ unsigned long val, idx, bit;
194+
195+ idx = cluster_num / BITS_PER_LONG;
196+
197+ assert(rstate->bitmap_size > idx);
198+
199+ bit = cluster_num % BITS_PER_LONG;
200+ val = rstate->bitmap[idx];
201+
202+ return !!(val & (1UL << bit));
203+}
204+
205+static void vma_reader_set_bitmap(VmaRestoreState *rstate, int64_t cluster_num,
206+ int dirty)
207+{
208+ assert(rstate);
209+ assert(rstate->bitmap);
210+
211+ unsigned long val, idx, bit;
212+
213+ idx = cluster_num / BITS_PER_LONG;
214+
215+ assert(rstate->bitmap_size > idx);
216+
217+ bit = cluster_num % BITS_PER_LONG;
218+ val = rstate->bitmap[idx];
219+ if (dirty) {
220+ if (!(val & (1UL << bit))) {
221+ val |= 1UL << bit;
222+ }
223+ } else {
224+ if (val & (1UL << bit)) {
225+ val &= ~(1UL << bit);
226+ }
227+ }
228+ rstate->bitmap[idx] = val;
229+}
230+
/* Descriptor of one length-prefixed blob inside the header blob buffer. */
typedef struct VmaBlob {
    /* blob position relative to blob_buffer_offset; used as hash key */
    uint32_t start;
    /* payload length in bytes (the 2 byte length prefix is not counted) */
    uint32_t len;
    /* points into the reader's head_data buffer, not separately allocated */
    void *data;
} VmaBlob;
236+
237+static const VmaBlob *get_header_blob(VmaReader *vmar, uint32_t pos)
238+{
239+ assert(vmar);
240+ assert(vmar->blob_hash);
241+
242+ return g_hash_table_lookup(vmar->blob_hash, &pos);
243+}
244+
245+static const char *get_header_str(VmaReader *vmar, uint32_t pos)
246+{
247+ const VmaBlob *blob = get_header_blob(vmar, pos);
248+ if (!blob) {
249+ return NULL;
250+ }
251+ const char *res = (char *)blob->data;
252+ if (res[blob->len-1] != '\0') {
253+ return NULL;
254+ }
255+ return res;
256+}
257+
/* read(2) wrapper that restarts the call when it is interrupted (EINTR). */
static ssize_t
safe_read(int fd, unsigned char *buf, size_t count)
{
    for (;;) {
        ssize_t got = read(fd, buf, count);

        if (got >= 0 || errno != EINTR) {
            return got;
        }
    }
}
269+
/*
 * Read up to @len bytes from @fd into @buf, looping over short reads.
 *
 * Returns the number of bytes read; this is less than @len only when EOF
 * was reached. Returns -1 if a read fails.
 */
static ssize_t
full_read(int fd, unsigned char *buf, size_t len)
{
    size_t total = 0;

    while (len > 0) {
        ssize_t got = safe_read(fd, buf, len);

        if (got < 0) {
            return -1;
        }

        if (got == 0) {
            /* EOF: report what we have so far */
            break;
        }

        buf += got;
        total += got;
        len -= got;
    }

    return total;
}
300+
301+void vma_reader_destroy(VmaReader *vmar)
302+{
303+ assert(vmar);
304+
305+ if (vmar->fd >= 0) {
306+ close(vmar->fd);
307+ }
308+
309+ if (vmar->cdata_list) {
310+ g_list_free(vmar->cdata_list);
311+ }
312+
313+ int i;
314+ for (i = 1; i < 256; i++) {
315+ if (vmar->rstate[i].bitmap) {
316+ g_free(vmar->rstate[i].bitmap);
317+ }
318+ }
319+
320+ if (vmar->md5csum) {
321+ g_checksum_free(vmar->md5csum);
322+ }
323+
324+ if (vmar->blob_hash) {
325+ g_hash_table_destroy(vmar->blob_hash);
326+ }
327+
328+ if (vmar->head_data) {
329+ g_free(vmar->head_data);
330+ }
331+
332+ g_free(vmar);
333+
334+};
335+
336+static int vma_reader_read_head(VmaReader *vmar, Error **errp)
337+{
338+ assert(vmar);
339+ assert(errp);
340+ assert(*errp == NULL);
341+
342+ unsigned char md5sum[16];
343+ int i;
344+ int ret = 0;
345+
346+ vmar->head_data = g_malloc(sizeof(VmaHeader));
347+
348+ if (full_read(vmar->fd, vmar->head_data, sizeof(VmaHeader)) !=
349+ sizeof(VmaHeader)) {
350+ error_setg(errp, "can't read vma header - %s",
351+ errno ? strerror(errno) : "got EOF");
352+ return -1;
353+ }
354+
355+ VmaHeader *h = (VmaHeader *)vmar->head_data;
356+
357+ if (h->magic != VMA_MAGIC) {
358+ error_setg(errp, "not a vma file - wrong magic number");
359+ return -1;
360+ }
361+
362+ uint32_t header_size = GUINT32_FROM_BE(h->header_size);
363+ int need = header_size - sizeof(VmaHeader);
364+ if (need <= 0) {
365+ error_setg(errp, "wrong vma header size %d", header_size);
366+ return -1;
367+ }
368+
369+ vmar->head_data = g_realloc(vmar->head_data, header_size);
370+ h = (VmaHeader *)vmar->head_data;
371+
372+ if (full_read(vmar->fd, vmar->head_data + sizeof(VmaHeader), need) !=
373+ need) {
374+ error_setg(errp, "can't read vma header data - %s",
375+ errno ? strerror(errno) : "got EOF");
376+ return -1;
377+ }
378+
379+ memcpy(md5sum, h->md5sum, 16);
380+ memset(h->md5sum, 0, 16);
381+
382+ g_checksum_reset(vmar->md5csum);
383+ g_checksum_update(vmar->md5csum, vmar->head_data, header_size);
384+ gsize csize = 16;
385+ g_checksum_get_digest(vmar->md5csum, (guint8 *)(h->md5sum), &csize);
386+
387+ if (memcmp(md5sum, h->md5sum, 16) != 0) {
388+ error_setg(errp, "wrong vma header chechsum");
389+ return -1;
390+ }
391+
392+ /* we can modify header data after checksum verify */
393+ h->header_size = header_size;
394+
395+ h->version = GUINT32_FROM_BE(h->version);
396+ if (h->version != 1) {
397+ error_setg(errp, "wrong vma version %d", h->version);
398+ return -1;
399+ }
400+
401+ h->ctime = GUINT64_FROM_BE(h->ctime);
402+ h->blob_buffer_offset = GUINT32_FROM_BE(h->blob_buffer_offset);
403+ h->blob_buffer_size = GUINT32_FROM_BE(h->blob_buffer_size);
404+
405+ uint32_t bstart = h->blob_buffer_offset + 1;
406+ uint32_t bend = h->blob_buffer_offset + h->blob_buffer_size;
407+
408+ if (bstart <= sizeof(VmaHeader)) {
409+ error_setg(errp, "wrong vma blob buffer offset %d",
410+ h->blob_buffer_offset);
411+ return -1;
412+ }
413+
414+ if (bend > header_size) {
415+ error_setg(errp, "wrong vma blob buffer size %d/%d",
416+ h->blob_buffer_offset, h->blob_buffer_size);
417+ return -1;
418+ }
419+
420+ while ((bstart + 2) <= bend) {
421+ uint32_t size = vmar->head_data[bstart] +
422+ (vmar->head_data[bstart+1] << 8);
423+ if ((bstart + size + 2) <= bend) {
424+ VmaBlob *blob = g_new0(VmaBlob, 1);
425+ blob->start = bstart - h->blob_buffer_offset;
426+ blob->len = size;
427+ blob->data = vmar->head_data + bstart + 2;
428+ g_hash_table_insert(vmar->blob_hash, &blob->start, blob);
429+ }
430+ bstart += size + 2;
431+ }
432+
433+
434+ int count = 0;
435+ for (i = 1; i < 256; i++) {
436+ VmaDeviceInfoHeader *dih = &h->dev_info[i];
437+ uint32_t devname_ptr = GUINT32_FROM_BE(dih->devname_ptr);
438+ uint64_t size = GUINT64_FROM_BE(dih->size);
439+ const char *devname = get_header_str(vmar, devname_ptr);
440+
441+ if (size && devname) {
442+ count++;
443+ vmar->devinfo[i].size = size;
444+ vmar->devinfo[i].devname = devname;
445+
446+ if (strcmp(devname, "vmstate") == 0) {
447+ vmar->vmstate_stream = i;
448+ }
449+ }
450+ }
451+
452+ if (!count) {
453+ error_setg(errp, "vma does not contain data");
454+ return -1;
455+ }
456+
457+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
458+ uint32_t name_ptr = GUINT32_FROM_BE(h->config_names[i]);
459+ uint32_t data_ptr = GUINT32_FROM_BE(h->config_data[i]);
460+
461+ if (!(name_ptr && data_ptr)) {
462+ continue;
463+ }
464+ const char *name = get_header_str(vmar, name_ptr);
465+ const VmaBlob *blob = get_header_blob(vmar, data_ptr);
466+
467+ if (!(name && blob)) {
468+ error_setg(errp, "vma contains invalid data pointers");
469+ return -1;
470+ }
471+
472+ VmaConfigData *cdata = g_new0(VmaConfigData, 1);
473+ cdata->name = name;
474+ cdata->data = blob->data;
475+ cdata->len = blob->len;
476+
477+ vmar->cdata_list = g_list_append(vmar->cdata_list, cdata);
478+ }
479+
480+ return ret;
481+};
482+
483+VmaReader *vma_reader_create(const char *filename, Error **errp)
484+{
485+ assert(filename);
486+ assert(errp);
487+
488+ VmaReader *vmar = g_new0(VmaReader, 1);
489+
490+ vmar->fd = open(filename, O_RDONLY);
491+
492+ if (vmar->fd < 0) {
493+ error_setg(errp, "can't open file %s - %s\n", filename,
494+ strerror(errno));
495+ goto err;
496+ }
497+
498+ vmar->md5csum = g_checksum_new(G_CHECKSUM_MD5);
499+ if (!vmar->md5csum) {
500+ error_setg(errp, "can't allocate cmsum\n");
501+ goto err;
502+ }
503+
504+ vmar->blob_hash = g_hash_table_new_full(g_int32_hash, g_int32_equal,
505+ NULL, g_free);
506+
507+ if (vma_reader_read_head(vmar, errp) < 0) {
508+ goto err;
509+ }
510+
511+ return vmar;
512+
513+err:
514+ if (vmar) {
515+ vma_reader_destroy(vmar);
516+ }
517+
518+ return NULL;
519+}
520+
521+VmaHeader *vma_reader_get_header(VmaReader *vmar)
522+{
523+ assert(vmar);
524+ assert(vmar->head_data);
525+
526+ return (VmaHeader *)(vmar->head_data);
527+}
528+
529+GList *vma_reader_get_config_data(VmaReader *vmar)
530+{
531+ assert(vmar);
532+ assert(vmar->head_data);
533+
534+ return vmar->cdata_list;
535+}
536+
537+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id)
538+{
539+ assert(vmar);
540+ assert(dev_id);
541+
542+ if (vmar->devinfo[dev_id].size && vmar->devinfo[dev_id].devname) {
543+ return &vmar->devinfo[dev_id];
544+ }
545+
546+ return NULL;
547+}
548+
549+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockDriverState *bs,
550+ bool write_zeroes, Error **errp)
551+{
552+ assert(vmar);
553+ assert(bs != NULL);
554+ assert(dev_id);
555+ assert(vmar->rstate[dev_id].bs == NULL);
556+
557+ int64_t size = bdrv_getlength(bs);
558+ if (size != vmar->devinfo[dev_id].size) {
559+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
560+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
561+ size, vmar->devinfo[dev_id].size);
562+ return -1;
563+ }
564+
565+ vmar->rstate[dev_id].bs = bs;
566+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
567+
568+ int64_t bitmap_size = (size/BDRV_SECTOR_SIZE) +
569+ (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG - 1;
570+ bitmap_size /= (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG;
571+
572+ vmar->rstate[dev_id].bitmap_size = bitmap_size;
573+ vmar->rstate[dev_id].bitmap = g_new0(unsigned long, bitmap_size);
574+
575+ return 0;
576+}
577+
/* write(2) wrapper that restarts the call when it is interrupted (EINTR). */
static ssize_t safe_write(int fd, void *buf, size_t count)
{
    for (;;) {
        ssize_t put = write(fd, buf, count);

        if (put >= 0 || errno != EINTR) {
            return put;
        }
    }
}
588+
/*
 * Write all @len bytes of @buf to @fd, looping over short writes.
 *
 * Returns @len on success or a negative value if a write fails. The return
 * type is signed so that the error value is representable; callers test the
 * result with "< 0".
 */
static ssize_t full_write(int fd, void *buf, size_t len)
{
    /* byte pointer: arithmetic on void * is not standard C */
    unsigned char *pos = buf;
    size_t total = 0;

    while (len > 0) {
        ssize_t n = safe_write(fd, pos, len);
        if (n < 0) {
            return n;
        }
        pos += n;
        total += n;
        len -= n;
    }

    return total;
}
613+
614+static int restore_write_data(VmaReader *vmar, guint8 dev_id,
615+ BlockDriverState *bs, int vmstate_fd,
616+ unsigned char *buf, int64_t sector_num,
617+ int nb_sectors, Error **errp)
618+{
619+ assert(vmar);
620+
621+ if (dev_id == vmar->vmstate_stream) {
622+ if (vmstate_fd >= 0) {
623+ int len = nb_sectors * BDRV_SECTOR_SIZE;
624+ int res = full_write(vmstate_fd, buf, len);
625+ if (res < 0) {
626+ error_setg(errp, "write vmstate failed %d", res);
627+ return -1;
628+ }
629+ }
630+ } else {
631+ int res = bdrv_write(bs, sector_num, buf, nb_sectors);
632+ if (res < 0) {
633+ error_setg(errp, "bdrv_write to %s failed (%d)",
634+ bdrv_get_device_name(bs), res);
635+ return -1;
636+ }
637+ }
638+ return 0;
639+}
640+static int restore_extent(VmaReader *vmar, unsigned char *buf,
641+ int extent_size, int vmstate_fd, Error **errp)
642+{
643+ assert(vmar);
644+ assert(buf);
645+
646+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
647+ int start = VMA_EXTENT_HEADER_SIZE;
648+ int i;
649+
650+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
651+ uint64_t block_info = GUINT64_FROM_BE(ehead->blockinfo[i]);
652+ uint32_t cluster_num = block_info & 0xffffffff;
653+ uint8_t dev_id = (block_info >> 32) & 0xff;
654+ uint16_t mask = block_info >> (32+16);
655+ int64_t max_sector;
656+
657+ if (!dev_id) {
658+ continue;
659+ }
660+
661+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
662+ BlockDriverState *bs = NULL;
663+
664+ if (dev_id != vmar->vmstate_stream) {
665+ bs = rstate->bs;
666+ if (!bs) {
667+ error_setg(errp, "got wrong dev id %d", dev_id);
668+ return -1;
669+ }
670+
671+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
672+ error_setg(errp, "found duplicated cluster %d for stream %s",
673+ cluster_num, vmar->devinfo[dev_id].devname);
674+ return -1;
675+ }
676+ vma_reader_set_bitmap(rstate, cluster_num, 1);
677+
678+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
679+ } else {
680+ max_sector = G_MAXINT64;
681+ if (cluster_num != vmar->vmstate_clusters) {
682+ error_setg(errp, "found out of order vmstate data");
683+ return -1;
684+ }
685+ vmar->vmstate_clusters++;
686+ }
687+
688+ /* try to write whole clusters to speedup restore */
689+ if (mask == 0xffff) {
690+ if ((start + VMA_CLUSTER_SIZE) > extent_size) {
691+ error_setg(errp, "short vma extent - too many blocks");
692+ return -1;
693+ }
694+ int64_t sector_num = (cluster_num * VMA_CLUSTER_SIZE) /
695+ BDRV_SECTOR_SIZE;
696+ int64_t end_sector = sector_num +
697+ VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE;
698+
699+ if (end_sector > max_sector) {
700+ end_sector = max_sector;
701+ }
702+
703+ if (end_sector <= sector_num) {
704+ error_setg(errp, "got wrong block address - write bejond end");
705+ return -1;
706+ }
707+
708+ int nb_sectors = end_sector - sector_num;
709+ if (restore_write_data(vmar, dev_id, bs, vmstate_fd, buf + start,
710+ sector_num, nb_sectors, errp) < 0) {
711+ return -1;
712+ }
713+
714+ start += VMA_CLUSTER_SIZE;
715+ } else {
716+ int j;
717+ int bit = 1;
718+
719+ for (j = 0; j < 16; j++) {
720+ int64_t sector_num = (cluster_num*VMA_CLUSTER_SIZE +
721+ j*VMA_BLOCK_SIZE)/BDRV_SECTOR_SIZE;
722+
723+ int64_t end_sector = sector_num +
724+ VMA_BLOCK_SIZE/BDRV_SECTOR_SIZE;
725+ if (end_sector > max_sector) {
726+ end_sector = max_sector;
727+ }
728+
729+ if (mask & bit) {
730+ if ((start + VMA_BLOCK_SIZE) > extent_size) {
731+ error_setg(errp, "short vma extent - too many blocks");
732+ return -1;
733+ }
734+
735+ if (end_sector <= sector_num) {
736+ error_setg(errp, "got wrong block address - "
737+ "write bejond end");
738+ return -1;
739+ }
740+
741+ int nb_sectors = end_sector - sector_num;
742+ if (restore_write_data(vmar, dev_id, bs, vmstate_fd,
743+ buf + start, sector_num,
744+ nb_sectors, errp) < 0) {
745+ return -1;
746+ }
747+
748+ start += VMA_BLOCK_SIZE;
749+
750+ } else {
751+
752+ if (rstate->write_zeroes & (end_sector > sector_num)) {
753+ /* Todo: use bdrv_co_write_zeroes (but that need to
754+ * be run inside coroutine?)
755+ */
756+ int nb_sectors = end_sector - sector_num;
757+ if (restore_write_data(vmar, dev_id, bs, vmstate_fd,
758+ zero_vma_block, sector_num,
759+ nb_sectors, errp) < 0) {
760+ return -1;
761+ }
762+ }
763+ }
764+
765+ bit = bit << 1;
766+ }
767+ }
768+ }
769+
770+ if (start != extent_size) {
771+ error_setg(errp, "vma extent error - missing blocks");
772+ return -1;
773+ }
774+
775+ return 0;
776+}
777+
778+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, Error **errp)
779+{
780+ assert(vmar);
781+ assert(vmar->head_data);
782+
783+ int ret = 0;
784+ unsigned char buf[VMA_MAX_EXTENT_SIZE];
785+ int buf_pos = 0;
786+ unsigned char md5sum[16];
787+ VmaHeader *h = (VmaHeader *)vmar->head_data;
788+
789+
790+ while (1) {
791+ int bytes = full_read(vmar->fd, buf + buf_pos, sizeof(buf) - buf_pos);
792+ if (bytes < 0) {
793+ error_setg(errp, "read failed - %s", strerror(errno));
794+ return -1;
795+ }
796+
797+ buf_pos += bytes;
798+
799+ if (!buf_pos) {
800+ break; /* EOF */
801+ }
802+
803+ if (buf_pos < VMA_EXTENT_HEADER_SIZE) {
804+ error_setg(errp, "read short extent (%d bytes)", buf_pos);
805+ return -1;
806+ }
807+
808+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
809+
810+ /* extract md5sum */
811+ memcpy(md5sum, ehead->md5sum, sizeof(ehead->md5sum));
812+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
813+
814+ g_checksum_reset(vmar->md5csum);
815+ g_checksum_update(vmar->md5csum, buf, VMA_EXTENT_HEADER_SIZE);
816+ gsize csize = 16;
817+ g_checksum_get_digest(vmar->md5csum, ehead->md5sum, &csize);
818+
819+ if (memcmp(md5sum, ehead->md5sum, 16) != 0) {
820+ error_setg(errp, "wrong vma extent header chechsum");
821+ return -1;
822+ }
823+
824+ if (memcmp(h->uuid, ehead->uuid, sizeof(ehead->uuid)) != 0) {
825+ error_setg(errp, "wrong vma extent uuid");
826+ return -1;
827+ }
828+
829+ if (ehead->magic != VMA_EXTENT_MAGIC || ehead->reserved1 != 0) {
830+ error_setg(errp, "wrong vma extent header magic");
831+ return -1;
832+ }
833+
834+ int block_count = GUINT16_FROM_BE(ehead->block_count);
835+ int extent_size = VMA_EXTENT_HEADER_SIZE + block_count*VMA_BLOCK_SIZE;
836+
837+ if (buf_pos < extent_size) {
838+ error_setg(errp, "short vma extent (%d < %d)", buf_pos,
839+ extent_size);
840+ return -1;
841+ }
842+
843+ if (restore_extent(vmar, buf, extent_size, vmstate_fd, errp) < 0) {
844+ return -1;
845+ }
846+
847+ if (buf_pos > extent_size) {
848+ memmove(buf, buf + extent_size, buf_pos - extent_size);
849+ buf_pos = buf_pos - extent_size;
850+ } else {
851+ buf_pos = 0;
852+ }
853+ }
854+
855+ bdrv_drain_all();
856+
857+ int i;
858+ for (i = 1; i < 256; i++) {
859+ VmaRestoreState *rstate = &vmar->rstate[i];
860+ if (!rstate->bs) {
861+ continue;
862+ }
863+
864+ if (bdrv_flush(rstate->bs) < 0) {
865+ error_setg(errp, "vma bdrv_flush %s failed",
866+ vmar->devinfo[i].devname);
867+ return -1;
868+ }
869+
870+ if (vmar->devinfo[i].size &&
871+ (strcmp(vmar->devinfo[i].devname, "vmstate") != 0)) {
872+ assert(rstate->bitmap);
873+
874+ int64_t cluster_num, end;
875+
876+ end = (vmar->devinfo[i].size + VMA_CLUSTER_SIZE - 1) /
877+ VMA_CLUSTER_SIZE;
878+
879+ for (cluster_num = 0; cluster_num < end; cluster_num++) {
880+ if (!vma_reader_get_bitmap(rstate, cluster_num)) {
881+ error_setg(errp, "detected missing cluster %zd "
882+ "for stream %s", cluster_num,
883+ vmar->devinfo[i].devname);
884+ return -1;
885+ }
886+ }
887+ }
888+ }
889+
890+ return ret;
891+}
892+
893diff --git a/vma-writer.c b/vma-writer.c
894new file mode 100644
efa8e5de 895index 0000000..917c77f
5ad5891c
DM
896--- /dev/null
897+++ b/vma-writer.c
efa8e5de 898@@ -0,0 +1,907 @@
5ad5891c
DM
899+/*
900+ * VMA: Virtual Machine Archive
901+ *
902+ * Copyright (C) 2012 Proxmox Server Solutions
903+ *
904+ * Authors:
905+ * Dietmar Maurer (dietmar@proxmox.com)
906+ *
907+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
908+ * See the COPYING file in the top-level directory.
909+ *
910+ */
911+
912+#include <stdio.h>
913+#include <errno.h>
914+#include <unistd.h>
915+#include <stdio.h>
916+#include <string.h>
917+#include <sys/types.h>
918+#include <sys/stat.h>
919+#include <fcntl.h>
920+#include <glib.h>
921+#include <uuid/uuid.h>
922+
923+#include "qemu-common.h"
924+#include "qemu_socket.h"
925+#include "qemu-coroutine.h"
926+#include "qemu-aio.h"
927+#include "qemu/ratelimit.h"
928+#include "vma.h"
929+#include "block.h"
930+
931+#define DEBUG_VMA 0
932+
933+#define DPRINTF(fmt, ...)\
934+ do { if (DEBUG_VMA) { printf("vma: " fmt, ## __VA_ARGS__); } } while (0)
935+
936+#define WRITE_BUFFERS 5
937+
938+typedef struct VmaAIOCB VmaAIOCB;
939+struct VmaAIOCB {
940+ VmaWriter *vmaw;
941+ unsigned char buffer[VMA_MAX_EXTENT_SIZE];
942+ size_t bytes;
943+ Coroutine *co;
944+};
945+
946+struct VmaWriter {
947+ int fd;
948+ FILE *cmd;
949+ int status;
950+ char errmsg[8192];
951+ uuid_t uuid;
952+ bool header_written;
953+ bool closed;
954+
955+ /* we always write extents */
956+ unsigned char outbuf[VMA_MAX_EXTENT_SIZE];
957+ int outbuf_pos; /* in bytes */
958+ int outbuf_count; /* in VMA_BLOCKS */
959+ uint64_t outbuf_block_info[VMA_BLOCKS_PER_EXTENT];
960+
961+ VmaAIOCB aiocbs[WRITE_BUFFERS];
962+ CoQueue wqueue;
963+
964+ GChecksum *md5csum;
965+ CoMutex writer_lock;
966+ CoMutex flush_lock;
967+ Coroutine *co_writer;
968+ RateLimit limit;
969+ uint64_t delay_ns;
970+
971+ /* drive informations */
972+ VmaStreamInfo stream_info[256];
973+ guint stream_count;
974+
975+ guint8 vmstate_stream;
976+ uint32_t vmstate_clusters;
977+
978+ /* header blob table */
979+ char *header_blob_table;
980+ uint32_t header_blob_table_size;
981+ uint32_t header_blob_table_pos;
982+
983+ /* store for config blobs */
984+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
985+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
986+ uint32_t config_count;
987+};
988+
989+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...)
990+{
991+ va_list ap;
992+
993+ if (vmaw->status < 0) {
994+ return;
995+ }
996+
997+ vmaw->status = -1;
998+
999+ va_start(ap, fmt);
1000+ g_vsnprintf(vmaw->errmsg, sizeof(vmaw->errmsg), fmt, ap);
1001+ va_end(ap);
1002+
1003+ DPRINTF("vma_writer_set_error: %s\n", vmaw->errmsg);
1004+}
1005+
1006+static uint32_t allocate_header_blob(VmaWriter *vmaw, const char *data,
1007+ size_t len)
1008+{
1009+ if (len > 65535) {
1010+ return 0;
1011+ }
1012+
1013+ if (!vmaw->header_blob_table ||
1014+ (vmaw->header_blob_table_size <
1015+ (vmaw->header_blob_table_pos + len + 2))) {
1016+ int newsize = vmaw->header_blob_table_size + ((len + 2 + 511)/512)*512;
1017+
1018+ vmaw->header_blob_table = g_realloc(vmaw->header_blob_table, newsize);
1019+ memset(vmaw->header_blob_table + vmaw->header_blob_table_size,
1020+ 0, newsize - vmaw->header_blob_table_size);
1021+ vmaw->header_blob_table_size = newsize;
1022+ }
1023+
1024+ uint32_t cpos = vmaw->header_blob_table_pos;
1025+ vmaw->header_blob_table[cpos] = len & 255;
1026+ vmaw->header_blob_table[cpos+1] = (len >> 8) & 255;
1027+ memcpy(vmaw->header_blob_table + cpos + 2, data, len);
1028+ vmaw->header_blob_table_pos += len + 2;
1029+ return cpos;
1030+}
1031+
1032+static uint32_t allocate_header_string(VmaWriter *vmaw, const char *str)
1033+{
1034+ assert(vmaw);
1035+
1036+ size_t len = strlen(str) + 1;
1037+
1038+ return allocate_header_blob(vmaw, str, len);
1039+}
1040+
1041+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
1042+ gsize len)
1043+{
1044+ assert(vmaw);
1045+ assert(!vmaw->header_written);
1046+ assert(vmaw->config_count < VMA_MAX_CONFIGS);
1047+ assert(name);
1048+ assert(data);
1049+ assert(len);
1050+
1051+ uint32_t name_ptr = allocate_header_string(vmaw, name);
1052+ if (!name_ptr) {
1053+ return -1;
1054+ }
1055+
1056+ uint32_t data_ptr = allocate_header_blob(vmaw, data, len);
1057+ if (!data_ptr) {
1058+ return -1;
1059+ }
1060+
1061+ vmaw->config_names[vmaw->config_count] = name_ptr;
1062+ vmaw->config_data[vmaw->config_count] = data_ptr;
1063+
1064+ vmaw->config_count++;
1065+
1066+ return 0;
1067+}
1068+
1069+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
1070+ size_t size)
1071+{
1072+ assert(vmaw);
1073+ assert(devname);
1074+ assert(!vmaw->status);
1075+
1076+ if (vmaw->header_written) {
1077+ vma_writer_set_error(vmaw, "vma_writer_register_stream: header "
1078+ "already written");
1079+ return -1;
1080+ }
1081+
1082+ guint n = vmaw->stream_count + 1;
1083+
1084+ /* we can have dev_ids form 1 to 255 (0 reserved)
1085+ * 255(-1) reseverd for safety
1086+ */
1087+ if (n > 254) {
1088+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1089+ "too many drives");
1090+ return -1;
1091+ }
1092+
1093+ if (size <= 0) {
1094+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1095+ "got strange size %zd", size);
1096+ return -1;
1097+ }
1098+
1099+ DPRINTF("vma_writer_register_stream %s %zu %d\n", devname, size, n);
1100+
1101+ vmaw->stream_info[n].devname = g_strdup(devname);
1102+ vmaw->stream_info[n].size = size;
1103+
1104+ vmaw->stream_info[n].cluster_count = (size + VMA_CLUSTER_SIZE - 1) /
1105+ VMA_CLUSTER_SIZE;
1106+
1107+ vmaw->stream_count = n;
1108+
1109+ if (strcmp(devname, "vmstate") == 0) {
1110+ vmaw->vmstate_stream = n;
1111+ }
1112+
1113+ return n;
1114+}
1115+
1116+static void vma_co_continue_write(void *opaque)
1117+{
1118+ VmaWriter *vmaw = opaque;
1119+
1120+ qemu_aio_set_fd_handler(vmaw->fd, NULL, NULL, NULL, NULL);
1121+
1122+ DPRINTF("vma_co_continue_write\n");
1123+ qemu_coroutine_enter(vmaw->co_writer, NULL);
1124+}
1125+
1126+static ssize_t coroutine_fn
1127+vma_co_write(VmaWriter *vmaw, const void *buf, size_t bytes)
1128+{
1129+ size_t done = 0;
1130+ ssize_t ret;
1131+
1132+ /* atomic writes (we cannot interleave writes) */
1133+ qemu_co_mutex_lock(&vmaw->writer_lock);
1134+
1135+ DPRINTF("vma_co_write enter %zd\n", bytes);
1136+
1137+ while (done < bytes) {
1138+ ret = write(vmaw->fd, buf + done, bytes - done);
1139+ if (ret > 0) {
1140+ done += ret;
1141+ DPRINTF("vma_co_write written %zd %zd\n", done, ret);
1142+ } else if (ret < 0) {
1143+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
1144+ DPRINTF("vma_co_write yield %zd\n", done);
1145+
1146+ vmaw->co_writer = qemu_coroutine_self();
1147+ qemu_aio_set_fd_handler(vmaw->fd, NULL, vma_co_continue_write,
1148+ NULL, vmaw);
1149+
1150+ qemu_coroutine_yield();
1151+ DPRINTF("vma_co_write restart %zd\n", done);
1152+ } else {
1153+ vma_writer_set_error(vmaw, "vma_co_write write error - %s",
1154+ strerror(errno));
1155+ done = -1; /* always return failure for partial writes */
1156+ break;
1157+ }
1158+ } else if (ret == 0) {
1159+ /* should not happen - simply try again */
1160+ }
1161+ }
1162+
1163+ qemu_co_mutex_unlock(&vmaw->writer_lock);
1164+
1165+ DPRINTF("vma_co_write leave %zd\n", done);
1166+ return done;
1167+}
1168+
1169+static void coroutine_fn vma_co_writer_task(void *opaque)
1170+{
1171+ VmaAIOCB *cb = opaque;
1172+
1173+ DPRINTF("vma_co_writer_task start\n");
1174+
1175+ int64_t done = vma_co_write(cb->vmaw, cb->buffer, cb->bytes);
1176+ DPRINTF("vma_co_writer_task write done %zd\n", done);
1177+
1178+ if (done != cb->bytes) {
1179+ DPRINTF("vma_co_writer_task failed write %zd %zd", cb->bytes, done);
1180+ vma_writer_set_error(cb->vmaw, "vma_co_writer_task failed write %zd",
1181+ done);
1182+ }
1183+
1184+ cb->bytes = 0;
1185+
1186+ qemu_co_queue_next(&cb->vmaw->wqueue);
1187+
1188+ DPRINTF("vma_co_writer_task end\n");
1189+}
1190+
1191+static void coroutine_fn vma_queue_flush(VmaWriter *vmaw)
1192+{
1193+ DPRINTF("vma_queue_flush enter\n");
1194+
1195+ assert(vmaw);
1196+
1197+ while (1) {
1198+ int i;
1199+ VmaAIOCB *cb = NULL;
1200+ for (i = 0; i < WRITE_BUFFERS; i++) {
1201+ if (vmaw->aiocbs[i].bytes) {
1202+ cb = &vmaw->aiocbs[i];
1203+ DPRINTF("FOUND USED AIO BUFFER %d %zd\n", i,
1204+ vmaw->aiocbs[i].bytes);
1205+ break;
1206+ }
1207+ }
1208+ if (!cb) {
1209+ break;
1210+ }
1211+ qemu_co_queue_wait(&vmaw->wqueue);
1212+ }
1213+
1214+ DPRINTF("vma_queue_flush leave\n");
1215+}
1216+
1217+/**
1218+ * NOTE: pipe buffer size in only 4096 bytes on linux (see 'ulimit -a')
1219+ * So we need to create a coroutione to allow 'parallel' execution.
1220+ */
1221+static ssize_t coroutine_fn
1222+vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes)
1223+{
1224+ DPRINTF("vma_queue_write enter %zd\n", bytes);
1225+
1226+ assert(vmaw);
1227+ assert(buf);
1228+ assert(bytes <= VMA_MAX_EXTENT_SIZE);
1229+
1230+ VmaAIOCB *cb = NULL;
1231+ while (!cb) {
1232+ int i;
1233+ for (i = 0; i < WRITE_BUFFERS; i++) {
1234+ if (!vmaw->aiocbs[i].bytes) {
1235+ cb = &vmaw->aiocbs[i];
1236+ break;
1237+ }
1238+ }
1239+ if (!cb) {
1240+ qemu_co_queue_wait(&vmaw->wqueue);
1241+ }
1242+ }
1243+
1244+ memcpy(cb->buffer, buf, bytes);
1245+ cb->bytes = bytes;
1246+ cb->vmaw = vmaw;
1247+
1248+ DPRINTF("vma_queue_write start %zd\n", bytes);
1249+ cb->co = qemu_coroutine_create(vma_co_writer_task);
1250+ qemu_coroutine_enter(cb->co, cb);
1251+
1252+ DPRINTF("vma_queue_write leave\n");
1253+
1254+ return bytes;
1255+}
1256+
1257+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, int64_t speed,
1258+ Error **errp)
1259+{
1260+ const char *p;
1261+
1262+ assert(sizeof(VmaHeader) == (4096 + 8192));
1263+ assert(sizeof(VmaExtentHeader) == 512);
1264+
1265+ VmaWriter *vmaw = g_new0(VmaWriter, 1);
1266+ vmaw->fd = -1;
1267+
1268+ vmaw->md5csum = g_checksum_new(G_CHECKSUM_MD5);
1269+ if (!vmaw->md5csum) {
1270+ error_setg(errp, "can't allocate cmsum\n");
1271+ goto err;
1272+ }
1273+
1274+ if (strstart(filename, "exec:", &p)) {
1275+ vmaw->cmd = popen(p, "w");
1276+ if (vmaw->cmd == NULL) {
1277+ error_setg(errp, "can't popen command '%s' - %s\n", p,
1278+ strerror(errno));
1279+ goto err;
1280+ }
1281+ vmaw->fd = fileno(vmaw->cmd);
1282+ socket_set_nonblock(vmaw->fd);
1283+
1284+ } else {
efa8e5de
DM
1285+ struct stat st;
1286+ int oflags;
1287+ if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) {
1288+ oflags = O_NONBLOCK|O_WRONLY;
1289+ } else {
1290+ oflags = O_NONBLOCK|O_WRONLY|O_CREAT|O_EXCL;
1291+ }
1292+ vmaw->fd = open(filename, oflags, 0644);
5ad5891c
DM
1293+ if (vmaw->fd < 0) {
1294+ error_setg(errp, "can't open file %s - %s\n", filename,
1295+ strerror(errno));
1296+ goto err;
1297+ }
1298+ }
1299+
1300+ vmaw->outbuf_count = 0;
1301+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1302+
1303+ vmaw->header_blob_table_pos = 1; /* start at pos 1 */
1304+
1305+ qemu_co_mutex_init(&vmaw->writer_lock);
1306+ qemu_co_mutex_init(&vmaw->flush_lock);
1307+ qemu_co_queue_init(&vmaw->wqueue);
1308+
1309+ uuid_copy(vmaw->uuid, uuid);
1310+
1311+ if (speed <= 0) {
1312+ speed = 10*1024*1024*1024LLU; /* default 10GB/s */
1313+ }
1314+
1315+ ratelimit_set_speed(&vmaw->limit, speed, 100000000ULL /* 0.1 sec */);
1316+
1317+ return vmaw;
1318+
1319+err:
1320+ if (vmaw) {
1321+ if (vmaw->cmd) {
1322+ pclose(vmaw->cmd);
1323+ } else if (vmaw->fd >= 0) {
1324+ close(vmaw->fd);
1325+ }
1326+
1327+ if (vmaw->md5csum) {
1328+ g_checksum_free(vmaw->md5csum);
1329+ }
1330+
1331+ g_free(vmaw);
1332+ }
1333+
1334+ return NULL;
1335+}
1336+
1337+static int coroutine_fn vma_write_header(VmaWriter *vmaw)
1338+{
1339+ assert(vmaw);
1340+ int header_clusters = 8;
1341+ char buf[65536*header_clusters];
1342+ VmaHeader *head = (VmaHeader *)buf;
1343+
1344+ int i;
1345+
1346+ DPRINTF("VMA WRITE HEADER\n");
1347+
1348+ if (vmaw->status < 0) {
1349+ return vmaw->status;
1350+ }
1351+
1352+ memset(buf, 0, sizeof(buf));
1353+
1354+ head->magic = VMA_MAGIC;
1355+ head->version = GUINT32_TO_BE(1); /* v1 */
1356+ memcpy(head->uuid, vmaw->uuid, 16);
1357+
1358+ time_t ctime = time(NULL);
1359+ head->ctime = GUINT64_TO_BE(ctime);
1360+
1361+ if (!vmaw->stream_count) {
1362+ return -1;
1363+ }
1364+
1365+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
1366+ head->config_names[i] = GUINT32_TO_BE(vmaw->config_names[i]);
1367+ head->config_data[i] = GUINT32_TO_BE(vmaw->config_data[i]);
1368+ }
1369+
1370+ /* 32 bytes per device (12 used currently) = 8192 bytes max */
1371+ for (i = 1; i <= 254; i++) {
1372+ VmaStreamInfo *si = &vmaw->stream_info[i];
1373+ if (si->size) {
1374+ assert(si->devname);
1375+ uint32_t devname_ptr = allocate_header_string(vmaw, si->devname);
1376+ if (!devname_ptr) {
1377+ return -1;
1378+ }
1379+ head->dev_info[i].devname_ptr = GUINT32_TO_BE(devname_ptr);
1380+ head->dev_info[i].size = GUINT64_TO_BE(si->size);
1381+ }
1382+ }
1383+
1384+ uint32_t header_size = sizeof(VmaHeader) + vmaw->header_blob_table_size;
1385+ head->header_size = GUINT32_TO_BE(header_size);
1386+
1387+ if (header_size > sizeof(buf)) {
1388+ return -1; /* just to be sure */
1389+ }
1390+
1391+ uint32_t blob_buffer_offset = sizeof(VmaHeader);
1392+ memcpy(buf + blob_buffer_offset, vmaw->header_blob_table,
1393+ vmaw->header_blob_table_size);
1394+ head->blob_buffer_offset = GUINT32_TO_BE(blob_buffer_offset);
1395+ head->blob_buffer_size = GUINT32_TO_BE(vmaw->header_blob_table_pos);
1396+
1397+ g_checksum_reset(vmaw->md5csum);
1398+ g_checksum_update(vmaw->md5csum, (const guchar *)buf, header_size);
1399+ gsize csize = 16;
1400+ g_checksum_get_digest(vmaw->md5csum, (guint8 *)(head->md5sum), &csize);
1401+
1402+ return vma_queue_write(vmaw, buf, header_size);
1403+}
1404+
1405+static int coroutine_fn vma_writer_flush(VmaWriter *vmaw)
1406+{
1407+ assert(vmaw);
1408+
1409+ int ret;
1410+ int i;
1411+
1412+ if (vmaw->status < 0) {
1413+ return vmaw->status;
1414+ }
1415+
1416+ if (!vmaw->header_written) {
1417+ vmaw->header_written = true;
1418+ ret = vma_write_header(vmaw);
1419+ if (ret < 0) {
1420+ vma_writer_set_error(vmaw, "vma_writer_flush: write header failed");
1421+ return ret;
1422+ }
1423+ }
1424+
1425+ DPRINTF("VMA WRITE FLUSH %d %d\n", vmaw->outbuf_count, vmaw->outbuf_pos);
1426+
1427+
1428+ VmaExtentHeader *ehead = (VmaExtentHeader *)vmaw->outbuf;
1429+
1430+ ehead->magic = VMA_EXTENT_MAGIC;
1431+ ehead->reserved1 = 0;
1432+
1433+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1434+ ehead->blockinfo[i] = GUINT64_TO_BE(vmaw->outbuf_block_info[i]);
1435+ }
1436+
1437+ guint16 block_count = (vmaw->outbuf_pos - VMA_EXTENT_HEADER_SIZE) /
1438+ VMA_BLOCK_SIZE;
1439+
1440+ ehead->block_count = GUINT16_TO_BE(block_count);
1441+
1442+ memcpy(ehead->uuid, vmaw->uuid, sizeof(ehead->uuid));
1443+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
1444+
1445+ g_checksum_reset(vmaw->md5csum);
1446+ g_checksum_update(vmaw->md5csum, vmaw->outbuf, VMA_EXTENT_HEADER_SIZE);
1447+ gsize csize = 16;
1448+ g_checksum_get_digest(vmaw->md5csum, ehead->md5sum, &csize);
1449+
1450+ int bytes = vmaw->outbuf_pos;
1451+ ret = vma_queue_write(vmaw, vmaw->outbuf, bytes);
1452+ if (ret != bytes) {
1453+ vma_writer_set_error(vmaw, "vma_writer_flush: failed write");
1454+ }
1455+
1456+ vmaw->outbuf_count = 0;
1457+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1458+
1459+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1460+ vmaw->outbuf_block_info[i] = 0;
1461+ }
1462+
1463+ return vmaw->status;
1464+}
1465+
1466+static int vma_count_open_streams(VmaWriter *vmaw)
1467+{
1468+ g_assert(vmaw != NULL);
1469+
1470+ int i;
1471+ int open_drives = 0;
1472+ for (i = 0; i <= 255; i++) {
1473+ if (vmaw->stream_info[i].size && !vmaw->stream_info[i].finished) {
1474+ open_drives++;
1475+ }
1476+ }
1477+
1478+ return open_drives;
1479+}
1480+
1481+/**
1482+ * all jobs should call this when there is no more data
1483+ * Returns: number of remaining stream (0 ==> finished)
1484+ */
1485+int coroutine_fn
1486+vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id)
1487+{
1488+ g_assert(vmaw != NULL);
1489+
1490+ DPRINTF("vma_writer_set_status %d\n", dev_id);
1491+ if (!vmaw->stream_info[dev_id].size) {
1492+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1493+ "no such stream %d", dev_id);
1494+ return -1;
1495+ }
1496+ if (vmaw->stream_info[dev_id].finished) {
1497+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1498+ "stream already closed %d", dev_id);
1499+ return -1;
1500+ }
1501+
1502+ vmaw->stream_info[dev_id].finished = true;
1503+
1504+ int open_drives = vma_count_open_streams(vmaw);
1505+
1506+ if (open_drives <= 0) {
1507+ DPRINTF("vma_writer_set_status all drives completed\n");
1508+ qemu_co_mutex_lock(&vmaw->flush_lock);
1509+ int ret = vma_writer_flush(vmaw);
1510+ qemu_co_mutex_unlock(&vmaw->flush_lock);
1511+ if (ret < 0) {
1512+ vma_writer_set_error(vmaw, "vma_writer_close_stream: flush failed");
1513+ }
1514+ }
1515+
1516+ return open_drives;
1517+}
1518+
1519+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status)
1520+{
1521+ int i;
1522+
1523+ g_assert(vmaw != NULL);
1524+
1525+ if (status) {
1526+ status->status = vmaw->status;
1527+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
1528+ for (i = 0; i <= 255; i++) {
1529+ status->stream_info[i] = vmaw->stream_info[i];
1530+ }
1531+
1532+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
1533+ }
1534+
1535+ status->closed = vmaw->closed;
1536+
1537+ return vmaw->status;
1538+}
1539+
1540+static int vma_writer_get_buffer(VmaWriter *vmaw)
1541+{
1542+
1543+ /* wait until buffer is available */
1544+ while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) {
1545+ int res = 0;
1546+
1547+ qemu_co_mutex_lock(&vmaw->flush_lock);
1548+ res = vma_writer_flush(vmaw);
1549+ qemu_co_mutex_unlock(&vmaw->flush_lock);
1550+
1551+ if (res < 0) {
1552+ vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed");
1553+ return -1;
1554+ }
1555+ }
1556+
1557+ return 0;
1558+}
1559+
1560+
1561+int64_t coroutine_fn
1562+vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
1563+ unsigned char *buf, size_t *zero_bytes)
1564+{
1565+ g_assert(vmaw != NULL);
1566+ g_assert(zero_bytes != NULL);
1567+
1568+ *zero_bytes = 0;
1569+
1570+ if (vmaw->status < 0) {
1571+ return vmaw->status;
1572+ }
1573+
1574+ if (!dev_id || !vmaw->stream_info[dev_id].size) {
1575+ vma_writer_set_error(vmaw, "vma_writer_write: "
1576+ "no such stream %d", dev_id);
1577+ return -1;
1578+ }
1579+
1580+ if (vmaw->stream_info[dev_id].finished) {
1581+ vma_writer_set_error(vmaw, "vma_writer_write: "
1582+ "stream already closed %d", dev_id);
1583+ return -1;
1584+ }
1585+
1586+
1587+ if (cluster_num >= (((uint64_t)1)<<32)) {
1588+ vma_writer_set_error(vmaw, "vma_writer_write: "
1589+ "cluster number out of range");
1590+ return -1;
1591+ }
1592+
1593+ if (dev_id == vmaw->vmstate_stream) {
1594+ if (cluster_num != vmaw->vmstate_clusters) {
1595+ vma_writer_set_error(vmaw, "vma_writer_write: "
1596+ "non sequential vmstate write");
1597+ }
1598+ vmaw->vmstate_clusters++;
1599+ } else if (cluster_num >= vmaw->stream_info[dev_id].cluster_count) {
1600+ vma_writer_set_error(vmaw, "vma_writer_write: cluster number too big");
1601+ return -1;
1602+ }
1603+
1604+ /* wait until buffer is available */
1605+ if (vma_writer_get_buffer(vmaw) < 0) {
1606+ vma_writer_set_error(vmaw, "vma_writer_write: "
1607+ "vma_writer_get_buffer failed");
1608+ return -1;
1609+ }
1610+
1611+ DPRINTF("VMA WRITE %zd\n", cluster_num);
1612+
1613+ int i;
1614+ int bit = 1;
1615+ uint16_t mask = 0;
1616+ for (i = 0; i < 16; i++) {
1617+ unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
1618+ if (buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) {
1619+ DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
1620+ vmaw->stream_info[dev_id].zero_bytes += VMA_BLOCK_SIZE;
1621+ *zero_bytes += VMA_BLOCK_SIZE;
1622+ } else {
1623+ mask |= bit;
1624+ memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock, VMA_BLOCK_SIZE);
1625+ vmaw->outbuf_pos += VMA_BLOCK_SIZE;
1626+
1627+ vmaw->delay_ns = ratelimit_calculate_delay(&vmaw->limit,
1628+ VMA_BLOCK_SIZE);
1629+ if (vmaw->delay_ns) {
1630+ co_sleep_ns(rt_clock, vmaw->delay_ns);
1631+ }
1632+ }
1633+
1634+ bit = bit << 1;
1635+ }
1636+
1637+ uint64_t block_info = ((uint64_t)mask) << (32+16);
1638+ block_info |= ((uint64_t)dev_id) << 32;
1639+ block_info |= (cluster_num & 0xffffffff);
1640+ vmaw->outbuf_block_info[vmaw->outbuf_count] = block_info;
1641+
1642+ DPRINTF("VMA WRITE MASK %zd %zx\n", cluster_num, block_info);
1643+
1644+ vmaw->outbuf_count++;
1645+
1646+ /** NOTE: We allways write whole clusters, but we correctly set
1647+ * transferred bytes. So transferred == size when when everything
1648+ * went OK.
1649+ */
1650+ size_t transferred = VMA_CLUSTER_SIZE;
1651+
1652+ if (dev_id != vmaw->vmstate_stream) {
1653+ uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
1654+ if (last > vmaw->stream_info[dev_id].size) {
1655+ uint64_t diff = last - vmaw->stream_info[dev_id].size;
1656+ if (diff >= VMA_CLUSTER_SIZE) {
1657+ vma_writer_set_error(vmaw, "vma_writer_write: "
1658+ "read after last cluster");
1659+ return -1;
1660+ }
1661+ transferred -= diff;
1662+ }
1663+ }
1664+
1665+ vmaw->stream_info[dev_id].transferred += transferred;
1666+
1667+ return transferred;
1668+}
1669+
1670+int vma_writer_close(VmaWriter *vmaw, Error **errp)
1671+{
1672+ g_assert(vmaw != NULL);
1673+
1674+ int i;
1675+
1676+ vma_queue_flush(vmaw);
1677+
1678+ /* this should not happen - just to be sure */
1679+ while (!qemu_co_queue_empty(&vmaw->wqueue)) {
1680+ DPRINTF("vma_writer_close wait\n");
1681+ co_sleep_ns(rt_clock, 1000000);
1682+ }
1683+
1684+ if (vmaw->cmd) {
1685+ if (pclose(vmaw->cmd) < 0) {
1686+ vma_writer_set_error(vmaw, "vma_writer_close: "
1687+ "pclose failed - %s", strerror(errno));
1688+ }
1689+ } else {
1690+ if (close(vmaw->fd) < 0) {
1691+ vma_writer_set_error(vmaw, "vma_writer_close: "
1692+ "close failed - %s", strerror(errno));
1693+ }
1694+ }
1695+
1696+ for (i = 0; i <= 255; i++) {
1697+ VmaStreamInfo *si = &vmaw->stream_info[i];
1698+ if (si->size) {
1699+ if (!si->finished) {
1700+ vma_writer_set_error(vmaw, "vma_writer_close: "
1701+ "detected open stream '%s'", si->devname);
1702+ } else if ((si->transferred != si->size) &&
1703+ (i != vmaw->vmstate_stream)) {
1704+ vma_writer_set_error(vmaw, "vma_writer_close: "
1705+ "incomplete stream '%s' (%zd != %zd)",
1706+ si->devname, si->transferred, si->size);
1707+ }
1708+ }
1709+ }
1710+
1711+ for (i = 0; i <= 255; i++) {
1712+ vmaw->stream_info[i].finished = 1; /* mark as closed */
1713+ }
1714+
1715+ vmaw->closed = 1;
1716+
1717+ if (vmaw->status < 0 && *errp == NULL) {
1718+ error_setg(errp, "%s", vmaw->errmsg);
1719+ }
1720+
1721+ return vmaw->status;
1722+}
1723+
1724+void vma_writer_destroy(VmaWriter *vmaw)
1725+{
1726+ assert(vmaw);
1727+
1728+ int i;
1729+
1730+ for (i = 0; i <= 255; i++) {
1731+ if (vmaw->stream_info[i].devname) {
1732+ g_free(vmaw->stream_info[i].devname);
1733+ }
1734+ }
1735+
1736+ if (vmaw->md5csum) {
1737+ g_checksum_free(vmaw->md5csum);
1738+ }
1739+
1740+ g_free(vmaw);
1741+}
1742+
1743+/* backup driver plugin */
1744+
1745+static int vma_dump_cb(void *opaque, uint8_t dev_id, int64_t cluster_num,
1746+ unsigned char *buf, size_t *zero_bytes)
1747+{
1748+ VmaWriter *vmaw = opaque;
1749+
1750+ return vma_writer_write(vmaw, dev_id, cluster_num, buf, zero_bytes);
1751+}
1752+
1753+static int vma_close_cb(void *opaque, Error **errp)
1754+{
1755+ VmaWriter *vmaw = opaque;
1756+
1757+ int res = vma_writer_close(vmaw, errp);
1758+ vma_writer_destroy(vmaw);
1759+
1760+ return res;
1761+}
1762+
1763+static int vma_complete_cb(void *opaque, uint8_t dev_id, int ret)
1764+{
1765+ VmaWriter *vmaw = opaque;
1766+
1767+ if (ret < 0) {
1768+ vma_writer_set_error(vmaw, "backup_complete_cb %d", ret);
1769+ }
1770+
1771+ return vma_writer_close_stream(vmaw, dev_id);
1772+}
1773+
1774+static int vma_register_stream_cb(void *opaque, const char *devname,
1775+ size_t size)
1776+{
1777+ VmaWriter *vmaw = opaque;
1778+
1779+ return vma_writer_register_stream(vmaw, devname, size);
1780+}
1781+
1782+static int vma_register_config_cb(void *opaque, const char *name,
1783+ gpointer data, size_t data_len)
1784+{
1785+ VmaWriter *vmaw = opaque;
1786+
1787+ return vma_writer_add_config(vmaw, name, data, data_len);
1788+}
1789+
1790+static void *vma_open_cb(const char *filename, uuid_t uuid, int64_t speed,
1791+ Error **errp)
1792+{
1793+ return vma_writer_create(filename, uuid, speed, errp);
1794+}
1795+
1796+const BackupDriver backup_vma_driver = {
1797+ .format = "vma",
1798+ .open_cb = vma_open_cb,
1799+ .close_cb = vma_close_cb,
1800+ .register_config_cb = vma_register_config_cb,
1801+ .register_stream_cb = vma_register_stream_cb,
1802+ .dump_cb = vma_dump_cb,
1803+ .complete_cb = vma_complete_cb,
1804+};
1805+
1806diff --git a/vma.c b/vma.c
1807new file mode 100644
1808index 0000000..69af80c
1809--- /dev/null
1810+++ b/vma.c
1811@@ -0,0 +1,550 @@
1812+/*
1813+ * VMA: Virtual Machine Archive
1814+ *
1815+ * Copyright (C) 2012 Proxmox Server Solutions
1816+ *
1817+ * Authors:
1818+ * Dietmar Maurer (dietmar@proxmox.com)
1819+ *
1820+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
1821+ * See the COPYING file in the top-level directory.
1822+ *
1823+ */
1824+
1825+#include <stdio.h>
1826+#include <errno.h>
1827+#include <unistd.h>
1828+#include <stdio.h>
1829+#include <string.h>
1830+#include <sys/types.h>
1831+#include <sys/stat.h>
1832+#include <fcntl.h>
1833+#include <glib.h>
1834+
1835+#include "qemu-common.h"
1836+#include "qemu-option.h"
1837+#include "qemu-error.h"
1838+#include "osdep.h"
1839+#include "sysemu.h"
1840+#include "block_int.h"
1841+#include <stdio.h>
1842+#include "vma.h"
1843+
/* Print the command line synopsis to stdout and exit with status 1. */
static void help(void)
{
    static const char usage[] =
        "usage: vma command [command options]\n"
        "\n"
        "vma list <filename>\n"
        "vma create <filename> [-c config] <archive> pathname ...\n"
        "vma extract <filename> [-r] <targetdir>\n"
        ;

    fputs(usage, stdout);
    exit(1);
}
1857+
/**
 * Split an optional "<devname>=" prefix off @path.
 *
 * @path:    argument of the form "devname=path" or just "path"
 * @devname: receives a newly allocated device name. Without a '=' in
 *           @path this is "disk<index>" when @index >= 0, else NULL.
 * @index:   number used to generate a default name, or a negative value
 *
 * Returns: pointer to the path part inside @path.
 */
static const char *extract_devname(const char *path, char **devname, int index)
{
    const char *eq;

    assert(path);

    eq = strchr(path, '=');
    if (eq != NULL) {
        *devname = g_strndup(path, eq - path);
        return eq + 1;
    }

    *devname = (index >= 0) ? g_strdup_printf("disk%d", index) : NULL;

    return path;
}
1877+
1878+static void print_content(VmaReader *vmar)
1879+{
1880+ assert(vmar);
1881+
1882+ VmaHeader *head = vma_reader_get_header(vmar);
1883+
1884+ printf("CTIME: %s", ctime(&head->ctime));
1885+
1886+ GList *l = vma_reader_get_config_data(vmar);
1887+ while (l && l->data) {
1888+ VmaConfigData *cdata = (VmaConfigData *)l->data;
1889+ l = g_list_next(l);
1890+ printf("CFG: size: %d name: %s\n", cdata->len, cdata->name);
1891+ }
1892+
1893+ int i;
1894+ VmaDeviceInfo *di;
1895+ for (i = 1; i < 255; i++) {
1896+ di = vma_reader_get_device_info(vmar, i);
1897+ if (di) {
1898+ if (strcmp(di->devname, "vmstate") == 0) {
1899+ printf("VMSTATE: dev_id=%d memory: %zd\n", i, di->size);
1900+ } else {
1901+ printf("DEV: dev_id=%d size: %zd devname: %s\n",
1902+ i, di->size, di->devname);
1903+ }
1904+ }
1905+ }
1906+}
1907+
1908+static int list_content(int argc, char **argv)
1909+{
1910+ int c, ret = 0;
1911+ const char *filename;
1912+
1913+ for (;;) {
1914+ c = getopt(argc, argv, "h");
1915+ if (c == -1) {
1916+ break;
1917+ }
1918+ switch (c) {
1919+ case '?':
1920+ case 'h':
1921+ help();
1922+ break;
1923+ default:
1924+ g_assert_not_reached();
1925+ }
1926+ }
1927+
1928+ /* Get the filename */
1929+ if ((optind + 1) != argc) {
1930+ help();
1931+ }
1932+ filename = argv[optind++];
1933+
1934+ Error *errp = NULL;
1935+ VmaReader *vmar = vma_reader_create(filename, &errp);
1936+
1937+ if (!vmar) {
1938+ g_error("%s", error_get_pretty(errp));
1939+ }
1940+
1941+ print_content(vmar);
1942+
1943+ vma_reader_destroy(vmar);
1944+
1945+ return ret;
1946+}
1947+
1948+typedef struct RestoreMap { /* one "devname=path" mapping read from stdin by 'vma extract -r' */
1949+ char *devname; /* device name; also used as key of the devmap hash table */
1950+ char *path; /* target the device is restored to */
1951+ bool write_zero; /* from the "0:"/"1:" line prefix; passed on to vma_reader_register_bs() */
1952+} RestoreMap;
1953+
1954+static int extract_content(int argc, char **argv)
1955+{
1956+ int c, ret = 0;
1957+ const char *filename;
1958+ const char *dirname;
1959+ int readmap = 0;
1960+
1961+ for (;;) {
1962+ c = getopt(argc, argv, "hr");
1963+ if (c == -1) {
1964+ break;
1965+ }
1966+ switch (c) {
1967+ case '?':
1968+ case 'h':
1969+ help();
1970+ break;
1971+ case 'r':
1972+ readmap = 1;
1973+ break;
1974+ default:
1975+ help();
1976+ }
1977+ }
1978+
1979+ /* Get the filename */
1980+ if ((optind + 2) != argc) {
1981+ help();
1982+ }
1983+ filename = argv[optind++];
1984+ dirname = argv[optind++];
1985+
1986+ Error *errp = NULL;
1987+ VmaReader *vmar = vma_reader_create(filename, &errp);
1988+
1989+ if (!vmar) {
1990+ g_error("%s", error_get_pretty(errp));
1991+ }
1992+
1993+ if (mkdir(dirname, 0777) < 0) {
1994+ g_error("unable to create target directory %s - %s",
1995+ dirname, strerror(errno));
1996+ }
1997+
1998+ GList *l = vma_reader_get_config_data(vmar);
1999+ while (l && l->data) {
2000+ VmaConfigData *cdata = (VmaConfigData *)l->data;
2001+ l = g_list_next(l);
2002+ char *cfgfn = g_strdup_printf("%s/%s", dirname, cdata->name);
2003+ GError *err = NULL;
2004+ if (!g_file_set_contents(cfgfn, (gchar *)cdata->data, cdata->len,
2005+ &err)) {
2006+ g_error("Unable to write file: %s", err->message);
2007+ }
2008+ }
2009+
2010+ GHashTable *devmap = g_hash_table_new(g_str_hash, g_str_equal);
2011+
2012+ if (readmap) {
2013+ print_content(vmar);
2014+
2015+ while (1) {
2016+ char inbuf[8192];
2017+ char *line = fgets(inbuf, sizeof(inbuf), stdin);
2018+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
2019+ break;
2020+ }
2021+ int len = strlen(line);
2022+ if (line[len - 1] == '\n') {
2023+ line[len - 1] = '\0';
2024+ if (len == 1) {
2025+ break;
2026+ }
2027+ }
2028+
2029+ const char *path;
2030+ bool write_zero;
2031+ if (line[0] == '0' && line[1] == ':') {
2032+ path = inbuf + 2;
2033+ write_zero = false;
2034+ } else if (line[0] == '1' && line[1] == ':') {
2035+ path = inbuf + 2;
2036+ write_zero = true;
2037+ } else {
2038+ g_error("read map failed - parse error ('%s')", inbuf);
2039+ }
2040+
2041+ char *devname = NULL;
2042+ path = extract_devname(path, &devname, -1);
2043+ if (!devname) {
2044+ g_error("read map failed - no dev name specified ('%s')",
2045+ inbuf);
2046+ }
2047+
2048+ printf("TEST %s %s\n", path, devname);
2049+
2050+ RestoreMap *map = g_new0(RestoreMap, 1);
2051+ map->devname = g_strdup(devname);
2052+ map->path = g_strdup(path);
2053+ map->write_zero = write_zero;
2054+
2055+ g_hash_table_insert(devmap, map->devname, map);
2056+
2057+ };
2058+ }
2059+
2060+ int i;
2061+ int vmstate_fd = -1;
2062+ guint8 vmstate_stream = 0;
2063+
2064+ for (i = 1; i < 255; i++) {
2065+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2066+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
2067+ vmstate_stream = i;
2068+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
2069+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
2070+ if (vmstate_fd < 0) {
2071+ g_error("create vmstate file '%s' failed - %s", statefn,
2072+ strerror(errno));
2073+ }
2074+ g_free(statefn);
2075+ } else if (di) {
2076+ char *devfn = NULL;
2077+ int flags = BDRV_O_RDWR|BDRV_O_CACHE_WB;
2078+ bool write_zero = true;
2079+
2080+ if (readmap) {
2081+ RestoreMap *map;
2082+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
2083+ if (map == NULL) {
2084+ g_error("no device name mapping for %s", di->devname);
2085+ }
2086+ devfn = map->path;
2087+ write_zero = map->write_zero;
2088+ } else {
2089+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2090+ dirname, di->devname);
2091+ printf("DEVINFO %s %zd\n", devfn, di->size);
2092+
2093+ if (bdrv_img_create(devfn, "raw", NULL, NULL, NULL,
2094+ di->size, flags)) {
2095+ g_error("can't create file %s", devfn);
2096+ }
2097+
2098+ /* Note: we created an empty file above, so there is no
2099+ * need to write zeroes (so we generate a sparse file)
2100+ */
2101+ write_zero = false;
2102+ }
2103+
2104+ BlockDriverState *bs = NULL;
2105+ if (bdrv_file_open(&bs, devfn, flags)) {
2106+ g_error("can't open file %s", devfn);
2107+ }
2108+ if (vma_reader_register_bs(vmar, i, bs, write_zero, &errp) < 0) {
2109+ g_error("%s", error_get_pretty(errp));
2110+ }
2111+
2112+ if (!readmap) {
2113+ g_free(devfn);
2114+ }
2115+ }
2116+ }
2117+
2118+ if (vma_reader_restore(vmar, vmstate_fd, &errp) < 0) {
2119+ g_error("restore failed - %s", error_get_pretty(errp));
2120+ }
2121+
2122+ if (!readmap) {
2123+ for (i = 1; i < 255; i++) {
2124+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2125+ if (di && (i != vmstate_stream)) {
2126+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2127+ dirname, di->devname);
2128+ char *fn = g_strdup_printf("%s/disk-%s.raw",
2129+ dirname, di->devname);
2130+ if (rename(tmpfn, fn) != 0) {
2131+ g_error("rename %s to %s failed - %s",
2132+ tmpfn, fn, strerror(errno));
2133+ }
2134+ }
2135+ }
2136+ }
2137+
2138+ vma_reader_destroy(vmar);
2139+
2140+ bdrv_close_all();
2141+
2142+ return ret;
2143+}
2144+
2145+typedef struct BackupCB { /* per-device context passed as opaque pointer to the backup job callbacks */
2146+ VmaWriter *vmaw; /* archive writer the device is written to */
2147+ uint8_t dev_id; /* stream id returned by vma_writer_register_stream() */
2148+} BackupCB;
2149+
2150+static int backup_dump_cb(void *opaque, BlockDriverState *bs,
2151+ int64_t cluster_num, unsigned char *buf)
2152+{
2153+ BackupCB *bcb = opaque;
2154+ size_t zb = 0;
2155+ if (vma_writer_write(bcb->vmaw, bcb->dev_id, cluster_num, buf, &zb) < 0) {
2156+ g_warning("backup_dump_cb vma_writer_write failed");
2157+ return -1;
2158+ }
2159+
2160+ return 0;
2161+}
2162+
2163+static void backup_complete_cb(void *opaque, int ret)
2164+{
2165+ BackupCB *bcb = opaque;
2166+
2167+ if (ret < 0) {
2168+ vma_writer_set_error(bcb->vmaw, "backup_complete_cb %d", ret);
2169+ }
2170+
2171+ if (vma_writer_close_stream(bcb->vmaw, bcb->dev_id) <= 0) {
2172+ Error *err = NULL;
2173+ if (vma_writer_close(bcb->vmaw, &err) != 0) {
2174+ g_warning("vma_writer_close failed %s", error_get_pretty(err));
2175+ }
2176+ }
2177+}
2178+
2179+static int create_archive(int argc, char **argv)
2180+{
2181+ int i, c, res;
2182+ int verbose = 0;
2183+ const char *archivename;
2184+ GList *config_files = NULL;
2185+
2186+ for (;;) {
2187+ c = getopt(argc, argv, "hvc:");
2188+ if (c == -1) {
2189+ break;
2190+ }
2191+ switch (c) {
2192+ case '?':
2193+ case 'h':
2194+ help();
2195+ break;
2196+ case 'c':
2197+ config_files = g_list_append(config_files, optarg);
2198+ break;
2199+ case 'v':
2200+ verbose = 1;
2201+ break;
2202+ default:
2203+ g_assert_not_reached();
2204+ }
2205+ }
2206+
2207+
2208+ /* make sure we have archive name and at least one path */
2209+ if ((optind + 2) > argc) {
2210+ help();
2211+ }
2212+
2213+ archivename = argv[optind++];
2214+
2215+ uuid_t uuid;
2216+ uuid_generate(uuid);
2217+
2218+ Error *local_err = NULL;
2219+ VmaWriter *vmaw = vma_writer_create(archivename, uuid, 0, &local_err);
2220+
2221+ if (vmaw == NULL) {
2222+ g_error("%s", error_get_pretty(local_err));
2223+ }
2224+
2225+ GList *l = config_files;
2226+ while (l && l->data) {
2227+ char *name = l->data;
2228+ char *cdata = NULL;
2229+ gsize clen = 0;
2230+ GError *err = NULL;
2231+ if (!g_file_get_contents(name, &cdata, &clen, &err)) {
2232+ unlink(archivename);
2233+ g_error("Unable to read file: %s", err->message);
2234+ }
2235+
2236+ if (vma_writer_add_config(vmaw, name, cdata, clen) != 0) {
2237+ unlink(archivename);
2238+ g_error("Unable to append config data %s (len = %zd)",
2239+ name, clen);
2240+ }
2241+ l = g_list_next(l);
2242+ }
2243+
2244+ int ind = 0;
2245+ while (optind < argc) {
2246+ const char *path = argv[optind++];
2247+ char *devname = NULL;
2248+ path = extract_devname(path, &devname, ind++);
2249+
2250+ BlockDriver *drv = NULL;
2251+ BlockDriverState *bs = bdrv_new(devname);
2252+
2253+ res = bdrv_open(bs, path, BDRV_O_CACHE_WB , drv);
2254+ if (res < 0) {
2255+ unlink(archivename);
2256+ g_error("bdrv_open '%s' failed", path);
2257+ }
2258+ int64_t size = bdrv_getlength(bs);
2259+ int dev_id = vma_writer_register_stream(vmaw, devname, size);
2260+ if (dev_id <= 0) {
2261+ unlink(archivename);
2262+ g_error("vma_writer_register_stream '%s' failed", devname);
2263+ }
2264+
2265+ BackupCB *bcb = g_new0(BackupCB, 1);
2266+ bcb->vmaw = vmaw;
2267+ bcb->dev_id = dev_id;
2268+
2269+ if (backup_job_start(bs, backup_dump_cb, backup_complete_cb, bcb) < 0) {
2270+ unlink(archivename);
2271+ g_error("backup_job_start failed");
2272+ }
2273+ }
2274+
2275+ VmaStatus vmastat;
2276+ int percent = 0;
2277+ int last_percent = -1;
2278+
2279+ while (1) {
2280+ main_loop_wait(false);
2281+ vma_writer_get_status(vmaw, &vmastat);
2282+
2283+ if (verbose) {
2284+
2285+ uint64_t total = 0;
2286+ uint64_t transferred = 0;
2287+ uint64_t zero_bytes = 0;
2288+
2289+ int i;
2290+ for (i = 0; i < 256; i++) {
2291+ if (vmastat.stream_info[i].size) {
2292+ total += vmastat.stream_info[i].size;
2293+ transferred += vmastat.stream_info[i].transferred;
2294+ zero_bytes += vmastat.stream_info[i].zero_bytes;
2295+ }
2296+ }
2297+ percent = (transferred*100)/total;
2298+ if (percent != last_percent) {
2299+ printf("progress %d%% %zd/%zd %zd\n", percent,
2300+ transferred, total, zero_bytes);
2301+
2302+ last_percent = percent;
2303+ }
2304+ }
2305+
2306+ if (vmastat.closed) {
2307+ break;
2308+ }
2309+ }
2310+
2311+ bdrv_drain_all();
2312+
2313+ vma_writer_get_status(vmaw, &vmastat);
2314+
2315+ if (verbose) {
2316+ for (i = 0; i < 256; i++) {
2317+ VmaStreamInfo *si = &vmastat.stream_info[i];
2318+ if (si->size) {
2319+ printf("image %s: size=%zd zeros=%zd saved=%zd\n", si->devname,
2320+ si->size, si->zero_bytes, si->size - si->zero_bytes);
2321+ }
2322+ }
2323+ }
2324+
2325+ if (vmastat.status < 0) {
2326+ unlink(archivename);
2327+ g_error("creating vma archive failed");
2328+ }
2329+
2330+ return 0;
2331+}
2332+
/**
 * Entry point of the vma tool: initialises the QEMU main loop and block
 * layer, then dispatches to the "list", "create" or "extract" command.
 * A missing or unknown command prints the usage text via help().
 */
int main(int argc, char **argv)
{
    const char *command;

    error_set_progname(argv[0]);

    qemu_init_main_loop();

    bdrv_init();

    if (argc < 2) {
        help();
    }

    /* drop the program name so that the command becomes argv[0] */
    command = argv[1];
    argc--;
    argv++;

    if (strcmp(command, "list") == 0) {
        return list_content(argc, argv);
    }
    if (strcmp(command, "create") == 0) {
        return create_archive(argc, argv);
    }
    if (strcmp(command, "extract") == 0) {
        return extract_content(argc, argv);
    }

    help();
    return 0;
}
2362diff --git a/vma.h b/vma.h
2363new file mode 100644
2364index 0000000..10800a1
2365--- /dev/null
2366+++ b/vma.h
2367@@ -0,0 +1,145 @@
2368+/*
2369+ * VMA: Virtual Machine Archive
2370+ *
2371+ * Copyright (C) Proxmox Server Solutions
2372+ *
2373+ * Authors:
2374+ * Dietmar Maurer (dietmar@proxmox.com)
2375+ *
2376+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
2377+ * See the COPYING file in the top-level directory.
2378+ *
2379+ */
2380+
2381+#ifndef BACKUP_VMA_H
2382+#define BACKUP_VMA_H
2383+
2384+#include "backup.h"
2385+#include "error.h"
2386+
2387+#define VMA_BLOCK_BITS 12
2388+#define VMA_BLOCK_SIZE (1<<VMA_BLOCK_BITS)
2389+#define VMA_CLUSTER_BITS (VMA_BLOCK_BITS+4)
2390+#define VMA_CLUSTER_SIZE (1<<VMA_CLUSTER_BITS)
2391+
2392+#if VMA_CLUSTER_SIZE != 65536
2393+#error unexpected cluster size
2394+#endif
2395+
2396+#define VMA_EXTENT_HEADER_SIZE 512
2397+#define VMA_BLOCKS_PER_EXTENT 59
2398+#define VMA_MAX_CONFIGS 256
2399+
2400+#define VMA_MAX_EXTENT_SIZE \
2401+ (VMA_EXTENT_HEADER_SIZE+VMA_CLUSTER_SIZE*VMA_BLOCKS_PER_EXTENT)
2402+#if VMA_MAX_EXTENT_SIZE != 3867136
2403+#error unexpected VMA_EXTENT_SIZE
2404+#endif
2405+
2406+/* File Format Definitions */
2407+
2408+#define VMA_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|0x00))
2409+#define VMA_EXTENT_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|'E'))
2410+/* Element type of VmaHeader.dev_info[]; the members add up to 32 bytes (4 + 4 + 8 + 8 + 8). */
2411+typedef struct VmaDeviceInfoHeader {
2412+ uint32_t devname_ptr; /* offset into blob_buffer table */
2413+ uint32_t reserved0;
2414+ uint64_t size; /* device size in bytes */
2415+ uint64_t reserved1;
2416+ uint64_t reserved2;
2417+} VmaDeviceInfoHeader;
2418+/* Archive header: identification fields, blob buffer description, config name/data tables and the per-device table. */
2419+typedef struct VmaHeader {
2420+ uint32_t magic;
2421+ uint32_t version;
2422+ unsigned char uuid[16];
2423+ int64_t ctime;
2424+ unsigned char md5sum[16];
2425+/* Describe the blob buffer that the "offset into blob_buffer table" fields index; base and units are defined by the reader/writer code, which is not visible here. */
2426+ uint32_t blob_buffer_offset;
2427+ uint32_t blob_buffer_size;
2428+ uint32_t header_size;
2429+
2430+ unsigned char reserved[1984];
2431+
2432+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
2433+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
2434+/* NOTE(review): the members above total 4092 bytes, so dev_info (64-bit members) begins at 4096 only where uint64_t is 8-byte aligned - confirm the layout is the same on every supported ABI. */
2435+ VmaDeviceInfoHeader dev_info[256];
2436+} VmaHeader;
2437+/* Extent header; members total 4 + 2 + 2 + 16 + 16 + 59 * 8 = 512 bytes, equal to VMA_EXTENT_HEADER_SIZE, with one 64-bit blockinfo entry per VMA_BLOCKS_PER_EXTENT slot. */
2438+typedef struct VmaExtentHeader {
2439+ uint32_t magic;
2440+ uint16_t reserved1;
2441+ uint16_t block_count;
2442+ unsigned char uuid[16];
2443+ unsigned char md5sum[16];
2444+ uint64_t blockinfo[VMA_BLOCKS_PER_EXTENT];
2445+} VmaExtentHeader;
2446+
2447+/* functions/definitions to read/write vma files */
2448+/* Opaque handle types: only the typedef names are declared in this header, the struct bodies are not. */
2449+typedef struct VmaReader VmaReader;
2450+
2451+typedef struct VmaWriter VmaWriter;
2452+/* One named configuration blob: a name string, a pointer to read-only data and its length. */
2453+typedef struct VmaConfigData {
2454+ const char *name;
2455+ const void *data;
2456+ uint32_t len;
2457+} VmaConfigData;
2458+/* Per-stream counters, finished flag and device name; VmaStatus carries an array of 256 of these. */
2459+typedef struct VmaStreamInfo {
2460+ uint64_t size;
2461+ uint64_t cluster_count;
2462+ uint64_t transferred;
2463+ uint64_t zero_bytes;
2464+ int finished;
2465+ char *devname;
2466+} VmaStreamInfo;
2467+/* Status record taken by vma_writer_get_status(): status code, closed flag, error text, UUID string and one VmaStreamInfo per stream slot. */
2468+typedef struct VmaStatus {
2469+ int status;
2470+ bool closed;
2471+ char errmsg[8192];
2472+ char uuid_str[37];
2473+ VmaStreamInfo stream_info[256];
2474+} VmaStatus;
2475+/* Device description (size and name); this is the type vma_reader_get_device_info() returns a pointer to. */
2476+typedef struct VmaDeviceInfo {
2477+ uint64_t size; /* device size in bytes */
2478+ const char *devname;
2479+} VmaDeviceInfo;
2480+/* Backup driver object; it is only declared here and defined in another translation unit. */
2481+extern const BackupDriver backup_vma_driver;
2482+/* Writer API: create, close and destroy a writer, add config blobs and register device streams. */
2483+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, int64_t speed,
2484+ Error **errp);
2485+int vma_writer_close(VmaWriter *vmaw, Error **errp);
2486+void vma_writer_destroy(VmaWriter *vmaw);
2487+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
2488+ size_t len);
2489+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
2490+ size_t size);
2491+/* NOTE(review): register_stream takes size_t size while device sizes elsewhere in this header are uint64_t - confirm this is adequate on 32-bit hosts. The two functions below are marked coroutine_fn. */
2492+int64_t coroutine_fn vma_writer_write(VmaWriter *vmaw, uint8_t dev_id,
2493+ int64_t cluster_num, unsigned char *buf,
2494+ size_t *zero_bytes);
2495+
2496+int coroutine_fn vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id);
2497+/* Status query into a caller-supplied VmaStatus, and variadic error reporting driven by a format string. */
2498+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
2499+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
2500+
2501+/* Reader API: open and destroy a reader, access the header, config data and device info, attach a BlockDriverState per device id, and run the restore. */
2502+VmaReader *vma_reader_create(const char *filename, Error **errp);
2503+void vma_reader_destroy(VmaReader *vmar);
2504+VmaHeader *vma_reader_get_header(VmaReader *vmar);
2505+GList *vma_reader_get_config_data(VmaReader *vmar);
2506+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
2507+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
2508+ BlockDriverState *bs, bool write_zeroes,
2509+ Error **errp);
2510+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, Error **errp);
2511+
2512+#endif /* BACKUP_VMA_H */
2513--
25141.7.2.5
2515