]> git.proxmox.com Git - pve-qemu-kvm.git/blame - debian/patches/0004-introduce-new-vma-archive-format.patch
updates for qemu 1.3.0 rc2
[pve-qemu-kvm.git] / debian / patches / 0004-introduce-new-vma-archive-format.patch
CommitLineData
16aecab6 1From 07c102f539d0ee90afbd695cdb1a8955a141d8c1 Mon Sep 17 00:00:00 2001
5ad5891c
DM
2From: Dietmar Maurer <dietmar@proxmox.com>
3Date: Tue, 13 Nov 2012 11:11:38 +0100
4Subject: [PATCH v3 4/6] introduce new vma archive format
5
6This is a very simple archive format, see docs/specs/vma_spec.txt
7
8Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
9---
10 Makefile | 3 +-
11 Makefile.objs | 2 +-
12 blockdev.c | 6 +-
13 docs/specs/vma_spec.txt | 24 ++
14 vma-reader.c | 772 ++++++++++++++++++++++++++++++++++++++++
15 vma-writer.c | 900 +++++++++++++++++++++++++++++++++++++++++++++++
16 vma.c | 550 +++++++++++++++++++++++++++++
17 vma.h | 145 ++++++++
18 8 files changed, 2398 insertions(+), 4 deletions(-)
19 create mode 100644 docs/specs/vma_spec.txt
20 create mode 100644 vma-reader.c
21 create mode 100644 vma-writer.c
22 create mode 100644 vma.c
23 create mode 100644 vma.h
24
25diff --git a/Makefile b/Makefile
26index 9ecbcbb..30a9268 100644
27--- a/Makefile
28+++ b/Makefile
29@@ -100,7 +100,7 @@ defconfig:
30
31 -include config-all-devices.mak
32
33-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
34+all: $(DOCS) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all
35
36 config-host.h: config-host.h-timestamp
37 config-host.h-timestamp: config-host.mak
38@@ -194,6 +194,7 @@ tools-obj-$(CONFIG_POSIX) += compatfd.o
39 qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) libqemustub.a
40 qemu-nbd$(EXESUF): qemu-nbd.o $(tools-obj-y) $(block-obj-y) libqemustub.a
41 qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y) libqemustub.a
42+vma$(EXESUF): vma.o vma-writer.o vma-reader.o $(tools-obj-y) $(block-obj-y) libqemustub.a
43
44 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
45
46diff --git a/Makefile.objs b/Makefile.objs
47index cb46be5..b5732e2 100644
48--- a/Makefile.objs
49+++ b/Makefile.objs
50@@ -48,7 +48,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
51 block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o
52 block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o
53 block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o
54-block-obj-y += backup.o
55+block-obj-y += vma-writer.o backup.o
56 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
57 block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
58 block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o
59diff --git a/blockdev.c b/blockdev.c
60index c635d21..f424933 100644
61--- a/blockdev.c
62+++ b/blockdev.c
63@@ -21,6 +21,7 @@
64 #include "trace.h"
65 #include "arch_init.h"
66 #include "backup.h"
67+#include "vma.h"
68
69 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
70
71@@ -1483,10 +1484,11 @@ char *qmp_backup(const char *backupfile, bool has_format, const char *format,
72 /* Todo: try to auto-detect format based on file name */
73 format = has_format ? format : "vma";
74
75- /* fixme: find driver for specifued format */
76 const BackupDriver *driver = NULL;
77
78- if (!driver) {
79+ if (strcmp(format, "vma") == 0) {
80+ driver = &backup_vma_driver;
81+ } else {
82 error_set(errp, ERROR_CLASS_GENERIC_ERROR,
83 "no backup driver for format '%s'", format);
84 return NULL;
85diff --git a/docs/specs/vma_spec.txt b/docs/specs/vma_spec.txt
86new file mode 100644
87index 0000000..052c629
88--- /dev/null
89+++ b/docs/specs/vma_spec.txt
90@@ -0,0 +1,24 @@
91+=Virtual Machine Archive format (VMA)=
92+
93+This format contains a header which includes the VM configuration as
94+binary blobs, and a list of devices (dev_id, name).
95+
96+The actual VM image data is stored inside extents. An extent contains
97+up to 64 clusters, and starts with a 512 byte header containing
98+additional information for those clusters.
99+
100+We use a cluster size of 65536, and use 8 bytes for each
101+cluster in the header to store the following information:
102+
103+* 1 byte dev_id (to identify the drive)
104+* 2 bytes zero indicator (mark zero regions (16x4096))
105+* 4 bytes cluster number
106+* 1 byte not used (reserved)
107+
108+We only store non-zero blocks (each such block is 4096 bytes).
109+
110+Each archive is marked with a uuid. The archive header and all
111+extent headers include that uuid and an MD5 checksum (over header
112+data).
113+
114+
115diff --git a/vma-reader.c b/vma-reader.c
116new file mode 100644
117index 0000000..154c96b
118--- /dev/null
119+++ b/vma-reader.c
120@@ -0,0 +1,772 @@
121+/*
122+ * VMA: Virtual Machine Archive
123+ *
124+ * Copyright (C) 2012 Proxmox Server Solutions
125+ *
126+ * Authors:
127+ * Dietmar Maurer (dietmar@proxmox.com)
128+ *
129+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
130+ * See the COPYING file in the top-level directory.
131+ *
132+ */
133+
134+#include <stdio.h>
135+#include <errno.h>
136+#include <unistd.h>
137+#include <stdio.h>
138+#include <string.h>
139+#include <sys/types.h>
140+#include <sys/stat.h>
141+#include <fcntl.h>
142+#include <glib.h>
143+#include <uuid/uuid.h>
144+
145+#include "qemu-common.h"
146+#include "qemu_socket.h"
147+#include "qemu-coroutine.h"
148+#include "qemu-aio.h"
149+#include "qemu/ratelimit.h"
150+#include "vma.h"
151+#include "block.h"
152+
153+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
154+
155+static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
156+
157+typedef struct VmaRestoreState {
158+ BlockDriverState *bs;
159+ bool write_zeroes;
160+ unsigned long *bitmap;
161+ int bitmap_size;
162+} VmaRestoreState;
163+
164+struct VmaReader {
165+ int fd;
166+ GChecksum *md5csum;
167+ GHashTable *blob_hash;
168+ unsigned char *head_data;
169+ VmaDeviceInfo devinfo[256];
170+ VmaRestoreState rstate[256];
171+ GList *cdata_list;
172+ guint8 vmstate_stream;
173+ uint32_t vmstate_clusters;
174+};
175+
176+static guint
177+g_int32_hash(gconstpointer v)
178+{
179+ return *(const uint32_t *)v;
180+}
181+
182+static gboolean
183+g_int32_equal(gconstpointer v1, gconstpointer v2)
184+{
185+ return *((const uint32_t *)v1) == *((const uint32_t *)v2);
186+}
187+
188+static int vma_reader_get_bitmap(VmaRestoreState *rstate, int64_t cluster_num)
189+{
190+ assert(rstate);
191+ assert(rstate->bitmap);
192+
193+ unsigned long val, idx, bit;
194+
195+ idx = cluster_num / BITS_PER_LONG;
196+
197+ assert(rstate->bitmap_size > idx);
198+
199+ bit = cluster_num % BITS_PER_LONG;
200+ val = rstate->bitmap[idx];
201+
202+ return !!(val & (1UL << bit));
203+}
204+
205+static void vma_reader_set_bitmap(VmaRestoreState *rstate, int64_t cluster_num,
206+ int dirty)
207+{
208+ assert(rstate);
209+ assert(rstate->bitmap);
210+
211+ unsigned long val, idx, bit;
212+
213+ idx = cluster_num / BITS_PER_LONG;
214+
215+ assert(rstate->bitmap_size > idx);
216+
217+ bit = cluster_num % BITS_PER_LONG;
218+ val = rstate->bitmap[idx];
219+ if (dirty) {
220+ if (!(val & (1UL << bit))) {
221+ val |= 1UL << bit;
222+ }
223+ } else {
224+ if (val & (1UL << bit)) {
225+ val &= ~(1UL << bit);
226+ }
227+ }
228+ rstate->bitmap[idx] = val;
229+}
230+
/* One length-prefixed entry of the header blob buffer. */
typedef struct VmaBlob {
    uint32_t start; /* offset of the entry inside the blob buffer (hash key) */
    uint32_t len;   /* payload length in bytes */
    void *data;     /* payload; points into the reader's header data */
} VmaBlob;
236+
237+static const VmaBlob *get_header_blob(VmaReader *vmar, uint32_t pos)
238+{
239+ assert(vmar);
240+ assert(vmar->blob_hash);
241+
242+ return g_hash_table_lookup(vmar->blob_hash, &pos);
243+}
244+
245+static const char *get_header_str(VmaReader *vmar, uint32_t pos)
246+{
247+ const VmaBlob *blob = get_header_blob(vmar, pos);
248+ if (!blob) {
249+ return NULL;
250+ }
251+ const char *res = (char *)blob->data;
252+ if (res[blob->len-1] != '\0') {
253+ return NULL;
254+ }
255+ return res;
256+}
257+
/* read(2) wrapper that retries while the call is interrupted (EINTR).
 * Returns whatever the final read() call returned. */
static ssize_t
safe_read(int fd, unsigned char *buf, size_t count)
{
    for (;;) {
        ssize_t got = read(fd, buf, count);

        if (got >= 0 || errno != EINTR) {
            return got;
        }
    }
}
269+
/*
 * Read until @len bytes are stored in @buf or the stream ends.
 *
 * Returns the number of bytes read, which is smaller than @len only
 * when end of file was reached first, or -1 on a read error.
 */
static ssize_t
full_read(int fd, unsigned char *buf, size_t len)
{
    size_t total = 0;

    while (len > 0) {
        ssize_t got = safe_read(fd, buf, len);

        if (got == 0) {
            /* end of file: report what we have so far */
            return total;
        }

        if (got < 0) {
            return -1;
        }

        buf += got;
        total += got;
        len -= got;
    }

    return total;
}
300+
301+void vma_reader_destroy(VmaReader *vmar)
302+{
303+ assert(vmar);
304+
305+ if (vmar->fd >= 0) {
306+ close(vmar->fd);
307+ }
308+
309+ if (vmar->cdata_list) {
310+ g_list_free(vmar->cdata_list);
311+ }
312+
313+ int i;
314+ for (i = 1; i < 256; i++) {
315+ if (vmar->rstate[i].bitmap) {
316+ g_free(vmar->rstate[i].bitmap);
317+ }
318+ }
319+
320+ if (vmar->md5csum) {
321+ g_checksum_free(vmar->md5csum);
322+ }
323+
324+ if (vmar->blob_hash) {
325+ g_hash_table_destroy(vmar->blob_hash);
326+ }
327+
328+ if (vmar->head_data) {
329+ g_free(vmar->head_data);
330+ }
331+
332+ g_free(vmar);
333+
334+};
335+
336+static int vma_reader_read_head(VmaReader *vmar, Error **errp)
337+{
338+ assert(vmar);
339+ assert(errp);
340+ assert(*errp == NULL);
341+
342+ unsigned char md5sum[16];
343+ int i;
344+ int ret = 0;
345+
346+ vmar->head_data = g_malloc(sizeof(VmaHeader));
347+
348+ if (full_read(vmar->fd, vmar->head_data, sizeof(VmaHeader)) !=
349+ sizeof(VmaHeader)) {
350+ error_setg(errp, "can't read vma header - %s",
351+ errno ? strerror(errno) : "got EOF");
352+ return -1;
353+ }
354+
355+ VmaHeader *h = (VmaHeader *)vmar->head_data;
356+
357+ if (h->magic != VMA_MAGIC) {
358+ error_setg(errp, "not a vma file - wrong magic number");
359+ return -1;
360+ }
361+
362+ uint32_t header_size = GUINT32_FROM_BE(h->header_size);
363+ int need = header_size - sizeof(VmaHeader);
364+ if (need <= 0) {
365+ error_setg(errp, "wrong vma header size %d", header_size);
366+ return -1;
367+ }
368+
369+ vmar->head_data = g_realloc(vmar->head_data, header_size);
370+ h = (VmaHeader *)vmar->head_data;
371+
372+ if (full_read(vmar->fd, vmar->head_data + sizeof(VmaHeader), need) !=
373+ need) {
374+ error_setg(errp, "can't read vma header data - %s",
375+ errno ? strerror(errno) : "got EOF");
376+ return -1;
377+ }
378+
379+ memcpy(md5sum, h->md5sum, 16);
380+ memset(h->md5sum, 0, 16);
381+
382+ g_checksum_reset(vmar->md5csum);
383+ g_checksum_update(vmar->md5csum, vmar->head_data, header_size);
384+ gsize csize = 16;
385+ g_checksum_get_digest(vmar->md5csum, (guint8 *)(h->md5sum), &csize);
386+
387+ if (memcmp(md5sum, h->md5sum, 16) != 0) {
388+ error_setg(errp, "wrong vma header chechsum");
389+ return -1;
390+ }
391+
392+ /* we can modify header data after checksum verify */
393+ h->header_size = header_size;
394+
395+ h->version = GUINT32_FROM_BE(h->version);
396+ if (h->version != 1) {
397+ error_setg(errp, "wrong vma version %d", h->version);
398+ return -1;
399+ }
400+
401+ h->ctime = GUINT64_FROM_BE(h->ctime);
402+ h->blob_buffer_offset = GUINT32_FROM_BE(h->blob_buffer_offset);
403+ h->blob_buffer_size = GUINT32_FROM_BE(h->blob_buffer_size);
404+
405+ uint32_t bstart = h->blob_buffer_offset + 1;
406+ uint32_t bend = h->blob_buffer_offset + h->blob_buffer_size;
407+
408+ if (bstart <= sizeof(VmaHeader)) {
409+ error_setg(errp, "wrong vma blob buffer offset %d",
410+ h->blob_buffer_offset);
411+ return -1;
412+ }
413+
414+ if (bend > header_size) {
415+ error_setg(errp, "wrong vma blob buffer size %d/%d",
416+ h->blob_buffer_offset, h->blob_buffer_size);
417+ return -1;
418+ }
419+
420+ while ((bstart + 2) <= bend) {
421+ uint32_t size = vmar->head_data[bstart] +
422+ (vmar->head_data[bstart+1] << 8);
423+ if ((bstart + size + 2) <= bend) {
424+ VmaBlob *blob = g_new0(VmaBlob, 1);
425+ blob->start = bstart - h->blob_buffer_offset;
426+ blob->len = size;
427+ blob->data = vmar->head_data + bstart + 2;
428+ g_hash_table_insert(vmar->blob_hash, &blob->start, blob);
429+ }
430+ bstart += size + 2;
431+ }
432+
433+
434+ int count = 0;
435+ for (i = 1; i < 256; i++) {
436+ VmaDeviceInfoHeader *dih = &h->dev_info[i];
437+ uint32_t devname_ptr = GUINT32_FROM_BE(dih->devname_ptr);
438+ uint64_t size = GUINT64_FROM_BE(dih->size);
439+ const char *devname = get_header_str(vmar, devname_ptr);
440+
441+ if (size && devname) {
442+ count++;
443+ vmar->devinfo[i].size = size;
444+ vmar->devinfo[i].devname = devname;
445+
446+ if (strcmp(devname, "vmstate") == 0) {
447+ vmar->vmstate_stream = i;
448+ }
449+ }
450+ }
451+
452+ if (!count) {
453+ error_setg(errp, "vma does not contain data");
454+ return -1;
455+ }
456+
457+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
458+ uint32_t name_ptr = GUINT32_FROM_BE(h->config_names[i]);
459+ uint32_t data_ptr = GUINT32_FROM_BE(h->config_data[i]);
460+
461+ if (!(name_ptr && data_ptr)) {
462+ continue;
463+ }
464+ const char *name = get_header_str(vmar, name_ptr);
465+ const VmaBlob *blob = get_header_blob(vmar, data_ptr);
466+
467+ if (!(name && blob)) {
468+ error_setg(errp, "vma contains invalid data pointers");
469+ return -1;
470+ }
471+
472+ VmaConfigData *cdata = g_new0(VmaConfigData, 1);
473+ cdata->name = name;
474+ cdata->data = blob->data;
475+ cdata->len = blob->len;
476+
477+ vmar->cdata_list = g_list_append(vmar->cdata_list, cdata);
478+ }
479+
480+ return ret;
481+};
482+
483+VmaReader *vma_reader_create(const char *filename, Error **errp)
484+{
485+ assert(filename);
486+ assert(errp);
487+
488+ VmaReader *vmar = g_new0(VmaReader, 1);
489+
490+ vmar->fd = open(filename, O_RDONLY);
491+
492+ if (vmar->fd < 0) {
493+ error_setg(errp, "can't open file %s - %s\n", filename,
494+ strerror(errno));
495+ goto err;
496+ }
497+
498+ vmar->md5csum = g_checksum_new(G_CHECKSUM_MD5);
499+ if (!vmar->md5csum) {
500+ error_setg(errp, "can't allocate cmsum\n");
501+ goto err;
502+ }
503+
504+ vmar->blob_hash = g_hash_table_new_full(g_int32_hash, g_int32_equal,
505+ NULL, g_free);
506+
507+ if (vma_reader_read_head(vmar, errp) < 0) {
508+ goto err;
509+ }
510+
511+ return vmar;
512+
513+err:
514+ if (vmar) {
515+ vma_reader_destroy(vmar);
516+ }
517+
518+ return NULL;
519+}
520+
521+VmaHeader *vma_reader_get_header(VmaReader *vmar)
522+{
523+ assert(vmar);
524+ assert(vmar->head_data);
525+
526+ return (VmaHeader *)(vmar->head_data);
527+}
528+
529+GList *vma_reader_get_config_data(VmaReader *vmar)
530+{
531+ assert(vmar);
532+ assert(vmar->head_data);
533+
534+ return vmar->cdata_list;
535+}
536+
537+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id)
538+{
539+ assert(vmar);
540+ assert(dev_id);
541+
542+ if (vmar->devinfo[dev_id].size && vmar->devinfo[dev_id].devname) {
543+ return &vmar->devinfo[dev_id];
544+ }
545+
546+ return NULL;
547+}
548+
549+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockDriverState *bs,
550+ bool write_zeroes, Error **errp)
551+{
552+ assert(vmar);
553+ assert(bs != NULL);
554+ assert(dev_id);
555+ assert(vmar->rstate[dev_id].bs == NULL);
556+
557+ int64_t size = bdrv_getlength(bs);
558+ if (size != vmar->devinfo[dev_id].size) {
559+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
560+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
561+ size, vmar->devinfo[dev_id].size);
562+ return -1;
563+ }
564+
565+ vmar->rstate[dev_id].bs = bs;
566+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
567+
568+ int64_t bitmap_size = (size/BDRV_SECTOR_SIZE) +
569+ (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG - 1;
570+ bitmap_size /= (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG;
571+
572+ vmar->rstate[dev_id].bitmap_size = bitmap_size;
573+ vmar->rstate[dev_id].bitmap = g_new0(unsigned long, bitmap_size);
574+
575+ return 0;
576+}
577+
/* write(2) wrapper that retries while the call is interrupted (EINTR).
 * Returns whatever the final write() call returned. */
static ssize_t safe_write(int fd, void *buf, size_t count)
{
    ssize_t n;

    do {
        n = write(fd, buf, count);
    } while (n < 0 && errno == EINTR);

    return n;
}

/*
 * Write all @len bytes of @buf to @fd, continuing after short writes.
 *
 * Returns @len on success or -1 on a write error.  The return type is
 * signed so that a caller can actually observe the error value.
 */
static ssize_t full_write(int fd, void *buf, size_t len)
{
    /* byte pointer: arithmetic on 'void *' is not standard C */
    unsigned char *pos = buf;
    size_t total = 0;

    while (len > 0) {
        ssize_t n = safe_write(fd, pos, len);
        if (n < 0) {
            return -1;
        }
        pos += n;
        total += n;
        len -= n;
    }

    return total;
}
613+
614+static int restore_write_data(VmaReader *vmar, guint8 dev_id,
615+ BlockDriverState *bs, int vmstate_fd,
616+ unsigned char *buf, int64_t sector_num,
617+ int nb_sectors, Error **errp)
618+{
619+ assert(vmar);
620+
621+ if (dev_id == vmar->vmstate_stream) {
622+ if (vmstate_fd >= 0) {
623+ int len = nb_sectors * BDRV_SECTOR_SIZE;
624+ int res = full_write(vmstate_fd, buf, len);
625+ if (res < 0) {
626+ error_setg(errp, "write vmstate failed %d", res);
627+ return -1;
628+ }
629+ }
630+ } else {
631+ int res = bdrv_write(bs, sector_num, buf, nb_sectors);
632+ if (res < 0) {
633+ error_setg(errp, "bdrv_write to %s failed (%d)",
634+ bdrv_get_device_name(bs), res);
635+ return -1;
636+ }
637+ }
638+ return 0;
639+}
640+static int restore_extent(VmaReader *vmar, unsigned char *buf,
641+ int extent_size, int vmstate_fd, Error **errp)
642+{
643+ assert(vmar);
644+ assert(buf);
645+
646+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
647+ int start = VMA_EXTENT_HEADER_SIZE;
648+ int i;
649+
650+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
651+ uint64_t block_info = GUINT64_FROM_BE(ehead->blockinfo[i]);
652+ uint32_t cluster_num = block_info & 0xffffffff;
653+ uint8_t dev_id = (block_info >> 32) & 0xff;
654+ uint16_t mask = block_info >> (32+16);
655+ int64_t max_sector;
656+
657+ if (!dev_id) {
658+ continue;
659+ }
660+
661+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
662+ BlockDriverState *bs = NULL;
663+
664+ if (dev_id != vmar->vmstate_stream) {
665+ bs = rstate->bs;
666+ if (!bs) {
667+ error_setg(errp, "got wrong dev id %d", dev_id);
668+ return -1;
669+ }
670+
671+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
672+ error_setg(errp, "found duplicated cluster %d for stream %s",
673+ cluster_num, vmar->devinfo[dev_id].devname);
674+ return -1;
675+ }
676+ vma_reader_set_bitmap(rstate, cluster_num, 1);
677+
678+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
679+ } else {
680+ max_sector = G_MAXINT64;
681+ if (cluster_num != vmar->vmstate_clusters) {
682+ error_setg(errp, "found out of order vmstate data");
683+ return -1;
684+ }
685+ vmar->vmstate_clusters++;
686+ }
687+
688+ /* try to write whole clusters to speedup restore */
689+ if (mask == 0xffff) {
690+ if ((start + VMA_CLUSTER_SIZE) > extent_size) {
691+ error_setg(errp, "short vma extent - too many blocks");
692+ return -1;
693+ }
694+ int64_t sector_num = (cluster_num * VMA_CLUSTER_SIZE) /
695+ BDRV_SECTOR_SIZE;
696+ int64_t end_sector = sector_num +
697+ VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE;
698+
699+ if (end_sector > max_sector) {
700+ end_sector = max_sector;
701+ }
702+
703+ if (end_sector <= sector_num) {
704+ error_setg(errp, "got wrong block address - write bejond end");
705+ return -1;
706+ }
707+
708+ int nb_sectors = end_sector - sector_num;
709+ if (restore_write_data(vmar, dev_id, bs, vmstate_fd, buf + start,
710+ sector_num, nb_sectors, errp) < 0) {
711+ return -1;
712+ }
713+
714+ start += VMA_CLUSTER_SIZE;
715+ } else {
716+ int j;
717+ int bit = 1;
718+
719+ for (j = 0; j < 16; j++) {
720+ int64_t sector_num = (cluster_num*VMA_CLUSTER_SIZE +
721+ j*VMA_BLOCK_SIZE)/BDRV_SECTOR_SIZE;
722+
723+ int64_t end_sector = sector_num +
724+ VMA_BLOCK_SIZE/BDRV_SECTOR_SIZE;
725+ if (end_sector > max_sector) {
726+ end_sector = max_sector;
727+ }
728+
729+ if (mask & bit) {
730+ if ((start + VMA_BLOCK_SIZE) > extent_size) {
731+ error_setg(errp, "short vma extent - too many blocks");
732+ return -1;
733+ }
734+
735+ if (end_sector <= sector_num) {
736+ error_setg(errp, "got wrong block address - "
737+ "write bejond end");
738+ return -1;
739+ }
740+
741+ int nb_sectors = end_sector - sector_num;
742+ if (restore_write_data(vmar, dev_id, bs, vmstate_fd,
743+ buf + start, sector_num,
744+ nb_sectors, errp) < 0) {
745+ return -1;
746+ }
747+
748+ start += VMA_BLOCK_SIZE;
749+
750+ } else {
751+
752+ if (rstate->write_zeroes & (end_sector > sector_num)) {
753+ /* Todo: use bdrv_co_write_zeroes (but that need to
754+ * be run inside coroutine?)
755+ */
756+ int nb_sectors = end_sector - sector_num;
757+ if (restore_write_data(vmar, dev_id, bs, vmstate_fd,
758+ zero_vma_block, sector_num,
759+ nb_sectors, errp) < 0) {
760+ return -1;
761+ }
762+ }
763+ }
764+
765+ bit = bit << 1;
766+ }
767+ }
768+ }
769+
770+ if (start != extent_size) {
771+ error_setg(errp, "vma extent error - missing blocks");
772+ return -1;
773+ }
774+
775+ return 0;
776+}
777+
778+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, Error **errp)
779+{
780+ assert(vmar);
781+ assert(vmar->head_data);
782+
783+ int ret = 0;
784+ unsigned char buf[VMA_MAX_EXTENT_SIZE];
785+ int buf_pos = 0;
786+ unsigned char md5sum[16];
787+ VmaHeader *h = (VmaHeader *)vmar->head_data;
788+
789+
790+ while (1) {
791+ int bytes = full_read(vmar->fd, buf + buf_pos, sizeof(buf) - buf_pos);
792+ if (bytes < 0) {
793+ error_setg(errp, "read failed - %s", strerror(errno));
794+ return -1;
795+ }
796+
797+ buf_pos += bytes;
798+
799+ if (!buf_pos) {
800+ break; /* EOF */
801+ }
802+
803+ if (buf_pos < VMA_EXTENT_HEADER_SIZE) {
804+ error_setg(errp, "read short extent (%d bytes)", buf_pos);
805+ return -1;
806+ }
807+
808+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
809+
810+ /* extract md5sum */
811+ memcpy(md5sum, ehead->md5sum, sizeof(ehead->md5sum));
812+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
813+
814+ g_checksum_reset(vmar->md5csum);
815+ g_checksum_update(vmar->md5csum, buf, VMA_EXTENT_HEADER_SIZE);
816+ gsize csize = 16;
817+ g_checksum_get_digest(vmar->md5csum, ehead->md5sum, &csize);
818+
819+ if (memcmp(md5sum, ehead->md5sum, 16) != 0) {
820+ error_setg(errp, "wrong vma extent header chechsum");
821+ return -1;
822+ }
823+
824+ if (memcmp(h->uuid, ehead->uuid, sizeof(ehead->uuid)) != 0) {
825+ error_setg(errp, "wrong vma extent uuid");
826+ return -1;
827+ }
828+
829+ if (ehead->magic != VMA_EXTENT_MAGIC || ehead->reserved1 != 0) {
830+ error_setg(errp, "wrong vma extent header magic");
831+ return -1;
832+ }
833+
834+ int block_count = GUINT16_FROM_BE(ehead->block_count);
835+ int extent_size = VMA_EXTENT_HEADER_SIZE + block_count*VMA_BLOCK_SIZE;
836+
837+ if (buf_pos < extent_size) {
838+ error_setg(errp, "short vma extent (%d < %d)", buf_pos,
839+ extent_size);
840+ return -1;
841+ }
842+
843+ if (restore_extent(vmar, buf, extent_size, vmstate_fd, errp) < 0) {
844+ return -1;
845+ }
846+
847+ if (buf_pos > extent_size) {
848+ memmove(buf, buf + extent_size, buf_pos - extent_size);
849+ buf_pos = buf_pos - extent_size;
850+ } else {
851+ buf_pos = 0;
852+ }
853+ }
854+
855+ bdrv_drain_all();
856+
857+ int i;
858+ for (i = 1; i < 256; i++) {
859+ VmaRestoreState *rstate = &vmar->rstate[i];
860+ if (!rstate->bs) {
861+ continue;
862+ }
863+
864+ if (bdrv_flush(rstate->bs) < 0) {
865+ error_setg(errp, "vma bdrv_flush %s failed",
866+ vmar->devinfo[i].devname);
867+ return -1;
868+ }
869+
870+ if (vmar->devinfo[i].size &&
871+ (strcmp(vmar->devinfo[i].devname, "vmstate") != 0)) {
872+ assert(rstate->bitmap);
873+
874+ int64_t cluster_num, end;
875+
876+ end = (vmar->devinfo[i].size + VMA_CLUSTER_SIZE - 1) /
877+ VMA_CLUSTER_SIZE;
878+
879+ for (cluster_num = 0; cluster_num < end; cluster_num++) {
880+ if (!vma_reader_get_bitmap(rstate, cluster_num)) {
881+ error_setg(errp, "detected missing cluster %zd "
882+ "for stream %s", cluster_num,
883+ vmar->devinfo[i].devname);
884+ return -1;
885+ }
886+ }
887+ }
888+ }
889+
890+ return ret;
891+}
892+
893diff --git a/vma-writer.c b/vma-writer.c
894new file mode 100644
895index 0000000..02d4447
896--- /dev/null
897+++ b/vma-writer.c
898@@ -0,0 +1,900 @@
899+/*
900+ * VMA: Virtual Machine Archive
901+ *
902+ * Copyright (C) 2012 Proxmox Server Solutions
903+ *
904+ * Authors:
905+ * Dietmar Maurer (dietmar@proxmox.com)
906+ *
907+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
908+ * See the COPYING file in the top-level directory.
909+ *
910+ */
911+
912+#include <stdio.h>
913+#include <errno.h>
914+#include <unistd.h>
915+#include <stdio.h>
916+#include <string.h>
917+#include <sys/types.h>
918+#include <sys/stat.h>
919+#include <fcntl.h>
920+#include <glib.h>
921+#include <uuid/uuid.h>
922+
923+#include "qemu-common.h"
924+#include "qemu_socket.h"
925+#include "qemu-coroutine.h"
926+#include "qemu-aio.h"
927+#include "qemu/ratelimit.h"
928+#include "vma.h"
929+#include "block.h"
930+
931+#define DEBUG_VMA 0
932+
933+#define DPRINTF(fmt, ...)\
934+ do { if (DEBUG_VMA) { printf("vma: " fmt, ## __VA_ARGS__); } } while (0)
935+
936+#define WRITE_BUFFERS 5
937+
938+typedef struct VmaAIOCB VmaAIOCB;
939+struct VmaAIOCB {
940+ VmaWriter *vmaw;
941+ unsigned char buffer[VMA_MAX_EXTENT_SIZE];
942+ size_t bytes;
943+ Coroutine *co;
944+};
945+
946+struct VmaWriter {
947+ int fd;
948+ FILE *cmd;
949+ int status;
950+ char errmsg[8192];
951+ uuid_t uuid;
952+ bool header_written;
953+ bool closed;
954+
955+ /* we always write extents */
956+ unsigned char outbuf[VMA_MAX_EXTENT_SIZE];
957+ int outbuf_pos; /* in bytes */
958+ int outbuf_count; /* in VMA_BLOCKS */
959+ uint64_t outbuf_block_info[VMA_BLOCKS_PER_EXTENT];
960+
961+ VmaAIOCB aiocbs[WRITE_BUFFERS];
962+ CoQueue wqueue;
963+
964+ GChecksum *md5csum;
965+ CoMutex writer_lock;
966+ CoMutex flush_lock;
967+ Coroutine *co_writer;
968+ RateLimit limit;
969+ uint64_t delay_ns;
970+
971+ /* drive informations */
972+ VmaStreamInfo stream_info[256];
973+ guint stream_count;
974+
975+ guint8 vmstate_stream;
976+ uint32_t vmstate_clusters;
977+
978+ /* header blob table */
979+ char *header_blob_table;
980+ uint32_t header_blob_table_size;
981+ uint32_t header_blob_table_pos;
982+
983+ /* store for config blobs */
984+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
985+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
986+ uint32_t config_count;
987+};
988+
989+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...)
990+{
991+ va_list ap;
992+
993+ if (vmaw->status < 0) {
994+ return;
995+ }
996+
997+ vmaw->status = -1;
998+
999+ va_start(ap, fmt);
1000+ g_vsnprintf(vmaw->errmsg, sizeof(vmaw->errmsg), fmt, ap);
1001+ va_end(ap);
1002+
1003+ DPRINTF("vma_writer_set_error: %s\n", vmaw->errmsg);
1004+}
1005+
1006+static uint32_t allocate_header_blob(VmaWriter *vmaw, const char *data,
1007+ size_t len)
1008+{
1009+ if (len > 65535) {
1010+ return 0;
1011+ }
1012+
1013+ if (!vmaw->header_blob_table ||
1014+ (vmaw->header_blob_table_size <
1015+ (vmaw->header_blob_table_pos + len + 2))) {
1016+ int newsize = vmaw->header_blob_table_size + ((len + 2 + 511)/512)*512;
1017+
1018+ vmaw->header_blob_table = g_realloc(vmaw->header_blob_table, newsize);
1019+ memset(vmaw->header_blob_table + vmaw->header_blob_table_size,
1020+ 0, newsize - vmaw->header_blob_table_size);
1021+ vmaw->header_blob_table_size = newsize;
1022+ }
1023+
1024+ uint32_t cpos = vmaw->header_blob_table_pos;
1025+ vmaw->header_blob_table[cpos] = len & 255;
1026+ vmaw->header_blob_table[cpos+1] = (len >> 8) & 255;
1027+ memcpy(vmaw->header_blob_table + cpos + 2, data, len);
1028+ vmaw->header_blob_table_pos += len + 2;
1029+ return cpos;
1030+}
1031+
1032+static uint32_t allocate_header_string(VmaWriter *vmaw, const char *str)
1033+{
1034+ assert(vmaw);
1035+
1036+ size_t len = strlen(str) + 1;
1037+
1038+ return allocate_header_blob(vmaw, str, len);
1039+}
1040+
1041+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
1042+ gsize len)
1043+{
1044+ assert(vmaw);
1045+ assert(!vmaw->header_written);
1046+ assert(vmaw->config_count < VMA_MAX_CONFIGS);
1047+ assert(name);
1048+ assert(data);
1049+ assert(len);
1050+
1051+ uint32_t name_ptr = allocate_header_string(vmaw, name);
1052+ if (!name_ptr) {
1053+ return -1;
1054+ }
1055+
1056+ uint32_t data_ptr = allocate_header_blob(vmaw, data, len);
1057+ if (!data_ptr) {
1058+ return -1;
1059+ }
1060+
1061+ vmaw->config_names[vmaw->config_count] = name_ptr;
1062+ vmaw->config_data[vmaw->config_count] = data_ptr;
1063+
1064+ vmaw->config_count++;
1065+
1066+ return 0;
1067+}
1068+
1069+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
1070+ size_t size)
1071+{
1072+ assert(vmaw);
1073+ assert(devname);
1074+ assert(!vmaw->status);
1075+
1076+ if (vmaw->header_written) {
1077+ vma_writer_set_error(vmaw, "vma_writer_register_stream: header "
1078+ "already written");
1079+ return -1;
1080+ }
1081+
1082+ guint n = vmaw->stream_count + 1;
1083+
1084+ /* we can have dev_ids form 1 to 255 (0 reserved)
1085+ * 255(-1) reseverd for safety
1086+ */
1087+ if (n > 254) {
1088+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1089+ "too many drives");
1090+ return -1;
1091+ }
1092+
1093+ if (size <= 0) {
1094+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1095+ "got strange size %zd", size);
1096+ return -1;
1097+ }
1098+
1099+ DPRINTF("vma_writer_register_stream %s %zu %d\n", devname, size, n);
1100+
1101+ vmaw->stream_info[n].devname = g_strdup(devname);
1102+ vmaw->stream_info[n].size = size;
1103+
1104+ vmaw->stream_info[n].cluster_count = (size + VMA_CLUSTER_SIZE - 1) /
1105+ VMA_CLUSTER_SIZE;
1106+
1107+ vmaw->stream_count = n;
1108+
1109+ if (strcmp(devname, "vmstate") == 0) {
1110+ vmaw->vmstate_stream = n;
1111+ }
1112+
1113+ return n;
1114+}
1115+
1116+static void vma_co_continue_write(void *opaque)
1117+{
1118+ VmaWriter *vmaw = opaque;
1119+
1120+ qemu_aio_set_fd_handler(vmaw->fd, NULL, NULL, NULL, NULL);
1121+
1122+ DPRINTF("vma_co_continue_write\n");
1123+ qemu_coroutine_enter(vmaw->co_writer, NULL);
1124+}
1125+
1126+static ssize_t coroutine_fn
1127+vma_co_write(VmaWriter *vmaw, const void *buf, size_t bytes)
1128+{
1129+ size_t done = 0;
1130+ ssize_t ret;
1131+
1132+ /* atomic writes (we cannot interleave writes) */
1133+ qemu_co_mutex_lock(&vmaw->writer_lock);
1134+
1135+ DPRINTF("vma_co_write enter %zd\n", bytes);
1136+
1137+ while (done < bytes) {
1138+ ret = write(vmaw->fd, buf + done, bytes - done);
1139+ if (ret > 0) {
1140+ done += ret;
1141+ DPRINTF("vma_co_write written %zd %zd\n", done, ret);
1142+ } else if (ret < 0) {
1143+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
1144+ DPRINTF("vma_co_write yield %zd\n", done);
1145+
1146+ vmaw->co_writer = qemu_coroutine_self();
1147+ qemu_aio_set_fd_handler(vmaw->fd, NULL, vma_co_continue_write,
1148+ NULL, vmaw);
1149+
1150+ qemu_coroutine_yield();
1151+ DPRINTF("vma_co_write restart %zd\n", done);
1152+ } else {
1153+ vma_writer_set_error(vmaw, "vma_co_write write error - %s",
1154+ strerror(errno));
1155+ done = -1; /* always return failure for partial writes */
1156+ break;
1157+ }
1158+ } else if (ret == 0) {
1159+ /* should not happen - simply try again */
1160+ }
1161+ }
1162+
1163+ qemu_co_mutex_unlock(&vmaw->writer_lock);
1164+
1165+ DPRINTF("vma_co_write leave %zd\n", done);
1166+ return done;
1167+}
1168+
1169+static void coroutine_fn vma_co_writer_task(void *opaque)
1170+{
1171+ VmaAIOCB *cb = opaque;
1172+
1173+ DPRINTF("vma_co_writer_task start\n");
1174+
1175+ int64_t done = vma_co_write(cb->vmaw, cb->buffer, cb->bytes);
1176+ DPRINTF("vma_co_writer_task write done %zd\n", done);
1177+
1178+ if (done != cb->bytes) {
1179+ DPRINTF("vma_co_writer_task failed write %zd %zd", cb->bytes, done);
1180+ vma_writer_set_error(cb->vmaw, "vma_co_writer_task failed write %zd",
1181+ done);
1182+ }
1183+
1184+ cb->bytes = 0;
1185+
1186+ qemu_co_queue_next(&cb->vmaw->wqueue);
1187+
1188+ DPRINTF("vma_co_writer_task end\n");
1189+}
1190+
1191+static void coroutine_fn vma_queue_flush(VmaWriter *vmaw)
1192+{
1193+ DPRINTF("vma_queue_flush enter\n");
1194+
1195+ assert(vmaw);
1196+
1197+ while (1) {
1198+ int i;
1199+ VmaAIOCB *cb = NULL;
1200+ for (i = 0; i < WRITE_BUFFERS; i++) {
1201+ if (vmaw->aiocbs[i].bytes) {
1202+ cb = &vmaw->aiocbs[i];
1203+ DPRINTF("FOUND USED AIO BUFFER %d %zd\n", i,
1204+ vmaw->aiocbs[i].bytes);
1205+ break;
1206+ }
1207+ }
1208+ if (!cb) {
1209+ break;
1210+ }
1211+ qemu_co_queue_wait(&vmaw->wqueue);
1212+ }
1213+
1214+ DPRINTF("vma_queue_flush leave\n");
1215+}
1216+
1217+/**
1218+ * NOTE: pipe buffer size in only 4096 bytes on linux (see 'ulimit -a')
1219+ * So we need to create a coroutione to allow 'parallel' execution.
1220+ */
1221+static ssize_t coroutine_fn
1222+vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes)
1223+{
1224+ DPRINTF("vma_queue_write enter %zd\n", bytes);
1225+
1226+ assert(vmaw);
1227+ assert(buf);
1228+ assert(bytes <= VMA_MAX_EXTENT_SIZE);
1229+
1230+ VmaAIOCB *cb = NULL;
1231+ while (!cb) {
1232+ int i;
1233+ for (i = 0; i < WRITE_BUFFERS; i++) {
1234+ if (!vmaw->aiocbs[i].bytes) {
1235+ cb = &vmaw->aiocbs[i];
1236+ break;
1237+ }
1238+ }
1239+ if (!cb) {
1240+ qemu_co_queue_wait(&vmaw->wqueue);
1241+ }
1242+ }
1243+
1244+ memcpy(cb->buffer, buf, bytes);
1245+ cb->bytes = bytes;
1246+ cb->vmaw = vmaw;
1247+
1248+ DPRINTF("vma_queue_write start %zd\n", bytes);
1249+ cb->co = qemu_coroutine_create(vma_co_writer_task);
1250+ qemu_coroutine_enter(cb->co, cb);
1251+
1252+ DPRINTF("vma_queue_write leave\n");
1253+
1254+ return bytes;
1255+}
1256+
1257+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, int64_t speed,
1258+ Error **errp)
1259+{
1260+ const char *p;
1261+
1262+ assert(sizeof(VmaHeader) == (4096 + 8192));
1263+ assert(sizeof(VmaExtentHeader) == 512);
1264+
1265+ VmaWriter *vmaw = g_new0(VmaWriter, 1);
1266+ vmaw->fd = -1;
1267+
1268+ vmaw->md5csum = g_checksum_new(G_CHECKSUM_MD5);
1269+ if (!vmaw->md5csum) {
1270+ error_setg(errp, "can't allocate cmsum\n");
1271+ goto err;
1272+ }
1273+
1274+ if (strstart(filename, "exec:", &p)) {
1275+ vmaw->cmd = popen(p, "w");
1276+ if (vmaw->cmd == NULL) {
1277+ error_setg(errp, "can't popen command '%s' - %s\n", p,
1278+ strerror(errno));
1279+ goto err;
1280+ }
1281+ vmaw->fd = fileno(vmaw->cmd);
1282+ socket_set_nonblock(vmaw->fd);
1283+
1284+ } else {
1285+ vmaw->fd = open(filename, O_NONBLOCK|O_WRONLY|O_CREAT|O_EXCL, 0644);
1286+ if (vmaw->fd < 0) {
1287+ error_setg(errp, "can't open file %s - %s\n", filename,
1288+ strerror(errno));
1289+ goto err;
1290+ }
1291+ }
1292+
1293+ vmaw->outbuf_count = 0;
1294+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1295+
1296+ vmaw->header_blob_table_pos = 1; /* start at pos 1 */
1297+
1298+ qemu_co_mutex_init(&vmaw->writer_lock);
1299+ qemu_co_mutex_init(&vmaw->flush_lock);
1300+ qemu_co_queue_init(&vmaw->wqueue);
1301+
1302+ uuid_copy(vmaw->uuid, uuid);
1303+
1304+ if (speed <= 0) {
1305+ speed = 10*1024*1024*1024LLU; /* default 10GB/s */
1306+ }
1307+
1308+ ratelimit_set_speed(&vmaw->limit, speed, 100000000ULL /* 0.1 sec */);
1309+
1310+ return vmaw;
1311+
1312+err:
1313+ if (vmaw) {
1314+ if (vmaw->cmd) {
1315+ pclose(vmaw->cmd);
1316+ } else if (vmaw->fd >= 0) {
1317+ close(vmaw->fd);
1318+ }
1319+
1320+ if (vmaw->md5csum) {
1321+ g_checksum_free(vmaw->md5csum);
1322+ }
1323+
1324+ g_free(vmaw);
1325+ }
1326+
1327+ return NULL;
1328+}
1329+
1330+static int coroutine_fn vma_write_header(VmaWriter *vmaw)
1331+{
1332+ assert(vmaw);
1333+ int header_clusters = 8;
1334+ char buf[65536*header_clusters];
1335+ VmaHeader *head = (VmaHeader *)buf;
1336+
1337+ int i;
1338+
1339+ DPRINTF("VMA WRITE HEADER\n");
1340+
1341+ if (vmaw->status < 0) {
1342+ return vmaw->status;
1343+ }
1344+
1345+ memset(buf, 0, sizeof(buf));
1346+
1347+ head->magic = VMA_MAGIC;
1348+ head->version = GUINT32_TO_BE(1); /* v1 */
1349+ memcpy(head->uuid, vmaw->uuid, 16);
1350+
1351+ time_t ctime = time(NULL);
1352+ head->ctime = GUINT64_TO_BE(ctime);
1353+
1354+ if (!vmaw->stream_count) {
1355+ return -1;
1356+ }
1357+
1358+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
1359+ head->config_names[i] = GUINT32_TO_BE(vmaw->config_names[i]);
1360+ head->config_data[i] = GUINT32_TO_BE(vmaw->config_data[i]);
1361+ }
1362+
1363+ /* 32 bytes per device (12 used currently) = 8192 bytes max */
1364+ for (i = 1; i <= 254; i++) {
1365+ VmaStreamInfo *si = &vmaw->stream_info[i];
1366+ if (si->size) {
1367+ assert(si->devname);
1368+ uint32_t devname_ptr = allocate_header_string(vmaw, si->devname);
1369+ if (!devname_ptr) {
1370+ return -1;
1371+ }
1372+ head->dev_info[i].devname_ptr = GUINT32_TO_BE(devname_ptr);
1373+ head->dev_info[i].size = GUINT64_TO_BE(si->size);
1374+ }
1375+ }
1376+
1377+ uint32_t header_size = sizeof(VmaHeader) + vmaw->header_blob_table_size;
1378+ head->header_size = GUINT32_TO_BE(header_size);
1379+
1380+ if (header_size > sizeof(buf)) {
1381+ return -1; /* just to be sure */
1382+ }
1383+
1384+ uint32_t blob_buffer_offset = sizeof(VmaHeader);
1385+ memcpy(buf + blob_buffer_offset, vmaw->header_blob_table,
1386+ vmaw->header_blob_table_size);
1387+ head->blob_buffer_offset = GUINT32_TO_BE(blob_buffer_offset);
1388+ head->blob_buffer_size = GUINT32_TO_BE(vmaw->header_blob_table_pos);
1389+
1390+ g_checksum_reset(vmaw->md5csum);
1391+ g_checksum_update(vmaw->md5csum, (const guchar *)buf, header_size);
1392+ gsize csize = 16;
1393+ g_checksum_get_digest(vmaw->md5csum, (guint8 *)(head->md5sum), &csize);
1394+
1395+ return vma_queue_write(vmaw, buf, header_size);
1396+}
1397+
1398+static int coroutine_fn vma_writer_flush(VmaWriter *vmaw)
1399+{
1400+ assert(vmaw);
1401+
1402+ int ret;
1403+ int i;
1404+
1405+ if (vmaw->status < 0) {
1406+ return vmaw->status;
1407+ }
1408+
1409+ if (!vmaw->header_written) {
1410+ vmaw->header_written = true;
1411+ ret = vma_write_header(vmaw);
1412+ if (ret < 0) {
1413+ vma_writer_set_error(vmaw, "vma_writer_flush: write header failed");
1414+ return ret;
1415+ }
1416+ }
1417+
1418+ DPRINTF("VMA WRITE FLUSH %d %d\n", vmaw->outbuf_count, vmaw->outbuf_pos);
1419+
1420+
1421+ VmaExtentHeader *ehead = (VmaExtentHeader *)vmaw->outbuf;
1422+
1423+ ehead->magic = VMA_EXTENT_MAGIC;
1424+ ehead->reserved1 = 0;
1425+
1426+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1427+ ehead->blockinfo[i] = GUINT64_TO_BE(vmaw->outbuf_block_info[i]);
1428+ }
1429+
1430+ guint16 block_count = (vmaw->outbuf_pos - VMA_EXTENT_HEADER_SIZE) /
1431+ VMA_BLOCK_SIZE;
1432+
1433+ ehead->block_count = GUINT16_TO_BE(block_count);
1434+
1435+ memcpy(ehead->uuid, vmaw->uuid, sizeof(ehead->uuid));
1436+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
1437+
1438+ g_checksum_reset(vmaw->md5csum);
1439+ g_checksum_update(vmaw->md5csum, vmaw->outbuf, VMA_EXTENT_HEADER_SIZE);
1440+ gsize csize = 16;
1441+ g_checksum_get_digest(vmaw->md5csum, ehead->md5sum, &csize);
1442+
1443+ int bytes = vmaw->outbuf_pos;
1444+ ret = vma_queue_write(vmaw, vmaw->outbuf, bytes);
1445+ if (ret != bytes) {
1446+ vma_writer_set_error(vmaw, "vma_writer_flush: failed write");
1447+ }
1448+
1449+ vmaw->outbuf_count = 0;
1450+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1451+
1452+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1453+ vmaw->outbuf_block_info[i] = 0;
1454+ }
1455+
1456+ return vmaw->status;
1457+}
1458+
1459+static int vma_count_open_streams(VmaWriter *vmaw)
1460+{
1461+ g_assert(vmaw != NULL);
1462+
1463+ int i;
1464+ int open_drives = 0;
1465+ for (i = 0; i <= 255; i++) {
1466+ if (vmaw->stream_info[i].size && !vmaw->stream_info[i].finished) {
1467+ open_drives++;
1468+ }
1469+ }
1470+
1471+ return open_drives;
1472+}
1473+
1474+/**
1475+ * all jobs should call this when there is no more data
1476+ * Returns: number of remaining stream (0 ==> finished)
1477+ */
1478+int coroutine_fn
1479+vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id)
1480+{
1481+ g_assert(vmaw != NULL);
1482+
1483+ DPRINTF("vma_writer_set_status %d\n", dev_id);
1484+ if (!vmaw->stream_info[dev_id].size) {
1485+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1486+ "no such stream %d", dev_id);
1487+ return -1;
1488+ }
1489+ if (vmaw->stream_info[dev_id].finished) {
1490+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1491+ "stream already closed %d", dev_id);
1492+ return -1;
1493+ }
1494+
1495+ vmaw->stream_info[dev_id].finished = true;
1496+
1497+ int open_drives = vma_count_open_streams(vmaw);
1498+
1499+ if (open_drives <= 0) {
1500+ DPRINTF("vma_writer_set_status all drives completed\n");
1501+ qemu_co_mutex_lock(&vmaw->flush_lock);
1502+ int ret = vma_writer_flush(vmaw);
1503+ qemu_co_mutex_unlock(&vmaw->flush_lock);
1504+ if (ret < 0) {
1505+ vma_writer_set_error(vmaw, "vma_writer_close_stream: flush failed");
1506+ }
1507+ }
1508+
1509+ return open_drives;
1510+}
1511+
1512+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status)
1513+{
1514+ int i;
1515+
1516+ g_assert(vmaw != NULL);
1517+
1518+ if (status) {
1519+ status->status = vmaw->status;
1520+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
1521+ for (i = 0; i <= 255; i++) {
1522+ status->stream_info[i] = vmaw->stream_info[i];
1523+ }
1524+
1525+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
1526+ }
1527+
1528+ status->closed = vmaw->closed;
1529+
1530+ return vmaw->status;
1531+}
1532+
1533+static int vma_writer_get_buffer(VmaWriter *vmaw)
1534+{
1535+
1536+ /* wait until buffer is available */
1537+ while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) {
1538+ int res = 0;
1539+
1540+ qemu_co_mutex_lock(&vmaw->flush_lock);
1541+ res = vma_writer_flush(vmaw);
1542+ qemu_co_mutex_unlock(&vmaw->flush_lock);
1543+
1544+ if (res < 0) {
1545+ vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed");
1546+ return -1;
1547+ }
1548+ }
1549+
1550+ return 0;
1551+}
1552+
1553+
1554+int64_t coroutine_fn
1555+vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
1556+ unsigned char *buf, size_t *zero_bytes)
1557+{
1558+ g_assert(vmaw != NULL);
1559+ g_assert(zero_bytes != NULL);
1560+
1561+ *zero_bytes = 0;
1562+
1563+ if (vmaw->status < 0) {
1564+ return vmaw->status;
1565+ }
1566+
1567+ if (!dev_id || !vmaw->stream_info[dev_id].size) {
1568+ vma_writer_set_error(vmaw, "vma_writer_write: "
1569+ "no such stream %d", dev_id);
1570+ return -1;
1571+ }
1572+
1573+ if (vmaw->stream_info[dev_id].finished) {
1574+ vma_writer_set_error(vmaw, "vma_writer_write: "
1575+ "stream already closed %d", dev_id);
1576+ return -1;
1577+ }
1578+
1579+
1580+ if (cluster_num >= (((uint64_t)1)<<32)) {
1581+ vma_writer_set_error(vmaw, "vma_writer_write: "
1582+ "cluster number out of range");
1583+ return -1;
1584+ }
1585+
1586+ if (dev_id == vmaw->vmstate_stream) {
1587+ if (cluster_num != vmaw->vmstate_clusters) {
1588+ vma_writer_set_error(vmaw, "vma_writer_write: "
1589+ "non sequential vmstate write");
1590+ }
1591+ vmaw->vmstate_clusters++;
1592+ } else if (cluster_num >= vmaw->stream_info[dev_id].cluster_count) {
1593+ vma_writer_set_error(vmaw, "vma_writer_write: cluster number too big");
1594+ return -1;
1595+ }
1596+
1597+ /* wait until buffer is available */
1598+ if (vma_writer_get_buffer(vmaw) < 0) {
1599+ vma_writer_set_error(vmaw, "vma_writer_write: "
1600+ "vma_writer_get_buffer failed");
1601+ return -1;
1602+ }
1603+
1604+ DPRINTF("VMA WRITE %zd\n", cluster_num);
1605+
1606+ int i;
1607+ int bit = 1;
1608+ uint16_t mask = 0;
1609+ for (i = 0; i < 16; i++) {
1610+ unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
1611+ if (buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) {
1612+ DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
1613+ vmaw->stream_info[dev_id].zero_bytes += VMA_BLOCK_SIZE;
1614+ *zero_bytes += VMA_BLOCK_SIZE;
1615+ } else {
1616+ mask |= bit;
1617+ memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock, VMA_BLOCK_SIZE);
1618+ vmaw->outbuf_pos += VMA_BLOCK_SIZE;
1619+
1620+ vmaw->delay_ns = ratelimit_calculate_delay(&vmaw->limit,
1621+ VMA_BLOCK_SIZE);
1622+ if (vmaw->delay_ns) {
1623+ co_sleep_ns(rt_clock, vmaw->delay_ns);
1624+ }
1625+ }
1626+
1627+ bit = bit << 1;
1628+ }
1629+
1630+ uint64_t block_info = ((uint64_t)mask) << (32+16);
1631+ block_info |= ((uint64_t)dev_id) << 32;
1632+ block_info |= (cluster_num & 0xffffffff);
1633+ vmaw->outbuf_block_info[vmaw->outbuf_count] = block_info;
1634+
1635+ DPRINTF("VMA WRITE MASK %zd %zx\n", cluster_num, block_info);
1636+
1637+ vmaw->outbuf_count++;
1638+
1639+ /** NOTE: We allways write whole clusters, but we correctly set
1640+ * transferred bytes. So transferred == size when when everything
1641+ * went OK.
1642+ */
1643+ size_t transferred = VMA_CLUSTER_SIZE;
1644+
1645+ if (dev_id != vmaw->vmstate_stream) {
1646+ uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
1647+ if (last > vmaw->stream_info[dev_id].size) {
1648+ uint64_t diff = last - vmaw->stream_info[dev_id].size;
1649+ if (diff >= VMA_CLUSTER_SIZE) {
1650+ vma_writer_set_error(vmaw, "vma_writer_write: "
1651+ "read after last cluster");
1652+ return -1;
1653+ }
1654+ transferred -= diff;
1655+ }
1656+ }
1657+
1658+ vmaw->stream_info[dev_id].transferred += transferred;
1659+
1660+ return transferred;
1661+}
1662+
1663+int vma_writer_close(VmaWriter *vmaw, Error **errp)
1664+{
1665+ g_assert(vmaw != NULL);
1666+
1667+ int i;
1668+
1669+ vma_queue_flush(vmaw);
1670+
1671+ /* this should not happen - just to be sure */
1672+ while (!qemu_co_queue_empty(&vmaw->wqueue)) {
1673+ DPRINTF("vma_writer_close wait\n");
1674+ co_sleep_ns(rt_clock, 1000000);
1675+ }
1676+
1677+ if (vmaw->cmd) {
1678+ if (pclose(vmaw->cmd) < 0) {
1679+ vma_writer_set_error(vmaw, "vma_writer_close: "
1680+ "pclose failed - %s", strerror(errno));
1681+ }
1682+ } else {
1683+ if (close(vmaw->fd) < 0) {
1684+ vma_writer_set_error(vmaw, "vma_writer_close: "
1685+ "close failed - %s", strerror(errno));
1686+ }
1687+ }
1688+
1689+ for (i = 0; i <= 255; i++) {
1690+ VmaStreamInfo *si = &vmaw->stream_info[i];
1691+ if (si->size) {
1692+ if (!si->finished) {
1693+ vma_writer_set_error(vmaw, "vma_writer_close: "
1694+ "detected open stream '%s'", si->devname);
1695+ } else if ((si->transferred != si->size) &&
1696+ (i != vmaw->vmstate_stream)) {
1697+ vma_writer_set_error(vmaw, "vma_writer_close: "
1698+ "incomplete stream '%s' (%zd != %zd)",
1699+ si->devname, si->transferred, si->size);
1700+ }
1701+ }
1702+ }
1703+
1704+ for (i = 0; i <= 255; i++) {
1705+ vmaw->stream_info[i].finished = 1; /* mark as closed */
1706+ }
1707+
1708+ vmaw->closed = 1;
1709+
1710+ if (vmaw->status < 0 && *errp == NULL) {
1711+ error_setg(errp, "%s", vmaw->errmsg);
1712+ }
1713+
1714+ return vmaw->status;
1715+}
1716+
1717+void vma_writer_destroy(VmaWriter *vmaw)
1718+{
1719+ assert(vmaw);
1720+
1721+ int i;
1722+
1723+ for (i = 0; i <= 255; i++) {
1724+ if (vmaw->stream_info[i].devname) {
1725+ g_free(vmaw->stream_info[i].devname);
1726+ }
1727+ }
1728+
1729+ if (vmaw->md5csum) {
1730+ g_checksum_free(vmaw->md5csum);
1731+ }
1732+
1733+ g_free(vmaw);
1734+}
1735+
1736+/* backup driver plugin */
1737+
1738+static int vma_dump_cb(void *opaque, uint8_t dev_id, int64_t cluster_num,
1739+ unsigned char *buf, size_t *zero_bytes)
1740+{
1741+ VmaWriter *vmaw = opaque;
1742+
1743+ return vma_writer_write(vmaw, dev_id, cluster_num, buf, zero_bytes);
1744+}
1745+
1746+static int vma_close_cb(void *opaque, Error **errp)
1747+{
1748+ VmaWriter *vmaw = opaque;
1749+
1750+ int res = vma_writer_close(vmaw, errp);
1751+ vma_writer_destroy(vmaw);
1752+
1753+ return res;
1754+}
1755+
1756+static int vma_complete_cb(void *opaque, uint8_t dev_id, int ret)
1757+{
1758+ VmaWriter *vmaw = opaque;
1759+
1760+ if (ret < 0) {
1761+ vma_writer_set_error(vmaw, "backup_complete_cb %d", ret);
1762+ }
1763+
1764+ return vma_writer_close_stream(vmaw, dev_id);
1765+}
1766+
1767+static int vma_register_stream_cb(void *opaque, const char *devname,
1768+ size_t size)
1769+{
1770+ VmaWriter *vmaw = opaque;
1771+
1772+ return vma_writer_register_stream(vmaw, devname, size);
1773+}
1774+
1775+static int vma_register_config_cb(void *opaque, const char *name,
1776+ gpointer data, size_t data_len)
1777+{
1778+ VmaWriter *vmaw = opaque;
1779+
1780+ return vma_writer_add_config(vmaw, name, data, data_len);
1781+}
1782+
1783+static void *vma_open_cb(const char *filename, uuid_t uuid, int64_t speed,
1784+ Error **errp)
1785+{
1786+ return vma_writer_create(filename, uuid, speed, errp);
1787+}
1788+
1789+const BackupDriver backup_vma_driver = {
1790+ .format = "vma",
1791+ .open_cb = vma_open_cb,
1792+ .close_cb = vma_close_cb,
1793+ .register_config_cb = vma_register_config_cb,
1794+ .register_stream_cb = vma_register_stream_cb,
1795+ .dump_cb = vma_dump_cb,
1796+ .complete_cb = vma_complete_cb,
1797+};
1798+
1799diff --git a/vma.c b/vma.c
1800new file mode 100644
1801index 0000000..69af80c
1802--- /dev/null
1803+++ b/vma.c
1804@@ -0,0 +1,550 @@
1805+/*
1806+ * VMA: Virtual Machine Archive
1807+ *
1808+ * Copyright (C) 2012 Proxmox Server Solutions
1809+ *
1810+ * Authors:
1811+ * Dietmar Maurer (dietmar@proxmox.com)
1812+ *
1813+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
1814+ * See the COPYING file in the top-level directory.
1815+ *
1816+ */
1817+
1818+#include <stdio.h>
1819+#include <errno.h>
1820+#include <unistd.h>
1821+#include <stdio.h>
1822+#include <string.h>
1823+#include <sys/types.h>
1824+#include <sys/stat.h>
1825+#include <fcntl.h>
1826+#include <glib.h>
1827+
1828+#include "qemu-common.h"
1829+#include "qemu-option.h"
1830+#include "qemu-error.h"
1831+#include "osdep.h"
1832+#include "sysemu.h"
1833+#include "block_int.h"
1834+#include <stdio.h>
1835+#include "vma.h"
1836+
/* Print the command line synopsis to stdout and terminate with status 1. */
static void help(void)
{
    static const char usage[] =
        "usage: vma command [command options]\n"
        "\n"
        "vma list <filename>\n"
        "vma create <filename> [-c config] <archive> pathname ...\n"
        "vma extract <filename> [-r] <targetdir>\n";

    fputs(usage, stdout);
    exit(1);
}
1850+
/*
 * Split an optional "<devname>=" prefix off @path.
 *
 * @devname receives a newly allocated device name: the text before the
 * first '=' if there is one, otherwise "disk<index>" when @index >= 0, or
 * NULL when @index is negative.
 *
 * Returns the path portion (the text after '=', or @path itself).
 */
static const char *extract_devname(const char *path, char **devname, int index)
{
    assert(path);

    const char *eq = strchr(path, '=');

    if (eq != NULL) {
        *devname = g_strndup(path, eq - path);
        return eq + 1;
    }

    *devname = (index >= 0) ? g_strdup_printf("disk%d", index) : NULL;

    return path;
}
1870+
1871+static void print_content(VmaReader *vmar)
1872+{
1873+ assert(vmar);
1874+
1875+ VmaHeader *head = vma_reader_get_header(vmar);
1876+
1877+ printf("CTIME: %s", ctime(&head->ctime));
1878+
1879+ GList *l = vma_reader_get_config_data(vmar);
1880+ while (l && l->data) {
1881+ VmaConfigData *cdata = (VmaConfigData *)l->data;
1882+ l = g_list_next(l);
1883+ printf("CFG: size: %d name: %s\n", cdata->len, cdata->name);
1884+ }
1885+
1886+ int i;
1887+ VmaDeviceInfo *di;
1888+ for (i = 1; i < 255; i++) {
1889+ di = vma_reader_get_device_info(vmar, i);
1890+ if (di) {
1891+ if (strcmp(di->devname, "vmstate") == 0) {
1892+ printf("VMSTATE: dev_id=%d memory: %zd\n", i, di->size);
1893+ } else {
1894+ printf("DEV: dev_id=%d size: %zd devname: %s\n",
1895+ i, di->size, di->devname);
1896+ }
1897+ }
1898+ }
1899+}
1900+
1901+static int list_content(int argc, char **argv)
1902+{
1903+ int c, ret = 0;
1904+ const char *filename;
1905+
1906+ for (;;) {
1907+ c = getopt(argc, argv, "h");
1908+ if (c == -1) {
1909+ break;
1910+ }
1911+ switch (c) {
1912+ case '?':
1913+ case 'h':
1914+ help();
1915+ break;
1916+ default:
1917+ g_assert_not_reached();
1918+ }
1919+ }
1920+
1921+ /* Get the filename */
1922+ if ((optind + 1) != argc) {
1923+ help();
1924+ }
1925+ filename = argv[optind++];
1926+
1927+ Error *errp = NULL;
1928+ VmaReader *vmar = vma_reader_create(filename, &errp);
1929+
1930+ if (!vmar) {
1931+ g_error("%s", error_get_pretty(errp));
1932+ }
1933+
1934+ print_content(vmar);
1935+
1936+ vma_reader_destroy(vmar);
1937+
1938+ return ret;
1939+}
1940+
/* One entry of the device map read from stdin by "vma extract -r". */
1941+typedef struct RestoreMap {
1942+ char *devname; /* device name; also the key in the devmap hash table */
1943+ char *path; /* restore target handed to bdrv_file_open() */
1944+ bool write_zero; /* forwarded to vma_reader_register_bs() */
1945+} RestoreMap;
1946+
1947+static int extract_content(int argc, char **argv)
1948+{
1949+ int c, ret = 0;
1950+ const char *filename;
1951+ const char *dirname;
1952+ int readmap = 0;
1953+
1954+ for (;;) {
1955+ c = getopt(argc, argv, "hr");
1956+ if (c == -1) {
1957+ break;
1958+ }
1959+ switch (c) {
1960+ case '?':
1961+ case 'h':
1962+ help();
1963+ break;
1964+ case 'r':
1965+ readmap = 1;
1966+ break;
1967+ default:
1968+ help();
1969+ }
1970+ }
1971+
1972+ /* Get the filename */
1973+ if ((optind + 2) != argc) {
1974+ help();
1975+ }
1976+ filename = argv[optind++];
1977+ dirname = argv[optind++];
1978+
1979+ Error *errp = NULL;
1980+ VmaReader *vmar = vma_reader_create(filename, &errp);
1981+
1982+ if (!vmar) {
1983+ g_error("%s", error_get_pretty(errp));
1984+ }
1985+
1986+ if (mkdir(dirname, 0777) < 0) {
1987+ g_error("unable to create target directory %s - %s",
1988+ dirname, strerror(errno));
1989+ }
1990+
1991+ GList *l = vma_reader_get_config_data(vmar);
1992+ while (l && l->data) {
1993+ VmaConfigData *cdata = (VmaConfigData *)l->data;
1994+ l = g_list_next(l);
1995+ char *cfgfn = g_strdup_printf("%s/%s", dirname, cdata->name);
1996+ GError *err = NULL;
1997+ if (!g_file_set_contents(cfgfn, (gchar *)cdata->data, cdata->len,
1998+ &err)) {
1999+ g_error("Unable to write file: %s", err->message);
2000+ }
2001+ }
2002+
2003+ GHashTable *devmap = g_hash_table_new(g_str_hash, g_str_equal);
2004+
2005+ if (readmap) {
2006+ print_content(vmar);
2007+
2008+ while (1) {
2009+ char inbuf[8192];
2010+ char *line = fgets(inbuf, sizeof(inbuf), stdin);
2011+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
2012+ break;
2013+ }
2014+ int len = strlen(line);
2015+ if (line[len - 1] == '\n') {
2016+ line[len - 1] = '\0';
2017+ if (len == 1) {
2018+ break;
2019+ }
2020+ }
2021+
2022+ const char *path;
2023+ bool write_zero;
2024+ if (line[0] == '0' && line[1] == ':') {
2025+ path = inbuf + 2;
2026+ write_zero = false;
2027+ } else if (line[0] == '1' && line[1] == ':') {
2028+ path = inbuf + 2;
2029+ write_zero = true;
2030+ } else {
2031+ g_error("read map failed - parse error ('%s')", inbuf);
2032+ }
2033+
2034+ char *devname = NULL;
2035+ path = extract_devname(path, &devname, -1);
2036+ if (!devname) {
2037+ g_error("read map failed - no dev name specified ('%s')",
2038+ inbuf);
2039+ }
2040+
2041+ printf("TEST %s %s\n", path, devname);
2042+
2043+ RestoreMap *map = g_new0(RestoreMap, 1);
2044+ map->devname = g_strdup(devname);
2045+ map->path = g_strdup(path);
2046+ map->write_zero = write_zero;
2047+
2048+ g_hash_table_insert(devmap, map->devname, map);
2049+
2050+ };
2051+ }
2052+
2053+ int i;
2054+ int vmstate_fd = -1;
2055+ guint8 vmstate_stream = 0;
2056+
2057+ for (i = 1; i < 255; i++) {
2058+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2059+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
2060+ vmstate_stream = i;
2061+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
2062+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
2063+ if (vmstate_fd < 0) {
2064+ g_error("create vmstate file '%s' failed - %s", statefn,
2065+ strerror(errno));
2066+ }
2067+ g_free(statefn);
2068+ } else if (di) {
2069+ char *devfn = NULL;
2070+ int flags = BDRV_O_RDWR|BDRV_O_CACHE_WB;
2071+ bool write_zero = true;
2072+
2073+ if (readmap) {
2074+ RestoreMap *map;
2075+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
2076+ if (map == NULL) {
2077+ g_error("no device name mapping for %s", di->devname);
2078+ }
2079+ devfn = map->path;
2080+ write_zero = map->write_zero;
2081+ } else {
2082+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2083+ dirname, di->devname);
2084+ printf("DEVINFO %s %zd\n", devfn, di->size);
2085+
2086+ if (bdrv_img_create(devfn, "raw", NULL, NULL, NULL,
2087+ di->size, flags)) {
2088+ g_error("can't create file %s", devfn);
2089+ }
2090+
2091+ /* Note: we created an empty file above, so there is no
2092+ * need to write zeroes (so we generate a sparse file)
2093+ */
2094+ write_zero = false;
2095+ }
2096+
2097+ BlockDriverState *bs = NULL;
2098+ if (bdrv_file_open(&bs, devfn, flags)) {
2099+ g_error("can't open file %s", devfn);
2100+ }
2101+ if (vma_reader_register_bs(vmar, i, bs, write_zero, &errp) < 0) {
2102+ g_error("%s", error_get_pretty(errp));
2103+ }
2104+
2105+ if (!readmap) {
2106+ g_free(devfn);
2107+ }
2108+ }
2109+ }
2110+
2111+ if (vma_reader_restore(vmar, vmstate_fd, &errp) < 0) {
2112+ g_error("restore failed - %s", error_get_pretty(errp));
2113+ }
2114+
2115+ if (!readmap) {
2116+ for (i = 1; i < 255; i++) {
2117+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2118+ if (di && (i != vmstate_stream)) {
2119+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2120+ dirname, di->devname);
2121+ char *fn = g_strdup_printf("%s/disk-%s.raw",
2122+ dirname, di->devname);
2123+ if (rename(tmpfn, fn) != 0) {
2124+ g_error("rename %s to %s failed - %s",
2125+ tmpfn, fn, strerror(errno));
2126+ }
2127+ }
2128+ }
2129+ }
2130+
2131+ vma_reader_destroy(vmar);
2132+
2133+ bdrv_close_all();
2134+
2135+ return ret;
2136+}
2137+
/* Per-device context handed to the backup job callbacks in create_archive(). */
2138+typedef struct BackupCB {
2139+ VmaWriter *vmaw; /* archive writer used by the callbacks */
2140+ uint8_t dev_id; /* id returned by vma_writer_register_stream() */
2141+} BackupCB;
2142+
2143+static int backup_dump_cb(void *opaque, BlockDriverState *bs,
2144+ int64_t cluster_num, unsigned char *buf)
2145+{
2146+ BackupCB *bcb = opaque;
2147+ size_t zb = 0;
2148+ if (vma_writer_write(bcb->vmaw, bcb->dev_id, cluster_num, buf, &zb) < 0) {
2149+ g_warning("backup_dump_cb vma_writer_write failed");
2150+ return -1;
2151+ }
2152+
2153+ return 0;
2154+}
2155+
2156+static void backup_complete_cb(void *opaque, int ret)
2157+{
2158+ BackupCB *bcb = opaque;
2159+
2160+ if (ret < 0) {
2161+ vma_writer_set_error(bcb->vmaw, "backup_complete_cb %d", ret);
2162+ }
2163+
2164+ if (vma_writer_close_stream(bcb->vmaw, bcb->dev_id) <= 0) {
2165+ Error *err = NULL;
2166+ if (vma_writer_close(bcb->vmaw, &err) != 0) {
2167+ g_warning("vma_writer_close failed %s", error_get_pretty(err));
2168+ }
2169+ }
2170+}
2171+
2172+static int create_archive(int argc, char **argv)
2173+{
2174+ int i, c, res;
2175+ int verbose = 0;
2176+ const char *archivename;
2177+ GList *config_files = NULL;
2178+
2179+ for (;;) {
2180+ c = getopt(argc, argv, "hvc:");
2181+ if (c == -1) {
2182+ break;
2183+ }
2184+ switch (c) {
2185+ case '?':
2186+ case 'h':
2187+ help();
2188+ break;
2189+ case 'c':
2190+ config_files = g_list_append(config_files, optarg);
2191+ break;
2192+ case 'v':
2193+ verbose = 1;
2194+ break;
2195+ default:
2196+ g_assert_not_reached();
2197+ }
2198+ }
2199+
2200+
2201+ /* make sure we have archive name and at least one path */
2202+ if ((optind + 2) > argc) {
2203+ help();
2204+ }
2205+
2206+ archivename = argv[optind++];
2207+
2208+ uuid_t uuid;
2209+ uuid_generate(uuid);
2210+
2211+ Error *local_err = NULL;
2212+ VmaWriter *vmaw = vma_writer_create(archivename, uuid, 0, &local_err);
2213+
2214+ if (vmaw == NULL) {
2215+ g_error("%s", error_get_pretty(local_err));
2216+ }
2217+
2218+ GList *l = config_files;
2219+ while (l && l->data) {
2220+ char *name = l->data;
2221+ char *cdata = NULL;
2222+ gsize clen = 0;
2223+ GError *err = NULL;
2224+ if (!g_file_get_contents(name, &cdata, &clen, &err)) {
2225+ unlink(archivename);
2226+ g_error("Unable to read file: %s", err->message);
2227+ }
2228+
2229+ if (vma_writer_add_config(vmaw, name, cdata, clen) != 0) {
2230+ unlink(archivename);
2231+ g_error("Unable to append config data %s (len = %zd)",
2232+ name, clen);
2233+ }
2234+ l = g_list_next(l);
2235+ }
2236+
2237+ int ind = 0;
2238+ while (optind < argc) {
2239+ const char *path = argv[optind++];
2240+ char *devname = NULL;
2241+ path = extract_devname(path, &devname, ind++);
2242+
2243+ BlockDriver *drv = NULL;
2244+ BlockDriverState *bs = bdrv_new(devname);
2245+
2246+ res = bdrv_open(bs, path, BDRV_O_CACHE_WB , drv);
2247+ if (res < 0) {
2248+ unlink(archivename);
2249+ g_error("bdrv_open '%s' failed", path);
2250+ }
2251+ int64_t size = bdrv_getlength(bs);
2252+ int dev_id = vma_writer_register_stream(vmaw, devname, size);
2253+ if (dev_id <= 0) {
2254+ unlink(archivename);
2255+ g_error("vma_writer_register_stream '%s' failed", devname);
2256+ }
2257+
2258+ BackupCB *bcb = g_new0(BackupCB, 1);
2259+ bcb->vmaw = vmaw;
2260+ bcb->dev_id = dev_id;
2261+
2262+ if (backup_job_start(bs, backup_dump_cb, backup_complete_cb, bcb) < 0) {
2263+ unlink(archivename);
2264+ g_error("backup_job_start failed");
2265+ }
2266+ }
2267+
2268+ VmaStatus vmastat;
2269+ int percent = 0;
2270+ int last_percent = -1;
2271+
2272+ while (1) {
2273+ main_loop_wait(false);
2274+ vma_writer_get_status(vmaw, &vmastat);
2275+
2276+ if (verbose) {
2277+
2278+ uint64_t total = 0;
2279+ uint64_t transferred = 0;
2280+ uint64_t zero_bytes = 0;
2281+
2282+ int i;
2283+ for (i = 0; i < 256; i++) {
2284+ if (vmastat.stream_info[i].size) {
2285+ total += vmastat.stream_info[i].size;
2286+ transferred += vmastat.stream_info[i].transferred;
2287+ zero_bytes += vmastat.stream_info[i].zero_bytes;
2288+ }
2289+ }
2290+ percent = (transferred*100)/total;
2291+ if (percent != last_percent) {
2292+ printf("progress %d%% %zd/%zd %zd\n", percent,
2293+ transferred, total, zero_bytes);
2294+
2295+ last_percent = percent;
2296+ }
2297+ }
2298+
2299+ if (vmastat.closed) {
2300+ break;
2301+ }
2302+ }
2303+
2304+ bdrv_drain_all();
2305+
2306+ vma_writer_get_status(vmaw, &vmastat);
2307+
2308+ if (verbose) {
2309+ for (i = 0; i < 256; i++) {
2310+ VmaStreamInfo *si = &vmastat.stream_info[i];
2311+ if (si->size) {
2312+ printf("image %s: size=%zd zeros=%zd saved=%zd\n", si->devname,
2313+ si->size, si->zero_bytes, si->size - si->zero_bytes);
2314+ }
2315+ }
2316+ }
2317+
2318+ if (vmastat.status < 0) {
2319+ unlink(archivename);
2320+ g_error("creating vma archive failed");
2321+ }
2322+
2323+ return 0;
2324+}
2325+
/*
 * Entry point of the vma tool: initialises the main loop and the block
 * layer, then dispatches on the first argument ("list", "create" or
 * "extract"). Anything else prints the usage text.
 */
int main(int argc, char **argv)
{
    error_set_progname(argv[0]);

    qemu_init_main_loop();

    bdrv_init();

    if (argc < 2) {
        help();
    }

    const char *cmdname = argv[1];

    /* shift the command name away so getopt() starts behind it */
    argc--;
    argv++;

    if (strcmp(cmdname, "list") == 0) {
        return list_content(argc, argv);
    }
    if (strcmp(cmdname, "create") == 0) {
        return create_archive(argc, argv);
    }
    if (strcmp(cmdname, "extract") == 0) {
        return extract_content(argc, argv);
    }

    help();
    return 0;
}
2355diff --git a/vma.h b/vma.h
2356new file mode 100644
2357index 0000000..10800a1
2358--- /dev/null
2359+++ b/vma.h
2360@@ -0,0 +1,145 @@
2361+/*
2362+ * VMA: Virtual Machine Archive
2363+ *
2364+ * Copyright (C) Proxmox Server Solutions
2365+ *
2366+ * Authors:
2367+ * Dietmar Maurer (dietmar@proxmox.com)
2368+ *
2369+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
2370+ * See the COPYING file in the top-level directory.
2371+ *
2372+ */
2373+
2374+#ifndef BACKUP_VMA_H
2375+#define BACKUP_VMA_H
2376+
2377+#include "backup.h"
2378+#include "error.h"
2379+
/* A block is 4096 bytes; a cluster is 16 consecutive blocks (64KiB). */
2380+#define VMA_BLOCK_BITS 12
2381+#define VMA_BLOCK_SIZE (1<<VMA_BLOCK_BITS)
2382+#define VMA_CLUSTER_BITS (VMA_BLOCK_BITS+4)
2383+#define VMA_CLUSTER_SIZE (1<<VMA_CLUSTER_BITS)
2384+
/* compile-time sanity check of the derived cluster size */
2385+#if VMA_CLUSTER_SIZE != 65536
2386+#error unexpected cluster size
2387+#endif
2388+
2389+#define VMA_EXTENT_HEADER_SIZE 512 /* must equal sizeof(VmaExtentHeader) */
2390+#define VMA_BLOCKS_PER_EXTENT 59 /* blockinfo entries per extent; multiplied by the cluster size below */
2391+#define VMA_MAX_CONFIGS 256 /* size of the config_names/config_data tables */
2392+
/* Largest possible extent: the header plus 59 completely filled clusters. */
2393+#define VMA_MAX_EXTENT_SIZE \
2394+ (VMA_EXTENT_HEADER_SIZE+VMA_CLUSTER_SIZE*VMA_BLOCKS_PER_EXTENT)
2395+#if VMA_MAX_EXTENT_SIZE != 3867136
2396+#error unexpected VMA_EXTENT_SIZE
2397+#endif
2398+
2399+/* File Format Definitions */
2400+
/* Magic numbers: the bytes 'V' 'M' 'A' 0x00 and 'V' 'M' 'A' 'E', stored big-endian. */
2401+#define VMA_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|0x00))
2402+#define VMA_EXTENT_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|'E'))
2403+
/*
 * Per-device entry of the archive header (32 bytes). The writer stores
 * devname_ptr and size converted with GUINT32_TO_BE/GUINT64_TO_BE.
 */
2404+typedef struct VmaDeviceInfoHeader {
2405+ uint32_t devname_ptr; /* offset into blob_buffer table */
2406+ uint32_t reserved0;
2407+ uint64_t size; /* device size in bytes */
2408+ uint64_t reserved1;
2409+ uint64_t reserved2;
2410+} VmaDeviceInfoHeader;
2411+
2412+typedef struct VmaHeader { /* archive header; layout is the file format */
2413+ uint32_t magic;
2414+ uint32_t version;
2415+ unsigned char uuid[16];
2416+ int64_t ctime;
2417+ unsigned char md5sum[16];
2418+
2419+ uint32_t blob_buffer_offset;
2420+ uint32_t blob_buffer_size;
2421+ uint32_t header_size;
2422+
2423+ unsigned char reserved[1984]; /* brings the members above to 2044 bytes */
2424+
2425+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
2426+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
2427+ uint32_t reserved1; /* explicit pad: dev_info at offset 4096 on every ABI */
2428+ VmaDeviceInfoHeader dev_info[256];
2429+} VmaHeader;
2430+
2431+typedef struct VmaExtentHeader { /* cf. VMA_EXTENT_HEADER_SIZE (512) */
2432+ uint32_t magic;
2433+ uint16_t reserved1;
2434+ uint16_t block_count;
2435+ unsigned char uuid[16];
2436+ unsigned char md5sum[16]; /* members up to here: 40 bytes */
2437+ uint64_t blockinfo[VMA_BLOCKS_PER_EXTENT]; /* 59 * 8 = 472 bytes */
2438+} VmaExtentHeader;
2439+
2440+/* functions/definitions to read/write vma files */
2441+/* Both handle types are only forward-declared in this header. */
2442+typedef struct VmaReader VmaReader;
2443+
2444+typedef struct VmaWriter VmaWriter;
2445+
2446+typedef struct VmaConfigData {
2447+ const char *name;
2448+ const void *data;
2449+ uint32_t len; /* presumably the byte length of data - TODO confirm */
2450+} VmaConfigData;
2451+
2452+typedef struct VmaStreamInfo { /* element of VmaStatus.stream_info[] */
2453+ uint64_t size; /* presumably the device size in bytes - TODO confirm */
2454+ uint64_t cluster_count;
2455+ uint64_t transferred;
2456+ uint64_t zero_bytes;
2457+ int finished; /* NOTE(review): looks like a flag - confirm in the writer */
2458+ char *devname;
2459+} VmaStreamInfo;
2460+
2461+typedef struct VmaStatus { /* filled in by vma_writer_get_status() */
2462+ int status;
2463+ bool closed;
2464+ char errmsg[8192];
2465+ char uuid_str[37]; /* presumably 36-char UUID text plus NUL - confirm */
2466+ VmaStreamInfo stream_info[256]; /* same count as VmaHeader.dev_info[] */
2467+} VmaStatus;
2468+
2469+typedef struct VmaDeviceInfo { /* see vma_reader_get_device_info() */
2470+ uint64_t size; /* device size in bytes */
2471+ const char *devname;
2472+} VmaDeviceInfo;
2473+
2474+extern const BackupDriver backup_vma_driver;
2475+/* Writer API; VmaWriter is opaque to users of this header. */
2476+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, int64_t speed,
2477+ Error **errp);
2478+int vma_writer_close(VmaWriter *vmaw, Error **errp);
2479+void vma_writer_destroy(VmaWriter *vmaw);
2480+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
2481+ size_t len);
2482+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
2483+ size_t size);
2484+/* NOTE(review): zero_bytes looks like an out parameter - confirm. */
2485+int64_t coroutine_fn vma_writer_write(VmaWriter *vmaw, uint8_t dev_id,
2486+ int64_t cluster_num, unsigned char *buf,
2487+ size_t *zero_bytes);
2488+
2489+int coroutine_fn vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id);
2490+
2491+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
2492+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
2493+
2494+/* Reader API; VmaReader is opaque to users of this header. */
2495+VmaReader *vma_reader_create(const char *filename, Error **errp);
2496+void vma_reader_destroy(VmaReader *vmar);
2497+VmaHeader *vma_reader_get_header(VmaReader *vmar);
2498+GList *vma_reader_get_config_data(VmaReader *vmar);
2499+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
2500+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
2501+ BlockDriverState *bs, bool write_zeroes,
2502+ Error **errp);
2503+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, Error **errp);
2504+
2505+#endif /* BACKUP_VMA_H */
2506--
25071.7.2.5
2508